This rewrites the way CPU alerts work, making them a bitmap (since multiple alerts can happen simultaneously, e.g. SMC and IRQ). This doesn't really fix many games, but it improves accuracy overall and speeds up some I/O writes (the ones without side effects). IRQ raising is now decoupled and explicitly invoked via a new function (check_and_raise_interrupts) to avoid issues such as invalid CPSR values (which don't seem to bother most games!). There are more side effects still missing, so this just lays the groundwork for further fixes.
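A rough C-side sketch of the new flow (illustrative only: apart from check_and_raise_interrupts and flush_translation_cache_ram, which this stub file actually calls, the names and signatures below are assumptions):

  /* Alert bits, mirroring the CPU_ALERT_*_B constants used by the stubs. */
  #define CPU_ALERT_HALT (1u << 0)   /* CPU entered halt/stop state       */
  #define CPU_ALERT_SMC  (1u << 1)   /* self-modifying code was detected  */
  #define CPU_ALERT_IRQ  (1u << 2)   /* an interrupt may need raising     */

  /* Signatures assumed for illustration. */
  void flush_translation_cache_ram(void);
  void check_and_raise_interrupts(void);

  static void handle_cpu_alerts(unsigned alerts)
  {
    if (alerts & CPU_ALERT_SMC)      /* drop stale translated blocks */
      flush_translation_cache_ram();
    if (alerts & CPU_ALERT_IRQ)      /* IRQ raising is now an explicit call */
      check_and_raise_interrupts();
    /* CPU_ALERT_HALT: the caller keeps calling update_gba() until the CPU
       leaves the halt state (see alert_loop in the stub file below). */
  }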
# gameplaySP
#
# Copyright (C) 2021 David Guillen Fandos <david@davidgf.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

#include "../gpsp_config.h"

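// defsymbl() defines each entry point twice, as "symbol" and "_symbol", so the
// same code links on targets whose C ABI prefixes external symbols with an
// underscore.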
#define defsymbl(symbol) \
.align 2; \
.type symbol, %function ;\
.global symbol ; \
.global _##symbol ; \
symbol: \
_##symbol:

.text
.align 2

#define REG_R0            (0 * 4)
#define REG_R1            (1 * 4)
#define REG_R2            (2 * 4)
#define REG_R3            (3 * 4)
#define REG_R4            (4 * 4)
#define REG_R5            (5 * 4)
#define REG_R6            (6 * 4)
#define REG_R7            (7 * 4)
#define REG_R8            (8 * 4)
#define REG_R9            (9 * 4)
#define REG_R10           (10 * 4)
#define REG_R11           (11 * 4)
#define REG_R12           (12 * 4)
#define REG_R13           (13 * 4)
#define REG_R14           (14 * 4)
#define REG_SP            (13 * 4)
#define REG_LR            (14 * 4)
#define REG_PC            (15 * 4)
#define REG_CPSR          (16 * 4)
#define CPU_MODE          (17 * 4)
#define CPU_HALT_STATE    (18 * 4)

#define REG_BUS_VALUE     (19 * 4)
#define REG_N_FLAG        (20 * 4)
#define REG_Z_FLAG        (21 * 4)
#define REG_C_FLAG        (22 * 4)
#define REG_V_FLAG        (23 * 4)
#define CHANGED_PC_STATUS (24 * 4)
#define COMPLETED_FRAME   (25 * 4)
#define OAM_UPDATED       (26 * 4)
#define REG_SAVE          (27 * 4)
#define REG_SAVE2         (28 * 4)
#define REG_SAVE3         (29 * 4)
#define REG_SAVE4         (30 * 4)
#define REG_SAVE5         (31 * 4)

#define CPU_ALERT_HALT_B  0
#define CPU_ALERT_SMC_B   1
#define CPU_ALERT_IRQ_B   2

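// While translated code runs, the most frequently used emulator state is kept
// in AArch64 callee-saved registers (x19-x25) so it survives calls into C.
// The emulated r0-r14 are cached in w6-w17/w26-w28 and are spilled with
// store_registers() before any call into C code.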
#define reg_base          x20
#define reg_cycles        w21

#define reg_c_flag        w22
#define reg_v_flag        w23
#define reg_z_flag        w24
#define reg_n_flag        w25
#define reg_save0         w19

// Memory offsets from reg_base to the different buffers
#define RDMAP_OFF        -0xB9000   // 8K pointers (64KB)
#define IWRAM_OFF        -0xA9000   // 32KB (double for shadow)
#define VRAM_OFF         -0x99000   // 96KB
#define EWRAM_OFF        -0x81000   // 256KB (double for shadow)
#define MEM_TBL_OFF       -0x1000   // Some space for the tables
#define SPSR_RAM_OFF        0x100
#define REGMODE_RAM_OFF     0x118
#define OAM_RAM_OFF         0x200
#define PAL_RAM_OFF         0x600
#define IOREG_OFF           0xA00
#define PALCNV_RAM_OFF      0xE00

// Used for SWI handling
#define MODE_SUPERVISOR  0x13
#define SUPERVISOR_SPSR  (SPSR_RAM_OFF + 3*4)                           // spsr[3]
#define SUPERVISOR_LR    (REGMODE_RAM_OFF + (3 * (7 * 4)) + (6 * 4))    // reg_mode[3][6]

// Stores and restores registers to their register storage in RAM

#define load_registers()                 ;\
  ldp w6, w7, [reg_base, #0]             ;\
  ldp w8, w9, [reg_base, #8]             ;\
  ldp w10, w11, [reg_base, #16]          ;\
  ldp w12, w13, [reg_base, #24]          ;\
  ldp w14, w15, [reg_base, #32]          ;\
  ldp w16, w17, [reg_base, #40]          ;\
  ldp w26, w27, [reg_base, #48]          ;\
  ldr w28, [reg_base, #56]               ;\

#define store_registers()                ;\
  stp w6, w7, [reg_base, #0]             ;\
  stp w8, w9, [reg_base, #8]             ;\
  stp w10, w11, [reg_base, #16]          ;\
  stp w12, w13, [reg_base, #24]          ;\
  stp w14, w15, [reg_base, #32]          ;\
  stp w16, w17, [reg_base, #40]          ;\
  stp w26, w27, [reg_base, #48]          ;\
  str w28, [reg_base, #56]               ;\

// Extracts flags from CPSR into the cache flag registers

#define extract_flags_reg(tmpreg)        ;\
  ubfx reg_n_flag, tmpreg, #31, #1       ;\
  ubfx reg_z_flag, tmpreg, #30, #1       ;\
  ubfx reg_c_flag, tmpreg, #29, #1       ;\
  ubfx reg_v_flag, tmpreg, #28, #1       ;\

#define extract_flags(tmpreg)            ;\
  ldr tmpreg, [reg_base, #REG_CPSR]      ;\
  extract_flags_reg(tmpreg)              ;\

// Collects cache flag bits and consolidates them to the CPSR reg

#define consolidate_flags(tmpreg)        ;\
  ldr tmpreg, [reg_base, #REG_CPSR]      ;\
  bfi tmpreg, reg_n_flag, #31, #1        ;\
  bfi tmpreg, reg_z_flag, #30, #1        ;\
  bfi tmpreg, reg_c_flag, #29, #1        ;\
  bfi tmpreg, reg_v_flag, #28, #1        ;\
  str tmpreg, [reg_base, #REG_CPSR]      ;\

// Update the GBA hardware (video, sound, input, etc)
// w0: current PC

defsymbl(a64_update_gba)
  str w0, [reg_base, #REG_PC]              // update the PC value
  str lr, [reg_base, #REG_SAVE]            // Save LR for later if needed

  consolidate_flags(w0)                    // update the CPSR
  store_registers()                        // save out registers

  mov w0, reg_cycles                       // load remaining cycles
  bl update_gba                            // update GBA state

  ldr w1, [reg_base, #COMPLETED_FRAME]     // return to main if new frame
  cbnz w1, return_to_main

  // Resume execution (perhaps from a new PC)
  mov reg_cycles, w0                       // load new cycle count
  extract_flags(w2)                        // reload flag cache bits

  ldr w0, [reg_base, #CHANGED_PC_STATUS]   // see if the PC has changed
  cbnz w0, 1f                              // go start from new PC

  ldr lr, [reg_base, #REG_SAVE]            // Restore return point
  load_registers()                         // reload registers
  ret                                      // resume execution, no PC change

1: // Resume from new PC
  ldr w0, [reg_base, #REG_PC]              // load new PC
  tbnz w2, #5, 2f                          // CPSR.T set means Thumb mode

  bl block_lookup_address_arm
  load_registers()                         // reload registers
  br x0                                    // jump to new ARM block
2:
  bl block_lookup_address_thumb
  load_registers()                         // reload registers
  br x0                                    // jump to new Thumb block
.size a64_update_gba, .-a64_update_gba

// Cheat hooks for master function
// This is called whenever PC == cheats-master-function
// Just calls the C function to process cheats

defsymbl(a64_cheat_hook)
  store_registers()
  str lr, [reg_base, #REG_SAVE]
  bl process_cheats
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret

// These are branch stubs for performing indirect branches. They are not
// linked to and do not return; they look up the target block and jump to it.

// Input:
// r0: PC to branch to

defsymbl(a64_indirect_branch_arm)
  store_registers()
  bl block_lookup_address_arm
  load_registers()
  br x0

defsymbl(a64_indirect_branch_thumb)
  store_registers()
  bl block_lookup_address_thumb
  load_registers()
  br x0

defsymbl(a64_indirect_branch_dual)
  store_registers()
  bl block_lookup_address_dual
  load_registers()
  br x0

// Read CPSR and SPSR values

defsymbl(execute_read_cpsr)
  consolidate_flags(w0)                    // Consolidate on ret value
  ret

defsymbl(execute_read_spsr)
  ldr w1, [reg_base, #CPU_MODE]            // read cpu mode to w1
  and w1, w1, #0xF                         // Like REG_SPSR() macro
  add x0, reg_base, #SPSR_RAM_OFF          // ptr to spsr table
  ldr w0, [x0, x1, lsl #2]                 // Read actual value from table
  ret

// Update the cpsr.

// Input:
// w0: new cpsr value
// w1: current PC
// w2: store bitmask (user mode)
// w3: store bitmask (privileged mode)

defsymbl(execute_store_cpsr)
  ldr w4, [reg_base, #CPU_MODE]            // w4 = cpu_mode
  tst x4, #0x10                            // Bit 4 is set on privileged modes
  csel x2, x2, x3, eq                      // Select the correct mask

  ldr w4, [reg_base, #REG_CPSR]            // read current CPSR
  and w3, w0, w2                           // reg_flags = new_cpsr & store_mask
  bic w4, w4, w2                           // current_cpsr & ~store_mask
  orr w0, w3, w4                           // w0 = final CPSR value
  extract_flags_reg(w0)                    // Update cached flags too

  str lr, [reg_base, #REG_SAVE]
  store_registers()
  bl execute_store_cpsr_body               // Do the remaining work in C code

  cbnz w0, 1f                              // If PC has changed due to this

  ldr lr, [reg_base, #REG_SAVE]            // Resume execution where we left it
  load_registers()
  ret

1:
  // Returned value contains the PC, resume execution there
  bl block_lookup_address_arm
  load_registers()
  br x0                                    // Resume in the returned block
.size execute_store_cpsr, .-execute_store_cpsr

// Write to SPSR
// w0: new SPSR value
// w1: store mask

defsymbl(execute_store_spsr)
  ldr w2, [reg_base, #CPU_MODE]            // read cpu mode to w2
  and w2, w2, #0xF                         // Like REG_SPSR() macro
  add x2, reg_base, x2, lsl #2             // calculate table offset
  ldr w3, [x2, #SPSR_RAM_OFF]              // Read actual value from table

  and w0, w0, w1                           // new-spsr & mask
  bic w3, w3, w1                           // old-spsr & ~mask
  orr w0, w0, w3                           // final spsr value

  str w0, [x2, #SPSR_RAM_OFF]              // Store new SPSR
  ret
.size execute_store_spsr, .-execute_store_spsr

// Restore the cpsr from the current mode's spsr (and perform any mode switch).

// Input:
// r0: current pc

defsymbl(execute_spsr_restore)
  ldr w1, [reg_base, #CPU_MODE]            // w1 = cpu_mode
  and w1, w1, #0xF                         // Fold user and system modes
  cbz w1, 1f                               // Ignore if in user or system mode

  lsl w2, w1, #2                           // We access 32 bit words
  add w2, w2, #SPSR_RAM_OFF
  ldr w3, [reg_base, x2]                   // w3 = spsr[cpu_mode]
  str w3, [reg_base, #REG_CPSR]            // update CPSR with SPSR value
  extract_flags_reg(w3)                    // update cached flag values

  // This function call will pass r0 (address) and return it.
  str lr, [reg_base, #REG_SAVE]
  store_registers()                        // save ARM registers
  bl execute_spsr_restore_body
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()

1:
  ret
.size execute_spsr_restore, .-execute_spsr_restore

// Setup the mode transition work for calling an SWI.

// Input:
// r0: current pc

defsymbl(execute_swi)
  str lr, [reg_base, #REG_SAVE]
  str w0, [reg_base, #SUPERVISOR_LR]       // Store next PC into supervisor LR
  consolidate_flags(w1)                    // Calculate current CPSR flags
  str w1, [reg_base, #SUPERVISOR_SPSR]     // Store them in the SPSR
  bic w1, w1, #0x3F                        // Clear mode bits
  mov w2, #(0x13 | 0x80)                   // Supervisor mode, IRQs disabled
  orr w1, w1, w2
  str w1, [reg_base, #REG_CPSR]            // Update CPSR with new value
  store_registers()
  mov w0, #MODE_SUPERVISOR
  bl set_cpu_mode                          // Set supervisor mode
  ldr w0, =0xe3a02004
  str w0, [reg_base, #REG_BUS_VALUE]
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret
.size execute_swi, .-execute_swi

defsymbl(execute_arm_translate_internal)
  // save registers that will be clobbered
  sub sp, sp, #96
  stp x19, x20, [sp, #0]
  stp x21, x22, [sp, #16]
  stp x23, x24, [sp, #32]
  stp x25, x26, [sp, #48]
  stp x27, x28, [sp, #64]
  stp x29, x30, [sp, #80]

  mov reg_cycles, w0                       // load cycle counter
  mov reg_base, x1                         // init reg_base

  // If the CPU is already sleeping, just wait for IRQs
  ldr w1, [reg_base, #CPU_HALT_STATE]
  cbnz w1, alert_loop

  ldr w0, [reg_base, #REG_PC]              // load current PC

// Resume execution at PC (in w0)
lookup_pc:
  ldr w1, [reg_base, #REG_CPSR]            // w1 = flags
  extract_flags_reg(w1)
  tbnz w1, #5, 2f                          // see if Thumb bit is set

  // Lookup and jump to the right mode block
  bl block_lookup_address_arm
  load_registers()
  br x0
2:
  bl block_lookup_address_thumb
  load_registers()
  br x0

// Epilogue to return to the main thread (whatever called execute_arm_translate)

return_to_main:
  // restore the saved regs and return
  ldp x19, x20, [sp, #0]
  ldp x21, x22, [sp, #16]
  ldp x23, x24, [sp, #32]
  ldp x25, x26, [sp, #48]
  ldp x27, x28, [sp, #64]
  ldp x29, x30, [sp, #80]
  add sp, sp, #96
  ret

// Memory read stub routines

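// Each load handler table below holds 17 pointers (8 bytes each, 136 bytes per
// table): slot 0 is the fallback for unaligned or out-of-range addresses, and
// slots 1-16 are indexed by address bits 24-27. The store tables start 816
// bytes (six load tables) into the block and hold 16 pointers (128 bytes)
// each, indexed directly by address bits 24-27.
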
#define execute_load_builder(load_type, ldop, ldmask, tblidx, ldfn)          ;\
                                                                             ;\
defsymbl(execute_load_##load_type)                                           ;\
  tst w0, #(0xf0000000 | ldmask)                                             ;\
  lsr w3, w0, #24                                                            ;\
  csinc w3, wzr, w3, ne                                                      ;\
  add x4, reg_base, (MEM_TBL_OFF + tblidx*136)                               ;\
  ldr x3, [x4, x3, lsl #3]                                                   ;\
  br x3                                                                      ;\
                                                                             ;\
ld_bios_##load_type:                     /* BIOS area, need to verify PC */  ;\
  lsr w3, w1, #24                        /* Are we running the BIOS */       ;\
  cbnz w3, ld_slow_##load_type                                               ;\
  and w0, w0, #(0x7fff)                  /* BIOS only 16 KB */               ;\
  add x3, reg_base, #(RDMAP_OFF)                                             ;\
  ldr x3, [x3]                           /* x3 = bios mem buffer */          ;\
  ldop w0, [x3, x0]                      /* load actual value */             ;\
  ret                                                                        ;\
                                                                             ;\
ld_ewram_##load_type:                    /* EWRAM area */                    ;\
  and w0, w0, #(0x3ffff)                                                     ;\
  add x3, reg_base, #EWRAM_OFF                                               ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_iwram_##load_type:                    /* IWRAM area */                    ;\
  and w0, w0, #(0x7fff)                                                      ;\
  add x3, reg_base, #(IWRAM_OFF+0x8000)                                      ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_ioram_##load_type:                    /* I/O RAM area */                  ;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(IOREG_OFF)                                             ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_palram_##load_type:                   /* PAL RAM area */                  ;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(PAL_RAM_OFF)                                           ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_oamram_##load_type:                   /* OAM RAM area */                  ;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(OAM_RAM_OFF)                                           ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_rdmap_##load_type:                                                        ;\
  lsr w4, w0, #15                        /* Each block is 32KB */            ;\
  add x3, reg_base, #(RDMAP_OFF)                                             ;\
  ldr x4, [x3, x4, lsl #3]               /* x4 = table pointer */            ;\
  cbz x4, ld_slow_##load_type            /* not mapped, go slow */           ;\
  and w0, w0, #(0x7fff)                  /* 32KB pages */                    ;\
  ldop w0, [x4, x0]                      /* load actual value */             ;\
  ret                                                                        ;\
                                                                             ;\
ld_slow_##load_type:                     /* Slow C path */                   ;\
  str w1, [reg_base, #REG_PC]            /* write out PC */                  ;\
  str lr, [reg_base, #REG_SAVE]          /* Save LR */                       ;\
  store_registers()                                                          ;\
  bl ldfn                                                                    ;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                                                        ;\
.size execute_load_##load_type, .-execute_load_##load_type

#define load_lookup_table(load_type, aload_type)                             ;\
  .quad ld_slow_##aload_type             /* -1: Unaligned/Bad access */      ;\
  .quad ld_bios_##aload_type             /* 0x00: BIOS */                    ;\
  .quad ld_slow_##aload_type             /* 0x01: Open bus */                ;\
  .quad ld_ewram_##load_type             /* 0x02: ewram */                   ;\
  .quad ld_iwram_##load_type             /* 0x03: iwram */                   ;\
  .quad ld_ioram_##load_type             /* 0x04: I/O regs */                ;\
  .quad ld_palram_##load_type            /* 0x05: palette RAM */             ;\
  .quad ld_rdmap_##load_type             /* 0x06: vram */                    ;\
  .quad ld_oamram_##load_type            /* 0x07: oam ram */                 ;\
  .quad ld_rdmap_##load_type             /* 0x08: gamepak: ignore */         ;\
  .quad ld_rdmap_##load_type             /* 0x09: gamepak: ignore */         ;\
  .quad ld_rdmap_##load_type             /* 0x0A: gamepak: ignore */         ;\
  .quad ld_rdmap_##load_type             /* 0x0B: gamepak: ignore */         ;\
  .quad ld_rdmap_##load_type             /* 0x0C: gamepak: ignore */         ;\
  .quad ld_slow_##aload_type             /* 0x0D: EEPROM */                  ;\
  .quad ld_slow_##aload_type             /* 0x0E: backup */                  ;\
  .quad ld_slow_##aload_type             /* 0x0F: ignore */                  ;\

// Aligned load is a bit special
defsymbl(execute_aligned_load32)
  tst w0, #(0xf0000000)
  lsr w3, w0, #24
  csinc w3, wzr, w3, ne
  add x4, reg_base, (MEM_TBL_OFF + 5*136)
  ldr x3, [x4, x3, lsl #3]
  br x3
ld_slow_aligned_u32:                       // Slow C path for multiple loads
  str lr, [reg_base, #REG_SAVE]            // Save LR
  store_registers()
  bl read_memory32
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret
ld_bios_aligned_u32:
  and w0, w0, #(0x7fff)                    // Do not verify PC on purpose
  add x3, reg_base, #(RDMAP_OFF)
  ldr x3, [x3]
  ldr w0, [x3, x0]
  ret

execute_load_builder( u8,  ldrb, 0, 0, read_memory8)
execute_load_builder( s8, ldrsb, 0, 1, read_memory8s)
execute_load_builder(u16,  ldrh, 1, 2, read_memory16)
execute_load_builder(s16, ldrsh, 1, 3, read_memory16s)
execute_load_builder(u32,   ldr, 3, 4, read_memory32)

// Prepares for an external store (calls C code)
#define store_align_8()   and w1, w1, #0xff
#define store_align_16()  and w1, w1, #0xffff; bic w0, w0, #1
#define store_align_32()  bic w0, w0, #3

// Write out to memory.

// Input:
// w0: address
// w1: value
// w2: PC value

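// The *_safe label variants below feed the "32_safe" store table used by
// execute_aligned_store32 (STM stores); for iwram/ewram the safe handlers
// defined further down skip the per-store SMC sentinel check.
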
#define execute_store_builder(store_type, str_op, str_op16, load_op,          \
                              stmask, stmask16, tblidx)                      ;\
                                                                             ;\
defsymbl(execute_store_u##store_type)                                         ;\
  lsr w4, w0, #28                                                            ;\
  lsr w3, w0, #24                                                            ;\
  cbnz w4, ext_store_u##store_type                                           ;\
  add x4, reg_base, (MEM_TBL_OFF + 816 + tblidx*128)                         ;\
  ldr x3, [x4, x3, lsl #3]                                                   ;\
  br x3                                                                      ;\
                                                                             ;\
ext_store_u##store_type:                                                     ;\
ext_store_u##store_type##_safe:                                              ;\
  str w2, [reg_base, #REG_PC]            /* write out PC */                  ;\
  str lr, [reg_base, #REG_SAVE]          /* Preserve LR */                   ;\
  store_align_##store_type()                                                 ;\
  store_registers()                                                          ;\
  bl write_memory##store_type                                                ;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                    /* resume if no side effects */     ;\
                                                                             ;\
ext_store_iwram_u##store_type:                                               ;\
  and w0, w0, #(0x7fff & ~stmask)        /* Mask to mirror memory (+align) */;\
  add x3, reg_base, #(IWRAM_OFF+0x8000)  /* x3 = iwram base */               ;\
  str_op w1, [x0, x3]                    /* store data */                    ;\
  sub x3, x3, #0x8000                    /* x3 = iwram smc base */           ;\
  load_op w1, [x0, x3]                   /* w1 = SMC sentinel */             ;\
  cbnz w1, 3f                            /* Check value, should be zero */   ;\
  ret                                    /* return */                        ;\
                                                                             ;\
ext_store_ewram_u##store_type:                                               ;\
  and w0, w0, #(0x3ffff & ~stmask)       /* Mask to mirror memory (+align) */;\
  add x3, reg_base, #EWRAM_OFF           /* x3 = ewram base */               ;\
  str_op w1, [x0, x3]                    /* store data */                    ;\
  add x3, x3, #0x40000                   /* x3 = ewram smc base */           ;\
  load_op w1, [x0, x3]                   /* w1 = SMC sentinel */             ;\
  cbnz w1, 3f                            /* Check value, should be zero */   ;\
  ret                                    /* return */                        ;\
                                                                             ;\
ext_store_vram_u##store_type:                                                ;\
ext_store_vram_u##store_type##_safe:                                         ;\
  and w0, w0, #(0x1ffff & ~stmask16)     /* Mask to mirror memory (+align) */;\
  sub w3, w0, #0x8000                    /* Mirrored addr for last bank */   ;\
  cmp w0, #0x18000                       /* Check if exceeds 96KB */         ;\
  csel w0, w3, w0, cs                    /* If it does, pick the mirror */   ;\
  add x3, reg_base, #VRAM_OFF            /* x3 = vram base */                ;\
  str_op16 w1, [x0, x3]                  /* store data */                    ;\
  ret                                    /* return */                        ;\
                                                                             ;\
ext_store_oam_ram_u##store_type:                                             ;\
ext_store_oam_ram_u##store_type##_safe:                                      ;\
  and w0, w0, #(0x3ff & ~stmask16)       /* Mask to mirror memory (+align) */;\
  add x3, reg_base, #OAM_RAM_OFF         /* x3 = oam ram base */             ;\
  str_op16 w1, [x0, x3]                  /* store data */                    ;\
  str w29, [reg_base, #OAM_UPDATED]      /* write non zero to signal */      ;\
  ret                                    /* return */                        ;\
                                                                             ;\
ext_store_ioreg_u##store_type:                                               ;\
  str w2, [reg_base, #REG_PC]            /* write out PC */                  ;\
  str lr, [reg_base, #REG_SAVE]          /* Preserve LR */                   ;\
  and w0, w0, #(0x3ff & ~stmask)                                             ;\
  store_registers()                                                          ;\
  bl write_io_register##store_type                                           ;\
  cbnz w0, write_epilogue                /* handle additional write stuff */ ;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                    /* resume if no side effects */     ;\
                                                                             ;\
3:                                       /* SMC write (iwram/ewram) */       ;\
  str w2, [reg_base, #REG_PC]            /* write out PC */                  ;\
  store_registers()                      /* store registers */               ;\
  consolidate_flags(w1)                                                      ;\
  bl flush_translation_cache_ram                                             ;\
  ldr w0, [reg_base, #REG_PC]            /* load "current new" PC */         ;\
  b lookup_pc                            /* continue execution */            ;\
.size execute_store_u##store_type, .-execute_store_u##store_type

// for ignored areas, just return
ext_store_ignore:
  ret                                      // return

#define store_lookup_table(store_type)                                       ;\
  .quad ext_store_ignore                 /* 0x00: BIOS, ignore */            ;\
  .quad ext_store_ignore                 /* 0x01: ignore */                  ;\
  .quad ext_store_ewram_u##store_type    /* 0x02: ewram */                   ;\
  .quad ext_store_iwram_u##store_type    /* 0x03: iwram */                   ;\
  .quad ext_store_ioreg_u##store_type    /* 0x04: I/O regs */                ;\
  .quad ext_store_palette_u##store_type  /* 0x05: palette RAM */             ;\
  .quad ext_store_vram_u##store_type     /* 0x06: vram */                    ;\
  .quad ext_store_oam_ram_u##store_type  /* 0x07: oam ram */                 ;\
  .quad ext_store_u##store_type          /* 0x08: gamepak: ignore */         ;\
  .quad ext_store_u##store_type          /* 0x09: gamepak: ignore */         ;\
  .quad ext_store_u##store_type          /* 0x0A: gamepak: ignore */         ;\
  .quad ext_store_u##store_type          /* 0x0B: gamepak: ignore */         ;\
  .quad ext_store_u##store_type          /* 0x0C: gamepak: ignore */         ;\
  .quad ext_store_u##store_type          /* 0x0D: EEPROM */                  ;\
  .quad ext_store_u##store_type          /* 0x0E: backup */                  ;\
  .quad ext_store_ignore                 /* 0x0F: ignore */                  ;\

execute_store_builder(8,  strb, strh, ldrb, 0, 1, 0)
execute_store_builder(16, strh, strh, ldrh, 1, 1, 1)
execute_store_builder(32, str,  str,  ldr,  3, 3, 2)

// Palette writes are special since they are converted on the fly for speed
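// (palette_ram keeps the raw BGR555 value; palette_ram_converted keeps the
// same entry pre-converted to RGB565)
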
ext_store_palette_u8:
  bfi w1, w1, #8, #24                      // Duplicate the byte
ext_store_palette_u16:
  and w0, w0, #(0x3fe)
  add x3, reg_base, #(PAL_RAM_OFF)
  strh w1, [x3, x0]

  ubfx w2, w1, #10, #5                     // Extract blue to red
  bfi w2, w1, #11, #5                      // Move red to blue
  and w1, w1, #0x03E0                      // Extract green component
  orr w1, w2, w1, lsl #1                   // Merge the three components

  add x3, reg_base, #(PALCNV_RAM_OFF)
  strh w1, [x3, x0]
  ret

ext_store_palette_u32_safe:
ext_store_palette_u32:
  and w0, w0, #(0x3fc)
  add x3, reg_base, #(PAL_RAM_OFF)
  str w1, [x3, x0]

  and w2, w1, #0x7C007C00                  // Get blue components
  and w3, w1, #0x001F001F                  // Get red components
  lsr w2, w2, #10                          // Place blue in the final register
  orr w2, w2, w3, lsl #11                  // Merge red
  and w3, w1, #0x03E003E0                  // Get green component
  orr w1, w2, w3, lsl #1                   // Merge green

  add x3, reg_base, #(PALCNV_RAM_OFF)
  str w1, [x3, x0]
  ret

// This store is used in the STM (store multiple) case, so no SMC checks happen in-between

defsymbl(execute_aligned_store32)
  lsr w4, w0, #28
  lsr w3, w0, #24
  cbnz w4, ext_store_u32
  add x4, reg_base, MEM_TBL_OFF + 816 + 3*128
  ldr x3, [x4, x3, lsl #3]
  br x3
ext_store_iwram_u32_safe:
  and w0, w0, #(0x7fff)                    // Mask to mirror memory (no need to align!)
  add x3, reg_base, #(IWRAM_OFF+0x8000)    // x3 = iwram base
  str w1, [x0, x3]                         // store data
  ret                                      // Return
ext_store_ewram_u32_safe:
  and w0, w0, #(0x3ffff)                   // Mask to mirror memory (no need to align!)
  add x3, reg_base, #(EWRAM_OFF)           // x3 = ewram base
  str w1, [x0, x3]                         // store data
  ret                                      // Return
ext_store_ioreg_u32_safe:
  str lr, [reg_base, #REG_SAVE]
  and w0, w0, #(0x3fc)
  store_registers()
  bl write_io_register32
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret
.size execute_aligned_store32, .-execute_aligned_store32

// This is called whenever an external store with side effects was performed
write_epilogue:
  mov reg_save0, w0                        // Save the alert bitmap for later
  consolidate_flags(w1)                    // Write back CPSR (IRQ handling reads it)
  tbz w0, #CPU_ALERT_SMC_B, 1f             // Skip if SMC did not happen
  bl flush_translation_cache_ram           // Flush RAM if bit is set

1:
  tbz reg_save0, #CPU_ALERT_IRQ_B, 2f      // Skip if IRQ did not happen
  bl check_and_raise_interrupts

2:
  ldr w0, [reg_base, #REG_PC]              // load new PC
  tbz reg_save0, #CPU_ALERT_HALT_B, lookup_pc  // Resume execution if running

  // explicit fallthrough to alert_loop, while CPU is halted

alert_loop:
  mov w0, reg_cycles                       // load remaining cycles
  bl update_gba                            // update GBA until CPU isn't halted
  mov reg_cycles, w0                       // load new cycle count

  ldr w1, [reg_base, #COMPLETED_FRAME]     // Check whether a frame was completed
  cbnz w1, return_to_main                  // and return to caller function.

  ldr w1, [reg_base, #CPU_HALT_STATE]      // Check whether the CPU is halted
  cbnz w1, alert_loop                      // and keep looping while it is

  ldr w0, [reg_base, #REG_PC]              // load new PC
  b lookup_pc                              // Resume execution at that PC

.data
.align 4
defsymbl(ldst_handler_functions)
load_lookup_table(u8, u8)
load_lookup_table(s8, s8)
load_lookup_table(u16, u16)
load_lookup_table(s16, s16)
load_lookup_table(u32, u32)
load_lookup_table(u32, aligned_u32)
store_lookup_table(8)
store_lookup_table(16)
store_lookup_table(32)
store_lookup_table(32_safe)

.bss
.align 4

defsymbl(memory_map_read)
  .space 0x10000
defsymbl(iwram)
  .space 0x10000
defsymbl(vram)
  .space 0x18000
defsymbl(ewram)
  .space 0x80000
defsymbl(ldst_lookup_tables)
  .space 4096
defsymbl(reg)
  .space 0x100
defsymbl(spsr)
  .space 24
defsymbl(reg_mode)
  .space 196
  .space 36                                // Padding
defsymbl(oam_ram)
  .space 0x400
defsymbl(palette_ram)
  .space 0x400
defsymbl(io_registers)
  .space 0x400
defsymbl(palette_ram_converted)
  .space 0x400