Forgot to add byte-doubling codepath for byte writes. This is correctly implemented in x86 and MIPS.
		
			
				
	
	
		
			771 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			771 lines
		
	
	
	
		
			33 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
# gameplaySP
 | 
						|
#
 | 
						|
# Copyright (C) 2021 David Guillen Fandos <david@davidgf.net>
 | 
						|
#
 | 
						|
# This program is free software; you can redistribute it and/or
 | 
						|
# modify it under the terms of the GNU General Public License as
 | 
						|
# published by the Free Software Foundation; either version 2 of
 | 
						|
# the License, or (at your option) any later version.
 | 
						|
#
 | 
						|
# This program is distributed in the hope that it will be useful,
 | 
						|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
# General Public License for more details.
 | 
						|
#
 | 
						|
# You should have received a copy of the GNU General Public License
 | 
						|
# along with this program; if not, write to the Free Software
 | 
						|
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 | 
						|
 | 
						|
 | 
						|
#include "../gpsp_config.h"
 | 
						|
 | 
						|
// defsymbl(symbol): opens a global function entry point called `symbol`.
// The entry is aligned with `.align 2`, typed as a function, and exported
// under both `symbol` and `_symbol` (presumably for toolchains that prefix
// C symbol names with an underscore).
#define defsymbl(symbol)   \
.align 2                  ;\
.global symbol            ;\
.global _##symbol         ;\
.type symbol, %function   ;\
symbol:                   ;\
_##symbol:
 | 
						|
 | 
						|
.text
 | 
						|
.align 2
 | 
						|
 | 
						|
// Byte offsets, relative to reg_base, of the 32-bit slots of the register
// file. Slot n sits at offset n * 4.

// Slots 0-14: guest registers (SP and LR alias slots 13 and 14)
#define REG_R0            0x00            // slot  0
#define REG_R1            0x04            // slot  1
#define REG_R2            0x08            // slot  2
#define REG_R3            0x0C            // slot  3
#define REG_R4            0x10            // slot  4
#define REG_R5            0x14            // slot  5
#define REG_R6            0x18            // slot  6
#define REG_R7            0x1C            // slot  7
#define REG_R8            0x20            // slot  8
#define REG_R9            0x24            // slot  9
#define REG_R10           0x28            // slot 10
#define REG_R11           0x2C            // slot 11
#define REG_R12           0x30            // slot 12
#define REG_R13           0x34            // slot 13
#define REG_R14           0x38            // slot 14
#define REG_SP            0x34            // slot 13 (same as REG_R13)
#define REG_LR            0x38            // slot 14 (same as REG_R14)

// Slots 15-18: program counter, status register and CPU state
#define REG_PC            0x3C            // slot 15
#define REG_CPSR          0x40            // slot 16
#define CPU_MODE          0x44            // slot 17
#define CPU_HALT_STATE    0x48            // slot 18

// Slots 19-26: emulator bookkeeping
#define REG_BUS_VALUE     0x4C            // slot 19
#define REG_N_FLAG        0x50            // slot 20
#define REG_Z_FLAG        0x54            // slot 21
#define REG_C_FLAG        0x58            // slot 22
#define REG_V_FLAG        0x5C            // slot 23
#define COMPLETED_FRAME   0x60            // slot 24: non-zero -> back to main
#define OAM_UPDATED       0x64            // slot 25: set by OAM store stubs
#define REG_SAVE          0x68            // slot 26: LR parked here by stubs
 | 
						|
 | 
						|
// CPU alert flags. Presumably bit indices (note the _B suffix); they are not
// referenced in this part of the file -- confirm against the users.
#define CPU_ALERT_HALT_B        0
#define CPU_ALERT_SMC_B         1
#define CPU_ALERT_IRQ_B         2

// Host registers with a fixed role while translated code is running.
#define reg_base          x20             // base pointer for all *_OFF offsets
#define reg_cycles        w21             // remaining cycle counter

// Cached copies of the CPSR condition flags, one bit each (see
// extract_flags_reg / consolidate_flags).
#define reg_c_flag        w22
#define reg_v_flag        w23
#define reg_z_flag        w24
#define reg_n_flag        w25
#define reg_save0         w19
 | 
						|
 | 
						|
 | 
						|
// Where each buffer lives relative to reg_base. Negative offsets are placed
// before the register file, positive ones after it.
#define RDMAP_OFF      -0xB9000   // read map: 8K pointers (64KB)
#define IWRAM_OFF      -0xA9000   // IWRAM: 32KB, doubled for the shadow copy
#define VRAM_OFF       -0x99000   // VRAM: 96KB
#define EWRAM_OFF      -0x81000   // EWRAM: 256KB, doubled for the shadow copy
#define MEM_TBL_OFF     -0x1000   // room for the load/store handler tables
#define SPSR_RAM_OFF      0x100
#define REGMODE_RAM_OFF   0x118
#define OAM_RAM_OFF       0x200
#define PAL_RAM_OFF       0x600
#define IOREG_OFF         0xA00
#define PALCNV_RAM_OFF    0xE00

// Constants needed by the SWI entry stub
#define MODE_SUPERVISOR       0x13
#define SUPERVISOR_SPSR      (SPSR_RAM_OFF + 3*4)  // spsr[3]
#define SUPERVISOR_LR        (REGMODE_RAM_OFF + (3 * (7 * 4)) + (6 * 4))  // reg_mode[3][6]
 | 
						|
 | 
						|
 | 
						|
// Guest register cache <-> register file transfers.
//
// load_registers(): refills the host registers that cache guest r0-r14 from
// the register file at reg_base.
//   r0-r11  -> w6-w17
//   r12-r13 -> w26-w27
//   r14     -> w28

#define load_registers()                                                      \
  ldp  w6,  w7, [reg_base, #REG_R0]                                          ;\
  ldp  w8,  w9, [reg_base, #REG_R2]                                          ;\
  ldp w10, w11, [reg_base, #REG_R4]                                          ;\
  ldp w12, w13, [reg_base, #REG_R6]                                          ;\
  ldp w14, w15, [reg_base, #REG_R8]                                          ;\
  ldp w16, w17, [reg_base, #REG_R10]                                         ;\
  ldp w26, w27, [reg_base, #REG_R12]                                         ;\
  ldr w28,      [reg_base, #REG_R14]                                         ;
 | 
						|
 | 
						|
// store_registers(): writes the host registers that cache guest r0-r14
// (w6-w17, w26-w28) back to the register file at reg_base. Used before
// handing control to C code.
#define store_registers()                                                     \
  stp  w6,  w7, [reg_base, #REG_R0]                                          ;\
  stp  w8,  w9, [reg_base, #REG_R2]                                          ;\
  stp w10, w11, [reg_base, #REG_R4]                                          ;\
  stp w12, w13, [reg_base, #REG_R6]                                          ;\
  stp w14, w15, [reg_base, #REG_R8]                                          ;\
  stp w16, w17, [reg_base, #REG_R10]                                         ;\
  stp w26, w27, [reg_base, #REG_R12]                                         ;\
  str w28,      [reg_base, #REG_R14]                                         ;
 | 
						|
 | 
						|
 | 
						|
// extract_flags_reg(tmpreg): splits the N/Z/C/V bits (31..28) of the CPSR
// value held in tmpreg into the four flag cache registers. tmpreg itself is
// left untouched.

#define extract_flags_reg(tmpreg)                                             \
  ubfx reg_n_flag, tmpreg, #31, #1        /* N = bit 31                    */;\
  ubfx reg_z_flag, tmpreg, #30, #1        /* Z = bit 30                    */;\
  ubfx reg_c_flag, tmpreg, #29, #1        /* C = bit 29                    */;\
  ubfx reg_v_flag, tmpreg, #28, #1        /* V = bit 28                    */;
 | 
						|
 | 
						|
// extract_flags(tmpreg): reads the stored CPSR into tmpreg and refreshes the
// flag cache registers from it. tmpreg holds the CPSR value afterwards.
#define extract_flags(tmpreg)                                                 \
  ldr tmpreg, [reg_base, #REG_CPSR]       /* tmpreg = stored CPSR          */;\
  extract_flags_reg(tmpreg)               /* unpack N/Z/C/V                */;
 | 
						|
 | 
						|
// consolidate_flags(tmpreg): merges the cached N/Z/C/V bits back into bits
// 31..28 of the stored CPSR. The merged value is written to the register
// file and is also left in tmpreg for the caller.

#define consolidate_flags(tmpreg)                                             \
  ldr tmpreg, [reg_base, #REG_CPSR]       /* start from the stored CPSR    */;\
  bfi tmpreg, reg_n_flag, #31, #1         /* bit 31 <- N                   */;\
  bfi tmpreg, reg_z_flag, #30, #1         /* bit 30 <- Z                   */;\
  bfi tmpreg, reg_c_flag, #29, #1         /* bit 29 <- C                   */;\
  bfi tmpreg, reg_v_flag, #28, #1         /* bit 28 <- V                   */;\
  str tmpreg, [reg_base, #REG_CPSR]       /* write the merged CPSR back    */;
 | 
						|
 | 
						|
 | 
						|
// Brings the emulated GBA hardware (video, sound, input, etc) up to date by
// calling the C routine update_gba with the remaining cycle count.
//
// Input:
//   w0: current PC
//
// The value returned by update_gba is used as follows: the low 15 bits are
// the new cycle count, bit 31 signals that the PC has changed. If the
// COMPLETED_FRAME slot is non-zero afterwards, control goes to return_to_main
// instead of resuming translated code.

defsymbl(a64_update_gba)
  str lr, [reg_base, #REG_SAVE]           // park the return point
  str w0, [reg_base, #REG_PC]             // publish the current PC

  consolidate_flags(w0)                   // CPSR must be current for C code
  store_registers()                       // flush the guest register cache

  mov w0, reg_cycles                      // argument: cycles left
  bl update_gba

  ldr w1, [reg_base, #COMPLETED_FRAME]
  cbnz w1, return_to_main                 // frame finished: leave the JIT

  // Keep running translated code (possibly from a different PC)
  and reg_cycles, w0, 0x7fff              // take the new cycle count
  extract_flags(w2)                       // w2 = CPSR, flag cache refreshed

  tbnz w0, #31, .La64_update_gba_new_pc   // bit 31: PC was modified

  ldr lr, [reg_base, #REG_SAVE]           // same PC: return to the caller
  load_registers()
  ret

.La64_update_gba_new_pc:
  ldr w0, [reg_base, #REG_PC]             // w0 = PC to continue from
  tbnz w2, #5, .La64_update_gba_thumb     // CPSR.T set: Thumb code

  bl block_lookup_address_arm             // x0 = translated ARM block
  load_registers()
  br x0

.La64_update_gba_thumb:
  bl block_lookup_address_thumb           // x0 = translated Thumb block
  load_registers()
  br x0
.size a64_update_gba, .-a64_update_gba
 | 
						|
 | 
						|
 | 
						|
// Cheat hook, invoked whenever PC == cheats-master-function.
// Flushes the guest registers, lets the C function process_cheats do the
// work, reloads the registers and returns to the caller.

defsymbl(a64_cheat_hook)
  str lr, [reg_base, #REG_SAVE]           // LR does not survive the call
  store_registers()
  bl process_cheats
  load_registers()
  ldr lr, [reg_base, #REG_SAVE]
  ret
 | 
						|
 | 
						|
 | 
						|
// Indirect branch stubs. They are jumped to (not called) and never return:
// each one looks up the translated block for the target PC and continues
// execution there.
//
// Input:
//   w0: PC to branch to

// Target is ARM code.
defsymbl(a64_indirect_branch_arm)
  store_registers()                       // C lookup needs current registers
  bl block_lookup_address_arm             // x0 = translated block address
  load_registers()
  br x0
 | 
						|
 | 
						|
// Indirect branch to Thumb code. w0 = target PC; does not return.
defsymbl(a64_indirect_branch_thumb)
  store_registers()                       // C lookup needs current registers
  bl block_lookup_address_thumb           // x0 = translated block address
  load_registers()
  br x0
 | 
						|
 | 
						|
// Indirect branch resolved through block_lookup_address_dual (presumably the
// target's ARM/Thumb state is decided by that routine). w0 = target PC; does
// not return.
defsymbl(a64_indirect_branch_dual)
  store_registers()                       // C lookup needs current registers
  bl block_lookup_address_dual            // x0 = translated block address
  load_registers()
  br x0
 | 
						|
 | 
						|
 | 
						|
// CPSR / SPSR readers

// Returns the current CPSR in w0. The cached flags are merged into the
// stored CPSR as a side effect.
defsymbl(execute_read_cpsr)
  consolidate_flags(w0)                   // w0 = up-to-date CPSR
  ret
 | 
						|
 | 
						|
// Returns in w0 the SPSR entry selected by the low 4 bits of the current
// CPU mode. Clobbers w1.
defsymbl(execute_read_spsr)
  ldr w1, [reg_base, #CPU_MODE]           // w1 = cpu mode
  and w1, w1, #0xF                        // table index, as REG_SPSR() does
  add x0, reg_base, #SPSR_RAM_OFF         // x0 = &spsr[0]
  ldr w0, [x0, x1, lsl #2]                // w0 = spsr[index]
  ret
 | 
						|
 | 
						|
 | 
						|
// Writes the CPSR under a mode-dependent bitmask.
//
// Input:
//   w0: new cpsr value
//   w1: current PC
//   w2: store bitmask (user-mode)
//   w3: store bitmask (privileged mode)
//
// The masked result is unpacked into the flag cache and handed to the C
// routine execute_store_cpsr_body. When that routine returns a non-zero
// value it is taken as the PC to continue from (looked up as ARM code);
// otherwise execution resumes at the caller.

defsymbl(execute_store_cpsr)
  ldr w4, [reg_base, #CPU_MODE]           // w4 = cpu_mode
  tst x4, #0x10                           // bit 4 marks privileged modes
  csel x2, x2, x3, eq                     // clear: user mask, set: priv. mask

  ldr w4, [reg_base, #REG_CPSR]           // w4 = current CPSR
  bic w4, w4, w2                          // bits kept: current & ~mask
  and w3, w0, w2                          // bits taken: new & mask
  orr w0, w3, w4                          // w0 = final CPSR value
  extract_flags_reg(w0)                   // flag cache follows the new CPSR

  str lr, [reg_base, #REG_SAVE]
  store_registers()
  bl execute_store_cpsr_body              // remaining work is done in C

  cbnz w0, .Lexecute_store_cpsr_new_pc    // non-zero: continue elsewhere

  ldr lr, [reg_base, #REG_SAVE]           // zero: back to where we came from
  load_registers()
  ret

.Lexecute_store_cpsr_new_pc:
  // w0 holds the PC to resume from
  bl block_lookup_address_arm
  load_registers()
  br x0                                   // enter the returned block
.size execute_store_cpsr, .-execute_store_cpsr
 | 
						|
 | 
						|
 | 
						|
// Writes the SPSR entry selected by the low 4 bits of the current CPU mode,
// changing only the bits set in the mask.
//
// Input:
//   w0: new SPSR value
//   w1: store mask
//
// Clobbers w0, w2 and w3.

defsymbl(execute_store_spsr)
  ldr w2, [reg_base, #CPU_MODE]           // w2 = cpu mode
  and w2, w2, #0xF                        // table index, as REG_SPSR() does
  add x2, reg_base, x2, lsl #2            // x2 = reg_base + index * 4
  ldr w3, [x2, #SPSR_RAM_OFF]             // w3 = current spsr[index]

  bic w3, w3, w1                          // bits kept: old & ~mask
  and w0, w0, w1                          // bits taken: new & mask
  orr w0, w0, w3                          // w0 = final SPSR value

  str w0, [x2, #SPSR_RAM_OFF]             // spsr[index] = w0
  ret
.size execute_store_spsr, .-execute_store_spsr
 | 
						|
 | 
						|
// Copies the SPSR of the current mode into the CPSR, then lets the C routine
// execute_spsr_restore_body finish the job.
//
// Input:
//   w0: current pc
//
// Nothing is done (and w0 is returned untouched) when the low 4 bits of the
// CPU mode are zero, i.e. in user or system mode. Otherwise w0 is passed to
// the C routine and whatever it returns in w0 is handed back to the caller.

defsymbl(execute_spsr_restore)
  ldr w1, [reg_base, #CPU_MODE]           // w1 = cpu_mode
  and w1, w1, 0xF                         // user and system both become 0
  cbz w1, .Lexecute_spsr_restore_done     // no SPSR in those modes

  lsl w2, w1, #2                          // entries are 32 bit words
  add w2, w2, #SPSR_RAM_OFF               // w2 = offset of spsr[cpu_mode]
  ldr w3, [reg_base, x2]                  // w3 = spsr[cpu_mode]
  str w3, [reg_base, #REG_CPSR]           // CPSR = SPSR
  extract_flags_reg(w3)                   // keep the flag cache in sync

  // The C routine receives w0 (address) and returns it.
  str lr, [reg_base, #REG_SAVE]
  store_registers()                       // flush guest registers
  bl execute_spsr_restore_body
  load_registers()
  ldr lr, [reg_base, #REG_SAVE]

.Lexecute_spsr_restore_done:
  ret
.size execute_spsr_restore, .-execute_spsr_restore
 | 
						|
 | 
						|
 | 
						|
// Performs the mode switch needed to enter an SWI handler.
//
// Input:
//   w0: current pc (stored as the supervisor-mode LR)
//
// Saves the consolidated CPSR as the supervisor SPSR, rewrites the CPSR for
// supervisor mode, calls set_cpu_mode(MODE_SUPERVISOR) and updates the
// REG_BUS_VALUE slot before returning to the caller.

defsymbl(execute_swi)
  str lr, [reg_base, #REG_SAVE]
  str w0, [reg_base, #SUPERVISOR_LR]      // supervisor LR = next PC
  consolidate_flags(w1)                   // w1 = CPSR with current flags
  str w1, [reg_base, #SUPERVISOR_SPSR]    // supervisor SPSR = old CPSR
  bic w1, w1, #0x3F                       // drop bits 0-5 (mode bits and T)
  mov w2, #(MODE_SUPERVISOR | 0x80)       // supervisor mode plus bit 7
  orr w1, w1, w2
  str w1, [reg_base, #REG_CPSR]           // commit the new CPSR
  store_registers()
  mov w0, #MODE_SUPERVISOR
  bl set_cpu_mode                         // switch the register banks (C)
  ldr w0, =0xe3a02004                     // NOTE(review): presumably the last
  str w0, [reg_base, #REG_BUS_VALUE]      // BIOS opcode seen on the bus; confirm
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret
.size execute_swi, .-execute_swi
 | 
						|
 | 
						|
// Entry point from C into translated code.
//
// Input:
//   w0: cycle counter to start with
//   x1: value for reg_base
//
// Saves x19-x30 in a 96 byte stack frame (undone by return_to_main), sets up
// reg_cycles/reg_base and then either goes to alert_loop (CPU halted) or
// falls through into lookup_pc with w0 = stored PC.
defsymbl(execute_arm_translate_internal)
  // Build the frame holding x19-x30
  sub sp, sp, #96
  stp x19, x20, [sp,  #0]
  stp x21, x22, [sp, #16]
  stp x23, x24, [sp, #32]
  stp x25, x26, [sp, #48]
  stp x27, x28, [sp, #64]
  stp x29, x30, [sp, #80]

  mov reg_base, x1                        // base pointer for all offsets
  mov reg_cycles, w0                      // cycles to run

  // A halted CPU only waits for IRQs, there is nothing to execute
  ldr w1, [reg_base, #CPU_HALT_STATE]
  cbnz w1, alert_loop

  ldr w0, [reg_base, #REG_PC]             // w0 = PC to start from
 | 
						|
 | 
						|
// Continues execution at the PC given in w0. Reloads the flag cache from the
// stored CPSR and enters the ARM or Thumb translation of that PC depending
// on CPSR bit 5. Expects the guest registers to be stored in the register
// file (they are reloaded from there).
lookup_pc:
  ldr w1, [reg_base, #REG_CPSR]           // w1 = CPSR
  extract_flags_reg(w1)                   // refresh N/Z/C/V cache
  tbnz w1, #5, .Llookup_pc_thumb          // T bit set: Thumb

  // ARM state
  bl block_lookup_address_arm             // x0 = translated block
  load_registers()
  br x0

.Llookup_pc_thumb:
  bl block_lookup_address_thumb           // x0 = translated block
  load_registers()
  br x0
 | 
						|
 | 
						|
// Leaves translated code and returns to whoever called
// execute_arm_translate: reloads x19-x30 from the 96 byte frame created on
// entry, releases the frame and returns.

return_to_main:
  ldp x19, x20, [sp,  #0]
  ldp x21, x22, [sp, #16]
  ldp x23, x24, [sp, #32]
  ldp x25, x26, [sp, #48]
  ldp x27, x28, [sp, #64]
  ldp x29, x30, [sp, #80]                 // x30 = original return address
  add sp, sp, #96
  ret
 | 
						|
 | 
						|
 | 
						|
// Memory read stubs
//
// execute_load_builder(load_type, ldop, ldmask, tblidx, ldfn) generates
// execute_load_<load_type> together with its per-region handlers.
//
//   load_type: suffix of the generated symbols
//   ldop:      load instruction used by the fast paths
//   ldmask:    low address bits that must be zero for a fast access
//   tblidx:    index of the 17-entry handler table (136 bytes each, starting
//              at reg_base + MEM_TBL_OFF) used for dispatch
//   ldfn:      C function used by the slow path
//
// Generated stub: w0 = address, w1 = PC; the loaded value is left in w0.
// x3 and x4 are used as scratch. Table slot 0 serves addresses that have any
// of the top 4 bits set or are misaligned; slot (address >> 24) + 1 serves
// everything else.

#define execute_load_builder(load_type, ldop, ldmask, tblidx, ldfn)           \
                                                                             ;\
defsymbl(execute_load_##load_type)                                           ;\
  lsr w3, w0, #24                         /* w3 = address bits 31..24      */;\
  tst w0, #(0xf0000000 | ldmask)          /* out of range or misaligned?   */;\
  csinc w3, wzr, w3, ne                   /* yes: slot 0, no: region + 1   */;\
  add x4, reg_base, (MEM_TBL_OFF + tblidx*136)                               ;\
  ldr x3, [x4, x3, lsl #3]                /* x3 = handler for that slot    */;\
  br x3                                                                      ;\
                                                                             ;\
ld_bios_##load_type:                      /* BIOS: readable from BIOS only */;\
  lsr w3, w1, #24                         /* PC outside region 0?          */;\
  cbnz w3, ld_slow_##load_type            /* then let the C code decide    */;\
  and w0, w0, #(0x7fff)                   /* offset within the read page   */;\
  add x3, reg_base, #(RDMAP_OFF)                                             ;\
  ldr x3, [x3]                            /* x3 = first read map pointer   */;\
  ldop w0, [x3, x0]                       /* fetch the value               */;\
  ret                                                                        ;\
                                                                             ;\
ld_ewram_##load_type:                     /* EWRAM                         */;\
  and w0, w0, #(0x3ffff)                  /* mirror every 256KB            */;\
  add x3, reg_base, #EWRAM_OFF                                               ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_iwram_##load_type:                     /* IWRAM                         */;\
  and w0, w0, #(0x7fff)                   /* mirror every 32KB             */;\
  add x3, reg_base, #(IWRAM_OFF+0x8000)   /* data half of the buffer       */;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_ioram_##load_type:                     /* I/O registers                 */;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(IOREG_OFF)                                             ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_palram_##load_type:                    /* Palette RAM                   */;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(PAL_RAM_OFF)                                           ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_oamram_##load_type:                    /* OAM                           */;\
  and w0, w0, #(0x3ff)                                                       ;\
  add x3, reg_base, #(OAM_RAM_OFF)                                           ;\
  ldop w0, [x3, x0]                                                          ;\
  ret                                                                        ;\
                                                                             ;\
ld_rdmap_##load_type:                     /* Anything served by read map   */;\
  lsr w4, w0, #15                         /* page number (32KB pages)      */;\
  add x3, reg_base, #(RDMAP_OFF)                                             ;\
  ldr x4, [x3, x4, lsl #3]                /* x4 = page pointer             */;\
  cbz x4, ld_slow_##load_type             /* null: page is not mapped      */;\
  and w0, w0, #(0x7fff)                   /* offset within the page        */;\
  ldop w0, [x4, x0]                       /* fetch the value               */;\
  ret                                                                        ;\
                                                                             ;\
ld_slow_##load_type:                      /* Fallback through C code       */;\
  str w1, [reg_base, #REG_PC]             /* C code may need the PC        */;\
  str lr, [reg_base, #REG_SAVE]           /* LR is lost across the call    */;\
  store_registers()                                                          ;\
  bl ldfn                                                                    ;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                                                        ;\
.size execute_load_##load_type, .-execute_load_##load_type
 | 
						|
 | 
						|
// load_lookup_table(load_type, aload_type): emits the 17 handler pointers of
// one load dispatch table. Entry 0 is used for unaligned/bad addresses, the
// following 16 entries are indexed by address bits 27..24.
//   load_type:  suffix of the fast-path handlers
//   aload_type: suffix of the BIOS and slow-path handlers
#define load_lookup_table(load_type, aload_type)                              \
  .quad ld_slow_##aload_type              /* -1:   unaligned / bad access  */;\
  .quad ld_bios_##aload_type              /* 0x00: BIOS                    */;\
  .quad ld_slow_##aload_type              /* 0x01: open bus                */;\
  .quad ld_ewram_##load_type              /* 0x02: EWRAM                   */;\
  .quad ld_iwram_##load_type              /* 0x03: IWRAM                   */;\
  .quad ld_ioram_##load_type              /* 0x04: I/O registers           */;\
  .quad ld_palram_##load_type             /* 0x05: palette RAM             */;\
  .quad ld_rdmap_##load_type              /* 0x06: VRAM                    */;\
  .quad ld_oamram_##load_type             /* 0x07: OAM                     */;\
  .quad ld_rdmap_##load_type              /* 0x08: gamepak (read map)      */;\
  .quad ld_rdmap_##load_type              /* 0x09: gamepak (read map)      */;\
  .quad ld_rdmap_##load_type              /* 0x0A: gamepak (read map)      */;\
  .quad ld_rdmap_##load_type              /* 0x0B: gamepak (read map)      */;\
  .quad ld_rdmap_##load_type              /* 0x0C: gamepak (read map)      */;\
  .quad ld_slow_##aload_type              /* 0x0D: EEPROM                  */;\
  .quad ld_slow_##aload_type              /* 0x0E: backup                  */;\
  .quad ld_slow_##aload_type              /* 0x0F: ignored area            */;
 | 
						|
 | 
						|
// 32 bit load for addresses known to be aligned. Only the top 4 address bits
// are checked and dispatch goes through handler table 5.
//
// Input:  w0 = address
// Output: w0 = loaded value
// Unlike the regular load stubs, no PC is written out on the slow path and
// the BIOS handler does not check where the access comes from.
defsymbl(execute_aligned_load32)
  lsr w3, w0, #24                         // w3 = address bits 31..24
  tst w0, #(0xf0000000)                   // anything above 0x0FFFFFFF?
  csinc	w3, wzr, w3, ne                   // yes: slot 0, no: region + 1
  add x4, reg_base, (MEM_TBL_OFF + 5*136)
  ldr x3, [x4, x3, lsl #3]                // x3 = handler for that slot
  br x3

ld_slow_aligned_u32:                      // C fallback, used by multiple loads
  str lr, [reg_base, #REG_SAVE]           // LR is lost across the call
  store_registers()
  bl read_memory32
  ldr lr, [reg_base, #REG_SAVE]
  load_registers()
  ret

ld_bios_aligned_u32:
  and w0, w0, #(0x7fff)                   // PC is deliberately not verified
  add x3, reg_base, #(RDMAP_OFF)
  ldr x3, [x3]                            // x3 = first read map pointer
  ldr w0, [x3, x0]
  ret
 | 
						|
 | 
						|
 | 
						|
// Load stub instances:  type, load op, align mask, table, C fallback
execute_load_builder(u8,  ldrb,  0, 0, read_memory8)
execute_load_builder(s8,  ldrsb, 0, 1, read_memory8s)
execute_load_builder(u16, ldrh,  1, 2, read_memory16)
execute_load_builder(s16, ldrsh, 1, 3, read_memory16s)
execute_load_builder(u32, ldr,   3, 4, read_memory32)
 | 
						|
 | 
						|
 | 
						|
// Argument clean-up done before an external store is passed to C code:
// the value (w1) is truncated to the access width and the address (w0) is
// aligned down to it.
#define store_align_8()            and w1, w1, #0xff
#define store_align_16()           and w1, w1, #0xffff; bic w0, w0, #1
#define store_align_32()           bic w0, w0, #3

// Byte accesses on 16 bit buses: dup8 copies the low byte of reg into bits
// 8..15 so that a halfword store writes the byte twice. Wider accesses need
// no adjustment, hence the empty dup16/dup32.
#define dup8(reg)  bfi reg, reg, #8, #24    // low 16 bits = byte:byte
#define dup16(reg)
#define dup32(reg)
 | 
						|
 | 
						|
// Memory write stubs
//
// execute_store_builder(store_type, str_op, str_op16, load_op,
//                       stmask, stmask16, tblidx)
// generates execute_store_u<store_type> and its per-region handlers.
//
//   store_type: access width in bits (8, 16 or 32), used in symbol names
//   str_op:     store instruction for regular memory
//   str_op16:   store instruction for VRAM/OAM (byte writes are widened to
//               halfwords there, see dup8)
//   load_op:    load instruction used to read the SMC sentinel
//   stmask:     alignment mask for regular memory
//   stmask16:   alignment mask for VRAM/OAM
//   tblidx:     index of the 16-entry handler table (128 bytes each, placed
//               after the load tables) used for dispatch
//
// Generated stub input:
//   w0: address
//   w1: value
//   w2: PC value
//
// x3 and x4 are used as scratch; w0 and w1 are modified.

#define execute_store_builder(store_type, str_op, str_op16, load_op,          \
                              stmask, stmask16, tblidx)                       \
                                                                             ;\
defsymbl(execute_store_u##store_type)                                        ;\
  lsr w4, w0, #28                         /* w4 = top 4 address bits       */;\
  lsr w3, w0, #24                         /* w3 = region number            */;\
  cbnz w4, ext_store_u##store_type        /* above 0x0FFFFFFF: use C code  */;\
  add x4, reg_base, (MEM_TBL_OFF + 816 + tblidx*128)                         ;\
  ldr x3, [x4, x3, lsl #3]                /* x3 = handler for that region  */;\
  br x3                                                                      ;\
                                                                             ;\
ext_store_u##store_type:                                                     ;\
ext_store_u##store_type##_safe:                                              ;\
  str w2, [reg_base, #REG_PC]             /* write out PC                  */;\
  str lr, [reg_base, #REG_SAVE]           /* LR is lost across the call    */;\
  store_align_##store_type()              /* truncate value, align address */;\
  store_registers()                                                          ;\
  bl write_memory##store_type                                                ;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                     /* resume if no side effects     */;\
                                                                             ;\
ext_store_iwram_u##store_type:                                               ;\
  and w0, w0, #(0x7fff & ~stmask)         /* mirror every 32KB and align   */;\
  add x3, reg_base, #(IWRAM_OFF+0x8000)   /* x3 = iwram data               */;\
  str_op w1, [x0, x3]                     /* store the value               */;\
  sub x3, x3, #0x8000                     /* x3 = iwram smc shadow         */;\
  load_op w1, [x0, x3]                    /* w1 = SMC sentinel             */;\
  cbnz w1, 3f                             /* non-zero: code was overwritten*/;\
  ret                                                                        ;\
                                                                             ;\
ext_store_ewram_u##store_type:                                               ;\
  and w0, w0, #(0x3ffff & ~stmask)        /* mirror every 256KB and align  */;\
  add x3, reg_base, #EWRAM_OFF            /* x3 = ewram data               */;\
  str_op w1, [x0, x3]                     /* store the value               */;\
  add x3, x3, #0x40000                    /* x3 = ewram smc shadow         */;\
  load_op w1, [x0, x3]                    /* w1 = SMC sentinel             */;\
  cbnz w1, 3f                             /* non-zero: code was overwritten*/;\
  ret                                                                        ;\
                                                                             ;\
ext_store_vram_u##store_type:                                                ;\
ext_store_vram_u##store_type##_safe:                                         ;\
  dup##store_type(w1)                     /* duplicate byte if necessary   */;\
  and w0, w0, #(0x1ffff & ~stmask16)      /* mirror every 128KB and align  */;\
  sub w3, w0, #0x8000                     /* candidate: mirrored last bank */;\
  cmp w0, #0x18000                        /* offset past the 96KB of VRAM? */;\
  csel w0, w3, w0, cs                     /* if so, use the mirror         */;\
  add x3, reg_base, #VRAM_OFF             /* x3 = vram base                */;\
  str_op16 w1, [x0, x3]                   /* store the value               */;\
  ret                                                                        ;\
                                                                             ;\
ext_store_oam_ram_u##store_type:                                             ;\
ext_store_oam_ram_u##store_type##_safe:                                      ;\
  dup##store_type(w1)                     /* duplicate byte if necessary   */;\
  and w0, w0, #(0x3ff & ~stmask16)        /* mirror every 1KB and align    */;\
  add x3, reg_base, #OAM_RAM_OFF          /* x3 = oam ram base             */;\
  str_op16 w1, [x0, x3]                   /* store the value               */;\
  mov w3, #1                              /* x3 is dead, reuse as the flag */;\
  str w3, [reg_base, #OAM_UPDATED]        /* guaranteed non-zero signal    */;\
  ret                                                                        ;\
                                                                             ;\
ext_store_ioreg_u##store_type:                                               ;\
  str w2, [reg_base, #REG_PC]             /* write out PC                  */;\
  str lr, [reg_base, #REG_SAVE]           /* LR is lost across the call    */;\
  and w0, w0, #(0x3ff & ~stmask)          /* register offset, aligned      */;\
  store_registers()                                                          ;\
  bl write_io_register##store_type                                           ;\
  cbnz w0, write_epilogue                 /* handle additional write stuff */;\
  ldr lr, [reg_base, #REG_SAVE]                                              ;\
  load_registers()                                                           ;\
  ret                                     /* resume if no side effects     */;\
                                                                             ;\
3: /* SMC write (iwram/ewram): drop translations and restart from the PC */  ;\
  str w2, [reg_base, #REG_PC]             /* write out PC                  */;\
  store_registers()                       /* flush guest registers         */;\
  consolidate_flags(w1)                   /* lookup_pc reloads flags       */;\
  bl flush_translation_cache_ram                                             ;\
  ldr w0, [reg_base, #REG_PC]             /* load "current new" PC         */;\
  b lookup_pc                             /* continue execution            */;\
.size execute_store_u##store_type, .-execute_store_u##store_type
 | 
						|
 | 
						|
// Handler for address regions where stores are ignored: it returns straight
// to the caller without touching memory or any register.
ext_store_ignore:
  ret                                     // return
 | 
						|
 | 
						|
// store_lookup_table(store_type)
//
// Emits one table of sixteen 64-bit handler addresses (16 * 8 = 128 bytes).
// The entry number is the value shown in each comment (0x00..0x0F); see
// execute_aligned_store32, which indexes such a table with (address >> 24).
// store_type is pasted into the handler names (8, 16, 32, 32_safe).
//
// NOTE(review): entries 0x08..0x0E all point at ext_store_u<store_type>,
// which is defined outside this macro, not at ext_store_ignore. The
// "gamepak: ignore" wording on those entries is kept from the original and
// may be stale - confirm against ext_store_u<store_type>.
//
// The last entry deliberately has no trailing continuation, so the macro
// body ends there regardless of what follows in the file.
#define store_lookup_table(store_type)                                       ;\
  .quad ext_store_ignore                  /* 0x00: BIOS, ignore            */;\
  .quad ext_store_ignore                  /* 0x01: ignore                  */;\
  .quad ext_store_ewram_u##store_type     /* 0x02: ewram                   */;\
  .quad ext_store_iwram_u##store_type     /* 0x03: iwram                   */;\
  .quad ext_store_ioreg_u##store_type     /* 0x04: I/O regs                */;\
  .quad ext_store_palette_u##store_type   /* 0x05: palette RAM             */;\
  .quad ext_store_vram_u##store_type      /* 0x06: vram                    */;\
  .quad ext_store_oam_ram_u##store_type   /* 0x07: oam ram                 */;\
  .quad ext_store_u##store_type           /* 0x08: gamepak: ignore         */;\
  .quad ext_store_u##store_type           /* 0x09: gamepak: ignore         */;\
  .quad ext_store_u##store_type           /* 0x0A: gamepak: ignore         */;\
  .quad ext_store_u##store_type           /* 0x0B: gamepak: ignore         */;\
  .quad ext_store_u##store_type           /* 0x0C: gamepak: ignore         */;\
  .quad ext_store_u##store_type           /* 0x0D: EEPROM                  */;\
  .quad ext_store_u##store_type           /* 0x0E: backup                  */;\
  .quad ext_store_ignore                  /* 0x0F: ignore                  */
 | 
						|
 | 
						|
// Instantiate the store handlers for 8-, 16- and 32-bit accesses.
// The first argument is the store_type suffix pasted into the generated
// label names (ext_store_ioreg_u8, execute_store_u16, ...).
// NOTE(review): the meaning of the remaining six arguments is fixed by the
// execute_store_builder definition, which is outside this excerpt - they look
// like the store/load mnemonics followed by alignment masks; confirm there.
execute_store_builder(8,  strb, strh, ldrb, 0, 1, 0)
execute_store_builder(16, strh, strh, ldrh, 1, 1, 1)
execute_store_builder(32, str,  str,  ldr,  3, 3, 2)
 | 
						|
 | 
						|
// Palette writes are special since they are converted on the fly for speed:
// every write updates both the raw copy (PAL_RAM_OFF) and a converted copy
// (PALCNV_RAM_OFF), at the same offset.
//
// In:     w0 = address (only bits 9..1 are used), w1 = value to store
// Clobb:  w0, w1, w2, x3
//
// Conversion performed on the 16-bit value (bit ranges):
//   source [4:0]   -> result [15:11]   ("red" in the comments below)
//   source [9:5]   -> result [10:6]    (green; result bit 5 stays zero)
//   source [14:10] -> result [4:0]     ("blue")
//   source bit 15 is dropped.
// NOTE(review): this looks like GBA BGR555 -> host RGB565 - confirm with the
// renderer that consumes the converted palette.

ext_store_palette_u8:
  // Byte writes store the byte in both halves of the halfword:
  // low 16 bits of w1 become (byte << 8) | byte, then fall through.
  bfi w1, w1, #8, #24                     // Duplicate the byte
ext_store_palette_u16:
  and w0, w0, #(0x3fe)                    // Halfword-aligned offset, 1 KiB mirror
  add x3, reg_base, #(PAL_RAM_OFF)        // x3 = raw palette copy
  strh w1, [x3, x0]                       // Keep the unconverted value

  ubfx w2, w1, #10, #5                    // Extract blue to red
  bfi  w2, w1, #11, #5                    // Move red to blue
  and  w1, w1, #0x03E0                    // Extract green component
  orr  w1, w2, w1, lsl #1                 // Merge the three components

  add x3, reg_base, #(PALCNV_RAM_OFF)     // x3 = converted palette copy
  strh w1, [x3, x0]                       // Store the converted colour
  ret
 | 
						|
 | 
						|
// 32-bit palette store: same idea as the 16-bit handler, but both halfwords
// of w1 are converted at once using masks that cover two entries.
// The _safe label is an alias: both names run exactly the same code.
//
// In:     w0 = address (only bits 9..2 are used), w1 = two 16-bit entries
// Clobb:  w0, w1, w2, w3/x3
//
// Per halfword: source [14:10] -> result [4:0], source [4:0] -> result
// [15:11], source [9:5] -> result [10:6]; source bit 15 is dropped.
ext_store_palette_u32_safe:
ext_store_palette_u32:
  and w0, w0, #(0x3fc)                    // Word-aligned offset, 1 KiB mirror
  add x3, reg_base, #(PAL_RAM_OFF)        // x3 = raw palette copy
  str w1, [x3, x0]                        // Keep the unconverted value

  and   w2, w1, #0x7C007C00               // Get blue components
  and   w3, w1, #0x001F001F               // Get red components
  lsr   w2, w2, #10                       // Place blue in the final register
  orr   w2, w2, w3, lsl #11               // Merge red
  and   w3, w1, #0x03E003E0               // Get green component
  orr   w1, w2, w3, lsl #1                // Merge green

  add x3, reg_base, #(PALCNV_RAM_OFF)     // x3 = converted palette copy
  str w1, [x3, x0]                        // Store both converted colours
  ret
 | 
						|
 | 
						|
// This is a store that is executed in a strm case (so no SMC checks in-between)
//
// In:  w0 = address, w1 = 32-bit value (as consumed by the handlers below)
//
// Dispatch:
//  - if any of address bits 31..28 is set, branch to ext_store_u32;
//  - otherwise load a handler pointer from the table located at
//    reg_base + MEM_TBL_OFF + 816 + 3*128, indexed by (address >> 24) with
//    8-byte entries, and jump to it. lr is left untouched, so the handler's
//    ret returns to this routine's caller.
// 3*128 skips three 16-entry store tables (see store_lookup_table), which
// selects the "32_safe" table. NOTE(review): 816 is presumably the combined
// size of the load tables that precede the store tables - confirm against
// load_lookup_table.

defsymbl(execute_aligned_store32)
  lsr w4, w0, #28                         // w4 = address bits 31..28
  lsr w3, w0, #24                         // w3 = region index (address >> 24)
  cbnz w4, ext_store_u32                  // Not table-dispatched if w4 != 0
  add x4, reg_base, MEM_TBL_OFF + 816 + 3*128   // x4 = "32_safe" store table
  ldr x3, [x4, x3, lsl #3]                // Fetch the 64-bit handler address
  br x3                                   // Jump to it (no link)

// iwram store without SMC tracking.
ext_store_iwram_u32_safe:
  and w0, w0, #(0x7fff)                   // Mask to mirror memory (no need to align!)
  add x3, reg_base, #(IWRAM_OFF+0x8000)   // x3 = iwram base
  str w1, [x0, x3]                        // store data
  ret                                     // Return

// ewram store without SMC tracking.
ext_store_ewram_u32_safe:
  and w0, w0, #(0x3ffff)                  // Mask to mirror memory (no need to align!)
  add x3, reg_base, #(EWRAM_OFF)          // x3 = ewram base
  str w1, [x0, x3]                        // store data
  ret                                     // Return

// I/O register store: calls write_io_register32 with w0 = register offset.
// lr is parked in REG_SAVE because the bl below overwrites it. The value
// returned in w0 is not examined here (no branch to write_epilogue).
ext_store_ioreg_u32_safe:
  str lr, [reg_base, #REG_SAVE]           // Preserve LR across the call
  and w0, w0, #(0x3fc)                    // Word-aligned offset within 1 KiB
  store_registers()
  bl write_io_register32
  ldr lr, [reg_base, #REG_SAVE]           // Restore LR
  load_registers()
  ret
.size execute_aligned_store32, .-execute_aligned_store32
 | 
						|
 | 
						|
// This is called whenever an external store with side effects was performed
//
// In:  w0 = alert bit mask; the bits tested are CPU_ALERT_SMC_B,
//      CPU_ALERT_IRQ_B and CPU_ALERT_HALT_B.
//      REG_PC in the register file holds the PC to resume at (it is reloaded
//      below).
// This code never returns to its caller: it leaves through lookup_pc or
// return_to_main.
// reg_save0 keeps the mask across the calls below and is tested after them,
// so the callees are relied upon to preserve it.
write_epilogue:
  mov reg_save0, w0                       // Save reg for later
  consolidate_flags(w1)                   // Update CPSR (w1 passed as macro arg)
  tbz w0, #CPU_ALERT_SMC_B, 1f            // Skip if SMC did not happen
  bl flush_translation_cache_ram          // Flush RAM if bit is set

1:
  tbz reg_save0, #CPU_ALERT_IRQ_B, 2f     // Skip if IRQ did not happen
  bl check_and_raise_interrupts

2:
  ldr w0, [reg_base, #REG_PC]             // load new PC
  tbz reg_save0, #CPU_ALERT_HALT_B, lookup_pc   // Resume execution if running

  // explicit fallthrough to alert_loop, while CPU is halted

// Runs update_gba repeatedly until either a frame has been completed (leave
// through return_to_main) or CPU_HALT_STATE reads zero (resume at REG_PC).
alert_loop:
  mov w0, reg_cycles                      // load remaining cycles
  bl update_gba                           // update GBA until CPU isn't halted
  and reg_cycles, w0, 0x7fff              // new cycle count = low 15 bits of result

  ldr w1, [reg_base, #COMPLETED_FRAME]    // Check whether a frame was completed
  cbnz w1, return_to_main                 // and return to caller function.

  ldr w1, [reg_base, #CPU_HALT_STATE]     // Check whether the CPU is halted
  cbnz w1, alert_loop                     // and keep looping while it still is

  ldr w0, [reg_base, #REG_PC]             // load new PC
  b lookup_pc                             // Resume execution at that PC
 | 
						|
 | 
						|
 | 
						|
// Table of load/store handler addresses: six load tables followed by four
// store tables (8, 16, 32, 32_safe), each store table being 16 .quad entries.
// The order matters: execute_aligned_store32 reaches the 32_safe table with
// a fixed offset (816 + 3*128) that assumes exactly this sequence.
// NOTE(review): that routine reads through reg_base + MEM_TBL_OFF, so this
// table is presumably copied to (or aliased with) that location at start-up -
// confirm in the initialisation code.
.data
.align 4                                  // gas on ARM targets: 2^4 = 16-byte alignment
defsymbl(ldst_handler_functions)
  load_lookup_table(u8,   u8)
  load_lookup_table(s8,   s8)
  load_lookup_table(u16, u16)
  load_lookup_table(s16, s16)
  load_lookup_table(u32, u32)
  load_lookup_table(u32, aligned_u32)
  store_lookup_table(8)
  store_lookup_table(16)
  store_lookup_table(32)
  store_lookup_table(32_safe)
 | 
						|
 | 
						|
// Zero-initialised emulator state, exported as global symbols.
// NOTE(review): the stubs in this file address several of these buffers as
// reg_base + <NAME>_OFF (IWRAM_OFF, EWRAM_OFF, PAL_RAM_OFF, PALCNV_RAM_OFF,
// MEM_TBL_OFF, ...), so the order and sizes below presumably have to stay in
// sync with those constants - confirm before reordering or resizing anything.
.bss
.align 4                                  // gas on ARM targets: 2^4 = 16-byte alignment

defsymbl(memory_map_read)
  .space 0x10000                          // 64 KiB
defsymbl(iwram)
  .space 0x10000                          // 64 KiB (stores above use IWRAM_OFF+0x8000)
defsymbl(vram)
  .space 0x18000                          // 96 KiB
defsymbl(ewram)
  .space 0x80000                          // 512 KiB
defsymbl(ldst_lookup_tables)
  .space 4096
defsymbl(reg)
  .space 0x100
defsymbl(spsr)
  .space 24
defsymbl(reg_mode)
  .space 196
  .space 36  // Padding: 24 + 196 + 36 = 256, so oam_ram starts at reg + 0x200
defsymbl(oam_ram)
  .space 0x400
defsymbl(palette_ram)
  .space 0x400
defsymbl(io_registers)
  .space 0x400
defsymbl(palette_ram_converted)
  .space 0x400
 | 
						|
 | 
						|
 |