From 71ebc49b59d3b85ed9b8dc81d40e13a05a4f805f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 30 Mar 2021 21:06:52 +0200 Subject: [PATCH] Improve indirect jumps in ARM Handle already translated blocks in the ARM asm to speed up indirect branches (affect some games more than others) --- arm/arm_stub.S | 161 ++++++++++++++++++++++--------------------------- cpu.h | 3 +- cpu_threaded.c | 4 +- gba_memory.c | 2 +- libretro.c | 2 +- main.c | 2 +- 6 files changed, 81 insertions(+), 93 deletions(-) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 8160bfe..5be4ca4 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -156,6 +156,66 @@ _##symbol: ldmia sp!, { call_c_saved_regs } ;\ ldr sp, =reg ;\ +@ Jumps to PC (ARM or Thumb modes) +@ This is really two functions/routines in one +@ r0 contains the PC + +.align 2 +#define execute_pc_builder(mode, align) ;\ +defsymbl(arm_indirect_branch_##mode) ;\ + save_flags() ;\ +execute_pc_##mode: ;\ + bic r0, r0, #(align) /* Align PC */;\ + mov r1, r0, lsr #24 /* Get region */;\ + cmp r1, #2 ;\ + beq 1f /* ewram */;\ + cmp r1, #3 ;\ + beq 2f /* iwram */;\ +3: ;\ + call_c_function(block_lookup_address_##mode) ;\ + restore_flags() ;\ + bx r0 ;\ +1: ;\ + ldr r1, =(ewram+0x40000) /* Load base addr */;\ + mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\ + mov r2, r2, lsr #14 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ +2: ;\ + ldr r1, =(iwram) /* Load base addr */;\ + mov r2, r0, lsl #17 /* addr &= 0x7fff */;\ + mov r2, r2, lsr #17 ;\ + ldrh r2, [r1, r2] /* Load half word there */;\ + ldr r1, =(ram_block_ptrs) ;\ + ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\ + cmp r1, #0 /* NULL means not translated */;\ + beq 3b /* Need to translate */;\ + restore_flags() ;\ + bx r1 ;\ + + +execute_pc_builder(arm, 0x3) +execute_pc_builder(thumb, 0x1) + +@ Resumes execution from saved PC, in any mode + +execute_pc: + ldr r0, [reg_base, #REG_PC] @ load new PC + ldr r1, [reg_base, #REG_CPSR] @ r1 = flags + tst r1, #0x20 @ see if Thumb bit is set + bne 2f + + load_registers_arm() + b execute_pc_arm + +2: + load_registers_thumb() + b execute_pc_thumb @ Update the GBA hardware (video, sound, input, etc) @@ -201,28 +261,11 @@ wait_halt_##name: ;\ ;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ cmp r0, #0 /* see if PC has changed */;\ - beq 1f /* if not return */;\ + bne execute_pc /* go jump/translate */;\ ;\ - ldr r0, [reg_base, #REG_PC] /* load new PC */;\ - ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\ - tst r1, #0x20 /* see if Thumb bit is set */;\ - bne 2f /* if so load Thumb PC */;\ - ;\ - load_registers_arm() /* load ARM regs */;\ - call_c_function(block_lookup_address_arm) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ - ;\ -1: ;\ load_registers_##mode() /* reload registers */;\ restore_flags() ;\ - return_##return_op() ;\ - ;\ -2: ;\ - load_registers_thumb() /* load Thumb regs */;\ - call_c_function(block_lookup_address_thumb) ;\ - restore_flags() ;\ - bx r0 /* jump to new ARM block */;\ + return_##return_op() /* continue, no PC change */;\ arm_update_gba_builder(arm, arm, straight) @@ -239,59 +282,32 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ Input: @ r0: PC to branch to -.align 2 -defsymbl(arm_indirect_branch_arm) - save_flags() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 - -.align 2 -defsymbl(arm_indirect_branch_thumb) - save_flags() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - .align 2 defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit - bne 1f @ if set going to Thumb mode - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + beq execute_pc_arm @ Keep executing ARM code -1: - bic r0, r0, #0x01 + bic r0, r0, #0x01 @ Switch to Thumb mode store_registers_arm() @ save out ARM registers load_registers_thumb() @ load in Thumb registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr orr r1, r1, #0x20 @ set Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + b execute_pc_thumb @ Now execute Thumb .align 2 defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit - beq 1f @ if set going to ARM mode - bic r0, r0, #0x01 - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ return + bne execute_pc_thumb @ Keep executing Thumb mode -1: store_registers_thumb() @ save out Thumb registers load_registers_arm() @ load in ARM registers ldr r1, [reg_base, #REG_CPSR] @ load cpsr bic r1, r1, #0x20 @ clear Thumb mode str r1, [reg_base, #REG_CPSR] @ store flags - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ return + b execute_pc_arm @ Now execute ARM @ Update the cpsr. @@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr) cmp r0, #0 @ check new PC beq 1f @ if it's zero, return - call_c_function(block_lookup_address_arm) - - restore_flags() - bx r0 @ return to new ARM address + b execute_pc_arm 1: restore_flags() @@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore) bne 2f @ if so handle it load_registers_arm() @ restore ARM registers - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 + b execute_pc_arm 2: load_registers_thumb() @ load Thumb registers - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 - + b execute_pc_thumb @ Setup the mode transition work for calling an SWI. @@ -718,21 +726,7 @@ alert_loop: bne alert_loop @ Keep looping until it is mvn reg_cycles, r0 @ load new cycle count - ldr r0, [reg_base, #REG_PC] @ load new PC - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags - tst r1, #0x20 @ see if Thumb bit is set - bne 2f - - load_registers_arm() - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block - -2: - load_registers_thumb() - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block + b execute_pc @ restart execution at PC 4: restore_flags() @@ -746,17 +740,8 @@ lookup_pc: ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set - beq lookup_pc_arm @ if not lookup ARM - -lookup_pc_thumb: - call_c_function(block_lookup_address_thumb) - restore_flags() - bx r0 @ jump to new Thumb block - -lookup_pc_arm: - call_c_function(block_lookup_address_arm) - restore_flags() - bx r0 @ jump to new ARM block + beq execute_pc_arm @ if not lookup ARM + b execute_pc_thumb #define sign_extend_u8(reg) diff --git a/cpu.h b/cpu.h index 2b250ca..2dacd6a 100644 --- a/cpu.h +++ b/cpu.h @@ -157,7 +157,8 @@ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; void flush_translation_cache_rom(void); void flush_translation_cache_ram(void); void dump_translation_cache(void); -void wipe_caches(void); +void init_caches(void); +void init_emitter(void); extern u32 reg_mode[7][7]; extern u32 spsr[6]; diff --git a/cpu_threaded.c b/cpu_threaded.c index 7f12b4f..e5c027e 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -3644,7 +3644,7 @@ void flush_translation_cache_rom(void) memset(rom_branch_hash, 0, sizeof(rom_branch_hash)); } -void wipe_caches(void) +void init_caches(void) { /* Ensure we wipe everything including the SMC mirrors */ flush_translation_cache_rom(); @@ -3653,6 +3653,8 @@ void wipe_caches(void) iwram_code_min = 0; iwram_code_max = 0x7FFF; flush_translation_cache_ram(); + /* Ensure 0 and FFFF get zeroed out */ + memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs)); } #define cache_dump_prefix "" diff --git a/gba_memory.c b/gba_memory.c index b66dce7..8d3d39e 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -3322,7 +3322,7 @@ void gba_load_state(const void* src) #ifdef HAVE_DYNAREC if (dynarec_enable) - wipe_caches(); + init_caches(); #endif reg[OAM_UPDATED] = 1; diff --git a/libretro.c b/libretro.c index d94ddcb..0373c94 100644 --- a/libretro.c +++ b/libretro.c @@ -675,7 +675,7 @@ static void check_variables(int started_from_load) dynarec_enable = 1; if (dynarec_enable != prevvalue) - wipe_caches(); + init_caches(); } else dynarec_enable = 1; diff --git a/main.c b/main.c index 2a82338..759aa94 100644 --- a/main.c +++ b/main.c @@ -114,7 +114,7 @@ void init_main(void) video_count = 960; #ifdef HAVE_DYNAREC - wipe_caches(); + init_caches(); init_emitter(); #endif }