Improve indirect jumps in ARM

Handle already-translated blocks directly in the ARM asm to speed up indirect
branches (this affects some games more than others).
This commit is contained in:
David Guillen Fandos 2021-03-30 21:06:52 +02:00
parent 336b14a876
commit 71ebc49b59
6 changed files with 81 additions and 93 deletions

View File

@ -156,6 +156,66 @@ _##symbol:
ldmia sp!, { call_c_saved_regs } ;\ ldmia sp!, { call_c_saved_regs } ;\
ldr sp, =reg ;\ ldr sp, =reg ;\
@ Jumps to PC (ARM or Thumb modes)
@ This is really two functions/routines in one
@ r0 contains the PC
.align 2
/*
 * execute_pc_builder(mode, align): emits the indirect-branch routine for one
 * CPU mode. It is instantiated below once for arm and once for thumb.
 *
 * Entry points produced by each expansion:
 *   arm_indirect_branch_<mode>  runs save_flags() and falls through
 *   execute_pc_<mode>           same code, but skips the flag save
 *
 * Input:   r0 = target PC; the bits given in `align` are cleared first.
 * Scratch: r1 and r2 are overwritten.
 * Exit:    never returns to the caller; every path ends in restore_flags()
 *          followed by a bx to the code for the target PC.
 *
 * The top byte of the PC (r0 >> 24) selects the path:
 *   2 (ewram) / 3 (iwram): fast path. The half word found at the region base
 *     plus the masked PC offset is used as an index into ram_block_ptrs. A
 *     non-zero entry is branched to directly, with no call_c_function().
 *   any other region, or a zero entry: block_lookup_address_<mode> is called
 *     and the address it leaves in r0 is branched to.
 *
 * NOTE(review): the half word is presumably the per-address translation tag
 * kept in the SMC mirror of each RAM region - confirm against the C side.
 */
#define execute_pc_builder(mode, align) ;\
defsymbl(arm_indirect_branch_##mode) /* Entry that saves flags first */;\
save_flags() ;\
execute_pc_##mode: /* Entry that skips the flag save */;\
bic r0, r0, #(align) /* Align PC */;\
mov r1, r0, lsr #24 /* Get region */;\
cmp r1, #2 /* Region 0x02? */;\
beq 1f /* ewram */;\
cmp r1, #3 /* Region 0x03? */;\
beq 2f /* iwram */;\
3: /* Slow path: any other region, or no cached block */;\
call_c_function(block_lookup_address_##mode) ;\
restore_flags() ;\
bx r0 /* Jump to the address left in r0 by the lookup */;\
1: /* ewram fast path */;\
ldr r1, =(ewram+0x40000) /* Load base addr */;\
mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\
mov r2, r2, lsr #14 /* r2 = offset inside the region */;\
ldrh r2, [r1, r2] /* Load half word there */;\
ldr r1, =(ram_block_ptrs) /* Table indexed by that half word */;\
ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
cmp r1, #0 /* NULL means not translated */;\
beq 3b /* Need to translate */;\
restore_flags() ;\
bx r1 /* Jump straight to the cached block */;\
2: /* iwram fast path */;\
ldr r1, =(iwram) /* Load base addr */;\
mov r2, r0, lsl #17 /* addr &= 0x7fff */;\
mov r2, r2, lsr #17 /* r2 = offset inside the region */;\
ldrh r2, [r1, r2] /* Load half word there */;\
ldr r1, =(ram_block_ptrs) /* Table indexed by that half word */;\
ldr r1, [r1, r2, lsl #2] /* Pointer to the cache */;\
cmp r1, #0 /* NULL means not translated */;\
beq 3b /* Need to translate */;\
restore_flags() ;\
bx r1 /* Jump straight to the cached block */;\
execute_pc_builder(arm, 0x3) /* ARM: clear the low two PC bits */
execute_pc_builder(thumb, 0x1) /* Thumb: clear the low PC bit */
@ Resumes execution from saved PC, in any mode
/*
 * execute_pc: reads the PC and CPSR stored in the register block at reg_base
 * and continues at execute_pc_arm or execute_pc_thumb, chosen by CPSR bit
 * 0x20. The register set for the chosen mode is loaded before the branch.
 * r0 = PC on exit to either routine; r1 is overwritten with the CPSR value.
 * No save_flags() is done here and the execute_pc_<mode> targets skip it too.
 * NOTE(review): callers are presumably expected to have saved the flags
 * already - confirm at each call site.
 */
execute_pc:
ldr r0, [reg_base, #REG_PC] @ load new PC
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
tst r1, #0x20 @ see if Thumb bit is set
bne 2f /* Bit set: take the Thumb path */
load_registers_arm() /* Bit clear: ARM register set */
b execute_pc_arm /* Does not return here */
2:
load_registers_thumb() /* Thumb register set */
b execute_pc_thumb /* Does not return here */
@ Update the GBA hardware (video, sound, input, etc) @ Update the GBA hardware (video, sound, input, etc)
@ -201,28 +261,11 @@ wait_halt_##name: ;\
;\ ;\
ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\ ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\
cmp r0, #0 /* see if PC has changed */;\ cmp r0, #0 /* see if PC has changed */;\
beq 1f /* if not return */;\ bne execute_pc /* go jump/translate */;\
;\ ;\
ldr r0, [reg_base, #REG_PC] /* load new PC */;\
ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\
tst r1, #0x20 /* see if Thumb bit is set */;\
bne 2f /* if so load Thumb PC */;\
;\
load_registers_arm() /* load ARM regs */;\
call_c_function(block_lookup_address_arm) ;\
restore_flags() ;\
bx r0 /* jump to new ARM block */;\
;\
1: ;\
load_registers_##mode() /* reload registers */;\ load_registers_##mode() /* reload registers */;\
restore_flags() ;\ restore_flags() ;\
return_##return_op() ;\ return_##return_op() /* continue, no PC change */;\
;\
2: ;\
load_registers_thumb() /* load Thumb regs */;\
call_c_function(block_lookup_address_thumb) ;\
restore_flags() ;\
bx r0 /* jump to new ARM block */;\
arm_update_gba_builder(arm, arm, straight) arm_update_gba_builder(arm, arm, straight)
@ -239,59 +282,32 @@ arm_update_gba_builder(idle_thumb, thumb, add)
@ Input: @ Input:
@ r0: PC to branch to @ r0: PC to branch to
.align 2
defsymbl(arm_indirect_branch_arm)
save_flags()
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0
.align 2
defsymbl(arm_indirect_branch_thumb)
save_flags()
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0
.align 2 .align 2
defsymbl(arm_indirect_branch_dual_arm) defsymbl(arm_indirect_branch_dual_arm)
save_flags() save_flags()
tst r0, #0x01 @ check lower bit tst r0, #0x01 @ check lower bit
bne 1f @ if set going to Thumb mode beq execute_pc_arm @ Keep executing ARM code
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0 @ return
1: bic r0, r0, #0x01 @ Switch to Thumb mode
bic r0, r0, #0x01
store_registers_arm() @ save out ARM registers store_registers_arm() @ save out ARM registers
load_registers_thumb() @ load in Thumb registers load_registers_thumb() @ load in Thumb registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr ldr r1, [reg_base, #REG_CPSR] @ load cpsr
orr r1, r1, #0x20 @ set Thumb mode orr r1, r1, #0x20 @ set Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags str r1, [reg_base, #REG_CPSR] @ store flags
call_c_function(block_lookup_address_thumb) b execute_pc_thumb @ Now execute Thumb
restore_flags()
bx r0 @ return
.align 2 .align 2
defsymbl(arm_indirect_branch_dual_thumb) defsymbl(arm_indirect_branch_dual_thumb)
save_flags() save_flags()
tst r0, #0x01 @ check lower bit tst r0, #0x01 @ check lower bit
beq 1f @ if set going to ARM mode bne execute_pc_thumb @ Keep executing Thumb mode
bic r0, r0, #0x01
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0 @ return
1:
store_registers_thumb() @ save out Thumb registers store_registers_thumb() @ save out Thumb registers
load_registers_arm() @ load in ARM registers load_registers_arm() @ load in ARM registers
ldr r1, [reg_base, #REG_CPSR] @ load cpsr ldr r1, [reg_base, #REG_CPSR] @ load cpsr
bic r1, r1, #0x20 @ clear Thumb mode bic r1, r1, #0x20 @ clear Thumb mode
str r1, [reg_base, #REG_CPSR] @ store flags str r1, [reg_base, #REG_CPSR] @ store flags
call_c_function(block_lookup_address_arm) b execute_pc_arm @ Now execute ARM
restore_flags()
bx r0 @ return
@ Update the cpsr. @ Update the cpsr.
@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr)
cmp r0, #0 @ check new PC cmp r0, #0 @ check new PC
beq 1f @ if it's zero, return beq 1f @ if it's zero, return
call_c_function(block_lookup_address_arm) b execute_pc_arm
restore_flags()
bx r0 @ return to new ARM address
1: 1:
restore_flags() restore_flags()
@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore)
bne 2f @ if so handle it bne 2f @ if so handle it
load_registers_arm() @ restore ARM registers load_registers_arm() @ restore ARM registers
call_c_function(block_lookup_address_arm) b execute_pc_arm
restore_flags()
bx r0
2: 2:
load_registers_thumb() @ load Thumb registers load_registers_thumb() @ load Thumb registers
call_c_function(block_lookup_address_thumb) b execute_pc_thumb
restore_flags()
bx r0
@ Setup the mode transition work for calling an SWI. @ Setup the mode transition work for calling an SWI.
@ -718,21 +726,7 @@ alert_loop:
bne alert_loop @ Keep looping until it is bne alert_loop @ Keep looping until it is
mvn reg_cycles, r0 @ load new cycle count mvn reg_cycles, r0 @ load new cycle count
ldr r0, [reg_base, #REG_PC] @ load new PC b execute_pc @ restart execution at PC
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
tst r1, #0x20 @ see if Thumb bit is set
bne 2f
load_registers_arm()
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0 @ jump to new ARM block
2:
load_registers_thumb()
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0 @ jump to new Thumb block
4: 4:
restore_flags() restore_flags()
@ -746,17 +740,8 @@ lookup_pc:
ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r0, [reg_base, #REG_PC] @ r0 = new pc
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
tst r1, #0x20 @ see if Thumb bit is set tst r1, #0x20 @ see if Thumb bit is set
beq lookup_pc_arm @ if not lookup ARM beq execute_pc_arm @ if not lookup ARM
b execute_pc_thumb
lookup_pc_thumb:
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0 @ jump to new Thumb block
lookup_pc_arm:
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0 @ jump to new ARM block
#define sign_extend_u8(reg) #define sign_extend_u8(reg)

3
cpu.h
View File

@ -157,7 +157,8 @@ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
void flush_translation_cache_rom(void); void flush_translation_cache_rom(void);
void flush_translation_cache_ram(void); void flush_translation_cache_ram(void);
void dump_translation_cache(void); void dump_translation_cache(void);
void wipe_caches(void); void init_caches(void);
void init_emitter(void);
extern u32 reg_mode[7][7]; extern u32 reg_mode[7][7];
extern u32 spsr[6]; extern u32 spsr[6];

View File

@ -3644,7 +3644,7 @@ void flush_translation_cache_rom(void)
memset(rom_branch_hash, 0, sizeof(rom_branch_hash)); memset(rom_branch_hash, 0, sizeof(rom_branch_hash));
} }
void wipe_caches(void) void init_caches(void)
{ {
/* Ensure we wipe everything including the SMC mirrors */ /* Ensure we wipe everything including the SMC mirrors */
flush_translation_cache_rom(); flush_translation_cache_rom();
@ -3653,6 +3653,8 @@ void wipe_caches(void)
iwram_code_min = 0; iwram_code_min = 0;
iwram_code_max = 0x7FFF; iwram_code_max = 0x7FFF;
flush_translation_cache_ram(); flush_translation_cache_ram();
/* Ensure 0 and FFFF get zeroed out */
memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs));
} }
#define cache_dump_prefix "" #define cache_dump_prefix ""

View File

@ -3322,7 +3322,7 @@ void gba_load_state(const void* src)
#ifdef HAVE_DYNAREC #ifdef HAVE_DYNAREC
if (dynarec_enable) if (dynarec_enable)
wipe_caches(); init_caches();
#endif #endif
reg[OAM_UPDATED] = 1; reg[OAM_UPDATED] = 1;

View File

@ -675,7 +675,7 @@ static void check_variables(int started_from_load)
dynarec_enable = 1; dynarec_enable = 1;
if (dynarec_enable != prevvalue) if (dynarec_enable != prevvalue)
wipe_caches(); init_caches();
} }
else else
dynarec_enable = 1; dynarec_enable = 1;

2
main.c
View File

@ -114,7 +114,7 @@ void init_main(void)
video_count = 960; video_count = 960;
#ifdef HAVE_DYNAREC #ifdef HAVE_DYNAREC
wipe_caches(); init_caches();
init_emitter(); init_emitter();
#endif #endif
} }