Improve indirect jumps in ARM
Handle already translated blocks in the ARM asm to speed up indirect branches (affects some games more than others)
This commit is contained in:
parent
336b14a876
commit
71ebc49b59
161
arm/arm_stub.S
161
arm/arm_stub.S
|
@ -156,6 +156,66 @@ _##symbol:
|
|||
ldmia sp!, { call_c_saved_regs } ;\
|
||||
ldr sp, =reg ;\
|
||||
|
||||
@ Jumps to the code block for a PC (ARM or Thumb modes)
|
||||
@ Each expansion is two routines in one: arm_indirect_branch_<mode> saves the flags, then falls into execute_pc_<mode>
|
||||
@ Input: r0 contains the PC (its low bits are cleared with the align mask)
@ PCs in region 2 (ewram) or 3 (iwram) first try a ram_block_ptrs lookup;
@ any other region, or a NULL entry, goes through block_lookup_address_<mode>
|
||||
|
||||
.align 2
|
||||
#define execute_pc_builder(mode, align) ;\
|
||||
defsymbl(arm_indirect_branch_##mode) ;\
|
||||
save_flags() /* Only this entry point saves flags */;\
|
||||
execute_pc_##mode: ;\
|
||||
bic r0, r0, #(align) /* Align PC */;\
|
||||
mov r1, r0, lsr #24 /* Get region (top byte of the PC) */;\
|
||||
cmp r1, #2 ;\
|
||||
beq 1f /* ewram */;\
|
||||
cmp r1, #3 ;\
|
||||
beq 2f /* iwram */;\
|
||||
3: /* Slow path: ask the C lookup routine */;\
|
||||
call_c_function(block_lookup_address_##mode) ;\
|
||||
restore_flags() ;\
|
||||
bx r0 /* Jump to r0 (presumably the lookup result) */;\
|
||||
1: /* ewram fast path */;\
|
||||
ldr r1, =(ewram+0x40000) /* Load base addr */;\
|
||||
mov r2, r0, lsl #14 /* addr &= 0x3ffff */;\
|
||||
mov r2, r2, lsr #14 ;\
|
||||
ldrh r2, [r1, r2] /* Load half word there */;\
|
||||
ldr r1, =(ram_block_ptrs) ;\
|
||||
ldr r1, [r1, r2, lsl #2] /* Pointer to the cache, indexed by that half word */;\
|
||||
cmp r1, #0 /* NULL means not translated */;\
|
||||
beq 3b /* Need to translate */;\
|
||||
restore_flags() ;\
|
||||
bx r1 /* Jump to the pointer found in ram_block_ptrs */;\
|
||||
2: /* iwram fast path */;\
|
||||
ldr r1, =(iwram) /* Load base addr */;\
|
||||
mov r2, r0, lsl #17 /* addr &= 0x7fff */;\
|
||||
mov r2, r2, lsr #17 ;\
|
||||
ldrh r2, [r1, r2] /* Load half word there */;\
|
||||
ldr r1, =(ram_block_ptrs) ;\
|
||||
ldr r1, [r1, r2, lsl #2] /* Pointer to the cache, indexed by that half word */;\
|
||||
cmp r1, #0 /* NULL means not translated */;\
|
||||
beq 3b /* Need to translate */;\
|
||||
restore_flags() ;\
|
||||
bx r1 /* Jump to the pointer found in ram_block_ptrs */;\
|
||||
|
||||
|
||||
execute_pc_builder(arm, 0x3)
|
||||
execute_pc_builder(thumb, 0x1)
|
||||
|
||||
@ Resumes execution from the PC stored at REG_PC, in whichever mode the stored CPSR selects
|
||||
|
||||
execute_pc:
|
||||
ldr r0, [reg_base, #REG_PC] @ load new PC
|
||||
ldr r1, [reg_base, #REG_CPSR] @ r1 = stored CPSR
|
||||
tst r1, #0x20 @ see if Thumb bit is set
|
||||
bne 2f @ Thumb bit set: resume in Thumb
|
||||
|
||||
load_registers_arm()
|
||||
b execute_pc_arm @ ARM dispatcher takes the PC in r0
|
||||
|
||||
2:
|
||||
load_registers_thumb()
|
||||
b execute_pc_thumb @ Thumb dispatcher takes the PC in r0
|
||||
|
||||
@ Update the GBA hardware (video, sound, input, etc)
|
||||
|
||||
|
@ -201,28 +261,11 @@ wait_halt_##name: ;\
|
|||
;\
|
||||
ldr r0, [reg_base, #CHANGED_PC_STATUS] /* load PC changed status */;\
|
||||
cmp r0, #0 /* see if PC has changed */;\
|
||||
beq 1f /* if not return */;\
|
||||
bne execute_pc /* go jump/translate */;\
|
||||
;\
|
||||
ldr r0, [reg_base, #REG_PC] /* load new PC */;\
|
||||
ldr r1, [reg_base, #REG_CPSR] /* r1 = flags */;\
|
||||
tst r1, #0x20 /* see if Thumb bit is set */;\
|
||||
bne 2f /* if so load Thumb PC */;\
|
||||
;\
|
||||
load_registers_arm() /* load ARM regs */;\
|
||||
call_c_function(block_lookup_address_arm) ;\
|
||||
restore_flags() ;\
|
||||
bx r0 /* jump to new ARM block */;\
|
||||
;\
|
||||
1: ;\
|
||||
load_registers_##mode() /* reload registers */;\
|
||||
restore_flags() ;\
|
||||
return_##return_op() ;\
|
||||
;\
|
||||
2: ;\
|
||||
load_registers_thumb() /* load Thumb regs */;\
|
||||
call_c_function(block_lookup_address_thumb) ;\
|
||||
restore_flags() ;\
|
||||
bx r0 /* jump to new ARM block */;\
|
||||
return_##return_op() /* continue, no PC change */;\
|
||||
|
||||
|
||||
arm_update_gba_builder(arm, arm, straight)
|
||||
|
@ -239,59 +282,32 @@ arm_update_gba_builder(idle_thumb, thumb, add)
|
|||
@ Input:
|
||||
@ r0: PC to branch to
|
||||
|
||||
.align 2
|
||||
defsymbl(arm_indirect_branch_arm)
|
||||
save_flags()
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0
|
||||
|
||||
.align 2
|
||||
defsymbl(arm_indirect_branch_thumb)
|
||||
save_flags()
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0
|
||||
|
||||
.align 2
|
||||
defsymbl(arm_indirect_branch_dual_arm)
|
||||
save_flags()
|
||||
tst r0, #0x01 @ test bit 0 of the target PC
|
||||
bne 1f @ if set going to Thumb mode
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0 @ jump to the looked-up ARM block
|
||||
beq execute_pc_arm @ Keep executing ARM code
|
||||
|
||||
1:
|
||||
bic r0, r0, #0x01 @ clear bit 0 (the Thumb marker)
|
||||
bic r0, r0, #0x01 @ clear bit 0 (the Thumb marker)
|
||||
store_registers_arm() @ save out ARM registers
|
||||
load_registers_thumb() @ load in Thumb registers
|
||||
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
|
||||
orr r1, r1, #0x20 @ set Thumb mode
|
||||
str r1, [reg_base, #REG_CPSR] @ store updated cpsr
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0 @ jump to the looked-up Thumb block
|
||||
b execute_pc_thumb @ Now execute Thumb
|
||||
|
||||
.align 2
|
||||
defsymbl(arm_indirect_branch_dual_thumb)
|
||||
save_flags()
|
||||
tst r0, #0x01 @ test bit 0 of the target PC
|
||||
beq 1f @ if clear going to ARM mode
|
||||
bic r0, r0, #0x01 @ clear bit 0 (the Thumb marker)
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0 @ jump to the looked-up Thumb block
|
||||
bne execute_pc_thumb @ Keep executing Thumb mode
|
||||
|
||||
1:
|
||||
store_registers_thumb() @ save out Thumb registers
|
||||
load_registers_arm() @ load in ARM registers
|
||||
ldr r1, [reg_base, #REG_CPSR] @ load cpsr
|
||||
bic r1, r1, #0x20 @ clear Thumb mode
|
||||
str r1, [reg_base, #REG_CPSR] @ store updated cpsr
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0 @ jump to the looked-up ARM block
|
||||
b execute_pc_arm @ Now execute ARM
|
||||
|
||||
|
||||
@ Update the cpsr.
|
||||
|
@ -319,10 +335,7 @@ defsymbl(execute_store_cpsr)
|
|||
cmp r0, #0 @ check new PC
|
||||
beq 1f @ if it's zero, return
|
||||
|
||||
call_c_function(block_lookup_address_arm)
|
||||
|
||||
restore_flags()
|
||||
bx r0 @ return to new ARM address
|
||||
b execute_pc_arm
|
||||
|
||||
1:
|
||||
restore_flags()
|
||||
|
@ -378,16 +391,11 @@ defsymbl(execute_spsr_restore)
|
|||
bne 2f @ if so handle it
|
||||
|
||||
load_registers_arm() @ restore ARM registers
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0
|
||||
b execute_pc_arm
|
||||
|
||||
2:
|
||||
load_registers_thumb() @ load Thumb registers
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0
|
||||
|
||||
b execute_pc_thumb
|
||||
|
||||
|
||||
@ Setup the mode transition work for calling an SWI.
|
||||
|
@ -718,21 +726,7 @@ alert_loop:
|
|||
bne alert_loop @ Keep looping until it is
|
||||
|
||||
mvn reg_cycles, r0 @ load new cycle count
|
||||
ldr r0, [reg_base, #REG_PC] @ load new PC
|
||||
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
|
||||
tst r1, #0x20 @ see if Thumb bit is set
|
||||
bne 2f
|
||||
|
||||
load_registers_arm()
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0 @ jump to new ARM block
|
||||
|
||||
2:
|
||||
load_registers_thumb()
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0 @ jump to new Thumb block
|
||||
b execute_pc @ restart execution at PC
|
||||
|
||||
4:
|
||||
restore_flags()
|
||||
|
@ -746,17 +740,8 @@ lookup_pc:
|
|||
ldr r0, [reg_base, #REG_PC] @ r0 = new pc
|
||||
ldr r1, [reg_base, #REG_CPSR] @ r1 = flags
|
||||
tst r1, #0x20 @ see if Thumb bit is set
|
||||
beq lookup_pc_arm @ if not lookup ARM
|
||||
|
||||
lookup_pc_thumb:
|
||||
call_c_function(block_lookup_address_thumb)
|
||||
restore_flags()
|
||||
bx r0 @ jump to new Thumb block
|
||||
|
||||
lookup_pc_arm:
|
||||
call_c_function(block_lookup_address_arm)
|
||||
restore_flags()
|
||||
bx r0 @ jump to new ARM block
|
||||
beq execute_pc_arm @ if not lookup ARM
|
||||
b execute_pc_thumb
|
||||
|
||||
|
||||
#define sign_extend_u8(reg)
|
||||
|
|
3
cpu.h
3
cpu.h
|
@ -157,7 +157,8 @@ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
|
|||
void flush_translation_cache_rom(void);
|
||||
void flush_translation_cache_ram(void);
|
||||
void dump_translation_cache(void);
|
||||
void wipe_caches(void);
|
||||
void init_caches(void);
|
||||
void init_emitter(void);
|
||||
|
||||
extern u32 reg_mode[7][7];
|
||||
extern u32 spsr[6];
|
||||
|
|
|
@ -3644,7 +3644,7 @@ void flush_translation_cache_rom(void)
|
|||
memset(rom_branch_hash, 0, sizeof(rom_branch_hash));
|
||||
}
|
||||
|
||||
void wipe_caches(void)
|
||||
void init_caches(void)
|
||||
{
|
||||
/* Ensure we wipe everything including the SMC mirrors */
|
||||
flush_translation_cache_rom();
|
||||
|
@ -3653,6 +3653,8 @@ void wipe_caches(void)
|
|||
iwram_code_min = 0;
|
||||
iwram_code_max = 0x7FFF;
|
||||
flush_translation_cache_ram();
|
||||
/* Ensure 0 and FFFF get zeroed out */
|
||||
memset(ram_block_ptrs, 0, sizeof(ram_block_ptrs));
|
||||
}
|
||||
|
||||
#define cache_dump_prefix ""
|
||||
|
|
|
@ -3322,7 +3322,7 @@ void gba_load_state(const void* src)
|
|||
|
||||
#ifdef HAVE_DYNAREC
|
||||
if (dynarec_enable)
|
||||
wipe_caches();
|
||||
init_caches();
|
||||
#endif
|
||||
|
||||
reg[OAM_UPDATED] = 1;
|
||||
|
|
|
@ -675,7 +675,7 @@ static void check_variables(int started_from_load)
|
|||
dynarec_enable = 1;
|
||||
|
||||
if (dynarec_enable != prevvalue)
|
||||
wipe_caches();
|
||||
init_caches();
|
||||
}
|
||||
else
|
||||
dynarec_enable = 1;
|
||||
|
|
Loading…
Reference in New Issue