[ARM] Rework memory handlers for speed and simplicity
This removes one branch and emits the region selection code directly in the JIT cache, trading memory for speed (although it's not a big improvement). This is a step towards enabling MMAP caches on ARM (due to the 32MB offset limitation in branches).
This commit is contained in:
parent
4bebb6135d
commit
d558fb4fc4
|
@ -1378,6 +1378,12 @@ typedef struct {
|
|||
(1 << 25) | \
|
||||
ARM_DEF_COND(cond)
|
||||
|
||||
#define ARM_USAT_ASR(p, rd, sat, rm, sa, cond) \
|
||||
ARM_EMIT(p, ARM_DEF_DPI_REG_IMMSHIFT_COND((rm) | 0x10, 2, sa, rd, sat, 0, 0x37, cond))
|
||||
|
||||
#define ARM_USAT_LSL(p, rd, sat, rm, sa, cond) \
|
||||
ARM_EMIT(p, ARM_DEF_DPI_REG_IMMSHIFT_COND((rm) | 0x10, 0, sa, rd, sat, 0, 0x37, cond))
|
||||
|
||||
|
||||
typedef union {
|
||||
ARMInstrBR br;
|
||||
|
|
|
@ -51,8 +51,6 @@ u32 execute_spsr_restore(u32 address);
|
|||
void execute_swi_arm(u32 pc);
|
||||
void execute_swi_thumb(u32 pc);
|
||||
|
||||
void execute_store_u32_safe(u32 address, u32 source);
|
||||
|
||||
#define STORE_TBL_OFF 0x1DC
|
||||
#define SPSR_RAM_OFF 0x100
|
||||
|
||||
|
@ -1271,16 +1269,46 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
* (same with the stores)
|
||||
*/
|
||||
|
||||
#define generate_load_call_byte(tblnum) \
|
||||
ARM_USAT_ASR(0, reg_a1, 4, reg_a0, 24, ARMCOND_AL); \
|
||||
generate_add_imm(reg_a1, (STORE_TBL_OFF + 64*tblnum) >> 2, 0); \
|
||||
ARM_LDR_REG_REG_SHIFT(0, reg_a1, reg_base, reg_a1, 0, 2); \
|
||||
ARM_BLX(0, reg_a1); \
|
||||
|
||||
#define generate_load_call_mbyte(tblnum, abits) \
|
||||
ARM_MOV_REG_IMMSHIFT(0, reg_a1, reg_a0, ARMSHIFT_ROR, abits) \
|
||||
ARM_USAT_ASR(0, reg_a1, 4, reg_a1, 24-abits, ARMCOND_AL); \
|
||||
generate_add_imm(reg_a1, (STORE_TBL_OFF + 64*tblnum) >> 2, 0); \
|
||||
ARM_LDR_REG_REG_SHIFT(0, reg_a1, reg_base, reg_a1, 0, 2); \
|
||||
ARM_BLX(0, reg_a1); \
|
||||
|
||||
#define generate_store_call(tblnum) \
|
||||
ARM_USAT_ASR(0, reg_a2, 4, reg_a0, 24, ARMCOND_AL); \
|
||||
generate_add_imm(reg_a2, (STORE_TBL_OFF + 64*tblnum) >> 2, 0); \
|
||||
ARM_LDR_REG_REG_SHIFT(0, reg_a2, reg_base, reg_a2, 0, 2); \
|
||||
ARM_BLX(0, reg_a2); \
|
||||
|
||||
#define generate_store_call_u8() generate_store_call(0)
|
||||
#define generate_store_call_u16() generate_store_call(1)
|
||||
#define generate_store_call_u32() generate_store_call(2)
|
||||
#define generate_store_call_u32_safe() generate_store_call(3)
|
||||
#define generate_load_call_u8() generate_load_call_byte(4)
|
||||
#define generate_load_call_s8() generate_load_call_byte(5)
|
||||
#define generate_load_call_u16() generate_load_call_mbyte(6, 1)
|
||||
#define generate_load_call_s16() generate_load_call_mbyte(7, 1)
|
||||
#define generate_load_call_u32() generate_load_call_mbyte(8, 2)
|
||||
|
||||
|
||||
#define arm_access_memory_load(mem_type) \
|
||||
cycle_count += 2; \
|
||||
generate_function_call(execute_load_##mem_type); \
|
||||
generate_load_call_##mem_type(); \
|
||||
write32((pc + 8)); \
|
||||
arm_generate_store_reg_pc_no_flags(reg_rv, rd) \
|
||||
|
||||
#define arm_access_memory_store(mem_type) \
|
||||
cycle_count++; \
|
||||
arm_generate_load_reg_pc(reg_a1, rd, 12); \
|
||||
generate_function_call(execute_store_##mem_type); \
|
||||
generate_store_call_##mem_type(); \
|
||||
write32((pc + 4)) \
|
||||
|
||||
/* Calculate the address into a0 from _rn, _rm */
|
||||
|
@ -1384,20 +1412,20 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
/* TODO: Make these use cached registers. Implement iwram_stack_optimize. */
|
||||
|
||||
#define arm_block_memory_load() \
|
||||
generate_function_call(execute_load_u32); \
|
||||
generate_load_call_u32(); \
|
||||
write32((pc + 8)); \
|
||||
arm_generate_store_reg(reg_rv, i) \
|
||||
|
||||
#define arm_block_memory_store() \
|
||||
arm_generate_load_reg_pc(reg_a1, i, 8); \
|
||||
generate_function_call(execute_store_u32_safe) \
|
||||
generate_store_call_u32_safe() \
|
||||
|
||||
#define arm_block_memory_final_load() \
|
||||
arm_block_memory_load() \
|
||||
|
||||
#define arm_block_memory_final_store() \
|
||||
arm_generate_load_reg_pc(reg_a1, i, 12); \
|
||||
generate_function_call(execute_store_u32); \
|
||||
generate_store_call_u32(); \
|
||||
write32((pc + 4)) \
|
||||
|
||||
#define arm_block_memory_adjust_pc_store() \
|
||||
|
@ -1482,13 +1510,13 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
arm_decode_swap(); \
|
||||
cycle_count += 3; \
|
||||
arm_generate_load_reg(reg_a0, rn); \
|
||||
generate_function_call(execute_load_##type); \
|
||||
generate_load_call_##type(); \
|
||||
write32((pc + 8)); \
|
||||
generate_mov(reg_a2, reg_rv); \
|
||||
arm_generate_load_reg(reg_a0, rn); \
|
||||
arm_generate_load_reg(reg_a1, rm); \
|
||||
arm_generate_store_reg(reg_a2, rd); \
|
||||
generate_function_call(execute_store_##type); \
|
||||
generate_store_call_##type(); \
|
||||
write32((pc + 4)); \
|
||||
} \
|
||||
|
||||
|
@ -1651,14 +1679,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
|
||||
#define thumb_access_memory_load(mem_type, _rd) \
|
||||
cycle_count += 2; \
|
||||
generate_function_call(execute_load_##mem_type); \
|
||||
generate_load_call_##mem_type(); \
|
||||
write32((pc + 4)); \
|
||||
thumb_generate_store_reg(reg_rv, _rd) \
|
||||
|
||||
#define thumb_access_memory_store(mem_type, _rd) \
|
||||
cycle_count++; \
|
||||
thumb_generate_load_reg(reg_a1, _rd); \
|
||||
generate_function_call(execute_store_##mem_type); \
|
||||
generate_store_call_##mem_type(); \
|
||||
write32((pc + 2)) \
|
||||
|
||||
#define thumb_access_memory_generate_address_pc_relative(offset, _rb, _ro) \
|
||||
|
@ -1727,7 +1755,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
#define thumb_block_memory_extra_pop_pc() \
|
||||
thumb_generate_load_reg(reg_s0, REG_SAVE); \
|
||||
generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \
|
||||
generate_function_call(execute_load_u32); \
|
||||
generate_load_call_u32(); \
|
||||
write32((pc + 4)); \
|
||||
generate_indirect_branch_cycle_update(thumb) \
|
||||
|
||||
|
@ -1735,23 +1763,23 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
|
|||
thumb_generate_load_reg(reg_s0, REG_SAVE); \
|
||||
generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \
|
||||
thumb_generate_load_reg(reg_a1, REG_LR); \
|
||||
generate_function_call(execute_store_u32_safe) \
|
||||
generate_store_call_u32_safe()
|
||||
|
||||
#define thumb_block_memory_load() \
|
||||
generate_function_call(execute_load_u32); \
|
||||
generate_load_call_u32(); \
|
||||
write32((pc + 4)); \
|
||||
thumb_generate_store_reg(reg_rv, i) \
|
||||
|
||||
#define thumb_block_memory_store() \
|
||||
thumb_generate_load_reg(reg_a1, i); \
|
||||
generate_function_call(execute_store_u32_safe) \
|
||||
generate_store_call_u32_safe()
|
||||
|
||||
#define thumb_block_memory_final_load() \
|
||||
thumb_block_memory_load() \
|
||||
|
||||
#define thumb_block_memory_final_store() \
|
||||
thumb_generate_load_reg(reg_a1, i); \
|
||||
generate_function_call(execute_store_u32); \
|
||||
generate_store_call_u32(); \
|
||||
write32((pc + 2)) \
|
||||
|
||||
#define thumb_block_memory_final_no(access_type) \
|
||||
|
|
|
@ -94,12 +94,7 @@ _##symbol:
|
|||
ldr reg_x5, [reg_base, #REG_R14] ;\
|
||||
|
||||
#define load_registers_thumb() ;\
|
||||
ldr reg_x0, [reg_base, #REG_R0] ;\
|
||||
ldr reg_x1, [reg_base, #REG_R1] ;\
|
||||
ldr reg_x2, [reg_base, #REG_R2] ;\
|
||||
ldr reg_x3, [reg_base, #REG_R3] ;\
|
||||
ldr reg_x4, [reg_base, #REG_R4] ;\
|
||||
ldr reg_x5, [reg_base, #REG_R5] ;\
|
||||
ldm reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5}
|
||||
|
||||
|
||||
@ Will store the register set from cached registers back to memory.
|
||||
|
@ -113,12 +108,7 @@ _##symbol:
|
|||
str reg_x5, [reg_base, #REG_R14] ;\
|
||||
|
||||
#define store_registers_thumb() ;\
|
||||
str reg_x0, [reg_base, #REG_R0] ;\
|
||||
str reg_x1, [reg_base, #REG_R1] ;\
|
||||
str reg_x2, [reg_base, #REG_R2] ;\
|
||||
str reg_x3, [reg_base, #REG_R3] ;\
|
||||
str reg_x4, [reg_base, #REG_R4] ;\
|
||||
str reg_x5, [reg_base, #REG_R5] ;\
|
||||
stm reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5}
|
||||
|
||||
|
||||
@ Returns an updated persistent cpsr with the cached flags register.
|
||||
|
@ -512,12 +502,6 @@ return_to_main:
|
|||
|
||||
#define execute_store_builder(store_type, str_op, str_op16, load_op, tnum) ;\
|
||||
;\
|
||||
defsymbl(execute_store_u##store_type) ;\
|
||||
usat r2, #4, r0, asr #24 /* r2 contains [0-15] */;\
|
||||
add r2, r2, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\
|
||||
ldr pc, [reg_base, r2, lsl #2] /* load handler addr */;\
|
||||
nop ;\
|
||||
;\
|
||||
ext_store_u##store_type: ;\
|
||||
save_flags() ;\
|
||||
ldr r2, [lr] /* load PC */;\
|
||||
|
@ -571,7 +555,6 @@ ext_store_oam_ram_u##store_type: ;\
|
|||
ldr r0, [lr] /* load PC */;\
|
||||
str r0, [reg_base, #REG_PC] /* write out PC */;\
|
||||
b smc_write /* perform smc write */;\
|
||||
.size execute_store_u##store_type, .-execute_store_u##store_type
|
||||
|
||||
@ for ignored areas, just return
|
||||
ext_store_ignore:
|
||||
|
@ -601,12 +584,6 @@ execute_store_builder(32, str, str, ldr, 2)
|
|||
|
||||
@ This is a store that is executed in a strm case (so no SMC checks in-between)
|
||||
|
||||
defsymbl(execute_store_u32_safe)
|
||||
usat r2, #4, r0, asr #24
|
||||
add r2, r2, #((STORE_TBL_OFF + 16*4*3) >> 2)
|
||||
ldr pc, [reg_base, r2, lsl #2]
|
||||
nop
|
||||
|
||||
ext_store_u32_safe:
|
||||
str lr, [reg_base, #REG_SAVE3] @ Restore lr
|
||||
save_flags()
|
||||
|
@ -642,7 +619,6 @@ ext_store_oam_ram_u32_safe:
|
|||
str r1, [r0, r2] @ store data
|
||||
str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here
|
||||
bx lr @ Return
|
||||
.size execute_store_u32_safe, .-execute_store_u32_safe
|
||||
|
||||
|
||||
write_epilogue:
|
||||
|
@ -744,17 +720,6 @@ lookup_pc_arm:
|
|||
|
||||
#define execute_load_builder(load_type, albits, load_function, tnum) ;\
|
||||
;\
|
||||
defsymbl(execute_load_##load_type) ;\
|
||||
.if albits >= 1 ;\
|
||||
ror r1, r0, #(albits) /* move alignment bits to MSB */;\
|
||||
usat r1, #4, r1, asr #(24-albits) /* r1 contains [0-15] */;\
|
||||
.else ;\
|
||||
usat r1, #4, r0, asr #24 /* r1 contains [0-15] */;\
|
||||
.endif ;\
|
||||
add r1, r1, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\
|
||||
ldr pc, [reg_base, r1, lsl #2] /* load handler addr */;\
|
||||
nop ;\
|
||||
;\
|
||||
ld_bios_##load_type: /* BIOS area, need to verify PC */;\
|
||||
save_flags() ;\
|
||||
ldr r1, [lr] /* r1 = PC */;\
|
||||
|
@ -809,8 +774,7 @@ ld_slow_##load_type: ;\
|
|||
call_c_function(load_function) ;\
|
||||
restore_flags() ;\
|
||||
add pc, lr, #4 /* return */;\
|
||||
;\
|
||||
.size execute_load_##load_type, .-execute_load_##load_type
|
||||
|
||||
|
||||
#define load_table_gen(load_type) ;\
|
||||
.long ld_bios_##load_type /* 0 BIOS */;\
|
||||
|
|
Loading…
Reference in New Issue