diff --git a/arm/arm_codegen.h b/arm/arm_codegen.h index 3b25bac..4f805f9 100644 --- a/arm/arm_codegen.h +++ b/arm/arm_codegen.h @@ -1378,6 +1378,12 @@ typedef struct { (1 << 25) | \ ARM_DEF_COND(cond) +#define ARM_USAT_ASR(p, rd, sat, rm, sa, cond) /* usat rd, #sat, rm, asr #sa */ \ + ARM_EMIT(p, ARM_DEF_DPI_REG_IMMSHIFT_COND((rm) | 0x10, 2, sa, rd, sat, 0, 0x37, cond)) + +#define ARM_USAT_LSL(p, rd, sat, rm, sa, cond) /* usat rd, #sat, rm, lsl #sa */ \ + ARM_EMIT(p, ARM_DEF_DPI_REG_IMMSHIFT_COND((rm) | 0x10, 0, sa, rd, sat, 0, 0x37, cond)) + typedef union { ARMInstrBR br; diff --git a/arm/arm_emit.h b/arm/arm_emit.h index 1e55675..b1ed88c 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -51,8 +51,6 @@ u32 execute_spsr_restore(u32 address); void execute_swi_arm(u32 pc); void execute_swi_thumb(u32 pc); -void execute_store_u32_safe(u32 address, u32 source); - #define STORE_TBL_OFF 0x1DC #define SPSR_RAM_OFF 0x100 @@ -1271,16 +1269,46 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) * (same with the stores) */ +#define generate_load_call_byte(tblnum) /* inline form of the removed execute_load_* entry stub (no alignment bits) */ \ + ARM_USAT_ASR(0, reg_a1, 4, reg_a0, 24, ARMCOND_AL); /* reg_a1 contains [0-15] */ \ + generate_add_imm(reg_a1, (STORE_TBL_OFF + 64*(tblnum)) >> 2, 0); /* add table offset */ \ + ARM_LDR_REG_REG_SHIFT(0, reg_a1, reg_base, reg_a1, 0, 2); /* load handler addr */ \ + ARM_BLX(0, reg_a1); /* call handler */ \ + +#define generate_load_call_mbyte(tblnum, abits) /* as generate_load_call_byte, with abits alignment bits folded into the lookup */ \ + ARM_MOV_REG_IMMSHIFT(0, reg_a1, reg_a0, ARMSHIFT_ROR, abits); /* move alignment bits to MSB */ \ + ARM_USAT_ASR(0, reg_a1, 4, reg_a1, 24-(abits), ARMCOND_AL); /* reg_a1 contains [0-15] */ \ + generate_add_imm(reg_a1, (STORE_TBL_OFF + 64*(tblnum)) >> 2, 0); /* add table offset */ \ + ARM_LDR_REG_REG_SHIFT(0, reg_a1, reg_base, reg_a1, 0, 2); /* load handler addr */ \ + ARM_BLX(0, reg_a1); /* call handler */ \ + +#define generate_store_call(tblnum) /* inline form of the removed execute_store_* entry stub; uses reg_a2 as scratch */ \ + ARM_USAT_ASR(0, reg_a2, 4, reg_a0, 24, ARMCOND_AL); /* reg_a2 contains [0-15] */ \ + generate_add_imm(reg_a2, (STORE_TBL_OFF + 64*(tblnum)) >> 2, 0); /* add table offset */ \ + ARM_LDR_REG_REG_SHIFT(0, reg_a2, reg_base, reg_a2, 0, 2); /* load handler addr */ \ + ARM_BLX(0, reg_a2); /* call handler */ \ + +#define generate_store_call_u8() generate_store_call(0) +#define generate_store_call_u16() generate_store_call(1) +#define generate_store_call_u32() generate_store_call(2) 
+#define generate_store_call_u32_safe() generate_store_call(3) /* tblnum 0-3 select the store handler tables, 4-8 the load handler tables */ +#define generate_load_call_u8() generate_load_call_byte(4) +#define generate_load_call_s8() generate_load_call_byte(5) +#define generate_load_call_u16() generate_load_call_mbyte(6, 1) /* second argument: number of alignment bits rotated to the MSB before the table lookup */ +#define generate_load_call_s16() generate_load_call_mbyte(7, 1) +#define generate_load_call_u32() generate_load_call_mbyte(8, 2) + + #define arm_access_memory_load(mem_type) \ cycle_count += 2; \ - generate_function_call(execute_load_##mem_type); \ + generate_load_call_##mem_type(); /* inline table dispatch ending in BLX; the PC word emitted next sits at the return address */ \ write32((pc + 8)); \ arm_generate_store_reg_pc_no_flags(reg_rv, rd) \ #define arm_access_memory_store(mem_type) \ cycle_count++; \ arm_generate_load_reg_pc(reg_a1, rd, 12); \ - generate_function_call(execute_store_##mem_type); \ + generate_store_call_##mem_type(); \ write32((pc + 4)) \ /* Calculate the address into a0 from _rn, _rm */ @@ -1384,20 +1412,20 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) /* TODO: Make these use cached registers. Implement iwram_stack_optimize. 
*/ #define arm_block_memory_load() \ - generate_function_call(execute_load_u32); \ + generate_load_call_u32(); \ write32((pc + 8)); \ arm_generate_store_reg(reg_rv, i) \ #define arm_block_memory_store() \ arm_generate_load_reg_pc(reg_a1, i, 8); \ - generate_function_call(execute_store_u32_safe) \ + generate_store_call_u32_safe() /* no PC word is emitted after the safe store */ \ #define arm_block_memory_final_load() \ arm_block_memory_load() \ #define arm_block_memory_final_store() \ arm_generate_load_reg_pc(reg_a1, i, 12); \ - generate_function_call(execute_store_u32); \ + generate_store_call_u32(); \ write32((pc + 4)) \ #define arm_block_memory_adjust_pc_store() \ @@ -1482,13 +1510,13 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) arm_decode_swap(); \ cycle_count += 3; \ arm_generate_load_reg(reg_a0, rn); \ - generate_function_call(execute_load_##type); \ + generate_load_call_##type(); \ write32((pc + 8)); \ generate_mov(reg_a2, reg_rv); \ arm_generate_load_reg(reg_a0, rn); \ arm_generate_load_reg(reg_a1, rm); \ arm_generate_store_reg(reg_a2, rd); \ - generate_function_call(execute_store_##type); \ + generate_store_call_##type(); /* overwrites reg_a2, which was already stored to rd above */ \ write32((pc + 4)); \ } \ @@ -1651,14 +1679,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define thumb_access_memory_load(mem_type, _rd) \ cycle_count += 2; \ - generate_function_call(execute_load_##mem_type); \ + generate_load_call_##mem_type(); \ write32((pc + 4)); \ thumb_generate_store_reg(reg_rv, _rd) \ #define thumb_access_memory_store(mem_type, _rd) \ cycle_count++; \ thumb_generate_load_reg(reg_a1, _rd); \ - generate_function_call(execute_store_##mem_type); \ + generate_store_call_##mem_type(); \ write32((pc + 2)) \ #define thumb_access_memory_generate_address_pc_relative(offset, _rb, _ro) \ @@ -1727,7 +1755,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #define thumb_block_memory_extra_pop_pc() \ thumb_generate_load_reg(reg_s0, REG_SAVE); \ generate_add_reg_reg_imm(reg_a0, reg_s0, 
(bit_count[reg_list] * 4), 0); \ - generate_function_call(execute_load_u32); \ + generate_load_call_u32(); \ write32((pc + 4)); \ generate_indirect_branch_cycle_update(thumb) \ @@ -1735,23 +1763,23 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) thumb_generate_load_reg(reg_s0, REG_SAVE); \ generate_add_reg_reg_imm(reg_a0, reg_s0, (bit_count[reg_list] * 4), 0); \ thumb_generate_load_reg(reg_a1, REG_LR); \ - generate_function_call(execute_store_u32_safe) \ + generate_store_call_u32_safe() #define thumb_block_memory_load() \ - generate_function_call(execute_load_u32); \ + generate_load_call_u32(); \ write32((pc + 4)); \ thumb_generate_store_reg(reg_rv, i) \ #define thumb_block_memory_store() \ thumb_generate_load_reg(reg_a1, i); \ - generate_function_call(execute_store_u32_safe) \ + generate_store_call_u32_safe() #define thumb_block_memory_final_load() \ thumb_block_memory_load() \ #define thumb_block_memory_final_store() \ thumb_generate_load_reg(reg_a1, i); \ - generate_function_call(execute_store_u32); \ + generate_store_call_u32(); \ write32((pc + 2)) \ #define thumb_block_memory_final_no(access_type) \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 4cd2cea..7ed0d9c 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -94,12 +94,7 @@ _##symbol: ldr reg_x5, [reg_base, #REG_R14] ;\ #define load_registers_thumb() ;\ - ldr reg_x0, [reg_base, #REG_R0] ;\ - ldr reg_x1, [reg_base, #REG_R1] ;\ - ldr reg_x2, [reg_base, #REG_R2] ;\ - ldr reg_x3, [reg_base, #REG_R3] ;\ - ldr reg_x4, [reg_base, #REG_R4] ;\ - ldr reg_x5, [reg_base, #REG_R5] ;\ + ldm reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5} /* NOTE(review): unlike the removed ldr's, LDM ignores the REG_Rn offsets: it reads consecutive words starting at [reg_base] and assigns them in ascending register-number order; presumably REG_R0..REG_R5 are offsets 0..20 and reg_x0..reg_x5 are ascending registers - confirm */ @ Will store the register set from cached registers back to memory. 
@@ -113,12 +108,7 @@ _##symbol: str reg_x5, [reg_base, #REG_R14] ;\ #define store_registers_thumb() ;\ - str reg_x0, [reg_base, #REG_R0] ;\ - str reg_x1, [reg_base, #REG_R1] ;\ - str reg_x2, [reg_base, #REG_R2] ;\ - str reg_x3, [reg_base, #REG_R3] ;\ - str reg_x4, [reg_base, #REG_R4] ;\ - str reg_x5, [reg_base, #REG_R5] ;\ + stm reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5} /* NOTE(review): unlike the removed str's, STM ignores the REG_Rn offsets: it writes consecutive words starting at [reg_base] in ascending register-number order; presumably REG_R0..REG_R5 are offsets 0..20 and reg_x0..reg_x5 are ascending registers - confirm */ @ Returns an updated persistent cpsr with the cached flags register. @@ -512,12 +502,6 @@ return_to_main: #define execute_store_builder(store_type, str_op, str_op16, load_op, tnum) ;\ ;\ -defsymbl(execute_store_u##store_type) ;\ - usat r2, #4, r0, asr #24 /* r2 contains [0-15] */;\ - add r2, r2, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\ - ldr pc, [reg_base, r2, lsl #2] /* load handler addr */;\ - nop ;\ - ;\ ext_store_u##store_type: ;\ save_flags() ;\ ldr r2, [lr] /* load PC */;\ @@ -571,7 +555,6 @@ ext_store_oam_ram_u##store_type: ;\ ldr r0, [lr] /* load PC */;\ str r0, [reg_base, #REG_PC] /* write out PC */;\ b smc_write /* perform smc write */;\ -.size execute_store_u##store_type, .-execute_store_u##store_type @ for ignored areas, just return ext_store_ignore: @@ -601,12 +584,6 @@ execute_store_builder(32, str, str, ldr, 2) @ This is a store that is executed in a strm case (so no SMC checks in-between) -defsymbl(execute_store_u32_safe) - usat r2, #4, r0, asr #24 - add r2, r2, #((STORE_TBL_OFF + 16*4*3) >> 2) - ldr pc, [reg_base, r2, lsl #2] - nop - ext_store_u32_safe: str lr, [reg_base, #REG_SAVE3] @ Restore lr save_flags() @@ -642,7 +619,6 @@ ext_store_oam_ram_u32_safe: str r1, [r0, r2] @ store data str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here bx lr @ Return -.size execute_store_u32_safe, .-execute_store_u32_safe write_epilogue: @@ -744,17 +720,6 @@ lookup_pc_arm: #define execute_load_builder(load_type, albits, load_function, tnum) ;\ ;\ -defsymbl(execute_load_##load_type) ;\ -.if albits >= 1 ;\ - ror r1, r0, #(albits) /* move alignment bits 
to MSB */;\ - usat r1, #4, r1, asr #(24-albits) /* r1 contains [0-15] */;\ -.else ;\ - usat r1, #4, r0, asr #24 /* r1 contains [0-15] */;\ -.endif ;\ - add r1, r1, #((STORE_TBL_OFF + 16*4*tnum) >> 2) /* add table offset */;\ - ldr pc, [reg_base, r1, lsl #2] /* load handler addr */;\ - nop ;\ - ;\ ld_bios_##load_type: /* BIOS area, need to verify PC */;\ save_flags() ;\ ldr r1, [lr] /* r1 = PC */;\ @@ -809,8 +774,7 @@ ld_slow_##load_type: ;\ call_c_function(load_function) ;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ - ;\ -.size execute_load_##load_type, .-execute_load_##load_type + #define load_table_gen(load_type) ;\ .long ld_bios_##load_type /* 0 BIOS */;\