[ARM] Rewrite HLE emulation for div, make it faster and simpler.

Moves the handlers to the cache, removes C usage.
This commit is contained in:
David Guillen Fandos 2021-10-24 18:00:06 +02:00
parent b65df123f8
commit f65c3939b5
3 changed files with 89 additions and 51 deletions

View File

@ -249,6 +249,9 @@ typedef enum {
ARMOP_MUL = 0x0, /* Rd := Rm*Rs */
ARMOP_MLA = 0x1, /* Rd := (Rm*Rs)+Rn */
/* ARMv6T2+ (includes ARMv7) */
ARMOP_MLS = 0x3, /* Rd := Rn-(Rm*Rs) */
/* ARM3M+ */
ARMOP_UMULL = 0x4,
ARMOP_UMLAL = 0x5,
@ -648,6 +651,11 @@ typedef struct {
/* Unconditional form: forwards its operands to ARM_IASM_MLAS_COND with
 * the condition fixed to ARMCOND_AL. */
#define ARM_IASM_MLAS(rd, rm, rs, rn) \
ARM_IASM_MLAS_COND(rd, rm, rs, rn, ARMCOND_AL)
/* Rd := Rn - (Rm * Rs); 32x32+32->32 */
/* ARM_MLS_COND emits the ARMOP_MLS multiply-class encoding through
 * ARM_EMIT into p, predicated on cond. The sixth ARM_DEF_MUL_COND argument
 * is fixed to 0 (presumably the set-flags bit -- confirm against
 * ARM_DEF_MUL_COND). ARM_MLS is the same instruction with ARMCOND_AL. */
#define ARM_MLS_COND(p, rd, rm, rs, rn, cond) \
ARM_EMIT(p, ARM_DEF_MUL_COND(ARMOP_MLS, rd, rm, rs, rn, 0, cond))
#define ARM_MLS(p, rd, rm, rs, rn) \
ARM_MLS_COND(p, rd, rm, rs, rn, ARMCOND_AL)
/* Emits an ARMOP_SMULL instruction through ARM_EMIT into p, predicated on
 * cond. Note the parameter order: this macro takes (rn, rd, rm, rs) but
 * passes them to ARM_DEF_MUL_COND as (rd, rm, rs, rn). */
#define ARM_SMULL_COND(p, rn, rd, rm, rs, cond) \
ARM_EMIT(p, ARM_DEF_MUL_COND(ARMOP_SMULL, rd, rm, rs, rn, 0, cond))
@ -1405,5 +1413,18 @@ typedef union {
arminstr_t raw;
} ARMInstr;
/* ARMv7VE and others */
/* Hardware integer division. Each *_COND macro builds the instruction word
 * with ARM_DEF_DPI_REG_REGSHIFT_COND (opcode field 0x38 for SDIV, 0x39 for
 * UDIV) and writes it to p through ARM_EMIT, like the other emitting macros
 * in this header (e.g. ARM_MLS_COND). Without the ARM_EMIT wrapper the
 * macro would only evaluate the encoding and never use p.
 * The non-_COND forms emit the same instruction with ARMCOND_AL.
 * NOTE(review): operand placement (rm, rn, rd) is kept exactly as it was;
 * verify the resulting encoding against the architecture manual before
 * relying on these macros. */
#define ARM_SDIV_COND(p, rd, rm, rn, cond) \
ARM_EMIT(p, ARM_DEF_DPI_REG_REGSHIFT_COND(rm, 0, rn, 31, rd, 1, 0x38, cond))
#define ARM_SDIV(p, rd, rm, rn) \
ARM_SDIV_COND(p, rd, rm, rn, ARMCOND_AL)
#define ARM_UDIV_COND(p, rd, rm, rn, cond) \
ARM_EMIT(p, ARM_DEF_DPI_REG_REGSHIFT_COND(rm, 0, rn, 31, rd, 1, 0x39, cond))
#define ARM_UDIV(p, rd, rm, rn) \
ARM_UDIV_COND(p, rd, rm, rn, ARMCOND_AL)
#endif /* ARM_CG_H */

View File

@ -1924,40 +1924,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
/* We're in ARM mode now */ \
generate_branch(arm) \
void execute_swi_hle_div_arm(void); /* stub for the div handler, arm register variant */
void execute_swi_hle_div_thumb(void); /* stub for the div handler, thumb register variant */
void execute_swi_hle_divarm_arm(void); /* stub for the divarm handler, arm register variant */
void execute_swi_hle_divarm_thumb(void); /* stub for the divarm handler, thumb register variant */
void execute_swi_hle_div_c(void)
{
/* real BIOS supposedly locks up, but game can recover on interrupt */
if (reg[1] == 0)
return;
s32 result = (s32)reg[0] / (s32)reg[1];
reg[1] = (s32)reg[0] % (s32)reg[1];
reg[0] = result;
reg[3] = (result ^ (result >> 31)) - (result >> 31);
}
void execute_swi_hle_divarm_c(void)
{
/* real BIOS supposedly locks up, but game can recover on interrupt */
if (reg[0] == 0)
return;
s32 result = (s32)reg[1] / (s32)reg[0];
reg[1] = (s32)reg[1] % (s32)reg[0];
reg[0] = result;
reg[3] = (result ^ (result >> 31)) - (result >> 31);
}
// Use software division
// Entry points of the generated division routines. Both are assigned in
// init_emitter() to addresses at the start of rom_translation_cache.
// divarm7 swaps the two operands and then falls through into div6.
void *div6, *divarm7;
/* Code emitted for the HLE div call: adds 11 + 32 to cycle_count and
 * generates a call to the div6 routine. cpu_mode is only referenced by the
 * stub-based call form (execute_swi_hle_div_<cpu_mode>). */
#define arm_hle_div(cpu_mode) \
generate_function_call(execute_swi_hle_div_##cpu_mode);
cycle_count += 11 + 32; \
generate_function_call(div6);
/* Code emitted for the HLE divarm call: adds 14 + 32 to cycle_count and
 * generates a call to the divarm7 routine. cpu_mode is only referenced by
 * the stub-based call form (execute_swi_hle_divarm_<cpu_mode>). */
#define arm_hle_div_arm(cpu_mode) \
generate_function_call(execute_swi_hle_divarm_##cpu_mode);
cycle_count += 14 + 32; \
generate_function_call(divarm7);
#define generate_translation_gate(type) \
generate_update_pc(pc); \
@ -1968,9 +1942,71 @@ extern u32 ldst_handler_functions[9][17];
extern u32 ldst_lookup_tables[9][17];
/* One-time setup of the emitter:
 *  - copies ldst_handler_functions into ldst_lookup_tables,
 *  - emits the signed division routines (entry points divarm7 and div6)
 *    starting at rom_translation_cache[0],
 *  - sets rom_cache_watermark to the size of the emitted code and calls
 *    init_bios_hooks().
 * The ARM_* and generate_* macros take no explicit output pointer here
 * (first argument 0); presumably they write through and advance the local
 * translation_ptr -- confirm against their definitions.
 */
void init_emitter(void) {
int i;
// Generate handler table
memcpy(ldst_lookup_tables, ldst_handler_functions, sizeof(ldst_lookup_tables));
rom_cache_watermark = 0;
u8 *translation_ptr = (u8*)&rom_translation_cache[0];
// Generate ARMv5+ division code, uses a mix of libgcc and some open bioses.
// This is meant for ARMv5 or higher, uses CLZ
// Invert operands for SWI 7 (divarm)
// The swap goes through a2 as scratch; execution then falls through into
// the div6 entry point recorded right after the three moves.
divarm7 = translation_ptr;
ARM_MOV_REG_REG(0, reg_a2, reg_x0);
ARM_MOV_REG_REG(0, reg_x0, reg_x1);
ARM_MOV_REG_REG(0, reg_x1, reg_a2);
div6 = translation_ptr;
// Save flags before using them
generate_save_flags();
// Stores result and remainder signs
// a2 = (x1 & 0x80000000) ^ (x0 ASR 1): bit 31 ends up as sign(x0) ^ sign(x1)
// (sign of the quotient) and bit 30 as sign(x0) (sign of the remainder).
// The ANDS also leaves N = sign(x1) for the conditional negate below.
ARM_ANDS_REG_IMM(0, reg_a2, reg_x1, 0x80, arm_imm_lsl_to_rot(24));
ARM_EOR_REG_IMMSHIFT(0, reg_a2, reg_a2, reg_x0, ARMSHIFT_ASR, 1);
// Make numbers positive if they are negative
ARM_RSB_REG_IMM_COND(0, reg_x1, reg_x1, 0, 0, ARMCOND_MI);
ARM_TST_REG_REG(0, reg_x0, reg_x0);
ARM_RSB_REG_IMM_COND(0, reg_x0, reg_x0, 0, 0, ARMCOND_MI);
// Calculates the number of iterations to division, and jumps to unrolled code
// a0 becomes 31 - (clz(x1) - clz(x0)): the number of unrolled steps to skip.
// It is then multiplied by 3 (instructions per step) and, shifted left by 2
// (bytes per instruction), added to PC. The NOP pads the sequence so that
// an offset of 0 lands on the first unrolled step.
// NOTE(review): no divisor == 0 check is visible here. With x1 == 0 and
// |x0| == 0x80000000 the index appears to compute to -1, which would make
// the PC-relative jump land before the unrolled block -- confirm that this
// input cannot loop forever on the host.
ARM_CLZ(0, reg_a0, reg_x0);
ARM_CLZ(0, reg_a1, reg_x1);
ARM_SUBS_REG_REG(0, reg_a0, reg_a1, reg_a0); // Align and check if a<b
ARM_RSB_REG_IMM(0, reg_a0, reg_a0, 31, 0);
ARM_MOV_REG_IMM_COND(0, reg_a0, 32, 0, ARMCOND_MI); // Cap to 32 (skip division)
ARM_ADD_REG_IMMSHIFT(0, reg_a0, reg_a0, reg_a0, ARMSHIFT_LSL, 1);
ARM_MOV_REG_IMM(0, reg_a1, 0, 0);
ARM_ADD_REG_IMMSHIFT(0, ARMREG_PC, ARMREG_PC, reg_a0, ARMSHIFT_LSL, 2);
ARM_NOP(0);
// 32 unrolled shift-and-subtract steps, highest shift first. Each step
// compares x0 with x1 << i, shifts the carry into the quotient a1 and
// subtracts x1 << i from x0 when x0 was greater or equal.
for (i = 31; i >= 0; i--) {
ARM_CMP_REG_IMMSHIFT(0, reg_x0, reg_x1, ARMSHIFT_LSL, i);
ARM_ADC_REG_REG(0, reg_a1, reg_a1, reg_a1);
ARM_SUB_REG_IMMSHIFT_COND(0, reg_x0, reg_x0, reg_x1, ARMSHIFT_LSL, i, ARMCOND_HS);
}
// x0 holds the remainder and a1 the unsigned quotient at this point; move
// them to their output registers (a1 is kept for the r3 result below).
ARM_MOV_REG_REG(0, reg_x1, reg_x0);
ARM_MOV_REG_REG(0, reg_x0, reg_a1);
// Negate result if sign is negative
// Shifting a2 left by one moves the quotient sign into the carry flag and
// the remainder sign into N, which select the two conditional negates.
ARM_SHLS_IMM(0, reg_a2, reg_a2, 1);
ARM_RSB_REG_IMM_COND(0, reg_x0, reg_x0, 0, 0, ARMCOND_HS);
ARM_RSB_REG_IMM_COND(0, reg_x1, reg_x1, 0, 0, ARMCOND_MI);
// Register R3 stores the abs(r0/r1), store it in the right reg/mem-reg
// The value is always written to the r3 slot via arm_generate_store_reg and
// additionally copied into reg_x3 when CPSR bit 0x20 is set (presumably the
// Thumb state bit -- confirm). This relies on arm_generate_store_reg not
// changing the flags produced by the TST.
arm_generate_load_reg(reg_a2, REG_CPSR);
ARM_TST_REG_IMM8(0, reg_a2, 0x20);
arm_generate_store_reg(reg_a1, 3 /* r3 */);
ARM_MOV_REG_REG_COND(0, reg_x3, reg_a1, ARMCOND_NE);
// Return and continue regular emulation
generate_restore_flags();
ARM_BX(0, ARMREG_LR);
// Now generate BIOS hooks
// The watermark records how many bytes of the cache the routines above
// occupy, measured from the start of rom_translation_cache.
rom_cache_watermark = (u32)(translation_ptr - rom_translation_cache);
init_bios_hooks();
}

View File

@ -397,25 +397,6 @@ execute_swi_builder(arm)
execute_swi_builder(thumb)
@ Wrapper for calling SWI functions in C (or can implement some in ASM if
@ desired)
@ execute_swi_function_builder(swi_function, mode) defines the symbol
@ execute_swi_hle_<swi_function>_<mode>. The generated stub runs
@ save_flags(), store_registers_<mode>(), calls the C handler
@ execute_swi_hle_<swi_function>_c, then runs load_registers_<mode>() and
@ restore_flags() before returning with bx lr.
@ It is instantiated for div and divarm, each in arm and thumb variants.
#define execute_swi_function_builder(swi_function, mode) ;\
;\
defsymbl(execute_swi_hle_##swi_function##_##mode) ;\
save_flags() ;\
store_registers_##mode() ;\
call_c_function(execute_swi_hle_##swi_function##_c) ;\
load_registers_##mode() ;\
restore_flags() ;\
bx lr ;\
execute_swi_function_builder(div, arm)
execute_swi_function_builder(div, thumb)
execute_swi_function_builder(divarm, arm)
execute_swi_function_builder(divarm, thumb)
@ Start program execution. Normally the mode should be Thumb and the
@ PC should be 0x8000000, however if a save state is preloaded this
@ will be different.