[ARM] Rewrite HLE emulation for div, make it faster and simpler.
Moves the handlers to the cache, removes C usage.
This commit is contained in:
		
							parent
							
								
									b65df123f8
								
							
						
					
					
						commit
						f65c3939b5
					
				
					 3 changed files with 89 additions and 51 deletions
				
			
		| 
						 | 
				
			
			@ -249,6 +249,9 @@ typedef enum {
 | 
			
		|||
  ARMOP_MUL   = 0x0, /* Rd := Rm*Rs */
 | 
			
		||||
  ARMOP_MLA   = 0x1, /* Rd := (Rm*Rs)+Rn */
 | 
			
		||||
 | 
			
		||||
  /* ARM7+ */
 | 
			
		||||
  ARMOP_MLS   = 0x3, /* Rd := Rn-(Rm*Rs) */
 | 
			
		||||
 | 
			
		||||
  /* ARM3M+ */
 | 
			
		||||
  ARMOP_UMULL = 0x4,
 | 
			
		||||
  ARMOP_UMLAL = 0x5,
 | 
			
		||||
| 
						 | 
				
			
			@ -648,6 +651,11 @@ typedef struct {
 | 
			
		|||
#define ARM_IASM_MLAS(rd, rm, rs, rn) \
 | 
			
		||||
  ARM_IASM_MLAS_COND(rd, rm, rs, rn, ARMCOND_AL)
 | 
			
		||||
 | 
			
		||||
/* Rd := Rn - (Rm * Rs); 32x32+32->32 */
 | 
			
		||||
#define ARM_MLS_COND(p, rd, rm, rs, rn, cond) \
 | 
			
		||||
  ARM_EMIT(p, ARM_DEF_MUL_COND(ARMOP_MLS, rd, rm, rs, rn, 0, cond))
 | 
			
		||||
#define ARM_MLS(p, rd, rm, rs, rn) \
 | 
			
		||||
  ARM_MLS_COND(p, rd, rm, rs, rn, ARMCOND_AL)
 | 
			
		||||
 | 
			
		||||
#define ARM_SMULL_COND(p, rn, rd, rm, rs, cond) \
 | 
			
		||||
  ARM_EMIT(p, ARM_DEF_MUL_COND(ARMOP_SMULL, rd, rm, rs, rn, 0, cond))
 | 
			
		||||
| 
						 | 
				
			
			@ -1405,5 +1413,18 @@ typedef union {
 | 
			
		|||
  arminstr_t      raw;
 | 
			
		||||
} ARMInstr;
 | 
			
		||||
 | 
			
		||||
/* ARMv7VE and others */
 | 
			
		||||
 | 
			
		||||
/*
 * Integer divide emitters (signed: op 0x38, unsigned: op 0x39).
 *
 *   p    - emit cursor, forwarded to ARM_EMIT like every other emitter here
 *   rd   - destination register
 *   rm   - first operand slot of ARM_DEF_DPI_REG_REGSHIFT_COND
 *   rn   - register-shift slot of ARM_DEF_DPI_REG_REGSHIFT_COND
 *   cond - condition code (the non-_COND forms use ARMCOND_AL)
 *
 * The encoding must be wrapped in ARM_EMIT; without it the macro only
 * evaluates to the instruction word and nothing is written to the stream.
 *
 * NOTE(review): the literal 31 lands in the destination-register slot of
 * ARM_DEF_DPI_REG_REGSHIFT_COND. The SDIV/UDIV encoding wants bits 15:12 set
 * to ones (i.e. 15); confirm the helper masks the value to 4 bits, otherwise
 * bit 16 would be disturbed.
 */
#define ARM_SDIV_COND(p, rd, rm, rn, cond) \
  ARM_EMIT(p, ARM_DEF_DPI_REG_REGSHIFT_COND(rm, 0, rn, 31, rd, 1, 0x38, cond))
#define ARM_SDIV(p, rd, rm, rn) \
  ARM_SDIV_COND(p, rd, rm, rn, ARMCOND_AL)

#define ARM_UDIV_COND(p, rd, rm, rn, cond) \
  ARM_EMIT(p, ARM_DEF_DPI_REG_REGSHIFT_COND(rm, 0, rn, 31, rd, 1, 0x39, cond))
#define ARM_UDIV(p, rd, rm, rn) \
  ARM_UDIV_COND(p, rd, rm, rn, ARMCOND_AL)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#endif /* ARM_CG_H */
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										100
									
								
								arm/arm_emit.h
									
										
									
									
									
								
							
							
						
						
									
										100
									
								
								arm/arm_emit.h
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -1924,40 +1924,14 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
 | 
			
		|||
  /* We're in ARM mode now */                                                 \
 | 
			
		||||
  generate_branch(arm)                                                        \
 | 
			
		||||
 | 
			
		||||
void execute_swi_hle_div_arm(void);
 | 
			
		||||
void execute_swi_hle_div_thumb(void);
 | 
			
		||||
void execute_swi_hle_divarm_arm(void);
 | 
			
		||||
void execute_swi_hle_divarm_thumb(void);
 | 
			
		||||
 | 
			
		||||
void execute_swi_hle_div_c(void)
 | 
			
		||||
{
 | 
			
		||||
   /* real BIOS supposedly locks up, but game can recover on interrupt */
 | 
			
		||||
   if (reg[1] == 0)
 | 
			
		||||
      return;
 | 
			
		||||
   s32 result = (s32)reg[0] / (s32)reg[1];
 | 
			
		||||
   reg[1] = (s32)reg[0] % (s32)reg[1];
 | 
			
		||||
   reg[0] = result;
 | 
			
		||||
 | 
			
		||||
   reg[3] = (result ^ (result >> 31)) - (result >> 31);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void execute_swi_hle_divarm_c(void)
 | 
			
		||||
{
 | 
			
		||||
   /* real BIOS supposedly locks up, but game can recover on interrupt */
 | 
			
		||||
   if (reg[0] == 0)
 | 
			
		||||
      return;
 | 
			
		||||
   s32 result = (s32)reg[1] / (s32)reg[0];
 | 
			
		||||
   reg[1] = (s32)reg[1] % (s32)reg[0];
 | 
			
		||||
   reg[0] = result;
 | 
			
		||||
 | 
			
		||||
   reg[3] = (result ^ (result >> 31)) - (result >> 31);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Use software division
 | 
			
		||||
void *div6, *divarm7;
 | 
			
		||||
#define arm_hle_div(cpu_mode)                                                 \
 | 
			
		||||
  generate_function_call(execute_swi_hle_div_##cpu_mode);
 | 
			
		||||
 | 
			
		||||
  cycle_count += 11 + 32;                                                     \
 | 
			
		||||
  generate_function_call(div6);
 | 
			
		||||
#define arm_hle_div_arm(cpu_mode)                                             \
 | 
			
		||||
  generate_function_call(execute_swi_hle_divarm_##cpu_mode);
 | 
			
		||||
  cycle_count += 14 + 32;                                                     \
 | 
			
		||||
  generate_function_call(divarm7);
 | 
			
		||||
 | 
			
		||||
#define generate_translation_gate(type)                                       \
 | 
			
		||||
  generate_update_pc(pc);                                                     \
 | 
			
		||||
| 
						 | 
				
			
			@ -1968,9 +1942,71 @@ extern u32 ldst_handler_functions[9][17];
 | 
			
		|||
extern u32 ldst_lookup_tables[9][17];
 | 
			
		||||
 | 
			
		||||
void init_emitter(void) {
 | 
			
		||||
  int i;
 | 
			
		||||
 | 
			
		||||
  // Generate handler table
 | 
			
		||||
  memcpy(ldst_lookup_tables, ldst_handler_functions, sizeof(ldst_lookup_tables));
 | 
			
		||||
 | 
			
		||||
  rom_cache_watermark = 0;
 | 
			
		||||
  u8 *translation_ptr = (u8*)&rom_translation_cache[0];
 | 
			
		||||
 | 
			
		||||
  // Generate ARMv5+ division code, based on a mix of libgcc and some open-source BIOSes.
 | 
			
		||||
  // This is meant for ARMv5 or higher, since it relies on CLZ
 | 
			
		||||
 | 
			
		||||
  // Invert operands for SWI 7 (divarm)
 | 
			
		||||
  divarm7 = translation_ptr;
 | 
			
		||||
  ARM_MOV_REG_REG(0, reg_a2, reg_x0);
 | 
			
		||||
  ARM_MOV_REG_REG(0, reg_x0, reg_x1);
 | 
			
		||||
  ARM_MOV_REG_REG(0, reg_x1, reg_a2);
 | 
			
		||||
 | 
			
		||||
  div6 = translation_ptr;
 | 
			
		||||
  // Save flags before using them
 | 
			
		||||
  generate_save_flags();
 | 
			
		||||
  // Store the signs of the result and the remainder
 | 
			
		||||
  ARM_ANDS_REG_IMM(0, reg_a2, reg_x1, 0x80, arm_imm_lsl_to_rot(24));
 | 
			
		||||
  ARM_EOR_REG_IMMSHIFT(0, reg_a2, reg_a2, reg_x0, ARMSHIFT_ASR, 1);
 | 
			
		||||
 | 
			
		||||
  // Make numbers positive if they are negative
 | 
			
		||||
  ARM_RSB_REG_IMM_COND(0, reg_x1, reg_x1, 0, 0, ARMCOND_MI);
 | 
			
		||||
  ARM_TST_REG_REG(0, reg_x0, reg_x0);
 | 
			
		||||
  ARM_RSB_REG_IMM_COND(0, reg_x0, reg_x0, 0, 0, ARMCOND_MI);
 | 
			
		||||
 | 
			
		||||
  // Calculate the number of division iterations needed and jump into the unrolled code
 | 
			
		||||
  ARM_CLZ(0, reg_a0, reg_x0);
 | 
			
		||||
  ARM_CLZ(0, reg_a1, reg_x1);
 | 
			
		||||
  ARM_SUBS_REG_REG(0, reg_a0, reg_a1, reg_a0);          // Align and check if a<b
 | 
			
		||||
  ARM_RSB_REG_IMM(0, reg_a0, reg_a0, 31, 0);
 | 
			
		||||
  ARM_MOV_REG_IMM_COND(0, reg_a0, 32, 0, ARMCOND_MI);   // Cap to 32 (skip division)
 | 
			
		||||
  ARM_ADD_REG_IMMSHIFT(0, reg_a0, reg_a0, reg_a0, ARMSHIFT_LSL, 1);
 | 
			
		||||
  ARM_MOV_REG_IMM(0, reg_a1, 0, 0);
 | 
			
		||||
  ARM_ADD_REG_IMMSHIFT(0, ARMREG_PC, ARMREG_PC, reg_a0, ARMSHIFT_LSL, 2);
 | 
			
		||||
  ARM_NOP(0);
 | 
			
		||||
 | 
			
		||||
  for (i = 31; i >= 0; i--) {
 | 
			
		||||
    ARM_CMP_REG_IMMSHIFT(0, reg_x0, reg_x1, ARMSHIFT_LSL, i);
 | 
			
		||||
    ARM_ADC_REG_REG(0, reg_a1, reg_a1, reg_a1);
 | 
			
		||||
    ARM_SUB_REG_IMMSHIFT_COND(0, reg_x0, reg_x0, reg_x1, ARMSHIFT_LSL, i, ARMCOND_HS);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  ARM_MOV_REG_REG(0, reg_x1, reg_x0);
 | 
			
		||||
  ARM_MOV_REG_REG(0, reg_x0, reg_a1);
 | 
			
		||||
  // Negate result if sign is negative
 | 
			
		||||
  ARM_SHLS_IMM(0, reg_a2, reg_a2, 1);
 | 
			
		||||
  ARM_RSB_REG_IMM_COND(0, reg_x0, reg_x0, 0, 0, ARMCOND_HS);
 | 
			
		||||
  ARM_RSB_REG_IMM_COND(0, reg_x1, reg_x1, 0, 0, ARMCOND_MI);
 | 
			
		||||
 | 
			
		||||
  // R3 must hold abs(r0/r1); store it in the right place (host register or in-memory register)
 | 
			
		||||
  arm_generate_load_reg(reg_a2, REG_CPSR);
 | 
			
		||||
  ARM_TST_REG_IMM8(0, reg_a2, 0x20);
 | 
			
		||||
  arm_generate_store_reg(reg_a1, 3 /* r3 */);
 | 
			
		||||
  ARM_MOV_REG_REG_COND(0, reg_x3, reg_a1, ARMCOND_NE);
 | 
			
		||||
 | 
			
		||||
  // Return and continue regular emulation
 | 
			
		||||
  generate_restore_flags();
 | 
			
		||||
  ARM_BX(0, ARMREG_LR);
 | 
			
		||||
 | 
			
		||||
  // Now generate BIOS hooks
 | 
			
		||||
  rom_cache_watermark = (u32)(translation_ptr - rom_translation_cache);
 | 
			
		||||
  init_bios_hooks();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -397,25 +397,6 @@ execute_swi_builder(arm)
 | 
			
		|||
execute_swi_builder(thumb)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ Wrapper for calling SWI functions in C (or can implement some in ASM if
 | 
			
		||||
@ desired)
 | 
			
		||||
 | 
			
		||||
#define execute_swi_function_builder(swi_function, mode)                     ;\
 | 
			
		||||
                                                                             ;\
 | 
			
		||||
defsymbl(execute_swi_hle_##swi_function##_##mode)                            ;\
 | 
			
		||||
  save_flags()                                                               ;\
 | 
			
		||||
  store_registers_##mode()                                                   ;\
 | 
			
		||||
  call_c_function(execute_swi_hle_##swi_function##_c)                        ;\
 | 
			
		||||
  load_registers_##mode()                                                    ;\
 | 
			
		||||
  restore_flags()                                                            ;\
 | 
			
		||||
  bx lr                                                                      ;\
 | 
			
		||||
 | 
			
		||||
execute_swi_function_builder(div, arm)
 | 
			
		||||
execute_swi_function_builder(div, thumb)
 | 
			
		||||
execute_swi_function_builder(divarm, arm)
 | 
			
		||||
execute_swi_function_builder(divarm, thumb)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ Start program execution. Normally the mode should be Thumb and the
 | 
			
		||||
@ PC should be 0x8000000, however if a save state is preloaded this
 | 
			
		||||
@ will be different.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		
		Reference in a new issue