Improve SWI codepaths and implement div&divarm natively

This commit is contained in:
David Guillen Fandos 2021-09-03 01:01:37 +02:00
parent e0708b1dcf
commit f51ed9de13
6 changed files with 102 additions and 230 deletions

View File

@ -1851,7 +1851,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_indirect_branch_dual(); \
/* Translate an ARM-mode SWI. The SWI number is taken from opcode bits
 * 16-23 and offered to generate_swi_hle_handler() first; that macro may
 * `break` out of the expansion site, in which case nothing below is emitted.
 * Otherwise a call to execute_swi_arm is emitted, followed by a 32-bit word
 * holding pc + 4 (presumably the return address read by the stub - confirm
 * against execute_swi_arm), and finally a branch. */
#define arm_swi() \
generate_swi_hle_handler((opcode >> 16) & 0xFF, arm); \
generate_function_call(execute_swi_arm); \
write32((pc + 4)); \
generate_branch(arm) \
@ -1888,61 +1887,15 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_function_call(arm_cheat_hook);
/* Translate a Thumb-mode SWI. The SWI number is the low byte of the opcode
 * and is offered to generate_swi_hle_handler() first; that macro may `break`
 * out of the expansion site, in which case nothing below is emitted.
 * Otherwise a call to execute_swi_thumb is emitted, followed by a 32-bit
 * word holding pc + 2, and a branch generated with the `arm` variant. */
#define thumb_swi() \
generate_swi_hle_handler(opcode & 0xFF, thumb); \
generate_function_call(execute_swi_thumb); \
write32((pc + 2)); \
/* We're in ARM mode now */ \
generate_branch(arm) \
/* Per-SWI flag table indexed by the 8-bit SWI number. A non-zero entry marks
 * a BIOS call for which generate_swi_hle_handler() emits a high-level
 * replacement. Only SWI 0x06 (Div) is flagged; every other entry, including
 * SWI 0x07 (DivArm), is left zero by the initializer. */
u8 swi_hle_handle[256] =
{
  [0x06] = 0x1  /* SWI 6: Div */
};
/* Entry points called from translated code for the Div (SWI 0x06) and
 * DivArm (SWI 0x07) high-level replacements, one per guest CPU mode.
 * NOTE(review): these look like the symbols produced by
 * execute_swi_function_builder() in the assembly stub - confirm. */
void execute_swi_hle_div_arm(void);
void execute_swi_hle_div_thumb(void);
void execute_swi_hle_divarm_arm(void);
void execute_swi_hle_divarm_thumb(void);
void execute_swi_hle_div_c(void)
{
@ -1956,19 +1909,23 @@ void execute_swi_hle_div_c(void)
reg[3] = (result ^ (result >> 31)) - (result >> 31);
}
#define generate_swi_hle_handler(_swi_number, mode) \
{ \
u32 swi_number = _swi_number; \
if(swi_hle_handle[swi_number]) \
{ \
/* Div */ \
if(swi_number == 0x06) \
{ \
generate_function_call(execute_swi_hle_div_##mode); \
} \
break; \
} \
} \
void execute_swi_hle_divarm_c(void)
{
/* real BIOS supposedly locks up, but game can recover on interrupt */
if (reg[0] == 0)
return;
s32 result = (s32)reg[1] / (s32)reg[0];
reg[1] = (s32)reg[1] % (s32)reg[0];
reg[0] = result;
reg[3] = (result ^ (result >> 31)) - (result >> 31);
}
/* Emit the high-level replacement for BIOS Div (SWI 0x06): a call to the
 * execute_swi_hle_div_<cpu_mode> stub, where cpu_mode is arm or thumb. */
#define arm_hle_div(cpu_mode) \
generate_function_call(execute_swi_hle_div_##cpu_mode);
/* Emit the high-level replacement for BIOS DivArm (SWI 0x07): a call to the
 * execute_swi_hle_divarm_<cpu_mode> stub, where cpu_mode is arm or thumb.
 * The trailing "_arm" in the macro name refers to the DivArm BIOS call, not
 * to the guest CPU mode. */
#define arm_hle_div_arm(cpu_mode) \
generate_function_call(execute_swi_hle_divarm_##cpu_mode);
#define generate_translation_gate(type) \
generate_update_pc(pc); \
@ -1983,7 +1940,7 @@ void init_emitter(void) {
}
u32 execute_arm_translate_internal(u32 cycles, void *regptr);
u32 function_cc execute_arm_translate(u32 cycles) {
u32 execute_arm_translate(u32 cycles) {
return execute_arm_translate_internal(cycles, &reg[0]);
}

View File

@ -441,6 +441,8 @@ defsymbl(execute_swi_hle_##swi_function##_##mode) ;\
execute_swi_function_builder(div, arm)
execute_swi_function_builder(div, thumb)
execute_swi_function_builder(divarm, arm)
execute_swi_function_builder(divarm, thumb)
@ Start program execution. Normally the mode should be Thumb and the

2
cpu.h
View File

@ -117,7 +117,7 @@ u32 function_cc execute_load_s16(u32 address);
void function_cc execute_store_u8(u32 address, u32 source);
void function_cc execute_store_u16(u32 address, u32 source);
void function_cc execute_store_u32(u32 address, u32 source);
u32 function_cc execute_arm_translate(u32 cycles);
u32 execute_arm_translate(u32 cycles);
void init_translater(void);
unsigned cpu_write_savestate(u8* dst);
bool cpu_read_savestate(const u8 *src);

View File

@ -78,6 +78,9 @@ typedef struct
extern u8 bit_count[256];
// True for SWI 0x06 (Div) and SWI 0x07 (DivArm): masking off bit 0 of the
// low byte maps both numbers onto 0x06. Only bits 1-7 are compared, so
// callers are expected to pass the 8-bit SWI number.
#define is_div_swi(swinum) (((swinum) & 0xFE) == 0x06)
#define arm_decode_data_proc_reg(opcode) \
u32 rn = (opcode >> 16) & 0x0F; \
u32 rd = (opcode >> 12) & 0x0F; \
@ -192,9 +195,6 @@ extern u8 bit_count[256];
#define thumb_decode_branch_cond() \
s32 offset = (s8)(opcode & 0xFF) \
#define thumb_decode_swi() \
u32 comment = opcode & 0xFF \
#define thumb_decode_branch() \
u32 offset = opcode & 0x07FF \
@ -1708,8 +1708,16 @@ void translate_icache_sync() {
\
case 0xF0 ... 0xFF: \
{ \
/* SWI comment */ \
arm_swi(); \
u32 swinum = (opcode >> 16) & 0xFF; \
if (swinum == 6) { \
arm_hle_div(arm); \
} \
else if (swinum == 7) { \
arm_hle_div_arm(arm); \
} \
else { \
arm_swi(); \
} \
break; \
} \
} \
@ -2270,8 +2278,16 @@ void translate_icache_sync() {
\
case 0xDF: \
{ \
/* SWI comment */ \
thumb_swi(); \
u32 swinum = opcode & 0xFF; \
if (swinum == 6) { \
arm_hle_div(thumb); \
} \
else if (swinum == 7) { \
arm_hle_div_arm(thumb); \
} \
else { \
thumb_swi(); \
} \
break; \
} \
\
@ -2688,7 +2704,7 @@ block_lookup_address_builder(dual);
((opcode & 0x12FFF10) == 0x12FFF10) || \
((opcode & 0x8108000) == 0x8108000) || \
((opcode >= 0xA000000) && (opcode < 0xF000000)) || \
((opcode > 0xF000000) && (!swi_hle_handle[((opcode >> 16) & 0xFF)]))) \
((opcode > 0xF000000) && (!is_div_swi((opcode >> 16) & 0xFF)))) \
#define arm_opcode_branch \
((opcode & 0xE000000) == 0xA000000) \
@ -2783,7 +2799,7 @@ block_lookup_address_builder(dual);
#define thumb_exit_point \
(((opcode >= 0xD000) && (opcode < 0xDF00)) || \
(((opcode & 0xFF00) == 0xDF00) && \
(!swi_hle_handle[opcode & 0xFF])) || \
(!is_div_swi(opcode & 0xFF))) || \
((opcode >= 0xE000) && (opcode < 0xE800)) || \
((opcode & 0xFF00) == 0x4700) || \
((opcode & 0xFF00) == 0xBD00) || \

View File

@ -2304,7 +2304,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_indirect_branch_dual() \
/* Translate an ARM-mode SWI (MIPS backend). The SWI number from opcode bits
 * 16-23 is offered to generate_swi_hle_handler() first; that macro may
 * `break` out of the expansion site. Otherwise pc + 4 is loaded into reg_a0,
 * a call to execute_swi is emitted, and then a branch. */
#define arm_swi() \
generate_swi_hle_handler((opcode >> 16) & 0xFF); \
generate_load_pc(reg_a0, (pc + 4)); \
generate_function_call_swap_delay(execute_swi); \
generate_branch() \
@ -2369,7 +2368,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
#endif
#define thumb_swi() \
generate_swi_hle_handler(opcode & 0xFF); \
generate_load_pc(reg_a0, (pc + 2)); \
generate_function_call_swap_delay(execute_swi); \
generate_branch_cycle_update( \
@ -2377,71 +2375,22 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
block_exits[block_exit_position].branch_target); \
block_exit_position++ \
/* Per-SWI flag table indexed by the 8-bit SWI number. A non-zero entry marks
 * a BIOS call for which generate_swi_hle_handler() emits a high-level
 * replacement. Only SWI 0x06 (Div) is flagged; every other entry, including
 * SWI 0x07 (DivArm), is left zero by the initializer. */
u8 swi_hle_handle[256] =
{
  [0x06] = 0x1  /* SWI 6: Div */
};
#define arm_hle_div(cpu_mode) \
mips_emit_div(reg_r0, reg_r1); \
mips_emit_mflo(reg_r0); \
mips_emit_mfhi(reg_r1); \
mips_emit_sra(reg_a0, reg_r0, 31); \
mips_emit_xor(reg_r3, reg_r0, reg_a0); \
mips_emit_subu(reg_r3, reg_r3, reg_a0); \
#define arm_hle_div_arm(cpu_mode) \
mips_emit_div(reg_r1, reg_r0); \
mips_emit_mflo(reg_r0); \
mips_emit_mfhi(reg_r1); \
mips_emit_sra(reg_a0, reg_r0, 31); \
mips_emit_xor(reg_r3, reg_r0, reg_a0); \
mips_emit_subu(reg_r3, reg_r3, reg_a0); \
#define generate_swi_hle_handler(_swi_number) \
{ \
u32 swi_number = _swi_number; \
if(swi_hle_handle[swi_number]) \
{ \
/* Div */ \
if(swi_number == 0x06) \
{ \
mips_emit_div(reg_r0, reg_r1); \
mips_emit_mflo(reg_r0); \
mips_emit_mfhi(reg_r1); \
mips_emit_sra(reg_a0, reg_r0, 31); \
mips_emit_xor(reg_r3, reg_r0, reg_a0); \
mips_emit_subu(reg_r3, reg_r3, reg_a0); \
} \
break; \
} \
} \
#define generate_translation_gate(type) \
generate_load_pc(reg_a0, pc); \
@ -3343,7 +3292,7 @@ void init_emitter() {
}
u32 execute_arm_translate_internal(u32 cycles, void *regptr);
u32 function_cc execute_arm_translate(u32 cycles) {
u32 execute_arm_translate(u32 cycles) {
return execute_arm_translate_internal(cycles, &reg[0]);
}

View File

@ -99,10 +99,6 @@ typedef enum
x86_opcode_shr_reg_rm = 0x05D3,
x86_opcode_sar_reg_rm = 0x07D3,
x86_opcode_rcr_reg1 = 0x03D1,
x86_opcode_push_reg = 0x50,
x86_opcode_push_rm = 0xFF,
x86_opcode_push_imm = 0x0668,
x86_opcode_pop_reg = 0x58,
x86_opcode_call_offset = 0xE8,
x86_opcode_ret = 0xC3,
x86_opcode_test_rm_imm = 0x00F7,
@ -126,6 +122,14 @@ typedef enum
x86_opcode_cmp_rm_imm = 0x0781,
x86_opcode_lea_reg_rm = 0x8D,
x86_opcode_j = 0x80,
x86_opcode_cdq = 0x99,
x86_opcode_jmp = 0xE9,
x86_opcode_jmp_reg = 0x04FF,
x86_opcode_ext = 0x0F
} x86_opcodes;
typedef enum
{
x86_opcode_seto = 0x90,
x86_opcode_setc = 0x92,
x86_opcode_setnc = 0x93,
@ -133,10 +137,7 @@ typedef enum
x86_opcode_setnz = 0x95,
x86_opcode_sets = 0x98,
x86_opcode_setns = 0x99,
x86_opcode_jmp = 0xE9,
x86_opcode_jmp_reg = 0x04FF,
x86_opcode_ext = 0x0F
} x86_opcodes;
} x86_ext_opcodes;
typedef enum
{
@ -314,18 +315,8 @@ typedef enum
#define x86_emit_not_reg(srcdst) \
x86_emit_opcode_1b_ext_reg(not_rm, srcdst) \
#define x86_emit_pop_reg(regn) \
x86_emit_opcode_1b(pop_reg, regn) \
#define x86_emit_push_reg(regn) \
x86_emit_opcode_1b(push_reg, regn) \
#define x86_emit_push_mem(base, offset) \
x86_emit_opcode_1b_mem(push_rm, 0x06, base, offset) \
#define x86_emit_push_imm(imm) \
x86_emit_byte(x86_opcode_push_imm); \
x86_emit_dword(imm) \
/* Emit the one-byte CDQ instruction, which sign-extends EAX into EDX:EAX.
 * Used to prepare the 64-bit dividend before a signed IDIV. */
#define x86_emit_cdq() \
x86_emit_byte(x86_opcode_cdq) \
#define x86_emit_call_offset(relative_offset) \
x86_emit_byte(x86_opcode_call_offset); \
@ -2196,7 +2187,6 @@ static void function_cc execute_swi(u32 pc)
generate_indirect_branch_dual(); \
/* Translate an ARM-mode SWI (x86 backend). The SWI number from opcode bits
 * 16-23 is offered to generate_swi_hle_handler() first; that macro may
 * `break` out of the expansion site. Otherwise the PC is updated to pc + 4,
 * a call to execute_swi is emitted, and then a branch. */
#define arm_swi() \
generate_swi_hle_handler((opcode >> 16) & 0xFF); \
generate_update_pc((pc + 4)); \
generate_function_call(execute_swi); \
generate_branch() \
@ -2240,7 +2230,6 @@ static void function_cc execute_swi(u32 pc)
generate_function_call(process_cheats);
#define thumb_swi() \
generate_swi_hle_handler(opcode & 0xFF); \
generate_update_pc((pc + 2)); \
generate_function_call(execute_swi); \
generate_branch_cycle_update( \
@ -2248,74 +2237,33 @@ static void function_cc execute_swi(u32 pc)
block_exits[block_exit_position].branch_target); \
block_exit_position++ \
/* Per-SWI flag table indexed by the 8-bit SWI number. A non-zero entry marks
 * a BIOS call for which generate_swi_hle_handler() emits a high-level
 * replacement. Only SWI 0x06 (Div) is flagged; every other entry, including
 * SWI 0x07 (DivArm), is left zero by the initializer. */
u8 swi_hle_handle[256] =
{
  [0x06] = 0x1  /* SWI 6: Div */
};
/* Inline high-level replacement for BIOS Div (SWI 0x06) on x86:
 * guest r0 / r1 -> quotient in r0, remainder in r1, |quotient| in r3.
 * cpu_mode is unused. The sequence relies on a0/a1/a2 being the registers
 * IDIV works with (dividend/quotient, remainder, divisor operand).
 *
 * The absolute value is computed as (q ^ mask) + (mask >>> 31) with
 * mask = q >> 31 (arithmetic). The previous sequence used the logical shift
 * for the XOR mask and added the arithmetic one, which stored a wrong value
 * in r3 for every negative quotient (e.g. -7 instead of 5 for q = -5).
 *
 * NOTE(review): IDIV raises a divide error on a zero divisor and on
 * INT_MIN / -1; no guard is emitted here - confirm whether one is needed. */
#define arm_hle_div(cpu_mode) \
generate_load_reg(a0, 0); \
generate_load_reg(a2, 1); \
/* Sign-extend the dividend before the signed divide. */ \
x86_emit_cdq(); \
x86_emit_idiv_eax_reg(ecx); \
generate_store_reg(a0, 0); \
generate_store_reg(a1, 1); \
/* a1 = sign mask of the quotient (0 or -1). */ \
generate_mov(a1, a0); \
generate_shift_right_arithmetic(a1, 31); \
/* a0 = q ^ mask, then add 1 when the quotient was negative. */ \
generate_xor(a0, a1); \
generate_shift_right(a1, 31); \
generate_add(a0, a1); \
generate_store_reg(a0, 3); \
/* High-level replacement for BIOS Div (SWI 0x06): signed 32-bit division.
 *
 * Inputs:  reg[0] = numerator, reg[1] = denominator.
 * Outputs: reg[0] = quotient, reg[1] = remainder, reg[3] = |quotient|.
 *
 * A zero denominator leaves all registers untouched, matching
 * execute_swi_hle_divarm_c, instead of faulting on the host. */
void function_cc swi_hle_div(void)
{
  s32 numer = (s32)reg[0];
  s32 denom = (s32)reg[1];

  /* Division by zero is undefined in C and traps on x86. */
  if (denom == 0)
    return;

  /* INT_MIN / -1 is not representable: it is undefined behaviour in C and
   * traps on x86. Produce the two's-complement wrapped result. */
  if (denom == -1 && reg[0] == 0x80000000u)
  {
    reg[0] = 0x80000000u;
    reg[1] = 0;
    reg[3] = 0x80000000u;
    return;
  }

  s32 result = numer / denom;
  /* Sign mask is all ones for a negative quotient, zero otherwise. The abs
   * is done in unsigned arithmetic so a quotient of INT_MIN cannot overflow. */
  u32 sign = (u32)(result >> 31);

  reg[1] = (u32)(numer % denom);
  reg[0] = (u32)result;
  reg[3] = ((u32)result ^ sign) - sign;
}
#define generate_swi_hle_handler(_swi_number) \
{ \
u32 swi_number = _swi_number; \
if(swi_hle_handle[swi_number]) \
{ \
/* Div */ \
if(swi_number == 0x06) \
{ \
generate_function_call(swi_hle_div); \
} \
break; \
} \
} \
/* Inline high-level replacement for BIOS DivArm (SWI 0x07) on x86. Same as
 * Div with the operands swapped: guest r1 / r0 -> quotient in r0, remainder
 * in r1, |quotient| in r3. cpu_mode is unused. The sequence relies on
 * a0/a1/a2 being the registers IDIV works with (dividend/quotient,
 * remainder, divisor operand).
 *
 * The absolute value is computed as (q ^ mask) + (mask >>> 31) with
 * mask = q >> 31 (arithmetic). The previous sequence used the logical shift
 * for the XOR mask and added the arithmetic one, which stored a wrong value
 * in r3 for every negative quotient (e.g. -7 instead of 5 for q = -5).
 *
 * NOTE(review): IDIV raises a divide error on a zero divisor and on
 * INT_MIN / -1; no guard is emitted here - confirm whether one is needed. */
#define arm_hle_div_arm(cpu_mode) \
generate_load_reg(a0, 1); \
generate_load_reg(a2, 0); \
/* Sign-extend the dividend before the signed divide. */ \
x86_emit_cdq(); \
x86_emit_idiv_eax_reg(ecx); \
generate_store_reg(a0, 0); \
generate_store_reg(a1, 1); \
/* a1 = sign mask of the quotient (0 or -1). */ \
generate_mov(a1, a0); \
generate_shift_right_arithmetic(a1, 31); \
/* a0 = q ^ mask, then add 1 when the quotient was negative. */ \
generate_xor(a0, a1); \
generate_shift_right(a1, 31); \
generate_add(a0, a1); \
generate_store_reg(a0, 3); \
#define generate_translation_gate(type) \
generate_update_pc(pc); \
@ -2330,7 +2278,7 @@ void init_emitter(void) {
u32 function_cc execute_arm_translate_internal(u32 cycles, void *regptr);
u32 function_cc execute_arm_translate(u32 cycles) {
u32 execute_arm_translate(u32 cycles) {
return execute_arm_translate_internal(cycles, &reg[0]);
}