From f51ed9de13638fccd09b766534097eef643c80f7 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 3 Sep 2021 01:01:37 +0200 Subject: [PATCH] Improve SWI codepaths and implement div&divarm natively --- arm/arm_emit.h | 83 ++++++++----------------------- arm/arm_stub.S | 2 + cpu.h | 2 +- cpu_threaded.c | 34 +++++++++---- psp/mips_emit.h | 83 ++++++------------------------- x86/x86_emit.h | 128 ++++++++++++++---------------------------------- 6 files changed, 102 insertions(+), 230 deletions(-) diff --git a/arm/arm_emit.h b/arm/arm_emit.h index b0e88aa..64878e4 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -1851,7 +1851,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_indirect_branch_dual(); \ #define arm_swi() \ - generate_swi_hle_handler((opcode >> 16) & 0xFF, arm); \ generate_function_call(execute_swi_arm); \ write32((pc + 4)); \ generate_branch(arm) \ @@ -1888,61 +1887,15 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_function_call(arm_cheat_hook); #define thumb_swi() \ - generate_swi_hle_handler(opcode & 0xFF, thumb); \ generate_function_call(execute_swi_thumb); \ write32((pc + 2)); \ /* We're in ARM mode now */ \ generate_branch(arm) \ -u8 swi_hle_handle[256] = -{ - 0x0, // SWI 0: SoftReset - 0x0, // SWI 1: RegisterRAMReset - 0x0, // SWI 2: Halt - 0x0, // SWI 3: Stop/Sleep - 0x0, // SWI 4: IntrWait - 0x0, // SWI 5: VBlankIntrWait - 0x1, // SWI 6: Div - 0x0, // SWI 7: DivArm - 0x0, // SWI 8: Sqrt - 0x0, // SWI 9: ArcTan - 0x0, // SWI A: ArcTan2 - 0x0, // SWI B: CpuSet - 0x0, // SWI C: CpuFastSet - 0x0, // SWI D: GetBIOSCheckSum - 0x0, // SWI E: BgAffineSet - 0x0, // SWI F: ObjAffineSet - 0x0, // SWI 10: BitUnpack - 0x0, // SWI 11: LZ77UnCompWram - 0x0, // SWI 12: LZ77UnCompVram - 0x0, // SWI 13: HuffUnComp - 0x0, // SWI 14: RLUnCompWram - 0x0, // SWI 15: RLUnCompVram - 0x0, // SWI 16: Diff8bitUnFilterWram - 0x0, // SWI 17: Diff8bitUnFilterVram - 0x0, // SWI 18: Diff16bitUnFilter - 
0x0, // SWI 19: SoundBias - 0x0, // SWI 1A: SoundDriverInit - 0x0, // SWI 1B: SoundDriverMode - 0x0, // SWI 1C: SoundDriverMain - 0x0, // SWI 1D: SoundDriverVSync - 0x0, // SWI 1E: SoundChannelClear - 0x0, // SWI 1F: MidiKey2Freq - 0x0, // SWI 20: SoundWhatever0 - 0x0, // SWI 21: SoundWhatever1 - 0x0, // SWI 22: SoundWhatever2 - 0x0, // SWI 23: SoundWhatever3 - 0x0, // SWI 24: SoundWhatever4 - 0x0, // SWI 25: MultiBoot - 0x0, // SWI 26: HardReset - 0x0, // SWI 27: CustomHalt - 0x0, // SWI 28: SoundDriverVSyncOff - 0x0, // SWI 29: SoundDriverVSyncOn - 0x0 // SWI 2A: SoundGetJumpList -}; - void execute_swi_hle_div_arm(void); void execute_swi_hle_div_thumb(void); +void execute_swi_hle_divarm_arm(void); +void execute_swi_hle_divarm_thumb(void); void execute_swi_hle_div_c(void) { @@ -1956,19 +1909,23 @@ void execute_swi_hle_div_c(void) reg[3] = (result ^ (result >> 31)) - (result >> 31); } -#define generate_swi_hle_handler(_swi_number, mode) \ -{ \ - u32 swi_number = _swi_number; \ - if(swi_hle_handle[swi_number]) \ - { \ - /* Div */ \ - if(swi_number == 0x06) \ - { \ - generate_function_call(execute_swi_hle_div_##mode); \ - } \ - break; \ - } \ -} \ +void execute_swi_hle_divarm_c(void) +{ + /* real BIOS supposedly locks up, but game can recover on interrupt */ + if (reg[0] == 0) + return; + s32 result = (s32)reg[1] / (s32)reg[0]; + reg[1] = (s32)reg[1] % (s32)reg[0]; + reg[0] = result; + + reg[3] = (result ^ (result >> 31)) - (result >> 31); +} + +#define arm_hle_div(cpu_mode) \ + generate_function_call(execute_swi_hle_div_##cpu_mode); + +#define arm_hle_div_arm(cpu_mode) \ + generate_function_call(execute_swi_hle_divarm_##cpu_mode); #define generate_translation_gate(type) \ generate_update_pc(pc); \ @@ -1983,7 +1940,7 @@ void init_emitter(void) { } u32 execute_arm_translate_internal(u32 cycles, void *regptr); -u32 function_cc execute_arm_translate(u32 cycles) { +u32 execute_arm_translate(u32 cycles) { return execute_arm_translate_internal(cycles, &reg[0]); } diff --git
a/arm/arm_stub.S b/arm/arm_stub.S index f5bd867..34fe188 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -441,6 +441,8 @@ defsymbl(execute_swi_hle_##swi_function##_##mode) ;\ execute_swi_function_builder(div, arm) execute_swi_function_builder(div, thumb) +execute_swi_function_builder(divarm, arm) +execute_swi_function_builder(divarm, thumb) @ Start program execution. Normally the mode should be Thumb and the diff --git a/cpu.h b/cpu.h index 9ab4cd9..1e58099 100644 --- a/cpu.h +++ b/cpu.h @@ -117,7 +117,7 @@ u32 function_cc execute_load_s16(u32 address); void function_cc execute_store_u8(u32 address, u32 source); void function_cc execute_store_u16(u32 address, u32 source); void function_cc execute_store_u32(u32 address, u32 source); -u32 function_cc execute_arm_translate(u32 cycles); +u32 execute_arm_translate(u32 cycles); void init_translater(void); unsigned cpu_write_savestate(u8* dst); bool cpu_read_savestate(const u8 *src); diff --git a/cpu_threaded.c b/cpu_threaded.c index 4724a5a..a9520a8 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -78,6 +78,9 @@ typedef struct extern u8 bit_count[256]; +// Div (6) and DivArm (7) +#define is_div_swi(swinum) (((swinum) & 0xFE) == 0x06) + #define arm_decode_data_proc_reg(opcode) \ u32 rn = (opcode >> 16) & 0x0F; \ u32 rd = (opcode >> 12) & 0x0F; \ @@ -192,9 +195,6 @@ extern u8 bit_count[256]; #define thumb_decode_branch_cond() \ s32 offset = (s8)(opcode & 0xFF) \ -#define thumb_decode_swi() \ - u32 comment = opcode & 0xFF \ - #define thumb_decode_branch() \ u32 offset = opcode & 0x07FF \ @@ -1708,8 +1708,16 @@ void translate_icache_sync() { \ case 0xF0 ... 
0xFF: \ { \ - /* SWI comment */ \ - arm_swi(); \ + u32 swinum = (opcode >> 16) & 0xFF; \ + if (swinum == 6) { \ + arm_hle_div(arm); \ + } \ + else if (swinum == 7) { \ + arm_hle_div_arm(arm); \ + } \ + else { \ + arm_swi(); \ + } \ break; \ } \ } \ @@ -2270,8 +2278,16 @@ void translate_icache_sync() { \ case 0xDF: \ { \ - /* SWI comment */ \ - thumb_swi(); \ + u32 swinum = opcode & 0xFF; \ + if (swinum == 6) { \ + arm_hle_div(thumb); \ + } \ + else if (swinum == 7) { \ + arm_hle_div_arm(thumb); \ + } \ + else { \ + thumb_swi(); \ + } \ break; \ } \ \ @@ -2688,7 +2704,7 @@ block_lookup_address_builder(dual); ((opcode & 0x12FFF10) == 0x12FFF10) || \ ((opcode & 0x8108000) == 0x8108000) || \ ((opcode >= 0xA000000) && (opcode < 0xF000000)) || \ - ((opcode > 0xF000000) && (!swi_hle_handle[((opcode >> 16) & 0xFF)]))) \ + ((opcode > 0xF000000) && (!is_div_swi((opcode >> 16) & 0xFF)))) \ #define arm_opcode_branch \ ((opcode & 0xE000000) == 0xA000000) \ @@ -2783,7 +2799,7 @@ block_lookup_address_builder(dual); #define thumb_exit_point \ (((opcode >= 0xD000) && (opcode < 0xDF00)) || \ (((opcode & 0xFF00) == 0xDF00) && \ - (!swi_hle_handle[opcode & 0xFF])) || \ + (!is_div_swi(opcode & 0xFF))) || \ ((opcode >= 0xE000) && (opcode < 0xE800)) || \ ((opcode & 0xFF00) == 0x4700) || \ ((opcode & 0xFF00) == 0xBD00) || \ diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 7282610..7dda195 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2304,7 +2304,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) generate_indirect_branch_dual() \ #define arm_swi() \ - generate_swi_hle_handler((opcode >> 16) & 0xFF); \ generate_load_pc(reg_a0, (pc + 4)); \ generate_function_call_swap_delay(execute_swi); \ generate_branch() \ @@ -2369,7 +2368,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) #endif #define thumb_swi() \ - generate_swi_hle_handler(opcode & 0xFF); \ generate_load_pc(reg_a0, (pc + 2)); \ generate_function_call_swap_delay(execute_swi); \ 
generate_branch_cycle_update( \ @@ -2377,71 +2375,22 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) block_exits[block_exit_position].branch_target); \ block_exit_position++ \ -u8 swi_hle_handle[256] = -{ - 0x0, // SWI 0: SoftReset - 0x0, // SWI 1: RegisterRAMReset - 0x0, // SWI 2: Halt - 0x0, // SWI 3: Stop/Sleep - 0x0, // SWI 4: IntrWait - 0x0, // SWI 5: VBlankIntrWait - 0x1, // SWI 6: Div - 0x0, // SWI 7: DivArm - 0x0, // SWI 8: Sqrt - 0x0, // SWI 9: ArcTan - 0x0, // SWI A: ArcTan2 - 0x0, // SWI B: CpuSet - 0x0, // SWI C: CpuFastSet - 0x0, // SWI D: GetBIOSCheckSum - 0x0, // SWI E: BgAffineSet - 0x0, // SWI F: ObjAffineSet - 0x0, // SWI 10: BitUnpack - 0x0, // SWI 11: LZ77UnCompWram - 0x0, // SWI 12: LZ77UnCompVram - 0x0, // SWI 13: HuffUnComp - 0x0, // SWI 14: RLUnCompWram - 0x0, // SWI 15: RLUnCompVram - 0x0, // SWI 16: Diff8bitUnFilterWram - 0x0, // SWI 17: Diff8bitUnFilterVram - 0x0, // SWI 18: Diff16bitUnFilter - 0x0, // SWI 19: SoundBias - 0x0, // SWI 1A: SoundDriverInit - 0x0, // SWI 1B: SoundDriverMode - 0x0, // SWI 1C: SoundDriverMain - 0x0, // SWI 1D: SoundDriverVSync - 0x0, // SWI 1E: SoundChannelClear - 0x0, // SWI 1F: MidiKey2Freq - 0x0, // SWI 20: SoundWhatever0 - 0x0, // SWI 21: SoundWhatever1 - 0x0, // SWI 22: SoundWhatever2 - 0x0, // SWI 23: SoundWhatever3 - 0x0, // SWI 24: SoundWhatever4 - 0x0, // SWI 25: MultiBoot - 0x0, // SWI 26: HardReset - 0x0, // SWI 27: CustomHalt - 0x0, // SWI 28: SoundDriverVSyncOff - 0x0, // SWI 29: SoundDriverVSyncOn - 0x0 // SWI 2A: SoundGetJumpList -}; +#define arm_hle_div(cpu_mode) \ + mips_emit_div(reg_r0, reg_r1); \ + mips_emit_mflo(reg_r0); \ + mips_emit_mfhi(reg_r1); \ + mips_emit_sra(reg_a0, reg_r0, 31); \ + mips_emit_xor(reg_r3, reg_r0, reg_a0); \ + mips_emit_subu(reg_r3, reg_r3, reg_a0); \ + +#define arm_hle_div_arm(cpu_mode) \ + mips_emit_div(reg_r1, reg_r0); \ + mips_emit_mflo(reg_r0); \ + mips_emit_mfhi(reg_r1); \ + mips_emit_sra(reg_a0, reg_r0, 31); \ + mips_emit_xor(reg_r3, 
reg_r0, reg_a0); \ + mips_emit_subu(reg_r3, reg_r3, reg_a0); \ -#define generate_swi_hle_handler(_swi_number) \ -{ \ - u32 swi_number = _swi_number; \ - if(swi_hle_handle[swi_number]) \ - { \ - /* Div */ \ - if(swi_number == 0x06) \ - { \ - mips_emit_div(reg_r0, reg_r1); \ - mips_emit_mflo(reg_r0); \ - mips_emit_mfhi(reg_r1); \ - mips_emit_sra(reg_a0, reg_r0, 31); \ - mips_emit_xor(reg_r3, reg_r0, reg_a0); \ - mips_emit_subu(reg_r3, reg_r3, reg_a0); \ - } \ - break; \ - } \ -} \ #define generate_translation_gate(type) \ generate_load_pc(reg_a0, pc); \ @@ -3343,7 +3292,7 @@ void init_emitter() { } u32 execute_arm_translate_internal(u32 cycles, void *regptr); -u32 function_cc execute_arm_translate(u32 cycles) { +u32 execute_arm_translate(u32 cycles) { return execute_arm_translate_internal(cycles, &reg[0]); } diff --git a/x86/x86_emit.h b/x86/x86_emit.h index 58bde29..c2726ef 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -99,10 +99,6 @@ typedef enum x86_opcode_shr_reg_rm = 0x05D3, x86_opcode_sar_reg_rm = 0x07D3, x86_opcode_rcr_reg1 = 0x03D1, - x86_opcode_push_reg = 0x50, - x86_opcode_push_rm = 0xFF, - x86_opcode_push_imm = 0x0668, - x86_opcode_pop_reg = 0x58, x86_opcode_call_offset = 0xE8, x86_opcode_ret = 0xC3, x86_opcode_test_rm_imm = 0x00F7, @@ -126,6 +122,14 @@ typedef enum x86_opcode_cmp_rm_imm = 0x0781, x86_opcode_lea_reg_rm = 0x8D, x86_opcode_j = 0x80, + x86_opcode_cdq = 0x99, + x86_opcode_jmp = 0xE9, + x86_opcode_jmp_reg = 0x04FF, + x86_opcode_ext = 0x0F +} x86_opcodes; + +typedef enum +{ x86_opcode_seto = 0x90, x86_opcode_setc = 0x92, x86_opcode_setnc = 0x93, @@ -133,10 +137,7 @@ typedef enum x86_opcode_setnz = 0x95, x86_opcode_sets = 0x98, x86_opcode_setns = 0x99, - x86_opcode_jmp = 0xE9, - x86_opcode_jmp_reg = 0x04FF, - x86_opcode_ext = 0x0F -} x86_opcodes; +} x86_ext_opcodes; typedef enum { @@ -314,18 +315,8 @@ typedef enum #define x86_emit_not_reg(srcdst) \ x86_emit_opcode_1b_ext_reg(not_rm, srcdst) \ -#define x86_emit_pop_reg(regn) \ -
x86_emit_opcode_1b(pop_reg, regn) \ - -#define x86_emit_push_reg(regn) \ - x86_emit_opcode_1b(push_reg, regn) \ - -#define x86_emit_push_mem(base, offset) \ - x86_emit_opcode_1b_mem(push_rm, 0x06, base, offset) \ - -#define x86_emit_push_imm(imm) \ - x86_emit_byte(x86_opcode_push_imm); \ - x86_emit_dword(imm) \ +#define x86_emit_cdq() \ + x86_emit_byte(x86_opcode_cdq) \ #define x86_emit_call_offset(relative_offset) \ x86_emit_byte(x86_opcode_call_offset); \ @@ -2196,7 +2187,6 @@ static void function_cc execute_swi(u32 pc) generate_indirect_branch_dual(); \ #define arm_swi() \ - generate_swi_hle_handler((opcode >> 16) & 0xFF); \ generate_update_pc((pc + 4)); \ generate_function_call(execute_swi); \ generate_branch() \ @@ -2240,7 +2230,6 @@ static void function_cc execute_swi(u32 pc) generate_function_call(process_cheats); #define thumb_swi() \ - generate_swi_hle_handler(opcode & 0xFF); \ generate_update_pc((pc + 2)); \ generate_function_call(execute_swi); \ generate_branch_cycle_update( \ @@ -2248,74 +2237,33 @@ static void function_cc execute_swi(u32 pc) block_exits[block_exit_position].branch_target); \ block_exit_position++ \ -u8 swi_hle_handle[256] = -{ - 0x0, // SWI 0: SoftReset - 0x0, // SWI 1: RegisterRAMReset - 0x0, // SWI 2: Halt - 0x0, // SWI 3: Stop/Sleep - 0x0, // SWI 4: IntrWait - 0x0, // SWI 5: VBlankIntrWait - 0x1, // SWI 6: Div - 0x0, // SWI 7: DivArm - 0x0, // SWI 8: Sqrt - 0x0, // SWI 9: ArcTan - 0x0, // SWI A: ArcTan2 - 0x0, // SWI B: CpuSet - 0x0, // SWI C: CpuFastSet - 0x0, // SWI D: GetBIOSCheckSum - 0x0, // SWI E: BgAffineSet - 0x0, // SWI F: ObjAffineSet - 0x0, // SWI 10: BitUnpack - 0x0, // SWI 11: LZ77UnCompWram - 0x0, // SWI 12: LZ77UnCompVram - 0x0, // SWI 13: HuffUnComp - 0x0, // SWI 14: RLUnCompWram - 0x0, // SWI 15: RLUnCompVram - 0x0, // SWI 16: Diff8bitUnFilterWram - 0x0, // SWI 17: Diff8bitUnFilterVram - 0x0, // SWI 18: Diff16bitUnFilter - 0x0, // SWI 19: SoundBias - 0x0, // SWI 1A: SoundDriverInit - 0x0, // SWI 1B: SoundDriverMode 
- 0x0, // SWI 1C: SoundDriverMain - 0x0, // SWI 1D: SoundDriverVSync - 0x0, // SWI 1E: SoundChannelClear - 0x0, // SWI 1F: MidiKey2Freq - 0x0, // SWI 20: SoundWhatever0 - 0x0, // SWI 21: SoundWhatever1 - 0x0, // SWI 22: SoundWhatever2 - 0x0, // SWI 23: SoundWhatever3 - 0x0, // SWI 24: SoundWhatever4 - 0x0, // SWI 25: MultiBoot - 0x0, // SWI 26: HardReset - 0x0, // SWI 27: CustomHalt - 0x0, // SWI 28: SoundDriverVSyncOff - 0x0, // SWI 29: SoundDriverVSyncOn - 0x0 // SWI 2A: SoundGetJumpList -}; +#define arm_hle_div(cpu_mode) /* Div (SWI 6): inline replacement for the removed swi_hle_div(); operands r0, r1 */ \ + generate_load_reg(a0, 0); \ + generate_load_reg(a2, 1); \ + x86_emit_cdq(); \ + x86_emit_idiv_eax_reg(ecx); /* NOTE(review): no divisor == 0 guard before idiv, unlike execute_swi_hle_divarm_c - confirm this cannot fault */ \ + generate_store_reg(a0, 0); \ + generate_store_reg(a1, 1); \ + generate_mov(a1, a0); \ + generate_shift_right_arithmetic(a1, 31); /* a1 = sign mask, i.e. (result >> 31) */ \ + generate_xor(a0, a1); /* a0 = result ^ mask */ \ + generate_shift_right(a1, 31); /* a1 = 1 if result was negative, else 0 */ \ + generate_add(a0, a1); /* a0 = (result ^ mask) - mask, as in the C reference */ \ + generate_store_reg(a0, 3); \ -void function_cc swi_hle_div(void) -{ - s32 result = (s32)reg[0] / (s32)reg[1]; - reg[1] = (s32)reg[0] % (s32)reg[1]; - reg[0] = result; - reg[3] = (result ^ (result >> 31)) - (result >> 31); -} - -#define generate_swi_hle_handler(_swi_number) \ -{ \ - u32 swi_number = _swi_number; \ - if(swi_hle_handle[swi_number]) \ - { \ - /* Div */ \ - if(swi_number == 0x06) \ - { \ - generate_function_call(swi_hle_div); \ - } \ - break; \ - } \ -} \ +#define arm_hle_div_arm(cpu_mode) /* DivArm (SWI 7): same sequence as arm_hle_div with the operands swapped (r1, r0) */ \ + generate_load_reg(a0, 1); \ + generate_load_reg(a2, 0); \ + x86_emit_cdq(); \ + x86_emit_idiv_eax_reg(ecx); /* NOTE(review): no divisor == 0 guard before idiv, unlike execute_swi_hle_divarm_c - confirm this cannot fault */ \ + generate_store_reg(a0, 0); \ + generate_store_reg(a1, 1); \ + generate_mov(a1, a0); \ + generate_shift_right_arithmetic(a1, 31); /* a1 = sign mask, i.e. (result >> 31) */ \ + generate_xor(a0, a1); /* a0 = result ^ mask */ \ + generate_shift_right(a1, 31); /* a1 = 1 if result was negative, else 0 */ \ + generate_add(a0, a1); /* a0 = (result ^ mask) - mask, as in the C reference */ \ + generate_store_reg(a0, 3); \ #define generate_translation_gate(type) \ generate_update_pc(pc); \ @@ -2330,7 +2278,7 @@ void init_emitter(void) { u32 function_cc execute_arm_translate_internal(u32 cycles, void *regptr); -u32 function_cc execute_arm_translate(u32 cycles) { +u32
execute_arm_translate(u32 cycles) { return execute_arm_translate_internal(cycles, &reg[0]); }