From 11d87b89df744f3b850dc63dda971e90ac39c152 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 14 Apr 2023 01:41:55 +0200 Subject: [PATCH] Rewrite I/O side effects write and IRQ triggers This rewrites the way that CPU alerts work, making them a bitmap (since multiple alerts can happen simultaneously, like SMC and IRQ). This doesn't really fix many games but improves accuracy overall and improves performance on some I/O writes (the ones without side effects). The IRQ raising is now decoupled and explicitly called via a new function (check_and_raise_interrupts) to avoid issues such as invalid CPSR values (doesn't seem to bother most games!). There are more side effects missing, so this just lays the ground for more fixes. --- arm/arm64_stub.S | 69 ++++++++++++++-------------- arm/arm_stub.S | 116 +++++++++++++++++++++++------------------------ cpu.c | 43 ++++++++++-------- cpu.h | 18 ++++---- gba_memory.c | 33 +++++--------- input.c | 10 +++- main.c | 8 +++- mips/mips_emit.h | 16 +++++-- mips/mips_stub.S | 80 ++++++++++++++++---------------- x86/x86_stub.S | 36 +++++++++++---- 10 files changed, 224 insertions(+), 205 deletions(-) diff --git a/arm/arm64_stub.S b/arm/arm64_stub.S index 5e5a2c1..8e4d551 100644 --- a/arm/arm64_stub.S +++ b/arm/arm64_stub.S @@ -66,6 +66,10 @@ _##symbol: #define REG_SAVE4 (30 * 4) #define REG_SAVE5 (31 * 4) +#define CPU_ALERT_HALT_B 0 +#define CPU_ALERT_SMC_B 1 +#define CPU_ALERT_IRQ_B 2 + #define reg_base x20 #define reg_cycles w21 @@ -73,6 +77,7 @@ _##symbol: #define reg_v_flag w23 #define reg_z_flag w24 #define reg_n_flag w25 +#define reg_save0 w19 // Memory offsets from reg_base to the different buffers @@ -355,24 +360,24 @@ defsymbl(execute_arm_translate_internal) // Check whether the CPU is sleeping already, we should just wait for IRQs ldr w1, [reg_base, #CPU_HALT_STATE] - cmp w1, #0 - bne alert_loop + cbnz w1, alert_loop - ldr w0, [reg_base, #REG_PC] // r0 = current pc - ldr w1, [reg_base, #REG_CPSR] // r1 = flags -
tst w1, #0x20 // see if Thumb bit is set - extract_flags(w2) // load flags + ldr w0, [reg_base, #REG_PC] // load current PC - bne 1f // if so lookup thumb +// Resume execution at PC (at w0) +lookup_pc: + ldr w1, [reg_base, #REG_CPSR] // w1 = flags + extract_flags_reg(w1) + tbnz w1, #5, 2f // see if Thumb bit is set + // Lookup and jump to the right mode block bl block_lookup_address_arm load_registers() - br x0 // jump to first ARM block -1: + br x0 +2: bl block_lookup_address_thumb load_registers() - br x0 // jump to first Thumb block - + br x0 // Epilogue to return to the main thread (whatever called execute_arm_translate) @@ -588,11 +593,13 @@ ext_store_ioreg_u##store_type: ;\ load_registers() ;\ ret /* resume if no side effects */;\ ;\ -3: ;\ +3: /* SMC write (iwram/ewram) */ ;\ str w2, [reg_base, #REG_PC] /* write out PC */;\ store_registers() /* store registers */;\ consolidate_flags(w1) ;\ - b smc_write /* perform smc write */;\ + bl flush_translation_cache_ram ;\ + ldr w0, [reg_base, #REG_PC] /* load "current new" PC */;\ + b lookup_pc /* continue execution */;\ .size execute_store_u##store_type, .-execute_store_u##store_type // for ignored areas, just return @@ -680,7 +687,6 @@ ext_store_ioreg_u32_safe: and w0, w0, #(0x3fc) store_registers() bl write_io_register32 - cbnz w0, write_epilogue ldr lr, [reg_base, #REG_SAVE] load_registers() ret @@ -688,10 +694,20 @@ ext_store_ioreg_u32_safe: // This is called whenever an external store with side effects was performed write_epilogue: - consolidate_flags(w1) // update the CPSR before update + mov reg_save0, w0 // Save reg for later + consolidate_flags(w1) // Update CPSR for IRQ/ + tbz w0, #CPU_ALERT_SMC_B, 1f // Skip if SMC did not happen + bl flush_translation_cache_ram // Flush RAM if bit is set - cmp w0, #2 // see if the alert is due to SMC - beq smc_write // if so, goto SMC handler +1: + tbz reg_save0, #CPU_ALERT_IRQ_B, 2f // Skip if IRQ did not happen + bl check_and_raise_interrupts + +2: + ldr w0, 
[reg_base, #REG_PC] // load new PC + tbz reg_save0, #CPU_ALERT_HALT_B, lookup_pc // Resume execution if running + + // explicit fallthrough to alert_loop, while CPU is halted alert_loop: mov w0, reg_cycles // load remaining cycles @@ -708,25 +724,6 @@ alert_loop: b lookup_pc // Resume execution at that PC -smc_write: - bl flush_translation_cache_ram - ldr w0, [reg_base, #REG_PC] // load "current new" PC - -// Resume execution at PC (at w0) -lookup_pc: - ldr w1, [reg_base, #REG_CPSR] // w1 = flags - extract_flags_reg(w1) - tbnz w1, #5, 2f // see if Thumb bit is set - - // Lookup and jump to the right mode block - bl block_lookup_address_arm - load_registers() - br x0 -2: - bl block_lookup_address_thumb - load_registers() - br x0 - .data .align 4 defsymbl(ldst_handler_functions) diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 3d536b4..5df3c9e 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -48,6 +48,10 @@ _##symbol: #define REG_SAVE4 (30 * 4) #define REG_SAVE5 (31 * 4) +#define CPU_ALERT_HALT (1 << 0) +#define CPU_ALERT_SMC (1 << 1) +#define CPU_ALERT_IRQ (1 << 2) + #define reg_a0 r0 #define reg_a1 r1 #define reg_a2 r2 @@ -119,6 +123,16 @@ _##symbol: #define store_registers_thumb() ;\ stm reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5} +#define store_registers_cond() ;\ + stmne reg_base, {reg_x0, reg_x1, reg_x2, reg_x3, reg_x4, reg_x5} ;\ + bne 80f ;\ + str reg_x0, [reg_base, #REG_R0] ;\ + str reg_x1, [reg_base, #REG_R1] ;\ + str reg_x2, [reg_base, #REG_R6] ;\ + str reg_x3, [reg_base, #REG_R9] ;\ + str reg_x4, [reg_base, #REG_R12] ;\ + str reg_x5, [reg_base, #REG_R14] ;\ + 80: @ Returns an updated persistent cpsr with the cached flags register. @ Uses reg as a temporary register and returns the CPSR here. 
@@ -430,23 +444,7 @@ defsymbl(execute_arm_translate_internal) ldr r1, [reg_base, #CPU_HALT_STATE] cmp r1, #0 bne alert_loop - - ldr r0, [reg_base, #REG_PC] @ r0 = current pc - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags - tst r1, #0x20 @ see if Thumb bit is set - - bne 1f @ if so lookup thumb - - load_registers_arm() @ load ARM registers - call_c_function(block_lookup_address_arm) - extract_flags() @ load flags - bx r0 @ jump to first ARM block - -1: - load_registers_thumb() @ load Thumb registers - call_c_function(block_lookup_address_thumb) - extract_flags() @ load flags - bx r0 @ jump to first Thumb block + b lookup_pc @ Epilogue to return to the main thread (whatever called execute_arm_translate) @@ -505,17 +503,13 @@ ext_io_store_u##store_type: ;\ ldr r2, [lr] /* load PC */;\ str r2, [reg_base, #REG_PC] /* write out PC */;\ ;\ - ldr r2, [reg_base, #REG_CPSR] /* load CPSR */;\ - tst r2, #0x20 /* check Thumb bit is set */;\ - bne 1f /* Store arm/thumb regs */;\ - store_registers_arm() ;\ - b 2f ;\ -1: ;\ - store_registers_thumb() ;\ -2: ;\ mask_addr_##store_type(10) /* Mask to IO memory (+align) */;\ call_c_function(write_io_register##store_type) ;\ - b write_epilogue /* handle additional write stuff */;\ + ;\ + cmp r0, #0 ;\ + bne write_epilogue /* handle additional write stuff */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ ;\ ext_store_iwram_u##store_type: ;\ save_flags() ;\ @@ -558,10 +552,11 @@ ext_store_oam_ram_u##store_type: ;\ str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ add pc, lr, #4 /* return */;\ ;\ -3: ;\ +3: /* Flush RAM cache and "resume" execution via re-compile */ ;\ ldr r0, [lr] /* load PC */;\ str r0, [reg_base, #REG_PC] /* write out PC */;\ - b smc_write /* perform smc write */;\ + call_c_function(flush_translation_cache_ram) ;\ + b resume_pc /* continue execution */;\ @ for ignored areas, just return ext_store_ignore: @@ -678,16 +673,27 @@ ext_store_oam_ram_u32_safe: write_epilogue: - cmp r0, #0 @ if nothing 
happened, we can resume - beq no_alert - - cmp r0, #3 @ check if the write rose an alert - beq irq_alert @ triggered an IRQ, go execute it - - cmp r0, #2 @ see if the alert is due to SMC (via DMA) - beq smc_write @ if so, goto SMC handler - + ldr r2, [reg_base, #REG_CPSR] @ Save all register and CPSR + tst r2, #0x20 @ Check thumb bit + store_registers_cond() @ Store ARM/Thumb regs collapse_flags(r1) @ interrupt needs current flags + + mov r2, r0 @ r2 is stored across C calls + tst r2, #CPU_ALERT_SMC @ check for SMC code + beq 1f + call_c_function(flush_translation_cache_ram) @ Flush RAM if bit is set + +1: + tst r2, #CPU_ALERT_IRQ @ check for IRQs + beq 2f + call_c_function(check_and_raise_interrupts) @ Update CPU state to raise IRQ + +2: + tst r2, #CPU_ALERT_HALT @ check for CPU halt bit + beq lookup_pc @ Resume execution if not halted + + @ Fallthrough to alert_loop on purpose (CPU is now halted) + mvn r0, reg_cycles @ setup for update_gba alert_loop: @@ -707,55 +713,49 @@ alert_loop: tst r1, #0x20 @ see if Thumb bit is set bne 2f - load_registers_arm() call_c_function(block_lookup_address_arm) - restore_flags() + load_registers_arm() + extract_flags() bx r0 @ jump to new ARM block 2: + call_c_function(block_lookup_address_thumb) load_registers_thumb() - call_c_function(block_lookup_address_thumb) - restore_flags() + extract_flags() bx r0 @ jump to new Thumb block -no_alert: - restore_flags() - add pc, lr, #4 @ return, skip inlined PC - - -smc_write: - call_c_function(flush_translation_cache_ram) - +resume_pc: @ Resume regular execution (except we might need to recompile due to flush) + @ assume flags are spilled to reg_flags ldr r0, [reg_base, #REG_PC] @ r0 = new pc - ldr r1, [reg_base, #REG_CPSR] @ r1 = flags + ldr r1, [reg_base, #REG_CPSR] @ r1 = partial flags valid tst r1, #0x20 @ see if Thumb bit is set - beq 3f @ if not lookup ARM + beq 1f @ if not lookup ARM call_c_function(block_lookup_address_thumb) restore_flags() bx r0 @ jump to new Thumb block -3: +1: 
call_c_function(block_lookup_address_arm) restore_flags() bx r0 @ jump to new ARM block -irq_alert: - @ Resume regular execution, usually ARM mode, need to reload registers +lookup_pc: + @ Restart CPU execution, assumes CPU mode might have changed ldr r0, [reg_base, #REG_PC] @ r0 = new pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set - beq 4f @ if not lookup ARM + beq 2f @ if not lookup ARM call_c_function(block_lookup_address_thumb) - restore_flags() load_registers_thumb() + extract_flags() bx r0 @ jump to new Thumb block -4: +2: call_c_function(block_lookup_address_arm) - restore_flags() load_registers_arm() + extract_flags() bx r0 @ jump to new ARM block diff --git a/cpu.c b/cpu.c index a051d60..b662be6 100644 --- a/cpu.c +++ b/cpu.c @@ -1573,11 +1573,21 @@ void set_cpu_mode(cpu_mode_type new_mode) reg[CPU_MODE] = new_mode; } -cpu_alert_type check_interrupts() +#define cpu_has_interrupt() \ + (!(reg[REG_CPSR] & 0x80) && read_ioreg(REG_IME) && \ + (read_ioreg(REG_IE) & io_registers[REG_IF])) + +// Returns whether the CPU has a pending interrupt. +cpu_alert_type check_interrupt() { + return (cpu_has_interrupt()) ? CPU_ALERT_IRQ : CPU_ALERT_NONE; +} + +// Checks for pending IRQs and raises them. This changes the CPU mode +// which means that it must be called with a valid CPU state. +void check_and_raise_interrupts() { // Check any IRQ flag pending, IME and CPSR-IRQ enabled - u16 umirq = read_ioreg(REG_IE) & io_registers[REG_IF]; - if(!(reg[REG_CPSR] & 0x80) && read_ioreg(REG_IME) && umirq) + if (cpu_has_interrupt()) { // Value after the FIQ returns, should be improved reg[REG_BUS_VALUE] = 0xe55ec002; @@ -1591,18 +1601,19 @@ cpu_alert_type check_interrupts() set_cpu_mode(MODE_IRQ); reg[CPU_HALT_STATE] = CPU_ACTIVE; reg[CHANGED_PC_STATUS] = 1; - return CPU_ALERT_IRQ; } - return CPU_ALERT_NONE; } -void raise_interrupt(irq_type irq_raised) +// This function marks a pending interrupt but does not raise it. 
+// It simply updates IF register and returns whether the IRQ needs +// to be raised (that is, IE/IME/CPSR enable the IRQ). +// Safe to call via dynarec without proper registers saved. +cpu_alert_type flag_interrupt(irq_type irq_raised) { - // The specific IRQ must be enabled in IE, master IRQ enable must be on, - // and it must be on in the flags. + // Flag interrupt write_ioreg(REG_IF, read_ioreg(REG_IF) | irq_raised); - check_interrupts(); + return check_interrupt(); } #ifndef HAVE_DYNAREC @@ -3233,7 +3244,7 @@ skip_instruction: if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; - if(cpu_alert) + if (cpu_alert & (CPU_ALERT_HALT | CPU_ALERT_IRQ)) goto alert; } while(cycles_remaining > 0); @@ -3746,7 +3757,7 @@ thumb_loop: if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; - if(cpu_alert) + if (cpu_alert & (CPU_ALERT_HALT | CPU_ALERT_IRQ)) goto alert; } while(cycles_remaining > 0); @@ -3758,16 +3769,8 @@ thumb_loop: continue; alert: - - if(cpu_alert != CPU_ALERT_IRQ) { + /* CPU stopped or switch to IRQ handler */ collapse_flags(); - - while(reg[CPU_HALT_STATE] != CPU_ACTIVE) { - cycles_remaining = update_gba(cycles_remaining); - if (reg[COMPLETED_FRAME]) - return; - } - } } } diff --git a/cpu.h b/cpu.h index db2f2d2..5cc61ff 100644 --- a/cpu.h +++ b/cpu.h @@ -46,13 +46,12 @@ typedef u32 cpu_mode_type; #define CPU_HALT 1 #define CPU_STOP 2 -typedef enum -{ - CPU_ALERT_NONE = 0, - CPU_ALERT_HALT = 1, - CPU_ALERT_SMC = 2, - CPU_ALERT_IRQ = 3 -} cpu_alert_type; +typedef u8 cpu_alert_type; + +#define CPU_ALERT_NONE 0 +#define CPU_ALERT_HALT (1 << 0) +#define CPU_ALERT_SMC (1 << 1) +#define CPU_ALERT_IRQ (1 << 2) typedef u16 irq_type; @@ -110,8 +109,9 @@ typedef enum extern u32 instruction_count; void execute_arm(u32 cycles); -cpu_alert_type check_interrupts(void); -void raise_interrupt(irq_type irq_raised); +void check_and_raise_interrupts(void); +cpu_alert_type check_interrupt(void); +cpu_alert_type 
flag_interrupt(irq_type irq_raised); void set_cpu_mode(cpu_mode_type new_mode); u32 function_cc execute_load_u8(u32 address); diff --git a/gba_memory.c b/gba_memory.c index 99ca684..9cdf1b5 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -1342,7 +1342,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value) // REG_IE case 0x200: write_ioreg(REG_IE, value); - return check_interrupts(); + return check_interrupt(); // Interrupt flag case 0x202: @@ -1357,7 +1357,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value) // REG_IME case 0x208: write_ioreg(REG_IME, value); - return check_interrupts(); + return check_interrupt(); // Halt case 0x300: @@ -1417,16 +1417,9 @@ cpu_alert_type function_cc write_io_register32(u32 address, u32 value) default: { - cpu_alert_type alert_low = - write_io_register16(address, value & 0xFFFF); - - cpu_alert_type alert_high = - write_io_register16(address + 2, value >> 16); - - if(alert_high) - return alert_high; - - return alert_low; + cpu_alert_type allow = write_io_register16(address, value & 0xFFFF); + cpu_alert_type alhigh = write_io_register16(address + 2, value >> 16); + return allow | alhigh; } } @@ -2739,8 +2732,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles) dst_ptr & 0xFFFFFF : 0x1000000 - (dst_ptr & 0xFFFFFF); u32 src1 = src_ptr + blen0 * dma_stride[dmach->source_direction]; u32 dst1 = dst_ptr + blen0 * dma_stride[dmach->dest_direction]; - ret = dma_transfer_copy(dmach, src_ptr, dst_ptr, blen0 >> tfsizes); - ret = dma_transfer_copy(dmach, src1, dst1, (byte_length - blen0) >> tfsizes); + ret = dma_transfer_copy(dmach, src_ptr, dst_ptr, blen0 >> tfsizes); + ret |= dma_transfer_copy(dmach, src1, dst1, (byte_length - blen0) >> tfsizes); } else if (dst_reg0 == dst_reg1) { // Dest stays within the region, source crosses over @@ -2748,8 +2741,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles) src_ptr & 0xFFFFFF : 0x1000000 - (src_ptr & 0xFFFFFF); u32 src1 = 
src_ptr + blen0 * dma_stride[dmach->source_direction]; u32 dst1 = dst_ptr + blen0 * dma_stride[dmach->dest_direction]; - ret = dma_transfer_copy(dmach, src_ptr, dst_ptr, blen0 >> tfsizes); - ret = dma_transfer_copy(dmach, src1, dst1, (byte_length - blen0) >> tfsizes); + ret = dma_transfer_copy(dmach, src_ptr, dst_ptr, blen0 >> tfsizes); + ret |= dma_transfer_copy(dmach, src1, dst1, (byte_length - blen0) >> tfsizes); } // TODO: We do not cover the three-region case, seems no game uses that? // Lucky Luke does cross dest region due to some off-by-one error. @@ -2762,11 +2755,9 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles) dmach->start_type = DMA_INACTIVE; } - if(dmach->irq) - { - raise_interrupt(IRQ_DMA0 << dma_chan); - ret = CPU_ALERT_IRQ; - } + // Trigger an IRQ if configured to do so. + if (dmach->irq) + ret |= flag_interrupt(IRQ_DMA0 << dma_chan); // This is an approximation for the most common case (no region cross) if (usedcycles) diff --git a/input.c b/input.c index f74b161..4a81a78 100644 --- a/input.c +++ b/input.c @@ -47,12 +47,18 @@ static void trigger_key(u32 key) if(p1_cnt >> 15) { if(key_intersection == (p1_cnt & 0x3FF)) - raise_interrupt(IRQ_KEYPAD); + { + flag_interrupt(IRQ_KEYPAD); + check_and_raise_interrupts(); + } } else { if(key_intersection) - raise_interrupt(IRQ_KEYPAD); + { + flag_interrupt(IRQ_KEYPAD); + check_and_raise_interrupts(); + } } } } diff --git a/main.c b/main.c index 5b84953..b97eecf 100644 --- a/main.c +++ b/main.c @@ -240,8 +240,12 @@ u32 function_cc update_gba(int remaining_cycles) write_ioreg(REG_DISPSTAT, dispstat); } - if(irq_raised) - raise_interrupt(irq_raised); + // Flag any V/H blank interrupts. + if (irq_raised) + flag_interrupt(irq_raised); + + // Raise any pending interrupts. This changes the CPU mode. 
+ check_and_raise_interrupts(); execute_cycles = MAX(video_count, 0); diff --git a/mips/mips_emit.h b/mips/mips_emit.h index 4c7da99..3d09cdb 100644 --- a/mips/mips_emit.h +++ b/mips/mips_emit.h @@ -2079,7 +2079,8 @@ static void emit_mem_access_loadop( #endif #define SMC_WRITE_OFF (10*16*4) /* 10 handlers (16 insts) */ -#define EWRAM_SPM_OFF (SMC_WRITE_OFF + 4*2) /* Just a jmp + nop */ +#define IOEPILOGUE_OFF (SMC_WRITE_OFF + 4*2) /* Trampolines are two insts */ +#define EWRAM_SPM_OFF (IOEPILOGUE_OFF + 4*2) // Describes a "plain" memory are, that is, an area that is just accessed // as normal memory (with some caveats tho). @@ -2557,14 +2558,16 @@ static void emit_saveaccess_stub(u8 **tr_ptr) { if (strop < 3) { mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay) - mips_emit_j(((u32)&write_io_epilogue) >> 2); - mips_emit_nop(); + // If I/O writes returns non-zero, means we need to process side-effects. + mips_emit_b(bne, reg_zero, reg_rv, branch_offset(&rom_translation_cache[IOEPILOGUE_OFF])); + mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3); // (in delay slot but not used) + emit_restore_regs(false); } else { mips_emit_nop(); mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3); emit_restore_regs(true); - generate_function_return_swap_delay(); } + generate_function_return_swap_delay(); } *tr_ptr = translation_ptr; @@ -2711,10 +2714,13 @@ void init_emitter(bool must_swap) { emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32 emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32 - // This is just a trampoline (for the SMC branches) + // Trampoline area mips_emit_j(((u32)&smc_write) >> 2); mips_emit_nop(); + mips_emit_j(((u32)&write_io_epilogue) >> 2); + mips_emit_nop(); + // Special trampoline for SP-relative ldm/stm (to EWRAM) generate_load_imm(reg_a1, 0x3FFFC); mips_emit_and(reg_a1, reg_a1, reg_a2); diff --git a/mips/mips_stub.S b/mips/mips_stub.S index 8e41b3d..e55a84b 100644 --- a/mips/mips_stub.S +++ b/mips/mips_stub.S @@ -113,6 
+113,10 @@ symbol: .equ GP_SAVE, (30 * 4) .equ GP_SAVE_HI, (31 * 4) +.equ CPU_ALERT_HALT, (1 << 0) +.equ CPU_ALERT_SMC, (1 << 1) +.equ CPU_ALERT_IRQ, (1 << 2) + .equ SPSR_BASE, (0x100 + 0x400 * 3) .equ REGMODE_BASE, (SPSR_BASE + 24) .equ SUPERVISOR_MODE, (0x13) @@ -319,17 +323,26 @@ defsymbl(mips_indirect_branch_dual) nop +# Called on I/O writes that have side-effects defsymbl(write_io_epilogue) - beq $2, $0, no_alert # 0 means nothing happened - addiu $4, $2, -2 # see if return value is 2 (delay slot) - beq $4, $0, smc_dma # is it an SMC alert? (return value = 2) - nop - addiu $4, $2, -3 # see if return value is 3 - beq $4, $0, irq_alert # is it an IRQ alert? (return value = 3) - nop - collapse_flags # make sure flags are good for update_gba + # Check for SMC, IRQ, and HALT bits + move $19, $2 # destroy $19 (saved ~reg_pc) since we won't return + collapse_flags # CPSR needs to be updated + andi $4, $19, CPU_ALERT_SMC # check if SMC code happened + beqz $4, 1f # skip if no SMC happened + cfncall flush_translation_cache_ram, 4 +1: + andi $4, $19, CPU_ALERT_IRQ # check if IRQ was raised + beqz $4, 2f # skip if no IRQ was raised + cfncall check_and_raise_interrupts, 9 + +2: + andi $4, $19, CPU_ALERT_HALT # check if CPU is halted + beqz $4, lookup_pc # continue running if not halted + + # Purposely fallthrough to alert_loop, wait for CPU wakeup alert_loop: move $4, reg_cycles # Remaining cycles as asg0 @@ -343,52 +356,34 @@ alert_loop: bne $1, $0, alert_loop # see if it hasn't changed nop - lw $4, REG_PC($16) # $4 = new PC - - j lookup_pc - nop - -irq_alert: - restore_registers - j lookup_pc # PC has changed, get a new one - nop - -no_alert: - restore_registers - lw $ra, REG_SAVE3($16) # restore return - jr $ra # we can return - nop - -smc_dma: - cfncall flush_translation_cache_ram, 4 - j lookup_pc - nop - -defsymbl(smc_write) - save_registers - sw $6, REG_PC($16) # save PC - cfncall flush_translation_cache_ram, 4 + # Fall through to lookup_pc to resume execution 
lookup_pc: - lw $2, REG_CPSR($16) # $2 = cpsr - andi $2, $2, 0x20 # isolate mode bit - beq $2, $0, lookup_pc_arm # if T bit is zero use arm handler - nop + extract_flags # $1 contains CPSR now +lookup_pc_noflags: + andi $1, $1, 0x20 # isolate mode bit + beq $1, $0, 1f # if T bit is zero use arm handler + lw $4, REG_PC($16) # load PC as arg 0 -lookup_pc_thumb: - lw $4, REG_PC($16) # load PC as arg 0 cfncall block_lookup_address_thumb, 2 # get Thumb address restore_registers jr $2 # jump to result nop - -lookup_pc_arm: - lw $4, REG_PC($16) # load PC as arg 0 +1: cfncall block_lookup_address_arm, 1 # get ARM address restore_registers jr $2 # jump to result nop + +defsymbl(smc_write) + sw $6, REG_PC($16) # save PC + save_registers + cfncall flush_translation_cache_ram, 4 + b lookup_pc_noflags + lw $1, REG_CPSR($16) # (delay) + + # Return the current cpsr defsymbl(execute_read_cpsr) @@ -622,6 +617,7 @@ fnptrs: .long execute_spsr_restore_body # 6 .long execute_store_cpsr_body # 7 .long process_cheats # 8 + .long check_and_raise_interrupts # 9 #if !defined(MMAP_JIT_CACHE) diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 90a156b..f4bd5e9 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -79,6 +79,10 @@ _##symbol: call fnm(name) #endif +.equ CPU_ALERT_HALT, (1 << 0) +.equ CPU_ALERT_SMC, (1 << 1) +.equ CPU_ALERT_IRQ, (1 << 2) + .equ REG_SP, (13 * 4) .equ REG_LR, (14 * 4) .equ REG_PC, (15 * 4) @@ -239,14 +243,27 @@ ext_store_backup8: ret - +# Handle I/O write side-effects: +# SMC: Flush RAM caches +# IRQ: Perform CPU mode change +# HLT: spin in the alert_loop until an IRQ is triggered write_epilogue: - cmp $0, %eax # 0 return means nothing happened - jz no_alert # if so we can leave + mov %eax, REG_SAVE(REG_BASE)# Save ret value for later use + collapse_flags # Consolidate CPSR + test $CPU_ALERT_SMC, %eax # Check for CPU_ALERT_SMC bit + jz 1f # skip if not set + CALL_FUNC(flush_translation_cache_ram) - collapse_flags # make sure flags are good for function call - cmp $2, 
%eax # see if it was an SMC trigger - je smc_write +1: + testl $CPU_ALERT_IRQ, REG_SAVE(REG_BASE) # Check for CPU_ALERT_IRQ bit + jz 2f # skip if not set + CALL_FUNC(check_and_raise_interrupts) + +2: + testl $CPU_ALERT_HALT, REG_SAVE(REG_BASE) # Check for CPU_ALERT_HALT bit + jz lookup_pc # if not halt, continue executing + + # explicit fallthrough to alert_loop, while CPU is halted alert_loop: mov REG_CYCLES, CARG1_REG # Load remaining cycles as arg0 @@ -265,8 +282,6 @@ alert_loop: jmp lookup_pc # pc has definitely changed -no_alert: - ret ext_store_eeprom: CALL_FUNC(write_eeprom) # perform eeprom write @@ -336,8 +351,9 @@ ext_##fname##_io##wsize: ;\ and $(0x3FF & addrm), %eax /* Addr wrap */ ;\ SETUP_ARGS ;\ CALL_FUNC(write_io_register##wsize) /* Call C code */ ;\ - jmp write_epilogue /* Might need an update */ ;\ - + cmp $0, %eax /* Check for side-effects */ ;\ + jnz write_epilogue /* Act on SMC and IRQs */ ;\ + ret ;\ write_stubs(store, 32, l, reg32, reg32, ~3, noop) write_stubs(store, 16, w, reg16, reg16, ~1, noop)