From 3d558413fd42078f112dfee4ccc2e3c36978923f Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 6 Mar 2021 21:15:22 +0100 Subject: [PATCH] Fix x86 dynarec, broken by d10c4afe The dynarec expects function args to be located in registers instead of the stack, which is not the default calling convention in GCC/clang. --- Makefile | 2 + common.h | 6 +++ cpu.h | 23 +++++------ cpu_threaded.c | 5 +-- gba_memory.c | 28 ++++++------- gba_memory.h | 14 +++---- x86/x86_emit.h | 104 +++++++++++++++++++++---------------------------- 7 files changed, 87 insertions(+), 95 deletions(-) diff --git a/Makefile b/Makefile index c728e9f..e846b3a 100644 --- a/Makefile +++ b/Makefile @@ -442,6 +442,8 @@ endif ifeq ($(CPU_ARCH), arm) DEFINES += -DARM_ARCH +else ifeq ($(CPU_ARCH), x86_32) +DEFINES += -DX86_ARCH endif diff --git a/common.h b/common.h index e3539f6..a85bc57 100644 --- a/common.h +++ b/common.h @@ -31,6 +31,12 @@ #define PATH_SEPARATOR_CHAR '/' #endif +#ifdef X86_ARCH + #define function_cc __attribute__((regparm(2))) +#else + #define function_cc +#endif + #ifdef ARM_ARCH #define _BSD_SOURCE // sync diff --git a/cpu.h b/cpu.h index 2659b10..3a0d85e 100644 --- a/cpu.h +++ b/cpu.h @@ -99,21 +99,22 @@ void execute_arm(u32 cycles); void raise_interrupt(irq_type irq_raised); void set_cpu_mode(cpu_mode_type new_mode); -u32 execute_load_u8(u32 address); -u32 execute_load_u16(u32 address); -u32 execute_load_u32(u32 address); -u32 execute_load_s8(u32 address); -u32 execute_load_s16(u32 address); -void execute_store_u8(u32 address, u32 source); -void execute_store_u16(u32 address, u32 source); -void execute_store_u32(u32 address, u32 source); -u32 execute_arm_translate(u32 cycles); +u32 function_cc execute_load_u8(u32 address); +u32 function_cc execute_load_u16(u32 address); +u32 function_cc execute_load_u32(u32 address); +u32 function_cc execute_load_s8(u32 address); +u32 function_cc execute_load_s16(u32 address); +void function_cc execute_store_u8(u32 address, u32 
source); +void function_cc execute_store_u16(u32 address, u32 source); +void function_cc execute_store_u32(u32 address, u32 source); +u32 function_cc execute_arm_translate(u32 cycles); void init_translater(void); void cpu_write_savestate(void); void cpu_read_savestate(void); -u8 *block_lookup_address_arm(u32 pc); -u8 *block_lookup_address_thumb(u32 pc); +u8 function_cc *block_lookup_address_arm(u32 pc); +u8 function_cc *block_lookup_address_thumb(u32 pc); +u8 function_cc *block_lookup_address_dual(u32 pc); s32 translate_block_arm(u32 pc, translation_region_type translation_region, u32 smc_enable); s32 translate_block_thumb(u32 pc, translation_region_type translation_region, diff --git a/cpu_threaded.c b/cpu_threaded.c index c7b8850..df563d2 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -2839,7 +2839,7 @@ u32 translation_flush_count = 0; #define block_lookup_address_builder(type) \ -u8 *block_lookup_address_##type(u32 pc) \ +u8 function_cc *block_lookup_address_##type(u32 pc) \ { \ u16 *location; \ u32 block_tag; \ @@ -2848,8 +2848,7 @@ u8 *block_lookup_address_##type(u32 pc) \ /* Starting at the beginning, we allow for one translation cache flush. 
*/ \ if(translation_recursion_level == 0){ \ translation_flush_count = 0; \ - \ - } \ + } \ block_lookup_address_pc_##type(); \ \ switch(pc >> 24) \ diff --git a/gba_memory.c b/gba_memory.c index 7675bb0..c0f76de 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -437,7 +437,7 @@ u32 eeprom_address = 0; s32 eeprom_counter = 0; u8 eeprom_buffer[8]; -void write_eeprom(u32 address, u32 value) +void function_cc write_eeprom(u32 address, u32 value) { switch(eeprom_mode) { @@ -554,7 +554,7 @@ void write_eeprom(u32 address, u32 value) value = current_instruction | (current_instruction << 16); \ } \ -u32 read_eeprom(void) +u32 function_cc read_eeprom(void) { u32 value; @@ -757,7 +757,7 @@ static cpu_alert_type trigger_dma(u32 dma_number, u32 value) #define access_register16_low(address) \ value = ((address16(io_registers, address + 2)) << 16) | value \ -cpu_alert_type write_io_register8(u32 address, u32 value) +cpu_alert_type function_cc write_io_register8(u32 address, u32 value) { switch(address) { @@ -1173,7 +1173,7 @@ cpu_alert_type write_io_register8(u32 address, u32 value) return CPU_ALERT_NONE; } -cpu_alert_type write_io_register16(u32 address, u32 value) +cpu_alert_type function_cc write_io_register16(u32 address, u32 value) { switch(address) { @@ -1426,7 +1426,7 @@ cpu_alert_type write_io_register16(u32 address, u32 value) } -cpu_alert_type write_io_register32(u32 address, u32 value) +cpu_alert_type function_cc write_io_register32(u32 address, u32 value) { switch(address) { @@ -1505,7 +1505,7 @@ cpu_alert_type write_io_register32(u32 address, u32 value) } \ -void write_backup(u32 address, u32 value) +void function_cc write_backup(u32 address, u32 value) { value &= 0xFF; @@ -1703,7 +1703,7 @@ static u32 encode_bcd(u8 value) \ address16(map, update_address & 0x7FFF) = _value \ -void write_rtc(u32 address, u32 value) +void function_cc write_rtc(u32 address, u32 value) { u32 rtc_page_index; u32 update_address; @@ -1970,14 +1970,14 @@ void write_rtc(u32 address, u32 value) 
break; \ } \ -u8 read_memory8(u32 address) +u8 function_cc read_memory8(u32 address) { u8 value; read_memory(8); return value; } -u16 read_memory16_signed(u32 address) +u16 function_cc read_memory16_signed(u32 address) { u16 value; @@ -1991,7 +1991,7 @@ u16 read_memory16_signed(u32 address) // unaligned reads are actually 32bit -u32 read_memory16(u32 address) +u32 function_cc read_memory16(u32 address) { u32 value; @@ -2010,7 +2010,7 @@ u32 read_memory16(u32 address) } -u32 read_memory32(u32 address) +u32 function_cc read_memory32(u32 address) { u32 value; if(address & 0x03) @@ -2028,19 +2028,19 @@ u32 read_memory32(u32 address) return value; } -cpu_alert_type write_memory8(u32 address, u8 value) +cpu_alert_type function_cc write_memory8(u32 address, u8 value) { write_memory(8); return CPU_ALERT_NONE; } -cpu_alert_type write_memory16(u32 address, u16 value) +cpu_alert_type function_cc write_memory16(u32 address, u16 value) { write_memory(16); return CPU_ALERT_NONE; } -cpu_alert_type write_memory32(u32 address, u32 value) +cpu_alert_type function_cc write_memory32(u32 address, u32 value) { write_memory(32); return CPU_ALERT_NONE; diff --git a/gba_memory.h b/gba_memory.h index 3a79d6e..946ef37 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -156,13 +156,13 @@ typedef enum FLASH_MANUFACTURER_SST = 0xBF } flash_manufacturer_id_type; -u8 read_memory8(u32 address); -u32 read_memory16(u32 address); -u16 read_memory16_signed(u32 address); -u32 read_memory32(u32 address); -cpu_alert_type write_memory8(u32 address, u8 value); -cpu_alert_type write_memory16(u32 address, u16 value); -cpu_alert_type write_memory32(u32 address, u32 value); +u8 function_cc read_memory8(u32 address); +u32 function_cc read_memory16(u32 address); +u16 function_cc read_memory16_signed(u32 address); +u32 function_cc read_memory32(u32 address); +cpu_alert_type function_cc write_memory8(u32 address, u8 value); +cpu_alert_type function_cc write_memory16(u32 address, u16 value); +cpu_alert_type 
function_cc write_memory32(u32 address, u32 value); extern u8 *memory_regions[16]; extern u32 memory_limits[16]; diff --git a/x86/x86_emit.h b/x86/x86_emit.h index c2f7783..4839618 100644 --- a/x86/x86_emit.h +++ b/x86/x86_emit.h @@ -28,7 +28,7 @@ void x86_indirect_branch_arm(u32 address); void x86_indirect_branch_thumb(u32 address); void x86_indirect_branch_dual(u32 address); -void execute_store_cpsr(u32 new_cpsr, u32 store_mask); +void function_cc execute_store_cpsr(u32 new_cpsr, u32 store_mask); typedef enum { @@ -523,7 +523,7 @@ typedef enum generate_function_call(execute_##name##_##flags_op##_reg); \ generate_mov(ireg, rv) \ -u32 execute_lsl_no_flags_reg(u32 value, u32 shift) +u32 function_cc execute_lsl_no_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -535,7 +535,7 @@ u32 execute_lsl_no_flags_reg(u32 value, u32 shift) return value; } -u32 execute_lsr_no_flags_reg(u32 value, u32 shift) +u32 function_cc execute_lsr_no_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -547,7 +547,7 @@ u32 execute_lsr_no_flags_reg(u32 value, u32 shift) return value; } -u32 execute_asr_no_flags_reg(u32 value, u32 shift) +u32 function_cc execute_asr_no_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -559,7 +559,7 @@ u32 execute_asr_no_flags_reg(u32 value, u32 shift) return value; } -u32 execute_ror_no_flags_reg(u32 value, u32 shift) +u32 function_cc execute_ror_no_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -570,7 +570,7 @@ u32 execute_ror_no_flags_reg(u32 value, u32 shift) } -u32 execute_lsl_flags_reg(u32 value, u32 shift) +u32 function_cc execute_lsl_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -592,7 +592,7 @@ u32 execute_lsl_flags_reg(u32 value, u32 shift) return value; } -u32 execute_lsr_flags_reg(u32 value, u32 shift) +u32 function_cc execute_lsr_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -614,7 +614,7 @@ u32 execute_lsr_flags_reg(u32 value, u32 shift) return value; } -u32 execute_asr_flags_reg(u32 value, u32 shift) +u32 
function_cc execute_asr_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -632,7 +632,7 @@ u32 execute_asr_flags_reg(u32 value, u32 shift) return value; } -u32 execute_ror_flags_reg(u32 value, u32 shift) +u32 function_cc execute_ror_flags_reg(u32 value, u32 shift) { if(shift != 0) { @@ -643,14 +643,14 @@ u32 execute_ror_flags_reg(u32 value, u32 shift) return value; } -u32 execute_rrx_flags(u32 value) +u32 function_cc execute_rrx_flags(u32 value) { u32 c_flag = reg[REG_C_FLAG]; reg[REG_C_FLAG] = value & 0x01; return (value >> 1) | (c_flag << 31); } -u32 execute_rrx(u32 value) +u32 function_cc execute_rrx(u32 value) { return (value >> 1) | (reg[REG_C_FLAG] << 31); } @@ -917,7 +917,7 @@ u32 execute_rrx(u32 value) generate_indirect_branch_arm(); \ } \ -u32 execute_spsr_restore(u32 address) +u32 function_cc execute_spsr_restore(u32 address) { if(reg[CPU_MODE] != MODE_USER) { @@ -1195,7 +1195,7 @@ typedef enum generate_store_reg_pc_no_flags(a0, rd); \ } \ -static void execute_mul_flags(u32 dest) +static void function_cc execute_mul_flags(u32 dest) { calculate_z_flag(dest); calculate_n_flag(dest); @@ -1223,7 +1223,7 @@ static void execute_mul_flags(u32 dest) arm_multiply_flags_##flags(); \ } \ -static void execute_mul_long_flags(u32 dest_lo, u32 dest_hi) +static void function_cc execute_mul_long_flags(u32 dest_lo, u32 dest_hi) { reg[REG_Z_FLAG] = (dest_lo == 0) & (dest_hi == 0); calculate_n_flag(dest_hi); @@ -1253,13 +1253,13 @@ static void execute_mul_long_flags(u32 dest_lo, u32 dest_hi) arm_multiply_long_flags_##flags(); \ } \ -u32 execute_read_cpsr(void) +u32 function_cc execute_read_cpsr(void) { collapse_flags(); return reg[REG_CPSR]; } -u32 execute_read_spsr(void) +u32 function_cc execute_read_spsr(void) { collapse_flags(); return spsr[reg[CPU_MODE]]; @@ -1272,7 +1272,7 @@ u32 execute_read_spsr(void) // store_mask and address are stored in the SAVE slots, since there's no real // register space to nicely pass them. 
-u32 execute_store_cpsr_body(u32 _cpsr) +u32 function_cc execute_store_cpsr_body(u32 _cpsr) { reg[REG_CPSR] = _cpsr; if(reg[REG_SAVE] & 0xFF) @@ -1293,7 +1293,7 @@ u32 execute_store_cpsr_body(u32 _cpsr) } -void execute_store_spsr(u32 new_spsr, u32 store_mask) +void function_cc execute_store_spsr(u32 new_spsr, u32 store_mask) { u32 _spsr = spsr[reg[CPU_MODE]]; spsr[reg[CPU_MODE]] = (new_spsr & store_mask) | (_spsr & (~store_mask)); @@ -1365,7 +1365,7 @@ void execute_store_spsr(u32 new_spsr, u32 store_mask) } \ #define access_memory_generate_read_function(mem_size, mem_type) \ -u32 execute_load_##mem_type(u32 address) \ +u32 function_cc execute_load_##mem_type(u32 address) \ { \ u32 dest; \ read_memory(mem_size, mem_type, address, dest); \ @@ -1377,29 +1377,13 @@ access_memory_generate_read_function(8, s8); access_memory_generate_read_function(16, u16); access_memory_generate_read_function(32, u32); -u32 execute_load_s16(u32 address) +u32 function_cc execute_load_s16(u32 address) { u32 dest; read_memory_s16(address, dest); return dest; } -#define access_memory_generate_write_function(mem_size, mem_type) \ -void execute_store_##mem_type(u32 address, u32 source) \ -{ \ - u8 *map; \ - \ - if(((address & aligned_address_mask##mem_size) == 0) && \ - (map = memory_map_write[address >> 15])) \ - { \ - *((mem_type *)((u8 *)map + (address & 0x7FFF))) = source; \ - } \ - else \ - { \ - write_memory##mem_size(address, source); \ - } \ -} \ - #define arm_access_memory_load(mem_type) \ cycle_count += 2; \ generate_function_call(execute_load_##mem_type); \ @@ -1492,7 +1476,7 @@ void execute_store_##mem_type(u32 address, u32 source) \ #define sprint_yes(access_type, pre_op, post_op, wb) \ printf("sbit on %s %s %s %s\n", #access_type, #pre_op, #post_op, #wb) \ -u32 execute_aligned_load32(u32 address) +u32 function_cc execute_aligned_load32(u32 address) { u8 *map; if(!(address & 0xF0000000) && (map = memory_map_read[address >> 15])) @@ -1501,7 +1485,7 @@ u32 
execute_aligned_load32(u32 address) return read_memory32(address); } -void execute_aligned_store32(u32 address, u32 source) +void function_cc execute_aligned_store32(u32 address, u32 source) { u8 *map; @@ -1729,7 +1713,7 @@ void execute_aligned_store32(u32 address, u32 source) // Operation types: lsl, lsr, asr, ror // Affects N/Z/C flags -u32 execute_lsl_reg_op(u32 value, u32 shift) +u32 function_cc execute_lsl_reg_op(u32 value, u32 shift) { if(shift != 0) { @@ -1753,7 +1737,7 @@ u32 execute_lsl_reg_op(u32 value, u32 shift) return value; } -u32 execute_lsr_reg_op(u32 value, u32 shift) +u32 function_cc execute_lsr_reg_op(u32 value, u32 shift) { if(shift != 0) { @@ -1777,7 +1761,7 @@ u32 execute_lsr_reg_op(u32 value, u32 shift) return value; } -u32 execute_asr_reg_op(u32 value, u32 shift) +u32 function_cc execute_asr_reg_op(u32 value, u32 shift) { if(shift != 0) { @@ -1797,7 +1781,7 @@ u32 execute_asr_reg_op(u32 value, u32 shift) return value; } -u32 execute_ror_reg_op(u32 value, u32 shift) +u32 function_cc execute_ror_reg_op(u32 value, u32 shift) { if(shift != 0) { @@ -1809,7 +1793,7 @@ u32 execute_ror_reg_op(u32 value, u32 shift) return value; } -u32 execute_lsl_imm_op(u32 value, u32 shift) +u32 function_cc execute_lsl_imm_op(u32 value, u32 shift) { if(shift != 0) { @@ -1821,7 +1805,7 @@ u32 execute_lsl_imm_op(u32 value, u32 shift) return value; } -u32 execute_lsr_imm_op(u32 value, u32 shift) +u32 function_cc execute_lsr_imm_op(u32 value, u32 shift) { if(shift != 0) { @@ -1838,7 +1822,7 @@ u32 execute_lsr_imm_op(u32 value, u32 shift) return value; } -u32 execute_asr_imm_op(u32 value, u32 shift) +u32 function_cc execute_asr_imm_op(u32 value, u32 shift) { if(shift != 0) { @@ -1855,7 +1839,7 @@ u32 execute_asr_imm_op(u32 value, u32 shift) return value; } -u32 execute_ror_imm_op(u32 value, u32 shift) +u32 function_cc execute_ror_imm_op(u32 value, u32 shift) { if(shift != 0) { @@ -2057,12 +2041,12 @@ u32 execute_ror_imm_op(u32 value, u32 shift) const u32 _sb = src_b \ 
#define data_proc_generate_logic_function(name, expr) \ -u32 execute_##name(u32 rm, u32 rn) \ +u32 function_cc execute_##name(u32 rm, u32 rn) \ { \ return expr; \ } \ \ -u32 execute_##name##s(u32 rm, u32 rn) \ +u32 function_cc execute_##name##s(u32 rm, u32 rn) \ { \ u32 dest = expr; \ calculate_z_flag(dest); \ @@ -2071,12 +2055,12 @@ u32 execute_##name##s(u32 rm, u32 rn) \ } \ #define data_proc_generate_logic_unary_function(name, expr) \ -u32 execute_##name(u32 rm) \ +u32 function_cc execute_##name(u32 rm) \ { \ return expr; \ } \ \ -u32 execute_##name##s(u32 rm) \ +u32 function_cc execute_##name##s(u32 rm) \ { \ u32 dest = expr; \ calculate_z_flag(dest); \ @@ -2086,12 +2070,12 @@ u32 execute_##name##s(u32 rm) \ #define data_proc_generate_sub_function(name, src_a, src_b) \ -u32 execute_##name(u32 rm, u32 rn) \ +u32 function_cc execute_##name(u32 rm, u32 rn) \ { \ return (src_a) - (src_b); \ } \ \ -u32 execute_##name##s(u32 rm, u32 rn) \ +u32 function_cc execute_##name##s(u32 rm, u32 rn) \ { \ flags_vars(src_a, src_b); \ dest = _sa - _sb; \ @@ -2100,12 +2084,12 @@ u32 execute_##name##s(u32 rm, u32 rn) \ } \ #define data_proc_generate_add_function(name, src_a, src_b) \ -u32 execute_##name(u32 rm, u32 rn) \ +u32 function_cc execute_##name(u32 rm, u32 rn) \ { \ return (src_a) + (src_b); \ } \ \ -u32 execute_##name##s(u32 rm, u32 rn) \ +u32 function_cc execute_##name##s(u32 rm, u32 rn) \ { \ flags_vars(src_a, src_b); \ dest = _sa + _sb; \ @@ -2114,7 +2098,7 @@ u32 execute_##name##s(u32 rm, u32 rn) \ } \ #define data_proc_generate_sub_test_function(name, src_a, src_b) \ -void execute_##name(u32 rm, u32 rn) \ +void function_cc execute_##name(u32 rm, u32 rn) \ { \ flags_vars(src_a, src_b); \ dest = _sa - _sb; \ @@ -2122,7 +2106,7 @@ void execute_##name(u32 rm, u32 rn) \ } \ #define data_proc_generate_add_test_function(name, src_a, src_b) \ -void execute_##name(u32 rm, u32 rn) \ +void function_cc execute_##name(u32 rm, u32 rn) \ { \ flags_vars(src_a, src_b); \ dest = _sa + 
_sb; \ @@ -2130,14 +2114,14 @@ void execute_##name(u32 rm, u32 rn) \ } \ #define data_proc_generate_logic_test_function(name, expr) \ -void execute_##name(u32 rm, u32 rn) \ +void function_cc execute_##name(u32 rm, u32 rn) \ { \ u32 dest = expr; \ calculate_z_flag(dest); \ calculate_n_flag(dest); \ } \ -u32 execute_neg(u32 rm) \ +u32 function_cc execute_neg(u32 rm) \ { \ u32 dest = 0 - rm; \ calculate_flags_sub(dest, 0, rm); \ @@ -2166,7 +2150,7 @@ data_proc_generate_logic_test_function(teq, rn ^ rm); data_proc_generate_sub_test_function(cmp, rn, rm); data_proc_generate_add_test_function(cmn, rn, rm); -static void execute_swi(u32 pc) +static void function_cc execute_swi(u32 pc) { reg_mode[MODE_SUPERVISOR][6] = pc; collapse_flags(); @@ -2289,7 +2273,7 @@ u8 swi_hle_handle[256] = 0x0 // SWI 2A: SoundGetJumpList }; -void swi_hle_div(void) +void function_cc swi_hle_div(void) { s32 result = (s32)reg[0] / (s32)reg[1]; reg[1] = (s32)reg[0] % (s32)reg[1];