From ff510e7f7a0c04c7862e598e8bfc75747f3bf7d1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Tue, 23 Mar 2021 19:47:51 +0100 Subject: [PATCH] Move caches to stub files to get around gcc 10 Seems that using the __attribute__ magic for sections is not the best way of doing this, since it injects some default attributes that collide with the user-defined ones. Using assembly is far easier in this case. Reworked definitions a bit to make it easier to import from assembly. Also wrapped the symbol definitions in macros for an easier and less verbose implementation of the symbol prefix issue. --- arm/arm_stub.S | 130 +++++++++++++++++++----------------------------- cpu.h | 19 ++----- cpu_threaded.c | 18 +------ gpsp_config.h | 22 ++++++++ psp/mips_emit.h | 9 +--- psp/mips_stub.S | 21 ++++++++ x86/x86_stub.S | 85 ++++++++++++++----------------- 7 files changed, 138 insertions(+), 166 deletions(-) create mode 100644 gpsp_config.h diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f5fceb0..f0b7f52 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -1,15 +1,15 @@ + +#include "../gpsp_config.h" + +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + +.text .align 2 -.globl invalidate_icache_region -.globl invalidate_cache_region - -.globl memory_map_read -.globl reg -.globl palette_ram -.globl palette_ram_converted -.globl reg_mode -.globl spsr - #define REG_R0 (0 * 4) #define REG_R1 (1 * 4) #define REG_R2 (2 * 4) @@ -178,10 +178,7 @@ #define arm_update_gba_builder(name, mode, return_op) ;\ ;\ .align 2 ;\ -.globl arm_update_gba_##name ;\ -.globl _arm_update_gba_##name ;\ -arm_update_gba_##name: ;\ -_arm_update_gba_##name: ;\ +defsymbl(arm_update_gba_##name) ;\ load_pc_##return_op() ;\ str r0, [reg_base, #REG_PC] /* write out the PC */;\ ;\ @@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ r0: PC to branch to .align 2 -.globl arm_indirect_branch_arm -.globl _arm_indirect_branch_arm -arm_indirect_branch_arm: 
-_arm_indirect_branch_arm: +defsymbl(arm_indirect_branch_arm) save_flags() call_c_function(block_lookup_address_arm) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_thumb -.globl _arm_indirect_branch_thumb -arm_indirect_branch_thumb: -_arm_indirect_branch_thumb: +defsymbl(arm_indirect_branch_thumb) save_flags() call_c_function(block_lookup_address_thumb) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_dual_arm -.globl _arm_indirect_branch_dual_arm -arm_indirect_branch_dual_arm: -_arm_indirect_branch_dual_arm: +defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit bne 1f @ if set going to Thumb mode @@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm: bx r0 @ return .align 2 -.globl arm_indirect_branch_dual_thumb -.globl _arm_indirect_branch_dual_thumb -arm_indirect_branch_dual_thumb: -_arm_indirect_branch_dual_thumb: +defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, #0x01 @ check lower bit beq 1f @ if set going to ARM mode @@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb: @ r2: current PC .align 2 -.globl execute_store_cpsr -.globl _execute_store_cpsr -execute_store_cpsr: -_execute_store_cpsr: +defsymbl(execute_store_cpsr) save_flags() and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr @@ -354,10 +336,7 @@ _execute_store_cpsr: @ r1: bitmask of which bits in spsr to update .align 2 -.globl execute_store_spsr -.globl _execute_store_spsr -execute_store_spsr: -_execute_store_spsr: +defsymbl(execute_store_spsr) ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr @@ -369,10 +348,7 @@ _execute_store_spsr: @ r0: spsr .align 2 -.globl execute_read_spsr -.globl _execute_read_spsr -execute_read_spsr: -_execute_read_spsr: +defsymbl(execute_read_spsr) ldr r0, =spsr @ r0 = spsr ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] @@ -385,10 +361,7 @@ 
_execute_read_spsr: @ r0: current pc .align 2 -.globl execute_spsr_restore -.globl _execute_spsr_restore -execute_spsr_restore: -_execute_spsr_restore: +defsymbl(execute_spsr_restore) save_flags() ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode @@ -425,10 +398,7 @@ _execute_spsr_restore: #define execute_swi_builder(mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_##mode ;\ -.globl _execute_swi_##mode ;\ -execute_swi_##mode: ;\ -_execute_swi_##mode: ;\ +defsymbl(execute_swi_##mode) ;\ save_flags() ;\ ldr r1, =reg_mode /* r1 = reg_mode */;\ /* reg_mode[MODE_SUPERVISOR][6] = pc */;\ @@ -460,10 +430,7 @@ execute_swi_builder(thumb) #define execute_swi_function_builder(swi_function, mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_hle_##swi_function##_##mode ;\ -.globl _execute_swi_hle_##swi_function##_##mode ;\ -execute_swi_hle_##swi_function##_##mode: ;\ -_execute_swi_hle_##swi_function##_##mode: ;\ +defsymbl(execute_swi_hle_##swi_function##_##mode) ;\ save_flags() ;\ store_registers_##mode() ;\ call_c_function(execute_swi_hle_##swi_function##_c) ;\ @@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb) @ Uses sp as reg_base; must hold consistently true. 
.align 2 -.globl execute_arm_translate -.globl _execute_arm_translate -execute_arm_translate: -_execute_arm_translate: +defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } @@ -615,10 +579,7 @@ ext_store_ignore: #define execute_store_builder(store_type, store_op, load_op) ;\ ;\ .align 2 ;\ -.globl execute_store_u##store_type ;\ -.globl _execute_store_u##store_type ;\ -execute_store_u##store_type: ;\ -_execute_store_u##store_type: ;\ +defsymbl(execute_store_u##store_type) ;\ execute_store_body(store_type, store_op) ;\ ;\ ext_store_u##store_type: ;\ @@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) -.globl execute_store_u32_safe -.globl _execute_store_u32_safe -execute_store_u32_safe: -_execute_store_u32_safe: +defsymbl(execute_store_u32_safe) execute_store_body(32_safe, str) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -822,10 +780,7 @@ lookup_pc_arm: #define execute_load_builder(load_type, load_function, load_op, mask) ;\ ;\ .align 2 ;\ -.globl execute_load_##load_type ;\ -.globl _execute_load_##load_type ;\ -execute_load_##load_type: ;\ -_execute_load_##load_type: ;\ +defsymbl(execute_load_##load_type) ;\ save_flags() ;\ tst r0, mask /* make sure address is in range */;\ bne ext_load_##load_type /* if not do ext load */;\ @@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000) .data -memory_map_read: +defsymbl(memory_map_read) .space 0x8000 -palette_ram: +defsymbl(palette_ram) .space 0x400 -palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -spsr: +defsymbl(spsr) .space 24 -reg_mode: +defsymbl(reg_mode) .space 196 -.globl reg -.globl _reg -reg: +defsymbl(reg) .space 0x100, 0 +@ Vita and 3DS (and of course mmap) map their own cache sections through some +@ platform-specific mechanisms. 
+#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS) + +@ Make this section executable! +.text +#ifdef __ANDROID__ +@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros +@ TODO: Revisit this whenever we upgrade to the latest clang NDK +.section .jit,"awx",%progbits +#else +.section .jit,"awx",%nobits +#endif +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/cpu.h b/cpu.h index faa3bc1..fc57626 100644 --- a/cpu.h +++ b/cpu.h @@ -20,6 +20,8 @@ #ifndef CPU_H #define CPU_H +#include "gpsp_config.h" + // System mode and user mode are represented as the same here typedef enum @@ -120,18 +122,6 @@ s32 translate_block_arm(u32 pc, translation_region_type translation_region, s32 translate_block_thumb(u32 pc, translation_region_type translation_region, u32 smc_enable); -#if defined(PSP) - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) -#else - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) -#endif - -#define STUB_ARENA_SIZE (4*1024) - #if defined(HAVE_MMAP) extern u8* rom_translation_cache; extern u8* ram_translation_cache; @@ -147,8 +137,8 @@ extern int sceBlock; #else extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; -extern u32 stub_arena[STUB_ARENA_SIZE]; #endif +extern u32 stub_arena[STUB_ARENA_SIZE / 4]; extern u8 *rom_translation_ptr; extern u8 *ram_translation_ptr; @@ -162,9 +152,6 @@ extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; extern u32 in_interrupt; -#define ROM_BRANCH_HASH_SIZE (1024 * 64) - -/* EDIT: Shouldn't this be extern ?! 
*/ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; void flush_translation_cache_rom(void); diff --git a/cpu_threaded.c b/cpu_threaded.c index 555b9c6..7f12b4f 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -47,26 +47,10 @@ u8* ram_translation_cache_ptr; u8 *rom_translation_ptr = rom_translation_cache; u8 *ram_translation_ptr = ram_translation_cache; #else - -#ifdef __ANDROID__ -// Workaround for 'attempt to map x bytes at offset y' -__asm__(".section .jit,\"awx\",%progbits"); -#else -__asm__(".section .jit,\"awx\",%nobits"); -#endif - -u32 stub_arena[STUB_ARENA_SIZE] - __attribute__ ((aligned(4),section(".jit"))); -u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *rom_translation_ptr = rom_translation_cache; - -u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *ram_translation_ptr = ram_translation_cache; - -__asm__(".section .text"); #endif +/* Note, see stub files for more cache definitions */ u32 iwram_code_min = 0xFFFFFFFF; u32 iwram_code_max = 0xFFFFFFFF; diff --git a/gpsp_config.h b/gpsp_config.h new file mode 100644 index 0000000..ea8db95 --- /dev/null +++ b/gpsp_config.h @@ -0,0 +1,22 @@ + +#ifndef GPSP_CONFIG_H +#define GPSP_CONFIG_H + +/* Cache sizes and their config knobs */ +#if defined(PSP) + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) +#else + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) +#endif + +/* This is MIPS specific for now */ +#define STUB_ARENA_SIZE (16*1024) + +/* Hash table size for ROM trans cache lookups */ +#define ROM_BRANCH_HASH_SIZE (1024 * 64) + +#endif diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b75f7f5..b996f2b 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2618,11 +2618,7 
@@ static void emit_mem_access_loadop( #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); #endif -// Stub memory map: -// 0 .. 63 First patch handler [#0] -// 448 .. 511 Last patch handler [#7] -// 512+ smc_write handler -#define SMC_WRITE_OFF32 160 +#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */ // Describes a "plain" memory are, that is, an area that is just accessed // as normal memory (with some caveats tho). @@ -2862,8 +2858,7 @@ static void emit_pmemst_stub( } // If the data is non zero, we just wrote over code // Local-jump to the smc_write (which lives at offset:0) - unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); - mips_emit_b(bne, reg_zero, reg_temp, instoffset); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32])); } // Store the data (delay slot from the SMC branch) diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 5e5a479..3d046d8 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -16,6 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .set mips32r2 .align 4 @@ -645,3 +647,22 @@ fnptrs: .long execute_spsr_restore_body # 6 .long execute_store_cpsr_body # 7 +#if !defined(HAVE_MMAP) + +# Make this section executable! 
+.text +.section .jit,"awx",%nobits +.align 2 +.global stub_arena +.global rom_translation_cache +.global ram_translation_cache + +stub_arena: + .space STUB_ARENA_SIZE +rom_translation_cache: + .space ROM_TRANSLATION_CACHE_SIZE +ram_translation_cache: + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 595a789..9dd3fdd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -16,21 +16,18 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .align 4 +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + #ifndef _WIN32 -#define _x86_update_gba x86_update_gba -#define _x86_indirect_branch_arm x86_indirect_branch_arm -#define _x86_indirect_branch_thumb x86_indirect_branch_thumb -#define _x86_indirect_branch_dual x86_indirect_branch_dual -#define _execute_store_u8 execute_store_u8 -#define _execute_store_u16 execute_store_u16 -#define _execute_store_u32 execute_store_u32 -#define _execute_store_cpsr execute_store_cpsr -#define _execute_arm_translate execute_arm_translate -#define _memory_map_read memory_map_read -#define _reg reg -#define _reg_mode reg_mode +# External symbols (data + functions) #define _oam_update oam_update #define _iwram iwram #define _ewram ewram @@ -38,7 +35,6 @@ #define _oam_ram oam_ram #define _bios_rom bios_rom #define _io_registers io_registers -#define _spsr spsr #define _update_gba update_gba #define _block_lookup_address_arm block_lookup_address_arm @@ -47,8 +43,6 @@ #define _write_io_register8 write_io_register8 #define _write_io_register16 write_io_register16 #define _write_io_register32 write_io_register32 -#define _palette_ram palette_ram -#define _palette_ram_converted palette_ram_converted #define _flush_translation_cache_ram flush_translation_cache_ram #define _write_eeprom write_eeprom #define _write_backup write_backup @@ -56,25 
+50,7 @@ #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _x86_update_gba -.global _x86_indirect_branch_arm -.global _x86_indirect_branch_thumb -.global _x86_indirect_branch_dual -.global _execute_store_u8 -.global _execute_store_u16 -.global _execute_store_u32 -.global _execute_store_cpsr -.global _execute_arm_translate - -.global _memory_map_read -.global _reg -.global _reg_mode -.global _spsr -.global _palette_ram -.global _palette_ram_converted - .global _oam_update - .global _iwram .global _ewram .global _vram @@ -147,7 +123,7 @@ st: .asciz "u\n" -_x86_update_gba: +defsymbl(x86_update_gba) mov %eax, REG_PC(%ebx) # current PC = eax collapse_flags # update cpsr, trashes ecx and edx @@ -171,14 +147,14 @@ _x86_update_gba: # eax: GBA address to branch to # edi: Cycle counter -_x86_indirect_branch_arm: +defsymbl(x86_indirect_branch_arm) call _block_lookup_address_arm jmp *%eax # For indirect branches that'll definitely go to Thumb. In # Thumb mode any indirect branches except for BX. 
-_x86_indirect_branch_thumb: +defsymbl(x86_indirect_branch_thumb) call _block_lookup_address_thumb jmp *%eax @@ -186,7 +162,7 @@ _x86_indirect_branch_thumb: # mainly BX (also data processing to PC with S bit set, be # sure to adjust the target with a 1 in the lowest bit for this) -_x86_indirect_branch_dual: +defsymbl(x86_indirect_branch_dual) call _block_lookup_address_dual jmp *%eax @@ -297,7 +273,7 @@ ext_store_u8_jtable: # edx: value to write # ecx: current pc -_execute_store_u8: +defsymbl(execute_store_u8) mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 @@ -383,7 +359,7 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u16: +defsymbl(execute_store_u16) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -400,6 +376,7 @@ ext_store_iwram32: and $0x7FFF, %eax # wrap around address mov %edx, (_iwram+0x8000)(%eax) # perform store cmpl $0, _iwram(%eax) # Check SMC mirror + jne smc_write ret @@ -456,7 +433,7 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u32: +defsymbl(execute_store_u32) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -470,7 +447,7 @@ _execute_store_u32: # %eax = new_cpsr # %edx = store_mask -_execute_store_cpsr: +defsymbl(execute_store_cpsr) mov %edx, REG_SAVE(%ebx) # save store_mask mov %ecx, REG_SAVE2(%ebx) # save PC too @@ -515,7 +492,7 @@ lookup_pc_arm: # eax: cycle counter -_execute_arm_translate: +defsymbl(execute_arm_translate) # Save main context, since we need to return gracefully pushl %ebx pushl %esi @@ -556,18 +533,30 @@ return_to_main: .data .align 64 -_reg: +defsymbl(reg) .space 0x100, 0 -_palette_ram: +defsymbl(palette_ram) .space 0x400 -_palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -_spsr: 
+defsymbl(spsr) .space 24 -_reg_mode: +defsymbl(reg_mode) .space 196 -_memory_map_read: +defsymbl(memory_map_read) .space 0x8000 +#if !defined(HAVE_MMAP) + +# Make this section executable! +.text +.section .jit,"awx",%nobits +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif