diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f5fceb0..f0b7f52 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -1,15 +1,15 @@ + +#include "../gpsp_config.h" + +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + +.text .align 2 -.globl invalidate_icache_region -.globl invalidate_cache_region - -.globl memory_map_read -.globl reg -.globl palette_ram -.globl palette_ram_converted -.globl reg_mode -.globl spsr - #define REG_R0 (0 * 4) #define REG_R1 (1 * 4) #define REG_R2 (2 * 4) @@ -178,10 +178,7 @@ #define arm_update_gba_builder(name, mode, return_op) ;\ ;\ .align 2 ;\ -.globl arm_update_gba_##name ;\ -.globl _arm_update_gba_##name ;\ -arm_update_gba_##name: ;\ -_arm_update_gba_##name: ;\ +defsymbl(arm_update_gba_##name) ;\ load_pc_##return_op() ;\ str r0, [reg_base, #REG_PC] /* write out the PC */;\ ;\ @@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add) @ r0: PC to branch to .align 2 -.globl arm_indirect_branch_arm -.globl _arm_indirect_branch_arm -arm_indirect_branch_arm: -_arm_indirect_branch_arm: +defsymbl(arm_indirect_branch_arm) save_flags() call_c_function(block_lookup_address_arm) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_thumb -.globl _arm_indirect_branch_thumb -arm_indirect_branch_thumb: -_arm_indirect_branch_thumb: +defsymbl(arm_indirect_branch_thumb) save_flags() call_c_function(block_lookup_address_thumb) restore_flags() bx r0 .align 2 -.globl arm_indirect_branch_dual_arm -.globl _arm_indirect_branch_dual_arm -arm_indirect_branch_dual_arm: -_arm_indirect_branch_dual_arm: +defsymbl(arm_indirect_branch_dual_arm) save_flags() tst r0, #0x01 @ check lower bit bne 1f @ if set going to Thumb mode @@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm: bx r0 @ return .align 2 -.globl arm_indirect_branch_dual_thumb -.globl _arm_indirect_branch_dual_thumb -arm_indirect_branch_dual_thumb: -_arm_indirect_branch_dual_thumb: +defsymbl(arm_indirect_branch_dual_thumb) save_flags() tst r0, 
#0x01 @ check lower bit beq 1f @ if set going to ARM mode @@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb: @ r2: current PC .align 2 -.globl execute_store_cpsr -.globl _execute_store_cpsr -execute_store_cpsr: -_execute_store_cpsr: +defsymbl(execute_store_cpsr) save_flags() and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr @@ -354,10 +336,7 @@ _execute_store_cpsr: @ r1: bitmask of which bits in spsr to update .align 2 -.globl execute_store_spsr -.globl _execute_store_spsr -execute_store_spsr: -_execute_store_spsr: +defsymbl(execute_store_spsr) ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr @@ -369,10 +348,7 @@ _execute_store_spsr: @ r0: spsr .align 2 -.globl execute_read_spsr -.globl _execute_read_spsr -execute_read_spsr: -_execute_read_spsr: +defsymbl(execute_read_spsr) ldr r0, =spsr @ r0 = spsr ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE] @@ -385,10 +361,7 @@ _execute_read_spsr: @ r0: current pc .align 2 -.globl execute_spsr_restore -.globl _execute_spsr_restore -execute_spsr_restore: -_execute_spsr_restore: +defsymbl(execute_spsr_restore) save_flags() ldr r1, =spsr @ r1 = spsr ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode @@ -425,10 +398,7 @@ _execute_spsr_restore: #define execute_swi_builder(mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_##mode ;\ -.globl _execute_swi_##mode ;\ -execute_swi_##mode: ;\ -_execute_swi_##mode: ;\ +defsymbl(execute_swi_##mode) ;\ save_flags() ;\ ldr r1, =reg_mode /* r1 = reg_mode */;\ /* reg_mode[MODE_SUPERVISOR][6] = pc */;\ @@ -460,10 +430,7 @@ execute_swi_builder(thumb) #define execute_swi_function_builder(swi_function, mode) ;\ ;\ .align 2 ;\ -.globl execute_swi_hle_##swi_function##_##mode ;\ -.globl _execute_swi_hle_##swi_function##_##mode ;\ -execute_swi_hle_##swi_function##_##mode: ;\ -_execute_swi_hle_##swi_function##_##mode: ;\ 
+defsymbl(execute_swi_hle_##swi_function##_##mode) ;\ save_flags() ;\ store_registers_##mode() ;\ call_c_function(execute_swi_hle_##swi_function##_c) ;\ @@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb) @ Uses sp as reg_base; must hold consistently true. .align 2 -.globl execute_arm_translate -.globl _execute_arm_translate -execute_arm_translate: -_execute_arm_translate: +defsymbl(execute_arm_translate) @ save the registers to be able to return later stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr } @@ -615,10 +579,7 @@ ext_store_ignore: #define execute_store_builder(store_type, store_op, load_op) ;\ ;\ .align 2 ;\ -.globl execute_store_u##store_type ;\ -.globl _execute_store_u##store_type ;\ -execute_store_u##store_type: ;\ -_execute_store_u##store_type: ;\ +defsymbl(execute_store_u##store_type) ;\ execute_store_body(store_type, store_op) ;\ ;\ ext_store_u##store_type: ;\ @@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) -.globl execute_store_u32_safe -.globl _execute_store_u32_safe -execute_store_u32_safe: -_execute_store_u32_safe: +defsymbl(execute_store_u32_safe) execute_store_body(32_safe, str) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -822,10 +780,7 @@ lookup_pc_arm: #define execute_load_builder(load_type, load_function, load_op, mask) ;\ ;\ .align 2 ;\ -.globl execute_load_##load_type ;\ -.globl _execute_load_##load_type ;\ -execute_load_##load_type: ;\ -_execute_load_##load_type: ;\ +defsymbl(execute_load_##load_type) ;\ save_flags() ;\ tst r0, mask /* make sure address is in range */;\ bne ext_load_##load_type /* if not do ext load */;\ @@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000) .data -memory_map_read: +defsymbl(memory_map_read) .space 0x8000 -palette_ram: +defsymbl(palette_ram) .space 0x400 -palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -spsr: +defsymbl(spsr) .space 24 -reg_mode: 
+defsymbl(reg_mode) .space 196 -.globl reg -.globl _reg -reg: +defsymbl(reg) .space 0x100, 0 +@ Vita and 3DS (and of course mmap) map their own cache sections through some +@ platform-specific mechanisms. +#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS) + +@ Make this section executable! +.text +#ifdef __ANDROID__ +@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros +@ TODO: Revisit this whenever we upgrade to the latest clang NDK +.section .jit,"awx",%progbits +#else +.section .jit,"awx",%nobits +#endif +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/cpu.h b/cpu.h index faa3bc1..fc57626 100644 --- a/cpu.h +++ b/cpu.h @@ -20,6 +20,8 @@ #ifndef CPU_H #define CPU_H +#include "gpsp_config.h" + // System mode and user mode are represented as the same here typedef enum @@ -120,18 +122,6 @@ s32 translate_block_arm(u32 pc, translation_region_type translation_region, s32 translate_block_thumb(u32 pc, translation_region_type translation_region, u32 smc_enable); -#if defined(PSP) - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) -#else - #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) - #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) - #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) -#endif - -#define STUB_ARENA_SIZE (4*1024) - #if defined(HAVE_MMAP) extern u8* rom_translation_cache; extern u8* ram_translation_cache; @@ -147,8 +137,8 @@ extern int sceBlock; #else extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; -extern u32 stub_arena[STUB_ARENA_SIZE]; #endif +extern u32 stub_arena[STUB_ARENA_SIZE / 4]; extern u8 *rom_translation_ptr; extern u8 *ram_translation_ptr; @@ -162,9 +152,6 @@ extern u32 
translation_gate_target_pc[MAX_TRANSLATION_GATES]; extern u32 in_interrupt; -#define ROM_BRANCH_HASH_SIZE (1024 * 64) - -/* EDIT: Shouldn't this be extern ?! */ extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE]; void flush_translation_cache_rom(void); diff --git a/cpu_threaded.c b/cpu_threaded.c index 555b9c6..7f12b4f 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -47,26 +47,10 @@ u8* ram_translation_cache_ptr; u8 *rom_translation_ptr = rom_translation_cache; u8 *ram_translation_ptr = ram_translation_cache; #else - -#ifdef __ANDROID__ -// Workaround for 'attempt to map x bytes at offset y' -__asm__(".section .jit,\"awx\",%progbits"); -#else -__asm__(".section .jit,\"awx\",%nobits"); -#endif - -u32 stub_arena[STUB_ARENA_SIZE] - __attribute__ ((aligned(4),section(".jit"))); -u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *rom_translation_ptr = rom_translation_cache; - -u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE] - __attribute__ ((aligned(4),section(".jit"))); u8 *ram_translation_ptr = ram_translation_cache; - -__asm__(".section .text"); #endif +/* Note, see stub files for more cache definitions */ u32 iwram_code_min = 0xFFFFFFFF; u32 iwram_code_max = 0xFFFFFFFF; diff --git a/gpsp_config.h b/gpsp_config.h new file mode 100644 index 0000000..ea8db95 --- /dev/null +++ b/gpsp_config.h @@ -0,0 +1,22 @@ + +#ifndef GPSP_CONFIG_H +#define GPSP_CONFIG_H + +/* Cache sizes and their config knobs */ +#if defined(PSP) + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024) +#else + #define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5) + #define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2) + #define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32) +#endif + +/* This is MIPS specific for now */ +#define STUB_ARENA_SIZE (16*1024) + +/* Hash table size for ROM trans cache lookups */ +#define ROM_BRANCH_HASH_SIZE (1024 
* 64) + +#endif diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b75f7f5..b996f2b 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2618,11 +2618,7 @@ static void emit_mem_access_loadop( #define genccall(fn) mips_emit_jal(((u32)fn) >> 2); #endif -// Stub memory map: -// 0 .. 63 First patch handler [#0] -// 448 .. 511 Last patch handler [#7] -// 512+ smc_write handler -#define SMC_WRITE_OFF32 160 +#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */ // Describes a "plain" memory are, that is, an area that is just accessed // as normal memory (with some caveats tho). @@ -2862,8 +2858,7 @@ static void emit_pmemst_stub( } // If the data is non zero, we just wrote over code // Local-jump to the smc_write (which lives at offset:0) - unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1)); - mips_emit_b(bne, reg_zero, reg_temp, instoffset); + mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32])); } // Store the data (delay slot from the SMC branch) diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 5e5a479..3d046d8 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -16,6 +16,8 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .set mips32r2 .align 4 @@ -645,3 +647,22 @@ fnptrs: .long execute_spsr_restore_body # 6 .long execute_store_cpsr_body # 7 +#if !defined(HAVE_MMAP) + +# Make this section executable! 
+.text +.section .jit,"awx",%nobits +.align 2 +.global stub_arena +.global rom_translation_cache +.global ram_translation_cache + +stub_arena: + .space STUB_ARENA_SIZE +rom_translation_cache: + .space ROM_TRANSLATION_CACHE_SIZE +ram_translation_cache: + .space RAM_TRANSLATION_CACHE_SIZE + +#endif + diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 595a789..9dd3fdd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -16,21 +16,18 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +#include "../gpsp_config.h" + .align 4 +#define defsymbl(symbol) \ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + #ifndef _WIN32 -#define _x86_update_gba x86_update_gba -#define _x86_indirect_branch_arm x86_indirect_branch_arm -#define _x86_indirect_branch_thumb x86_indirect_branch_thumb -#define _x86_indirect_branch_dual x86_indirect_branch_dual -#define _execute_store_u8 execute_store_u8 -#define _execute_store_u16 execute_store_u16 -#define _execute_store_u32 execute_store_u32 -#define _execute_store_cpsr execute_store_cpsr -#define _execute_arm_translate execute_arm_translate -#define _memory_map_read memory_map_read -#define _reg reg -#define _reg_mode reg_mode +# External symbols (data + functions) #define _oam_update oam_update #define _iwram iwram #define _ewram ewram @@ -38,7 +35,6 @@ #define _oam_ram oam_ram #define _bios_rom bios_rom #define _io_registers io_registers -#define _spsr spsr #define _update_gba update_gba #define _block_lookup_address_arm block_lookup_address_arm @@ -47,8 +43,6 @@ #define _write_io_register8 write_io_register8 #define _write_io_register16 write_io_register16 #define _write_io_register32 write_io_register32 -#define _palette_ram palette_ram -#define _palette_ram_converted palette_ram_converted #define _flush_translation_cache_ram flush_translation_cache_ram #define _write_eeprom write_eeprom #define _write_backup write_backup @@ -56,25 
+50,7 @@ #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _x86_update_gba -.global _x86_indirect_branch_arm -.global _x86_indirect_branch_thumb -.global _x86_indirect_branch_dual -.global _execute_store_u8 -.global _execute_store_u16 -.global _execute_store_u32 -.global _execute_store_cpsr -.global _execute_arm_translate - -.global _memory_map_read -.global _reg -.global _reg_mode -.global _spsr -.global _palette_ram -.global _palette_ram_converted - .global _oam_update - .global _iwram .global _ewram .global _vram @@ -147,7 +123,7 @@ st: .asciz "u\n" -_x86_update_gba: +defsymbl(x86_update_gba) mov %eax, REG_PC(%ebx) # current PC = eax collapse_flags # update cpsr, trashes ecx and edx @@ -171,14 +147,14 @@ _x86_update_gba: # eax: GBA address to branch to # edi: Cycle counter -_x86_indirect_branch_arm: +defsymbl(x86_indirect_branch_arm) call _block_lookup_address_arm jmp *%eax # For indirect branches that'll definitely go to Thumb. In # Thumb mode any indirect branches except for BX. 
-_x86_indirect_branch_thumb: +defsymbl(x86_indirect_branch_thumb) call _block_lookup_address_thumb jmp *%eax @@ -186,7 +162,7 @@ _x86_indirect_branch_thumb: # mainly BX (also data processing to PC with S bit set, be # sure to adjust the target with a 1 in the lowest bit for this) -_x86_indirect_branch_dual: +defsymbl(x86_indirect_branch_dual) call _block_lookup_address_dual jmp *%eax @@ -297,7 +273,7 @@ ext_store_u8_jtable: # edx: value to write # ecx: current pc -_execute_store_u8: +defsymbl(execute_store_u8) mov %ecx, REG_PC(%ebx) # write out the PC mov %eax, %ecx # ecx = address shr $24, %ecx # ecx = address >> 24 @@ -383,7 +359,7 @@ ext_store_u16_jtable: .long ext_store_eeprom # 0x0D EEPROM (possibly) .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u16: +defsymbl(execute_store_u16) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x01, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -400,6 +376,7 @@ ext_store_iwram32: and $0x7FFF, %eax # wrap around address mov %edx, (_iwram+0x8000)(%eax) # perform store cmpl $0, _iwram(%eax) # Check SMC mirror + jne smc_write ret @@ -456,7 +433,7 @@ ext_store_u32_jtable: .long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit -_execute_store_u32: +defsymbl(execute_store_u32) mov %ecx, REG_PC(%ebx) # write out the PC and $~0x03, %eax # fix alignment mov %eax, %ecx # ecx = address @@ -470,7 +447,7 @@ _execute_store_u32: # %eax = new_cpsr # %edx = store_mask -_execute_store_cpsr: +defsymbl(execute_store_cpsr) mov %edx, REG_SAVE(%ebx) # save store_mask mov %ecx, REG_SAVE2(%ebx) # save PC too @@ -515,7 +492,7 @@ lookup_pc_arm: # eax: cycle counter -_execute_arm_translate: +defsymbl(execute_arm_translate) # Save main context, since we need to return gracefully pushl %ebx pushl %esi @@ -556,18 +533,30 @@ return_to_main: .data .align 64 -_reg: +defsymbl(reg) .space 0x100, 0 -_palette_ram: +defsymbl(palette_ram) .space 0x400 -_palette_ram_converted: +defsymbl(palette_ram_converted) .space 0x400 -_spsr: 
+defsymbl(spsr) .space 24 -_reg_mode: +defsymbl(reg_mode) .space 196 -_memory_map_read: +defsymbl(memory_map_read) .space 0x8000 +#if !defined(HAVE_MMAP) + +# Make this section executable! +.text +.section .jit,"awx",%nobits +.align 4 +defsymbl(rom_translation_cache) + .space ROM_TRANSLATION_CACHE_SIZE +defsymbl(ram_translation_cache) + .space RAM_TRANSLATION_CACHE_SIZE + +#endif