Move caches to stub files to get around gcc 10

It seems that using the __attribute__ magic for sections is not the best way
of doing this, since it injects some default attributes that collide
with the user-defined ones. Using assembly is far easier in this case.

Reworked the definitions a bit to make them easier to import from assembly.
Also wrapped things in macros for an easier and less verbose
handling of the symbol-prefix issue.
David Guillen Fandos 2021-03-23 19:47:51 +01:00
parent 11ec213c99
commit ff510e7f7a
7 changed files with 138 additions and 166 deletions
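
For context, here is a minimal sketch of the conflict and of the fix, using the names from the diffs below (the exact directives GCC 10 emits on its own are an assumption based on the description above):

@ Old approach (see the cpu.c hunk below): the section flags come from inline asm,
@ and gcc 10 then re-opens .jit with its own default attributes for the variable,
@ clashing with the hand-written "awx" flags:
@
@   __asm__(".section .jit,\"awx\",%nobits");
@   u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]
@       __attribute__ ((aligned(4),section(".jit")));
@
@ New approach: the caches are plain .space reservations in the stub files,
@ where the section flags are fully under our control (defsymbl and the size
@ macros come from the stub file and gpsp_config.h respectively):
        .section .jit,"awx",%nobits     @ writable + executable, no bytes in the binary
        .align 4
defsymbl(rom_translation_cache)
        .space ROM_TRANSLATION_CACHE_SIZE
defsymbl(ram_translation_cache)
        .space RAM_TRANSLATION_CACHE_SIZE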


@ -1,15 +1,15 @@
#include "../gpsp_config.h"
#define defsymbl(symbol) \
.global symbol ; \
.global _##symbol ; \
symbol: \
_##symbol:
.text
.align 2
.globl invalidate_icache_region
.globl invalidate_cache_region
.globl memory_map_read
.globl reg
.globl palette_ram
.globl palette_ram_converted
.globl reg_mode
.globl spsr
#define REG_R0 (0 * 4)
#define REG_R1 (1 * 4)
#define REG_R2 (2 * 4)
@ -178,10 +178,7 @@
#define arm_update_gba_builder(name, mode, return_op) ;\
;\
.align 2 ;\
.globl arm_update_gba_##name ;\
.globl _arm_update_gba_##name ;\
arm_update_gba_##name: ;\
_arm_update_gba_##name: ;\
defsymbl(arm_update_gba_##name) ;\
load_pc_##return_op() ;\
str r0, [reg_base, #REG_PC] /* write out the PC */;\
;\
@ -243,30 +240,21 @@ arm_update_gba_builder(idle_thumb, thumb, add)
@ r0: PC to branch to
.align 2
.globl arm_indirect_branch_arm
.globl _arm_indirect_branch_arm
arm_indirect_branch_arm:
_arm_indirect_branch_arm:
defsymbl(arm_indirect_branch_arm)
save_flags()
call_c_function(block_lookup_address_arm)
restore_flags()
bx r0
.align 2
.globl arm_indirect_branch_thumb
.globl _arm_indirect_branch_thumb
arm_indirect_branch_thumb:
_arm_indirect_branch_thumb:
defsymbl(arm_indirect_branch_thumb)
save_flags()
call_c_function(block_lookup_address_thumb)
restore_flags()
bx r0
.align 2
.globl arm_indirect_branch_dual_arm
.globl _arm_indirect_branch_dual_arm
arm_indirect_branch_dual_arm:
_arm_indirect_branch_dual_arm:
defsymbl(arm_indirect_branch_dual_arm)
save_flags()
tst r0, #0x01 @ check lower bit
bne 1f @ if set going to Thumb mode
@ -286,10 +274,7 @@ _arm_indirect_branch_dual_arm:
bx r0 @ return
.align 2
.globl arm_indirect_branch_dual_thumb
.globl _arm_indirect_branch_dual_thumb
arm_indirect_branch_dual_thumb:
_arm_indirect_branch_dual_thumb:
defsymbl(arm_indirect_branch_dual_thumb)
save_flags()
tst r0, #0x01 @ check lower bit
beq 1f @ if set going to ARM mode
@ -317,10 +302,7 @@ _arm_indirect_branch_dual_thumb:
@ r2: current PC
.align 2
.globl execute_store_cpsr
.globl _execute_store_cpsr
execute_store_cpsr:
_execute_store_cpsr:
defsymbl(execute_store_cpsr)
save_flags()
and reg_flags, r0, r1 @ reg_flags = new_cpsr & store_mask
ldr r0, [reg_base, #REG_CPSR] @ r0 = cpsr
@ -354,10 +336,7 @@ _execute_store_cpsr:
@ r1: bitmask of which bits in spsr to update
.align 2
.globl execute_store_spsr
.globl _execute_store_spsr
execute_store_spsr:
_execute_store_spsr:
defsymbl(execute_store_spsr)
ldr r1, =spsr @ r1 = spsr
ldr r2, [reg_base, #CPU_MODE] @ r2 = CPU_MODE
str r0, [r1, r2, lsl #2] @ spsr[CPU_MODE] = new_spsr
@ -369,10 +348,7 @@ _execute_store_spsr:
@ r0: spsr
.align 2
.globl execute_read_spsr
.globl _execute_read_spsr
execute_read_spsr:
_execute_read_spsr:
defsymbl(execute_read_spsr)
ldr r0, =spsr @ r0 = spsr
ldr r1, [reg_base, #CPU_MODE] @ r1 = CPU_MODE
ldr r0, [r0, r1, lsl #2] @ r0 = spsr[CPU_MODE]
@ -385,10 +361,7 @@ _execute_read_spsr:
@ r0: current pc
.align 2
.globl execute_spsr_restore
.globl _execute_spsr_restore
execute_spsr_restore:
_execute_spsr_restore:
defsymbl(execute_spsr_restore)
save_flags()
ldr r1, =spsr @ r1 = spsr
ldr r2, [reg_base, #CPU_MODE] @ r2 = cpu_mode
@ -425,10 +398,7 @@ _execute_spsr_restore:
#define execute_swi_builder(mode) ;\
;\
.align 2 ;\
.globl execute_swi_##mode ;\
.globl _execute_swi_##mode ;\
execute_swi_##mode: ;\
_execute_swi_##mode: ;\
defsymbl(execute_swi_##mode) ;\
save_flags() ;\
ldr r1, =reg_mode /* r1 = reg_mode */;\
/* reg_mode[MODE_SUPERVISOR][6] = pc */;\
@ -460,10 +430,7 @@ execute_swi_builder(thumb)
#define execute_swi_function_builder(swi_function, mode) ;\
;\
.align 2 ;\
.globl execute_swi_hle_##swi_function##_##mode ;\
.globl _execute_swi_hle_##swi_function##_##mode ;\
execute_swi_hle_##swi_function##_##mode: ;\
_execute_swi_hle_##swi_function##_##mode: ;\
defsymbl(execute_swi_hle_##swi_function##_##mode) ;\
save_flags() ;\
store_registers_##mode() ;\
call_c_function(execute_swi_hle_##swi_function##_c) ;\
@ -485,10 +452,7 @@ execute_swi_function_builder(div, thumb)
@ Uses sp as reg_base; must hold consistently true.
.align 2
.globl execute_arm_translate
.globl _execute_arm_translate
execute_arm_translate:
_execute_arm_translate:
defsymbl(execute_arm_translate)
@ save the registers to be able to return later
stmdb sp!, { r4, r5, r6, r7, r8, r9, r10, r11, r12, lr }
@ -615,10 +579,7 @@ ext_store_ignore:
#define execute_store_builder(store_type, store_op, load_op) ;\
;\
.align 2 ;\
.globl execute_store_u##store_type ;\
.globl _execute_store_u##store_type ;\
execute_store_u##store_type: ;\
_execute_store_u##store_type: ;\
defsymbl(execute_store_u##store_type) ;\
execute_store_body(store_type, store_op) ;\
;\
ext_store_u##store_type: ;\
@ -676,10 +637,7 @@ execute_store_builder(32, str, ldr)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
.globl execute_store_u32_safe
.globl _execute_store_u32_safe
execute_store_u32_safe:
_execute_store_u32_safe:
defsymbl(execute_store_u32_safe)
execute_store_body(32_safe, str)
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@ -822,10 +780,7 @@ lookup_pc_arm:
#define execute_load_builder(load_type, load_function, load_op, mask) ;\
;\
.align 2 ;\
.globl execute_load_##load_type ;\
.globl _execute_load_##load_type ;\
execute_load_##load_type: ;\
_execute_load_##load_type: ;\
defsymbl(execute_load_##load_type) ;\
save_flags() ;\
tst r0, mask /* make sure address is in range */;\
bne ext_load_##load_type /* if not do ext load */;\
@ -859,19 +814,38 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000)
.data
memory_map_read:
defsymbl(memory_map_read)
.space 0x8000
palette_ram:
defsymbl(palette_ram)
.space 0x400
palette_ram_converted:
defsymbl(palette_ram_converted)
.space 0x400
spsr:
defsymbl(spsr)
.space 24
reg_mode:
defsymbl(reg_mode)
.space 196
.globl reg
.globl _reg
reg:
defsymbl(reg)
.space 0x100, 0
@ Vita and 3DS (and of course mmap) map their own cache sections through some
@ platform-specific mechanisms.
#if !defined(HAVE_MMAP) && !defined(VITA) && !defined(_3DS)
@ Make this section executable!
.text
#ifdef __ANDROID__
@ Unfortunately Android builds don't like nobits, so we ship a ton of zeros
@ TODO: Revisit this whenever we upgrade to the latest clang NDK
.section .jit,"awx",%progbits
#else
.section .jit,"awx",%nobits
#endif
.align 4
defsymbl(rom_translation_cache)
.space ROM_TRANSLATION_CACHE_SIZE
defsymbl(ram_translation_cache)
.space RAM_TRANSLATION_CACHE_SIZE
#endif
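
The defsymbl() wrapper introduced above exists because of the C symbol-prefix convention: toolchains that decorate C identifiers with a leading underscore (Windows, Apple platforms) need every symbol shared with C exported under both names. For instance, defsymbl(reg) expands to:

        .global reg
        .global _reg
reg:
_reg:
        .space 0x100, 0         @ the storage that follows then backs both names

which is what lets the x86 stub in the last diff drop its long per-platform "#define _reg reg" alias list and the duplicated .global pairs.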

cpu.h

@ -20,6 +20,8 @@
#ifndef CPU_H
#define CPU_H
#include "gpsp_config.h"
// System mode and user mode are represented as the same here
typedef enum
@ -120,18 +122,6 @@ s32 translate_block_arm(u32 pc, translation_region_type translation_region,
s32 translate_block_thumb(u32 pc, translation_region_type translation_region,
u32 smc_enable);
#if defined(PSP)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
#else
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
#endif
#define STUB_ARENA_SIZE (4*1024)
#if defined(HAVE_MMAP)
extern u8* rom_translation_cache;
extern u8* ram_translation_cache;
@ -147,8 +137,8 @@ extern int sceBlock;
#else
extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE];
extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE];
extern u32 stub_arena[STUB_ARENA_SIZE];
#endif
extern u32 stub_arena[STUB_ARENA_SIZE / 4];
extern u8 *rom_translation_ptr;
extern u8 *ram_translation_ptr;
@ -162,9 +152,6 @@ extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES];
extern u32 in_interrupt;
#define ROM_BRANCH_HASH_SIZE (1024 * 64)
/* EDIT: Shouldn't this be extern ?! */
extern u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
void flush_translation_cache_rom(void);
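
One subtle point in the cpu.h hunk above: STUB_ARENA_SIZE is now expressed in bytes (16*1024 in the new gpsp_config.h) rather than in u32 words, which is why the C declaration becomes stub_arena[STUB_ARENA_SIZE / 4]; the arena itself stays 16 KiB. A byte count is what the assembly side wants, since it feeds .space directly, roughly as in the MIPS stub diff near the end of this commit:

# sketch; matches the MIPS stub hunk further down
        .section .jit,"awx",%nobits
        .global stub_arena
stub_arena:
        .space STUB_ARENA_SIZE          # 16*1024 bytes = 4096 u32 slots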


@ -47,26 +47,10 @@ u8* ram_translation_cache_ptr;
u8 *rom_translation_ptr = rom_translation_cache;
u8 *ram_translation_ptr = ram_translation_cache;
#else
#ifdef __ANDROID__
// Workaround for 'attempt to map x bytes at offset y'
__asm__(".section .jit,\"awx\",%progbits");
#else
__asm__(".section .jit,\"awx\",%nobits");
#endif
u32 stub_arena[STUB_ARENA_SIZE]
__attribute__ ((aligned(4),section(".jit")));
u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]
__attribute__ ((aligned(4),section(".jit")));
u8 *rom_translation_ptr = rom_translation_cache;
u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]
__attribute__ ((aligned(4),section(".jit")));
u8 *ram_translation_ptr = ram_translation_cache;
__asm__(".section .text");
#endif
/* Note, see stub files for more cache definitions */
u32 iwram_code_min = 0xFFFFFFFF;
u32 iwram_code_max = 0xFFFFFFFF;

gpsp_config.h (new file)

@ -0,0 +1,22 @@
#ifndef GPSP_CONFIG_H
#define GPSP_CONFIG_H
/* Cache sizes and their config knobs */
#if defined(PSP)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
#else
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
#endif
/* This is MIPS specific for now */
#define STUB_ARENA_SIZE (16*1024)
/* Hash table size for ROM trans cache lookups */
#define ROM_BRANCH_HASH_SIZE (1024 * 64)
#endif


@ -2618,11 +2618,7 @@ static void emit_mem_access_loadop(
#define genccall(fn) mips_emit_jal(((u32)fn) >> 2);
#endif
// Stub memory map:
// 0 .. 63 First patch handler [#0]
// 448 .. 511 Last patch handler [#7]
// 512+ smc_write handler
#define SMC_WRITE_OFF32 160
#define SMC_WRITE_OFF32 (10*16) /* 10 handlers (16 insts) */
// Describes a "plain" memory are, that is, an area that is just accessed
// as normal memory (with some caveats tho).
@ -2862,8 +2858,7 @@ static void emit_pmemst_stub(
}
// If the data is non zero, we just wrote over code
// Local-jump to the smc_write (which lives at offset:0)
unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1));
mips_emit_b(bne, reg_zero, reg_temp, instoffset);
mips_emit_b(bne, reg_zero, reg_temp, branch_offset(&stub_arena[SMC_WRITE_OFF32]));
}
// Store the data (delay slot from the SMC branch)
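
To make the emitter change above concrete: branch_offset() turns the absolute address of the smc_write handler, which sits at word offset SMC_WRITE_OFF32 inside stub_arena, into the PC-relative word delta a MIPS branch encodes; presumably the same (target minus next-instruction address, in words) value the removed line computed by hand. The emitted store-with-SMC-check then looks roughly like this (register choices and the mirror offset are illustrative, not the emitter's actual allocation):

        lw      $t0, 0x8000($a0)        # read the SMC mirror word for this address
        bne     $t0, $zero, smc_write   # non-zero mirror: the write hits translated code
        sw      $a1, 0($a0)             # the store itself sits in the branch delay slot

where smc_write stands for &stub_arena[SMC_WRITE_OFF32].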


@ -16,6 +16,8 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../gpsp_config.h"
.set mips32r2
.align 4
@ -645,3 +647,22 @@ fnptrs:
.long execute_spsr_restore_body # 6
.long execute_store_cpsr_body # 7
#if !defined(HAVE_MMAP)
# Make this section executable!
.text
.section .jit,"awx",%nobits
.align 2
.global stub_arena
.global rom_translation_cache
.global ram_translation_cache
stub_arena:
.space STUB_ARENA_SIZE
rom_translation_cache:
.space ROM_TRANSLATION_CACHE_SIZE
ram_translation_cache:
.space RAM_TRANSLATION_CACHE_SIZE
#endif


@ -16,21 +16,18 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../gpsp_config.h"
.align 4
#define defsymbl(symbol) \
.global symbol ; \
.global _##symbol ; \
symbol: \
_##symbol:
#ifndef _WIN32
#define _x86_update_gba x86_update_gba
#define _x86_indirect_branch_arm x86_indirect_branch_arm
#define _x86_indirect_branch_thumb x86_indirect_branch_thumb
#define _x86_indirect_branch_dual x86_indirect_branch_dual
#define _execute_store_u8 execute_store_u8
#define _execute_store_u16 execute_store_u16
#define _execute_store_u32 execute_store_u32
#define _execute_store_cpsr execute_store_cpsr
#define _execute_arm_translate execute_arm_translate
#define _memory_map_read memory_map_read
#define _reg reg
#define _reg_mode reg_mode
# External symbols (data + functions)
#define _oam_update oam_update
#define _iwram iwram
#define _ewram ewram
@ -38,7 +35,6 @@
#define _oam_ram oam_ram
#define _bios_rom bios_rom
#define _io_registers io_registers
#define _spsr spsr
#define _update_gba update_gba
#define _block_lookup_address_arm block_lookup_address_arm
@ -47,8 +43,6 @@
#define _write_io_register8 write_io_register8
#define _write_io_register16 write_io_register16
#define _write_io_register32 write_io_register32
#define _palette_ram palette_ram
#define _palette_ram_converted palette_ram_converted
#define _flush_translation_cache_ram flush_translation_cache_ram
#define _write_eeprom write_eeprom
#define _write_backup write_backup
@ -56,25 +50,7 @@
#define _execute_store_cpsr_body execute_store_cpsr_body
#endif
.global _x86_update_gba
.global _x86_indirect_branch_arm
.global _x86_indirect_branch_thumb
.global _x86_indirect_branch_dual
.global _execute_store_u8
.global _execute_store_u16
.global _execute_store_u32
.global _execute_store_cpsr
.global _execute_arm_translate
.global _memory_map_read
.global _reg
.global _reg_mode
.global _spsr
.global _palette_ram
.global _palette_ram_converted
.global _oam_update
.global _iwram
.global _ewram
.global _vram
@ -147,7 +123,7 @@
st:
.asciz "u\n"
_x86_update_gba:
defsymbl(x86_update_gba)
mov %eax, REG_PC(%ebx) # current PC = eax
collapse_flags # update cpsr, trashes ecx and edx
@ -171,14 +147,14 @@ _x86_update_gba:
# eax: GBA address to branch to
# edi: Cycle counter
_x86_indirect_branch_arm:
defsymbl(x86_indirect_branch_arm)
call _block_lookup_address_arm
jmp *%eax
# For indirect branches that'll definitely go to Thumb. In
# Thumb mode any indirect branches except for BX.
_x86_indirect_branch_thumb:
defsymbl(x86_indirect_branch_thumb)
call _block_lookup_address_thumb
jmp *%eax
@ -186,7 +162,7 @@ _x86_indirect_branch_thumb:
# mainly BX (also data processing to PC with S bit set, be
# sure to adjust the target with a 1 in the lowest bit for this)
_x86_indirect_branch_dual:
defsymbl(x86_indirect_branch_dual)
call _block_lookup_address_dual
jmp *%eax
@ -297,7 +273,7 @@ ext_store_u8_jtable:
# edx: value to write
# ecx: current pc
_execute_store_u8:
defsymbl(execute_store_u8)
mov %ecx, REG_PC(%ebx) # write out the PC
mov %eax, %ecx # ecx = address
shr $24, %ecx # ecx = address >> 24
@ -383,7 +359,7 @@ ext_store_u16_jtable:
.long ext_store_eeprom # 0x0D EEPROM (possibly)
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
_execute_store_u16:
defsymbl(execute_store_u16)
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x01, %eax # fix alignment
mov %eax, %ecx # ecx = address
@ -400,6 +376,7 @@ ext_store_iwram32:
and $0x7FFF, %eax # wrap around address
mov %edx, (_iwram+0x8000)(%eax) # perform store
cmpl $0, _iwram(%eax) # Check SMC mirror
jne smc_write
ret
@ -456,7 +433,7 @@ ext_store_u32_jtable:
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
_execute_store_u32:
defsymbl(execute_store_u32)
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x03, %eax # fix alignment
mov %eax, %ecx # ecx = address
@ -470,7 +447,7 @@ _execute_store_u32:
# %eax = new_cpsr
# %edx = store_mask
_execute_store_cpsr:
defsymbl(execute_store_cpsr)
mov %edx, REG_SAVE(%ebx) # save store_mask
mov %ecx, REG_SAVE2(%ebx) # save PC too
@ -515,7 +492,7 @@ lookup_pc_arm:
# eax: cycle counter
_execute_arm_translate:
defsymbl(execute_arm_translate)
# Save main context, since we need to return gracefully
pushl %ebx
pushl %esi
@ -556,18 +533,30 @@ return_to_main:
.data
.align 64
_reg:
defsymbl(reg)
.space 0x100, 0
_palette_ram:
defsymbl(palette_ram)
.space 0x400
_palette_ram_converted:
defsymbl(palette_ram_converted)
.space 0x400
_spsr:
defsymbl(spsr)
.space 24
_reg_mode:
defsymbl(reg_mode)
.space 196
_memory_map_read:
defsymbl(memory_map_read)
.space 0x8000
#if !defined(HAVE_MMAP)
# Make this section executable!
.text
.section .jit,"awx",%nobits
.align 4
defsymbl(rom_translation_cache)
.space ROM_TRANSLATION_CACHE_SIZE
defsymbl(ram_translation_cache)
.space RAM_TRANSLATION_CACHE_SIZE
#endif
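
A closing note on the section flags used throughout: %nobits makes .jit behave like .bss, so the multi-megabyte caches take no space in the object file, whereas the Android builds fall back to %progbits and actually ship the zero bytes, to dodge the "attempt to map x bytes at offset y" error mentioned in the cpu.c comment. In directive form:

        .section .jit,"awx",%nobits     @ like .bss: allocated at load time, no bytes in the file
        .section .jit,"awx",%progbits   @ Android workaround: the zeros are written to the binary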