From 89bd699837ed5519b19c5ff7a0bc6fd892de416d Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 5 Mar 2021 01:14:31 +0100 Subject: [PATCH 1/2] Reduce executable size by 90% Turns out most of that file ends up in the JIT section, which is RWX and not a very nice way to run code really (security issues aside). This also makes it possible to build that file with -ggdb; otherwise it complains about stuff. --- cpu_threaded.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpu_threaded.c b/cpu_threaded.c index 083c027..c7b8850 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -54,7 +54,7 @@ u8 *rom_translation_ptr = rom_translation_cache; u8 *ram_translation_ptr = ram_translation_cache; u8 *bios_translation_ptr = bios_translation_cache; #elif defined(ARM_MEMORY_DYNAREC) -__asm__(".section .jit,\"awx\",%progbits"); +__asm__(".section .jit,\"awx\",%nobits"); u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE] __attribute__ ((aligned(4),section(".jit"))); @@ -67,6 +67,8 @@ u8 *ram_translation_ptr = ram_translation_cache; u8 bios_translation_cache[BIOS_TRANSLATION_CACHE_SIZE] __attribute__ ((aligned(4),section(".jit"))); u8 *bios_translation_ptr = bios_translation_cache; + +__asm__(".section .text"); #else u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]; u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE]; From 7bebd3051c8723f785263f074ac5b180a9cc0852 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 5 Mar 2021 01:15:55 +0100 Subject: [PATCH 2/2] Remove dead code Turns out this was not even used! No need to fix it then!
--- arm/arm_emit.h | 26 ++--- arm/arm_stub.S | 271 ++++++------------------------------------------- 2 files changed, 37 insertions(+), 260 deletions(-) diff --git a/arm/arm_emit.h b/arm/arm_emit.h index 7e90b06..669d422 100644 --- a/arm/arm_emit.h +++ b/arm/arm_emit.h @@ -63,12 +63,6 @@ void execute_store_u32_safe(u32 address, u32 source); (((((u32)offset - (u32)source) - 8) >> 2) & 0xFFFFFF) \ -/* reg_base_offset is the amount of bytes after reg_base where the registers - * actually begin. */ - -#define reg_base_offset 1024 - - #define reg_a0 ARMREG_R0 #define reg_a1 ARMREG_R1 #define reg_a2 ARMREG_R2 @@ -486,8 +480,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) u32 reg_use = arm_register_allocation[reg_index]; \ if(reg_use == mem_reg) \ { \ - ARM_LDR_IMM(0, scratch_reg, reg_base, \ - (reg_base_offset + (reg_index * 4))); \ + ARM_LDR_IMM(0, scratch_reg, reg_base, (reg_index * 4)); \ return scratch_reg; \ } \ \ @@ -517,8 +510,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) { \ if(arm_register_allocation[reg_index] == mem_reg) \ { \ - ARM_STR_IMM(0, scratch_reg, reg_base, \ - (reg_base_offset + (reg_index * 4))); \ + ARM_STR_IMM(0, scratch_reg, reg_base, (reg_index * 4)); \ } \ } \ \ @@ -559,7 +551,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) } \ else \ { \ - ARM_LDR_IMM(0, ireg, reg_base, (reg_base_offset + (reg_index * 4))); \ + ARM_LDR_IMM(0, ireg, reg_base, (reg_index * 4)); \ } \ } \ \ @@ -572,7 +564,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) } \ else \ { \ - ARM_STR_IMM(0, ireg, reg_base, (reg_base_offset + (reg_index * 4))); \ + ARM_STR_IMM(0, ireg, reg_base, (reg_index * 4)); \ } \ } \ @@ -583,8 +575,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) u32 reg_use = thumb_register_allocation[reg_index]; \ if(reg_use == mem_reg) \ { \ - ARM_LDR_IMM(0, scratch_reg, reg_base, \ - (reg_base_offset + (reg_index * 4))); \ + ARM_LDR_IMM(0, scratch_reg, reg_base, 
(reg_index * 4)); \ return scratch_reg; \ } \ \ @@ -614,8 +605,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) { \ if(thumb_register_allocation[reg_index] == mem_reg) \ { \ - ARM_STR_IMM(0, scratch_reg, reg_base, \ - (reg_base_offset + (reg_index * 4))); \ + ARM_STR_IMM(0, scratch_reg, reg_base, (reg_index * 4)); \ } \ } \ \ @@ -628,7 +618,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) } \ else \ { \ - ARM_LDR_IMM(0, ireg, reg_base, (reg_base_offset + (reg_index * 4))); \ + ARM_LDR_IMM(0, ireg, reg_base, (reg_index * 4)); \ } \ } \ \ @@ -641,7 +631,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) } \ else \ { \ - ARM_STR_IMM(0, ireg, reg_base, (reg_base_offset + (reg_index * 4))); \ + ARM_STR_IMM(0, ireg, reg_base, (reg_index * 4)); \ } \ } \ diff --git a/arm/arm_stub.S b/arm/arm_stub.S index 77a353a..687aacf 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -7,40 +7,38 @@ .globl memory_map_write .globl reg -#define REG_BASE_OFFSET 1024 +#define REG_R0 (0 * 4) +#define REG_R1 (1 * 4) +#define REG_R2 (2 * 4) +#define REG_R3 (3 * 4) +#define REG_R4 (4 * 4) +#define REG_R5 (5 * 4) +#define REG_R6 (6 * 4) +#define REG_R7 (7 * 4) +#define REG_R8 (8 * 4) +#define REG_R9 (9 * 4) +#define REG_R10 (10 * 4) +#define REG_R11 (11 * 4) +#define REG_R12 (12 * 4) +#define REG_R13 (13 * 4) +#define REG_R14 (14 * 4) +#define REG_SP (13 * 4) +#define REG_LR (14 * 4) +#define REG_PC (15 * 4) -#define REG_R0 (REG_BASE_OFFSET + (0 * 4)) -#define REG_R1 (REG_BASE_OFFSET + (1 * 4)) -#define REG_R2 (REG_BASE_OFFSET + (2 * 4)) -#define REG_R3 (REG_BASE_OFFSET + (3 * 4)) -#define REG_R4 (REG_BASE_OFFSET + (4 * 4)) -#define REG_R5 (REG_BASE_OFFSET + (5 * 4)) -#define REG_R6 (REG_BASE_OFFSET + (6 * 4)) -#define REG_R7 (REG_BASE_OFFSET + (7 * 4)) -#define REG_R8 (REG_BASE_OFFSET + (8 * 4)) -#define REG_R9 (REG_BASE_OFFSET + (9 * 4)) -#define REG_R10 (REG_BASE_OFFSET + (10 * 4)) -#define REG_R11 (REG_BASE_OFFSET + (11 * 4)) -#define 
REG_R12 (REG_BASE_OFFSET + (12 * 4)) -#define REG_R13 (REG_BASE_OFFSET + (13 * 4)) -#define REG_R14 (REG_BASE_OFFSET + (14 * 4)) -#define REG_SP (REG_BASE_OFFSET + (13 * 4)) -#define REG_LR (REG_BASE_OFFSET + (14 * 4)) -#define REG_PC (REG_BASE_OFFSET + (15 * 4)) +#define REG_N_FLAG (16 * 4) +#define REG_Z_FLAG (17 * 4) +#define REG_C_FLAG (18 * 4) +#define REG_V_FLAG (19 * 4) +#define REG_CPSR (20 * 4) -#define REG_N_FLAG (REG_BASE_OFFSET + (16 * 4)) -#define REG_Z_FLAG (REG_BASE_OFFSET + (17 * 4)) -#define REG_C_FLAG (REG_BASE_OFFSET + (18 * 4)) -#define REG_V_FLAG (REG_BASE_OFFSET + (19 * 4)) -#define REG_CPSR (REG_BASE_OFFSET + (20 * 4)) +#define REG_SAVE (21 * 4) +#define REG_SAVE2 (22 * 4) +#define REG_SAVE3 (23 * 4) -#define REG_SAVE (REG_BASE_OFFSET + (21 * 4)) -#define REG_SAVE2 (REG_BASE_OFFSET + (22 * 4)) -#define REG_SAVE3 (REG_BASE_OFFSET + (23 * 4)) - -#define CPU_MODE (REG_BASE_OFFSET + (29 * 4)) -#define CPU_HALT_STATE (REG_BASE_OFFSET + (30 * 4)) -#define CHANGED_PC_STATUS (REG_BASE_OFFSET + (31 * 4)) +#define CPU_MODE (29 * 4) +#define CPU_HALT_STATE (30 * 4) +#define CHANGED_PC_STATUS (31 * 4) #define reg_a0 r0 @@ -484,9 +482,6 @@ _execute_arm_translate: mov r0, reg_base @ load reg_base into first param call_c_function(move_reg) @ make reg_base the new reg ptr - sub sp, sp, #REG_BASE_OFFSET @ allocate room for ptr table - bl load_ptr_read_function_table @ load read function ptr table - ldr r0, [reg_base, #REG_PC] @ r0 = current pc ldr r1, [reg_base, #REG_CPSR] @ r1 = flags tst r1, #0x20 @ see if Thumb bit is set @@ -727,214 +722,6 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001) execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001) execute_load_builder(u32, 32, ldrne, #0xF0000000) - -#define execute_ptr_builder(region, ptr, bits) ;\ - ;\ -execute_##region##_ptr: ;\ - ldr r1, =(ptr) /* load region ptr */;\ - mov r0, r0, lsl #(32 - bits) /* isolate bottom bits */;\ - mov r0, r0, lsr #(32 - bits) ;\ - bx lr /* return */;\ - - -.align 2 
-.globl execute_bios_ptr_protected -.globl _execute_bios_ptr_protected -execute_bios_ptr_protected: -_execute_bios_ptr_protected: - ldr r1, =bios_read_protect @ load bios read ptr - and r0, r0, #0x03 @ only want bottom 2 bits - bx lr @ return - - -@ address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; - -execute_ewram_ptr: - ldr r1, =(ewram + 0x8000) @ load ewram read ptr - mov r2, r0, lsl #17 @ isolate bottom 15 bits - mov r2, r2, lsr #17 - and r0, r0, #0x38000 @ isolate top 2 bits - add r0, r2, r0, lsl #1 @ add top 2 bits * 2 to bottom 15 - bx lr @ return - - -@ u32 gamepak_index = address >> 15; -@ u8 *map = memory_map_read[gamepak_index]; - -@ if(!map) -@ map = load_gamepak_page(gamepak_index & 0x3FF); - -@ value = address##type(map, address & 0x7FFF) - -execute_gamepak_ptr: - ldr r1, =memory_map_read @ load memory_map_read - mov r2, r0, lsr #15 @ isolate top 17 bits - ldr r1, [r1, r2, lsl #2] @ load memory map read ptr - - save_flags() - cmp r1, #0 @ see if map entry is NULL - bne 2f @ if not resume - - str r0, [reg_base, #REG_SAVE2] @ save r0 on the temp memory - mov r2, r2, lsl #20 @ isolate page index - mov r0, r2, lsr #20 - call_c_function(load_gamepak_page) @ read new page into r0 - - mov r1, r0 @ new map = return - ldr r0, [reg_base, #REG_SAVE2] @ restore r0 - -2: - mov r0, r0, lsl #17 @ isolate bottom 15 bits - mov r0, r0, lsr #17 - restore_flags() - bx lr @ return - - -@ These will store the result in a pointer, then pass that pointer. 
- -execute_eeprom_ptr: - save_flags() - - call_c_function(read_eeprom) @ load EEPROM result - add r1, reg_base, #(REG_SAVE & 0xFF00) - add r1, r1, #(REG_SAVE & 0xFF) - strh r0, [r1] @ write result out - mov r0, #0 @ zero out address - - restore_flags() - bx lr @ return - - -execute_backup_ptr: - save_flags() - - mov r0, r0, lsl #16 @ only want top 16 bits - mov r0, r0, lsr #16 - call_c_function(read_backup) @ load backup result - add r1, reg_base, #(REG_SAVE & 0xFF00) - add r1, r1, #(REG_SAVE & 0xFF) - strb r0, [r1] @ write result out - mov r0, #0 @ zero out address - - restore_flags() - bx lr @ return - - -execute_open_ptr: - ldr r1, [reg_base, #REG_CPSR] @ r1 = cpsr - save_flags() - - str r0, [reg_base, #REG_SAVE2] @ save r0 - - ldr r0, [lr, #-4] @ r0 = current PC - - tst r1, #0x20 @ see if Thumb bit is set - bne 1f @ if so load Thumb op - - call_c_function(read_memory32) @ read open address - - add r1, reg_base, #((REG_SAVE + 4) & 0xFF00) - add r1, r1, #((REG_SAVE + 4) & 0xFF) - add r1, r1, reg_base - str r0, [r1] @ write out - - ldr r0, [reg_base, #REG_SAVE2] @ restore r0 - and r0, r0, #0x03 @ isolate bottom 2 bits - - restore_flags() - bx lr - -1: - call_c_function(read_memory16) @ read open address - - orr r0, r0, r0, lsl #16 @ duplicate opcode over halves - add r1, reg_base, #((REG_SAVE + 4) & 0xFF00) - add r1, r1, #((REG_SAVE + 4) & 0xFF) - - add r1, r1, reg_base - str r0, [r1] @ write out - - ldr r0, [reg_base, #REG_SAVE2] @ restore r0 - and r0, r0, #0x03 @ isolate bottom 2 bits - - restore_flags(); - bx lr - - -execute_ptr_builder(bios_rom, bios_rom, 14) -execute_ptr_builder(iwram, iwram + 0x8000, 15) -execute_ptr_builder(vram, vram, 17) -execute_ptr_builder(oam_ram, oam_ram, 10) -execute_ptr_builder(io_registers, io_registers, 10) -execute_ptr_builder(palette_ram, palette_ram, 10) - -ptr_read_function_table: - .word execute_bios_ptr_protected @ 0x00: BIOS - .word execute_open_ptr @ 0x01: open - .word execute_ewram_ptr @ 0x02: ewram - .word 
execute_iwram_ptr @ 0x03: iwram - .word execute_io_registers_ptr @ 0x04: I/O registers - .word execute_palette_ram_ptr @ 0x05: palette RAM - .word execute_vram_ptr @ 0x06: vram - .word execute_oam_ram_ptr @ 0x07: oam RAM - .word execute_gamepak_ptr @ 0x08: gamepak - .word execute_gamepak_ptr @ 0x09: gamepak - .word execute_gamepak_ptr @ 0x0A: gamepak - .word execute_gamepak_ptr @ 0x0B: gamepak - .word execute_gamepak_ptr @ 0x0C: gamepak - .word execute_eeprom_ptr @ 0x0D: EEPROM - .word execute_backup_ptr @ 0x0E: backup - -.rept (256 - 15) @ 0x0F - 0xFF: open - .word execute_open_ptr -.endr - - -@ Setup the read function table. -@ Load this onto the the stack; assume we're free to use r3 - -load_ptr_read_function_table: - mov r0, #256 @ 256 elements - ldr r1, =ptr_read_function_table @ r0 = ptr_read_function_table - mov r2, sp @ load here - -2: - ldr r3, [r1], #4 @ read pointer - str r3, [r2], #4 @ write pointer - - subs r0, r0, #1 @ goto next iteration - bne 2b - - bx lr - - -@ Patch the read function table to allow for BIOS reads. - -.align 2 -.globl execute_patch_bios_read -.globl _execute_patch_bios_read -execute_patch_bios_read: -_execute_patch_bios_read: - ldr r1, =reg @ r1 = reg - ldr r0, =execute_bios_rom_ptr @ r0 = patch function - ldr r1, [r1] - str r0, [r1, #-REG_BASE_OFFSET] - bx lr - - -@ Patch the read function table to allow for BIOS reads. - -.align 2 -.globl execute_patch_bios_protect -.globl _execute_patch_bios_protect -execute_patch_bios_protect: -_execute_patch_bios_protect: - ldr r1, =reg @ r1 = reg - ldr r0, =execute_bios_ptr_protected @ r0 = patch function - ldr r1, [r1] - str r0, [r1, #-REG_BASE_OFFSET] - bx lr - .pool .comm memory_map_read 0x8000