diff --git a/Makefile b/Makefile index a5f2649..dfc7c37 100644 --- a/Makefile +++ b/Makefile @@ -362,6 +362,15 @@ else ifneq (,$(findstring armv,$(platform))) endif LDFLAGS := -Wl,--no-undefined +# MIPS +else ifeq ($(platform), mips32) + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared -nostdlib -Wl,--version-script=link.T + fpic := -fPIC -DPIC + CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float + HAVE_DYNAREC := 1 + CPU_ARCH := mips + # emscripten else ifeq ($(platform), emscripten) TARGET := $(TARGET_NAME)_libretro_$(platform).bc diff --git a/arm/arm_stub.S b/arm/arm_stub.S index f0b7f52..5917e82 100644 --- a/arm/arm_stub.S +++ b/arm/arm_stub.S @@ -43,8 +43,8 @@ _##symbol: #define CPU_HALT_STATE (30 * 4) #define CHANGED_PC_STATUS (31 * 4) #define COMPLETED_FRAME (32 * 4) - -#define MAIN_THREAD_SP (33 * 4) +#define OAM_UPDATED (33 * 4) +#define MAIN_THREAD_SP (34 * 4) #define reg_a0 r0 #define reg_a1 r1 @@ -538,7 +538,7 @@ return_to_main: @ The instruction at LR is not an inst but a u32 data that contains the PC @ Used for SMC. That's why return is essentially `pc = lr + 4` -#define execute_store_body(store_type, store_op) ;\ +#define execute_store_body(store_type) ;\ save_flags() ;\ str lr, [reg_base, #REG_SAVE3] /* save lr */;\ str r4, [reg_base, #REG_SAVE2] /* save r4 */;\ @@ -559,7 +559,7 @@ ptr_tbl_##store_type: ;\ .word ext_store_u##store_type /* 0x04: I/O regs */;\ .word ext_store_u##store_type /* 0x05: palette RAM */;\ .word ext_store_vram_u##store_type /* 0x06: vram */;\ - .word ext_store_u##store_type /* 0x07: oam ram */;\ + .word ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\ .word ext_store_u##store_type /* 0x08: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x09: gamepak: ignore */;\ .word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ @@ -576,11 +576,11 @@ ext_store_ignore: add pc, lr, #4 @ return -#define execute_store_builder(store_type, store_op, load_op) ;\ +#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\ ;\ .align 2 ;\ defsymbl(execute_store_u##store_type) ;\ - execute_store_body(store_type, store_op) ;\ + execute_store_body(store_type) ;\ ;\ ext_store_u##store_type: ;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ @@ -619,7 +619,16 @@ ext_store_vram_u##store_type: ;\ cmp r0, #0x18000 /* Check if exceeds 96KB */;\ subcs r0, r0, #0x8000 /* Mirror to the last bank */;\ ldr r2, =(vram) /* r2 = vram base */;\ - store_op r1, [r0, r2] /* store data */;\ + store_op16 r1, [r0, r2] /* store data */;\ + ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ + restore_flags() ;\ + add pc, lr, #4 /* return */;\ + ;\ +ext_store_oam_ram_u##store_type: ;\ + mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\ + add r2, reg_base, #256 /* r2 = oam ram base */;\ + store_op16 r1, [r0, r2] /* store data */;\ + str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\ restore_flags() ;\ add pc, lr, #4 /* return */;\ @@ -631,14 +640,14 @@ ext_store_vram_u##store_type: ;\ b smc_write /* perform smc write */;\ -execute_store_builder(8, strb, ldrb) -execute_store_builder(16, strh, ldrh) -execute_store_builder(32, str, ldr) +execute_store_builder(8, strb, strh, ldrb) +execute_store_builder(16, strh, strh, ldrh) +execute_store_builder(32, str, str, ldr) @ This is a store that is executed in a strm case (so no SMC checks in-between) defsymbl(execute_store_u32_safe) - execute_store_body(32_safe, str) + execute_store_body(32_safe) restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return @@ -671,6 +680,14 @@ ext_store_vram_u32_safe: restore_flags() ldr pc, [reg_base, #REG_SAVE3] @ return +ext_store_oam_ram_u32_safe: + mask_addr_8(10) @ Mask to mirror memory (no need to align!) + add r2, reg_base, #256 @ r2 = oam ram base + str r1, [r0, r2] @ store data + str r2, [reg_base, #OAM_UPDATED] @ store anything non zero here + restore_flags() + ldr pc, [reg_base, #REG_SAVE3] @ return + write_epilogue: cmp r0, #0 @ check if the write rose an alert beq 4f @ if not we can exit @@ -827,6 +844,8 @@ defsymbl(reg_mode) defsymbl(reg) .space 0x100, 0 +defsymbl(oam_ram) + .space 0x400 @ Vita and 3DS (and of course mmap) map their own cache sections through some @ platform-speficic mechanisms. diff --git a/cpu.c b/cpu.c index ea0d69e..badb9c2 100644 --- a/cpu.c +++ b/cpu.c @@ -1630,6 +1630,7 @@ void raise_interrupt(irq_type irq_raised) #ifndef HAVE_DYNAREC u8 *memory_map_read [8 * 1024]; +u16 oam_ram[512]; u16 palette_ram[512]; u16 palette_ram_converted[512]; #endif diff --git a/cpu.h b/cpu.h index fc57626..2b250ca 100644 --- a/cpu.h +++ b/cpu.h @@ -85,7 +85,8 @@ typedef enum CPU_MODE = 29, CPU_HALT_STATE = 30, CHANGED_PC_STATUS = 31, - COMPLETED_FRAME = 32 + COMPLETED_FRAME = 32, + OAM_UPDATED = 33 } ext_reg_numbers; typedef enum @@ -146,7 +147,6 @@ extern u8 *ram_translation_ptr; extern u32 idle_loop_target_pc; extern u32 iwram_stack_optimize; -extern u32 direct_map_vram; extern u32 translation_gate_targets; extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES]; diff --git a/gba_memory.c b/gba_memory.c index a51f183..b66dce7 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -305,7 +305,6 @@ u32 gamepak_waitstate_sequential[2][3][3] = } }; -u16 oam_ram[512]; u16 io_registers[1024 * 16]; u8 ewram[1024 * 256 * 2]; u8 iwram[1024 * 32 * 2]; @@ -342,14 +341,9 @@ gamepak_swap_entry_type *gamepak_memory_map; // a lot. FILE *gamepak_file_large = NULL; -u32 direct_map_vram = 0; - // Writes to these respective locations should trigger an update // so the related subsystem may react to it. -// If OAM is written to: -u32 oam_update = 1; - // If GBC audio is written to: u32 gbc_sound_update = 0; @@ -755,7 +749,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value) u32 dispcnt = io_registers[REG_DISPCNT]; if((value & 0x07) != (dispcnt & 0x07)) - oam_update = 1; + reg[OAM_UPDATED] = 1; address8(io_registers, 0x00) = value; break; @@ -1171,7 +1165,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value) { u32 dispcnt = io_registers[REG_DISPCNT]; if((value & 0x07) != (dispcnt & 0x07)) - oam_update = 1; + reg[OAM_UPDATED] = 1; address16(io_registers, 0x00) = value; break; @@ -1934,7 +1928,7 @@ void function_cc write_rtc(u32 address, u32 value) \ case 0x07: \ /* OAM RAM */ \ - oam_update = 1; \ + reg[OAM_UPDATED] = 1; \ address##type(oam_ram, address & 0x3FF) = value; \ break; \ \ @@ -2529,7 +2523,7 @@ dma_region_type dma_region_map[16] = dma_smc_vars_##type() #define dma_oam_ram_dest() \ - oam_update = 1 \ + reg[OAM_UPDATED] = 1 \ #define dma_vars_oam_ram(type) \ dma_oam_ram_##type() \ @@ -3331,7 +3325,7 @@ void gba_load_state(const void* src) wipe_caches(); #endif - oam_update = 1; + reg[OAM_UPDATED] = 1; gbc_sound_update = 1; for(i = 0; i < 512; i++) diff --git a/main.c b/main.c index 73371e4..2a82338 100644 --- a/main.c +++ b/main.c @@ -158,7 +158,7 @@ u32 update_gba(void) if((dispstat & 0x01) == 0) { u32 i; - if(oam_update) + if(reg[OAM_UPDATED]) oam_update_count++; if(no_alpha) diff --git a/psp/mips_emit.h b/psp/mips_emit.h index b996f2b..818b724 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -2512,7 +2512,8 @@ u8 swi_hle_handle[256] = #define ReOff_SaveR1 (21*4) // 3 save scratch regs #define ReOff_SaveR2 (22*4) #define ReOff_SaveR3 (23*4) -#define ReOff_GP_Save (32*4) // GP_SAVE +#define ReOff_OamUpd (33*4) // OAM_UPDATED +#define ReOff_GP_Save (34*4) // GP_SAVE // Saves all regs to their right slot and loads gp #define emit_save_regs(save_a2) { \ @@ -2629,6 +2630,7 @@ typedef struct { bool check_smc; // Whether the memory can contain code bool bus16; // Whether it can only be accessed at 16bit u32 baseptr; // Memory base address. + u32 baseoff; // Offset from base_reg } t_stub_meminfo; // Generates the stub to access memory for a given region, access type, @@ -2737,7 +2739,11 @@ static void emit_pmemld_stub( } else { // Generate upper bits of the addr and do addr mirroring // (The address hi16 is rounded up since load uses signed offset) - mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + if (!meminfo->baseoff) { + mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16)); + } else { + base_addr = meminfo->baseoff; + } if (region == 2) { // Can't do EWRAM with an `andi` instruction (18 bits mask) @@ -2760,8 +2766,9 @@ static void emit_pmemld_stub( mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset } else { // Generate regular (<=32KB) mirroring - mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring) - mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr + mips_reg_number breg = (meminfo->baseoff ? reg_base : reg_rv); + mips_emit_andi(reg_temp, reg_a0, memmask); // Clear upper bits (mirroring) + mips_emit_addu(reg_rv, breg, reg_temp); // Adds to base addr } } @@ -2873,9 +2880,8 @@ static void emit_pmemst_stub( // Post processing store: // Signal that OAM was updated if (region == 7) { - u32 palcaddr = (u32)&oam_update; - mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16)); - mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data + // Write any nonzero data + mips_emit_sw(reg_base, reg_base, ReOff_OamUpd); generate_function_return_swap_delay(); } else { @@ -3154,7 +3160,7 @@ static void emit_phand( mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7) } - unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes + unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB @@ -3229,21 +3235,21 @@ void init_emitter() { // Generate memory handlers const t_stub_meminfo ldinfo [] = { - { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom }, + { emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom, 0}, // 1 Open load / Ignore store - { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose - { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, - { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers }, - { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram }, - { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram }, - { emit_pmemld_stub, 8, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 9, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 10, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 11, 0x8000, false, false, 0 }, - { emit_pmemld_stub, 12, 0x8000, false, false, 0 }, + { emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, // memsize wrong on purpose + { emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, + { emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers, 0 }, + { emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemld_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, + { emit_pmemld_stub, 8, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 9, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 10, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 11, 0x8000, false, false, 0, 0 }, + { emit_pmemld_stub, 12, 0x8000, false, false, 0, 0 }, // 13 is EEPROM mapped already (a bit special) - { emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call + { emit_pmemld_stub, 14, 0, false, false, 0, 0 }, // Mapped via function call // 15 Open load / Ignore store }; @@ -3267,12 +3273,12 @@ void init_emitter() { } const t_stub_meminfo stinfo [] = { - { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram }, - { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, + { emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, + { emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 }, // I/O is special and mapped with a function call - { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram }, - { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case - { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram }, + { emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram, 0x100 }, + { emit_pmemst_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case + { emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 }, }; // Store only for "regular"-ish mem regions diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 3d046d8..1c4ad4b 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -40,6 +40,7 @@ .global reg_check .global palette_ram .global palette_ram_converted +.global oam_ram .global init_emitter .global mips_lookup_pc .global smc_write @@ -52,6 +53,7 @@ .global reg .global spsr .global reg_mode +.global oam_update # MIPS register layout: @@ -116,13 +118,15 @@ .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) .equ COMPLETED_FRAME, (32 * 4) -.equ GP_SAVE, (33 * 4) +.equ OAM_UPDATED, (33 * 4) +.equ GP_SAVE, (34 * 4) -.equ SPSR_BASE, (0x900) -.equ REGMODE_BASE, (0x900 + 24) +.equ SPSR_BASE, (0x100 + 0x400 * 3) +.equ REGMODE_BASE, (SPSR_BASE + 24) .equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE) .equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE) -.equ FNPTRS_BASE, (0x900 + 220 + 960) +.equ FNPTRS_MEMOPS, (REGMODE_BASE + 196) +.equ FNPTRS_BASE, (FNPTRS_MEMOPS + 960) .set noat .set noreorder @@ -623,6 +627,8 @@ palette_ram: .space 0x400 palette_ram_converted: .space 0x400 +oam_ram: + .space 0x400 spsr: .space 24 # u32[6] reg_mode: diff --git a/video.c b/video.c index 23cd368..4221f25 100644 --- a/video.c +++ b/video.c @@ -4429,10 +4429,10 @@ void update_scanline(void) // If OAM has been modified since the last scanline has been updated then // reorder and reprofile the OBJ lists. - if(oam_update) + if(reg[OAM_UPDATED]) { order_obj(video_mode); - oam_update = 0; + reg[OAM_UPDATED] = 0; } order_layers((dispcnt >> 8) & active_layers[video_mode]); diff --git a/x86/x86_stub.S b/x86/x86_stub.S index 9dd3fdd..333c8fd 100644 --- a/x86/x86_stub.S +++ b/x86/x86_stub.S @@ -28,7 +28,6 @@ _##symbol: #ifndef _WIN32 # External symbols (data + functions) -#define _oam_update oam_update #define _iwram iwram #define _ewram ewram #define _vram vram @@ -50,7 +49,6 @@ _##symbol: #define _execute_store_cpsr_body execute_store_cpsr_body #endif -.global _oam_update .global _iwram .global _ewram .global _vram @@ -75,6 +73,7 @@ _##symbol: .equ CPU_HALT_STATE, (30 * 4) .equ CHANGED_PC_STATUS, (31 * 4) .equ COMPLETED_FRAME, (32 * 4) +.equ OAM_UPDATED, (33 * 4) # destroys ecx and edx @@ -241,7 +240,7 @@ ext_store_vram8b: ret ext_store_oam8: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FE, %eax # wrap around address and align to 16bits mov %dl, %dh # copy lower 8bits of value into full 16bits mov %dx, _oam_ram(%eax) # perform 16bit store @@ -332,7 +331,7 @@ ext_store_vram16b: ret ext_store_oam16: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %dx, _oam_ram(%eax) # perform 16bit store ret @@ -410,7 +409,7 @@ ext_store_vram32b: ret ext_store_oam32: - movl $1, _oam_update # flag OAM update + movl $1, OAM_UPDATED(%ebx) # flag OAM update and $0x3FF, %eax # wrap around address mov %edx, _oam_ram(%eax) # perform 32bit store ret @@ -539,6 +538,8 @@ defsymbl(palette_ram) .space 0x400 defsymbl(palette_ram_converted) .space 0x400 +defsymbl(oam_ram) + .space 0x400 defsymbl(spsr) .space 24 defsymbl(reg_mode)