Merge pull request #120 from davidgfnet/master

Fixes and improvements for MIPS and ARM
This commit is contained in:
Autechre 2021-03-27 00:31:51 +01:00 committed by GitHub
commit 08d2fa1ebe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 98 additions and 62 deletions

View File

@ -362,6 +362,15 @@ else ifneq (,$(findstring armv,$(platform)))
endif
LDFLAGS := -Wl,--no-undefined
# MIPS
else ifeq ($(platform), mips32)
TARGET := $(TARGET_NAME)_libretro.so
SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
HAVE_DYNAREC := 1
CPU_ARCH := mips
# emscripten
else ifeq ($(platform), emscripten)
TARGET := $(TARGET_NAME)_libretro_$(platform).bc

View File

@ -43,8 +43,8 @@ _##symbol:
#define CPU_HALT_STATE (30 * 4)
#define CHANGED_PC_STATUS (31 * 4)
#define COMPLETED_FRAME (32 * 4)
#define MAIN_THREAD_SP (33 * 4)
#define OAM_UPDATED (33 * 4)
#define MAIN_THREAD_SP (34 * 4)
#define reg_a0 r0
#define reg_a1 r1
@ -538,7 +538,7 @@ return_to_main:
@ The instruction at LR is not an inst but a u32 data that contains the PC
@ Used for SMC. That's why return is essentially `pc = lr + 4`
#define execute_store_body(store_type, store_op) ;\
#define execute_store_body(store_type) ;\
save_flags() ;\
str lr, [reg_base, #REG_SAVE3] /* save lr */;\
str r4, [reg_base, #REG_SAVE2] /* save r4 */;\
@ -559,7 +559,7 @@ ptr_tbl_##store_type: ;\
.word ext_store_u##store_type /* 0x04: I/O regs */;\
.word ext_store_u##store_type /* 0x05: palette RAM */;\
.word ext_store_vram_u##store_type /* 0x06: vram */;\
.word ext_store_u##store_type /* 0x07: oam ram */;\
.word ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\
.word ext_store_u##store_type /* 0x08: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x09: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\
@ -576,11 +576,11 @@ ext_store_ignore:
add pc, lr, #4 @ return
#define execute_store_builder(store_type, store_op, load_op) ;\
#define execute_store_builder(store_type, store_op, store_op16, load_op) ;\
;\
.align 2 ;\
defsymbl(execute_store_u##store_type) ;\
execute_store_body(store_type, store_op) ;\
execute_store_body(store_type) ;\
;\
ext_store_u##store_type: ;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
@ -619,7 +619,16 @@ ext_store_vram_u##store_type: ;\
cmp r0, #0x18000 /* Check if exceeds 96KB */;\
subcs r0, r0, #0x8000 /* Mirror to the last bank */;\
ldr r2, =(vram) /* r2 = vram base */;\
store_op r1, [r0, r2] /* store data */;\
store_op16 r1, [r0, r2] /* store data */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
;\
ext_store_oam_ram_u##store_type: ;\
mask_addr_bus16_##store_type(10) /* Mask to mirror memory (+align)*/;\
add r2, reg_base, #256 /* r2 = oam ram base */;\
store_op16 r1, [r0, r2] /* store data */;\
str r2, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
@ -631,14 +640,14 @@ ext_store_vram_u##store_type: ;\
b smc_write /* perform smc write */;\
execute_store_builder(8, strb, ldrb)
execute_store_builder(16, strh, ldrh)
execute_store_builder(32, str, ldr)
execute_store_builder(8, strb, strh, ldrb)
execute_store_builder(16, strh, strh, ldrh)
execute_store_builder(32, str, str, ldr)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
defsymbl(execute_store_u32_safe)
execute_store_body(32_safe, str)
execute_store_body(32_safe)
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@ -671,6 +680,14 @@ ext_store_vram_u32_safe:
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return
@ OAM RAM handler for the "safe" 32-bit store path. execute_store_body(32_safe)
@ instantiates the store pointer table, whose 0x07 (OAM RAM) slot names
@ ext_store_oam_ram_u##store_type, i.e. this label.
@ On entry r0 is used as the target address and r1 as the value to store;
@ execute_store_body has already run save_flags() and saved lr in REG_SAVE3.
@ Besides performing the store it sets the OAM_UPDATED slot to a non-zero
@ value so the C side (which tests reg[OAM_UPDATED]) can notice the write.
@ r2 is overwritten as a scratch register.
ext_store_oam_ram_u32_safe:
mask_addr_8(10) @ Mask to mirror memory (no need to align!); presumably keeps the low 10 bits (1KB OAM) - confirm against the macro
add r2, reg_base, #256 @ r2 = oam ram base (oam_ram follows the 0x100-byte reg block; assumes reg_base points at reg)
str r1, [r0, r2] @ store the 32-bit value at oam_ram + masked offset
str r2, [reg_base, #OAM_UPDATED] @ flag the update: r2 holds an address, so any non-zero value is written
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return straight to the saved lr (the non-safe handlers return to lr + 4 instead)
write_epilogue:
cmp r0, #0 @ check if the write rose an alert
beq 4f @ if not we can exit
@ -827,6 +844,8 @@ defsymbl(reg_mode)
defsymbl(reg)
.space 0x100, 0
defsymbl(oam_ram)
.space 0x400
@ Vita and 3DS (and of course mmap) map their own cache sections through some
@ platform-specific mechanisms.

1
cpu.c
View File

@ -1630,6 +1630,7 @@ void raise_interrupt(irq_type irq_raised)
#ifndef HAVE_DYNAREC
u8 *memory_map_read [8 * 1024];
u16 oam_ram[512];
u16 palette_ram[512];
u16 palette_ram_converted[512];
#endif

4
cpu.h
View File

@ -85,7 +85,8 @@ typedef enum
CPU_MODE = 29,
CPU_HALT_STATE = 30,
CHANGED_PC_STATUS = 31,
COMPLETED_FRAME = 32
COMPLETED_FRAME = 32,
OAM_UPDATED = 33
} ext_reg_numbers;
typedef enum
@ -146,7 +147,6 @@ extern u8 *ram_translation_ptr;
extern u32 idle_loop_target_pc;
extern u32 iwram_stack_optimize;
extern u32 direct_map_vram;
extern u32 translation_gate_targets;
extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES];

View File

@ -305,7 +305,6 @@ u32 gamepak_waitstate_sequential[2][3][3] =
}
};
u16 oam_ram[512];
u16 io_registers[1024 * 16];
u8 ewram[1024 * 256 * 2];
u8 iwram[1024 * 32 * 2];
@ -342,14 +341,9 @@ gamepak_swap_entry_type *gamepak_memory_map;
// a lot.
FILE *gamepak_file_large = NULL;
u32 direct_map_vram = 0;
// Writes to these respective locations should trigger an update
// so the related subsystem may react to it.
// If OAM is written to:
u32 oam_update = 1;
// If GBC audio is written to:
u32 gbc_sound_update = 0;
@ -755,7 +749,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value)
u32 dispcnt = io_registers[REG_DISPCNT];
if((value & 0x07) != (dispcnt & 0x07))
oam_update = 1;
reg[OAM_UPDATED] = 1;
address8(io_registers, 0x00) = value;
break;
@ -1171,7 +1165,7 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
{
u32 dispcnt = io_registers[REG_DISPCNT];
if((value & 0x07) != (dispcnt & 0x07))
oam_update = 1;
reg[OAM_UPDATED] = 1;
address16(io_registers, 0x00) = value;
break;
@ -1934,7 +1928,7 @@ void function_cc write_rtc(u32 address, u32 value)
\
case 0x07: \
/* OAM RAM */ \
oam_update = 1; \
reg[OAM_UPDATED] = 1; \
address##type(oam_ram, address & 0x3FF) = value; \
break; \
\
@ -2529,7 +2523,7 @@ dma_region_type dma_region_map[16] =
dma_smc_vars_##type()
#define dma_oam_ram_dest() \
oam_update = 1 \
reg[OAM_UPDATED] = 1 \
#define dma_vars_oam_ram(type) \
dma_oam_ram_##type() \
@ -3331,7 +3325,7 @@ void gba_load_state(const void* src)
wipe_caches();
#endif
oam_update = 1;
reg[OAM_UPDATED] = 1;
gbc_sound_update = 1;
for(i = 0; i < 512; i++)

2
main.c
View File

@ -158,7 +158,7 @@ u32 update_gba(void)
if((dispstat & 0x01) == 0)
{
u32 i;
if(oam_update)
if(reg[OAM_UPDATED])
oam_update_count++;
if(no_alpha)

View File

@ -2512,7 +2512,8 @@ u8 swi_hle_handle[256] =
#define ReOff_SaveR1 (21*4) // 3 save scratch regs
#define ReOff_SaveR2 (22*4)
#define ReOff_SaveR3 (23*4)
#define ReOff_GP_Save (32*4) // GP_SAVE
#define ReOff_OamUpd (33*4) // OAM_UPDATED
#define ReOff_GP_Save (34*4) // GP_SAVE
// Saves all regs to their right slot and loads gp
#define emit_save_regs(save_a2) { \
@ -2629,6 +2630,7 @@ typedef struct {
bool check_smc; // Whether the memory can contain code
bool bus16; // Whether it can only be accessed at 16bit
u32 baseptr; // Memory base address.
u32 baseoff; // Offset from base_reg
} t_stub_meminfo;
// Generates the stub to access memory for a given region, access type,
@ -2737,7 +2739,11 @@ static void emit_pmemld_stub(
} else {
// Generate upper bits of the addr and do addr mirroring
// (The address hi16 is rounded up since load uses signed offset)
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (!meminfo->baseoff) {
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
} else {
base_addr = meminfo->baseoff;
}
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
@ -2760,8 +2766,9 @@ static void emit_pmemld_stub(
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
// Generate regular (<=32KB) mirroring
mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
mips_reg_number breg = (meminfo->baseoff ? reg_base : reg_rv);
mips_emit_andi(reg_temp, reg_a0, memmask); // Clear upper bits (mirroring)
mips_emit_addu(reg_rv, breg, reg_temp); // Adds to base addr
}
}
@ -2873,9 +2880,8 @@ static void emit_pmemst_stub(
// Post processing store:
// Signal that OAM was updated
if (region == 7) {
u32 palcaddr = (u32)&oam_update;
mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16));
mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data
// Write any nonzero data
mips_emit_sw(reg_base, reg_base, ReOff_OamUpd);
generate_function_return_swap_delay();
}
else {
@ -3154,7 +3160,7 @@ static void emit_phand(
mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7)
}
unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes
unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs
mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset
mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table
mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB
@ -3229,21 +3235,21 @@ void init_emitter() {
// Generate memory handlers
const t_stub_meminfo ldinfo [] = {
{ emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom },
{ emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom, 0},
// 1 Open load / Ignore store
{ emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose
{ emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
{ emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers },
{ emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
{ emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram },
{ emit_pmemld_stub, 8, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 9, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 10, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 11, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 12, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram, 0 }, // memsize wrong on purpose
{ emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 },
{ emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers, 0 },
{ emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram, 0x100 },
{ emit_pmemld_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case
{ emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 },
{ emit_pmemld_stub, 8, 0x8000, false, false, 0, 0 },
{ emit_pmemld_stub, 9, 0x8000, false, false, 0, 0 },
{ emit_pmemld_stub, 10, 0x8000, false, false, 0, 0 },
{ emit_pmemld_stub, 11, 0x8000, false, false, 0, 0 },
{ emit_pmemld_stub, 12, 0x8000, false, false, 0, 0 },
// 13 is EEPROM mapped already (a bit special)
{ emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call
{ emit_pmemld_stub, 14, 0, false, false, 0, 0 }, // Mapped via function call
// 15 Open load / Ignore store
};
@ -3267,12 +3273,12 @@ void init_emitter() {
}
const t_stub_meminfo stinfo [] = {
{ emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram },
{ emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
{ emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram, 0 },
{ emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000], 0 },
// I/O is special and mapped with a function call
{ emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
{ emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram },
{ emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram, 0x100 },
{ emit_pmemst_stub, 6, 0x0, false, true, (u32)vram, 0 }, // same, vram is a special case
{ emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram, 0x900 },
};
// Store only for "regular"-ish mem regions

View File

@ -40,6 +40,7 @@
.global reg_check
.global palette_ram
.global palette_ram_converted
.global oam_ram
.global init_emitter
.global mips_lookup_pc
.global smc_write
@ -52,6 +53,7 @@
.global reg
.global spsr
.global reg_mode
.global oam_update
# MIPS register layout:
@ -116,13 +118,15 @@
.equ CPU_HALT_STATE, (30 * 4)
.equ CHANGED_PC_STATUS, (31 * 4)
.equ COMPLETED_FRAME, (32 * 4)
.equ GP_SAVE, (33 * 4)
.equ OAM_UPDATED, (33 * 4)
.equ GP_SAVE, (34 * 4)
.equ SPSR_BASE, (0x900)
.equ REGMODE_BASE, (0x900 + 24)
.equ SPSR_BASE, (0x100 + 0x400 * 3)
.equ REGMODE_BASE, (SPSR_BASE + 24)
.equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE)
.equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE)
.equ FNPTRS_BASE, (0x900 + 220 + 960)
.equ FNPTRS_MEMOPS, (REGMODE_BASE + 196)
.equ FNPTRS_BASE, (FNPTRS_MEMOPS + 960)
.set noat
.set noreorder
@ -623,6 +627,8 @@ palette_ram:
.space 0x400
palette_ram_converted:
.space 0x400
oam_ram:
.space 0x400
spsr:
.space 24 # u32[6]
reg_mode:

View File

@ -4429,10 +4429,10 @@ void update_scanline(void)
// If OAM has been modified since the last scanline has been updated then
// reorder and reprofile the OBJ lists.
if(oam_update)
if(reg[OAM_UPDATED])
{
order_obj(video_mode);
oam_update = 0;
reg[OAM_UPDATED] = 0;
}
order_layers((dispcnt >> 8) & active_layers[video_mode]);

View File

@ -28,7 +28,6 @@ _##symbol:
#ifndef _WIN32
# External symbols (data + functions)
#define _oam_update oam_update
#define _iwram iwram
#define _ewram ewram
#define _vram vram
@ -50,7 +49,6 @@ _##symbol:
#define _execute_store_cpsr_body execute_store_cpsr_body
#endif
.global _oam_update
.global _iwram
.global _ewram
.global _vram
@ -75,6 +73,7 @@ _##symbol:
.equ CPU_HALT_STATE, (30 * 4)
.equ CHANGED_PC_STATUS, (31 * 4)
.equ COMPLETED_FRAME, (32 * 4)
.equ OAM_UPDATED, (33 * 4)
# destroys ecx and edx
@ -241,7 +240,7 @@ ext_store_vram8b:
ret
ext_store_oam8:
movl $1, _oam_update # flag OAM update
movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FE, %eax # wrap around address and align to 16bits
mov %dl, %dh # copy lower 8bits of value into full 16bits
mov %dx, _oam_ram(%eax) # perform 16bit store
@ -332,7 +331,7 @@ ext_store_vram16b:
ret
ext_store_oam16:
movl $1, _oam_update # flag OAM update
movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FF, %eax # wrap around address
mov %dx, _oam_ram(%eax) # perform 16bit store
ret
@ -410,7 +409,7 @@ ext_store_vram32b:
ret
ext_store_oam32:
movl $1, _oam_update # flag OAM update
movl $1, OAM_UPDATED(%ebx) # flag OAM update
and $0x3FF, %eax # wrap around address
mov %edx, _oam_ram(%eax) # perform 32bit store
ret
@ -539,6 +538,8 @@ defsymbl(palette_ram)
.space 0x400
defsymbl(palette_ram_converted)
.space 0x400
defsymbl(oam_ram)
.space 0x400
defsymbl(spsr)
.space 24
defsymbl(reg_mode)