Add Dingux support

Uses a different cache primitive and a different madd(u) encoding.
Also added a flag for BGR vs RGB color output (since PSP is assumed to
be BGR for speed).
Aside from that, the ABI required some special function calls for PIC.
This commit is contained in:
David Guillen Fandos 2021-03-16 19:02:11 +01:00
parent 80be1e3447
commit 6b503667ec
3 changed files with 165 additions and 101 deletions

View File

@ -193,7 +193,7 @@ else ifeq ($(platform), psp1)
TARGET := $(TARGET_NAME)_libretro_$(platform).a TARGET := $(TARGET_NAME)_libretro_$(platform).a
CC = psp-gcc$(EXE_EXT) CC = psp-gcc$(EXE_EXT)
AR = psp-ar$(EXE_EXT) AR = psp-ar$(EXE_EXT)
CFLAGS += -DPSP -G0 CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT
CFLAGS += -I$(shell psp-config --pspsdk-path)/include CFLAGS += -I$(shell psp-config --pspsdk-path)/include
CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi
CFLAGS += -fomit-frame-pointer -ffast-math CFLAGS += -fomit-frame-pointer -ffast-math
@ -374,8 +374,10 @@ else ifeq ($(platform), gcw0)
CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++ CXX = /opt/gcw0-toolchain/usr/bin/mipsel-linux-g++
AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar AR = /opt/gcw0-toolchain/usr/bin/mipsel-linux-ar
SHARED := -shared -nostdlib -Wl,--version-script=link.T SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
HAVE_DYNAREC := 1
CPU_ARCH := mips
# Windows # Windows
else else
@ -403,8 +405,8 @@ ifeq ($(DEBUG), 1)
OPTIMIZE_SAFE := -O0 -g OPTIMIZE_SAFE := -O0 -g
OPTIMIZE := -O0 -g OPTIMIZE := -O0 -g
else else
OPTIMIZE_SAFE := -O2 -DNDEBUG -g OPTIMIZE_SAFE := -O2 -DNDEBUG
OPTIMIZE := -O3 -DNDEBUG -g OPTIMIZE := -O3 -DNDEBUG
endif endif

View File

@ -132,6 +132,12 @@ typedef enum
mips_special_min = 0x2D, mips_special_min = 0x2D,
} mips_function_special; } mips_function_special;
// Function codes for the SPECIAL2 opcode group. mips_emit_special2() pastes
// one of these names (mips_special2_<function>) into the low bits of the
// instruction word it emits.
typedef enum
{
mips_special2_madd = 0x00,
mips_special2_maddu = 0x01,
} mips_function_special2;
typedef enum typedef enum
{ {
mips_special3_ext = 0x00, mips_special3_ext = 0x00,
@ -203,6 +209,12 @@ typedef enum
mips_special_##function; \ mips_special_##function; \
translation_ptr += 4 \ translation_ptr += 4 \
// Writes one SPECIAL2-group instruction word at translation_ptr and advances
// translation_ptr by 4 bytes. Field placement: opcode at bit 26, rs at bit 21,
// rt at bit 16, rd at bit 11, shift at bit 6, and the function code
// (mips_special2_<function>) in the remaining low bits.
// Arguments are used unparenthesized, so pass simple expressions only.
#define mips_emit_special2(function, rs, rt, rd, shift) \
*((u32 *)translation_ptr) = (mips_opcode_special2 << 26) | \
(rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | \
mips_special2_##function; \
translation_ptr += 4 \
#define mips_emit_special3(function, rs, rt, imm_a, imm_b) \ #define mips_emit_special3(function, rs, rt, imm_a, imm_b) \
*((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \ *((u32 *)translation_ptr) = (mips_opcode_special3 << 26) | \
(rs << 21) | (rt << 16) | (imm_a << 11) | (imm_b << 6) | \ (rs << 21) | (rt << 16) | (imm_a << 11) | (imm_b << 6) | \
@ -314,11 +326,19 @@ typedef enum
#define mips_emit_divu(rs, rt) \ #define mips_emit_divu(rs, rt) \
mips_emit_special(divu, rs, rt, 0, 0) \ mips_emit_special(divu, rs, rt, 0, 0) \
#define mips_emit_madd(rs, rt) \ #ifdef PSP
mips_emit_special(madd, rs, rt, 0, 0) \ #define mips_emit_madd(rs, rt) \
mips_emit_special(madd, rs, rt, 0, 0) \
#define mips_emit_maddu(rs, rt) \ #define mips_emit_maddu(rs, rt) \
mips_emit_special(maddu, rs, rt, 0, 0) \ mips_emit_special(maddu, rs, rt, 0, 0)
#else
#define mips_emit_madd(rs, rt) \
mips_emit_special2(madd, rs, rt, 0, 0) \
#define mips_emit_maddu(rs, rt) \
mips_emit_special2(maddu, rs, rt, 0, 0)
#endif
#define mips_emit_movn(rd, rs, rt) \ #define mips_emit_movn(rd, rs, rt) \
mips_emit_special(movn, rs, rt, rd, 0) \ mips_emit_special(movn, rs, rt, rd, 0) \
@ -411,6 +431,9 @@ typedef enum
#define mips_emit_jr(rs) \ #define mips_emit_jr(rs) \
mips_emit_special(jr, rs, 0, 0, 0) \ mips_emit_special(jr, rs, 0, 0, 0) \
#define mips_emit_jalr(rs) \
mips_emit_special(jalr, rs, 0, 31, 0) \
#define mips_emit_synci(rs, offset) \ #define mips_emit_synci(rs, offset) \
mips_emit_regimm(synci, rs, offset) \ mips_emit_regimm(synci, rs, offset) \
@ -2535,8 +2558,9 @@ u8 swi_hle_handle[256] =
#define ReOff_GP_Save (32*4) // GP_SAVE #define ReOff_GP_Save (32*4) // GP_SAVE
// Saves all regs to their right slot and loads gp // Saves all regs to their right slot and loads gp
#define emit_save_regs(save_a2) \ #define emit_save_regs(save_a2) { \
for (unsigned i = 0; i < 15; i++) { \ int i; \
for (i = 0; i < 15; i++) { \
mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \ mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \ } \
if (save_a2) { \ if (save_a2) { \
@ -2544,21 +2568,24 @@ u8 swi_hle_handle[256] =
} \ } \
/* Load the gp pointer, used by C code */ \ /* Load the gp pointer, used by C code */ \
mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \ mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \
}
// Restores the registers from their slot // Restores the registers from their slot
#define emit_restore_regs(restore_a2) \ #define emit_restore_regs(restore_a2) { \
int i; \
if (restore_a2) { \ if (restore_a2) { \
mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \ mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \
} \ } \
for (unsigned i = 0; i < 15; i++) { \ for (i = 0; i < 15; i++) { \
mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \ mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \ } \
}
// Emits a function call for a read or a write (for special stuff like flash) // Emits a function call for a read or a write (for special stuff like flash)
#define emit_mem_call_ds(fnptr, mask) \ #define emit_mem_call_ds(fnptr, mask) \
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \ mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \
emit_save_regs(true); \ emit_save_regs(true); \
mips_emit_jal(((u32)(fnptr)) >> 2); \ genccall(fnptr); \
mips_emit_andi(reg_a0, reg_a0, (mask)); \ mips_emit_andi(reg_a0, reg_a0, (mask)); \
emit_restore_regs(true); \ emit_restore_regs(true); \
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \ mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \
@ -2569,10 +2596,10 @@ u8 swi_hle_handle[256] =
mips_emit_nop(); mips_emit_nop();
// Pointer table to stubs, indexed by type and region // Pointer table to stubs, indexed by type and region
// Caution! This is not really a ptr table, but contains pre-encoed JALs
extern u32 tmemld[11][16]; extern u32 tmemld[11][16];
extern u32 tmemst[ 4][16]; extern u32 tmemst[ 4][16];
void mips_lookup_pc(); void mips_lookup_pc();
void smc_write();
cpu_alert_type write_io_register8 (u32 address, u32 value); cpu_alert_type write_io_register8 (u32 address, u32 value);
cpu_alert_type write_io_register16(u32 address, u32 value); cpu_alert_type write_io_register16(u32 address, u32 value);
cpu_alert_type write_io_register32(u32 address, u32 value); cpu_alert_type write_io_register32(u32 address, u32 value);
@ -2624,6 +2651,15 @@ static void emit_mem_access_loadop(
}; };
} }
/* genccall(fn): emit a call from generated code to the C function at address
 * fn. As with a plain jal, the instruction emitted right after this macro
 * lands in the call's delay slot.
 *
 * PIC builds load the full 32-bit address into $t9 (upper half with lui,
 * lower half with ori) and call through jalr, since the PIC calling
 * convention expects the callee address in $t9. Non-PIC builds emit a
 * direct jal.
 *
 * The argument is parenthesized before the cast so that any expression
 * (array element, conditional, sum) is converted as a whole, and the ori
 * immediate is explicitly limited to the low 16 bits so it cannot spill into
 * the register/opcode fields of the emitted word.
 */
#ifdef PIC
  #define genccall(fn)                                                        \
    mips_emit_lui(mips_reg_t9, ((u32)(fn)) >> 16);                            \
    mips_emit_ori(mips_reg_t9, mips_reg_t9, ((u32)(fn)) & 0xFFFF);            \
    mips_emit_jalr(mips_reg_t9);
#else
  #define genccall(fn) mips_emit_jal(((u32)(fn)) >> 2);
#endif
// Stub memory map: // Stub memory map:
// 0 .. 63 First patch handler [#0] // 0 .. 63 First patch handler [#0]
// 448 .. 511 Last patch handler [#7] // 448 .. 511 Last patch handler [#7]
@ -2721,7 +2757,7 @@ static void emit_pmemld_stub(
emit_save_regs(aligned); emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
mips_emit_jal(((u32)&load_gamepak_page) >> 2); genccall(&load_gamepak_page);
mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1); mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1); mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1);
@ -2891,6 +2927,21 @@ static void emit_pmemst_stub(
*tr_ptr = translation_ptr; *tr_ptr = translation_ptr;
} }
#ifdef USE_BGR_FORMAT
/* 0BGR to BGR565, for PSP.
 * Input colour is taken from reg_a1, the converted value is left in reg_temp.
 * The input is shifted left by one and masked with 0xFFC0 (bits 6..15 kept),
 * then the low five bits of reg_a1 are inserted at bit 0. */
#define palette_convert() \
mips_emit_sll(reg_temp, reg_a1, 1); \
mips_emit_andi(reg_temp, reg_temp, 0xFFC0); \
mips_emit_ins(reg_temp, reg_a1, 0, 5);
#else
/* 0BGR to RGB565 (clobbers a0!)
 * Input colour is taken from reg_a1, the converted value is left in reg_temp,
 * and reg_a0 is overwritten as scratch.
 * The 5-bit field at bit 10 of the input is extracted into reg_temp, the low
 * five bits of the input are inserted at bit 11, and the 5-bit field at bit 5
 * of the input is inserted at bit 6. */
#define palette_convert() \
mips_emit_ext(reg_temp, reg_a1, 10, 5); \
mips_emit_ins(reg_temp, reg_a1, 11, 5); \
mips_emit_ext(reg_a0, reg_a1, 5, 5); \
mips_emit_ins(reg_temp, reg_a0, 6, 5);
#endif
// Palette is accessed differently and stored in a decoded manner // Palette is accessed differently and stored in a decoded manner
static void emit_palette_hdl( static void emit_palette_hdl(
unsigned memop_number, const t_stub_meminfo *meminfo, unsigned memop_number, const t_stub_meminfo *meminfo,
@ -2923,17 +2974,13 @@ static void emit_palette_hdl(
mips_emit_sh(reg_a1, reg_base, 0x100); mips_emit_sh(reg_a1, reg_base, 0x100);
} }
mips_emit_sll(reg_temp, reg_a1, 1); palette_convert();
mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
mips_emit_ins(reg_temp, reg_a1, 0, 5);
mips_emit_sh(reg_temp, reg_rv, 0x500); mips_emit_sh(reg_temp, reg_rv, 0x500);
if (size == 2) { if (size == 2) {
// Convert the second half-word also // Convert the second half-word also
mips_emit_srl(reg_a1, reg_a1, 16); mips_emit_srl(reg_a1, reg_a1, 16);
mips_emit_sll(reg_temp, reg_a1, 1); palette_convert();
mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
mips_emit_ins(reg_temp, reg_a1, 0, 5);
mips_emit_sh(reg_temp, reg_rv, 0x502); mips_emit_sh(reg_temp, reg_rv, 0x502);
} }
generate_function_return_swap_delay(); generate_function_return_swap_delay();
@ -2980,6 +3027,7 @@ static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
// Stubs for regions with EEPROM or flash/SRAM // Stubs for regions with EEPROM or flash/SRAM
static void emit_saveaccess_stub(u8 **tr_ptr) { static void emit_saveaccess_stub(u8 **tr_ptr) {
unsigned opt, i, strop;
u8 *translation_ptr = *tr_ptr; u8 *translation_ptr = *tr_ptr;
const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} }; const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} };
@ -2995,9 +3043,9 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
emit_mem_call(&write_eeprom, 0x3FF); emit_mem_call(&write_eeprom, 0x3FF);
// Map loads to the read handler. // Map loads to the read handler.
for (unsigned opt = 0; opt < 6; opt++) { for (opt = 0; opt < 6; opt++) {
// Unalignment is not relevant here, so map them all to the same handler. // Unalignment is not relevant here, so map them all to the same handler.
for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++) for (i = opmap[opt][0]; i < opmap[opt][1]; i++)
tmemld[i][13] = (u32)translation_ptr; tmemld[i][13] = (u32)translation_ptr;
// Emit just a check + patch jump // Emit just a check + patch jump
mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_srl(reg_temp, reg_a0, 24);
@ -3007,7 +3055,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr)); mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr));
} }
// This is for stores // This is for stores
for (unsigned strop = 0; strop <= 3; strop++) { for (strop = 0; strop <= 3; strop++) {
tmemst[strop][13] = (u32)translation_ptr; tmemst[strop][13] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0D); mips_emit_xori(reg_rv, reg_temp, 0x0D);
@ -3017,7 +3065,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
} }
// Flash/SRAM/Backup writes are only 8 byte supported // Flash/SRAM/Backup writes are only 8 byte supported
for (unsigned strop = 0; strop <= 3; strop++) { for (strop = 0; strop <= 3; strop++) {
tmemst[strop][14] = (u32)translation_ptr; tmemst[strop][14] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0E); mips_emit_xori(reg_rv, reg_temp, 0x0E);
@ -3038,7 +3086,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
(u32)&write_io_register8, (u32)&write_io_register16, (u32)&write_io_register8, (u32)&write_io_register16,
(u32)&write_io_register32, (u32)&write_io_register32 }; (u32)&write_io_register32, (u32)&write_io_register32 };
const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC}; const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC};
for (unsigned strop = 0; strop <= 3; strop++) { for (strop = 0; strop <= 3; strop++) {
tmemst[strop][4] = (u32)translation_ptr; tmemst[strop][4] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24); mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_temp, reg_temp, 0x04); mips_emit_xori(reg_temp, reg_temp, 0x04);
@ -3047,7 +3095,7 @@ static void emit_saveaccess_stub(u8 **tr_ptr) {
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr
emit_save_regs(strop == 3); emit_save_regs(strop == 3);
mips_emit_andi(reg_a0, reg_a0, amsk[strop]); mips_emit_andi(reg_a0, reg_a0, amsk[strop]);
mips_emit_jal(iowrtbl[strop] >> 2); genccall(iowrtbl[strop]);
if (strop < 3) { if (strop < 3) {
mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay) mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay)
@ -3069,6 +3117,7 @@ static void emit_openload_stub(
unsigned memopn, bool signext, unsigned size, unsigned memopn, bool signext, unsigned size,
unsigned alignment, bool aligned, u8 **tr_ptr unsigned alignment, bool aligned, u8 **tr_ptr
) { ) {
u8 *jmp1, *jmp2;
u8 *translation_ptr = *tr_ptr; u8 *translation_ptr = *tr_ptr;
// This affects regions 1 and 15 // This affects regions 1 and 15
@ -3105,30 +3154,31 @@ static void emit_openload_stub(
switch (size) { switch (size) {
case 0: case 0:
mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode
mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB
mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB mips_emit_andi(reg_temp, reg_a0, 0x1); // Thb: Isolate one LSB
mips_emit_jal(((u32)&read_memory8) >> 2); mips_emit_movn(reg_a0, reg_temp, reg_rv); // Pick thumb or ARM
genccall(&read_memory8);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break; break;
case 1: case 1:
mips_emit_b(beq, reg_zero, reg_rv, 2);
mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1 mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1
mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all mips_emit_movn(reg_a0, reg_zero, reg_rv); // Thumb: ignore all low bits
mips_emit_jal(((u32)&read_memory16) >> 2); genccall(&read_memory16);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay) mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break; break;
default: default:
mips_emit_b(beq, reg_zero, reg_rv, 5); mips_emit_b_filler(beq, reg_zero, reg_rv, jmp1);
mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0 mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0
mips_emit_jal(((u32)&read_memory16) >> 2); genccall(&read_memory16);
mips_emit_nop(); mips_emit_nop();
mips_emit_b(beq, reg_zero, reg_zero, 3); mips_emit_b_filler(beq, reg_zero, reg_zero, jmp2);
mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay] mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay]
mips_emit_jal(((u32)&read_memory32) >> 2); generate_branch_patch_conditional(jmp1, translation_ptr);
genccall(&read_memory32);
mips_emit_nop(); mips_emit_nop();
generate_branch_patch_conditional(jmp2, translation_ptr);
break; break;
}; };
@ -3194,9 +3244,14 @@ static void emit_phand(
mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate
mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction! mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction!
#ifdef PSP
mips_emit_cache(0x1A, mips_reg_ra, -8); mips_emit_cache(0x1A, mips_reg_ra, -8);
mips_emit_jr(reg_rv); // Jump directly to target for speed mips_emit_jr(reg_rv); // Jump directly to target for speed
mips_emit_cache(0x08, mips_reg_ra, -8); mips_emit_cache(0x08, mips_reg_ra, -8);
#else
mips_emit_jr(reg_rv);
mips_emit_synci(mips_reg_ra, -8);
#endif
// Round up handlers to 16 instructions for easy addressing :) // Round up handlers to 16 instructions for easy addressing :)
while (translation_ptr - *tr_ptr < 64) { while (translation_ptr - *tr_ptr < 64) {
@ -3212,6 +3267,7 @@ static void emit_phand(
// - mem stubs: There's stubs for load & store, and every memory region // - mem stubs: There's stubs for load & store, and every memory region
// and possible operand size and misaligment (+sign extensions) // and possible operand size and misaligment (+sign extensions)
void init_emitter() { void init_emitter() {
int i;
// Initialize memory to a debuggable state // Initialize memory to a debuggable state
memset(stub_arena, 0, sizeof(stub_arena)); // nop memset(stub_arena, 0, sizeof(stub_arena)); // nop
@ -3234,25 +3290,8 @@ void init_emitter() {
emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32 emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32
emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32 emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32
// Generate SMC write handler, with the lookup machinery // This is just a trampoline (for the SMC branches)
// Call out the flushing routine (save PC) mips_emit_j(((u32)&smc_write) >> 2);
emit_save_regs(false);
mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2);
mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot
mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode
mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc
// Lookup thumb PC and execute
mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2);
mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return!
// Lookup arm PC and execute
mips_emit_jal(((u32)&block_lookup_address_arm) >> 2);
mips_emit_nop();
// Epiloge (restore and jump)
emit_restore_regs(false);
mips_emit_jr(reg_rv); // Go execute the code
mips_emit_nop(); mips_emit_nop();
// Generate the openload handlers (for accesses to unmapped mem) // Generate the openload handlers (for accesses to unmapped mem)
@ -3269,7 +3308,7 @@ void init_emitter() {
emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32 emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32
// Here we emit the ignore store area, just checks and does nothing // Here we emit the ignore store area, just checks and does nothing
for (unsigned i = 0; i < 4; i++) for (i = 0; i < 4; i++)
emit_ignorestore_stub(i, &translation_ptr); emit_ignorestore_stub(i, &translation_ptr);
// Here go the save game handlers // Here go the save game handlers
@ -3295,7 +3334,7 @@ void init_emitter() {
// 15 Open load / Ignore store // 15 Open load / Ignore store
}; };
for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) { for (i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter; ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter;
/* region info signext sz al isaligned */ /* region info signext sz al isaligned */
handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8 handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8
@ -3325,7 +3364,7 @@ void init_emitter() {
// Store only for "regular"-ish mem regions // Store only for "regular"-ish mem regions
// //
for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) { for (i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
sthldr_t handler = (sthldr_t)stinfo[i].emitter; sthldr_t handler = (sthldr_t)stinfo[i].emitter;
handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8 handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8
handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16 handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16
@ -3334,6 +3373,11 @@ void init_emitter() {
} }
} }
// Assembly entry point exported by the MIPS stub file. It receives the cycle
// budget and the address that the stubs use as their base register.
u32 execute_arm_translate_internal(u32 cycles, void *regptr);

// C-side wrapper around the assembly entry point: it supplies the address of
// the register file so the stub does not have to materialize it itself.
u32 function_cc execute_arm_translate(u32 cycles) {
  void *regfile = (void *)&reg[0];
  return execute_arm_translate_internal(cycles, regfile);
}
#endif #endif

View File

@ -33,13 +33,14 @@
.global execute_lsr_flags_reg .global execute_lsr_flags_reg
.global execute_asr_flags_reg .global execute_asr_flags_reg
.global execute_ror_flags_reg .global execute_ror_flags_reg
.global execute_arm_translate .global execute_arm_translate_internal
.global icache_region_sync .global icache_region_sync
.global reg_check .global reg_check
.global palette_ram .global palette_ram
.global palette_ram_converted .global palette_ram_converted
.global init_emitter .global init_emitter
.global mips_lookup_pc .global mips_lookup_pc
.global smc_write
.global write_io_epilogue .global write_io_epilogue
.global memory_map_read .global memory_map_read
@ -120,6 +121,7 @@
.equ REGMODE_BASE, (0x900 + 24) .equ REGMODE_BASE, (0x900 + 24)
.equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE) .equ SUPERVISOR_SPSR, (3 * 4 + SPSR_BASE)
.equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE) .equ SUPERVISOR_LR, ((3 * (7 * 4)) + (6 * 4) + REGMODE_BASE)
.equ FNPTRS_BASE, (0x900 + 220 + 960)
.set noat .set noat
.set noreorder .set noreorder
@ -196,6 +198,22 @@
lw $30, REG_R14($16) lw $30, REG_R14($16)
.endm .endm
# PIC ABI mandates to jump to target via $t9
# cfncall target, targetid: call a C function from the stubs.
#   target    symbol name, used only by the non-PIC variant (direct jal)
#   targetid  index into the fnptrs table, used only by the PIC variant,
#             which loads the pointer from FNPTRS_BASE + targetid * 4
#             relative to the base register $16
# Both variants place a nop in the delay slot, so callers must set up
# arguments before invoking the macro.
#ifdef PIC
.macro cfncall target, targetid
lw $t9, (FNPTRS_BASE + \targetid * 4)($16)    # t9 = function pointer entry
jalr $t9
nop                                           # delay slot
.endm
#else
.macro cfncall target, targetid
jal \target
nop                                           # delay slot
.endm
#endif
# Process a hardware event. Since an interrupt might be # Process a hardware event. Since an interrupt might be
# raised we have to check if the PC has changed. # raised we have to check if the PC has changed.
@ -213,8 +231,8 @@ mips_update_gba:
sw $ra, REG_SAVE2($16) # save return addr sw $ra, REG_SAVE2($16) # save return addr
collapse_flags # update cpsr collapse_flags # update cpsr
save_registers # save registers save_registers # save registers
jal update_gba # process the next event
sw $0, CHANGED_PC_STATUS($16) sw $0, CHANGED_PC_STATUS($16)
cfncall update_gba, 0 # process the next event
lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame
bne $1, $0, return_to_main # Return to main thread now bne $1, $0, return_to_main # Return to main thread now
@ -257,26 +275,24 @@ return_to_main:
mips_indirect_branch_arm: mips_indirect_branch_arm:
save_registers save_registers
jal block_lookup_address_arm # $2 = MIPS address to jump to cfncall block_lookup_address_arm, 1
nop
restore_registers restore_registers
jr $2 # jump to it jr $2 # $2 = value returned
nop nop
mips_indirect_branch_thumb: mips_indirect_branch_thumb:
save_registers save_registers
jal block_lookup_address_thumb # $2 = MIPS address to jump to cfncall block_lookup_address_thumb, 2
nop
restore_registers restore_registers
jr $2 # jump to it jr $2 # $2 = value returned
nop nop
mips_indirect_branch_dual: mips_indirect_branch_dual:
save_registers save_registers
jal block_lookup_address_dual # $2 = MIPS address to jump to cfncall block_lookup_address_dual, 3
nop nop
restore_registers restore_registers
jr $2 # jump to it jr $2 # $2 = value returned
nop nop
@ -293,8 +309,7 @@ write_io_epilogue:
alert_loop: alert_loop:
jal update_gba # process the next event cfncall update_gba, 0 # process the next event
nop
lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame lw $1, COMPLETED_FRAME($16) # Check whether we completed a frame
bne $1, $0, return_to_main # Return to main thread now bne $1, $0, return_to_main # Return to main thread now
@ -321,15 +336,14 @@ no_alert:
nop nop
smc_dma: smc_dma:
jal flush_translation_cache_ram # flush translation cache cfncall flush_translation_cache_ram, 4
nop
j lookup_pc j lookup_pc
nop nop
smc_write: smc_write:
save_registers save_registers
jal flush_translation_cache_ram # flush translation cache sw $6, REG_PC($16) # save PC
sw $6, REG_PC($16) # save PC (delay slot) cfncall flush_translation_cache_ram, 4
mips_lookup_pc: mips_lookup_pc:
lookup_pc: lookup_pc:
@ -339,17 +353,17 @@ lookup_pc:
nop nop
lookup_pc_thumb: lookup_pc_thumb:
jal block_lookup_address_thumb # get Thumb address lw $4, REG_PC($16) # load PC as arg 0
lw $4, REG_PC($16) # load PC as arg 0 (delay slot) cfncall block_lookup_address_thumb, 2 # get Thumb address
restore_registers restore_registers
jr $2 # jump to result jr $2 # jump to result
nop nop
lookup_pc_arm: lookup_pc_arm:
jal block_lookup_address_arm # get ARM address lw $4, REG_PC($16) # load PC as arg 0
lw $4, REG_PC($16) # load PC as arg 0 (delay slot) cfncall block_lookup_address_arm, 1 # get ARM address
restore_registers restore_registers
jr $2 # jump to result jr $2 # jump to result
nop nop
# Return the current cpsr # Return the current cpsr
@ -381,8 +395,8 @@ execute_swi:
ori $2, 0x13 # set mode to supervisor ori $2, 0x13 # set mode to supervisor
sw $2, REG_CPSR($16) # write back CPSR sw $2, REG_CPSR($16) # write back CPSR
save_registers save_registers
jal set_cpu_mode # set the CPU mode to supervisor li $4, 3 # 3 is supervisor mode
li $4, 3 # 3 is supervisor mode (delay slot) cfncall set_cpu_mode, 5 # set the CPU mode to supervisor
restore_registers restore_registers
lw $ra, ($sp) # pop $ra lw $ra, ($sp) # pop $ra
jr $ra # return jr $ra # return
@ -404,8 +418,7 @@ execute_spsr_restore:
addiu $sp, $sp, -4 addiu $sp, $sp, -4
sw $ra, ($sp) sw $ra, ($sp)
save_registers save_registers
jal execute_spsr_restore_body # do the dirty work in this C function cfncall execute_spsr_restore_body, 6 # do the dirty work in this C function
nop
restore_registers restore_registers
addu $4, $2, $0 # move return value to $4 addu $4, $2, $0 # move return value to $4
lw $ra, ($sp) lw $ra, ($sp)
@ -429,8 +442,8 @@ execute_store_cpsr:
extract_flags_body # extract flags from $1 extract_flags_body # extract flags from $1
sw $ra, REG_SAVE3($16) sw $ra, REG_SAVE3($16)
save_registers save_registers
jal execute_store_cpsr_body # do the dirty work in this C function addu $4, $1, $0 # load the new CPSR
addu $4, $1, $0 # load the new CPSR (delay slot) cfncall execute_store_cpsr_body, 7 # do the dirty work in this C function
bne $2, $0, changed_pc_cpsr # this could have changed the pc bne $2, $0, changed_pc_cpsr # this could have changed the pc
nop nop
@ -442,10 +455,10 @@ execute_store_cpsr:
nop nop
changed_pc_cpsr: changed_pc_cpsr:
jal block_lookup_address_arm # GBA address is in $4 addu $4, $2, $0 # load new address in $4
addu $4, $2, $0 # load new address in $4 (delay slot) cfncall block_lookup_address_arm, 1 # GBA address is in $4
restore_registers # restore registers restore_registers # restore registers
jr $2 # jump to the new address jr $2 # jump to the new address
nop nop
@ -549,8 +562,9 @@ ror_zero_shift:
rotrv $4, $4, $5 # return (value ror shift) delay rotrv $4, $4, $5 # return (value ror shift) delay
# $4: cycle counter argument # $4: cycle counter argument
# $5: pointer to reg
execute_arm_translate: execute_arm_translate_internal:
add $sp, $sp, -48 # Store the main thread context add $sp, $sp, -48 # Store the main thread context
sw $s0, 0($sp) sw $s0, 0($sp)
sw $s1, 4($sp) sw $s1, 4($sp)
@ -563,9 +577,7 @@ execute_arm_translate:
sw $fp, 32($sp) sw $fp, 32($sp)
sw $ra, 36($sp) sw $ra, 36($sp)
lui $16, %hi(reg) # load reg address into base reg move $16, $5
addiu $16, %lo(reg)
sw $28, GP_SAVE($16) sw $28, GP_SAVE($16)
addu $17, $4, $0 # load cycle counter register addu $17, $4, $0 # load cycle counter register
@ -582,15 +594,13 @@ execute_arm_translate:
bne $1, $0, 1f bne $1, $0, 1f
lw $4, REG_PC($16) # load PC into $4 (delay) lw $4, REG_PC($16) # load PC into $4 (delay)
jal block_lookup_address_arm # lookup initial jump address cfncall block_lookup_address_arm, 1
nop
restore_registers # load initial register values restore_registers # load initial register values
jr $2 # jump to return jr $2 # jump to return
nop nop
1: 1:
jal block_lookup_address_thumb # lookup initial jump address cfncall block_lookup_address_thumb, 2
nop
restore_registers # load initial register values restore_registers # load initial register values
jr $2 # jump to return jr $2 # jump to return
nop nop
@ -629,5 +639,13 @@ tmemld:
.space 704 .space 704
tmemst: tmemst:
.space 256 .space 256
# Table of C function addresses read by the PIC variant of cfncall, which
# loads entry N from FNPTRS_BASE + N * 4 relative to the base register.
# The number in each trailing comment is the targetid used at the call
# sites, so entries must not be reordered without updating every cfncall.
# NOTE(review): FNPTRS_BASE is 0x900 + 220 + 960, and 960 equals the
# 704 + 256 bytes reserved just above for tmemld and tmemst, so this table
# presumably has to stay directly after tmemst. Confirm before moving it.
fnptrs:
.long update_gba # 0
.long block_lookup_address_arm # 1
.long block_lookup_address_thumb # 2
.long block_lookup_address_dual # 3
.long flush_translation_cache_ram # 4
.long set_cpu_mode # 5
.long execute_spsr_restore_body # 6
.long execute_store_cpsr_body # 7