Rewrite of the MIPS dynarec stubs

This allows us to emit the handlers directly and more efficiently.
It also makes it easy to emit PIC code, which libretro requires, and
enables more platform-specific optimizations and variations, perhaps
even run-time multi-platform support.
David Guillen Fandos 2021-03-03 01:38:09 +01:00
parent b9ac453675
commit 5ffd2832e8
8 changed files with 948 additions and 52 deletions


@ -403,8 +403,8 @@ ifeq ($(DEBUG), 1)
OPTIMIZE_SAFE := -O0 -g
OPTIMIZE := -O0 -g
else
OPTIMIZE_SAFE := -O2 -DNDEBUG
OPTIMIZE := -O3 -DNDEBUG
OPTIMIZE_SAFE := -O2 -DNDEBUG -g
OPTIMIZE := -O3 -DNDEBUG -g
endif

cpu.h

@ -122,21 +122,19 @@ s32 translate_block_thumb(u32 pc, translation_region_type translation_region,
u32 smc_enable);
#if defined(PSP)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024)
#else
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 512 * 4 * 5)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384 * 2)
#define BIOS_TRANSLATION_CACHE_SIZE (1024 * 128 * 2)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 32)
#endif
#define STUB_ARENA_SIZE (4*1024)
#if defined(HAVE_MMAP)
extern u8* rom_translation_cache;
extern u8* ram_translation_cache;
@ -157,6 +155,7 @@ extern int sceBlock;
extern u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE];
extern u8 ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE];
extern u8 bios_translation_cache[BIOS_TRANSLATION_CACHE_SIZE];
extern u32 stub_arena[STUB_ARENA_SIZE];
#endif
extern u8 *rom_translation_ptr;
extern u8 *ram_translation_ptr;


@ -62,6 +62,8 @@ __asm__(".section .jit,\"awx\",%progbits");
__asm__(".section .jit,\"awx\",%nobits");
#endif
u32 stub_arena[STUB_ARENA_SIZE]
__attribute__ ((aligned(4),section(".jit")));
u8 rom_translation_cache[ROM_TRANSLATION_CACHE_SIZE]
__attribute__ ((aligned(4),section(".jit")));
u8 *rom_translation_ptr = rom_translation_cache;
@ -3773,3 +3775,5 @@ void dump_translation_cache(void)
bios_translation_ptr - bios_translation_cache, fd);
fclose(fd);
}


@ -427,7 +427,7 @@ u32 eeprom_address = 0;
s32 eeprom_counter = 0;
u8 eeprom_buffer[8];
void function_cc write_eeprom(u32 address, u32 value)
void function_cc write_eeprom(u32 unused_address, u32 value)
{
switch(eeprom_mode)
{
@ -749,6 +749,7 @@ static cpu_alert_type trigger_dma(u32 dma_number, u32 value)
cpu_alert_type function_cc write_io_register8(u32 address, u32 value)
{
value &= 0xff;
switch(address)
{
case 0x00:
@ -1165,6 +1166,7 @@ cpu_alert_type function_cc write_io_register8(u32 address, u32 value)
cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
{
value &= 0xffff;
switch(address)
{
case 0x00:


@ -163,6 +163,11 @@ u32 function_cc read_memory32(u32 address);
cpu_alert_type function_cc write_memory8(u32 address, u8 value);
cpu_alert_type function_cc write_memory16(u32 address, u16 value);
cpu_alert_type function_cc write_memory32(u32 address, u32 value);
u32 function_cc read_eeprom(void);
void function_cc write_eeprom(u32 address, u32 value);
u8 read_backup(u32 address);
void function_cc write_backup(u32 address, u32 value);
void function_cc write_rtc(u32 address, u32 value);
extern u8 *memory_regions[16];
extern u32 memory_limits[16];

main.c

@ -117,6 +117,7 @@ void init_main(void)
flush_translation_cache_rom();
flush_translation_cache_ram();
flush_translation_cache_bios();
init_emitter();
#endif
}


@ -20,6 +20,19 @@
#ifndef MIPS_EMIT_H
#define MIPS_EMIT_H
// Pointers to default handlers.
// Use IWRAM as default, assume aligned by default too
#define execute_load_u8 tmemld[0][3]
#define execute_load_s8 tmemld[1][3]
#define execute_load_u16 tmemld[2][3]
#define execute_load_s16 tmemld[4][3]
#define execute_load_u32 tmemld[6][3]
#define execute_aligned_load32 tmemld[10][3]
#define execute_store_u8 tmemst[0][3]
#define execute_store_u16 tmemst[1][3]
#define execute_store_u32 tmemst[2][3]
#define execute_aligned_store32 tmemst[3][3]
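As a hedged illustration (not code from this commit; emit_default_load_u8 is a made-up helper name), this is roughly how translated code ends up calling one of these default handlers through the emitter macros defined further down in this header:

// Sketch only: emit "jal <tmemld[0][3]>; nop" so generated code calls the
// default (IWRAM, aligned) u8 load stub. If the address belongs to another
// region at run time, the stub branches to a patch handler that rewrites
// this JAL to point at the correct region handler.
static void emit_default_load_u8(u8 **tr_ptr)
{
  u8 *translation_ptr = *tr_ptr;
  mips_emit_jal(mips_absolute_offset(execute_load_u8));  // tmemld[0][3]
  mips_emit_nop();                                        // branch delay slot
  *tr_ptr = translation_ptr;
}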
u32 mips_update_gba(u32 pc);
// Although these are defined as a function, don't call them as
@ -44,9 +57,6 @@ u32 execute_lsr_flags_reg(u32 value, u32 shift);
u32 execute_asr_flags_reg(u32 value, u32 shift);
u32 execute_ror_flags_reg(u32 value, u32 shift);
void execute_aligned_store32(u32 address, u32 value);
u32 execute_aligned_load32(u32 address);
void reg_check();
typedef enum
@ -97,6 +107,7 @@ typedef enum
mips_special_jalr = 0x09,
mips_special_movz = 0x0A,
mips_special_movn = 0x0B,
mips_special_sync = 0x0F,
mips_special_mfhi = 0x10,
mips_special_mthi = 0x11,
mips_special_mflo = 0x12,
@ -116,7 +127,9 @@ typedef enum
mips_special_xor = 0x26,
mips_special_nor = 0x27,
mips_special_slt = 0x2A,
mips_special_sltu = 0x2B
mips_special_sltu = 0x2B,
mips_special_max = 0x2C,
mips_special_min = 0x2D,
} mips_function_special;
typedef enum
@ -126,10 +139,18 @@ typedef enum
mips_special3_bshfl = 0x20
} mips_function_special3;
typedef enum
{
mips_bshfl_seb = 0x10,
mips_bshfl_seh = 0x18,
mips_bshfl_wsbh = 0x02,
} mips_function_bshfl;
typedef enum
{
mips_regimm_bltz = 0x00,
mips_regimm_bltzal = 0x10
mips_regimm_bltzal = 0x10,
mips_regimm_synci = 0x1F
} mips_function_regimm;
typedef enum
@ -163,8 +184,14 @@ typedef enum
mips_opcode_sb = 0x28,
mips_opcode_sh = 0x29,
mips_opcode_sw = 0x2B,
mips_opcode_cache = 0x2F,
} mips_opcode;
#define mips_emit_cache(operation, rs, immediate) \
*((u32 *)translation_ptr) = (mips_opcode_cache << 26) | \
(rs << 21) | (operation << 16) | (immediate & 0xFFFF); \
translation_ptr += 4 \
#define mips_emit_reg(opcode, rs, rt, rd, shift, function) \
*((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
(rs << 21) | (rt << 16) | (rd << 11) | (shift << 6) | function; \
@ -184,12 +211,12 @@ typedef enum
#define mips_emit_imm(opcode, rs, rt, immediate) \
*((u32 *)translation_ptr) = (mips_opcode_##opcode << 26) | \
(rs << 21) | (rt << 16) | (immediate & 0xFFFF); \
(rs << 21) | (rt << 16) | ((immediate) & 0xFFFF); \
translation_ptr += 4 \
#define mips_emit_regimm(function, rs, immediate) \
*((u32 *)translation_ptr) = (mips_opcode_regimm << 26) | \
(rs << 21) | (mips_regimm_##function << 16) | (immediate & 0xFFFF); \
(rs << 21) | (mips_regimm_##function << 16) | ((immediate) & 0xFFFF); \
translation_ptr += 4 \
#define mips_emit_jump(opcode, offset) \
@ -203,6 +230,12 @@ typedef enum
#define mips_absolute_offset(offset) \
((u32)offset / 4) \
#define mips_emit_max(rd, rs, rt) \
mips_emit_special(max, rs, rt, rd, 0) \
#define mips_emit_min(rd, rs, rt) \
mips_emit_special(min, rs, rt, rd, 0) \
#define mips_emit_addu(rd, rs, rt) \
mips_emit_special(addu, rs, rt, rd, 0) \
@ -293,6 +326,9 @@ typedef enum
#define mips_emit_movz(rd, rs, rt) \
mips_emit_special(movz, rs, rt, rd, 0) \
#define mips_emit_sync() \
mips_emit_special(sync, 0, 0, 0, 0) \
#define mips_emit_lb(rt, rs, offset) \
mips_emit_imm(lb, rs, rt, offset) \
@ -344,6 +380,12 @@ typedef enum
#define mips_emit_ins(rt, rs, pos, size) \
mips_emit_special3(ins, rs, rt, (pos + size - 1), pos) \
#define mips_emit_seb(rt, rd) \
mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seb) \
#define mips_emit_seh(rt, rd) \
mips_emit_special3(bshfl, 0, rt, rd, mips_bshfl_seh) \
// Breaks down if the backpatch offset is greater than 16bits, take care
// when using (should be okay if limited to conditional instructions)
@ -369,9 +411,15 @@ typedef enum
#define mips_emit_jr(rs) \
mips_emit_special(jr, rs, 0, 0, 0) \
#define mips_emit_synci(rs, offset) \
mips_emit_regimm(synci, rs, offset) \
#define mips_emit_bltzal(rs, offset) \
mips_emit_regimm(bltzal, rs, offset) \
#define mips_emit_bltz(rs, offset) \
mips_emit_regimm(bltz, rs, offset) \
#define mips_emit_nop() \
mips_emit_sll(reg_zero, reg_zero, 0) \
@ -566,6 +614,15 @@ u32 arm_to_mips_reg[] =
translation_ptr += 4; \
} \
#define generate_function_return_swap_delay() \
{ \
u32 delay_instruction = address32(translation_ptr, -4); \
translation_ptr -= 4; \
mips_emit_jr(mips_reg_ra); \
address32(translation_ptr, 0) = delay_instruction; \
translation_ptr += 4; \
} \
#define generate_swap_delay() \
{ \
u32 delay_instruction = address32(translation_ptr, -8); \
@ -2468,4 +2525,815 @@ u8 swi_hle_handle[256] =
generate_load_pc(reg_a0, pc); \
mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \
// Register save layout as follows:
#define ReOff_RegPC (15*4) // REG_PC
#define ReOff_CPSR (20*4) // REG_CPSR
#define ReOff_SaveR1 (21*4) // 3 save scratch regs
#define ReOff_SaveR2 (22*4)
#define ReOff_SaveR3 (23*4)
#define ReOff_GP_Save (32*4) // GP_SAVE
// Saves all regs to their right slot and loads gp
#define emit_save_regs(save_a2) \
for (unsigned i = 0; i < 15; i++) { \
mips_emit_sw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \
if (save_a2) { \
mips_emit_sw(reg_a2, reg_base, ReOff_SaveR2); \
} \
/* Load the gp pointer, used by C code */ \
mips_emit_lw(mips_reg_gp, reg_base, ReOff_GP_Save); \
// Restores the registers from their slot
#define emit_restore_regs(restore_a2) \
if (restore_a2) { \
mips_emit_lw(reg_a2, reg_base, ReOff_SaveR2); \
} \
for (unsigned i = 0; i < 15; i++) { \
mips_emit_lw(arm_to_mips_reg[i], reg_base, 4 * i); \
} \
// Emits a function call for a read or a write (for special stuff like flash)
#define emit_mem_call_ds(fnptr, mask) \
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1); \
emit_save_regs(true); \
mips_emit_jal(((u32)(fnptr)) >> 2); \
mips_emit_andi(reg_a0, reg_a0, (mask)); \
emit_restore_regs(true); \
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1); \
mips_emit_jr(mips_reg_ra);
#define emit_mem_call(fnptr, mask) \
emit_mem_call_ds(fnptr, mask) \
mips_emit_nop();
// Pointer table to stubs, indexed by type and region
// Caution! This is not really a ptr table, but contains pre-encoded JALs
extern u32 tmemld[11][16];
extern u32 tmemst[ 4][16];
void mips_lookup_pc();
cpu_alert_type write_io_register8 (u32 address, u32 value);
cpu_alert_type write_io_register16(u32 address, u32 value);
cpu_alert_type write_io_register32(u32 address, u32 value);
void write_io_epilogue();
// This is a pointer table to the open load stubs, used by the BIOS (optimization)
u32* openld_core_ptrs[11];
const u8 ldhldrtbl[11] = {0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5};
#define ld_phndlr_branch(memop) \
(((u32*)&stub_arena[ldhldrtbl[(memop)] * 16]) - ((u32*)translation_ptr + 1))
#define st_phndlr_branch(memop) \
(((u32*)&stub_arena[((memop) + 6) * 16]) - ((u32*)translation_ptr + 1))
#define branch_handlerid(phndlrid) \
(((u32*)&stub_arena[(phndlrid) * 16]) - ((u32*)translation_ptr + 1))
#define branch_offset(ptr) \
(((u32*)ptr) - ((u32*)translation_ptr + 1))
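A hedged sketch of what these macros compute (load_patcher_distance is an illustrative helper, not part of the commit): each patch handler occupies 16 words (64 bytes) of stub_arena, the six load patchers come first, the four store patchers sit in slots 6..9, and ldhldrtbl folds the eleven load variants onto the six load patchers. The + 1 accounts for MIPS branch immediates being counted in words from the delay slot.

// Sketch only: signed word distance from a branch emitted at translation_ptr
// to the patch handler that services the given load memop (what
// ld_phndlr_branch expands to).
static s32 load_patcher_distance(unsigned memop, u8 *translation_ptr)
{
  u32 *handler = &stub_arena[ldhldrtbl[memop] * 16];     // 16 u32 per handler
  return (s32)(handler - ((u32 *)translation_ptr + 1));  // measured from the delay slot
}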
static void emit_mem_access_loadop(
u8 *translation_ptr,
u32 base_addr, unsigned size, unsigned alignment, bool signext)
{
switch (size) {
case 2:
mips_emit_lw(reg_rv, reg_rv, (base_addr & 0xffff));
break;
case 1:
if (signext) {
// Load 16 with sign extension is essentially a load byte
if (alignment) {
mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff));
} else {
mips_emit_lh(reg_rv, reg_rv, (base_addr & 0xffff));
}
} else {
mips_emit_lhu(reg_rv, reg_rv, (base_addr & 0xffff));
}
break;
default:
if (signext) {
mips_emit_lb(reg_rv, reg_rv, (base_addr & 0xffff));
} else {
mips_emit_lbu(reg_rv, reg_rv, (base_addr & 0xffff));
}
break;
};
}
// Stub memory map:
// 0 .. 63 First patch handler [#0]
// 448 .. 511 Last patch handler [#7]
// 512+ smc_write handler
#define SMC_WRITE_OFF32 160
// Describes a "plain" memory area, that is, an area that is just accessed
// as normal memory (with some caveats though).
typedef struct {
void *emitter;
unsigned region; // Region ID (top 8 bits)
unsigned memsize; // Region size in bytes (VRAM passes 0 and is special-cased)
bool check_smc; // Whether the memory can contain code
bool bus16; // Whether it can only be accessed at 16bit
u32 baseptr; // Memory base address.
} t_stub_meminfo;
// Generates the stub to access memory for a given region, access type,
// size and misalignment.
// Handles "special" cases like weirdly mapped memory
static void emit_pmemld_stub(
unsigned memop_number, const t_stub_meminfo *meminfo,
bool signext, unsigned size,
unsigned alignment, bool aligned,
u8 **tr_ptr)
{
u8 *translation_ptr = *tr_ptr;
unsigned region = meminfo->region;
u32 base_addr = meminfo->baseptr;
if (region >= 9 && region <= 11) {
// Use the same handler for these regions (just replicas)
tmemld[memop_number][region] = tmemld[memop_number][8];
return;
}
// Clean up one or two bits (to align access). It might already be aligned!
u32 memmask = (meminfo->memsize - 1);
memmask = (memmask >> size) << size; // Clear 1 or 2 (or none) bits
// Add the stub to the table (add the JAL instruction encoded already)
tmemld[memop_number][region] = (u32)translation_ptr;
// Size: 0 (8 bits), 1 (16 bits), 2 (32 bits)
// First check we are in the right memory region
unsigned regionbits = 8;
unsigned regioncheck = region;
if (region == 8) {
// This is an optimization for ROM regions
// For region 8-11 we reuse the same code (and have a more generic check)
// Region 12 is harder to cover without changing the check (shift + xor)
regionbits = 6;
regioncheck >>= 2; // Ignore the two LSB, don't care
}
// Address checking: jumps to handler if bad region/alignment
mips_emit_srl(reg_temp, reg_a0, (32 - regionbits));
if (!aligned && size != 0) { // u8 or aligned u32 don't need to check alignment bits
mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment
}
if (regioncheck || alignment) { // If region and alignment are zero, can skip
mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits));
}
// The patcher to use depends on ld/st, access size, and sign extension
// (so there's 10 of them). They live in the top stub addresses.
mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memop_number));
// BIOS region requires extra checks for protected reads
if (region == 0) {
// BIOS is *not* mirrored, check that
mips_emit_srl(reg_rv, reg_a0, 14);
unsigned joff = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1));
mips_emit_b(bne, reg_zero, reg_rv, joff); // Jumps to read open
// Check whether the read is allowed. Only within BIOS!
// TODO: FIX THIS! This should be a protected read, not an open one!
mips_emit_srl(reg_temp, reg_a1, 14);
unsigned jof2 = (openld_core_ptrs[memop_number] - ((u32*)translation_ptr + 1));
mips_emit_b(bne, reg_zero, reg_temp, jof2);
}
if (region >= 8 && region <= 12) {
u8 *jmppatch;
// ROM area: might need to load the ROM on-demand
mips_emit_srl(reg_rv, reg_a0, 15); // 32KB page number
mips_emit_sll(reg_rv, reg_rv, 2); // (word indexed)
mips_emit_addu(reg_rv, reg_rv, reg_base); // base + offset
mips_emit_lw(reg_rv, reg_rv, 0x8000); // base[offset-0x8000]
mips_emit_b_filler(bne, reg_rv, reg_zero, jmppatch); // if not null, can skip load page
mips_emit_andi(reg_temp, reg_a0, memmask); // Get the lowest 15 bits [delay]
// This code calls the C routine to map the relevant ROM page
emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
mips_emit_jal(((u32)&load_gamepak_page) >> 2);
mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
mips_emit_lw(reg_temp, reg_base, ReOff_SaveR1);
emit_restore_regs(aligned);
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3);
generate_branch_patch_conditional(jmppatch, translation_ptr);
// Now we can proceed to load, place addr in the right register
mips_emit_addu(reg_rv, reg_rv, reg_temp);
} else if (region == 14) {
// Reads from flash are a bit special: they go through a function call
emit_mem_call_ds(&read_backup, 0xFFFF);
if (!size && signext) {
mips_emit_seb(reg_rv, reg_rv);
} else if (size == 1 && alignment) {
mips_emit_seb(reg_rv, reg_rv);
} else if (size == 2) {
mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
} else {
mips_emit_nop();
}
*tr_ptr = translation_ptr;
return;
} else {
// Generate upper bits of the addr and do addr mirroring
// (The address hi16 is rounded up since load uses signed offset)
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (region == 2) {
// EWRAM is a bit special
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && alignment != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
}
mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
// Generate regular (<=32KB) mirroring
mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
}
}
// Aligned accesses (or the weird s16u1 case) are just one inst
if (alignment == 0 || (size == 1 && signext)) {
emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot
translation_ptr += 4;
}
else {
// Unaligned accesses (require rotation) need two insts
emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
translation_ptr += 4;
mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot
}
generate_function_return_swap_delay(); // Return. Move prev inst to delay slot
*tr_ptr = translation_ptr;
}
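The EWRAM and VRAM special cases above (repeated in the store stub below) amount to the following address arithmetic; this is a hedged C rendition of the emitted andi/ext/ins sequences (byte access shown; wider accesses also clear the low alignment bits), not code from the commit:

// EWRAM (region 2): keep the low 15 bits and move GBA address bits 17..15 up
// to host offset bits 18..16, leaving bit 15 clear (per the store stub's SMC
// comment, the 32KB below each data page holds the SMC tracking buffer).
static u32 ewram_host_offset(u32 address)
{
  return (address & 0x7fff) | (((address >> 15) & 0x7) << 16);
}

// VRAM (region 6): 96KB mirrored every 128KB, with the last 32KB block mapped
// onto the previous one. Note that the emitted addiu immediate 0x8000
// sign-extends to -0x8000.
static u32 vram_host_offset(u32 address)
{
  address &= 0x1ffff;
  if ((address >> 15) == 3)
    address -= 0x8000;
  return address;
}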
// Generates the stub to store memory for a given region and size
// Handles "special" cases like weirdly mapped memory
static void emit_pmemst_stub(
unsigned memop_number, const t_stub_meminfo *meminfo,
unsigned size, bool aligned, u8 **tr_ptr)
{
u8 *translation_ptr = *tr_ptr;
unsigned region = meminfo->region;
u32 base_addr = meminfo->baseptr;
// Palette, VRAM and OAM cannot really be byte-accessed (use a 16 bit store)
bool doubleaccess = (size == 0 && meminfo->bus16);
unsigned realsize = size;
if (doubleaccess)
realsize = 1;
// Clean up one or two bits (to align access). It might already be aligned!
u32 memmask = (meminfo->memsize - 1);
memmask = (memmask >> realsize) << realsize;
// Add the stub to the table (add the JAL instruction encoded already)
tmemst[memop_number][region] = (u32)translation_ptr;
// First check we are in the right memory region (same as loads)
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_temp, reg_temp, region);
mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (doubleaccess) {
mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
}
if (region == 2) {
// EWRAM is a bit special
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && realsize != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size)
}
mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
// Generate regular (<=32KB) mirroring
mips_emit_andi(reg_a0, reg_a0, memmask); // Clear upper bits (mirroring)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to base addr
}
// Generate SMC write and tracking
// TODO: Should we have SMC checks here also for aligned?
if (meminfo->check_smc && !aligned) {
mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer
if (realsize == 2) {
mips_emit_lw(reg_temp, reg_temp, base_addr);
} else if (realsize == 1) {
mips_emit_lh(reg_temp, reg_temp, base_addr);
} else {
mips_emit_lb(reg_temp, reg_temp, base_addr);
}
// If the data is non zero, we just wrote over code
// Local-jump to the smc_write (which lives at offset:0)
unsigned instoffset = (&stub_arena[SMC_WRITE_OFF32] - (((u32*)translation_ptr) + 1));
mips_emit_b(bne, reg_zero, reg_temp, instoffset);
}
// Store the data (delay slot from the SMC branch)
if (realsize == 2) {
mips_emit_sw(reg_a1, reg_rv, base_addr);
} else if (realsize == 1) {
mips_emit_sh(reg_a1, reg_rv, base_addr);
} else {
mips_emit_sb(reg_a1, reg_rv, base_addr);
}
// Post processing store:
// Signal that OAM was updated
if (region == 7) {
u32 palcaddr = (u32)&oam_update;
mips_emit_lui(reg_temp, ((palcaddr + 0x8000) >> 16));
mips_emit_sw(reg_base, reg_temp, palcaddr & 0xffff); // Write any nonzero data
generate_function_return_swap_delay();
}
else {
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
}
*tr_ptr = translation_ptr;
}
// Palette is accessed differently and stored in a decoded manner
static void emit_palette_hdl(
unsigned memop_number, const t_stub_meminfo *meminfo,
unsigned size, bool aligned, u8 **tr_ptr)
{
u8 *translation_ptr = *tr_ptr;
// Palette cannot be accessed at byte level
unsigned realsize = size ? size : 1;
u32 memmask = (meminfo->memsize - 1);
memmask = (memmask >> realsize) << realsize;
// Add the stub to the table (add the JAL instruction encoded already)
tmemst[memop_number][5] = (u32)translation_ptr;
// First check we are in the right memory region (same as loads)
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_temp, reg_temp, 5);
mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring)
if (size == 0) {
mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
}
mips_emit_addu(reg_rv, reg_rv, reg_base);
// Store the data (delay slot from the SMC branch)
if (realsize == 2) {
mips_emit_sw(reg_a1, reg_base, 0x100);
} else if (realsize == 1) {
mips_emit_sh(reg_a1, reg_base, 0x100);
}
mips_emit_sll(reg_temp, reg_a1, 1);
mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
mips_emit_ins(reg_temp, reg_a1, 0, 5);
mips_emit_sh(reg_temp, reg_rv, 0x500);
if (size == 2) {
// Convert the second half-word also
mips_emit_srl(reg_a1, reg_a1, 16);
mips_emit_sll(reg_temp, reg_a1, 1);
mips_emit_andi(reg_temp, reg_temp, 0xFFC0);
mips_emit_ins(reg_temp, reg_a1, 0, 5);
mips_emit_sh(reg_temp, reg_rv, 0x502);
}
generate_function_return_swap_delay();
*tr_ptr = translation_ptr;
}
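The sll/andi/ins sequence above is the palette colour conversion: the low 5 bits stay in place and the upper ten bits move up by one, leaving bit 5 clear, so each GBA 15-bit colour lands in a 16-bit host format with a 6-bit middle field. A hedged C equivalent (not part of the commit):

// Sketch only: the value stored into the converted palette copy.
static u16 convert_palette_entry(u16 value)
{
  return ((value << 1) & 0xFFC0) | (value & 0x1F);
}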
// This emits stubs for regions where writes have no side-effects
static void emit_ignorestore_stub(unsigned size, u8 **tr_ptr) {
u8 *translation_ptr = *tr_ptr;
// Region 0-1 (BIOS and ignore)
tmemst[size][0] = tmemst[size][1] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 25); // Check 7 MSB to be zero
mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
mips_emit_nop();
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
// Region 8-B
tmemst[size][ 8] = tmemst[size][ 9] =
tmemst[size][10] = tmemst[size][11] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 26); // Check 6 MSB to be 0x02
mips_emit_xori(reg_temp, reg_temp, 0x02);
mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
mips_emit_nop();
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
// Region C or F (or bigger!)
tmemst[size][12] = tmemst[size][15] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
mips_emit_b(beq, reg_rv, reg_zero, 3); // If 15 or bigger, ignore store
mips_emit_xori(reg_rv, reg_temp, 0x0C);
mips_emit_b(bne, reg_temp, reg_zero, st_phndlr_branch(size));
mips_emit_nop();
mips_emit_jr(mips_reg_ra);
mips_emit_nop();
*tr_ptr = translation_ptr;
}
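In terms of the checks above, a hedged summary of which store addresses the ignore stubs swallow (anything else bounces to the store patch handler); store_is_ignored is an illustrative predicate, not a function in the commit:

// Sketch only: regions whose stores are dropped without side effects.
static bool store_is_ignored(u32 address)
{
  u32 region = address >> 24;
  return (address >> 25) == 0      // regions 0-1: BIOS / unused
      || (address >> 26) == 0x02   // regions 8-B: ROM
      || region == 0x0C            // region C: ROM (upper waitstate)
      || region >= 0x0F;           // region F and above
}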
// Stubs for regions with EEPROM or flash/SRAM
static void emit_saveaccess_stub(u8 **tr_ptr) {
u8 *translation_ptr = *tr_ptr;
const u8 opmap[6][2] = { {0, 1}, {1, 2}, {2, 4}, {4, 6}, {6, 10}, {10, 11} };
// Writes to region 8 are directed to RTC (only 16 bit ones though)
tmemld[1][8] = (u32)translation_ptr;
emit_mem_call(&write_rtc, 0xFE);
// These are for region 0xD where EEPROM is mapped. Addr is ignored
// Value is limited to one bit (both reading and writing!)
u32 *read_hndlr = (u32*)translation_ptr;
emit_mem_call(&read_eeprom, 0x3FF);
u32 *write_hndlr = (u32*)translation_ptr;
emit_mem_call(&write_eeprom, 0x3FF);
// Map loads to the read handler.
for (unsigned opt = 0; opt < 6; opt++) {
// Unalignment is not relevant here, so map them all to the same handler.
for (unsigned i = opmap[opt][0]; i < opmap[opt][1]; i++)
tmemld[i][13] = (u32)translation_ptr;
// Emit just a check + patch jump
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0D);
mips_emit_b(bne, reg_rv, reg_zero, branch_handlerid(opt));
mips_emit_nop();
mips_emit_b(beq, reg_zero, reg_zero, branch_offset(read_hndlr));
}
// This is for stores
for (unsigned strop = 0; strop <= 3; strop++) {
tmemst[strop][13] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0D);
mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
mips_emit_nop();
mips_emit_b(beq, reg_zero, reg_zero, branch_offset(write_hndlr));
}
// Flash/SRAM/Backup only supports 8 bit writes
for (unsigned strop = 0; strop <= 3; strop++) {
tmemst[strop][14] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_rv, reg_temp, 0x0E);
mips_emit_b(bne, reg_rv, reg_zero, st_phndlr_branch(strop));
if (strop == 0) {
emit_mem_call(&write_backup, 0xFFFF);
} else {
mips_emit_nop();
mips_emit_jr(mips_reg_ra); // Does nothing in this case
mips_emit_nop();
}
}
// Region 4 writes
// I/O writes are also a bit special, they can trigger things like DMA, IRQs...
// Also: aligned (strop==3) accesses do not trigger IRQs
const u32 iowrtbl[] = {
(u32)&write_io_register8, (u32)&write_io_register16,
(u32)&write_io_register32, (u32)&write_io_register32 };
const u32 amsk[] = {0x3FF, 0x3FE, 0x3FC, 0x3FC};
for (unsigned strop = 0; strop <= 3; strop++) {
tmemst[strop][4] = (u32)translation_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_xori(reg_temp, reg_temp, 0x04);
mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(strop));
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3); // Store the return addr
emit_save_regs(strop == 3);
mips_emit_andi(reg_a0, reg_a0, amsk[strop]);
mips_emit_jal(iowrtbl[strop] >> 2);
if (strop < 3) {
mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Save PC (delay)
mips_emit_j(((u32)&write_io_epilogue) >> 2);
mips_emit_nop();
} else {
mips_emit_nop();
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR3);
emit_restore_regs(true);
generate_function_return_swap_delay();
}
}
*tr_ptr = translation_ptr;
}
// Emits the open-load stub (reads from unmapped/open memory regions)
static void emit_openload_stub(
unsigned memopn, bool signext, unsigned size,
unsigned alignment, bool aligned, u8 **tr_ptr
) {
u8 *translation_ptr = *tr_ptr;
// This affects regions 1 and 15
tmemld[memopn][ 1] = (u32)translation_ptr;
tmemld[memopn][15] = (u32)translation_ptr;
// We need to repatch if: the alignment is different, or
// if we are accessing a region other than the open/ignore ones (1 and 15)
mips_emit_srl(reg_temp, reg_a0, 24);
mips_emit_sltiu(reg_rv, reg_temp, 0x0F);
mips_emit_addiu(reg_temp, reg_temp, -1);
mips_emit_sltu(reg_temp, reg_zero, reg_temp);
mips_emit_and(reg_temp, reg_temp, reg_rv);
if (!aligned && size != 0) {
// Also check and aggregate alignment
mips_emit_ext(reg_rv, reg_a0, 0, size);
mips_emit_xori(reg_rv, reg_rv, alignment);
mips_emit_or(reg_temp, reg_rv, reg_temp);
}
// Jump to patch handler
mips_emit_b(bne, reg_zero, reg_temp, ld_phndlr_branch(memopn));
// BIOS can jump here to do open loads
openld_core_ptrs[memopn] = (u32*)translation_ptr;
// Proceed with open load by reading data at PC (previous data in the bus)
mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR1);
switch (size) {
case 0:
mips_emit_b(beq, reg_zero, reg_rv, 2); // Depends on CPU mode
mips_emit_andi(reg_a0, reg_a0, 0x3); // ARM: Isolate two LSB
mips_emit_andi(reg_a0, reg_a0, 0x1); // Thb: Isolate one LSB
mips_emit_jal(((u32)&read_memory8) >> 2);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break;
case 1:
mips_emit_b(beq, reg_zero, reg_rv, 2);
mips_emit_andi(reg_a0, reg_a0, 0x2); // ARM: Isolate bit 1
mips_emit_andi(reg_a0, reg_a0, 0x0); // Thb: Ignore low bits at all
mips_emit_jal(((u32)&read_memory16) >> 2);
mips_emit_addu(reg_a0, reg_a0, reg_a1); // Add low bits to addr (delay)
break;
default:
mips_emit_b(beq, reg_zero, reg_rv, 5);
mips_emit_addu(reg_a0, reg_zero, reg_a1); // Move PC to arg0
mips_emit_jal(((u32)&read_memory16) >> 2);
mips_emit_nop();
mips_emit_b(beq, reg_zero, reg_zero, 3);
mips_emit_ins(reg_rv, reg_rv, 16, 16); // res = res | (res << 16) [delay]
mips_emit_jal(((u32)&read_memory32) >> 2);
mips_emit_nop();
break;
};
mips_emit_lw(mips_reg_ra, reg_base, ReOff_SaveR1);
emit_restore_regs(aligned);
// Same behaviour as reading from region14 really (8 bit bus)
if (!size && signext) {
mips_emit_seb(reg_rv, reg_rv);
} else if (size == 1 && alignment) {
mips_emit_seb(reg_rv, reg_rv);
} else if (size == 2) {
mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
}
generate_function_return_swap_delay();
*tr_ptr = translation_ptr;
}
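In hedged C form, the open-load stub approximates GBA open-bus behaviour by re-reading memory at the current PC (held in reg_a1), with the width and duplication depending on ARM vs Thumb mode; the 8-bit-bus rotation/sign-extension applied after the call is omitted here, and open_bus_read is an illustrative name rather than a function in the commit:

// Sketch only, using the read_memory* handlers the stub itself calls.
// size: 0 = 8 bit, 1 = 16 bit, 2 = 32 bit.
static u32 open_bus_read(u32 address, u32 pc, bool thumb, unsigned size)
{
  switch (size) {
  case 0:   // pick the right byte of the prefetched opcode
    return read_memory8(pc + (address & (thumb ? 1 : 3)));
  case 1:   // pick the right halfword (Thumb opcodes are 16 bits wide)
    return read_memory16(pc + (thumb ? 0 : (address & 2)));
  default:  // 32 bit: in Thumb mode the opcode is mirrored into both halves
    if (thumb) {
      u32 op = read_memory16(pc);
      return op | (op << 16);
    }
    return read_memory32(pc);
  }
}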
typedef void (*sthldr_t)(
unsigned memop_number, const t_stub_meminfo *meminfo,
unsigned size, bool aligned, u8 **tr_ptr);
typedef void (*ldhldr_t)(
unsigned memop_number, const t_stub_meminfo *meminfo,
bool signext, unsigned size,
unsigned alignment, bool aligned,
u8 **tr_ptr);
// Generates a patch handler for a given access size
// It will detect the access alignment and memory region and load
// the corresponding handler from the table (at the right offset)
// and patch the jal instruction from where it was called.
static void emit_phand(
u8 **tr_ptr, unsigned size, unsigned toff,
bool check_alignment)
{
u8 *translation_ptr = *tr_ptr;
mips_emit_srl(reg_temp, reg_a0, 24);
#ifdef PSP
mips_emit_addiu(reg_rv, reg_zero, 15*4); // Table limit (max)
mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table
#else
mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX
mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds)
mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore)
#endif
// Stores or byte-accesses do not care about alignment
if (check_alignment) {
// Move alignment bits for the table lookup
mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2) go into bits 6 (and 7)
}
unsigned tbloff = 256 + 2048 + 220 + 4 * toff; // Skip regs and palettes
mips_emit_addu(reg_rv, reg_temp, reg_base); // Add to the base_reg the table offset
mips_emit_lw(reg_rv, reg_rv, tbloff); // Read addr from table
mips_emit_sll(reg_temp, reg_rv, 4); // 26 bit immediate to the MSB
mips_emit_ori(reg_temp, reg_temp, 0x3); // JAL opcode
mips_emit_rotr(reg_temp, reg_temp, 6); // Swap opcode and immediate
mips_emit_sw(reg_temp, mips_reg_ra, -8); // Patch instruction!
mips_emit_cache(0x1A, mips_reg_ra, -8);
mips_emit_jr(reg_rv); // Jump directly to target for speed
mips_emit_cache(0x08, mips_reg_ra, -8);
// Round up handlers to 16 instructions for easy addressing :)
while (translation_ptr - *tr_ptr < 64) {
mips_emit_nop();
}
*tr_ptr = translation_ptr;
}
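The sll/ori/rotr triplet above builds a JAL encoding directly from the handler address read out of the table, and the two cache operations then flush the patched word so instruction fetch sees it before jumping to the handler. A hedged C model of the encoding trick (encode_jal is illustrative, not part of the commit):

// Sketch only: rotating ((target << 4) | 3) right by 6 puts the 3 (JAL
// opcode) into bits 31..26 and target[27:2] into the 26-bit jump index.
// Equivalent to 0x0C000000 | ((target >> 2) & 0x03FFFFFF).
static u32 encode_jal(u32 target)
{
  u32 t = (target << 4) | 0x3;
  return (t >> 6) | (t << 26);  // rotate right by 6
}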
// This function emits the following stubs:
// - smc_write: Jumps to C code to trigger a cache flush
// - memop patcher: Patches a memop whenever it accesses the wrong mem region
// - mem stubs: There are stubs for load & store, for every memory region
// and possible operand size and misalignment (+ sign extensions)
void init_emitter() {
// Initialize memory to a debuggable state
memset(stub_arena, 0, sizeof(stub_arena)); // nop
// Generates the trampoline and helper stubs that we need
u8 *translation_ptr = (u8*)&stub_arena[0];
// Generate first the patch handlers
// We have 6+4 patchers, one per mem type (6 or 4)
// Calculate the offset into tmemld[10][XX];
emit_phand(&translation_ptr, 0, 0 * 16, false); // ld u8
emit_phand(&translation_ptr, 0, 1 * 16, false); // ld s8
emit_phand(&translation_ptr, 1, 2 * 16, true); // ld u16 + u16u1
emit_phand(&translation_ptr, 1, 4 * 16, true); // ld s16 + s16u1
emit_phand(&translation_ptr, 2, 6 * 16, true); // ld u32 (0/1/2/3u)
emit_phand(&translation_ptr, 2, 10 * 16, false); // ld aligned 32
// Store table is immediately after
emit_phand(&translation_ptr, 0, 11 * 16, false); // st u8
emit_phand(&translation_ptr, 1, 12 * 16, false); // st u16
emit_phand(&translation_ptr, 2, 13 * 16, false); // st u32
emit_phand(&translation_ptr, 2, 14 * 16, false); // st aligned 32
// Generate SMC write handler, with the lookup machinery
// Call out the flushing routine (save PC)
emit_save_regs(false);
mips_emit_jal(((u32)&flush_translation_cache_ram) >> 2);
mips_emit_sw(reg_a2, reg_base, ReOff_RegPC); // Delay slot
mips_emit_lw(reg_rv, reg_base, ReOff_CPSR); // Read CPSR
mips_emit_andi(reg_rv, reg_rv, 0x20); // Check T bit
mips_emit_b(beq, reg_rv, reg_zero, 3); // Skip to ARM mode
mips_emit_lw(reg_a0, reg_base, ReOff_RegPC); // arg0=pc
// Lookup thumb PC and execute
mips_emit_jal(((u32)&block_lookup_address_thumb) >> 2);
mips_emit_addiu(mips_reg_ra, mips_reg_ra, 8); // Skip 2 insts on return!
// Lookup arm PC and execute
mips_emit_jal(((u32)&block_lookup_address_arm) >> 2);
mips_emit_nop();
// Epilogue (restore and jump)
emit_restore_regs(false);
mips_emit_jr(reg_rv); // Go execute the code
mips_emit_nop();
// Generate the openload handlers (for accesses to unmapped mem)
emit_openload_stub(0, false, 0, 0, false, &translation_ptr); // ld u8
emit_openload_stub(1, true, 0, 0, false, &translation_ptr); // ld s8
emit_openload_stub(2, false, 1, 0, false, &translation_ptr); // ld u16
emit_openload_stub(3, false, 1, 1, false, &translation_ptr); // ld u16u1
emit_openload_stub(4, true, 1, 0, false, &translation_ptr); // ld s16
emit_openload_stub(5, true, 1, 1, false, &translation_ptr); // ld s16u1
emit_openload_stub(6, false, 2, 0, false, &translation_ptr); // ld u32
emit_openload_stub(7, false, 2, 1, false, &translation_ptr); // ld u32u1
emit_openload_stub(8, false, 2, 2, false, &translation_ptr); // ld u32u2
emit_openload_stub(9, false, 2, 3, false, &translation_ptr); // ld u32u3
emit_openload_stub(10,false, 2, 0, true, &translation_ptr); // ld aligned 32
// Here we emit the ignore store area, just checks and does nothing
for (unsigned i = 0; i < 4; i++)
emit_ignorestore_stub(i, &translation_ptr);
// Here go the save game handlers
emit_saveaccess_stub(&translation_ptr);
// Generate memory handlers
const t_stub_meminfo ldinfo [] = {
{ emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom },
// 1 Open load / Ignore store
{ emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
{ emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
{ emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers },
{ emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
{ emit_pmemld_stub, 7, 0x400, false, true, (u32)oam_ram },
{ emit_pmemld_stub, 8, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 9, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 10, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 11, 0x8000, false, false, 0 },
{ emit_pmemld_stub, 12, 0x8000, false, false, 0 },
// 13 is EEPROM mapped already (a bit special)
{ emit_pmemld_stub, 14, 0, false, false, 0 }, // Mapped via function call
// 15 Open load / Ignore store
};
for (unsigned i = 0; i < sizeof(ldinfo)/sizeof(ldinfo[0]); i++) {
ldhldr_t handler = (ldhldr_t)ldinfo[i].emitter;
/* region info signext sz al isaligned */
handler(0, &ldinfo[i], false, 0, 0, false, &translation_ptr); // ld u8
handler(1, &ldinfo[i], true, 0, 0, false, &translation_ptr); // ld s8
handler(2, &ldinfo[i], false, 1, 0, false, &translation_ptr); // ld u16
handler(3, &ldinfo[i], false, 1, 1, false, &translation_ptr); // ld u16u1
handler(4, &ldinfo[i], true, 1, 0, false, &translation_ptr); // ld s16
handler(5, &ldinfo[i], true, 1, 1, false, &translation_ptr); // ld s16u1
handler(6, &ldinfo[i], false, 2, 0, false, &translation_ptr); // ld u32
handler(7, &ldinfo[i], false, 2, 1, false, &translation_ptr); // ld u32u1
handler(8, &ldinfo[i], false, 2, 2, false, &translation_ptr); // ld u32u2
handler(9, &ldinfo[i], false, 2, 3, false, &translation_ptr); // ld u32u3
handler(10,&ldinfo[i], false, 2, 0, true, &translation_ptr); // aligned ld u32
}
const t_stub_meminfo stinfo [] = {
{ emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
{ emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
// I/O is special and mapped with a function call
{ emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
{ emit_pmemst_stub, 7, 0x400, false, true, (u32)oam_ram },
};
// Store only for "regular"-ish mem regions
//
for (unsigned i = 0; i < sizeof(stinfo)/sizeof(stinfo[0]); i++) {
sthldr_t handler = (sthldr_t)stinfo[i].emitter;
handler(0, &stinfo[i], 0, false, &translation_ptr); // st u8
handler(1, &stinfo[i], 1, false, &translation_ptr); // st u16
handler(2, &stinfo[i], 2, false, &translation_ptr); // st u32
handler(3, &stinfo[i], 2, true, &translation_ptr); // st aligned 32
}
}
#endif


@ -23,16 +23,16 @@
.global mips_indirect_branch_arm
.global mips_indirect_branch_thumb
.global mips_indirect_branch_dual
.global execute_load_u8
.global execute_load_u16
.global execute_load_u32
.global execute_load_s8
.global execute_load_s16
.global execute_store_u8
.global execute_store_u16
.global execute_store_u32
.global execute_aligned_load32
.global execute_aligned_store32
#.global execute_load_u8
#.global execute_load_u16
#.global execute_load_u32
#.global execute_load_s8
#.global execute_load_s16
#.global execute_store_u8
#.global execute_store_u16
#.global execute_store_u32
#.global execute_aligned_load32
#.global execute_aligned_store32
.global execute_read_cpsr
.global execute_read_spsr
.global execute_swi
@ -48,9 +48,15 @@
.global reg_check
.global palette_ram
.global palette_ram_converted
.global init_emitter
.global mips_lookup_pc
.global write_io_epilogue
.global memory_map_read
.global memory_map_write
.global tmemld
.global tmemst
.global reg
.global spsr
.global reg_mode
@ -105,7 +111,6 @@
.equ REG_R12, (12 * 4)
.equ REG_R13, (13 * 4)
.equ REG_R14, (14 * 4)
.equ REG_LR, (14 * 4)
.equ REG_PC, (15 * 4)
.equ REG_N_FLAG, (16 * 4)
.equ REG_Z_FLAG, (17 * 4)
@ -1004,7 +1009,7 @@ execute_load_ewram_u8:
# Put the generic address over the handler you want to be default
# IWRAM is typically the most frequently read and written to.
execute_load_u8:
# execute_load_u8:
execute_load_iwram_u8:
translate_region 3, patch_load_u8, (iwram + 0x8000), 0x7FFF
load_u8 (iwram + 0x8000)
@ -1107,7 +1112,7 @@ execute_load_ewram_s8:
translate_region_ewram patch_load_s8
load_s8 (ewram + 0x8000)
execute_load_s8:
#execute_load_s8:
execute_load_iwram_s8:
translate_region 3, patch_load_s8, (iwram + 0x8000), 0x7FFF
load_s8 (iwram + 0x8000)
@ -1209,7 +1214,7 @@ execute_load_ewram_u16:
translate_region_ewram_load_align 1, 0, patch_load_u16
load_u16 (ewram + 0x8000)
execute_load_u16:
#execute_load_u16:
execute_load_iwram_u16:
translate_region_align 3, 1, 0, patch_load_u16, (iwram + 0x8000), 0x7FFF
load_u16 (iwram + 0x8000)
@ -1408,7 +1413,7 @@ execute_load_ewram_s16:
translate_region_ewram_load_align 1, 0, patch_load_s16
load_s16 (ewram + 0x8000)
execute_load_s16:
#execute_load_s16:
execute_load_iwram_s16:
translate_region_align 3, 1, 0, patch_load_s16, (iwram + 0x8000), 0x7FFF
load_s16 (iwram + 0x8000)
@ -1607,7 +1612,7 @@ execute_load_ewram_u32:
translate_region_ewram_load_align 2, 0, patch_load_u32
load_u32 (ewram + 0x8000)
execute_load_u32:
#execute_load_u32:
execute_load_iwram_u32:
translate_region_align 3, 2, 0, patch_load_u32, (iwram + 0x8000), 0x7FFF
load_u32 (iwram + 0x8000)
@ -1993,7 +1998,7 @@ execute_load_ewram_u32a:
translate_region_ewram patch_load_u32a
load_u32 (ewram + 0x8000)
execute_aligned_load32:
#execute_aligned_load32:
execute_load_iwram_u32a:
translate_region 3, patch_load_u32a, (iwram + 0x8000), 0x7FFF
load_u32 (iwram + 0x8000)
@ -2078,7 +2083,7 @@ execute_store_ewram_u8:
translate_region_ewram patch_store_u8
store_u8_smc (ewram + 0x8000)
execute_store_u8:
#execute_store_u8:
execute_store_iwram_u8:
translate_region 3, patch_store_u8, (iwram + 0x8000), 0x7FFF
store_u8_smc (iwram + 0x8000)
@ -2175,7 +2180,7 @@ execute_store_ewram_u16:
translate_region_ewram_store_align16 patch_store_u16
store_u16_smc (ewram + 0x8000)
execute_store_u16:
#execute_store_u16:
execute_store_iwram_u16:
translate_region 3, patch_store_u16, (iwram + 0x8000), 0x7FFE
store_u16_smc (iwram + 0x8000)
@ -2274,7 +2279,7 @@ execute_store_ewram_u32:
translate_region_ewram_store_align32 patch_store_u32
store_u32_smc (ewram + 0x8000)
execute_store_u32:
#execute_store_u32:
execute_store_iwram_u32:
translate_region 3, patch_store_u32, (iwram + 0x8000), 0x7FFC
store_u32_smc (iwram + 0x8000)
@ -2380,7 +2385,7 @@ execute_store_ewram_u32a:
translate_region_ewram_store_align32 patch_store_u32a
store_u32 (ewram + 0x8000)
execute_aligned_store32:
#execute_aligned_store32:
execute_store_iwram_u32a:
translate_region 3, patch_store_u32a, (iwram + 0x8000), 0x7FFC
store_u32 (iwram + 0x8000)
@ -2529,6 +2534,7 @@ smc_write:
jal flush_translation_cache_ram # flush translation cache
sw $6, REG_PC($16) # save PC (delay slot)
mips_lookup_pc:
lookup_pc:
lw $2, REG_CPSR($16) # $2 = cpsr
andi $2, $2, 0x20 # isolate mode bit
@ -2624,8 +2630,7 @@ execute_store_cpsr:
and $2, $2, $4 # $2 = (cpsr & (~store_mask))
or $1, $1, $2 # $1 = new cpsr combined with old
extract_flags_body # extract flags from $1
addiu $sp, $sp, -4
sw $ra, ($sp)
sw $ra, REG_SAVE3($16)
save_registers
jal execute_store_cpsr_body # do the dirty work in this C function
addu $4, $1, $0 # load the new CPSR (delay slot)
@ -2635,16 +2640,16 @@ execute_store_cpsr:
restore_registers
lw $ra, ($sp)
lw $ra, REG_SAVE3($16)
jr $ra
addiu $sp, $sp, 4
nop
changed_pc_cpsr:
jal block_lookup_address_arm # GBA address is in $4
addu $4, $2, $0 # load new address in $4 (delay slot)
restore_registers # restore registers
jr $2 # jump to the new address
addiu $sp, $sp, 4 # get rid of the old ra (delay slot)
nop
# $4: new spsr
@ -2797,11 +2802,14 @@ execute_arm_translate:
.data
.align 6
memory_map_write:
.space 0x8000
memory_map_read:
.space 0x8000
# This must be between memory_map_read and memory_map_write because it's used
# to calculate their addresses elsewhere in this file.
# memory_map_read is immediately before arm_reg on purpose (offset used
# to access it, via lw op). We do not use write though.
reg:
.space 0x100
@ -2815,5 +2823,14 @@ spsr:
reg_mode:
.space 196 # u32[7][7];
memory_map_write:
.space 0x8000
# Here we store:
# void *tmemld[11][16]; # 10 types of loads
# void *tmemst[ 4][16]; # 3 types of stores
# Essentially a list of pointers to the different mem load handlers
# Keep them close for a fast patcher.
tmemld:
.space 704
tmemst:
.space 256
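# (704 bytes = 11*16 u32 slots and 256 bytes = 4*16 u32 slots, matching the
#  u32 tmemld[11][16] / tmemst[4][16] declarations in mips_emit.h.)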