Simplify the block lookup/translate logic.

This commit is contained in:
David Guillen Fandos 2022-01-05 16:32:42 +01:00
parent cb9696cb98
commit b6ddec8fa0
2 changed files with 141 additions and 246 deletions

12
cpu.h
View file

@ -97,12 +97,6 @@ typedef enum
REG_MAX = 64 REG_MAX = 64
} ext_reg_numbers; } ext_reg_numbers;
typedef enum
{
TRANSLATION_REGION_RAM,
TRANSLATION_REGION_ROM,
} translation_region_type;
extern u32 instruction_count; extern u32 instruction_count;
void execute_arm(u32 cycles); void execute_arm(u32 cycles);
@ -126,10 +120,8 @@ bool cpu_read_savestate(const u8 *src);
u8 function_cc *block_lookup_address_arm(u32 pc); u8 function_cc *block_lookup_address_arm(u32 pc);
u8 function_cc *block_lookup_address_thumb(u32 pc); u8 function_cc *block_lookup_address_thumb(u32 pc);
u8 function_cc *block_lookup_address_dual(u32 pc); u8 function_cc *block_lookup_address_dual(u32 pc);
s32 translate_block_arm(u32 pc, translation_region_type translation_region, bool translate_block_arm(u32 pc, bool ram_region);
u32 smc_enable); bool translate_block_thumb(u32 pc, bool ram_region);
s32 translate_block_thumb(u32 pc, translation_region_type translation_region,
u32 smc_enable);
#if defined(MMAP_JIT_CACHE) #if defined(MMAP_JIT_CACHE)
extern u8* rom_translation_cache; extern u8* rom_translation_cache;

View file

@ -1958,8 +1958,7 @@ void translate_icache_sync() {
u32 rdreg = (hiop & 7); \ u32 rdreg = (hiop & 7); \
u32 aoff = (pc & ~2) + (imm*4) + 4; \ u32 aoff = (pc & ~2) + (imm*4) + 4; \
/* ROM + same page -> optimize as const load */ \ /* ROM + same page -> optimize as const load */ \
if (translation_region == TRANSLATION_REGION_ROM && \ if (!ram_region && (((aoff + 4) >> 15) == (pc >> 15))) { \
(((aoff + 4) >> 15) == (pc >> 15))) { \
u32 value = address32(pc_address_block, (aoff & 0x7FFF)); \ u32 value = address32(pc_address_block, (aoff & 0x7FFF)); \
thumb_load_pc_pool_const(rdreg, value); \ thumb_load_pc_pool_const(rdreg, value); \
} else { \ } else { \
@ -2546,94 +2545,48 @@ inline static ramtag_type* get_ram_tag(u16 tagval) {
u32 thumb = 1; \ u32 thumb = 1; \
pc &= ~0x01 \ pc &= ~0x01 \
#define ram_translation_region TRANSLATION_REGION_RAM
#define rom_translation_region TRANSLATION_REGION_ROM
#define block_lookup_translate_arm(mem_type, smc_enable) \ #define block_lookup_translate_builder(type) \
translation_result = translate_block_arm(pc, mem_type##_translation_region, \ u8 function_cc *block_lookup_translate_##type(u32 pc) \
smc_enable) \
#define block_lookup_translate_thumb(mem_type, smc_enable) \
translation_result = translate_block_thumb(pc, \
mem_type##_translation_region, smc_enable) \
#define block_lookup_translate_ram(inst_type) \
{ \ { \
u8 pcregion = (pc >> 24); \
u16 *location; \
u32 block_tag; \
\
block_lookup_address_pc_##type(); \
\
switch(pcregion) \
{ \
case 0x2: \
case 0x3: \
{ \
u16* tagp = (pcregion == 2) ? (u16 *)(ewram + (pc & 0x3FFFF) + 0x40000) \
: (u16 *)(iwram + (pc & 0x7FFF)); \
ramtag_type* trentry; \ ramtag_type* trentry; \
/* Allocate a tag if not a valid one, and initialize header */ \ /* Allocate a tag if not a valid one, and initialize header */ \
if (!VALID_TAG(*location)) { \ if (!VALID_TAG(*tagp)) { \
allocate_tag_##inst_type(location); \ allocate_tag_##type(tagp); \
trentry = get_ram_tag(*location); \ trentry = get_ram_tag(*tagp); \
trentry->offset_arm = 0; \ trentry->offset_arm = 0; \
trentry->offset_thumb = 0; \ trentry->offset_thumb = 0; \
} else { \ } else { \
trentry = get_ram_tag(*location); \ trentry = get_ram_tag(*tagp); \
} \ } \
\ \
if (trentry->offset_##inst_type == 0) \ if (!trentry->offset_##type) { \
{ \ bool result; \
__label__ redo; \ u8 *blkptr = ram_translation_ptr + block_prologue_size; \
s32 translation_result; \ trentry->offset_##type = blkptr - ram_translation_cache; \
result = translate_block_##type(pc, true); \
\ \
redo: \ if (result) \
\ return blkptr; \
translation_recursion_level++; \ } else { \
block_address = ram_translation_ptr + block_prologue_size; \ return &ram_translation_cache[trentry->offset_##type]; \
trentry->offset_##inst_type = block_address - ram_translation_cache; \ } \
\
translation_result = translate_block_##inst_type( \
pc, ram_translation_region, 1); \
\
translation_recursion_level--; \
\
/* If the translation failed then pass that failure on if we're in \
a recursive level, or try again if we've hit the bottom. */ \
if(translation_result == -1) \
{ \
if(translation_recursion_level) \
return NULL; \ return NULL; \
\
goto redo; \
} \ } \
\ \
if(translation_recursion_level == 0) \
translate_icache_sync(); \
} \
else \
{ \
block_address = &ram_translation_cache[trentry->offset_##inst_type]; \
} \
}
u32 translation_recursion_level = 0;
u32 translation_flush_count = 0;
#define block_lookup_address_builder(type) \
u8 function_cc *block_lookup_address_##type(u32 pc) \
{ \
u16 *location; \
u32 block_tag; \
u8 *block_address; \
\
/* Starting at the beginning, we allow for one translation cache flush. */ \
if(translation_recursion_level == 0){ \
translation_flush_count = 0; \
} \
block_lookup_address_pc_##type(); \
\
switch(pc >> 24) \
{ \
case 0x2: \
location = (u16 *)(ewram + (pc & 0x3FFFF) + 0x40000); \
block_lookup_translate_ram(type); \
break; \
\
case 0x3: \
location = (u16 *)(iwram + (pc & 0x7FFF)); \
block_lookup_translate_ram(type); \
break; \
\
case 0x0: \ case 0x0: \
case 0x8 ... 0xD: \ case 0x8 ... 0xD: \
{ \ { \
@ -2648,69 +2601,40 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \
{ \ { \
bhdr = (hashhdr_type*)&rom_translation_cache[blk_offset]; \ bhdr = (hashhdr_type*)&rom_translation_cache[blk_offset]; \
if(bhdr->pc_value == key) \ if(bhdr->pc_value == key) \
{ \ return &rom_translation_cache[ \
block_address = &rom_translation_cache[blk_offset + \ blk_offset + sizeof(hashhdr_type) + block_prologue_size]; \
sizeof(hashhdr_type) + block_prologue_size]; \ \
break; \
} \
blk_offset = bhdr->next_entry; \ blk_offset = bhdr->next_entry; \
blk_offset_addr = &bhdr->next_entry; \ blk_offset_addr = &bhdr->next_entry; \
} \ } \
if(!blk_offset) \
{ \
__label__ redo; \
s32 result; \
\ \
redo: \ { /* Not found, go ahead and translate, and backfill the hash table */ \
\ u8 *blkptr; \
translation_recursion_level++; \ bool result; \
bhdr = (hashhdr_type*)rom_translation_ptr; \ bhdr = (hashhdr_type*)rom_translation_ptr; \
bhdr->pc_value = key; \ bhdr->pc_value = key; \
bhdr->next_entry = 0; \ bhdr->next_entry = 0; \
*blk_offset_addr = (u32)(rom_translation_ptr - rom_translation_cache);\ *blk_offset_addr = (u32)(rom_translation_ptr - rom_translation_cache);\
rom_translation_ptr += sizeof(hashhdr_type); \ rom_translation_ptr += sizeof(hashhdr_type); \
block_address = rom_translation_ptr + block_prologue_size; \ blkptr = rom_translation_ptr + block_prologue_size; \
result = translate_block_##type(pc, rom_translation_region, 0); \ result = translate_block_##type(pc, false); \
translation_recursion_level--; \
\ \
/* If the translation failed then pass that failure on if we're in \ if (result) \
a recursive level, or try again if we've hit the bottom. */ \ return blkptr; \
if(result == -1) \ } \
{ \
if(translation_recursion_level) \
return NULL; \ return NULL; \
\ } \
goto redo; \
} \ } \
\ \
if(translation_recursion_level == 0) \ /* Do not return NULL since it could indeed happen that some branch \
translate_icache_sync(); \ points to some random place (perhaps due to being garbage). This can \
} \ happen when especulatively compiling code in RAM. Perhaps the game \
break; \ patches these instructions later, which would trigger a flush */ \
} \ return (u8*)(~0); \
\
default: \
/* If we're at the bottom, it means we're actually trying to jump to an \
address that we can't handle. Otherwise, it means that code scanned \
has reached an address that can't be handled, which means that we \
have most likely hit an area that doesn't contain code yet (for \
instance, in RAM). If such a thing happens, return -1 and the \
block translater will naively link it (it'll be okay, since it \
should never be hit) */ \
if(translation_recursion_level == 0) \
{ \
printf("bad jump %x (%x)\n", pc, reg[REG_PC]); \
fflush(stdout); \
} \
block_address = (u8 *)(-1); \
break; \
} \
\
return block_address; \
} \ } \
block_lookup_address_builder(arm); block_lookup_translate_builder(arm);
block_lookup_address_builder(thumb); block_lookup_translate_builder(thumb);
u8 function_cc *block_lookup_address_dual(u32 pc) u8 function_cc *block_lookup_address_dual(u32 pc)
{ {
@ -2726,6 +2650,36 @@ u8 function_cc *block_lookup_address_dual(u32 pc)
} }
} }
/* Look up (translating on demand) the native code for the ARM block at `pc`.
 *
 * block_lookup_translate_arm() is attempted up to four times. It yields NULL
 * when the translation did not complete (the translators return false after
 * flushing a full translation cache), so a retry runs against the freshly
 * flushed cache. The first non-NULL result is returned after calling
 * translate_icache_sync(); note that this includes the (u8*)(~0) value the
 * lookup produces for addresses outside the handled regions.
 *
 * If every attempt yields NULL, a "bad jump" line with `pc` and reg[REG_PC]
 * is printed to stdout and NULL is returned.
 */
u8 function_cc *block_lookup_address_arm(u32 pc)
{
  unsigned attempts_left = 4;

  while (attempts_left-- > 0) {
    u8 *entry = block_lookup_translate_arm(pc);
    if (entry == NULL)
      continue;   /* Translation did not complete: try again */

    translate_icache_sync();
    return entry;
  }

  /* All attempts failed: report the failing target and give up */
  printf("bad jump %x (%x)\n", pc, reg[REG_PC]);
  fflush(stdout);
  return NULL;
}
/* Look up (translating on demand) the native code for the Thumb block at `pc`.
 *
 * block_lookup_translate_thumb() is attempted up to four times. It yields
 * NULL when the translation did not complete (the translators return false
 * after flushing a full translation cache), so a retry runs against the
 * freshly flushed cache. The first non-NULL result is returned after calling
 * translate_icache_sync(); note that this includes the (u8*)(~0) value the
 * lookup produces for addresses outside the handled regions.
 *
 * If every attempt yields NULL, a "bad jump" line with `pc` and reg[REG_PC]
 * is printed to stdout and NULL is returned.
 */
u8 function_cc *block_lookup_address_thumb(u32 pc)
{
  unsigned attempts_left = 4;

  while (attempts_left-- > 0) {
    u8 *entry = block_lookup_translate_thumb(pc);
    if (entry == NULL)
      continue;   /* Translation did not complete: try again */

    translate_icache_sync();
    return entry;
  }

  /* All attempts failed: report the failing target and give up */
  printf("bad jump %x (%x)\n", pc, reg[REG_PC]);
  fflush(stdout);
  return NULL;
}
// Potential exit point: If the rd field of the instruction is pc (0x0F), // Potential exit point: If the rd field of the instruction is pc (0x0F),
// the instruction is b/bl/bx, or the instruction is ldm with PC in the // the instruction is b/bl/bx, or the instruction is ldm with PC in the
// register list. // register list.
@ -2815,12 +2769,6 @@ u8 function_cc *block_lookup_address_dual(u32 pc)
break; \ break; \
} \ } \
#define arm_link_block() \
if(branch_target == 0x00000008) \
translation_target = bios_swi_entrypoint; \
else \
translation_target = block_lookup_address_arm(branch_target); \
#define arm_instruction_width 4 #define arm_instruction_width 4
#define arm_base_cycles() \ #define arm_base_cycles() \
@ -2890,13 +2838,6 @@ u8 function_cc *block_lookup_address_dual(u32 pc)
#define thumb_set_condition(_condition) \ #define thumb_set_condition(_condition) \
#define thumb_link_block() \
/* Speed hack to make SWI calls direct jumps */ \
if(branch_target == 0x00000008) \
translation_target = bios_swi_entrypoint; \
else \
translation_target = block_lookup_address_thumb(branch_target); \
#define thumb_instruction_width 2 #define thumb_instruction_width 2
#define thumb_base_cycles() \ #define thumb_base_cycles() \
@ -3056,7 +2997,7 @@ block_exit_type block_exits[MAX_EXITS];
pc &= ~0x01 \ pc &= ~0x01 \
#define update_pc_limits() \ #define update_pc_limits() \
if (translation_region == TRANSLATION_REGION_RAM) { \ if (ram_region) { \
if (pc >= 0x3000000) { \ if (pc >= 0x3000000) { \
iwram_code_min = MIN(pc & 0x7FFF, iwram_code_min); \ iwram_code_min = MIN(pc & 0x7FFF, iwram_code_min); \
iwram_code_max = MAX(pc & 0x7FFF, iwram_code_max); \ iwram_code_max = MAX(pc & 0x7FFF, iwram_code_max); \
@ -3066,8 +3007,7 @@ if (translation_region == TRANSLATION_REGION_RAM) { \
} \ } \
} \ } \
s32 translate_block_arm(u32 pc, translation_region_type bool translate_block_arm(u32 pc, bool ram_region)
translation_region, u32 smc_enable)
{ {
u32 opcode = 0; u32 opcode = 0;
u32 last_opcode; u32 last_opcode;
@ -3096,21 +3036,16 @@ s32 translate_block_arm(u32 pc, translation_region_type
if(!pc_address_block) if(!pc_address_block)
pc_address_block = load_gamepak_page(pc_region & 0x3FF); pc_address_block = load_gamepak_page(pc_region & 0x3FF);
switch(translation_region) if (ram_region) {
{
case TRANSLATION_REGION_RAM:
translation_ptr = ram_translation_ptr; translation_ptr = ram_translation_ptr;
translation_cache_limit = &ram_translation_cache[ translation_cache_limit = &ram_translation_cache[
RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD
- (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)]; - (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)];
break; } else {
case TRANSLATION_REGION_ROM:
translation_ptr = rom_translation_ptr; translation_ptr = rom_translation_ptr;
translation_cache_limit = translation_cache_limit =
rom_translation_cache + ROM_TRANSLATION_CACHE_SIZE - rom_translation_cache + ROM_TRANSLATION_CACHE_SIZE -
TRANSLATION_CACHE_LIMIT_THRESHOLD; TRANSLATION_CACHE_LIMIT_THRESHOLD;
break;
} }
generate_block_prologue(); generate_block_prologue();
@ -3119,7 +3054,7 @@ s32 translate_block_arm(u32 pc, translation_region_type
of the data processing functions can access it), and its expansion was of the data processing functions can access it), and its expansion was
massacring the compiler. */ massacring the compiler. */
if(smc_enable) if(ram_region)
{ {
scan_block(arm, yes); scan_block(arm, yes);
} }
@ -3166,21 +3101,12 @@ s32 translate_block_arm(u32 pc, translation_region_type
a simple recursive call here won't work, it has to pedal out to a simple recursive call here won't work, it has to pedal out to
the beginning. */ the beginning. */
if(translation_ptr > translation_cache_limit) if(translation_ptr > translation_cache_limit) {
{ if (ram_region)
translation_flush_count++;
switch(translation_region)
{
case TRANSLATION_REGION_RAM:
flush_translation_cache_ram(); flush_translation_cache_ram();
break; else
case TRANSLATION_REGION_ROM:
flush_translation_cache_rom(); flush_translation_cache_rom();
break; return false;
}
return -1;
} }
/* If the next instruction is a block entry point update the /* If the next instruction is a block entry point update the
@ -3226,32 +3152,27 @@ s32 translate_block_arm(u32 pc, translation_region_type
} }
} }
switch(translation_region) if (ram_region)
{
case TRANSLATION_REGION_RAM:
ram_translation_ptr = translation_ptr; ram_translation_ptr = translation_ptr;
break; else
case TRANSLATION_REGION_ROM:
rom_translation_ptr = translation_ptr; rom_translation_ptr = translation_ptr;
break;
}
for(i = 0; i < external_block_exit_position; i++) for(i = 0; i < external_block_exit_position; i++)
{ {
branch_target = external_block_exits[i].branch_target; branch_target = external_block_exits[i].branch_target;
arm_link_block(); if(branch_target == 0x00000008)
if(!translation_target){ translation_target = bios_swi_entrypoint;
return -1; else
} translation_target = block_lookup_translate_arm(branch_target);
if (!translation_target)
return false;
generate_branch_patch_unconditional( generate_branch_patch_unconditional(
external_block_exits[i].branch_source, translation_target); external_block_exits[i].branch_source, translation_target);
} }
return 0; return true;
} }
s32 translate_block_thumb(u32 pc, translation_region_type bool translate_block_thumb(u32 pc, bool ram_region)
translation_region, u32 smc_enable)
{ {
u32 opcode = 0; u32 opcode = 0;
u32 last_opcode; u32 last_opcode;
@ -3279,21 +3200,15 @@ s32 translate_block_thumb(u32 pc, translation_region_type
if(!pc_address_block) if(!pc_address_block)
pc_address_block = load_gamepak_page(pc_region & 0x3FF); pc_address_block = load_gamepak_page(pc_region & 0x3FF);
switch(translation_region) if (ram_region) {
{
case TRANSLATION_REGION_RAM:
translation_ptr = ram_translation_ptr; translation_ptr = ram_translation_ptr;
translation_cache_limit = &ram_translation_cache[ translation_cache_limit = &ram_translation_cache[
RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD
- (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)]; - (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)];
break; } else {
case TRANSLATION_REGION_ROM:
translation_ptr = rom_translation_ptr; translation_ptr = rom_translation_ptr;
translation_cache_limit = translation_cache_limit = &rom_translation_cache[
rom_translation_cache + ROM_TRANSLATION_CACHE_SIZE - ROM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD];
TRANSLATION_CACHE_LIMIT_THRESHOLD;
break;
} }
generate_block_prologue(); generate_block_prologue();
@ -3302,7 +3217,7 @@ s32 translate_block_thumb(u32 pc, translation_region_type
of the data processing functions can access it), and its expansion was of the data processing functions can access it), and its expansion was
massacring the compiler. */ massacring the compiler. */
if(smc_enable) if(ram_region)
{ {
scan_block(thumb, yes); scan_block(thumb, yes);
} }
@ -3349,19 +3264,11 @@ s32 translate_block_thumb(u32 pc, translation_region_type
if(translation_ptr > translation_cache_limit) if(translation_ptr > translation_cache_limit)
{ {
translation_flush_count++; if (ram_region)
switch(translation_region)
{
case TRANSLATION_REGION_RAM:
flush_translation_cache_ram(); flush_translation_cache_ram();
break; else
case TRANSLATION_REGION_ROM:
flush_translation_cache_rom(); flush_translation_cache_rom();
break; return false;
}
return -1;
} }
/* If the next instruction is a block entry point update the /* If the next instruction is a block entry point update the
@ -3402,28 +3309,24 @@ s32 translate_block_thumb(u32 pc, translation_region_type
} }
} }
switch(translation_region) if (ram_region)
{
case TRANSLATION_REGION_RAM:
ram_translation_ptr = translation_ptr; ram_translation_ptr = translation_ptr;
break; else
case TRANSLATION_REGION_ROM:
rom_translation_ptr = translation_ptr; rom_translation_ptr = translation_ptr;
break;
}
for(i = 0; i < external_block_exit_position; i++) for(i = 0; i < external_block_exit_position; i++)
{ {
branch_target = external_block_exits[i].branch_target; branch_target = external_block_exits[i].branch_target;
thumb_link_block(); if(branch_target == 0x00000008)
if(!translation_target){ translation_target = bios_swi_entrypoint;
return -1; else
} translation_target = block_lookup_translate_thumb(branch_target);
if (!translation_target)
return false;
generate_branch_patch_unconditional( generate_branch_patch_unconditional(
external_block_exits[i].branch_source, translation_target); external_block_exits[i].branch_source, translation_target);
} }
return 0; return true;
} }
void init_bios_hooks(void) void init_bios_hooks(void)