Implement dual mode (arm/thumb) for RAM positions

(This is similar to 908fb8 but for memory regions)

Removes the weird offset encoding in favour of a metadata structure,
similar to what there was before. However this structure overlaps with
the cache ram itself and grows like a stack. This is to avoid wating
memory since most games only use a few blocks.
Simplify the "dual" block lookup routines too, since they are only an
extrypoint to the other two real modes.
This commit is contained in:
David Guillen Fandos 2022-01-04 18:59:07 +01:00
parent 8036ad5b50
commit f597836abc
2 changed files with 82 additions and 75 deletions

View File

@ -2490,21 +2490,46 @@ void translate_icache_sync() {
// 0x0000 : this is just data (never translated)
// 0x00XX : not used (since first byte is zero)
// 0x0101 : this is code that is not the start of a translated block
// 0x0105 : first possible tag (we do not use 103 due to arm asm immediates)
// 0xXXXX : this is the start of a translated block
// 0xXXXX : this is the start of a translated block, starting from 0xFFFF downwards
// LSB is always set (we decrement by two) to ensure both bytes != 0
//
// The tag value is a pointer to the ram cache where the translated block starts
// The tag value must have both bytes set to non-zero, therefore the LSB is
// always 1 and starts from 0x0101 value.
// The 15 value is an index to 16 byte blocks in the RAM CACHE, therefore this
// cache is limited to ~ 2^15 * 16byte ~= 512KB
// The tag value is an index to a `ramtag_type` structure that sits at the end
// of the RAM CACHE (grows like a stack). For simplicity we start tags at 0xFFFF
// and grow like a stack.
#define LAST_TAG_NUM 0x0101
#define INITIAL_TOP_TAG 0xFFFF
#define CODE_TAG_BLOCK16 0x0101
#define CODE_TAG_BLOCK32 0x01010101
#define INITIAL_TOP_TAG 0x0105
#define TAG2ADDR(tag) (&ram_translation_cache[((((tag) - INITIAL_TOP_TAG) >> 1) << 4)])
#define ADDR2TAG(addr) ((((((u8*)addr) - &ram_translation_cache[0]) >> 4) << 1) + INITIAL_TOP_TAG)
#define VALIDTAG(tag) (tag >= INITIAL_TOP_TAG)
#define VALID_TAG(tagn) (tagn > LAST_TAG_NUM)
#define allocate_tag_arm(location) { \
location[0] = ram_block_tag; \
/* Could be another thumb inst */ \
if (!location[1]) \
location[1] = CODE_TAG_BLOCK16; \
ram_block_tag -= 2; \
}
#define allocate_tag_thumb(location) { \
location[0] = ram_block_tag; \
ram_block_tag -= 2; \
}
typedef struct
{
u32 offset_arm; // Cache offset to the ARM-mode compiled block
u32 offset_thumb; // Cache offset to the Thumb-mode compiled block
} ramtag_type;
static u32 ram_block_tag = INITIAL_TOP_TAG;
inline static ramtag_type* get_ram_tag(u16 tagval) {
ramtag_type *tbl = (ramtag_type*)&ram_translation_cache[RAM_TRANSLATION_CACHE_SIZE];
s16 tgidx = (s16)(tagval);
return &tbl[tgidx >> 1]; /* Since LSB is always 1 and thus unused */
}
// This function will return a pointer to a translated block of code. If it
// doesn't exist it will translate it, if it does it will pass it back.
@ -2521,20 +2546,6 @@ void translate_icache_sync() {
u32 thumb = 1; \
pc &= ~0x01 \
#define block_lookup_address_pc_dual() \
u32 thumb = pc & 0x01; \
\
if(thumb) \
{ \
pc--; \
reg[REG_CPSR] |= 0x20; \
} \
else \
{ \
pc = (pc + 2) & ~0x03; \
reg[REG_CPSR] &= ~0x20; \
} \
#define ram_translation_region TRANSLATION_REGION_RAM
#define rom_translation_region TRANSLATION_REGION_ROM
@ -2546,51 +2557,33 @@ void translate_icache_sync() {
translation_result = translate_block_thumb(pc, \
mem_type##_translation_region, smc_enable) \
#define block_lookup_translate_dual(mem_type, smc_enable) \
if(thumb) \
{ \
translation_result = translate_block_thumb(pc, \
mem_type##_translation_region, smc_enable); \
#define block_lookup_translate_ram(inst_type) \
{ \
ramtag_type* trentry; \
/* Allocate a tag if not a valid one, and initialize header */ \
if (!VALID_TAG(*location)) { \
allocate_tag_##inst_type(location); \
trentry = get_ram_tag(*location); \
trentry->offset_arm = 0; \
trentry->offset_thumb = 0; \
} else { \
trentry = get_ram_tag(*location); \
} \
else \
{ \
translation_result = translate_block_arm(pc, \
mem_type##_translation_region, smc_enable); \
} \
#define fill_tag_arm(tag) \
location[0] = (tag); \
location[1] = CODE_TAG_BLOCK16
#define fill_tag_thumb(tag) \
*location = (tag)
#define fill_tag_dual(tag) \
if(thumb) \
fill_tag_thumb(tag); \
else { \
fill_tag_arm(tag); \
} \
#define block_lookup_translate_ram(instruction_type) \
block_tag = *location; \
if(!VALIDTAG(block_tag)) \
\
if (trentry->offset_##inst_type == 0) \
{ \
__label__ redo; \
s32 translation_result; \
\
redo: \
\
/* Pad the start of the block to 16 bytes, see "memory tagging" above */ \
while (((uintptr_t)(&ram_translation_ptr[block_prologue_size] \
- ram_translation_cache)) % 16) \
ram_translation_ptr++; \
\
translation_recursion_level++; \
block_address = ram_translation_ptr + block_prologue_size; \
fill_tag_##instruction_type(ADDR2TAG(block_address)); \
trentry->offset_##inst_type = block_address - ram_translation_cache; \
\
translation_result = translate_block_##inst_type( \
pc, ram_translation_region, 1); \
\
block_lookup_translate_##instruction_type(ram, 1); \
translation_recursion_level--; \
\
/* If the translation failed then pass that failure on if we're in \
@ -2608,8 +2601,9 @@ void translate_icache_sync() {
} \
else \
{ \
block_address = (u8*)TAG2ADDR(block_tag); \
block_address = &ram_translation_cache[trentry->offset_##inst_type]; \
} \
}
u32 translation_recursion_level = 0;
u32 translation_flush_count = 0;
@ -2665,7 +2659,7 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \
if(!blk_offset) \
{ \
__label__ redo; \
s32 translation_result; \
s32 result; \
\
redo: \
\
@ -2676,12 +2670,12 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \
*blk_offset_addr = (u32)(rom_translation_ptr - rom_translation_cache);\
rom_translation_ptr += sizeof(hashhdr_type); \
block_address = rom_translation_ptr + block_prologue_size; \
block_lookup_translate_##type(rom, 0); \
result = translate_block_##type(pc, rom_translation_region, 0); \
translation_recursion_level--; \
\
/* If the translation failed then pass that failure on if we're in \
a recursive level, or try again if we've hit the bottom. */ \
if(translation_result == -1) \
if(result == -1) \
{ \
if(translation_recursion_level) \
return NULL; \
@ -2717,7 +2711,20 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \
block_lookup_address_builder(arm);
block_lookup_address_builder(thumb);
block_lookup_address_builder(dual);
u8 function_cc *block_lookup_address_dual(u32 pc)
{
u32 thumb = pc & 0x01;
if(thumb) {
pc &= ~1;
reg[REG_CPSR] |= 0x20;
return block_lookup_address_thumb(pc);
} else {
pc = (pc + 2) & ~0x03;
reg[REG_CPSR] &= ~0x20;
return block_lookup_address_arm(pc);
}
}
// Potential exit point: If the rd field is pc for instructions is 0x0F,
// the instruction is b/bl/bx, or the instruction is ldm with PC in the
@ -3093,9 +3100,9 @@ s32 translate_block_arm(u32 pc, translation_region_type
{
case TRANSLATION_REGION_RAM:
translation_ptr = ram_translation_ptr;
translation_cache_limit =
ram_translation_cache + RAM_TRANSLATION_CACHE_SIZE -
TRANSLATION_CACHE_LIMIT_THRESHOLD;
translation_cache_limit = &ram_translation_cache[
RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD
- (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)];
break;
case TRANSLATION_REGION_ROM:
@ -3276,9 +3283,9 @@ s32 translate_block_thumb(u32 pc, translation_region_type
{
case TRANSLATION_REGION_RAM:
translation_ptr = ram_translation_ptr;
translation_cache_limit =
ram_translation_cache + RAM_TRANSLATION_CACHE_SIZE -
TRANSLATION_CACHE_LIMIT_THRESHOLD;
translation_cache_limit = &ram_translation_cache[
RAM_TRANSLATION_CACHE_SIZE - TRANSLATION_CACHE_LIMIT_THRESHOLD
- (0x10000 - ram_block_tag) / 2 * sizeof(ramtag_type)];
break;
case TRANSLATION_REGION_ROM:
@ -3463,6 +3470,7 @@ void flush_translation_cache_ram(void)
iwram_code_max = 0U;
ewram_code_min = ~0U;
ewram_code_max = 0U;
ram_block_tag = INITIAL_TOP_TAG;
}
void flush_translation_cache_rom(void)

View File

@ -11,15 +11,14 @@
#if defined(SMALL_TRANSLATION_CACHE)
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 1024 * 2)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 384)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 2)
#else
#define ROM_TRANSLATION_CACHE_SIZE (1024 * 1024 * 10)
#define RAM_TRANSLATION_CACHE_SIZE (1024 * 512)
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 8)
#endif
/* Please note that RAM_TRANSLATION_CACHE_SIZE is limited to 512KB
Check cpu_threaded.c for "memory tagging" for more info. */
/* Should be an upperbound to the maximum number of bytes a single JIT'ed
instruction can take. STM/LDM are tipically the biggest ones */
#define TRANSLATION_CACHE_LIMIT_THRESHOLD (1024 * 2)
/* Hash table size for ROM trans cache lookups */
#define ROM_BRANCH_HASH_BITS 16