Make ewram memory linear

This saves a few cycles on MIPS and simplifies the core a bit.
The write map has been removed, which only affects interpreter performance
very slightly. The ARM and x86 handlers were rewired to support direct access
to I/EWRAM (and VRAM on ARM) to compensate. Overall performance is slightly
better, and the code is cleaner and allows for further improvements in the
dynarecs.
This commit is contained in:
David Guillen Fandos 2021-03-23 19:05:35 +01:00
parent 7e27010a3c
commit 11ec213c99
11 changed files with 271 additions and 368 deletions

View File

@ -4,7 +4,6 @@
.globl invalidate_cache_region
.globl memory_map_read
.globl memory_map_write
.globl reg
.globl palette_ram
.globl palette_ram_converted
@ -533,31 +532,6 @@ return_to_main:
bx lr
@ Write out to memory.
@ Input:
@ r0: address
@ r1: value
@ r2: current pc
#define execute_store_body(store_type, store_op) ;\
save_flags() ;\
str lr, [reg_base, #REG_SAVE3] /* save lr */;\
tst r0, #0xF0000000 /* make sure address is in range */;\
bne ext_store_u##store_type /* if not do ext store */;\
;\
ldr r2, =memory_map_write /* r2 = memory_map_write */;\
mov lr, r0, lsr #15 /* lr = page index of address */;\
ldr r2, [r2, lr, lsl #2] /* r2 = memory page */;\
;\
cmp r2, #0 /* see if map is ext */;\
beq ext_store_u##store_type /* if so do ext store */;\
;\
mov r0, r0, lsl #17 /* isolate bottom 15 bits in top */;\
mov r0, r0, lsr #17 /* like performing and 0x7FFF */;\
store_op r1, [r2, r0] /* store result */;\
#define store_align_8() ;\
and r1, r1, #0xff ;\
@ -568,6 +542,75 @@ return_to_main:
#define store_align_32() ;\
bic r0, r0, #0x03 ;\
#define mask_addr_8(nbits) /* r0 = r0 & ((1 << nbits) - 1) */;\
mov r0, r0, lsl #(32 - nbits) /* move the low nbits up to the top of r0 */;\
mov r0, r0, lsr #(32 - nbits) /* move them back: bits above nbits are now clear */;\
#define mask_addr_16(nbits) /* same as mask_addr_8, and bit 0 is cleared too */;\
mov r0, r0, lsl #(32 - nbits) /* move the low nbits up to the top of r0 */;\
mov r0, r0, lsr #(32 - nbits + 1) /* shift down one place too far, dropping bit 0 */;\
mov r0, r0, lsl #1 /* back in place: offset is halfword aligned */;\
#define mask_addr_32(nbits) /* same as mask_addr_8, and bits 1..0 are cleared too */;\
mov r0, r0, lsl #(32 - nbits) /* move the low nbits up to the top of r0 */;\
mov r0, r0, lsr #(32 - nbits + 2) /* shift down two places too far, dropping bits 1..0 */;\
mov r0, r0, lsl #2 /* back in place: offset is word aligned */;\
@ Vram, OAM and palette memories can only be accessed at a 16 bit boundary,
@ so the 8 bit variant below masks the address like a 16 bit access.
#define mask_addr_bus16_32(nbits) mask_addr_32(nbits)
#define mask_addr_bus16_16(nbits) mask_addr_16(nbits)
#define mask_addr_bus16_8(nbits) /* byte access on a 16 bit bus */ \
mask_addr_16(nbits) /* halfword aligned offset in r0 */ \
extract_u16(r1, r1) /* NOTE(review): extract_u16 is defined elsewhere; presumably keeps the low 16 bits of r1 - confirm */
@ Write out to memory.
@ Input:
@ r0: address
@ r1: value
@ r2: current pc
@
@ The word at LR is not an instruction but u32 data that contains the PC
@ (used for SMC). This is why returning is essentially `pc = lr + 4`.
@
@ Flow: save_flags() runs first, then lr is stored in REG_SAVE3 and r4 in
@ REG_SAVE2. An address with any of its top four bits set branches straight
@ to ext_store_u##store_type. Otherwise bits 27..24 of the address index the
@ 16 entry pointer table emitted right after the `bx lr`, and control jumps
@ to the selected handler. r4 is reloaded from REG_SAVE2 before the jump;
@ lr stays clobbered, so handlers reload it from REG_SAVE3.
@ NOTE(review): table slots 0x08-0x0C are commented "ignore" yet point at
@ ext_store_u##store_type rather than ext_store_ignore - confirm intended.
#define execute_store_body(store_type, store_op) ;\
save_flags() ;\
str lr, [reg_base, #REG_SAVE3] /* save lr */;\
str r4, [reg_base, #REG_SAVE2] /* save r4 */;\
tst r0, #0xF0000000 /* make sure address is in range */;\
bne ext_store_u##store_type /* if not do ext store */;\
;\
ldr lr, =ptr_tbl_##store_type /* lr = ptr table */;\
mov r4, r0, lsr #24 /* r4 = region number */;\
ldr lr, [lr, r4, lsl #2] /* lr = function pointer */;\
ldr r4, [reg_base, #REG_SAVE2] /* restore r4 */;\
bx lr /* jump to handler */;\
;\
ptr_tbl_##store_type: ;\
.word ext_store_ignore /* 0x00: BIOS, ignore */;\
.word ext_store_ignore /* 0x01: ignore */;\
.word ext_store_ewram_u##store_type /* 0x02: ewram */;\
.word ext_store_iwram_u##store_type /* 0x03: iwram */;\
.word ext_store_u##store_type /* 0x04: I/O regs */;\
.word ext_store_u##store_type /* 0x05: palette RAM */;\
.word ext_store_vram_u##store_type /* 0x06: vram */;\
.word ext_store_u##store_type /* 0x07: oam ram */;\
.word ext_store_u##store_type /* 0x08: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x09: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0A: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0B: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0C: gamepak: ignore */;\
.word ext_store_u##store_type /* 0x0D: EEPROM */;\
.word ext_store_u##store_type /* 0x0E: backup */;\
.word ext_store_ignore /* 0x0F: ignore */;\
@ Handler for ignored areas: nothing is written. It reloads the lr that
@ execute_store_body saved in REG_SAVE3, restores the flags and returns
@ to lr + 4.
ext_store_ignore:
ldr lr, [reg_base, #REG_SAVE3] @ reload the saved lr from its REG_SAVE3 slot
restore_flags()
add pc, lr, #4 @ return, skipping the data word located at lr
#define execute_store_builder(store_type, store_op, load_op) ;\
;\
@ -577,20 +620,6 @@ return_to_main:
execute_store_u##store_type: ;\
_execute_store_u##store_type: ;\
execute_store_body(store_type, store_op) ;\
sub r2, r2, #0x8000 /* Pointer to code status data */;\
load_op r0, [r2, r0] /* check code flag */;\
;\
cmp r0, #0 /* see if it's not 0 */;\
bne 2f /* if so perform smc write */;\
ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
;\
2: ;\
ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\
ldr r0, [lr] /* load PC */;\
str r0, [reg_base, #REG_PC] /* write out PC */;\
b smc_write /* perform smc write */;\
;\
ext_store_u##store_type: ;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
@ -599,11 +628,53 @@ ext_store_u##store_type: ;\
store_align_##store_type() ;\
call_c_function(write_memory##store_type) ;\
b write_epilogue /* handle additional write stuff */;\
;\
ext_store_iwram_u##store_type: ;\
mask_addr_##store_type(15) /* Mask to mirror memory (+align)*/;\
ldr r2, =(iwram+0x8000) /* r2 = iwram base */;\
store_op r1, [r0, r2] /* store data */;\
sub r2, r2, #0x8000 /* r2 = iwram smc base */;\
load_op r1, [r0, r2] /* r1 = SMC sentinel */;\
cmp r1, #0 /* see if it's not 0 */;\
bne 3f /* if so perform smc write */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
;\
ext_store_ewram_u##store_type: ;\
mask_addr_##store_type(18) /* Mask to mirror memory (+align)*/;\
ldr r2, =(ewram) /* r2 = ewram base */;\
store_op r1, [r0, r2] /* store data */;\
add r2, r2, #0x40000 /* r2 = ewram smc base */;\
load_op r1, [r0, r2] /* r1 = SMC sentinel */;\
cmp r1, #0 /* see if it's not 0 */;\
bne 3f /* if so perform smc write */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
;\
ext_store_vram_u##store_type: ;\
mask_addr_bus16_##store_type(17) /* Mask to mirror memory (+align)*/;\
cmp r0, #0x18000 /* Check if exceeds 96KB */;\
subcs r0, r0, #0x8000 /* Mirror to the last bank */;\
ldr r2, =(vram) /* r2 = vram base */;\
store_op r1, [r0, r2] /* store data */;\
ldr lr, [reg_base, #REG_SAVE3] /* pop lr off of stack */;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
;\
3: ;\
ldr lr, [reg_base, #REG_SAVE3] /* restore lr */;\
ldr r0, [lr] /* load PC */;\
str r0, [reg_base, #REG_PC] /* write out PC */;\
b smc_write /* perform smc write */;\
execute_store_builder(8, strb, ldrb)
execute_store_builder(16, strh, ldrh)
execute_store_builder(32, str, ldr)
@ This is a store that is executed in a strm case (so no SMC checks in-between)
.globl execute_store_u32_safe
.globl _execute_store_u32_safe
@ -619,6 +690,28 @@ ext_store_u32_safe:
restore_flags()
bx lr @ Return
@ IWRAM handler for the safe u32 store; no SMC check is made here.
@ r0 holds the address and r1 the word to store. The offset wraps at 15 bits
@ (32KB) and the data area starts 0x8000 bytes into iwram.
@ NOTE(review): the return address is read from REG_SAVE3, presumably stored
@ there by the execute_store_u32_safe entry code (not shown here) - confirm.
ext_store_iwram_u32_safe:
mask_addr_8(15) @ Mask to mirror memory (no need to align!)
ldr r2, =(iwram+0x8000) @ r2 = start of the iwram data area
str r1, [r0, r2] @ store data
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return to the address kept in REG_SAVE3
@ EWRAM handler for the safe u32 store; no SMC check is made here.
@ r0 holds the address and r1 the word to store. The offset wraps at 18 bits
@ (256KB) and indexes the ewram array directly from its start.
@ NOTE(review): the return address is read from REG_SAVE3, presumably stored
@ there by the execute_store_u32_safe entry code (not shown here) - confirm.
ext_store_ewram_u32_safe:
mask_addr_8(18) @ Mask to mirror memory (no need to align!)
ldr r2, =(ewram) @ r2 = ewram base
str r1, [r0, r2] @ store data
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return to the address kept in REG_SAVE3
@ VRAM handler for the safe u32 store.
@ r0 holds the address and r1 the word to store. The offset wraps at 17 bits
@ (128KB); offsets at or above 0x18000 are moved down by 0x8000 so that they
@ land inside the 96KB vram array.
@ NOTE(review): the return address is read from REG_SAVE3, presumably stored
@ there by the execute_store_u32_safe entry code (not shown here) - confirm.
ext_store_vram_u32_safe:
mask_addr_8(17) @ Mask to mirror memory (no need to align!)
ldr r2, =(vram) @ r2 = vram base
cmp r0, #0x18000 @ Check if exceeds 96KB
subcs r0, r0, #0x8000 @ Mirror to the last bank (unsigned r0 >= 0x18000)
str r1, [r0, r2] @ store data
restore_flags()
ldr pc, [reg_base, #REG_SAVE3] @ return to the address kept in REG_SAVE3
write_epilogue:
cmp r0, #0 @ check if the write rose an alert
@ -756,6 +849,7 @@ ext_load_##load_type: ;\
restore_flags() ;\
add pc, lr, #4 /* return */;\
.pool
execute_load_builder(u8, 8, ldrneb, #0xF0000000)
execute_load_builder(s8, 8, ldrnesb, #0xF0000000)
@ -763,14 +857,10 @@ execute_load_builder(u16, 16, ldrneh, #0xF0000001)
execute_load_builder(s16, 16_signed, ldrnesh, #0xF0000001)
execute_load_builder(u32, 32, ldrne, #0xF0000000)
.pool
.data
memory_map_read:
.space 0x8000
memory_map_write:
.space 0x8000
palette_ram:
.space 0x400
palette_ram_converted:

30
cpu.c
View File

@ -1003,7 +1003,6 @@ const u32 psr_masks[16] =
#define fast_write_memory(size, type, address, value) \
{ \
u8 *map; \
u32 _address = (address) & ~(aligned_address_mask##size & 0x03); \
if(_address < 0x10000000) \
{ \
@ -1011,17 +1010,9 @@ const u32 psr_masks[16] =
memory_writes_##type++; \
} \
\
if(((_address & aligned_address_mask##size) == 0) && \
(map = memory_map_write[_address >> 15])) \
{ \
*((type *)((u8 *)map + (_address & 0x7FFF))) = value; \
} \
else \
{ \
cpu_alert = write_memory##size(_address, value); \
if(cpu_alert) \
goto alert; \
} \
cpu_alert = write_memory##size(_address, value); \
if(cpu_alert) \
goto alert; \
} \
#define load_aligned32(address, dest) \
@ -1046,22 +1037,14 @@ const u32 psr_masks[16] =
#define store_aligned32(address, value) \
{ \
u32 _address = address; \
u8 *map = memory_map_write[_address >> 15]; \
if(_address < 0x10000000) \
{ \
memory_region_access_write_u32[_address >> 24]++; \
memory_writes_u32++; \
} \
if(map) \
{ \
address32(map, _address & 0x7FFF) = value; \
} \
else \
{ \
cpu_alert = write_memory32(_address, value); \
if(cpu_alert) \
goto alert; \
} \
cpu_alert = write_memory32(_address, value); \
if(cpu_alert) \
goto alert; \
} \
#define load_memory_u8(address, dest) \
@ -1647,7 +1630,6 @@ void raise_interrupt(irq_type irq_raised)
#ifndef HAVE_DYNAREC
u8 *memory_map_read [8 * 1024];
u8 *memory_map_write[8 * 1024];
u16 palette_ram[512];
u16 palette_ram_converted[512];
#endif

4
cpu.h
View File

@ -155,11 +155,7 @@ extern u8 *ram_translation_ptr;
#define MAX_TRANSLATION_GATES 8
extern u32 idle_loop_target_pc;
extern u32 force_pc_update_target;
extern u32 iwram_stack_optimize;
extern u32 allow_smc_ram_u8;
extern u32 allow_smc_ram_u16;
extern u32 allow_smc_ram_u32;
extern u32 direct_map_vram;
extern u32 translation_gate_targets;
extern u32 translation_gate_target_pc[MAX_TRANSLATION_GATES];

View File

@ -76,12 +76,6 @@ u32 ewram_code_max = 0xFFFFFFFF;
u32 *rom_branch_hash[ROM_BRANCH_HASH_SIZE];
// Default
u32 force_pc_update_target = 0xFFFFFFFF;
u32 allow_smc_ram_u8 = 1;
u32 allow_smc_ram_u16 = 1;
u32 allow_smc_ram_u32 = 1;
typedef struct
{
u8 *block_offset;
@ -2813,7 +2807,7 @@ u8 function_cc *block_lookup_address_##type(u32 pc) \
switch(pc >> 24) \
{ \
case 0x2: \
location = (u16 *)(ewram + (pc & 0x7FFF) + ((pc & 0x38000) * 2)); \
location = (u16 *)(ewram + (pc & 0x3FFFF) + 0x40000); \
block_lookup_translate(type, ram, 1); \
break; \
\
@ -3119,18 +3113,22 @@ block_lookup_address_builder(dual);
block_data_type block_data[MAX_BLOCK_SIZE];
block_exit_type block_exits[MAX_EXITS];
#define smc_write_arm_yes() \
if(address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \
#define smc_write_arm_yes() { \
int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \
if(address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \
{ \
address32(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = \
address32(pc_address_block, (block_end_pc & 0x7FFF) + offset) = \
0xFFFFFFFF; \
} \
}
#define smc_write_thumb_yes() \
if(address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) == 0x0000) \
#define smc_write_thumb_yes() { \
int offset = (pc < 0x03000000) ? 0x40000 : -0x8000; \
if(address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) == 0) \
{ \
address16(pc_address_block, (block_end_pc & 0x7FFF) - 0x8000) = 0xFFFF; \
address16(pc_address_block, (block_end_pc & 0x7FFF) + offset) = 0xFFFF; \
} \
}
#define smc_write_arm_no() \
@ -3428,7 +3426,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type
u32 opcode = 0;
u32 last_opcode;
u32 condition;
u32 last_condition;
u32 pc_region = (pc >> 15);
u32 new_pc_region;
u8 *pc_address_block = memory_map_read[pc_region];
@ -3514,8 +3511,6 @@ s32 translate_block_thumb(u32 pc, translation_region_type
block_exit_position = 0;
block_data_position = 0;
last_condition = 0x0E;
while(pc != block_end_pc)
{
block_data[block_data_position].block_offset = translation_ptr;
@ -3627,13 +3622,16 @@ s32 translate_block_thumb(u32 pc, translation_region_type
void flush_translation_cache_ram(void)
{
flush_ram_count++;
/* printf("ram flush %d (pc %x), %x to %x, %x to %x\n",
/*printf("ram flush %d (pc %x), %x to %x, %x to %x\n",
flush_ram_count, reg[REG_PC], iwram_code_min, iwram_code_max,
ewram_code_min, ewram_code_max); */
ewram_code_min, ewram_code_max);*/
last_ram_translation_ptr = ram_translation_cache;
ram_translation_ptr = ram_translation_cache;
ram_block_tag_top = 0x0101;
// Proceed to clean the SMC area if needed
// (also try to memset as little as possible for performance)
if(iwram_code_min != 0xFFFFFFFF)
{
iwram_code_min &= 0x7FFF;
@ -3643,33 +3641,9 @@ void flush_translation_cache_ram(void)
if(ewram_code_min != 0xFFFFFFFF)
{
u32 ewram_code_min_page;
u32 ewram_code_max_page;
u32 ewram_code_min_offset;
u32 ewram_code_max_offset;
u32 i;
ewram_code_min &= 0x3FFFF;
ewram_code_max &= 0x3FFFF;
ewram_code_min_page = ewram_code_min >> 15;
ewram_code_max_page = ewram_code_max >> 15;
ewram_code_min_offset = ewram_code_min & 0x7FFF;
ewram_code_max_offset = ewram_code_max & 0x7FFF;
if(ewram_code_min_page == ewram_code_max_page)
{
memset(ewram + (ewram_code_min_page * 0x10000) +
ewram_code_min_offset, 0,
ewram_code_max_offset - ewram_code_min_offset);
}
else
{
for(i = ewram_code_min_page + 1; i < ewram_code_max_page; i++)
memset(ewram + (i * 0x10000), 0, 0x8000);
memset(ewram, 0, ewram_code_max_offset);
}
memset(&ewram[0x40000 + ewram_code_min], 0, ewram_code_max - ewram_code_min);
}
iwram_code_min = 0xFFFFFFFF;

View File

@ -596,8 +596,7 @@ u32 function_cc read_eeprom(void)
\
case 0x02: \
/* external work RAM */ \
address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \
value = address##type(ewram, address); \
value = address##type(ewram, (address & 0x3FFFF)); \
break; \
\
case 0x03: \
@ -1907,8 +1906,7 @@ void function_cc write_rtc(u32 address, u32 value)
{ \
case 0x02: \
/* external work RAM */ \
address = (address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000; \
address##type(ewram, address) = value; \
address##type(ewram, (address & 0x3FFFF)) = value; \
break; \
\
case 0x03: \
@ -2454,7 +2452,7 @@ s32 load_bios(char *name)
// DMA memory regions can be one of the following:
// IWRAM - 32kb offset from the contiguous iwram region.
// EWRAM - like segmented but with self modifying code check.
// EWRAM - also contiguous but with self modifying code check mirror.
// VRAM - 96kb offset from the contiguous vram region, should take care
// Palette RAM - Converts palette entries when written to.
// OAM RAM - Sets OAM modified flag to true.
@ -2527,11 +2525,8 @@ dma_region_type dma_region_map[16] =
#define dma_vars_iwram(type) \
dma_smc_vars_##type() \
#define dma_vars_vram(type) \
#define dma_vars_palette_ram(type) \
#define dma_oam_ram_src() \
#define dma_vars_ewram(type) \
dma_smc_vars_##type()
#define dma_oam_ram_dest() \
oam_update = 1 \
@ -2539,14 +2534,17 @@ dma_region_type dma_region_map[16] =
#define dma_vars_oam_ram(type) \
dma_oam_ram_##type() \
#define dma_vars_io(type) \
#define dma_vars_io(type)
#define dma_vars_vram(type)
#define dma_vars_palette_ram(type)
#define dma_vars_bios(type)
#define dma_vars_ext(type)
#define dma_oam_ram_src()
#define dma_segmented_load_src() \
memory_map_read[src_current_region] \
#define dma_segmented_load_dest() \
memory_map_write[dest_current_region] \
#define dma_vars_gamepak(type) \
u32 type##_new_region; \
u32 type##_current_region = type##_ptr >> 15; \
@ -2558,24 +2556,6 @@ dma_region_type dma_region_map[16] =
type##_address_block = load_gamepak_page(type##_current_region & 0x3FF); \
} \
#define dma_vars_ewram(type) \
dma_smc_vars_##type(); \
u32 type##_new_region; \
u32 type##_current_region = type##_ptr >> 15; \
u8 *type##_address_block = dma_segmented_load_##type() \
#define dma_vars_bios(type) \
#define dma_vars_ext(type) \
#define dma_ewram_check_region(type) \
type##_new_region = (type##_ptr >> 15); \
if(type##_new_region != type##_current_region) \
{ \
type##_current_region = type##_new_region; \
type##_address_block = dma_segmented_load_##type(); \
} \
#define dma_gamepak_check_region(type) \
type##_new_region = (type##_ptr >> 15); \
if(type##_new_region != type##_current_region) \
@ -2605,9 +2585,7 @@ dma_region_type dma_region_map[16] =
read_value = address##transfer_size(palette_ram, type##_ptr & 0x3FF) \
#define dma_read_ewram(type, transfer_size) \
dma_ewram_check_region(type); \
read_value = address##transfer_size(type##_address_block, \
type##_ptr & 0x7FFF) \
read_value = address##transfer_size(ewram, type##_ptr & 0x3FFFF) \
#define dma_read_gamepak(type, transfer_size) \
dma_gamepak_check_region(type); \
@ -2642,12 +2620,9 @@ dma_region_type dma_region_map[16] =
write_memory##transfer_size(type##_ptr, read_value) \
#define dma_write_ewram(type, transfer_size) \
dma_ewram_check_region(type); \
\
address##transfer_size(type##_address_block, type##_ptr & 0x7FFF) = \
read_value; \
smc_trigger |= address##transfer_size(type##_address_block, \
(type##_ptr & 0x7FFF) - 0x8000) \
address##transfer_size(ewram, type##_ptr & 0x3FFFF) = read_value; \
smc_trigger |= address##transfer_size(ewram, \
(type##_ptr & 0x3FFFF) + 0x40000) \
#define dma_epilogue_iwram() \
if(smc_trigger) \
@ -3105,14 +3080,6 @@ cpu_alert_type dma_transfer(dma_transfer_type *dma)
map_offset++) \
memory_map_##type[map_offset] = NULL; \
#define map_ram_region(type, start, end, mirror_blocks, region) \
for(map_offset = (start) / 0x8000; map_offset < \
((end) / 0x8000); map_offset++) \
{ \
memory_map_##type[map_offset] = \
((u8 *)region) + ((map_offset % mirror_blocks) * 0x10000) + 0x8000; \
} \
#define map_vram(type) \
for(map_offset = 0x6000000 / 0x8000; map_offset < (0x7000000 / 0x8000); \
map_offset += 4) \
@ -3274,8 +3241,8 @@ void init_memory(void)
// Fill memory map regions, areas marked as NULL must be checked directly
map_region(read, 0x0000000, 0x1000000, 1, bios_rom);
map_null(read, 0x1000000, 0x2000000);
map_ram_region(read, 0x2000000, 0x3000000, 8, ewram);
map_ram_region(read, 0x3000000, 0x4000000, 1, iwram);
map_region(read, 0x2000000, 0x3000000, 8, ewram);
map_region(read, 0x3000000, 0x4000000, 1, &iwram[0x8000]);
map_region(read, 0x4000000, 0x5000000, 1, io_registers);
map_null(read, 0x5000000, 0x6000000);
map_null(read, 0x6000000, 0x7000000);
@ -3284,45 +3251,12 @@ void init_memory(void)
init_memory_gamepak();
map_null(read, 0xE000000, 0x10000000);
// Fill memory map regions, areas marked as NULL must be checked directly
map_null(write, 0x0000000, 0x2000000);
map_ram_region(write, 0x2000000, 0x3000000, 8, ewram);
map_ram_region(write, 0x3000000, 0x4000000, 1, iwram);
map_null(write, 0x4000000, 0x5000000);
map_null(write, 0x5000000, 0x6000000);
// The problem here is that the current method of handling self-modifying code
// requires writeable memory to be proceeded by 32KB SMC data areas or be
// indirectly writeable. It's possible to get around this if you turn off the SMC
// check altogether, but this will make a good number of ROMs crash (perhaps most
// of the ones that actually need it? This has yet to be determined).
// This is because VRAM cannot be efficiently made incontiguous, and still allow
// the renderer to work as efficiently. It would, at the very least, require a
// lot of hacking of the renderer which I'm not prepared to do.
// TODO(davidgfnet): add SMC VRAM detection
// However, it IS possible to directly map the first page no matter what because
// there's 32kb of blank stuff sitting beneath it.
if(direct_map_vram)
{
map_vram(write);
}
else
{
map_null(write, 0x6000000, 0x7000000);
}
map_null(write, 0x7000000, 0x8000000);
map_null(write, 0x8000000, 0xE000000);
map_null(write, 0xE000000, 0x10000000);
memset(io_registers, 0, 0x8000);
memset(oam_ram, 0, 0x400);
memset(palette_ram, 0, 0x400);
memset(iwram, 0, 0x10000);
memset(ewram, 0, 0x80000);
memset(vram, 0, 0x18000);
memset(io_registers, 0, sizeof(io_registers));
memset(oam_ram, 0, sizeof(oam_ram));
memset(palette_ram, 0, sizeof(palette_ram));
memset(iwram, 0, sizeof(iwram));
memset(ewram, 0, sizeof(ewram));
memset(vram, 0, sizeof(vram));
io_registers[REG_DISPCNT] = 0x80;
io_registers[REG_P1] = 0x3FF;
@ -3426,8 +3360,6 @@ void gba_save_state(void* dst)
#define memory_savestate_builder(type) \
void memory_##type##_savestate(void) \
{ \
u32 i; \
\
state_mem_##type##_variable(backup_type); \
state_mem_##type##_variable(sram_size); \
state_mem_##type##_variable(flash_mode); \
@ -3453,10 +3385,7 @@ void memory_##type##_savestate(void) \
state_mem_##type##_array(dma); \
\
state_mem_##type(iwram + 0x8000, 0x8000); \
for(i = 0; i < 8; i++) \
{ \
state_mem_##type(ewram + (i * 0x10000) + 0x8000, 0x8000); \
} \
state_mem_##type(ewram, 0x40000); \
state_mem_##type(vram, 0x18000); \
state_mem_##type(oam_ram, 0x400); \
state_mem_##type(palette_ram, 0x400); \

View File

@ -214,7 +214,6 @@ extern u8 ewram[1024 * 256 * 2];
extern u8 iwram[1024 * 32 * 2];
extern u8 *memory_map_read[8 * 1024];
extern u8 *memory_map_write[8 * 1024];
extern u32 reg[64];

View File

@ -809,15 +809,8 @@ static void set_memory_descriptors(void)
{
const uint64_t mem = RETRO_MEMORY_SYSTEM_RAM;
struct retro_memory_descriptor desc[9] = {
{ mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x00000 + 0x8000, 0x2000000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x10000 + 0x8000, 0x2008000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x20000 + 0x8000, 0x2010000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x30000 + 0x8000, 0x2018000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x40000 + 0x8000, 0x2020000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x50000 + 0x8000, 0x2028000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x60000 + 0x8000, 0x2030000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x70000 + 0x8000, 0x2038000, 0, 0, 0x8000, NULL }
{ mem, iwram, 0x00000 + 0x8000, 0x3000000, 0, 0, 0x8000, NULL },
{ mem, ewram, 0x00000, 0x2000000, 0, 0, 0x40000, NULL },
};
struct retro_memory_map retromap = {
desc,

View File

@ -1010,47 +1010,10 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \
{ \
u32 _address = (u32)(address); \
u32 _address_hi = (_address + 0x8000) >> 16; \
generate_load_imm(ireg, address); \
mips_emit_lui(ireg, _address_hi >> 16) \
generate_load_memory_##type(ireg, _address - (_address_hi << 16)); \
} \
#define generate_known_address_load_builder(type) \
u32 generate_known_address_load_##type(u32 rd, u32 address) \
{ \
switch(address >> 24) \
{ \
/* Read from the BIOS ROM, can be converted to an immediate load. \
Only really possible to do this from the BIOS but should be okay \
to allow it everywhere */ \
case 0x00: \
u32 imm = read_memory_constant_##type(address); \
generate_load_imm(arm_to_mips_reg[rd], imm); \
return 1; \
\
/* Read from RAM, can be converted to a load */ \
case 0x02: \
generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)ewram + \
(address & 0x7FFF) + ((address & 0x38000) * 2) + 0x8000); \
return 1; \
\
case 0x03: \
generate_load_memory(type, arm_to_mips_reg[rd], (u8 *)iwram + \
(address & 0x7FFF) + 0x8000); \
return 1; \
\
/* Read from gamepak ROM, this has to be an immediate load because \
it might not actually be in memory anymore when we get to it. */ \
case 0x08: \
u32 imm = read_memory_constant_##type(address); \
generate_load_imm(arm_to_mips_reg[rd], imm); \
return 1; \
\
default: \
return 0; \
} \
} \
#define generate_block_extra_vars() \
u32 stored_pc = pc; \
u8 *update_trampoline \
@ -1060,12 +1023,6 @@ u32 generate_load_rm_sh_##flags_op(u32 rm) \
generate_load_rm_sh_builder(flags); \
generate_load_rm_sh_builder(no_flags); \
\
/* generate_known_address_load_builder(u8); \
generate_known_address_load_builder(u16); \
generate_known_address_load_builder(u32); \
generate_known_address_load_builder(s8); \
generate_known_address_load_builder(s16); */ \
\
u32 generate_load_offset_sh(u32 rm) \
{ \
switch((opcode >> 5) & 0x03) \
@ -2787,12 +2744,13 @@ static void emit_pmemld_stub(
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (region == 2) {
// EWRAM is a bit special
// Can't do EWRAM with an `andi` instruction (18 bits mask)
mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && alignment != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
@ -2862,12 +2820,13 @@ static void emit_pmemst_stub(
}
if (region == 2) {
// EWRAM is a bit special
// Can't do EWRAM with an `andi` instruction (18 bits mask)
mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && realsize != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_andi(reg_temp, reg_a0, memmask); // Clears all but 15 bits (LSB)
mips_emit_ext(reg_a0, reg_a0, 15, 3); // Gets the 3 higher bits (from the 18)
mips_emit_ins(reg_temp, reg_a0, 16, 3); // Puts the 3 bits into bits 18..16
mips_emit_addu(reg_rv, reg_rv, reg_temp); // Adds to the base addr
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
@ -2888,7 +2847,12 @@ static void emit_pmemst_stub(
// Generate SMC write and tracking
// TODO: Should we have SMC checks here also for aligned?
if (meminfo->check_smc && !aligned) {
mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer
if (region == 2) {
mips_emit_lui(reg_temp, 0x40000 >> 16);
mips_emit_addu(reg_temp, reg_rv, reg_temp); // SMC lives after the ewram
} else {
mips_emit_addiu(reg_temp, reg_rv, 0x8000); // -32KB is the addr of the SMC buffer
}
if (realsize == 2) {
mips_emit_lw(reg_temp, reg_temp, base_addr);
} else if (realsize == 1) {
@ -3272,8 +3236,8 @@ void init_emitter() {
const t_stub_meminfo ldinfo [] = {
{ emit_pmemld_stub, 0, 0x4000, false, false, (u32)bios_rom },
// 1 Open load / Ignore store
{ emit_pmemld_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
{ emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
{ emit_pmemld_stub, 2, 0x8000, true, false, (u32)ewram }, // memsize wrong on purpose
{ emit_pmemld_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
{ emit_pmemld_stub, 4, 0x400, false, false, (u32)io_registers },
{ emit_pmemld_stub, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemld_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case
@ -3308,8 +3272,8 @@ void init_emitter() {
}
const t_stub_meminfo stinfo [] = {
{ emit_pmemst_stub, 2, 0x8000, true, false, (u32)&ewram[0x8000] },
{ emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] }, // memsize wrong on purpose, see above
{ emit_pmemst_stub, 2, 0x8000, true, false, (u32)ewram },
{ emit_pmemst_stub, 3, 0x8000, true, false, (u32)&iwram[0x8000] },
// I/O is special and mapped with a function call
{ emit_palette_hdl, 5, 0x400, false, true, (u32)palette_ram },
{ emit_pmemst_stub, 6, 0x0, false, true, (u32)vram }, // same, vram is a special case

View File

@ -44,7 +44,6 @@
.global write_io_epilogue
.global memory_map_read
.global memory_map_write
.global tmemld
.global tmemst
.global tmemst
@ -609,9 +608,6 @@ execute_arm_translate_internal:
.data
.align 6
memory_map_write:
.space 0x8000
memory_map_read:
.space 0x8000

View File

@ -1485,23 +1485,13 @@ u32 function_cc execute_aligned_load32(u32 address)
return read_memory32(address);
}
void function_cc execute_aligned_store32(u32 address, u32 source)
{
u8 *map;
if(!(address & 0xF0000000) && (map = memory_map_write[address >> 15]))
address32(map, address & 0x7FFF) = source;
else
write_memory32(address, source);
}
#define arm_block_memory_load() \
generate_function_call(execute_aligned_load32); \
generate_store_reg(rv, i) \
#define arm_block_memory_store() \
generate_load_reg_pc(a1, i, 8); \
generate_function_call(execute_aligned_store32) \
generate_function_call(write_memory32) \
#define arm_block_memory_final_load() \
arm_block_memory_load() \
@ -1956,7 +1946,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift)
#define thumb_block_memory_extra_push_lr(base_reg) \
generate_add_reg_reg_imm(a0, s0, (bit_count[reg_list] * 4)); \
generate_load_reg(a1, REG_LR); \
generate_function_call(execute_aligned_store32) \
generate_function_call(write_memory32) \
#define thumb_block_memory_load() \
generate_function_call(execute_aligned_load32); \
@ -1964,7 +1954,7 @@ u32 function_cc execute_ror_imm_op(u32 value, u32 shift)
#define thumb_block_memory_store() \
generate_load_reg(a1, i); \
generate_function_call(execute_aligned_store32) \
generate_function_call(write_memory32) \
#define thumb_block_memory_final_load() \
thumb_block_memory_load() \

View File

@ -29,7 +29,6 @@
#define _execute_store_cpsr execute_store_cpsr
#define _execute_arm_translate execute_arm_translate
#define _memory_map_read memory_map_read
#define _memory_map_write memory_map_write
#define _reg reg
#define _reg_mode reg_mode
#define _oam_update oam_update
@ -68,7 +67,6 @@
.global _execute_arm_translate
.global _memory_map_read
.global _memory_map_write
.global _reg
.global _reg_mode
.global _spsr
@ -231,6 +229,20 @@ ext_store_eeprom:
# 8bit ext memory routines
# IWRAM byte store: eax = address, dl = value.
# The data area sits at _iwram+0x8000 and the SMC mirror at _iwram, so one
# wrapped offset (low 15 bits) indexes both.
ext_store_iwram8:
and $0x7FFF, %eax # wrap around address
mov %dl, (_iwram+0x8000)(%eax) # perform store
cmpb $0, _iwram(%eax) # Check SMC mirror
jne smc_write # mirror byte is set: go to smc_write
ret
# EWRAM byte store: eax = address, dl = value.
# The data area starts at _ewram and the SMC mirror at _ewram+0x40000, so one
# wrapped offset (low 18 bits) indexes both.
ext_store_ewram8:
and $0x3FFFF, %eax # wrap around address
mov %dl, _ewram(%eax) # perform store
cmpb $0, (_ewram+0x40000)(%eax) # Check SMC mirror
jne smc_write # mirror byte is set: go to smc_write
ret
ext_store_io8:
and $0x3FF, %eax # wrap around address
and $0xFF, %edx
@ -267,8 +279,8 @@ ext_store_backup:
ext_store_u8_jtable:
.long ext_store_ignore # 0x00 BIOS, ignore
.long ext_store_ignore # 0x01 invalid, ignore
.long ext_store_ignore # 0x02 EWRAM, should have been hit already
.long ext_store_ignore # 0x03 IWRAM, should have been hit already
.long ext_store_ewram8 # 0x02 EWRAM
.long ext_store_iwram8 # 0x03 IWRAM
.long ext_store_io8 # 0x04 I/O registers
.long ext_store_palette8 # 0x05 Palette RAM
.long ext_store_vram8 # 0x06 VRAM
@ -281,7 +293,12 @@ ext_store_u8_jtable:
.long ext_store_eeprom # 0x0D EEPROM (possibly)
.long ext_store_backup # 0x0E Flash ROM/SRAM
ext_store_u8:
# eax: address to write to
# edx: value to write
# ecx: current pc
_execute_store_u8:
mov %ecx, REG_PC(%ebx) # write out the PC
mov %eax, %ecx # ecx = address
shr $24, %ecx # ecx = address >> 24
cmp $15, %ecx
@ -290,47 +307,22 @@ ext_store_u8:
mov ext_store_u8_jtable(, %ecx, 4), %ecx
jmp *%ecx # jump to table index
# _execute_store_u8: 8-bit store that goes through the _memory_map_write
# page table.
# Fast path: addresses below 0x10000000 are split into a page number
# (address >> 15) and a 15-bit offset. A non-NULL page pointer receives the
# byte directly, then the byte located 32768 bytes below the stored one is
# tested to detect self-modifying code.
# Slow path: addresses at or above 0x10000000, or pages whose table entry is
# NULL, are handed to ext_store_u8.
# NOTE(review): this label is also defined earlier in this listing with a
# jump-table based body - confirm which definition is meant to remain.
# eax: address to write to
# edx: value to write
# ecx: current pc
_execute_store_u8:
mov %ecx, REG_PC(%ebx) # write out the PC
mov %eax, %ecx # ecx = address
test $0xF0000000, %ecx # check address range
jnz ext_store_u8 # if above perform an extended write
shr $15, %ecx # ecx = page number of address
# load the corresponding memory map offset
mov _memory_map_write(, %ecx, 4), %ecx
test %ecx, %ecx # see if it's NULL
jz ext_store_u8 # if so perform an extended write
and $0x7FFF, %eax # isolate the lower 15bits of the address
mov %dl, (%eax, %ecx) # store the value
# check for self-modifying code
testb $0xFF, -32768(%eax, %ecx)
# any set bit in the tag byte diverts to smc_write instead of returning
jne smc_write
ret # return
# _execute_store_u16: 16-bit store that goes through the _memory_map_write
# page table.
# In:  eax = address, dx = value to store, ecx = current pc.
# The pc is saved to REG_PC and bit 0 of the address is cleared before any
# lookup. Addresses below 0x10000000 are split into a page number
# (address >> 15) and a 15-bit offset. A non-NULL page pointer receives the
# halfword directly, then the halfword located 32768 bytes below the stored
# one is tested to detect self-modifying code.
# Addresses at or above 0x10000000, or pages whose table entry is NULL, are
# handed to ext_store_u16.
# NOTE(review): this label is also defined later in this listing with a
# jump-table based body - confirm which definition is meant to remain.
_execute_store_u16:
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x01, %eax # fix alignment
mov %eax, %ecx # ecx = address
test $0xF0000000, %ecx # check address range
jnz ext_store_u16 # if above perform an extended write
shr $15, %ecx # ecx = page number of address
# load the corresponding memory map offset
mov _memory_map_write(, %ecx, 4), %ecx
test %ecx, %ecx # see if it's NULL
jz ext_store_u16 # if so perform an extended write
and $0x7FFF, %eax # isolate the lower 15bits of the address
mov %dx, (%eax, %ecx) # store the value
# check for self-modifying code
testw $0xFFFF, -32768(%eax, %ecx)
# any set bit in the tag halfword diverts to smc_write instead of returning
jne smc_write
ret # return
# 16bit ext memory routines
# ext_store_iwram16: 16-bit store into IWRAM.
# In:  eax = address, dx = value to store.
# The address is reduced to its low 15 bits (a 32KB window). The halfword
# is written at _iwram+0x8000+offset, and the halfword at _iwram+offset
# (the SMC mirror) is then compared against zero.
# Out: plain ret when the mirror halfword is zero. A non-zero mirror
#      transfers control to smc_write with a jump, so the return address
#      of the caller is still on the stack at that point.
# NOTE(review): no alignment fix happens here - the routine presumably
# relies on the dispatcher having cleared bit 0 already; confirm for every
# path that reaches this label.
ext_store_iwram16:
and $0x7FFF, %eax # wrap around address
mov %dx, (_iwram+0x8000)(%eax) # perform store
cmpw $0, _iwram(%eax) # Check SMC mirror
jne smc_write
ret
# ext_store_ewram16: 16-bit store into EWRAM.
# In:  eax = address, dx = value to store.
# The address is reduced to its low 18 bits (a 256KB window). The halfword
# is written at _ewram+offset, and the halfword at _ewram+0x40000+offset
# (the SMC mirror) is then compared against zero.
# Out: plain ret when the mirror halfword is zero. A non-zero mirror
#      transfers control to smc_write with a jump, so the return address
#      of the caller is still on the stack at that point.
# NOTE(review): no alignment fix happens here - the routine presumably
# relies on the dispatcher having cleared bit 0 already; confirm for every
# path that reaches this label.
ext_store_ewram16:
and $0x3FFFF, %eax # wrap around address
mov %dx, _ewram(%eax) # perform store
cmpw $0, (_ewram+0x40000)(%eax) # Check SMC mirror
jne smc_write
ret
ext_store_io16:
and $0x3FF, %eax # wrap around address
and $0xFFFF, %edx
@ -377,8 +369,8 @@ ext_store_rtc:
ext_store_u16_jtable:
.long ext_store_ignore # 0x00 BIOS, ignore
.long ext_store_ignore # 0x01 invalid, ignore
.long ext_store_ignore # 0x02 EWRAM, should have been hit already
.long ext_store_ignore # 0x03 IWRAM, should have been hit already
.long ext_store_ewram16 # 0x02 EWRAM
.long ext_store_iwram16 # 0x03 IWRAM
.long ext_store_io16 # 0x04 I/O registers
.long ext_store_palette16 # 0x05 Palette RAM
.long ext_store_vram16 # 0x06 VRAM
@ -391,7 +383,9 @@ ext_store_u16_jtable:
.long ext_store_eeprom # 0x0D EEPROM (possibly)
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
ext_store_u16:
_execute_store_u16:
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x01, %eax # fix alignment
mov %eax, %ecx # ecx = address
shr $24, %ecx # ecx = address >> 24
cmp $15, %ecx
@ -400,26 +394,22 @@ ext_store_u16:
mov ext_store_u16_jtable(, %ecx, 4), %ecx
jmp *%ecx # jump to table index
# _execute_store_u32: 32-bit store that goes through the _memory_map_write
# page table.
# In:  eax = address, edx = value to store, ecx = current pc.
# The pc is saved to REG_PC and the low two address bits are cleared before
# any lookup. Addresses below 0x10000000 are split into a page number
# (address >> 15) and a 15-bit offset. A non-NULL page pointer receives the
# word directly, then the word located 32768 bytes below the stored one is
# tested to detect self-modifying code.
# Addresses at or above 0x10000000, or pages whose table entry is NULL, are
# handed to ext_store_u32.
# NOTE(review): this label is also defined later in this listing with a
# jump-table based body - confirm which definition is meant to remain.
_execute_store_u32:
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x03, %eax # fix alignment
mov %eax, %ecx # ecx = address
test $0xF0000000, %ecx # check address range
jnz ext_store_u32 # if above perform an extended write
shr $15, %ecx # ecx = page number of address
# load the corresponding memory map offset
mov _memory_map_write(, %ecx, 4), %ecx
test %ecx, %ecx # see if it's NULL
jz ext_store_u32 # if so perform an extended write
and $0x7FFF, %eax # isolate the lower 15bits of the address
mov %edx, (%eax, %ecx) # store the value
# check for self-modifying code
testl $0xFFFFFFFF, -32768(%eax, %ecx)
# any set bit in the tag word diverts to smc_write instead of returning
jne smc_write
ret # return it
# 32bit ext memory routines
# ext_store_iwram32: 32-bit store into IWRAM.
# In:  eax = address, edx = value to store.
# The address is reduced to its low 15 bits (a 32KB window). The word is
# written at _iwram+0x8000+offset, and the word at _iwram+offset (the SMC
# mirror) is then compared against zero.
# Out: plain ret when the mirror word is zero. A non-zero mirror transfers
#      control to smc_write with a jump, so the return address of the
#      caller is still on the stack at that point.
# NOTE(review): no alignment fix happens here - the routine presumably
# relies on the dispatcher having cleared the low two bits already; confirm
# for every path that reaches this label.
ext_store_iwram32:
and $0x7FFF, %eax # wrap around address
mov %edx, (_iwram+0x8000)(%eax) # perform store
cmpl $0, _iwram(%eax) # Check SMC mirror
jne smc_write
ret
# ext_store_ewram32: 32-bit store into EWRAM.
# In:  eax = address, edx = value to store.
# The address is reduced to its low 18 bits (a 256KB window). The word is
# written at _ewram+offset, and the word at _ewram+0x40000+offset (the SMC
# mirror) is then compared against zero.
# Out: plain ret when the mirror word is zero. A non-zero mirror transfers
#      control to smc_write with a jump, so the return address of the
#      caller is still on the stack at that point.
# NOTE(review): no alignment fix happens here - the routine presumably
# relies on the dispatcher having cleared the low two bits already; confirm
# for every path that reaches this label.
ext_store_ewram32:
and $0x3FFFF, %eax # wrap around address
mov %edx, _ewram(%eax) # perform store
cmpl $0, (_ewram+0x40000)(%eax) # Check SMC mirror
jne smc_write
ret
ext_store_io32:
and $0x3FF, %eax # wrap around address
call _write_io_register32 # perform 32bit I/O register write
@ -451,8 +441,8 @@ ext_store_oam32:
ext_store_u32_jtable:
.long ext_store_ignore # 0x00 BIOS, ignore
.long ext_store_ignore # 0x01 invalid, ignore
.long ext_store_ignore # 0x02 EWRAM, should have been hit already
.long ext_store_ignore # 0x03 IWRAM, should have been hit already
.long ext_store_ewram32 # 0x02 EWRAM
.long ext_store_iwram32 # 0x03 IWRAM
.long ext_store_io32 # 0x04 I/O registers
.long ext_store_palette32 # 0x05 Palette RAM
.long ext_store_vram32 # 0x06 VRAM
@ -466,7 +456,9 @@ ext_store_u32_jtable:
.long ext_store_ignore # 0x0E Flash ROM/SRAM must be 8bit
ext_store_u32:
_execute_store_u32:
mov %ecx, REG_PC(%ebx) # write out the PC
and $~0x03, %eax # fix alignment
mov %eax, %ecx # ecx = address
shr $24, %ecx # ecx = address >> 24
cmp $15, %ecx
@ -507,8 +499,8 @@ smc_write:
call _flush_translation_cache_ram
lookup_pc:
add $4, %esp
movl $0, CHANGED_PC_STATUS(%ebx)
add $4, %esp # Can't return, discard addr
movl $0, CHANGED_PC_STATUS(%ebx) # Lookup new block and jump to it
mov REG_PC(%ebx), %eax
testl $0x20, REG_CPSR(%ebx)
jz lookup_pc_arm
@ -577,7 +569,5 @@ _reg_mode:
_memory_map_read:
.space 0x8000
_memory_map_write:
.space 0x8000