[interp] Improve interpreter timings and honor WAITCNT

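This fixes a few games and makes the interpreter faster, since it no
longer emulates an effectively overclocked CPU (previously every
instruction was charged a flat single cycle, regardless of memory timings).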
This commit is contained in:
David Guillen Fandos 2023-06-07 19:40:27 +02:00
parent c9c88f3560
commit 84c347edad
6 changed files with 113 additions and 42 deletions

16
cpu.c
View File

@ -865,6 +865,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \ { \
/* Account for cycles and other stats */ \ /* Account for cycles and other stats */ \
u8 region = _address >> 24; \ u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
memory_region_access_read_##type[region]++; \ memory_region_access_read_##type[region]++; \
memory_reads_##type++; \ memory_reads_##type++; \
} \ } \
@ -890,6 +891,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
if(_address < 0x10000000) \ if(_address < 0x10000000) \
{ \ { \
u8 region = _address >> 24; \ u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
memory_region_access_write_##type[region]++; \ memory_region_access_write_##type[region]++; \
memory_writes_##type++; \ memory_writes_##type++; \
} \ } \
@ -905,6 +907,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \ { \
/* Account for cycles and other stats */ \ /* Account for cycles and other stats */ \
u8 region = _address >> 24; \ u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_seq[region][1]; \
memory_region_access_read_u32[region]++; \ memory_region_access_read_u32[region]++; \
memory_reads_u32++; \ memory_reads_u32++; \
} \ } \
@ -925,6 +928,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \ { \
/* Account for cycles and other stats */ \ /* Account for cycles and other stats */ \
u8 region = _address >> 24; \ u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_seq[region][1]; \
memory_region_access_write_u32[region]++; \ memory_region_access_write_u32[region]++; \
memory_writes_u32++; \ memory_writes_u32++; \
} \ } \
@ -1404,6 +1408,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \ { \
thumb_pc_offset(2); \ thumb_pc_offset(2); \
} \ } \
cycles_remaining -= ws_cyc_nseq[pc >> 24][0]; \
} \ } \
// When a mode change occurs from non-FIQ to non-FIQ retire the current // When a mode change occurs from non-FIQ to non-FIQ retire the current
@ -1530,7 +1535,6 @@ void execute_arm(u32 cycles)
u8 *pc_address_block = memory_map_read[pc_region]; u8 *pc_address_block = memory_map_read[pc_region];
u32 new_pc_region; u32 new_pc_region;
s32 cycles_remaining; s32 cycles_remaining;
u32 cycles_per_instruction = global_cycles_per_instruction;
cpu_alert_type cpu_alert; cpu_alert_type cpu_alert;
u32 old_pc; u32 old_pc;
@ -1564,7 +1568,6 @@ void execute_arm(u32 cycles)
arm_loop: arm_loop:
collapse_flags(); collapse_flags();
cycles_per_instruction = global_cycles_per_instruction;
/* Process cheats if we are about to execute the cheat hook */ /* Process cheats if we are about to execute the cheat hook */
if (pc == cheat_master_hook) if (pc == cheat_master_hook)
@ -2156,6 +2159,7 @@ arm_loop:
{ {
arm_pc_offset_update_direct(src); arm_pc_offset_update_direct(src);
} }
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
} }
else else
{ {
@ -3084,6 +3088,7 @@ arm_loop:
/* B offset */ /* B offset */
arm_decode_branch(); arm_decode_branch();
arm_pc_offset_update(offset + 8); arm_pc_offset_update(offset + 8);
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
break; break;
} }
@ -3093,6 +3098,7 @@ arm_loop:
arm_decode_branch(); arm_decode_branch();
reg[REG_LR] = pc + 4; reg[REG_LR] = pc + 4;
arm_pc_offset_update(offset + 8); arm_pc_offset_update(offset + 8);
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
break; break;
} }
@ -3130,7 +3136,7 @@ arm_loop:
skip_instruction: skip_instruction:
/* End of Execute ARM instruction */ /* End of Execute ARM instruction */
cycles_remaining -= cycles_per_instruction; cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][1];
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
@ -3622,6 +3628,7 @@ thumb_loop:
/* B label */ /* B label */
thumb_decode_branch(); thumb_decode_branch();
thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4); thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4);
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
break; break;
} }
@ -3642,12 +3649,13 @@ thumb_loop:
pc = reg[REG_LR] + (offset * 2); pc = reg[REG_LR] + (offset * 2);
reg[REG_LR] = lr; reg[REG_LR] = lr;
reg[REG_PC] = pc; reg[REG_PC] = pc;
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
break; break;
} }
} }
/* End of Execute THUMB instruction */ /* End of Execute THUMB instruction */
cycles_remaining -= cycles_per_instruction; cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][0];
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;

View File

@ -2774,7 +2774,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
#define arm_instruction_width 4 #define arm_instruction_width 4
#define arm_base_cycles() \ #define arm_base_cycles() \
cycle_count += waitstate_cycles_sequential[pc >> 24][2] \ cycle_count += def_seq_cycles[pc >> 24][1] \
// For now this just sets a variable that says flags should always be // For now this just sets a variable that says flags should always be
// computed. // computed.
@ -2843,7 +2843,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
#define thumb_instruction_width 2 #define thumb_instruction_width 2
#define thumb_base_cycles() \ #define thumb_base_cycles() \
cycle_count += waitstate_cycles_sequential[pc >> 24][1] \ cycle_count += def_seq_cycles[pc >> 24][0] \
// Here's how this works: each instruction has three different sets of flag // Here's how this works: each instruction has three different sets of flag
// attributes, each consisting of a 4bit mask describing how that instruction // attributes, each consisting of a 4bit mask describing how that instruction

View File

@ -269,42 +269,71 @@ static void trigger_timer(u32 timer_number, u32 value)
write_ioreg(REG_TMXCNT(timer_number), value); write_ioreg(REG_TMXCNT(timer_number), value);
} }
// This table is configured for sequential access on system defaults /* Memory timings */
const u8 ws012_nonseq[] = {4, 3, 2, 8};
const u8 ws0_seq[] = {2, 1};
const u8 ws1_seq[] = {4, 1};
const u8 ws2_seq[] = {8, 1};
const u32 waitstate_cycles_sequential[16][3] = /* Divided by region and bus width (16/32) */
u8 ws_cyc_seq[16][2] =
{ {
{ 1, 1, 1 }, // BIOS { 1, 1 }, // BIOS
{ 1, 1, 1 }, // Invalid { 1, 1 }, // Invalid
{ 3, 3, 6 }, // EWRAM (default settings) { 3, 6 }, // EWRAM (default settings)
{ 1, 1, 1 }, // IWRAM { 1, 1 }, // IWRAM
{ 1, 1, 1 }, // IO Registers { 1, 1 }, // IO Registers
{ 1, 1, 2 }, // Palette RAM { 1, 2 }, // Palette RAM
{ 1, 1, 2 }, // VRAM { 1, 2 }, // VRAM
{ 1, 1, 2 }, // OAM { 1, 2 }, // OAM
{ 3, 3, 6 }, // Gamepak (wait 0) { 0, 0 }, // Gamepak (wait 0)
{ 3, 3, 6 }, // Gamepak (wait 0) { 0, 0 }, // Gamepak (wait 0)
{ 5, 5, 9 }, // Gamepak (wait 1) { 0, 0 }, // Gamepak (wait 1)
{ 5, 5, 9 }, // Gamepak (wait 1) { 0, 0 }, // Gamepak (wait 1)
{ 9, 9, 17 }, // Gamepak (wait 2) { 0, 0 }, // Gamepak (wait 2)
{ 9, 9, 17 }, // Gamepak (wait 2) { 0, 0 }, // Gamepak (wait 2)
{ 1, 1 }, // Invalid
{ 1, 1 }, // Invalid
};
u8 ws_cyc_nseq[16][2] =
{
{ 1, 1 }, // BIOS
{ 1, 1 }, // Invalid
{ 3, 6 }, // EWRAM (default settings)
{ 1, 1 }, // IWRAM
{ 1, 1 }, // IO Registers
{ 1, 2 }, // Palette RAM
{ 1, 2 }, // VRAM
{ 1, 2 }, // OAM
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 2)
{ 0, 0 }, // Gamepak (wait 2)
{ 1, 1 }, // Invalid
{ 1, 1 }, // Invalid
}; };
// Different settings for gamepak ws0-2 sequential (2nd) access const u32 def_seq_cycles[16][2] =
const u32 gamepak_waitstate_sequential[2][3][3] =
{ {
{ { 1, 1 }, // BIOS
{ 3, 3, 6 }, { 1, 1 }, // Invalid
{ 5, 5, 9 }, { 3, 6 }, // EWRAM (default settings)
{ 9, 9, 17 } { 1, 1 }, // IWRAM
}, { 1, 1 }, // IO Registers
{ { 1, 2 }, // Palette RAM
{ 2, 2, 3 }, { 1, 2 }, // VRAM
{ 2, 2, 3 }, { 1, 2 }, // OAM
{ 2, 2, 3 } { 3, 6 }, // Gamepak (wait 0)
} { 3, 6 }, // Gamepak (wait 0)
{ 5, 9 }, // Gamepak (wait 1)
{ 5, 9 }, // Gamepak (wait 1)
{ 9, 17 }, // Gamepak (wait 2)
{ 9, 17 }, // Gamepak (wait 2)
}; };
u8 bios_rom[1024 * 16]; u8 bios_rom[1024 * 16];
// Up to 128kb, store SRAM, flash ROM, or EEPROM here. // Up to 128kb, store SRAM, flash ROM, or EEPROM here.
@ -364,6 +393,34 @@ u32 flash_bank_cnt;
u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB; u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB;
/* Recomputes the gamepak entries (regions 0x8-0xD) of the ws_cyc_seq and
 * ws_cyc_nseq tables from the current value of REG_WAITCNT.
 *
 * Column [0] of each table holds the cost of a 16 bit access and column [1]
 * the cost of a 32 bit access. Entries for other regions are left untouched.
 * Called from init_memory() and whenever REG_WAITCNT is written, so the
 * tables always reflect the register contents.
 *
 * WAITCNT fields used here (as decoded by the shifts/masks below):
 *   bits 2-3 / 5-6 / 8-9 : non-sequential setting for ws0 / ws1 / ws2,
 *                          used as an index into ws012_nonseq
 *   bit  4   / 7   / 10  : sequential setting for ws0 / ws1 / ws2,
 *                          used as an index into ws0_seq / ws1_seq / ws2_seq
 */
void reload_timing_info()
{
int i;
uint16_t waitcnt = read_ioreg(REG_WAITCNT);
/* Sequential 16 bit accesses to ROM: table value plus one */
/* (presumably the base access cycle on top of the waitstates) */
ws_cyc_seq[0x8][0] = ws_cyc_seq[0x9][0] = 1 + ws0_seq[(waitcnt >> 4) & 1];
ws_cyc_seq[0xA][0] = ws_cyc_seq[0xB][0] = 1 + ws1_seq[(waitcnt >> 7) & 1];
ws_cyc_seq[0xC][0] = ws_cyc_seq[0xD][0] = 1 + ws2_seq[(waitcnt >> 10) & 1];
for (i = 0x8; i <= 0xD; i++)
{
/* 32 bit accesses just cost double due to 16 bit bus */
ws_cyc_seq[i][1] = ws_cyc_seq[i][0] * 2;
}
/* Non-sequential 16 and 32 bit accesses to ROM */
/* All three waitstate regions share the same ws012_nonseq lookup table */
ws_cyc_nseq[0x8][0] = ws_cyc_nseq[0x9][0] = 1 + ws012_nonseq[(waitcnt >> 2) & 3];
ws_cyc_nseq[0xA][0] = ws_cyc_nseq[0xB][0] = 1 + ws012_nonseq[(waitcnt >> 5) & 3];
ws_cyc_nseq[0xC][0] = ws_cyc_nseq[0xD][0] = 1 + ws012_nonseq[(waitcnt >> 8) & 3];
for (i = 0x8; i <= 0xD; i++)
{
/* 32 bit accesses are a non-seq (16) + seq access (16) */
/* NOTE(review): both terms already include their own "1 +", so the extra */
/* leading 1 makes this one cycle more than nseq16 + seq16 - confirm that */
/* this is intentional. */
ws_cyc_nseq[i][1] = 1 + ws_cyc_nseq[i][0] + ws_cyc_seq[i][0];
}
}
u8 read_backup(u32 address) u8 read_backup(u32 address)
{ {
u8 value = 0; u8 value = 0;
@ -866,8 +923,12 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
case REG_VCOUNT: case REG_VCOUNT:
break; // Do nothing break; // Do nothing
// Registers without side effects
case REG_WAITCNT: case REG_WAITCNT:
write_ioreg(REG_WAITCNT, value);
reload_timing_info();
break;
// Registers without side effects
default: default:
write_ioreg(ioreg, value); write_ioreg(ioreg, value);
break; break;
@ -2251,8 +2312,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles)
// This is an approximation for the most common case (no region cross) // This is an approximation for the most common case (no region cross)
if (usedcycles) if (usedcycles)
*usedcycles += dmach->length * ( *usedcycles += dmach->length * (
waitstate_cycles_sequential[src_ptr >> 24][tfsizes] + def_seq_cycles[src_ptr >> 24][tfsizes - 1] +
waitstate_cycles_sequential[dst_ptr >> 24][tfsizes]); def_seq_cycles[dst_ptr >> 24][tfsizes - 1]);
return ret; return ret;
} }
@ -2415,6 +2476,8 @@ void init_memory(void)
write_ioreg(REG_BG3PD, 0x100); write_ioreg(REG_BG3PD, 0x100);
write_ioreg(REG_RCNT, 0x8000); write_ioreg(REG_RCNT, 0x8000);
reload_timing_info();
backup_type = BACKUP_NONE; backup_type = BACKUP_NONE;
sram_bankcount = SRAM_SIZE_32KB; sram_bankcount = SRAM_SIZE_32KB;

View File

@ -200,7 +200,10 @@ void function_cc write_backup(u32 address, u32 value);
void function_cc write_rtc(u32 address, u32 value); void function_cc write_rtc(u32 address, u32 value);
/* EDIT: Shouldn't this be extern ?! */ /* EDIT: Shouldn't this be extern ?! */
extern const u32 waitstate_cycles_sequential[16][3]; extern const u32 def_seq_cycles[16][2];
/* Cycles can change depending on WAITCNT */
extern u8 ws_cyc_seq[16][2];
extern u8 ws_cyc_nseq[16][2];
extern u32 gamepak_size; extern u32 gamepak_size;
extern char gamepak_title[13]; extern char gamepak_title[13];

2
main.c
View File

@ -22,8 +22,6 @@
timer_type timer[4]; timer_type timer[4];
const u32 global_cycles_per_instruction = 1;
u32 cpu_ticks = 0; u32 cpu_ticks = 0;
u32 execute_cycles = 960; u32 execute_cycles = 960;

1
main.h
View File

@ -68,7 +68,6 @@ typedef enum
extern u32 cpu_ticks; extern u32 cpu_ticks;
extern u32 execute_cycles; extern u32 execute_cycles;
extern const u32 global_cycles_per_instruction;
extern u32 skip_next_frame; extern u32 skip_next_frame;
extern u32 flush_ram_count; extern u32 flush_ram_count;