[interp] Improve interpreter timings and honor WAITCNT

This fixes a few games and makes the interpreter faster (since it
doesn't run an overclocked CPU anymore).
This commit is contained in:
David Guillen Fandos 2023-06-07 19:40:27 +02:00
parent c9c88f3560
commit 84c347edad
6 changed files with 113 additions and 42 deletions

16
cpu.c
View File

@ -865,6 +865,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \
/* Account for cycles and other stats */ \
u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
memory_region_access_read_##type[region]++; \
memory_reads_##type++; \
} \
@ -890,6 +891,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
if(_address < 0x10000000) \
{ \
u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
memory_region_access_write_##type[region]++; \
memory_writes_##type++; \
} \
@ -905,6 +907,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \
/* Account for cycles and other stats */ \
u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_seq[region][1]; \
memory_region_access_read_u32[region]++; \
memory_reads_u32++; \
} \
@ -925,6 +928,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \
/* Account for cycles and other stats */ \
u8 region = _address >> 24; \
cycles_remaining -= ws_cyc_seq[region][1]; \
memory_region_access_write_u32[region]++; \
memory_writes_u32++; \
} \
@ -1404,6 +1408,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
{ \
thumb_pc_offset(2); \
} \
cycles_remaining -= ws_cyc_nseq[pc >> 24][0]; \
} \
// When a mode change occurs from non-FIQ to non-FIQ retire the current
@ -1530,7 +1535,6 @@ void execute_arm(u32 cycles)
u8 *pc_address_block = memory_map_read[pc_region];
u32 new_pc_region;
s32 cycles_remaining;
u32 cycles_per_instruction = global_cycles_per_instruction;
cpu_alert_type cpu_alert;
u32 old_pc;
@ -1564,7 +1568,6 @@ void execute_arm(u32 cycles)
arm_loop:
collapse_flags();
cycles_per_instruction = global_cycles_per_instruction;
/* Process cheats if we are about to execute the cheat hook */
if (pc == cheat_master_hook)
@ -2156,6 +2159,7 @@ arm_loop:
{
arm_pc_offset_update_direct(src);
}
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
}
else
{
@ -3084,6 +3088,7 @@ arm_loop:
/* B offset */
arm_decode_branch();
arm_pc_offset_update(offset + 8);
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
break;
}
@ -3093,6 +3098,7 @@ arm_loop:
arm_decode_branch();
reg[REG_LR] = pc + 4;
arm_pc_offset_update(offset + 8);
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
break;
}
@ -3130,7 +3136,7 @@ arm_loop:
skip_instruction:
/* End of Execute ARM instruction */
cycles_remaining -= cycles_per_instruction;
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][1];
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
@ -3622,6 +3628,7 @@ thumb_loop:
/* B label */
thumb_decode_branch();
thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4);
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
break;
}
@ -3642,12 +3649,13 @@ thumb_loop:
pc = reg[REG_LR] + (offset * 2);
reg[REG_LR] = lr;
reg[REG_PC] = pc;
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
break;
}
}
/* End of Execute THUMB instruction */
cycles_remaining -= cycles_per_instruction;
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][0];
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;

View File

@ -2774,7 +2774,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
#define arm_instruction_width 4
#define arm_base_cycles() \
cycle_count += waitstate_cycles_sequential[pc >> 24][2] \
cycle_count += def_seq_cycles[pc >> 24][1] \
// For now this just sets a variable that says flags should always be
// computed.
@ -2843,7 +2843,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
#define thumb_instruction_width 2
#define thumb_base_cycles() \
cycle_count += waitstate_cycles_sequential[pc >> 24][1] \
cycle_count += def_seq_cycles[pc >> 24][0] \
// Here's how this works: each instruction has three different sets of flag
// attributes, each consisiting of a 4bit mask describing how that instruction

View File

@ -269,42 +269,71 @@ static void trigger_timer(u32 timer_number, u32 value)
write_ioreg(REG_TMXCNT(timer_number), value);
}
// This table is configured for sequential access on system defaults
/* Memory timings */
/* Wait-state lookup tables read by reload_timing_info(), which adds one
   cycle on top of the selected entry. */
/* Non-sequential wait states, shared by the three gamepak regions. Indexed
   by the 2 bit WAITCNT field at bits 2-3 (regions 0x8/0x9), bits 5-6
   (regions 0xA/0xB) or bits 8-9 (regions 0xC/0xD). */
const u8 ws012_nonseq[] = {4, 3, 2, 8};
/* Sequential wait states for regions 0x8/0x9, indexed by WAITCNT bit 4. */
const u8 ws0_seq[] = {2, 1};
/* Sequential wait states for regions 0xA/0xB, indexed by WAITCNT bit 7. */
const u8 ws1_seq[] = {4, 1};
/* Sequential wait states for regions 0xC/0xD, indexed by WAITCNT bit 10. */
const u8 ws2_seq[] = {8, 1};
const u32 waitstate_cycles_sequential[16][3] =
/* Divided by region and bus width (16/32) */
u8 ws_cyc_seq[16][2] =
{
{ 1, 1, 1 }, // BIOS
{ 1, 1, 1 }, // Invalid
{ 3, 3, 6 }, // EWRAM (default settings)
{ 1, 1, 1 }, // IWRAM
{ 1, 1, 1 }, // IO Registers
{ 1, 1, 2 }, // Palette RAM
{ 1, 1, 2 }, // VRAM
{ 1, 1, 2 }, // OAM
{ 3, 3, 6 }, // Gamepak (wait 0)
{ 3, 3, 6 }, // Gamepak (wait 0)
{ 5, 5, 9 }, // Gamepak (wait 1)
{ 5, 5, 9 }, // Gamepak (wait 1)
{ 9, 9, 17 }, // Gamepak (wait 2)
{ 9, 9, 17 }, // Gamepak (wait 2)
{ 1, 1 }, // BIOS
{ 1, 1 }, // Invalid
{ 3, 6 }, // EWRAM (default settings)
{ 1, 1 }, // IWRAM
{ 1, 1 }, // IO Registers
{ 1, 2 }, // Palette RAM
{ 1, 2 }, // VRAM
{ 1, 2 }, // OAM
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 2)
{ 0, 0 }, // Gamepak (wait 2)
{ 1, 1 }, // Invalid
{ 1, 1 }, // Invalid
};
/* Non-sequential access cost in cycles. The row is the memory region
   (address >> 24); column 0 is used for 8 and 16 bit accesses, column 1 for
   32 bit accesses. The gamepak rows (0x8-0xD) are placeholders here and are
   overwritten by reload_timing_info() from the current WAITCNT value, which
   is why this table is not const. */
u8 ws_cyc_nseq[16][2] =
{
{ 1, 1 }, // BIOS
{ 1, 1 }, // Invalid
{ 3, 6 }, // EWRAM (default settings)
{ 1, 1 }, // IWRAM
{ 1, 1 }, // IO Registers
{ 1, 2 }, // Palette RAM
{ 1, 2 }, // VRAM
{ 1, 2 }, // OAM
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 0)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 1)
{ 0, 0 }, // Gamepak (wait 2)
{ 0, 0 }, // Gamepak (wait 2)
{ 1, 1 }, // Invalid
{ 1, 1 }, // Invalid
};
// Different settings for gamepak ws0-2 sequential (2nd) access
const u32 gamepak_waitstate_sequential[2][3][3] =
const u32 def_seq_cycles[16][2] =
{
{
{ 3, 3, 6 },
{ 5, 5, 9 },
{ 9, 9, 17 }
},
{
{ 2, 2, 3 },
{ 2, 2, 3 },
{ 2, 2, 3 }
}
{ 1, 1 }, // BIOS
{ 1, 1 }, // Invalid
{ 3, 6 }, // EWRAM (default settings)
{ 1, 1 }, // IWRAM
{ 1, 1 }, // IO Registers
{ 1, 2 }, // Palette RAM
{ 1, 2 }, // VRAM
{ 1, 2 }, // OAM
{ 3, 6 }, // Gamepak (wait 0)
{ 3, 6 }, // Gamepak (wait 0)
{ 5, 9 }, // Gamepak (wait 1)
{ 5, 9 }, // Gamepak (wait 1)
{ 9, 17 }, // Gamepak (wait 2)
{ 9, 17 }, // Gamepak (wait 2)
};
u8 bios_rom[1024 * 16];
// Up to 128kb, store SRAM, flash ROM, or EEPROM here.
@ -364,6 +393,34 @@ u32 flash_bank_cnt;
u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB;
/*
 * Recompute the gamepak ROM access costs (regions 0x8-0xD) from the current
 * WAITCNT register value and store them in ws_cyc_seq / ws_cyc_nseq.
 *
 * In both tables column 0 holds the 16 bit cost and column 1 the 32 bit
 * cost. Entries for all other regions are left untouched.
 */
void reload_timing_info(void)
{
  unsigned region;
  const uint16_t waitcnt = read_ioreg(REG_WAITCNT);

  /* Sequential 16 bit accesses to ROM: one base cycle plus the wait states
     selected by WAITCNT bits 4 (WS0), 7 (WS1) and 10 (WS2). */
  ws_cyc_seq[0x8][0] = ws_cyc_seq[0x9][0] = 1 + ws0_seq[(waitcnt >> 4) & 1];
  ws_cyc_seq[0xA][0] = ws_cyc_seq[0xB][0] = 1 + ws1_seq[(waitcnt >> 7) & 1];
  ws_cyc_seq[0xC][0] = ws_cyc_seq[0xD][0] = 1 + ws2_seq[(waitcnt >> 10) & 1];

  /* Non-sequential 16 bit accesses to ROM: one base cycle plus the wait
     states selected by WAITCNT bits 2-3 (WS0), 5-6 (WS1) and 8-9 (WS2). */
  ws_cyc_nseq[0x8][0] = ws_cyc_nseq[0x9][0] = 1 + ws012_nonseq[(waitcnt >> 2) & 3];
  ws_cyc_nseq[0xA][0] = ws_cyc_nseq[0xB][0] = 1 + ws012_nonseq[(waitcnt >> 5) & 3];
  ws_cyc_nseq[0xC][0] = ws_cyc_nseq[0xD][0] = 1 + ws012_nonseq[(waitcnt >> 8) & 3];

  /* Derive the 32 bit costs once every 16 bit entry is in place. */
  for (region = 0x8; region <= 0xD; region++)
  {
    /* 32 bit sequential accesses just cost double due to the 16 bit bus */
    ws_cyc_seq[region][1] = ws_cyc_seq[region][0] * 2;

    /* 32 bit non-sequential accesses are a non-seq (16) + seq access (16).
       NOTE(review): both operands already include their base cycle, so the
       leading "1 +" yields one cycle more than the plain sum of the two
       halves. Kept unchanged here; confirm whether it is intentional. */
    ws_cyc_nseq[region][1] = 1 + ws_cyc_nseq[region][0] + ws_cyc_seq[region][0];
  }
}
u8 read_backup(u32 address)
{
u8 value = 0;
@ -866,8 +923,12 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
case REG_VCOUNT:
break; // Do nothing
// Registers without side effects
case REG_WAITCNT:
write_ioreg(REG_WAITCNT, value);
reload_timing_info();
break;
// Registers without side effects
default:
write_ioreg(ioreg, value);
break;
@ -2251,8 +2312,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles)
// This is an approximation for the most common case (no region cross)
if (usedcycles)
*usedcycles += dmach->length * (
waitstate_cycles_sequential[src_ptr >> 24][tfsizes] +
waitstate_cycles_sequential[dst_ptr >> 24][tfsizes]);
def_seq_cycles[src_ptr >> 24][tfsizes - 1] +
def_seq_cycles[dst_ptr >> 24][tfsizes - 1]);
return ret;
}
@ -2415,6 +2476,8 @@ void init_memory(void)
write_ioreg(REG_BG3PD, 0x100);
write_ioreg(REG_RCNT, 0x8000);
reload_timing_info();
backup_type = BACKUP_NONE;
sram_bankcount = SRAM_SIZE_32KB;

View File

@ -200,7 +200,10 @@ void function_cc write_backup(u32 address, u32 value);
void function_cc write_rtc(u32 address, u32 value);
/* EDIT: Shouldn't this be extern ?! */
extern const u32 waitstate_cycles_sequential[16][3];
extern const u32 def_seq_cycles[16][2];
/* Cycles can change depending on WAITCNT */
extern u8 ws_cyc_seq[16][2];
extern u8 ws_cyc_nseq[16][2];
extern u32 gamepak_size;
extern char gamepak_title[13];

2
main.c
View File

@ -22,8 +22,6 @@
timer_type timer[4];
const u32 global_cycles_per_instruction = 1;
u32 cpu_ticks = 0;
u32 execute_cycles = 960;

1
main.h
View File

@ -68,7 +68,6 @@ typedef enum
extern u32 cpu_ticks;
extern u32 execute_cycles;
extern const u32 global_cycles_per_instruction;
extern u32 skip_next_frame;
extern u32 flush_ram_count;