[interp] Improve interpreter timings and honor WAITCNT
This fixes a few games and makes the interpreter faster (since it doesn't run an overclocked CPU anymore).
This commit is contained in:
parent
c9c88f3560
commit
84c347edad
16
cpu.c
16
cpu.c
|
@ -865,6 +865,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
||||||
{ \
|
{ \
|
||||||
/* Account for cycles and other stats */ \
|
/* Account for cycles and other stats */ \
|
||||||
u8 region = _address >> 24; \
|
u8 region = _address >> 24; \
|
||||||
|
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
|
||||||
memory_region_access_read_##type[region]++; \
|
memory_region_access_read_##type[region]++; \
|
||||||
memory_reads_##type++; \
|
memory_reads_##type++; \
|
||||||
} \
|
} \
|
||||||
|
@ -890,6 +891,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
||||||
if(_address < 0x10000000) \
|
if(_address < 0x10000000) \
|
||||||
{ \
|
{ \
|
||||||
u8 region = _address >> 24; \
|
u8 region = _address >> 24; \
|
||||||
|
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
|
||||||
memory_region_access_write_##type[region]++; \
|
memory_region_access_write_##type[region]++; \
|
||||||
memory_writes_##type++; \
|
memory_writes_##type++; \
|
||||||
} \
|
} \
|
||||||
|
@ -905,6 +907,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
||||||
{ \
|
{ \
|
||||||
/* Account for cycles and other stats */ \
|
/* Account for cycles and other stats */ \
|
||||||
u8 region = _address >> 24; \
|
u8 region = _address >> 24; \
|
||||||
|
cycles_remaining -= ws_cyc_seq[region][1]; \
|
||||||
memory_region_access_read_u32[region]++; \
|
memory_region_access_read_u32[region]++; \
|
||||||
memory_reads_u32++; \
|
memory_reads_u32++; \
|
||||||
} \
|
} \
|
||||||
|
@ -925,6 +928,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
||||||
{ \
|
{ \
|
||||||
/* Account for cycles and other stats */ \
|
/* Account for cycles and other stats */ \
|
||||||
u8 region = _address >> 24; \
|
u8 region = _address >> 24; \
|
||||||
|
cycles_remaining -= ws_cyc_seq[region][1]; \
|
||||||
memory_region_access_write_u32[region]++; \
|
memory_region_access_write_u32[region]++; \
|
||||||
memory_writes_u32++; \
|
memory_writes_u32++; \
|
||||||
} \
|
} \
|
||||||
|
@ -1404,6 +1408,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
||||||
{ \
|
{ \
|
||||||
thumb_pc_offset(2); \
|
thumb_pc_offset(2); \
|
||||||
} \
|
} \
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][0]; \
|
||||||
} \
|
} \
|
||||||
|
|
||||||
// When a mode change occurs from non-FIQ to non-FIQ retire the current
|
// When a mode change occurs from non-FIQ to non-FIQ retire the current
|
||||||
|
@ -1530,7 +1535,6 @@ void execute_arm(u32 cycles)
|
||||||
u8 *pc_address_block = memory_map_read[pc_region];
|
u8 *pc_address_block = memory_map_read[pc_region];
|
||||||
u32 new_pc_region;
|
u32 new_pc_region;
|
||||||
s32 cycles_remaining;
|
s32 cycles_remaining;
|
||||||
u32 cycles_per_instruction = global_cycles_per_instruction;
|
|
||||||
cpu_alert_type cpu_alert;
|
cpu_alert_type cpu_alert;
|
||||||
|
|
||||||
u32 old_pc;
|
u32 old_pc;
|
||||||
|
@ -1564,7 +1568,6 @@ void execute_arm(u32 cycles)
|
||||||
arm_loop:
|
arm_loop:
|
||||||
|
|
||||||
collapse_flags();
|
collapse_flags();
|
||||||
cycles_per_instruction = global_cycles_per_instruction;
|
|
||||||
|
|
||||||
/* Process cheats if we are about to execute the cheat hook */
|
/* Process cheats if we are about to execute the cheat hook */
|
||||||
if (pc == cheat_master_hook)
|
if (pc == cheat_master_hook)
|
||||||
|
@ -2156,6 +2159,7 @@ arm_loop:
|
||||||
{
|
{
|
||||||
arm_pc_offset_update_direct(src);
|
arm_pc_offset_update_direct(src);
|
||||||
}
|
}
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -3084,6 +3088,7 @@ arm_loop:
|
||||||
/* B offset */
|
/* B offset */
|
||||||
arm_decode_branch();
|
arm_decode_branch();
|
||||||
arm_pc_offset_update(offset + 8);
|
arm_pc_offset_update(offset + 8);
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3093,6 +3098,7 @@ arm_loop:
|
||||||
arm_decode_branch();
|
arm_decode_branch();
|
||||||
reg[REG_LR] = pc + 4;
|
reg[REG_LR] = pc + 4;
|
||||||
arm_pc_offset_update(offset + 8);
|
arm_pc_offset_update(offset + 8);
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3130,7 +3136,7 @@ arm_loop:
|
||||||
skip_instruction:
|
skip_instruction:
|
||||||
|
|
||||||
/* End of Execute ARM instruction */
|
/* End of Execute ARM instruction */
|
||||||
cycles_remaining -= cycles_per_instruction;
|
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][1];
|
||||||
|
|
||||||
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
||||||
|
|
||||||
|
@ -3622,6 +3628,7 @@ thumb_loop:
|
||||||
/* B label */
|
/* B label */
|
||||||
thumb_decode_branch();
|
thumb_decode_branch();
|
||||||
thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4);
|
thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4);
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3642,12 +3649,13 @@ thumb_loop:
|
||||||
pc = reg[REG_LR] + (offset * 2);
|
pc = reg[REG_LR] + (offset * 2);
|
||||||
reg[REG_LR] = lr;
|
reg[REG_LR] = lr;
|
||||||
reg[REG_PC] = pc;
|
reg[REG_PC] = pc;
|
||||||
|
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* End of Execute THUMB instruction */
|
/* End of Execute THUMB instruction */
|
||||||
cycles_remaining -= cycles_per_instruction;
|
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][0];
|
||||||
|
|
||||||
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
||||||
|
|
||||||
|
|
|
@ -2774,7 +2774,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
|
||||||
#define arm_instruction_width 4
|
#define arm_instruction_width 4
|
||||||
|
|
||||||
#define arm_base_cycles() \
|
#define arm_base_cycles() \
|
||||||
cycle_count += waitstate_cycles_sequential[pc >> 24][2] \
|
cycle_count += def_seq_cycles[pc >> 24][1] \
|
||||||
|
|
||||||
// For now this just sets a variable that says flags should always be
|
// For now this just sets a variable that says flags should always be
|
||||||
// computed.
|
// computed.
|
||||||
|
@ -2843,7 +2843,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
|
||||||
#define thumb_instruction_width 2
|
#define thumb_instruction_width 2
|
||||||
|
|
||||||
#define thumb_base_cycles() \
|
#define thumb_base_cycles() \
|
||||||
cycle_count += waitstate_cycles_sequential[pc >> 24][1] \
|
cycle_count += def_seq_cycles[pc >> 24][0] \
|
||||||
|
|
||||||
// Here's how this works: each instruction has three different sets of flag
|
// Here's how this works: each instruction has three different sets of flag
|
||||||
// attributes, each consisting of a 4bit mask describing how that instruction
|
// attributes, each consisting of a 4bit mask describing how that instruction
|
||||||
|
|
127
gba_memory.c
127
gba_memory.c
|
@ -269,42 +269,71 @@ static void trigger_timer(u32 timer_number, u32 value)
|
||||||
write_ioreg(REG_TMXCNT(timer_number), value);
|
write_ioreg(REG_TMXCNT(timer_number), value);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This table is configured for sequential access on system defaults
|
/* Memory timings */
|
||||||
|
const u8 ws012_nonseq[] = {4, 3, 2, 8};
|
||||||
|
const u8 ws0_seq[] = {2, 1};
|
||||||
|
const u8 ws1_seq[] = {4, 1};
|
||||||
|
const u8 ws2_seq[] = {8, 1};
|
||||||
|
|
||||||
const u32 waitstate_cycles_sequential[16][3] =
|
/* Divided by region and bus width (16/32) */
|
||||||
|
u8 ws_cyc_seq[16][2] =
|
||||||
{
|
{
|
||||||
{ 1, 1, 1 }, // BIOS
|
{ 1, 1 }, // BIOS
|
||||||
{ 1, 1, 1 }, // Invalid
|
{ 1, 1 }, // Invalid
|
||||||
{ 3, 3, 6 }, // EWRAM (default settings)
|
{ 3, 6 }, // EWRAM (default settings)
|
||||||
{ 1, 1, 1 }, // IWRAM
|
{ 1, 1 }, // IWRAM
|
||||||
{ 1, 1, 1 }, // IO Registers
|
{ 1, 1 }, // IO Registers
|
||||||
{ 1, 1, 2 }, // Palette RAM
|
{ 1, 2 }, // Palette RAM
|
||||||
{ 1, 1, 2 }, // VRAM
|
{ 1, 2 }, // VRAM
|
||||||
{ 1, 1, 2 }, // OAM
|
{ 1, 2 }, // OAM
|
||||||
{ 3, 3, 6 }, // Gamepak (wait 0)
|
{ 0, 0 }, // Gamepak (wait 0)
|
||||||
{ 3, 3, 6 }, // Gamepak (wait 0)
|
{ 0, 0 }, // Gamepak (wait 0)
|
||||||
{ 5, 5, 9 }, // Gamepak (wait 1)
|
{ 0, 0 }, // Gamepak (wait 1)
|
||||||
{ 5, 5, 9 }, // Gamepak (wait 1)
|
{ 0, 0 }, // Gamepak (wait 1)
|
||||||
{ 9, 9, 17 }, // Gamepak (wait 2)
|
{ 0, 0 }, // Gamepak (wait 2)
|
||||||
{ 9, 9, 17 }, // Gamepak (wait 2)
|
{ 0, 0 }, // Gamepak (wait 2)
|
||||||
|
{ 1, 1 }, // Invalid
|
||||||
|
{ 1, 1 }, // Invalid
|
||||||
|
};
|
||||||
|
u8 ws_cyc_nseq[16][2] =
|
||||||
|
{
|
||||||
|
{ 1, 1 }, // BIOS
|
||||||
|
{ 1, 1 }, // Invalid
|
||||||
|
{ 3, 6 }, // EWRAM (default settings)
|
||||||
|
{ 1, 1 }, // IWRAM
|
||||||
|
{ 1, 1 }, // IO Registers
|
||||||
|
{ 1, 2 }, // Palette RAM
|
||||||
|
{ 1, 2 }, // VRAM
|
||||||
|
{ 1, 2 }, // OAM
|
||||||
|
{ 0, 0 }, // Gamepak (wait 0)
|
||||||
|
{ 0, 0 }, // Gamepak (wait 0)
|
||||||
|
{ 0, 0 }, // Gamepak (wait 1)
|
||||||
|
{ 0, 0 }, // Gamepak (wait 1)
|
||||||
|
{ 0, 0 }, // Gamepak (wait 2)
|
||||||
|
{ 0, 0 }, // Gamepak (wait 2)
|
||||||
|
{ 1, 1 }, // Invalid
|
||||||
|
{ 1, 1 }, // Invalid
|
||||||
};
|
};
|
||||||
|
|
||||||
// Different settings for gamepak ws0-2 sequential (2nd) access
|
const u32 def_seq_cycles[16][2] =
|
||||||
|
|
||||||
const u32 gamepak_waitstate_sequential[2][3][3] =
|
|
||||||
{
|
{
|
||||||
{
|
{ 1, 1 }, // BIOS
|
||||||
{ 3, 3, 6 },
|
{ 1, 1 }, // Invalid
|
||||||
{ 5, 5, 9 },
|
{ 3, 6 }, // EWRAM (default settings)
|
||||||
{ 9, 9, 17 }
|
{ 1, 1 }, // IWRAM
|
||||||
},
|
{ 1, 1 }, // IO Registers
|
||||||
{
|
{ 1, 2 }, // Palette RAM
|
||||||
{ 2, 2, 3 },
|
{ 1, 2 }, // VRAM
|
||||||
{ 2, 2, 3 },
|
{ 1, 2 }, // OAM
|
||||||
{ 2, 2, 3 }
|
{ 3, 6 }, // Gamepak (wait 0)
|
||||||
}
|
{ 3, 6 }, // Gamepak (wait 0)
|
||||||
|
{ 5, 9 }, // Gamepak (wait 1)
|
||||||
|
{ 5, 9 }, // Gamepak (wait 1)
|
||||||
|
{ 9, 17 }, // Gamepak (wait 2)
|
||||||
|
{ 9, 17 }, // Gamepak (wait 2)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
u8 bios_rom[1024 * 16];
|
u8 bios_rom[1024 * 16];
|
||||||
|
|
||||||
// Up to 128kb, store SRAM, flash ROM, or EEPROM here.
|
// Up to 128kb, store SRAM, flash ROM, or EEPROM here.
|
||||||
|
@ -364,6 +393,34 @@ u32 flash_bank_cnt;
|
||||||
|
|
||||||
u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB;
|
u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB;
|
||||||
|
|
||||||
|
void reload_timing_info()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
uint16_t waitcnt = read_ioreg(REG_WAITCNT);
|
||||||
|
|
||||||
|
/* Sequential 16 and 32 bit accesses to ROM */
|
||||||
|
ws_cyc_seq[0x8][0] = ws_cyc_seq[0x9][0] = 1 + ws0_seq[(waitcnt >> 4) & 1];
|
||||||
|
ws_cyc_seq[0xA][0] = ws_cyc_seq[0xB][0] = 1 + ws1_seq[(waitcnt >> 7) & 1];
|
||||||
|
ws_cyc_seq[0xC][0] = ws_cyc_seq[0xD][0] = 1 + ws2_seq[(waitcnt >> 10) & 1];
|
||||||
|
|
||||||
|
for (i = 0x8; i <= 0xD; i++)
|
||||||
|
{
|
||||||
|
/* 32 bit accesses just cost double due to 16 bit bus */
|
||||||
|
ws_cyc_seq[i][1] = ws_cyc_seq[i][0] * 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Non-sequential 16 and 32 bit accesses to ROM */
|
||||||
|
ws_cyc_nseq[0x8][0] = ws_cyc_nseq[0x9][0] = 1 + ws012_nonseq[(waitcnt >> 2) & 3];
|
||||||
|
ws_cyc_nseq[0xA][0] = ws_cyc_nseq[0xB][0] = 1 + ws012_nonseq[(waitcnt >> 5) & 3];
|
||||||
|
ws_cyc_nseq[0xC][0] = ws_cyc_nseq[0xD][0] = 1 + ws012_nonseq[(waitcnt >> 8) & 3];
|
||||||
|
|
||||||
|
for (i = 0x8; i <= 0xD; i++)
|
||||||
|
{
|
||||||
|
/* 32 bit accesses are a non-seq (16) + seq access (16) */
|
||||||
|
ws_cyc_nseq[i][1] = 1 + ws_cyc_nseq[i][0] + ws_cyc_seq[i][0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u8 read_backup(u32 address)
|
u8 read_backup(u32 address)
|
||||||
{
|
{
|
||||||
u8 value = 0;
|
u8 value = 0;
|
||||||
|
@ -866,8 +923,12 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
|
||||||
case REG_VCOUNT:
|
case REG_VCOUNT:
|
||||||
break; // Do nothing
|
break; // Do nothing
|
||||||
|
|
||||||
// Registers without side effects
|
|
||||||
case REG_WAITCNT:
|
case REG_WAITCNT:
|
||||||
|
write_ioreg(REG_WAITCNT, value);
|
||||||
|
reload_timing_info();
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Registers without side effects
|
||||||
default:
|
default:
|
||||||
write_ioreg(ioreg, value);
|
write_ioreg(ioreg, value);
|
||||||
break;
|
break;
|
||||||
|
@ -2251,8 +2312,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles)
|
||||||
// This is an approximation for the most common case (no region cross)
|
// This is an approximation for the most common case (no region cross)
|
||||||
if (usedcycles)
|
if (usedcycles)
|
||||||
*usedcycles += dmach->length * (
|
*usedcycles += dmach->length * (
|
||||||
waitstate_cycles_sequential[src_ptr >> 24][tfsizes] +
|
def_seq_cycles[src_ptr >> 24][tfsizes - 1] +
|
||||||
waitstate_cycles_sequential[dst_ptr >> 24][tfsizes]);
|
def_seq_cycles[dst_ptr >> 24][tfsizes - 1]);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -2415,6 +2476,8 @@ void init_memory(void)
|
||||||
write_ioreg(REG_BG3PD, 0x100);
|
write_ioreg(REG_BG3PD, 0x100);
|
||||||
write_ioreg(REG_RCNT, 0x8000);
|
write_ioreg(REG_RCNT, 0x8000);
|
||||||
|
|
||||||
|
reload_timing_info();
|
||||||
|
|
||||||
backup_type = BACKUP_NONE;
|
backup_type = BACKUP_NONE;
|
||||||
|
|
||||||
sram_bankcount = SRAM_SIZE_32KB;
|
sram_bankcount = SRAM_SIZE_32KB;
|
||||||
|
|
|
@ -200,7 +200,10 @@ void function_cc write_backup(u32 address, u32 value);
|
||||||
void function_cc write_rtc(u32 address, u32 value);
|
void function_cc write_rtc(u32 address, u32 value);
|
||||||
|
|
||||||
/* EDIT: Shouldn't this be extern ?! */
|
/* EDIT: Shouldn't this be extern ?! */
|
||||||
extern const u32 waitstate_cycles_sequential[16][3];
|
extern const u32 def_seq_cycles[16][2];
|
||||||
|
/* Cycles can change depending on WAITCNT */
|
||||||
|
extern u8 ws_cyc_seq[16][2];
|
||||||
|
extern u8 ws_cyc_nseq[16][2];
|
||||||
|
|
||||||
extern u32 gamepak_size;
|
extern u32 gamepak_size;
|
||||||
extern char gamepak_title[13];
|
extern char gamepak_title[13];
|
||||||
|
|
2
main.c
2
main.c
|
@ -22,8 +22,6 @@
|
||||||
|
|
||||||
timer_type timer[4];
|
timer_type timer[4];
|
||||||
|
|
||||||
const u32 global_cycles_per_instruction = 1;
|
|
||||||
|
|
||||||
u32 cpu_ticks = 0;
|
u32 cpu_ticks = 0;
|
||||||
|
|
||||||
u32 execute_cycles = 960;
|
u32 execute_cycles = 960;
|
||||||
|
|
1
main.h
1
main.h
|
@ -68,7 +68,6 @@ typedef enum
|
||||||
|
|
||||||
extern u32 cpu_ticks;
|
extern u32 cpu_ticks;
|
||||||
extern u32 execute_cycles;
|
extern u32 execute_cycles;
|
||||||
extern const u32 global_cycles_per_instruction;
|
|
||||||
extern u32 skip_next_frame;
|
extern u32 skip_next_frame;
|
||||||
|
|
||||||
extern u32 flush_ram_count;
|
extern u32 flush_ram_count;
|
||||||
|
|
Loading…
Reference in New Issue