[interp] Improve interpreter timings and honor WAITCNT
This fixes a few games and makes the interpreter faster (since it doesn't run an overclocked CPU anymore).
This commit is contained in:
parent
c9c88f3560
commit
84c347edad
16
cpu.c
16
cpu.c
|
@ -865,6 +865,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
|||
{ \
|
||||
/* Account for cycles and other stats */ \
|
||||
u8 region = _address >> 24; \
|
||||
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
|
||||
memory_region_access_read_##type[region]++; \
|
||||
memory_reads_##type++; \
|
||||
} \
|
||||
|
@ -890,6 +891,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
|||
if(_address < 0x10000000) \
|
||||
{ \
|
||||
u8 region = _address >> 24; \
|
||||
cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \
|
||||
memory_region_access_write_##type[region]++; \
|
||||
memory_writes_##type++; \
|
||||
} \
|
||||
|
@ -905,6 +907,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
|||
{ \
|
||||
/* Account for cycles and other stats */ \
|
||||
u8 region = _address >> 24; \
|
||||
cycles_remaining -= ws_cyc_seq[region][1]; \
|
||||
memory_region_access_read_u32[region]++; \
|
||||
memory_reads_u32++; \
|
||||
} \
|
||||
|
@ -925,6 +928,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
|||
{ \
|
||||
/* Account for cycles and other stats */ \
|
||||
u8 region = _address >> 24; \
|
||||
cycles_remaining -= ws_cyc_seq[region][1]; \
|
||||
memory_region_access_write_u32[region]++; \
|
||||
memory_writes_u32++; \
|
||||
} \
|
||||
|
@ -1404,6 +1408,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF };
|
|||
{ \
|
||||
thumb_pc_offset(2); \
|
||||
} \
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][0]; \
|
||||
} \
|
||||
|
||||
// When a mode change occurs from non-FIQ to non-FIQ retire the current
|
||||
|
@ -1530,7 +1535,6 @@ void execute_arm(u32 cycles)
|
|||
u8 *pc_address_block = memory_map_read[pc_region];
|
||||
u32 new_pc_region;
|
||||
s32 cycles_remaining;
|
||||
u32 cycles_per_instruction = global_cycles_per_instruction;
|
||||
cpu_alert_type cpu_alert;
|
||||
|
||||
u32 old_pc;
|
||||
|
@ -1564,7 +1568,6 @@ void execute_arm(u32 cycles)
|
|||
arm_loop:
|
||||
|
||||
collapse_flags();
|
||||
cycles_per_instruction = global_cycles_per_instruction;
|
||||
|
||||
/* Process cheats if we are about to execute the cheat hook */
|
||||
if (pc == cheat_master_hook)
|
||||
|
@ -2156,6 +2159,7 @@ arm_loop:
|
|||
{
|
||||
arm_pc_offset_update_direct(src);
|
||||
}
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -3084,6 +3088,7 @@ arm_loop:
|
|||
/* B offset */
|
||||
arm_decode_branch();
|
||||
arm_pc_offset_update(offset + 8);
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3093,6 +3098,7 @@ arm_loop:
|
|||
arm_decode_branch();
|
||||
reg[REG_LR] = pc + 4;
|
||||
arm_pc_offset_update(offset + 8);
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][1];
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3130,7 +3136,7 @@ arm_loop:
|
|||
skip_instruction:
|
||||
|
||||
/* End of Execute ARM instruction */
|
||||
cycles_remaining -= cycles_per_instruction;
|
||||
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][1];
|
||||
|
||||
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
||||
|
||||
|
@ -3622,6 +3628,7 @@ thumb_loop:
|
|||
/* B label */
|
||||
thumb_decode_branch();
|
||||
thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4);
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3642,12 +3649,13 @@ thumb_loop:
|
|||
pc = reg[REG_LR] + (offset * 2);
|
||||
reg[REG_LR] = lr;
|
||||
reg[REG_PC] = pc;
|
||||
cycles_remaining -= ws_cyc_nseq[pc >> 24][0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* End of Execute THUMB instruction */
|
||||
cycles_remaining -= cycles_per_instruction;
|
||||
cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][0];
|
||||
|
||||
if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0;
|
||||
|
||||
|
|
|
@ -2774,7 +2774,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
|
|||
#define arm_instruction_width 4
|
||||
|
||||
#define arm_base_cycles() \
|
||||
cycle_count += waitstate_cycles_sequential[pc >> 24][2] \
|
||||
cycle_count += def_seq_cycles[pc >> 24][1] \
|
||||
|
||||
// For now this just sets a variable that says flags should always be
|
||||
// computed.
|
||||
|
@ -2843,7 +2843,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc)
|
|||
#define thumb_instruction_width 2
|
||||
|
||||
#define thumb_base_cycles() \
|
||||
cycle_count += waitstate_cycles_sequential[pc >> 24][1] \
|
||||
cycle_count += def_seq_cycles[pc >> 24][0] \
|
||||
|
||||
// Here's how this works: each instruction has three different sets of flag
|
||||
// attributes, each consisting of a 4bit mask describing how that instruction
|
||||
|
|
127
gba_memory.c
127
gba_memory.c
|
@ -269,42 +269,71 @@ static void trigger_timer(u32 timer_number, u32 value)
|
|||
write_ioreg(REG_TMXCNT(timer_number), value);
|
||||
}
|
||||
|
||||
// This table is configured for sequential access on system defaults
|
||||
/* Memory timings */
|
||||
const u8 ws012_nonseq[] = {4, 3, 2, 8};
|
||||
const u8 ws0_seq[] = {2, 1};
|
||||
const u8 ws1_seq[] = {4, 1};
|
||||
const u8 ws2_seq[] = {8, 1};
|
||||
|
||||
const u32 waitstate_cycles_sequential[16][3] =
|
||||
/* Divided by region and bus width (16/32) */
|
||||
u8 ws_cyc_seq[16][2] =
|
||||
{
|
||||
{ 1, 1, 1 }, // BIOS
|
||||
{ 1, 1, 1 }, // Invalid
|
||||
{ 3, 3, 6 }, // EWRAM (default settings)
|
||||
{ 1, 1, 1 }, // IWRAM
|
||||
{ 1, 1, 1 }, // IO Registers
|
||||
{ 1, 1, 2 }, // Palette RAM
|
||||
{ 1, 1, 2 }, // VRAM
|
||||
{ 1, 1, 2 }, // OAM
|
||||
{ 3, 3, 6 }, // Gamepak (wait 0)
|
||||
{ 3, 3, 6 }, // Gamepak (wait 0)
|
||||
{ 5, 5, 9 }, // Gamepak (wait 1)
|
||||
{ 5, 5, 9 }, // Gamepak (wait 1)
|
||||
{ 9, 9, 17 }, // Gamepak (wait 2)
|
||||
{ 9, 9, 17 }, // Gamepak (wait 2)
|
||||
{ 1, 1 }, // BIOS
|
||||
{ 1, 1 }, // Invalid
|
||||
{ 3, 6 }, // EWRAM (default settings)
|
||||
{ 1, 1 }, // IWRAM
|
||||
{ 1, 1 }, // IO Registers
|
||||
{ 1, 2 }, // Palette RAM
|
||||
{ 1, 2 }, // VRAM
|
||||
{ 1, 2 }, // OAM
|
||||
{ 0, 0 }, // Gamepak (wait 0)
|
||||
{ 0, 0 }, // Gamepak (wait 0)
|
||||
{ 0, 0 }, // Gamepak (wait 1)
|
||||
{ 0, 0 }, // Gamepak (wait 1)
|
||||
{ 0, 0 }, // Gamepak (wait 2)
|
||||
{ 0, 0 }, // Gamepak (wait 2)
|
||||
{ 1, 1 }, // Invalid
|
||||
{ 1, 1 }, // Invalid
|
||||
};
|
||||
u8 ws_cyc_nseq[16][2] =
|
||||
{
|
||||
{ 1, 1 }, // BIOS
|
||||
{ 1, 1 }, // Invalid
|
||||
{ 3, 6 }, // EWRAM (default settings)
|
||||
{ 1, 1 }, // IWRAM
|
||||
{ 1, 1 }, // IO Registers
|
||||
{ 1, 2 }, // Palette RAM
|
||||
{ 1, 2 }, // VRAM
|
||||
{ 1, 2 }, // OAM
|
||||
{ 0, 0 }, // Gamepak (wait 0)
|
||||
{ 0, 0 }, // Gamepak (wait 0)
|
||||
{ 0, 0 }, // Gamepak (wait 1)
|
||||
{ 0, 0 }, // Gamepak (wait 1)
|
||||
{ 0, 0 }, // Gamepak (wait 2)
|
||||
{ 0, 0 }, // Gamepak (wait 2)
|
||||
{ 1, 1 }, // Invalid
|
||||
{ 1, 1 }, // Invalid
|
||||
};
|
||||
|
||||
// Different settings for gamepak ws0-2 sequential (2nd) access
|
||||
|
||||
const u32 gamepak_waitstate_sequential[2][3][3] =
|
||||
const u32 def_seq_cycles[16][2] =
|
||||
{
|
||||
{
|
||||
{ 3, 3, 6 },
|
||||
{ 5, 5, 9 },
|
||||
{ 9, 9, 17 }
|
||||
},
|
||||
{
|
||||
{ 2, 2, 3 },
|
||||
{ 2, 2, 3 },
|
||||
{ 2, 2, 3 }
|
||||
}
|
||||
{ 1, 1 }, // BIOS
|
||||
{ 1, 1 }, // Invalid
|
||||
{ 3, 6 }, // EWRAM (default settings)
|
||||
{ 1, 1 }, // IWRAM
|
||||
{ 1, 1 }, // IO Registers
|
||||
{ 1, 2 }, // Palette RAM
|
||||
{ 1, 2 }, // VRAM
|
||||
{ 1, 2 }, // OAM
|
||||
{ 3, 6 }, // Gamepak (wait 0)
|
||||
{ 3, 6 }, // Gamepak (wait 0)
|
||||
{ 5, 9 }, // Gamepak (wait 1)
|
||||
{ 5, 9 }, // Gamepak (wait 1)
|
||||
{ 9, 17 }, // Gamepak (wait 2)
|
||||
{ 9, 17 }, // Gamepak (wait 2)
|
||||
};
|
||||
|
||||
|
||||
u8 bios_rom[1024 * 16];
|
||||
|
||||
// Up to 128kb, store SRAM, flash ROM, or EEPROM here.
|
||||
|
@ -364,6 +393,34 @@ u32 flash_bank_cnt;
|
|||
|
||||
u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB;
|
||||
|
||||
void reload_timing_info()
|
||||
{
|
||||
int i;
|
||||
uint16_t waitcnt = read_ioreg(REG_WAITCNT);
|
||||
|
||||
/* Sequential 16 and 32 bit accesses to ROM */
|
||||
ws_cyc_seq[0x8][0] = ws_cyc_seq[0x9][0] = 1 + ws0_seq[(waitcnt >> 4) & 1];
|
||||
ws_cyc_seq[0xA][0] = ws_cyc_seq[0xB][0] = 1 + ws1_seq[(waitcnt >> 7) & 1];
|
||||
ws_cyc_seq[0xC][0] = ws_cyc_seq[0xD][0] = 1 + ws2_seq[(waitcnt >> 10) & 1];
|
||||
|
||||
for (i = 0x8; i <= 0xD; i++)
|
||||
{
|
||||
/* 32 bit accesses just cost double due to 16 bit bus */
|
||||
ws_cyc_seq[i][1] = ws_cyc_seq[i][0] * 2;
|
||||
}
|
||||
|
||||
/* Non-sequential 16 and 32 bit accesses to ROM */
|
||||
ws_cyc_nseq[0x8][0] = ws_cyc_nseq[0x9][0] = 1 + ws012_nonseq[(waitcnt >> 2) & 3];
|
||||
ws_cyc_nseq[0xA][0] = ws_cyc_nseq[0xB][0] = 1 + ws012_nonseq[(waitcnt >> 5) & 3];
|
||||
ws_cyc_nseq[0xC][0] = ws_cyc_nseq[0xD][0] = 1 + ws012_nonseq[(waitcnt >> 8) & 3];
|
||||
|
||||
for (i = 0x8; i <= 0xD; i++)
|
||||
{
|
||||
/* 32 bit accesses are a non-seq (16) + seq access (16) */
|
||||
ws_cyc_nseq[i][1] = 1 + ws_cyc_nseq[i][0] + ws_cyc_seq[i][0];
|
||||
}
|
||||
}
|
||||
|
||||
u8 read_backup(u32 address)
|
||||
{
|
||||
u8 value = 0;
|
||||
|
@ -866,8 +923,12 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value)
|
|||
case REG_VCOUNT:
|
||||
break; // Do nothing
|
||||
|
||||
// Registers without side effects
|
||||
case REG_WAITCNT:
|
||||
write_ioreg(REG_WAITCNT, value);
|
||||
reload_timing_info();
|
||||
break;
|
||||
|
||||
// Registers without side effects
|
||||
default:
|
||||
write_ioreg(ioreg, value);
|
||||
break;
|
||||
|
@ -2251,8 +2312,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles)
|
|||
// This is an approximation for the most common case (no region cross)
|
||||
if (usedcycles)
|
||||
*usedcycles += dmach->length * (
|
||||
waitstate_cycles_sequential[src_ptr >> 24][tfsizes] +
|
||||
waitstate_cycles_sequential[dst_ptr >> 24][tfsizes]);
|
||||
def_seq_cycles[src_ptr >> 24][tfsizes - 1] +
|
||||
def_seq_cycles[dst_ptr >> 24][tfsizes - 1]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -2415,6 +2476,8 @@ void init_memory(void)
|
|||
write_ioreg(REG_BG3PD, 0x100);
|
||||
write_ioreg(REG_RCNT, 0x8000);
|
||||
|
||||
reload_timing_info();
|
||||
|
||||
backup_type = BACKUP_NONE;
|
||||
|
||||
sram_bankcount = SRAM_SIZE_32KB;
|
||||
|
|
|
@ -200,7 +200,10 @@ void function_cc write_backup(u32 address, u32 value);
|
|||
void function_cc write_rtc(u32 address, u32 value);
|
||||
|
||||
/* EDIT: Shouldn't this be extern ?! */
|
||||
extern const u32 waitstate_cycles_sequential[16][3];
|
||||
extern const u32 def_seq_cycles[16][2];
|
||||
/* Cycles can change depending on WAITCNT */
|
||||
extern u8 ws_cyc_seq[16][2];
|
||||
extern u8 ws_cyc_nseq[16][2];
|
||||
|
||||
extern u32 gamepak_size;
|
||||
extern char gamepak_title[13];
|
||||
|
|
2
main.c
2
main.c
|
@ -22,8 +22,6 @@
|
|||
|
||||
timer_type timer[4];
|
||||
|
||||
const u32 global_cycles_per_instruction = 1;
|
||||
|
||||
u32 cpu_ticks = 0;
|
||||
|
||||
u32 execute_cycles = 960;
|
||||
|
|
Loading…
Reference in New Issue