From 84c347edadcba216488a2939a1867168b31498d4 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Wed, 7 Jun 2023 19:40:27 +0200 Subject: [PATCH] [interp] Improve interpreter timings and honor WAITCNT This fixes a few games and makes the interpreter faster (since it doesn't run an overclocked CPU anymore). --- cpu.c | 16 +++++-- cpu_threaded.c | 4 +- gba_memory.c | 127 ++++++++++++++++++++++++++++++++++++------------- gba_memory.h | 5 +- main.c | 2 - main.h | 1 - 6 files changed, 113 insertions(+), 42 deletions(-) diff --git a/cpu.c b/cpu.c index b3978f7..de79b79 100644 --- a/cpu.c +++ b/cpu.c @@ -865,6 +865,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF }; { \ /* Account for cycles and other stats */ \ u8 region = _address >> 24; \ + cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \ memory_region_access_read_##type[region]++; \ memory_reads_##type++; \ } \ @@ -890,6 +891,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF }; if(_address < 0x10000000) \ { \ u8 region = _address >> 24; \ + cycles_remaining -= ws_cyc_nseq[region][(size - 8) / 16]; \ memory_region_access_write_##type[region]++; \ memory_writes_##type++; \ } \ @@ -905,6 +907,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF }; { \ /* Account for cycles and other stats */ \ u8 region = _address >> 24; \ + cycles_remaining -= ws_cyc_seq[region][1]; \ memory_region_access_read_u32[region]++; \ memory_reads_u32++; \ } \ @@ -925,6 +928,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF }; { \ /* Account for cycles and other stats */ \ u8 region = _address >> 24; \ + cycles_remaining -= ws_cyc_seq[region][1]; \ memory_region_access_write_u32[region]++; \ memory_writes_u32++; \ } \ @@ -1404,6 +1408,7 @@ const u32 spsr_masks[4] = { 0x00000000, 0x000000EF, 0xF0000000, 0xF00000EF }; { \ thumb_pc_offset(2); \ } \ + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][0]; /* region masked: the table only has 16 rows */ \ } \ // When a
mode change occurs from non-FIQ to non-FIQ retire the current @@ -1530,7 +1535,6 @@ void execute_arm(u32 cycles) u8 *pc_address_block = memory_map_read[pc_region]; u32 new_pc_region; s32 cycles_remaining; - u32 cycles_per_instruction = global_cycles_per_instruction; cpu_alert_type cpu_alert; u32 old_pc; @@ -1564,7 +1568,6 @@ void execute_arm(u32 cycles) arm_loop: collapse_flags(); - cycles_per_instruction = global_cycles_per_instruction; /* Process cheats if we are about to execute the cheat hook */ if (pc == cheat_master_hook) @@ -2156,6 +2159,7 @@ arm_loop: { arm_pc_offset_update_direct(src); } + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][1]; /* region masked: the table only has 16 rows */ } else { @@ -3084,6 +3088,7 @@ arm_loop: /* B offset */ arm_decode_branch(); arm_pc_offset_update(offset + 8); + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][1]; /* region masked: the table only has 16 rows */ break; } @@ -3093,6 +3098,7 @@ arm_loop: arm_decode_branch(); reg[REG_LR] = pc + 4; arm_pc_offset_update(offset + 8); + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][1]; /* region masked: the table only has 16 rows */ break; } @@ -3130,7 +3136,7 @@ arm_loop: skip_instruction: /* End of Execute ARM instruction */ - cycles_remaining -= cycles_per_instruction; + cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][1]; if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; @@ -3622,6 +3628,7 @@ thumb_loop: /* B label */ thumb_decode_branch(); thumb_pc_offset_update(((s32)(offset << 21) >> 20) + 4); + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][0]; /* region masked: the table only has 16 rows */ break; } @@ -3642,12 +3649,13 @@ thumb_loop: pc = reg[REG_LR] + (offset * 2); reg[REG_LR] = lr; reg[REG_PC] = pc; + cycles_remaining -= ws_cyc_nseq[(pc >> 24) & 0xF][0]; /* region masked: the table only has 16 rows */ break; } } /* End of Execute THUMB instruction */ - cycles_remaining -= cycles_per_instruction; + cycles_remaining -= ws_cyc_seq[(pc >> 24) & 0xF][0]; if (pc == idle_loop_target_pc && cycles_remaining > 0) cycles_remaining = 0; diff --git a/cpu_threaded.c b/cpu_threaded.c index c2dc35f..bbca755 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -2774,7 +2774,7 @@ u8 function_cc
*block_lookup_address_thumb(u32 pc) #define arm_instruction_width 4 #define arm_base_cycles() \ - cycle_count += waitstate_cycles_sequential[pc >> 24][2] \ + cycle_count += def_seq_cycles[pc >> 24][1] \ // For now this just sets a variable that says flags should always be // computed. @@ -2843,7 +2843,7 @@ u8 function_cc *block_lookup_address_thumb(u32 pc) #define thumb_instruction_width 2 #define thumb_base_cycles() \ - cycle_count += waitstate_cycles_sequential[pc >> 24][1] \ + cycle_count += def_seq_cycles[pc >> 24][0] \ // Here's how this works: each instruction has three different sets of flag // attributes, each consisiting of a 4bit mask describing how that instruction diff --git a/gba_memory.c b/gba_memory.c index 389e744..d966a46 100644 --- a/gba_memory.c +++ b/gba_memory.c @@ -269,42 +269,71 @@ static void trigger_timer(u32 timer_number, u32 value) write_ioreg(REG_TMXCNT(timer_number), value); } -// This table is configured for sequential access on system defaults +/* Memory timings: extra cycles per gamepak access, indexed by the matching WAITCNT field (see reload_timing_info) */ +const u8 ws012_nonseq[] = {4, 3, 2, 8}; +const u8 ws0_seq[] = {2, 1}; +const u8 ws1_seq[] = {4, 1}; +const u8 ws2_seq[] = {8, 1}; -const u32 waitstate_cycles_sequential[16][3] = +/* Sequential access cost in cycles, indexed by region (address >> 24) and access width (0: 8/16 bit, 1: 32 bit). Gamepak rows (0x8-0xD) start at 0 and are filled in by reload_timing_info() */ +u8 ws_cyc_seq[16][2] = { - { 1, 1, 1 }, // BIOS - { 1, 1, 1 }, // Invalid - { 3, 3, 6 }, // EWRAM (default settings) - { 1, 1, 1 }, // IWRAM - { 1, 1, 1 }, // IO Registers - { 1, 1, 2 }, // Palette RAM - { 1, 1, 2 }, // VRAM - { 1, 1, 2 }, // OAM - { 3, 3, 6 }, // Gamepak (wait 0) - { 3, 3, 6 }, // Gamepak (wait 0) - { 5, 5, 9 }, // Gamepak (wait 1) - { 5, 5, 9 }, // Gamepak (wait 1) - { 9, 9, 17 }, // Gamepak (wait 2) - { 9, 9, 17 }, // Gamepak (wait 2) + { 1, 1 }, // BIOS + { 1, 1 }, // Invalid + { 3, 6 }, // EWRAM (default settings) + { 1, 1 }, // IWRAM + { 1, 1 }, // IO Registers + { 1, 2 }, // Palette RAM + { 1, 2 }, // VRAM + { 1, 2 }, // OAM + { 0, 0 }, // Gamepak (wait 0) + { 0, 0 }, // Gamepak (wait 0) + { 0, 0 }, // Gamepak (wait 1) + { 0, 0
}, // Gamepak (wait 1) + { 0, 0 }, // Gamepak (wait 2) + { 0, 0 }, // Gamepak (wait 2) + { 1, 1 }, // Invalid + { 1, 1 }, // Invalid +}; +u8 ws_cyc_nseq[16][2] = /* Same layout as ws_cyc_seq, for non-sequential accesses; gamepak rows are also filled in by reload_timing_info() */ +{ + { 1, 1 }, // BIOS + { 1, 1 }, // Invalid + { 3, 6 }, // EWRAM (default settings) + { 1, 1 }, // IWRAM + { 1, 1 }, // IO Registers + { 1, 2 }, // Palette RAM + { 1, 2 }, // VRAM + { 1, 2 }, // OAM + { 0, 0 }, // Gamepak (wait 0) + { 0, 0 }, // Gamepak (wait 0) + { 0, 0 }, // Gamepak (wait 1) + { 0, 0 }, // Gamepak (wait 1) + { 0, 0 }, // Gamepak (wait 2) + { 0, 0 }, // Gamepak (wait 2) + { 1, 1 }, // Invalid + { 1, 1 }, // Invalid }; -// Different settings for gamepak ws0-2 sequential (2nd) access - -const u32 gamepak_waitstate_sequential[2][3][3] = +const u32 def_seq_cycles[16][2] = /* Fixed costs (never updated from WAITCNT), used by cpu_threaded.c and the dma_transfer() estimate; only 14 rows are listed, so rows 0xE-0xF are zero */ { - { - { 3, 3, 6 }, - { 5, 5, 9 }, - { 9, 9, 17 } - }, - { - { 2, 2, 3 }, - { 2, 2, 3 }, - { 2, 2, 3 } - } + { 1, 1 }, // BIOS + { 1, 1 }, // Invalid + { 3, 6 }, // EWRAM (default settings) + { 1, 1 }, // IWRAM + { 1, 1 }, // IO Registers + { 1, 2 }, // Palette RAM + { 1, 2 }, // VRAM + { 1, 2 }, // OAM + { 3, 6 }, // Gamepak (wait 0) + { 3, 6 }, // Gamepak (wait 0) + { 5, 9 }, // Gamepak (wait 1) + { 5, 9 }, // Gamepak (wait 1) + { 9, 17 }, // Gamepak (wait 2) + { 9, 17 }, // Gamepak (wait 2) }; + u8 bios_rom[1024 * 16]; // Up to 128kb, store SRAM, flash ROM, or EEPROM here.
@@ -364,6 +393,34 @@ u32 flash_bank_cnt; u32 flash_device_id = FLASH_DEVICE_MACRONIX_64KB; +void reload_timing_info() /* Recomputes the gamepak rows (0x8-0xD) of ws_cyc_seq and ws_cyc_nseq from the current REG_WAITCNT value */ +{ + int i; + uint16_t waitcnt = read_ioreg(REG_WAITCNT); + + /* Sequential 16 bit accesses to ROM (WAITCNT bits 4, 7 and 10 select the wait states) */ + ws_cyc_seq[0x8][0] = ws_cyc_seq[0x9][0] = 1 + ws0_seq[(waitcnt >> 4) & 1]; + ws_cyc_seq[0xA][0] = ws_cyc_seq[0xB][0] = 1 + ws1_seq[(waitcnt >> 7) & 1]; + ws_cyc_seq[0xC][0] = ws_cyc_seq[0xD][0] = 1 + ws2_seq[(waitcnt >> 10) & 1]; + + for (i = 0x8; i <= 0xD; i++) + { + /* 32 bit accesses just cost double due to 16 bit bus */ + ws_cyc_seq[i][1] = ws_cyc_seq[i][0] * 2; + } + + /* Non-sequential 16 bit accesses to ROM (two-bit WAITCNT fields at bits 2, 5 and 8) */ + ws_cyc_nseq[0x8][0] = ws_cyc_nseq[0x9][0] = 1 + ws012_nonseq[(waitcnt >> 2) & 3]; + ws_cyc_nseq[0xA][0] = ws_cyc_nseq[0xB][0] = 1 + ws012_nonseq[(waitcnt >> 5) & 3]; + ws_cyc_nseq[0xC][0] = ws_cyc_nseq[0xD][0] = 1 + ws012_nonseq[(waitcnt >> 8) & 3]; + + for (i = 0x8; i <= 0xD; i++) + { + /* 32 bit accesses are a non-seq (16) + seq access (16). NOTE(review): both terms already include their own base cycle, so the leading 1 adds a third - confirm this is intended */ + ws_cyc_nseq[i][1] = 1 + ws_cyc_nseq[i][0] + ws_cyc_seq[i][0]; + } +} + u8 read_backup(u32 address) { u8 value = 0; @@ -866,8 +923,12 @@ cpu_alert_type function_cc write_io_register16(u32 address, u32 value) case REG_VCOUNT: break; // Do nothing - // Registers without side effects case REG_WAITCNT: + write_ioreg(REG_WAITCNT, value); + reload_timing_info(); + break; + + // Registers without side effects default: write_ioreg(ioreg, value); break; @@ -2251,8 +2312,8 @@ cpu_alert_type dma_transfer(unsigned dma_chan, int *usedcycles) // This is an approximation for the most common case (no region cross) if (usedcycles) *usedcycles += dmach->length * ( - waitstate_cycles_sequential[src_ptr >> 24][tfsizes] + - waitstate_cycles_sequential[dst_ptr >> 24][tfsizes]); + def_seq_cycles[src_ptr >> 24][tfsizes - 1] + + def_seq_cycles[dst_ptr >> 24][tfsizes - 1]); return ret; } @@ -2415,6 +2476,8 @@ void init_memory(void) write_ioreg(REG_BG3PD, 0x100); write_ioreg(REG_RCNT, 0x8000); +
reload_timing_info(); + backup_type = BACKUP_NONE; sram_bankcount = SRAM_SIZE_32KB; diff --git a/gba_memory.h b/gba_memory.h index 95fc57f..51c24d9 100644 --- a/gba_memory.h +++ b/gba_memory.h @@ -200,7 +200,10 @@ void function_cc write_backup(u32 address, u32 value); void function_cc write_rtc(u32 address, u32 value); /* EDIT: Shouldn't this be extern ?! */ -extern const u32 waitstate_cycles_sequential[16][3]; +extern const u32 def_seq_cycles[16][2]; +/* Sequential / non-sequential access cycles per region and width; the gamepak rows are recomputed whenever WAITCNT is written */ +extern u8 ws_cyc_seq[16][2]; +extern u8 ws_cyc_nseq[16][2]; extern u32 gamepak_size; extern char gamepak_title[13]; diff --git a/main.c b/main.c index 907efc1..13ce5a2 100644 --- a/main.c +++ b/main.c @@ -22,8 +22,6 @@ timer_type timer[4]; -const u32 global_cycles_per_instruction = 1; - u32 cpu_ticks = 0; u32 execute_cycles = 960; diff --git a/main.h b/main.h index b34c89f..8a4cced 100644 --- a/main.h +++ b/main.h @@ -68,7 +68,6 @@ typedef enum extern u32 cpu_ticks; extern u32 execute_cycles; -extern const u32 global_cycles_per_instruction; extern u32 skip_next_frame; extern u32 flush_ram_count;