Move palettes around to simplify MIPS dynarec

The OAM structures will be moved as well, to gain a few cycles per load/store.
Loads can also be optimized to save an extra instruction per access.
This commit is contained in:
David Guillen Fandos 2021-03-15 02:25:02 +01:00
parent 6e9104e1ee
commit c86b9064df
5 changed files with 42 additions and 36 deletions

View File

@ -6,6 +6,8 @@
.globl memory_map_read
.globl memory_map_write
.globl reg
.globl palette_ram
.globl palette_ram_converted
#define REG_R0 (0 * 4)
#define REG_R1 (1 * 4)
@ -763,10 +765,12 @@ execute_load_builder(u32, 32, ldrne, #0xF0000000)
.pool
.data
.comm memory_map_read 0x8000
.comm memory_map_write 0x8000
.data
.comm palette_ram 0x400
.comm palette_ram_converted 0x400
.globl reg
.globl _reg

2
cpu.c
View File

@ -1652,6 +1652,8 @@ void raise_interrupt(irq_type irq_raised)
#ifndef HAVE_DYNAREC
u8 *memory_map_read [8 * 1024];
u8 *memory_map_write[8 * 1024];
u16 palette_ram[512];
u16 palette_ram_converted[512];
#endif
void execute_arm(u32 cycles)

View File

@ -305,9 +305,7 @@ u32 gamepak_waitstate_sequential[2][3][3] =
}
};
u16 palette_ram[512];
u16 oam_ram[512];
u16 palette_ram_converted[512];
u16 io_registers[1024 * 16];
u8 ewram[1024 * 256 * 2];
u8 iwram[1024 * 32 * 2];

View File

@ -46,6 +46,8 @@
.global execute_arm_translate
.global icache_region_sync
.global reg_check
.global palette_ram
.global palette_ram_converted
.global memory_map_read
.global memory_map_write
@ -2093,18 +2095,15 @@ execute_store_io_u8:
execute_store_palette_u8:
region_check 5, patch_store_u8
lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
andi $2, $4, 0x3FE # align palette address
ins $5, $5, 8, 8 # double value
andi $4, $4, 0x3FE # align palette address
addu $2, $2, $4
sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
addu $2, $2, $16
sh $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
lui $2, %hi(palette_ram_converted)
addu $2, $2, $4
jr $ra # return
sh $1, %lo(palette_ram_converted)($2)
sh $1, 0x500($2)
execute_store_vram_u8:
translate_region_vram_store_align16 patch_store_u8
@ -2193,17 +2192,14 @@ execute_store_io_u16:
execute_store_palette_u16:
region_check 5, patch_store_u16
lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
andi $4, $4, 0x3FE # wrap/align palette address
addu $2, $2, $4
sh $5, %lo(palette_ram)($2) # palette_ram[address] = value
andi $2, $4, 0x3FE # wrap/align palette address
addu $2, $2, $16
sh $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
lui $2, %hi(palette_ram_converted)
addu $2, $2, $4
jr $ra # return
sh $1, %lo(palette_ram_converted)($2)
sh $1, 0x500($2)
execute_store_vram_u16:
translate_region_vram_store_align16 patch_store_u16
@ -2295,18 +2291,14 @@ execute_store_io_u32:
execute_store_palette_u32:
region_check 5, patch_store_u32
lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
andi $4, $4, 0x3FC # wrap/align palette address
addu $2, $2, $4
sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
andi $2, $4, 0x3FC # wrap/align palette address
addu $2, $2, $16
sw $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
lui $2, %hi(palette_ram_converted)
addu $2, $2, $4
addiu $2, $2, %lo(palette_ram_converted)
sh $1, ($2)
sh $1, 0x500($2)
srl $5, $5, 16 # shift down to next palette value
sll $1, $5, 1 # make green 6bits
@ -2314,7 +2306,7 @@ execute_store_palette_u32:
ins $1, $5, 0, 5 # insert red channel into $1
jr $ra # return
sh $1, 2($2)
sh $1, 0x502($2)
execute_store_vram_u32:
translate_region_vram_store_align32 patch_store_u32
@ -2411,18 +2403,14 @@ execute_store_io_u32a:
execute_store_palette_u32a:
region_check 5, patch_store_u32a
lui $2, %hi(palette_ram) # start loading palette_ram address (delay)
andi $4, $4, 0x3FC # wrap/align palette address
addu $2, $2, $4
sw $5, %lo(palette_ram)($2) # palette_ram[address] = value
andi $2, $4, 0x3FC # wrap/align palette address
addu $2, $2, $16
sw $5, 0x100($2) # palette_ram[address] = value
sll $1, $5, 1 # make green 6bits
ins $1, $0, 0, 6 # make bottom bit 0
ins $1, $5, 0, 5 # insert red channel into $1
lui $2, %hi(palette_ram_converted)
addu $2, $2, $4
addiu $2, $2, %lo(palette_ram_converted)
sh $1, ($2)
sh $1, 0x500($2)
srl $5, $5, 16 # shift down to next palette value
sll $1, $5, 1 # make green 6bits
@ -2430,7 +2418,7 @@ execute_store_palette_u32a:
ins $1, $5, 0, 5 # insert red channel into $1
jr $ra # return
sh $1, 2($2)
sh $1, 0x502($2)
execute_store_vram_u32a:
translate_region_vram_store_align32 patch_store_u32a
@ -2810,6 +2798,7 @@ execute_arm_translate:
.data
.align 6
memory_map_read:
.space 0x8000
@ -2819,5 +2808,12 @@ memory_map_read:
reg:
.space 0x100
# Placed here for easy access
palette_ram:
.space 0x400
palette_ram_converted:
.space 0x400
memory_map_write:
.space 0x8000

View File

@ -69,6 +69,8 @@
.global _memory_map_read
.global _memory_map_write
.global _reg
.global _palette_ram
.global _palette_ram_converted
.global _oam_update
@ -561,6 +563,10 @@ return_to_main:
_reg:
.space 0x100, 0
_palette_ram:
.space 0x400
_palette_ram_converted:
.space 0x400
.comm _memory_map_read 0x8000
.comm _memory_map_write 0x8000