Add forward declarations

This commit is contained in:
twinaphex 2020-10-08 15:47:37 +02:00
parent 27419bc0b0
commit 4181385f39
1 changed file with 110 additions and 104 deletions

View File

@ -22,13 +22,17 @@
#include "arm_codegen.h" #include "arm_codegen.h"
void generate_indirect_branch_arm(void);
u32 prepare_load_reg_pc(u32 scratch_reg, u32 reg_index, u32 pc_offset);
void generate_store_reg(u32 ireg, u32 reg_index);
u32 arm_update_gba_arm(u32 pc); u32 arm_update_gba_arm(u32 pc);
u32 arm_update_gba_thumb(u32 pc); u32 arm_update_gba_thumb(u32 pc);
u32 arm_update_gba_idle_arm(u32 pc); u32 arm_update_gba_idle_arm(u32 pc);
u32 arm_update_gba_idle_thumb(u32 pc); u32 arm_update_gba_idle_thumb(u32 pc);
// Although these are defined as a function, don't call them as /* Although these are defined as a function, don't call them as
// such (jump to it instead) * such (jump to it instead) */
void arm_indirect_branch_arm(u32 address); void arm_indirect_branch_arm(u32 address);
void arm_indirect_branch_thumb(u32 address); void arm_indirect_branch_thumb(u32 address);
void arm_indirect_branch_dual_arm(u32 address); void arm_indirect_branch_dual_arm(u32 address);
@ -37,7 +41,7 @@ void arm_indirect_branch_dual_thumb(u32 address);
void execute_store_cpsr(u32 new_cpsr, u32 store_mask, u32 address); void execute_store_cpsr(u32 new_cpsr, u32 store_mask, u32 address);
u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address); u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address);
void execute_store_spsr(u32 new_cpsr, u32 store_mask); void execute_store_spsr(u32 new_cpsr, u32 store_mask);
u32 execute_read_spsr(); u32 execute_read_spsr(void);
u32 execute_spsr_restore(u32 address); u32 execute_spsr_restore(u32 address);
void execute_swi_arm(u32 pc); void execute_swi_arm(u32 pc);
@ -53,8 +57,8 @@ void execute_store_u32_safe(u32 address, u32 source);
(((((u32)offset - (u32)source) - 8) >> 2) & 0xFFFFFF) \ (((((u32)offset - (u32)source) - 8) >> 2) & 0xFFFFFF) \
// reg_base_offset is the amount of bytes after reg_base where the registers /* reg_base_offset is the amount of bytes after reg_base where the registers
// actually begin. * actually begin. */
#define reg_base_offset 1024 #define reg_base_offset 1024
@ -77,25 +81,26 @@ void execute_store_u32_safe(u32 address, u32 source);
#define reg_rd ARMREG_R0 #define reg_rd ARMREG_R0
// Register allocation layout for ARM and Thumb: /* Register allocation layout for ARM and Thumb:
// Map from a GBA register to a host ARM register. -1 means load it * Map from a GBA register to a host ARM register. -1 means load it
// from memory into one of the temp registers. * from memory into one of the temp registers.
// The following registers are chosen based on statistical analysis * The following registers are chosen based on statistical analysis
// of a few games (see below), but might not be the best ones. Results * of a few games (see below), but might not be the best ones. Results
// vary tremendously between ARM and Thumb (for obvious reasons), so * vary tremendously between ARM and Thumb (for obvious reasons), so
// two sets are used. Take care to not call any function which can * two sets are used. Take care to not call any function which can
// overwrite any of these registers from the dynarec - only call * overwrite any of these registers from the dynarec - only call
// trusted functions in arm_stub.S which know how to save/restore * trusted functions in arm_stub.S which know how to save/restore
// them and know how to transfer them to the C functions it calls * them and know how to transfer them to the C functions it calls
// if necessary. * if necessary.
// The following define the actual registers available for allocation. * The following define the actual registers available for allocation.
// As registers are freed up add them to this list. * As registers are freed up add them to this list.
// Note that r15 is linked to the a0 temp reg - this register will * Note that r15 is linked to the a0 temp reg - this register will
// be preloaded with a constant upon read, and used to link to * be preloaded with a constant upon read, and used to link to
// indirect branch functions upon write. * indirect branch functions upon write.
*/
#define reg_x0 ARMREG_R3 #define reg_x0 ARMREG_R3
#define reg_x1 ARMREG_R4 #define reg_x1 ARMREG_R4
@ -148,22 +153,22 @@ r15: 0.091287% (-- 100.000000%)
s32 arm_register_allocation[] = s32 arm_register_allocation[] =
{ {
reg_x0, // GBA r0 reg_x0, /* GBA r0 */
reg_x1, // GBA r1 reg_x1, /* GBA r1 */
mem_reg, // GBA r2 mem_reg, /* GBA r2 */
mem_reg, // GBA r3 mem_reg, /* GBA r3 */
mem_reg, // GBA r4 mem_reg, /* GBA r4 */
mem_reg, // GBA r5 mem_reg, /* GBA r5 */
reg_x2, // GBA r6 reg_x2, /* GBA r6 */
mem_reg, // GBA r7 mem_reg, /* GBA r7 */
mem_reg, // GBA r8 mem_reg, /* GBA r8 */
reg_x3, // GBA r9 reg_x3, /* GBA r9 */
mem_reg, // GBA r10 mem_reg, /* GBA r10 */
mem_reg, // GBA r11 mem_reg, /* GBA r11 */
reg_x4, // GBA r12 reg_x4, /* GBA r12 */
mem_reg, // GBA r13 mem_reg, /* GBA r13 */
reg_x5, // GBA r14 reg_x5, /* GBA r14 */
reg_a0 // GBA r15 reg_a0 /* GBA r15 */
mem_reg, mem_reg,
mem_reg, mem_reg,
@ -185,22 +190,22 @@ s32 arm_register_allocation[] =
s32 thumb_register_allocation[] = s32 thumb_register_allocation[] =
{ {
reg_x0, // GBA r0 reg_x0, /* GBA r0 */
reg_x1, // GBA r1 reg_x1, /* GBA r1 */
reg_x2, // GBA r2 reg_x2, /* GBA r2 */
reg_x3, // GBA r3 reg_x3, /* GBA r3 */
reg_x4, // GBA r4 reg_x4, /* GBA r4 */
reg_x5, // GBA r5 reg_x5, /* GBA r5 */
mem_reg, // GBA r6 mem_reg, /* GBA r6 */
mem_reg, // GBA r7 mem_reg, /* GBA r7 */
mem_reg, // GBA r8 mem_reg, /* GBA r8 */
mem_reg, // GBA r9 mem_reg, /* GBA r9 */
mem_reg, // GBA r10 mem_reg, /* GBA r10 */
mem_reg, // GBA r11 mem_reg, /* GBA r11 */
mem_reg, // GBA r12 mem_reg, /* GBA r12 */
mem_reg, // GBA r13 mem_reg, /* GBA r13 */
mem_reg, // GBA r14 mem_reg, /* GBA r14 */
reg_a0 // GBA r15 reg_a0 /* GBA r15 */
mem_reg, mem_reg,
mem_reg, mem_reg,
@ -220,19 +225,16 @@ s32 thumb_register_allocation[] =
mem_reg, mem_reg,
}; };
#define arm_imm_lsl_to_rot(value) \ #define arm_imm_lsl_to_rot(value) \
(32 - value) \ (32 - value) \
u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations) u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
{ {
u32 store_count = 0; u32 store_count = 0;
u32 left_shift = 0; u32 left_shift = 0;
// Otherwise it'll return 0 things to store because it'll never /* Otherwise it'll return 0 things to store because it'll never
// find anything. * find anything. */
if(imm == 0) if(imm == 0)
{ {
rotations[0] = 0; rotations[0] = 0;
@ -240,7 +242,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
return 1; return 1;
} }
// Find chunks of non-zero data at 2 bit alignments. /* Find chunks of non-zero data at 2 bit alignments. */
while(1) while(1)
{ {
for(; left_shift < 32; left_shift += 2) for(; left_shift < 32; left_shift += 2)
@ -249,20 +251,19 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
break; break;
} }
/* We've hit the end of the useful data. */
if(left_shift == 32) if(left_shift == 32)
{
// We've hit the end of the useful data.
return store_count; return store_count;
}
// Hit the end, it might wrap back around to the beginning. /* Hit the end, it might wrap back around to the beginning. */
if(left_shift >= 24) if(left_shift >= 24)
{ {
// Make a mask for the residual bits. IE, if we have /* Make a mask for the residual bits. IE, if we have
// 5 bits of data at the end we can wrap around to 3 * 5 bits of data at the end we can wrap around to 3
// bits of data in the beginning. Thus the first * bits of data in the beginning. Thus the first
// thing, after being shifted left, has to be less * thing, after being shifted left, has to be less
// than 111b, 0x7, or (1 << 3) - 1. * than 111b, 0x7, or (1 << 3) - 1.
*/
u32 top_bits = 32 - left_shift; u32 top_bits = 32 - left_shift;
u32 residual_bits = 8 - top_bits; u32 residual_bits = 8 - top_bits;
u32 residual_mask = (1 << residual_bits) - 1; u32 residual_mask = (1 << residual_bits) - 1;
@ -270,8 +271,8 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
if((store_count > 1) && (left_shift > 24) && if((store_count > 1) && (left_shift > 24) &&
((stores[0] << ((32 - rotations[0]) & 0x1F)) < residual_mask)) ((stores[0] << ((32 - rotations[0]) & 0x1F)) < residual_mask))
{ {
// Then we can throw out the last bit and tack it on /* Then we can throw out the last bit and tack it on
// to the first bit. * to the first bit. */
stores[0] = stores[0] =
(stores[0] << ((top_bits + (32 - rotations[0])) & 0x1F)) | (stores[0] << ((top_bits + (32 - rotations[0])) & 0x1F)) |
((imm >> left_shift) & 0xFF); ((imm >> left_shift) & 0xFF);
@ -281,7 +282,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
} }
else else
{ {
// There's nothing to wrap over to in the beginning /* There's nothing to wrap over to in the beginning */
stores[store_count] = (imm >> left_shift) & 0xFF; stores[store_count] = (imm >> left_shift) & 0xFF;
rotations[store_count] = (32 - left_shift) & 0x1F; rotations[store_count] = (32 - left_shift) & 0x1F;
return store_count + 1; return store_count + 1;
@ -372,7 +373,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
#define generate_exit_block() \ #define generate_exit_block() \
ARM_BX(0, ARMREG_LR) \ ARM_BX(0, ARMREG_LR) \
// The branch target is to be filled in later (thus a 0 for now) /* The branch target is to be filled in later (thus a 0 for now) */
#define generate_branch_filler(condition_code, writeback_location) \ #define generate_branch_filler(condition_code, writeback_location) \
(writeback_location) = translation_ptr; \ (writeback_location) = translation_ptr; \
@ -412,9 +413,10 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
*((u32 *)(dest)) = (*((u32 *)dest) & 0xFF000000) | \ *((u32 *)(dest)) = (*((u32 *)dest) & 0xFF000000) | \
arm_relative_offset(dest, offset) \ arm_relative_offset(dest, offset) \
// A different function is called for idle updates because of the relative /* A different function is called for idle updates because of the relative
// location of the embedded PC. The idle version could be optimized to put * location of the embedded PC. The idle version could be optimized to put
// the CPU into halt mode too, however. * the CPU into halt mode too, however.
*/
#define generate_branch_idle_eliminate(writeback_location, new_pc, mode) \ #define generate_branch_idle_eliminate(writeback_location, new_pc, mode) \
generate_function_call(arm_update_gba_idle_##mode); \ generate_function_call(arm_update_gba_idle_##mode); \
@ -443,7 +445,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
generate_cycle_update(); \ generate_cycle_update(); \
generate_branch_no_cycle_update(writeback_location, new_pc, mode) \ generate_branch_no_cycle_update(writeback_location, new_pc, mode) \
// a0 holds the destination /* a0 holds the destination */
#define generate_indirect_branch_no_cycle_update(type) \ #define generate_indirect_branch_no_cycle_update(type) \
ARM_B(0, arm_relative_offset(translation_ptr, arm_indirect_branch_##type)) \ ARM_B(0, arm_relative_offset(translation_ptr, arm_indirect_branch_##type)) \
@ -455,7 +457,7 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
#define generate_block_prologue() \ #define generate_block_prologue() \
#define generate_block_extra_vars_arm() \ #define generate_block_extra_vars_arm() \
void generate_indirect_branch_arm() \ void generate_indirect_branch_arm(void) \
{ \ { \
if(condition == 0x0E) \ if(condition == 0x0E) \
{ \ { \
@ -639,9 +641,10 @@ u32 arm_disect_imm_32bit(u32 imm, u32 *stores, u32 *rotations)
#define block_prologue_size 0 #define block_prologue_size 0
// It should be okay to still generate result flags, spsr will overwrite them. /* It should be okay to still generate result flags, spsr will overwrite them.
// This is pretty infrequent (returning from interrupt handlers, et al) so * This is pretty infrequent (returning from interrupt handlers, et al) so
// probably not worth optimizing for. * probably not worth optimizing for.
*/
#define check_for_interrupts() \ #define check_for_interrupts() \
if((io_registers[REG_IE] & io_registers[REG_IF]) && \ if((io_registers[REG_IE] & io_registers[REG_IF]) && \
@ -927,9 +930,10 @@ u32 execute_spsr_restore_body(u32 pc)
#define generate_op_mvns_reg_immshift(_rd, _rn, _rm, shift_type, shift) \ #define generate_op_mvns_reg_immshift(_rd, _rn, _rm, shift_type, shift) \
generate_op_reg_immshift_uflags(MVNS, _rd, _rm, shift_type, shift) \ generate_op_reg_immshift_uflags(MVNS, _rd, _rm, shift_type, shift) \
// The reg operand is in reg_rm, not reg_rn like expected, so rsbs isn't /* The reg operand is in reg_rm, not reg_rn like expected, so rsbs isn't
// being used here. When rsbs is fully inlined it can be used with the * being used here. When rsbs is fully inlined it can be used with the
// appropriate operands. * appropriate operands.
*/
#define generate_op_neg_reg_immshift(_rd, _rn, _rm, shift_type, shift) \ #define generate_op_neg_reg_immshift(_rd, _rn, _rm, shift_type, shift) \
{ \ { \
@ -1092,7 +1096,7 @@ u32 execute_spsr_restore_body(u32 pc)
#define arm_generate_op_reg_flags(name, load_op, store_op, flags_op) \ #define arm_generate_op_reg_flags(name, load_op, store_op, flags_op) \
arm_generate_op_reg(name, load_op, store_op, flags_op) \ arm_generate_op_reg(name, load_op, store_op, flags_op) \
// imm will be loaded by the called function if necessary. /* imm will be loaded by the called function if necessary. */
#define arm_generate_op_imm(name, load_op, store_op, flags_op) \ #define arm_generate_op_imm(name, load_op, store_op, flags_op) \
arm_decode_data_proc_imm(opcode); \ arm_decode_data_proc_imm(opcode); \
@ -1203,8 +1207,9 @@ u32 execute_spsr_restore_body(u32 pc)
#define arm_psr_read(op_type, psr_reg) \ #define arm_psr_read(op_type, psr_reg) \
arm_psr_read_##psr_reg() \ arm_psr_read_##psr_reg() \
// This function's okay because it's called from an ASM function that can /* This function's okay because it's called from an ASM function that can
// wrap it correctly. * wrap it correctly.
*/
u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
{ {
@ -1251,9 +1256,10 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
arm_psr_##transfer_type(op_type, psr_reg); \ arm_psr_##transfer_type(op_type, psr_reg); \
} \ } \
// TODO: loads will need the PC passed as well for open address, however can /* TODO: loads will need the PC passed as well for open address, however can
// eventually be rectified with a hash table on the memory accesses * eventually be rectified with a hash table on the memory accesses
// (same with the stores) * (same with the stores)
*/
#define arm_access_memory_load(mem_type) \ #define arm_access_memory_load(mem_type) \
cycle_count += 2; \ cycle_count += 2; \
@ -1267,7 +1273,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
generate_function_call(execute_store_##mem_type); \ generate_function_call(execute_store_##mem_type); \
write32((pc + 4)) \ write32((pc + 4)) \
// Calculate the address into a0 from _rn, _rm /* Calculate the address into a0 from _rn, _rm */
#define arm_access_memory_adjust_reg_sh_up(ireg) \ #define arm_access_memory_adjust_reg_sh_up(ireg) \
ARM_ADD_REG_IMMSHIFT(0, ireg, _rn, _rm, ((opcode >> 5) & 0x03), \ ARM_ADD_REG_IMMSHIFT(0, ireg, _rn, _rm, ((opcode >> 5) & 0x03), \
@ -1365,7 +1371,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
printf("sbit on %s %s %s %s\n", #access_type, #pre_op, #post_op, #wb) \ printf("sbit on %s %s %s %s\n", #access_type, #pre_op, #post_op, #wb) \
// TODO: Make these use cached registers. Implement iwram_stack_optimize. /* TODO: Make these use cached registers. Implement iwram_stack_optimize. */
#define arm_block_memory_load() \ #define arm_block_memory_load() \
generate_function_call(execute_load_u32); \ generate_function_call(execute_load_u32); \
@ -1416,7 +1422,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
#define arm_block_memory_writeback_no() #define arm_block_memory_writeback_no()
// Only emit writeback if the register is not in the list /* Only emit writeback if the register is not in the list */
#define arm_block_memory_writeback_load(writeback_type) \ #define arm_block_memory_writeback_load(writeback_type) \
if(!((reg_list >> rn) & 0x01)) \ if(!((reg_list >> rn) & 0x01)) \
@ -1625,7 +1631,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
complete_store_reg(__rd, rd); \ complete_store_reg(__rd, rd); \
} \ } \
// Operation types: imm, mem_reg, mem_imm /* Operation types: imm, mem_reg, mem_imm */
#define thumb_access_memory_load(mem_type, _rd) \ #define thumb_access_memory_load(mem_type, _rd) \
cycle_count += 2; \ cycle_count += 2; \
@ -1663,7 +1669,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
thumb_access_memory_##access_type(mem_type, _rd); \ thumb_access_memory_##access_type(mem_type, _rd); \
} \ } \
// TODO: Make these use cached registers. Implement iwram_stack_optimize. /* TODO: Make these use cached registers. Implement iwram_stack_optimize. */
#define thumb_block_address_preadjust_up() \ #define thumb_block_address_preadjust_up() \
generate_add_imm(reg_s0, (bit_count[reg_list] * 4), 0) \ generate_add_imm(reg_s0, (bit_count[reg_list] * 4), 0) \
@ -1898,13 +1904,13 @@ u8 swi_hle_handle[256] =
0x0 // SWI 2A: SoundGetJumpList 0x0 // SWI 2A: SoundGetJumpList
}; };
void execute_swi_hle_div_arm(); void execute_swi_hle_div_arm(void);
void execute_swi_hle_div_thumb(); void execute_swi_hle_div_thumb(void);
void execute_swi_hle_div_c() void execute_swi_hle_div_c(void)
{ {
/* real BIOS supposedly locks up, but game can recover on interrupt */
if (reg[1] == 0) if (reg[1] == 0)
// real BIOS supposedly locks up, but game can recover on interrupt
return; return;
s32 result = (s32)reg[0] / (s32)reg[1]; s32 result = (s32)reg[0] / (s32)reg[1];
reg[1] = (s32)reg[0] % (s32)reg[1]; reg[1] = (s32)reg[0] % (s32)reg[1];