Add preliminary support for non-MIPS32r2 devices

This is required for the PS2, and could also allow older Dingux devices to
run gpsp on RetroArch.
This commit is contained in:
David Guillen Fandos 2021-06-18 18:03:47 +02:00
parent 34b90277bc
commit e0a31952db
3 changed files with 142 additions and 73 deletions

View File

@ -200,7 +200,7 @@ else ifeq ($(platform), psp1)
TARGET := $(TARGET_NAME)_libretro_$(platform).a
CC = psp-gcc$(EXE_EXT)
AR = psp-ar$(EXE_EXT)
CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT
CFLAGS += -DPSP -G0 -DUSE_BGR_FORMAT -DMIPS_HAS_R2_INSTS
CFLAGS += -I$(shell psp-config --pspsdk-path)/include
CFLAGS += -march=allegrex -mfp32 -mgp32 -mlong32 -mabi=eabi
CFLAGS += -fomit-frame-pointer -ffast-math
@ -375,7 +375,7 @@ else ifeq ($(platform), mips32)
SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
CFLAGS += -fno-caller-saves
CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips
@ -393,6 +393,7 @@ else ifeq ($(platform), gcw0)
SHARED := -shared -nostdlib -Wl,--version-script=link.T
fpic := -fPIC -DPIC
CFLAGS += -fomit-frame-pointer -ffast-math -march=mips32 -mtune=mips32r2 -mhard-float
CFLAGS += -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips
@ -408,7 +409,7 @@ else ifeq ($(platform), gcw0-odbeta)
# The ASM code and/or MIPS dynarec of GPSP does not respect
# MIPS calling conventions, so we must use '-fno-caller-saves'
# for the OpenDingux Beta build
CFLAGS += -fno-caller-saves
CFLAGS += -fno-caller-saves -DMIPS_HAS_R2_INSTS
HAVE_DYNAREC := 1
CPU_ARCH := mips

View File

@ -791,12 +791,13 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \
reg_temp, _shift); \
} \
else \
{ \
{ /* Special case: RRX (no carry update) */ \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
insert_bits(arm_to_mips_reg[arm_reg], reg_c_cache, reg_temp, 31, 1); \
} \
_rm = arm_reg \
@ -804,7 +805,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \
extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (32 - _shift), 1); \
mips_emit_sll(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
_rm = arm_reg; \
} \
@ -813,7 +814,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
} \
else \
@ -827,7 +828,7 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
mips_emit_sra(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
} \
else \
@ -841,15 +842,16 @@ u32 arm_to_mips_reg[] =
check_load_reg_pc(arm_reg, _rm, 8); \
if(_shift != 0) \
{ \
mips_emit_ext(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
mips_emit_rotr(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], _shift); \
extract_bits(reg_c_cache, arm_to_mips_reg[_rm], (_shift - 1), 1); \
rotate_right(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], \
reg_temp, _shift); \
} \
else \
{ \
mips_emit_andi(reg_temp, arm_to_mips_reg[_rm], 1); \
{ /* Special case: RRX (carry update) */ \
mips_emit_sll(reg_temp, reg_c_cache, 31); \
mips_emit_andi(reg_c_cache, arm_to_mips_reg[_rm], 1); \
mips_emit_srl(arm_to_mips_reg[arm_reg], arm_to_mips_reg[_rm], 1); \
mips_emit_ins(arm_to_mips_reg[arm_reg], reg_c_cache, 31, 1); \
mips_emit_addu(reg_c_cache, reg_temp, reg_zero); \
mips_emit_or(arm_to_mips_reg[arm_reg], arm_to_mips_reg[arm_reg],reg_temp);\
} \
_rm = arm_reg \
@ -870,7 +872,8 @@ u32 arm_to_mips_reg[] =
mips_emit_sra(reg_a0, reg_a0, 31) \
#define generate_shift_reg_ror_no_flags(_rm, _rs) \
mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \
reg_temp, arm_to_mips_reg[_rs]) \
#define generate_shift_reg_lsl_flags(_rm, _rs) \
generate_load_reg_pc(reg_a0, _rm, 12); \
@ -892,7 +895,8 @@ u32 arm_to_mips_reg[] =
mips_emit_addiu(reg_temp, arm_to_mips_reg[_rs], -1); \
mips_emit_srlv(reg_temp, arm_to_mips_reg[_rm], reg_temp); \
mips_emit_andi(reg_c_cache, reg_temp, 1); \
mips_emit_rotrv(reg_a0, arm_to_mips_reg[_rm], arm_to_mips_reg[_rs]) \
rotate_right_var(reg_a0, arm_to_mips_reg[_rm], \
reg_temp, arm_to_mips_reg[_rs]) \
#define generate_shift_imm(arm_reg, name, flags_op) \
u32 shift = (opcode >> 7) & 0x1F; \
@ -1894,7 +1898,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
} \
else \
{ \
mips_emit_ins(reg_a2, reg_zero, 0, 2); \
emit_align_reg(reg_a2, 2); \
\
for(i = 0; i < 16; i++) \
{ \
@ -2070,20 +2074,6 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
check_store_reg_pc_thumb(dest_rd); \
} \
/*
#define thumb_data_proc_hi(name) \
{ \
thumb_decode_hireg_op(); \
check_load_reg_pc(arm_reg_a0, rs, 4); \
check_load_reg_pc(arm_reg_a1, rd, 4); \
generate_op_##name##_reg(arm_to_mips_reg[rd], arm_to_mips_reg[rd], \
arm_to_mips_reg[rs]); \
check_store_reg_pc_thumb(rd); \
} \
*/
#define thumb_data_proc_test_hi(name) \
{ \
thumb_decode_hireg_op(); \
@ -2331,7 +2321,7 @@ u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address)
} \
else \
{ \
mips_emit_ins(reg_a2, reg_zero, 0, 2); \
emit_align_reg(reg_a2, 2); \
\
for(i = 0; i < 8; i++) \
{ \
@ -2528,6 +2518,71 @@ u8 swi_hle_handle[256] =
generate_load_pc(reg_a0, pc); \
mips_emit_sw(reg_a0, reg_base, (REG_PC * 4)) \
// Some macros to wrap device-specific instructions
//
// Each helper emits code equivalent to one MIPS32R2 instruction (ins, ext,
// seb, rotr, rotrv). When MIPS_HAS_R2_INSTS is not defined, the helper falls
// back to a short sequence of shifts and ORs, which may clobber the extra
// temp register it is given.
// Every helper expands to exactly one statement without a trailing
// semicolon, so callers write `helper(...);` and it is safe in an unbraced
// if/else.

/* MIPS32R2 and PSP support ins, ext, seb, rotr */
#ifdef MIPS_HAS_R2_INSTS
  // Inserts the `size` low bits of rsrc into rdest at bit `pos`
  // (rtemp is unused in this variant)
  #define insert_bits(rdest, rsrc, rtemp, pos, size) \
    mips_emit_ins(rdest, rsrc, pos, size)
  // Doubles a byte into a halfword: copies bits 0-7 of reg into bits 8-15
  // (rtmp is unused in this variant)
  #define double_byte(reg, rtmp) \
    mips_emit_ins(reg, reg, 8, 8)
  // Clears numbits at LSB position (to align an address)
  #define emit_align_reg(reg, numbits) \
    mips_emit_ins(reg, reg_zero, 0, numbits)
  // Extract a bitfield (pos, size) to a register
  #define extract_bits(rt, rs, pos, size) \
    mips_emit_ext(rt, rs, pos, size)
  // Extends signed byte to u32
  #define extend_byte_signed(rt, rs) \
    mips_emit_seb(rt, rs)
  // Rotates a word right by a constant amount (rtemp is unused here)
  #define rotate_right(rdest, rsrc, rtemp, amount) \
    mips_emit_rotr(rdest, rsrc, amount)
  // Same but variable amount rotation (register); rtemp is unused here
  #define rotate_right_var(rdest, rsrc, rtemp, ramount) \
    mips_emit_rotrv(rdest, rsrc, ramount)
#else
  // Inserts the `size` low bits of rsrc into rdest at bit `pos`
  // *assumes dest bits are cleared*! (the field is ORed in, not replaced)
  // Clobbers rtemp. Requires 1 <= size and pos + size <= 32.
  #define insert_bits(rdest, rsrc, rtemp, pos, size) \
    do { \
      mips_emit_sll(rtemp, rsrc, 32 - (size)); \
      mips_emit_srl(rtemp, rtemp, 32 - (size) - (pos)); \
      mips_emit_or(rdest, rdest, rtemp); \
    } while (0)
  // Doubles a byte into a halfword. Clobbers rtmp.
  // Only the low 16 bits of the result are the doubled byte: unlike the
  // `ins` variant, bits 16-31 end up holding the old bits 8-23 of reg.
  #define double_byte(reg, rtmp) \
    do { \
      mips_emit_sll(rtmp, reg, 8); \
      mips_emit_andi(reg, reg, 0xff); \
      mips_emit_or(reg, reg, rtmp); \
    } while (0)
  // Clears numbits at LSB position (to align an address)
  #define emit_align_reg(reg, numbits) \
    do { \
      mips_emit_srl(reg, reg, (numbits)); \
      mips_emit_sll(reg, reg, (numbits)); \
    } while (0)
  // Extract a bitfield (pos, size) to a register
  // Shifts the field up to the MSB, then back down to bit 0.
  #define extract_bits(rt, rs, pos, size) \
    do { \
      mips_emit_sll(rt, rs, 32 - ((pos) + (size))); \
      mips_emit_srl(rt, rt, 32 - (size)); \
    } while (0)
  // Extends signed byte to u32
  #define extend_byte_signed(rt, rs) \
    do { \
      mips_emit_sll(rt, rs, 24); \
      mips_emit_sra(rt, rt, 24); \
    } while (0)
  // Rotates a word right by a constant amount (uses temp reg)
  // rdest may equal rsrc; rtemp must differ from both.
  // The left-shift amount is masked to 5 bits so that amount == 0 emits a
  // shift by 0 instead of an out-of-range 32.
  // NOTE(review): confirm whether mips_emit_sll already masks its shift.
  #define rotate_right(rdest, rsrc, rtemp, amount) \
    do { \
      mips_emit_sll(rtemp, rsrc, (32 - (amount)) & 31); \
      mips_emit_srl(rdest, rsrc, (amount)); \
      mips_emit_or(rdest, rdest, rtemp); \
    } while (0)
  // Variable rotation using temp reg (dst != src)
  // rsrc is read again after rdest is written, hence the dst != src rule;
  // rtemp must differ from rdest and rsrc.
  #define rotate_right_var(rdest, rsrc, rtemp, ramount) \
    do { \
      mips_emit_andi(rtemp, ramount, 0x1F); \
      mips_emit_srlv(rdest, rsrc, rtemp); \
      mips_emit_subu(rtemp, reg_zero, rtemp); \
      mips_emit_addiu(rtemp, rtemp, 32); \
      mips_emit_sllv(rtemp, rsrc, rtemp); \
      mips_emit_or(rdest, rdest, rtemp); \
    } while (0)
#endif
// Register save layout as follows:
#define ReOff_RegPC (15*4) // REG_PC
@ -2698,7 +2753,7 @@ static void emit_pmemld_stub(
// Address checking: jumps to handler if bad region/alignment
mips_emit_srl(reg_temp, reg_a0, (32 - regionbits));
if (!aligned && size != 0) { // u8 or aligned u32 dont need to check alignment bits
mips_emit_ins(reg_temp, reg_a0, regionbits, size); // Add 1 or 2 bits of alignment
insert_bits(reg_temp, reg_a0, reg_rv, regionbits, size); // Add 1 or 2 bits of alignment
}
if (regioncheck || alignment) { // If region and alignment are zero, can skip
mips_emit_xori(reg_temp, reg_temp, regioncheck | (alignment << regionbits));
@ -2735,7 +2790,7 @@ static void emit_pmemld_stub(
// This code call the C routine to map the relevant ROM page
emit_save_regs(aligned);
mips_emit_sw(mips_reg_ra, reg_base, ReOff_SaveR3);
mips_emit_ext(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
extract_bits(reg_a0, reg_a0, 15, 10); // a0 = (addr >> 15) & 0x3ff
genccall(&load_gamepak_page);
mips_emit_sw(reg_temp, reg_base, ReOff_SaveR1);
@ -2750,11 +2805,11 @@ static void emit_pmemld_stub(
// Read from flash, is a bit special, fn call
emit_mem_call_ds(&read_backup, 0xFFFF);
if (!size && signext) {
mips_emit_seb(reg_rv, reg_rv);
extend_byte_signed(reg_rv, reg_rv);
} else if (size == 1 && alignment) {
mips_emit_seb(reg_rv, reg_rv);
extend_byte_signed(reg_rv, reg_rv);
} else if (size == 2) {
mips_emit_rotr(reg_rv, reg_rv, 8 * alignment);
rotate_right(reg_rv, reg_rv, reg_temp, 8 * alignment);
}
generate_function_return_swap_delay();
*tr_ptr = translation_ptr;
@ -2770,21 +2825,22 @@ static void emit_pmemld_stub(
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && alignment != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && alignment != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip unless last block
generate_swap_delay();
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
@ -2795,16 +2851,13 @@ static void emit_pmemld_stub(
}
}
// Aligned accesses (or the weird s16u1 case) are just one inst
if (alignment == 0 || (size == 1 && signext)) {
emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext); // Delay slot
translation_ptr += 4;
}
else {
// Unaligned accesses (require rotation) need two insts
emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
translation_ptr += 4;
mips_emit_rotr(reg_rv, reg_rv, alignment * 8); // Delay slot
// Emit load operation
emit_mem_access_loadop(translation_ptr, base_addr, size, alignment, signext);
translation_ptr += 4;
if (!(alignment == 0 || (size == 1 && signext))) {
// Unaligned accesses require rotation, except for size=1 & signext
rotate_right(reg_rv, reg_rv, reg_temp, alignment * 8);
}
generate_function_return_swap_delay(); // Return. Move prev inst to delay slot
@ -2842,26 +2895,27 @@ static void emit_pmemst_stub(
mips_emit_lui(reg_rv, ((base_addr + 0x8000) >> 16));
if (doubleaccess) {
mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
double_byte(reg_a1, reg_temp); // value = value | (value << 8)
}
if (region == 2) {
// Can't do EWRAM with an `andi` instruction (18 bits mask)
mips_emit_ext(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
extract_bits(reg_a0, reg_a0, 0, 18); // &= 0x3ffff
if (!aligned && realsize != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, size);// addr & ~1/2 (align to size)
emit_align_reg(reg_a0, size); // addr & ~1/2 (align to size)
}
// Need to insert a zero in the addr (due to how it's mapped)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // Adds to the base addr
} else if (region == 6) {
// VRAM is mirrored every 128KB but the last 32KB is mapped to the previous
mips_emit_ext(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
extract_bits(reg_temp, reg_a0, 15, 2); // Extract bits 15 and 16
mips_emit_addiu(reg_temp, reg_temp, -3); // Check for 3 (last block)
if (!aligned && realsize != 0) {
mips_emit_ins(reg_a0, reg_zero, 0, realsize);// addr & ~1/2 (align to size)
emit_align_reg(reg_a0, realsize); // addr & ~1/2 (align to size)
}
mips_emit_b(bne, reg_zero, reg_temp, 2); // Skip unless last block
mips_emit_ext(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
extract_bits(reg_a0, reg_a0, 0, 17); // addr & 0x1FFFF [delay]
mips_emit_b(bne, reg_zero, reg_temp, 1); // Skip next inst unless last block
generate_swap_delay();
mips_emit_addiu(reg_a0, reg_a0, 0x8000); // addr - 0x8000 (mirror last block)
mips_emit_addu(reg_rv, reg_rv, reg_a0); // addr = base + adjusted offset
} else {
@ -2951,7 +3005,7 @@ static void emit_palette_hdl(
mips_emit_b(bne, reg_zero, reg_temp, st_phndlr_branch(memop_number));
mips_emit_andi(reg_rv, reg_a0, memmask); // Clear upper bits (mirroring)
if (size == 0) {
mips_emit_ins(reg_a1, reg_a1, 8, 8); // value = value | (value << 8)
double_byte(reg_a1, reg_temp); // value = value | (value << 8)
}
mips_emit_addu(reg_rv, reg_rv, reg_base);
@ -3187,15 +3241,16 @@ static void emit_phand(
mips_emit_min(reg_temp, reg_temp, reg_rv);// Do not overflow table
#else
mips_emit_sltiu(reg_rv, reg_temp, 0x0F); // Check for addr 0x1XXX.. 0xFXXX
mips_emit_b(bne, reg_zero, reg_rv, 2); // Skip two insts (well, cant skip ds)
mips_emit_sll(reg_temp, reg_temp, 2); // Table is word indexed
mips_emit_b(bne, reg_zero, reg_rv, 1); // Skip next inst if region is good
generate_swap_delay();
mips_emit_addiu(reg_temp, reg_zero, 15*4);// Simulate ld/st to 0x0FXXX (open/ignore)
#endif
// Stores or byte-accesses do not care about alignment
if (check_alignment) {
// Move alignment bits for the table lookup
mips_emit_ins(reg_temp, reg_a0, 6, size); // Alignment bits (1 or 2, to bits 6 (and 7)
// Move alignment bits for the table lookup (1 or 2, to bits 6 and 7)
insert_bits(reg_temp, reg_a0, reg_rv, 6, size);
}
unsigned tbloff = 256 + 3*1024 + 220 + 4 * toff; // Skip regs and RAMs

View File

@ -130,9 +130,25 @@
# make sure $16 has the register base for these macros
.macro collapse_flag flag_reg, shift
ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
.endm
#ifdef MIPS_HAS_R2_INSTS
# Variant for cores that provide the ins and ext instructions.
# collapse_flag copies bit 0 of register flag_reg into bit shift of $2.
.macro collapse_flag flag_reg, shift
ins $2, $\flag_reg, \shift, 1 # insert flag into CPSR
.endm
# extract_flag copies bit shift of $1 into register flag_reg as 0 or 1.
.macro extract_flag shift, flag_reg
ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
.endm
#else
# Fallback variant built from plain shifts, for cores without ins and ext.
# collapse_flag here ORs the shifted flag into $2 rather than replacing the
# bit, so the target bit of $2 must already be clear. It also overwrites $1.
# NOTE(review): presumably flag_reg only ever holds 0 or 1, verify callers.
.macro collapse_flag flag_reg, shift
sll $1, $\flag_reg, \shift # move flag to its CPSR bit position (uses $1)
or $2, $2, $1 # merge flag into CPSR
.endm
# extract_flag copies bit shift of $1 into register flag_reg as 0 or 1.
.macro extract_flag shift, flag_reg
srl $\flag_reg, $1, \shift # bring the wanted CPSR bit down to bit 0
andi $\flag_reg, $\flag_reg, 1 # drop all the higher bits
.endm
#endif
.macro collapse_flags
lw $2, REG_CPSR($16) # load CPSR
@ -144,10 +160,6 @@
sw $2, REG_CPSR($16) # store CPSR
.endm
.macro extract_flag shift, flag_reg
ext $\flag_reg, $1, \shift, 1 # extract flag from CPSR
.endm
.macro extract_flags_body # extract flags from $1
extract_flag 31, 20 # load flags
extract_flag 30, 21
@ -403,7 +415,8 @@ execute_swi:
sw $4, SUPERVISOR_LR($16) # store next PC in the supervisor's LR
collapse_flags # get cpsr in $2
sw $2, SUPERVISOR_SPSR($16) # save cpsr in SUPERVISOR_CPSR
ins $2, $0, 0, 6 # zero out bottom 6 bits of CPSR
srl $2, $2, 6 # zero out bottom 6 bits of CPSR
sll $2, $2, 6
ori $2, 0x13 # set mode to supervisor
sw $2, REG_CPSR($16) # write back CPSR
save_registers