From 4dc706f36198ca705d1d6ee5b7be649c269656ab Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 25 Sep 2021 23:26:08 +0200 Subject: [PATCH] Improve flag shifts (MIPS) --- psp/mips_emit.h | 53 ++++++++++++++++++++++++++++-------- psp/mips_stub.S | 72 ------------------------------------------------- 2 files changed, 42 insertions(+), 83 deletions(-) diff --git a/psp/mips_emit.h b/psp/mips_emit.h index 9f5bc1a..6ff64de 100644 --- a/psp/mips_emit.h +++ b/psp/mips_emit.h @@ -53,11 +53,6 @@ void execute_store_spsr(u32 new_spsr, u32 store_mask); u32 execute_spsr_restore_body(u32 address); u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address); -u32 execute_lsl_flags_reg(u32 value, u32 shift); -u32 execute_lsr_flags_reg(u32 value, u32 shift); -u32 execute_asr_flags_reg(u32 value, u32 shift); -u32 execute_ror_flags_reg(u32 value, u32 shift); - typedef enum { mips_reg_zero, @@ -847,19 +842,55 @@ u32 arm_to_mips_reg[] = reg_temp, arm_to_mips_reg[_rs]) \ #define generate_shift_reg_lsl_flags(_rm, _rs) \ +{ \ + u32 shift_reg = _rs; \ + check_load_reg_pc(arm_reg_a1, shift_reg, 8); \ generate_load_reg_pc(reg_a0, _rm, 12); \ - generate_load_reg_pc(reg_a1, _rs, 8); \ - generate_function_call_swap_delay(execute_lsl_flags_reg) \ + /* Only load the result on zero, no shift */ \ + mips_emit_b(beq, arm_to_mips_reg[shift_reg], reg_zero, 7); \ + generate_swap_delay(); \ + mips_emit_addiu(reg_temp, arm_to_mips_reg[shift_reg], -1); \ + mips_emit_sllv(reg_a0, reg_a0, reg_temp); \ + mips_emit_srl(reg_c_cache, reg_a0, 31); \ + mips_emit_sltiu(reg_temp, arm_to_mips_reg[shift_reg], 33); \ + mips_emit_sll(reg_a0, reg_a0, 1); \ + /* Result and flag to be zero if shift is 33+ */ \ + mips_emit_movz(reg_c_cache, reg_zero, reg_temp); \ + mips_emit_movz(reg_a0, reg_zero, reg_temp); \ +} \ #define generate_shift_reg_lsr_flags(_rm, _rs) \ +{ \ + u32 shift_reg = _rs; \ + check_load_reg_pc(arm_reg_a1, shift_reg, 8); \ generate_load_reg_pc(reg_a0, _rm, 12); \ - generate_load_reg_pc(reg_a1, _rs, 8) \ - generate_function_call_swap_delay(execute_lsr_flags_reg) \ + /* Only load the result on zero, no shift */ \ + mips_emit_b(beq, arm_to_mips_reg[shift_reg], reg_zero, 7); \ + generate_swap_delay(); \ + mips_emit_addiu(reg_temp, arm_to_mips_reg[shift_reg], -1); \ + mips_emit_srlv(reg_a0, reg_a0, reg_temp); \ + mips_emit_andi(reg_c_cache, reg_a0, 1); \ + mips_emit_sltiu(reg_temp, arm_to_mips_reg[shift_reg], 33); \ + mips_emit_srl(reg_a0, reg_a0, 1); \ + /* Result and flag to be zero if shift is 33+ */ \ + mips_emit_movz(reg_c_cache, reg_zero, reg_temp); \ + mips_emit_movz(reg_a0, reg_zero, reg_temp); \ +} \ #define generate_shift_reg_asr_flags(_rm, _rs) \ + generate_load_reg_pc(reg_a1, _rs, 8); \ generate_load_reg_pc(reg_a0, _rm, 12); \ - generate_load_reg_pc(reg_a1, _rs, 8) \ - generate_function_call_swap_delay(execute_asr_flags_reg) \ + /* Only load the result on zero, no shift */ \ + mips_emit_b(beq, reg_a1, reg_zero, 7); \ + generate_swap_delay(); \ + /* Cap shift at 32, since it's equivalent */ \ + mips_emit_addiu(reg_temp, reg_zero, 32); \ + mips_emit_srl(reg_rv, reg_a1, 5); \ + mips_emit_movn(reg_a1, reg_temp, reg_rv); \ + mips_emit_addiu(reg_temp, reg_a1, -1); \ + mips_emit_srav(reg_a0, reg_a0, reg_temp); \ + mips_emit_andi(reg_c_cache, reg_a0, 1); \ + mips_emit_sra(reg_a0, reg_a0, 1); \ #define generate_shift_reg_ror_flags(_rm, _rs) \ mips_emit_b(beq, arm_to_mips_reg[_rs], reg_zero, 3); \ diff --git a/psp/mips_stub.S b/psp/mips_stub.S index 6b152ff..7c7621d 100644 --- a/psp/mips_stub.S +++ b/psp/mips_stub.S @@ -492,78 +492,6 @@ defsymbl(execute_store_spsr) jr $ra # return sw $4, SPSR_BASE($1) # spsr[cpu_mode] = $4 (delay slot) -# $4: value -# $5: shift - -defsymbl(execute_lsl_flags_reg) - beq $5, $0, lsl_shift_zero # is the shift zero? - sltiu $1, $5, 32 # $1 = (shift < 32) (delay) - beq $1, $0, lsl_shift_high # is the shift >= 32? - li $2, 32 - - subu $2, $2, $5 # $2 = (32 - shift) - srlv $2, $4, $2 # $2 = (value >> (32 - shift)) - andi $22, $2, 1 # c flag = (value >> (32 - shift)) & 0x01 - -lsl_shift_zero: - jr $ra # return - sllv $4, $4, $5 # return (value << shift) (delay) - -lsl_shift_high: - sltiu $1, $5, 33 # $1 = (shift < 33) (delay) - bne $1, $0, lsl_shift_done # jump if shift == 32 - andi $22, $4, 1 # c flag = value & 0x01 (delay) - - addu $22, $0, $0 # c flag = 0 otherwise - -lsl_shift_done: - jr $ra # return - addu $4, $0, $0 # value = 0 no matter what - - -defsymbl(execute_lsr_flags_reg) - beq $5, $0, lsr_shift_zero # is the shift zero? - sltiu $1, $5, 32 # $1 = (shift < 32) (delay) - beq $1, $0, lsr_shift_high # is the shift >= 32? - addiu $2, $5, -1 # $2 = shift - 1 (delay) - - srlv $2, $4, $2 # $2 = (value >> (shift - 1)) - andi $22, $2, 1 # c flag = (value >> (32 - shift)) & 0x01 - -lsr_shift_zero: - jr $ra # return - srlv $4, $4, $5 # return (value >> shift) (delay) - -lsr_shift_high: - sltiu $1, $5, 33 # $1 = (shift < 33) (delay) - bne $1, $0, lsr_shift_done # jump if shift == 32 - srl $22, $4, 31 # c flag = value >> 31 (delay) - - addu $22, $0, $0 # c flag = 0 otherwise - -lsr_shift_done: - jr $ra # return - addu $4, $0, $0 # value = 0 no matter what - - -defsymbl(execute_asr_flags_reg) - beq $5, $0, asr_shift_zero # is the shift zero? - sltiu $1, $5, 32 # $1 = (shift < 32) (delay) - beq $1, $0, asr_shift_high # is the shift >= 32? - addiu $2, $5, -1 # $2 = shift - 1 (delay) - - srlv $2, $4, $2 # $2 = (value >> (shift - 1)) - andi $22, $2, 1 # c flag = (value >> (32 - shift)) & 0x01 - -asr_shift_zero: - jr $ra # return - srav $4, $4, $5 # return (value >> shift) (delay) - -asr_shift_high: - sra $4, $4, 31 # value >>= 31 - jr $ra # return - andi $22, $4, 1 # c flag = value & 0x01 - # $4: cycle counter argument # $5: pointer to reg