diff --git a/Makefile b/Makefile index a0ed865..6b183b8 100644 --- a/Makefile +++ b/Makefile @@ -349,6 +349,17 @@ else ifeq ($(platform), wii) CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DMSB_FIRST -D__ppc__ STATIC_LINKING = 1 +# aarch64 (armv8) +else ifeq ($(platform), arm64) + TARGET := $(TARGET_NAME)_libretro.so + SHARED := -shared -Wl,--version-script=link.T + fpic := -fPIC + CFLAGS += -fomit-frame-pointer -ffast-math + LDFLAGS += -Wl,--no-undefined + HAVE_DYNAREC := 1 + MMAP_JIT_CACHE = 1 + CPU_ARCH := arm64 + # ARM else ifneq (,$(findstring armv,$(platform))) TARGET := $(TARGET_NAME)_libretro.so @@ -488,6 +499,7 @@ CFLAGS += -DMMAP_JIT_CACHE endif # Add -DTRACE_INSTRUCTIONS to trace instruction execution +# Can add -DTRACE_REGISTERS to additionally print register values ifeq ($(DEBUG), 1) OPTIMIZE := -O0 -g else @@ -502,6 +514,8 @@ endif ifeq ($(CPU_ARCH), arm) DEFINES += -DARM_ARCH +else ifeq ($(CPU_ARCH), arm64) + DEFINES += -DARM64_ARCH else ifeq ($(CPU_ARCH), mips) DEFINES += -DMIPS_ARCH else ifeq ($(CPU_ARCH), x86_32) diff --git a/Makefile.common b/Makefile.common index bc4cdd3..d3e1493 100644 --- a/Makefile.common +++ b/Makefile.common @@ -31,16 +31,15 @@ SOURCES_C += $(CORE_DIR)/cpu_threaded.c endif ifeq ($(HAVE_DYNAREC), 1) - -ifeq ($(CPU_ARCH), x86_32) -SOURCES_ASM += $(CORE_DIR)/x86/x86_stub.S -endif -ifeq ($(CPU_ARCH), arm) -SOURCES_ASM += $(CORE_DIR)/arm/arm_stub.S -endif -ifeq ($(CPU_ARCH), mips) -SOURCES_ASM += $(CORE_DIR)/mips/mips_stub.S -endif + ifeq ($(CPU_ARCH), x86_32) + SOURCES_ASM += $(CORE_DIR)/x86/x86_stub.S + else ifeq ($(CPU_ARCH), arm) + SOURCES_ASM += $(CORE_DIR)/arm/arm_stub.S + else ifeq ($(CPU_ARCH), arm64) + SOURCES_ASM += $(CORE_DIR)/arm/arm64_stub.S + else ifeq ($(CPU_ARCH), mips) + SOURCES_ASM += $(CORE_DIR)/mips/mips_stub.S + endif endif ifeq ($(CPU_ARCH), arm) diff --git a/arm/arm64_codegen.h b/arm/arm64_codegen.h new file mode 100644 index 0000000..81a151b --- /dev/null +++ b/arm/arm64_codegen.h @@ -0,0 +1,297 @@ +/* gameplaySP + * + * Copyright (C) 2021 David Guillen Fandos + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +typedef enum +{ + aa64_opcode_logic = 0x0A, + aa64_opcode_addsub = 0x0B, + aa64_opcode_adr = 0x10, + aa64_opcode_addsubi = 0x11, + aa64_opcode_movi = 0x12, + aa64_opcode_bfm = 0x13, + aa64_opcode_b = 0x14, + aa64_opcode_b2 = 0x15, + aa64_opcode_tbz = 0x16, + aa64_opcode_tbnz = 0x17, + aa64_opcode_memi = 0x19, + aa64_opcode_misc = 0x1A, + aa64_opcode_mul4 = 0x1B, + +} aa64_opcode; + +typedef enum +{ + ccode_eq = 0x0, /* Equal Z == 1 */ + ccode_ne = 0x1, /* Not Equal Z == 0 */ + ccode_hs = 0x2, /* Carry Set C == 1 */ + ccode_lo = 0x3, /* Carry Clear C == 0 */ + ccode_mi = 0x4, /* Minus/Neg N == 1 */ + ccode_pl = 0x5, /* Plus/Pos N == 0 */ + ccode_vs = 0x6, /* Overflow V == 1 */ + ccode_vc = 0x7, /* !Overflow V == 0 */ + ccode_hi = 0x8, /* UGreatThan C && !Z */ + ccode_ls = 0x9, /* ULessEqual !C || Z */ + ccode_ge = 0xA, /* SGreatEqual N == V */ + ccode_lt = 0xB, /* SLessThan N != V */ + ccode_gt = 0xC, /* SLessThan !Z&N==V */ + ccode_le = 0xD, /* SLessEqual Z|(N!=V) */ + ccode_al = 0xE, /* Always */ + ccode_nv = 0xF, /* Never */ +} aa64_condcode; + + + +#define aa64_br_offset(label) \ + (((uintptr_t)(label) - (uintptr_t)(translation_ptr)) >> 2) \ + +#define aa64_br_offset_from(label, from) \ + (((uintptr_t)(label) - (uintptr_t)(from)) >> 2) \ + +#define aa64_emit_inst(opcode, ope, rd, rs, extra) \ +{ \ + *((u32 *)translation_ptr) = (aa64_opcode_##opcode << 24) | ((ope) << 29) | \ + ((rs) << 5) | (rd) | (extra); \ + translation_ptr += 4; \ +} + +#define aa64_emit_ldr(rv, rb, offset) \ + aa64_emit_inst(memi, 5, rv, rb, (1 << 22) | ((offset) << 10)) \ + +#define aa64_emit_str(rv, rb, offset) \ + aa64_emit_inst(memi, 5, rv, rb, (0 << 22) | ((offset) << 10)) \ + +#define aa64_emit_addshift(rd, rs, rm, st, sa) \ + aa64_emit_inst(addsub, 0, rd, rs, ((rm) << 16) | ((st)<<22) | ((sa)<<10)) \ + +#define aa64_emit_add_lsl(rd, rs, rm, sa) \ + aa64_emit_addshift(rd, rs, rm, 0, sa) \ + +#define aa64_emit_addi(rd, rs, imm) \ + aa64_emit_inst(addsubi, 0, rd, rs, (imm) << 10) \ + +#define aa64_emit_addi12(rd, rs, imm) \ + aa64_emit_inst(addsubi, 0, rd, rs, ((imm) << 10) | (1 << 22)) \ + +#define aa64_emit_addis(rd, rs, imm) \ + aa64_emit_inst(addsubi, 1, rd, rs, (imm) << 10) \ + +#define aa64_emit_subi(rd, rs, imm) \ + aa64_emit_inst(addsubi, 2, rd, rs, (imm) << 10) \ + +#define aa64_emit_subi12(rd, rs, imm) \ + aa64_emit_inst(addsubi, 2, rd, rs, ((imm) << 10) | (1 << 22)) \ + +#define aa64_emit_subis(rd, rs, imm) \ + aa64_emit_inst(addsubi, 3, rd, rs, (imm) << 10) \ + +/* rd = ra + rn * rm */ +#define aa64_emit_madd(rd, ra, rn, rm) \ + aa64_emit_inst(mul4, 0, rd, rn, ((ra) << 10) | ((rm) << 16)) \ + +/* rd = ra - rn * rm */ +#define aa64_emit_msub(rd, ra, rn, rm) \ + aa64_emit_inst(mul4, 0, rd, rn, ((ra) << 10) | ((rm) << 16) | 0x8000) \ + +#define aa64_emit_smaddl(rd, ra, rn, rm) \ + aa64_emit_inst(mul4, 4, rd, rn, ((ra) << 10) | ((rm) << 16) | 0x200000) \ + +#define aa64_emit_umaddl(rd, ra, rn, rm) \ + aa64_emit_inst(mul4, 4, rd, rn, ((ra) << 10) | ((rm) << 16) | 0xA00000) \ + +#define aa64_emit_mul(rd, rn, rm) \ + aa64_emit_madd(rd, 31, rn, rm) \ + +// MovZ, clears the highest bits and sets the lower ones +#define aa64_emit_movlo(rd, imm) \ + aa64_emit_inst(movi, 2, rd, 0, (((imm) & 0xffff) << 5) | (4 << 21)) \ + +// MovZ, clears the lowest bits and sets the higher ones +#define 
aa64_emit_movhiz(rd, imm) \ + aa64_emit_inst(movi, 2, rd, 0, (((imm) & 0xffff) << 5) | (5 << 21)) \ + +// MovK, keeps the other (lower) bits +#define aa64_emit_movhi(rd, imm) \ + aa64_emit_inst(movi, 3, rd, 0, (((imm) & 0xffff) << 5) | (5 << 21)) \ + +// MovN, moves the inverted immediate (for negative numbers) +#define aa64_emit_movne(rd, imm) \ + aa64_emit_inst(movi, 0, rd, 0, (((imm) & 0xffff) << 5) | (4 << 21)) \ + +#define aa64_emit_branch(offset) \ + aa64_emit_inst(b, 0, 0, 0, (((u32)(offset))) & 0x3ffffff) \ + +#define aa64_emit_branch_patch(ptr, offset) \ + *(ptr) = (((*(ptr)) & 0xfc000000) | (((u32)(offset)) & 0x3ffffff)) \ + +#define aa64_emit_brcond(cond, offset) \ + aa64_emit_inst(b, 2, cond, 0, ((((u32)(offset))) & 0x7ffff) << 5) \ + +#define aa64_emit_brcond_patch(ptr, offset) \ + *(ptr) = (((*(ptr)) & 0xff00001f) | (((((u32)(offset))) & 0x7ffff) << 5)) \ + +#define aa64_emit_brlink(offset) \ + aa64_emit_inst(b, 4, 0, 0, (((u32)(offset))) & 0x3ffffff) \ + +#define aa64_emit_extr(rd, rs, rm, amount) \ + aa64_emit_inst(bfm, 0, rd, rs, (1 << 23) | ((amount) << 10) | ((rm) << 16)) \ + +#define aa64_emit_ror(rd, rs, amount) \ + aa64_emit_extr(rd, rs, rs, amount) \ + +#define aa64_emit_lsr(rd, rs, amount) \ + aa64_emit_inst(bfm, 2, rd, rs, (31 << 10) | ((amount) << 16)) \ + +#define aa64_emit_lsl(rd, rs, amount) \ + aa64_emit_inst(bfm, 2, rd, rs, ((31-(amount)) << 10) | (((32-(amount)) & 31) << 16)) + +#define aa64_emit_asr(rd, rs, amount) \ + aa64_emit_inst(bfm, 0, rd, rs, (31 << 10) | ((amount) << 16)) \ + +#define aa64_emit_lsr64(rd, rs, amount) \ + aa64_emit_inst(bfm, 6, rd, rs, (1 << 22) | (63 << 10) | ((amount) << 16)) \ + +#define aa64_emit_eori(rd, rs, immr, imms) \ + aa64_emit_inst(movi, 2, rd, rs, ((imms) << 10) | ((immr) << 16)) \ + +#define aa64_emit_orri(rd, rs, immr, imms) \ + aa64_emit_inst(movi, 1, rd, rs, ((imms) << 10) | ((immr) << 16)) \ + +#define aa64_emit_andi(rd, rs, immr, imms) \ + aa64_emit_inst(movi, 0, rd, rs, ((imms) << 10) | ((immr) << 16)) \ + +#define aa64_emit_andi64(rd, rs, immr, imms) \ + aa64_emit_inst(movi, 4, rd, rs, (1 << 22) | ((imms) << 10) | ((immr) << 16)) + +#define aa64_emit_mov(rd, rs) \ + aa64_emit_orr(rd, 31, rs) \ + +#define aa64_emit_orr(rd, rs, rm) \ + aa64_emit_inst(logic, 1, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_orn(rd, rs, rm) \ + aa64_emit_inst(logic, 1, rd, rs, ((rm) << 16) | (1 << 21)) \ + +#define aa64_emit_and(rd, rs, rm) \ + aa64_emit_inst(logic, 0, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_ands(rd, rs, rm) \ + aa64_emit_inst(logic, 3, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_tst(rs, rm) \ + aa64_emit_ands(31, rs, rm) \ + +#define aa64_emit_cmpi(rs, imm) \ + aa64_emit_subis(31, rs, imm) \ + +#define aa64_emit_xor(rd, rs, rm) \ + aa64_emit_inst(logic, 2, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_bic(rd, rs, rm) \ + aa64_emit_inst(logic, 0, rd, rs, ((rm) << 16) | (1 << 21)) \ + +#define aa64_emit_add(rd, rs, rm) \ + aa64_emit_inst(addsub, 0, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_sub(rd, rs, rm) \ + aa64_emit_inst(addsub, 2, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_adc(rd, rs, rm) \ + aa64_emit_inst(misc, 0, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_sbc(rd, rs, rm) \ + aa64_emit_inst(misc, 2, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_adds(rd, rs, rm) \ + aa64_emit_inst(addsub, 1, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_subs(rd, rs, rm) \ + aa64_emit_inst(addsub, 3, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_adcs(rd, rs, rm) \ + aa64_emit_inst(misc, 1, rd, rs, ((rm) << 16)) \ + +#define 
aa64_emit_sbcs(rd, rs, rm) \ + aa64_emit_inst(misc, 3, rd, rs, ((rm) << 16)) \ + +#define aa64_emit_adr(rd, offset) \ + aa64_emit_inst(adr, (offset) & 3, rd, 0, ((offset) >> 2) & 0x7ffff) \ + +#define aa64_emit_tbz(rd, bitn, offset) \ + aa64_emit_inst(tbz, 1, rd, 0, ((((u32)(offset)) & 0x3fff) << 5) | ((bitn) << 19)) + +#define aa64_emit_tbnz(rd, bitn, offset) \ + aa64_emit_inst(tbnz, 1, rd, 0, ((((u32)(offset)) & 0x3fff) << 5) | ((bitn) << 19)) + +#define aa64_emit_cbz(rd, offset) \ + aa64_emit_inst(b, 1, rd, 0, ((((u32)offset) & 0x7ffff)) << 5) \ + +#define aa64_emit_cbnz(rd, offset) \ + aa64_emit_inst(b2, 1, rd, 0, ((((u32)offset) & 0x7ffff)) << 5) \ + +/* Misc Operations: Cond-select, Cond-Compare, ADC/SBC, CLZ/O, REV ... */ +#define aa64_emit_csel(rd, rtrue, rfalse, cond) \ + aa64_emit_inst(misc, 0, rd, rtrue, (1<<23)|((rfalse) << 16)|((cond) << 12)) \ + +#define aa64_emit_csinc(rd, rs, rm, cond) \ + aa64_emit_inst(misc, 0, rd, rs, 0x800400 | ((rm) << 16) | ((cond) << 12)) \ + +#define aa64_emit_csinv(rd, rs, rm, cond) \ + aa64_emit_inst(misc, 2, rd, rs, 0x800000 | ((rm) << 16) | ((cond) << 12)) \ + +#define aa64_emit_csneg(rd, rs, rm, cond) \ + aa64_emit_inst(misc, 2, rd, rs, 0x800400 | ((rm) << 16) | ((cond) << 12)) \ + +#define aa64_emit_ubfm(rd, rs, imms, immr) \ + aa64_emit_inst(bfm, 2, rd, rs, ((imms) << 10) | ((immr) << 16)) \ + +#define aa64_emit_ubfx(rd, rs, pos, size) \ + aa64_emit_ubfm(rd, rs, pos + size - 1, pos) \ + +#define aa64_emit_cset(rd, cond) \ + aa64_emit_csinc(rd, 31, 31, ((cond) ^ 1)) \ + +#define aa64_emit_csetm(rd, cond) \ + aa64_emit_csinv(rd, 31, 31, ((cond) ^ 1)) \ + +#define aa64_emit_ccmpi(rn, immv, flags, cond) \ + aa64_emit_inst(misc, 3, rn, flags, 0x400800 | ((immv)<<16) | ((cond)<<12)) \ + +#define aa64_emit_rorv(rd, rs, ra) \ + aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02C00) \ + +#define aa64_emit_lslv(rd, rs, ra) \ + aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02000) \ + +#define aa64_emit_lsrv(rd, rs, ra) \ + aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02400) \ + +#define aa64_emit_asrv(rd, rs, ra) \ + aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02800) \ + +#define aa64_emit_orr_shift64(rd, rs, rm, st, sa) \ + aa64_emit_inst(logic, 5, rd, rs, ((rm) << 16) | ((st)<<22) | ((sa)<<10)) \ + +#define aa64_emit_merge_regs(rd, rhi, rlo) \ + aa64_emit_orr_shift64(rd, rlo, rhi, 0, 32) \ + +#define aa64_emit_sdiv(rd, rs, rm) \ + aa64_emit_inst(misc, 0, rd, rs, ((rm) << 16) | 0xC00C00) \ + + + diff --git a/arm/arm64_emit.h b/arm/arm64_emit.h new file mode 100644 index 0000000..4f372c8 --- /dev/null +++ b/arm/arm64_emit.h @@ -0,0 +1,1879 @@ +/* gameplaySP + * + * Copyright (C) 2021 David Guillen Fandos + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef ARM64_EMIT_H +#define ARM64_EMIT_H + +#include "arm64_codegen.h" + +/* This is a fork of the MIPS dynarec, since A64 has 32 regs as well and + does not map great to the armv4 instruction set. Also flexible operand + is fairly limited and cannot map to armv4 well. + All flags are kept in registers and loaded/restored as needed. */ + +u32 a64_update_gba(u32 pc); + +// Although these are defined as a function, don't call them as +// such (jump to it instead) +void a64_indirect_branch_arm(u32 address); +void a64_indirect_branch_thumb(u32 address); +void a64_indirect_branch_dual(u32 address); + +u32 execute_read_cpsr(); +u32 execute_read_spsr(); +void execute_swi(u32 pc); +void a64_cheat_hook(void); + +u32 execute_spsr_restore(u32 address); +void execute_store_cpsr(u32 new_cpsr, u32 store_mask); +void execute_store_spsr(u32 new_spsr, u32 store_mask); + +void execute_aligned_store32(u32 addr, u32 data); +u32 execute_aligned_load32(u32 addr); + +typedef enum +{ + arm64_reg_x0, // arg0 + arm64_reg_x1, // arg1 + arm64_reg_x2, // arg2 + arm64_reg_x3, // temporary reg + arm64_reg_x4, // temporary reg + arm64_reg_x5, // temporary reg + arm64_reg_x6, // ARM reg 0 (temporary) + arm64_reg_x7, // ARM reg 1 (temporary) + arm64_reg_x8, // ARM reg 2 (temporary) + arm64_reg_x9, // ARM reg 3 (temporary) + arm64_reg_x10, // ARM reg 4 (temporary) + arm64_reg_x11, // ARM reg 5 (temporary) + arm64_reg_x12, // ARM reg 6 (temporary) + arm64_reg_x13, // ARM reg 7 (temporary) + arm64_reg_x14, // ARM reg 8 (temporary) + arm64_reg_x15, // ARM reg 9 (temporary) + arm64_reg_x16, // ARM reg 10 (temporary) + arm64_reg_x17, // ARM reg 11 (temporary) + arm64_reg_x18, + arm64_reg_x19, // save0 (mem-scratch) (saved) + arm64_reg_x20, // base pointer (saved) + arm64_reg_x21, // cycle counter (saved) + arm64_reg_x22, // C-flag (contains 0 or 1, carry bit) + arm64_reg_x23, // V-flag (contains 0 or 1, overflow bit) + arm64_reg_x24, // Z-flag (contains 0 or 1, zero bit) + arm64_reg_x25, // N-flag (contains 0 or 1, sign bit) + arm64_reg_x26, // ARM reg 12 (saved) + arm64_reg_x27, // ARM reg 13 (saved) + arm64_reg_x28, // ARM reg 14 (saved) + arm64_reg_x29, // ARM reg 15 (block start ~ PC) (saved) + arm64_reg_lr, + arm64_reg_sp, +} arm64_reg_number; + + +#define reg_save0 arm64_reg_x19 +#define reg_base arm64_reg_x20 +#define reg_cycles arm64_reg_x21 +#define reg_res arm64_reg_x0 +#define reg_a0 arm64_reg_x0 +#define reg_a1 arm64_reg_x1 +#define reg_a2 arm64_reg_x2 +#define reg_temp arm64_reg_x3 +#define reg_temp2 arm64_reg_x4 +#define reg_pc arm64_reg_x29 +#define reg_c_cache arm64_reg_x22 +#define reg_v_cache arm64_reg_x23 +#define reg_z_cache arm64_reg_x24 +#define reg_n_cache arm64_reg_x25 + +#define reg_r0 arm64_reg_x6 +#define reg_r1 arm64_reg_x7 +#define reg_r2 arm64_reg_x8 +#define reg_r3 arm64_reg_x9 +#define reg_r4 arm64_reg_x10 +#define reg_r5 arm64_reg_x11 +#define reg_r6 arm64_reg_x12 +#define reg_r7 arm64_reg_x13 +#define reg_r8 arm64_reg_x14 +#define reg_r9 arm64_reg_x15 +#define reg_r10 arm64_reg_x16 +#define reg_r11 arm64_reg_x17 +#define reg_r12 arm64_reg_x26 +#define reg_r13 arm64_reg_x27 +#define reg_r14 arm64_reg_x28 + +#define reg_zero arm64_reg_sp // Careful it's also SP + +// Writing to r15 goes straight to a0, to be chained with other ops + +u32 arm_to_a64_reg[] = +{ + reg_r0, + 
reg_r1, + reg_r2, + reg_r3, + reg_r4, + reg_r5, + reg_r6, + reg_r7, + reg_r8, + reg_r9, + reg_r10, + reg_r11, + reg_r12, + reg_r13, + reg_r14, + reg_a0, + reg_a1, + reg_a2 +}; + +#define arm_reg_a0 15 +#define arm_reg_a1 16 +#define arm_reg_a2 17 + +#define generate_save_reg(regnum) \ + aa64_emit_str(arm_to_a64_reg[regnum], reg_base, regnum) \ + +#define generate_restore_reg(regnum) \ + aa64_emit_ldr(arm_to_a64_reg[regnum], reg_base, regnum) \ + +#define emit_save_regs() \ +{ \ + unsigned i; \ + for (i = 0; i < 15; i++) { \ + generate_save_reg(i); \ + } \ +} + +#define emit_restore_regs() \ +{ \ + unsigned i; \ + for (i = 0; i < 15; i++) { \ + generate_restore_reg(i); \ + } \ +} + +#define generate_load_reg(ireg, reg_index) \ + aa64_emit_mov(ireg, arm_to_a64_reg[reg_index]) \ + +#define generate_load_imm(ireg, imm) \ + if ((s32)(imm) < 0 && (s32)(imm) >= -65536) { \ + /* immediate like 0xffffxxxx */ \ + aa64_emit_movne(ireg, (~(imm))); \ + } else if (((imm) & 0xffff) == 0) { \ + /* immediate like 0xxxxx0000 */ \ + aa64_emit_movhiz(ireg, (imm)); \ + } else { \ + aa64_emit_movlo(ireg, imm); \ + if ((imm) >= (1 << 16)) { \ + aa64_emit_movhi(ireg, ((imm) >> 16)); \ + } \ + } + +#define generate_load_pc_2inst(ireg, new_pc) \ +{ \ + aa64_emit_movlo(ireg, new_pc); \ + aa64_emit_movhi(ireg, ((new_pc) >> 16)); \ +} + +#define generate_load_pc(ireg, new_pc) \ +{ \ + s32 pc_delta = (new_pc) - (stored_pc); \ + if (pc_delta >= 0) { \ + if (pc_delta < 4096) { \ + aa64_emit_addi(ireg, reg_pc, pc_delta); \ + } else { \ + generate_load_imm(ireg, new_pc); \ + } \ + } else { \ + if (pc_delta >= -4096) { \ + aa64_emit_subi(ireg, reg_pc, -pc_delta); \ + } else { \ + generate_load_imm(ireg, new_pc); \ + } \ + } \ +} \ + +#define generate_store_reg(ireg, reg_index) \ + aa64_emit_mov(arm_to_a64_reg[reg_index], ireg) \ + +/* Logical immediates are weird in aarch64, load imm to register */ +#define generate_logical_imm(optype, ireg_dest, ireg_src, imm) \ + generate_load_imm(reg_temp, imm); \ + aa64_emit_##optype(ireg_dest, ireg_src, reg_temp); \ + +/* TODO Use addi12 if the immediate is <24 bits ? 
*/ +#define generate_alu_imm(imm_type, reg_type, ireg_dest, ireg_src, imm) \ + if((u32)(imm) < 4096) \ + { \ + aa64_emit_##imm_type(ireg_dest, ireg_src, imm); \ + } \ + else \ + { \ + generate_load_imm(reg_temp, imm); \ + aa64_emit_##reg_type(ireg_dest, ireg_src, reg_temp); \ + } \ + +#define generate_mov(ireg_dest, ireg_src) \ + aa64_emit_mov(arm_to_a64_reg[ireg_dest], arm_to_a64_reg[ireg_src]) \ + +#define generate_function_call(function_location) \ + aa64_emit_brlink(aa64_br_offset(function_location)); \ + +#define generate_cycle_update() \ + if(cycle_count != 0) \ + { \ + unsigned hicycle = cycle_count >> 12; \ + if (hicycle) { \ + aa64_emit_subi12(reg_cycles, reg_cycles, hicycle); \ + } \ + aa64_emit_subi(reg_cycles, reg_cycles, (cycle_count & 0xfff)); \ + cycle_count = 0; \ + } \ + +/* Patches ARM-mode conditional branches */ +#define generate_branch_patch_conditional(dest, label) \ + aa64_emit_brcond_patch(((u32*)dest), aa64_br_offset_from(label, dest)) + +#define emit_branch_filler(writeback_location) \ + (writeback_location) = translation_ptr; \ + aa64_emit_branch(0); \ + +#define generate_branch_patch_unconditional(dest, target) \ + aa64_emit_branch_patch((u32*)dest, aa64_br_offset_from(target, dest)) \ + +#define generate_branch_no_cycle_update(writeback_location, new_pc) \ + if(pc == idle_loop_target_pc) \ + { \ + generate_load_pc(reg_a0, new_pc); \ + generate_function_call(a64_update_gba); \ + emit_branch_filler(writeback_location); \ + } \ + else \ + { \ + aa64_emit_tbnz(reg_cycles, 31, 2); \ + emit_branch_filler(writeback_location); \ + generate_load_pc_2inst(reg_a0, new_pc); \ + generate_function_call(a64_update_gba); \ + aa64_emit_branch(-4); \ + } \ + +#define generate_branch_cycle_update(writeback_location, new_pc) \ + generate_cycle_update(); \ + generate_branch_no_cycle_update(writeback_location, new_pc) \ + +// a0 holds the destination + +#define generate_indirect_branch_cycle_update(type) \ + generate_cycle_update() \ + generate_indirect_branch_no_cycle_update(type) \ + +#define generate_indirect_branch_no_cycle_update(type) \ + aa64_emit_branch(aa64_br_offset(a64_indirect_branch_##type)); \ + +#define block_prologue_size 0 +#define generate_block_prologue() \ + generate_load_imm(reg_pc, stored_pc) \ + +#define check_generate_n_flag \ + (flag_status & 0x08) \ + +#define check_generate_z_flag \ + (flag_status & 0x04) \ + +#define check_generate_c_flag \ + (flag_status & 0x02) \ + +#define check_generate_v_flag \ + (flag_status & 0x01) \ + +#define generate_load_reg_pc(ireg, reg_index, pc_offset) \ + if(reg_index == REG_PC) \ + { \ + generate_load_pc(ireg, (pc + pc_offset)); \ + } \ + else \ + { \ + generate_load_reg(ireg, reg_index); \ + } \ + +#define check_load_reg_pc(arm_reg, reg_index, pc_offset) \ + if(reg_index == REG_PC) \ + { \ + reg_index = arm_reg; \ + generate_load_pc(arm_to_a64_reg[arm_reg], (pc + pc_offset)); \ + } \ + +#define check_store_reg_pc_no_flags(reg_index) \ + if(reg_index == REG_PC) \ + { \ + generate_indirect_branch_arm(); \ + } \ + +#define check_store_reg_pc_flags(reg_index) \ + if(reg_index == REG_PC) \ + { \ + generate_function_call(execute_spsr_restore); \ + generate_indirect_branch_dual(); \ + } \ + +#define generate_shift_imm_lsl_no_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_lsl(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + _rm = arm_reg; \ + } \ + +#define generate_shift_imm_lsr_no_flags(arm_reg, _rm, _shift) \ + if(_shift != 0) \ + { \ + 
check_load_reg_pc(arm_reg, _rm, 8); \ + aa64_emit_lsr(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + } \ + else \ + { \ + aa64_emit_movlo(arm_to_a64_reg[arm_reg], 0); \ + } \ + _rm = arm_reg \ + +#define generate_shift_imm_asr_no_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + aa64_emit_asr(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], \ + _shift ? _shift : 31); \ + _rm = arm_reg \ + +#define generate_shift_imm_ror_no_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_ror(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + } \ + else \ + { /* Special case: RRX (no carry update) */ \ + aa64_emit_extr(arm_to_a64_reg[arm_reg], \ + reg_c_cache, arm_to_a64_reg[_rm], 1); \ + } \ + _rm = arm_reg \ + +#define generate_shift_imm_lsl_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_ubfx(reg_c_cache, arm_to_a64_reg[_rm], (32 - _shift), 1); \ + aa64_emit_lsl(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + _rm = arm_reg; \ + } \ + +#define generate_shift_imm_lsr_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_ubfx(reg_c_cache, arm_to_a64_reg[_rm], (_shift - 1), 1); \ + aa64_emit_lsr(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + } \ + else \ + { \ + aa64_emit_lsr(reg_c_cache, arm_to_a64_reg[_rm], 31); \ + aa64_emit_movlo(arm_to_a64_reg[arm_reg], 0); \ + } \ + _rm = arm_reg \ + +#define generate_shift_imm_asr_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_ubfx(reg_c_cache, arm_to_a64_reg[_rm], (_shift - 1), 1); \ + aa64_emit_asr(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + } \ + else \ + { \ + aa64_emit_lsr(reg_c_cache, arm_to_a64_reg[_rm], 31); \ + aa64_emit_asr(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], 31); \ + } \ + _rm = arm_reg \ + +#define generate_shift_imm_ror_flags(arm_reg, _rm, _shift) \ + check_load_reg_pc(arm_reg, _rm, 8); \ + if(_shift != 0) \ + { \ + aa64_emit_ubfx(reg_c_cache, arm_to_a64_reg[_rm], (_shift - 1), 1); \ + aa64_emit_ror(arm_to_a64_reg[arm_reg], arm_to_a64_reg[_rm], _shift); \ + } \ + else \ + { /* Special case: RRX (carry update) */ \ + aa64_emit_extr(reg_temp, reg_c_cache, arm_to_a64_reg[_rm], 1); \ + aa64_emit_ubfx(reg_c_cache, arm_to_a64_reg[_rm], 0, 1); \ + aa64_emit_mov(arm_to_a64_reg[arm_reg], reg_temp); \ + } \ + _rm = arm_reg \ + +#define generate_shift_reg_lsl_no_flags(_rm, _rs) \ + aa64_emit_cmpi(arm_to_a64_reg[_rs], 32); \ + aa64_emit_lslv(reg_temp, arm_to_a64_reg[_rm], arm_to_a64_reg[_rs]); \ + aa64_emit_csel(reg_a0, reg_zero, reg_temp, ccode_hs); \ + +#define generate_shift_reg_lsr_no_flags(_rm, _rs) \ + aa64_emit_cmpi(arm_to_a64_reg[_rs], 32); \ + aa64_emit_lsrv(reg_temp, arm_to_a64_reg[_rm], arm_to_a64_reg[_rs]); \ + aa64_emit_csel(reg_a0, reg_zero, reg_temp, ccode_hs); \ + +#define generate_shift_reg_asr_no_flags(_rm, _rs) \ + aa64_emit_cmpi(arm_to_a64_reg[_rs], 31); \ + aa64_emit_asrv(reg_a0, arm_to_a64_reg[_rm], arm_to_a64_reg[_rs]); \ + aa64_emit_asr(reg_temp, arm_to_a64_reg[_rm], 31); \ + aa64_emit_csel(reg_a0, reg_a0, reg_temp, ccode_lo); \ + +#define generate_shift_reg_ror_no_flags(_rm, _rs) \ + aa64_emit_rorv(reg_a0, arm_to_a64_reg[_rm], arm_to_a64_reg[_rs]) \ + +#define generate_shift_reg_lsl_flags(_rm, _rs) \ +{ \ + u32 shift_reg = _rs; \ + check_load_reg_pc(arm_reg_a1, shift_reg, 8); \ + generate_load_reg_pc(reg_a0, _rm, 12); \ + /* 
Only load the result on zero, no shift */ \ + aa64_emit_cbz(arm_to_a64_reg[shift_reg], 8); \ + aa64_emit_subi(reg_temp, arm_to_a64_reg[shift_reg], 1); \ + aa64_emit_lslv(reg_a0, reg_a0, reg_temp); \ + aa64_emit_lsr(reg_c_cache, reg_a0, 31); \ + aa64_emit_cmpi(arm_to_a64_reg[shift_reg], 33); \ + aa64_emit_lsl(reg_a0, reg_a0, 1); \ + /* Result and flag to be zero if shift is > 32 */ \ + aa64_emit_csel(reg_c_cache, reg_zero, reg_c_cache, ccode_hs); \ + aa64_emit_csel(reg_a0, reg_zero, reg_a0, ccode_hs); \ +} \ + +#define generate_shift_reg_lsr_flags(_rm, _rs) \ +{ \ + u32 shift_reg = _rs; \ + check_load_reg_pc(arm_reg_a1, shift_reg, 8); \ + generate_load_reg_pc(reg_a0, _rm, 12); \ + /* Only load the result on zero, no shift */ \ + aa64_emit_cbz(arm_to_a64_reg[shift_reg], 8); \ + aa64_emit_subi(reg_temp, arm_to_a64_reg[shift_reg], 1); \ + aa64_emit_lsrv(reg_a0, reg_a0, reg_temp); \ + aa64_emit_andi(reg_c_cache, reg_a0, 0, 0); /* imm=1 */ \ + aa64_emit_cmpi(arm_to_a64_reg[shift_reg], 33); \ + aa64_emit_lsr(reg_a0, reg_a0, 1); \ + /* Result and flag to be zero if shift is > 32 */ \ + aa64_emit_csel(reg_c_cache, reg_zero, reg_c_cache, ccode_hs); \ + aa64_emit_csel(reg_a0, reg_zero, reg_a0, ccode_hs); \ +} \ + +#define generate_shift_reg_asr_flags(_rm, _rs) \ + generate_load_reg_pc(reg_a1, _rs, 8); \ + generate_load_reg_pc(reg_a0, _rm, 12); \ + /* Only load the result on zero, no shift */ \ + aa64_emit_cbz(reg_a1, 8); \ + /* Cap shift at 32, since it's equivalent */ \ + aa64_emit_movlo(reg_temp, 32); \ + aa64_emit_cmpi(reg_a1, 32); \ + aa64_emit_csel(reg_a1, reg_a1, reg_temp, ccode_ls); \ + aa64_emit_subi(reg_temp, reg_a1, 1); \ + aa64_emit_asrv(reg_a0, reg_a0, reg_temp); \ + aa64_emit_andi(reg_c_cache, reg_a0, 0, 0); /* imm=1 */ \ + aa64_emit_asr(reg_a0, reg_a0, 1); \ + +#define generate_shift_reg_ror_flags(_rm, _rs) \ + aa64_emit_cbz(arm_to_a64_reg[_rs], 4); \ + aa64_emit_subi(reg_temp, arm_to_a64_reg[_rs], 1); \ + aa64_emit_lsrv(reg_temp, arm_to_a64_reg[_rm], reg_temp); \ + aa64_emit_andi(reg_c_cache, reg_temp, 0, 0); /* imm=1 */ \ + aa64_emit_rorv(reg_a0, arm_to_a64_reg[_rm], arm_to_a64_reg[_rs]) \ + +#define generate_shift_imm(arm_reg, name, flags_op) \ + u32 shift = (opcode >> 7) & 0x1F; \ + generate_shift_imm_##name##_##flags_op(arm_reg, rm, shift) \ + +#define generate_shift_reg(arm_reg, name, flags_op) \ + u32 rs = ((opcode >> 8) & 0x0F); \ + generate_shift_reg_##name##_##flags_op(rm, rs); \ + rm = arm_reg \ + +// Made functions due to the macro expansion getting too large. 
+// Returns a new rm if it redirects it (which will happen on most of these +// cases) + +#define generate_load_rm_sh(flags_op) \ +{ \ + switch((opcode >> 4) & 0x07) \ + { \ + /* LSL imm */ \ + case 0x0: \ + { \ + generate_shift_imm(arm_reg_a0, lsl, flags_op); \ + break; \ + } \ + \ + /* LSL reg */ \ + case 0x1: \ + { \ + generate_shift_reg(arm_reg_a0, lsl, flags_op); \ + break; \ + } \ + \ + /* LSR imm */ \ + case 0x2: \ + { \ + generate_shift_imm(arm_reg_a0, lsr, flags_op); \ + break; \ + } \ + \ + /* LSR reg */ \ + case 0x3: \ + { \ + generate_shift_reg(arm_reg_a0, lsr, flags_op); \ + break; \ + } \ + \ + /* ASR imm */ \ + case 0x4: \ + { \ + generate_shift_imm(arm_reg_a0, asr, flags_op); \ + break; \ + } \ + \ + /* ASR reg */ \ + case 0x5: \ + { \ + generate_shift_reg(arm_reg_a0, asr, flags_op); \ + break; \ + } \ + \ + /* ROR imm */ \ + case 0x6: \ + { \ + generate_shift_imm(arm_reg_a0, ror, flags_op); \ + break; \ + } \ + \ + /* ROR reg */ \ + case 0x7: \ + { \ + generate_shift_reg(arm_reg_a0, ror, flags_op); \ + break; \ + } \ + } \ +} \ + +#define generate_block_extra_vars() \ + u32 stored_pc = pc; \ + +#define generate_block_extra_vars_arm() \ + generate_block_extra_vars(); \ + +#define generate_load_offset_sh() \ + { \ + switch((opcode >> 5) & 0x03) \ + { \ + /* LSL imm */ \ + case 0x0: \ + { \ + generate_shift_imm(arm_reg_a1, lsl, no_flags); \ + break; \ + } \ + \ + /* LSR imm */ \ + case 0x1: \ + { \ + generate_shift_imm(arm_reg_a1, lsr, no_flags); \ + break; \ + } \ + \ + /* ASR imm */ \ + case 0x2: \ + { \ + generate_shift_imm(arm_reg_a1, asr, no_flags); \ + break; \ + } \ + \ + /* ROR imm */ \ + case 0x3: \ + { \ + generate_shift_imm(arm_reg_a1, ror, no_flags); \ + break; \ + } \ + } \ + } \ + +#define generate_indirect_branch_arm() \ +{ \ + if(condition == 0x0E) \ + { \ + generate_indirect_branch_cycle_update(arm); \ + } \ + else \ + { \ + generate_indirect_branch_no_cycle_update(arm); \ + } \ +} \ + +#define generate_indirect_branch_dual() \ +{ \ + if(condition == 0x0E) \ + { \ + generate_indirect_branch_cycle_update(dual); \ + } \ + else \ + { \ + generate_indirect_branch_no_cycle_update(dual); \ + } \ +} \ + +#define generate_block_extra_vars_thumb() \ + generate_block_extra_vars() \ + +// It should be okay to still generate result flags, spsr will overwrite them. +// This is pretty infrequent (returning from interrupt handlers, et al) so +// probably not worth optimizing for. 
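Before the CPSR/SPSR helpers and the condition-code macros that follow, a brief aside on the flag caches: the four registers in the map above (reg_n_cache, reg_z_cache, reg_c_cache, reg_v_cache) each hold a plain 0 or 1. The snippet below is only an illustrative plain-C sketch of how such 0/1 caches line up with the guest CPSR bit layout (N = bit 31, Z = 30, C = 29, V = 28); the actual packing is done by the execute_read_cpsr / execute_store_cpsr helpers, presumably in the accompanying arm64_stub.S.

/* Illustration only: packing 0/1 flag caches into a CPSR-style word. */
static inline u32 pack_flag_cache_to_cpsr(u32 n, u32 z, u32 c, u32 v,
                                          u32 cpsr_rest)
{
  /* N is bit 31, Z bit 30, C bit 29, V bit 28 of the CPSR. */
  return (n << 31) | (z << 30) | (c << 29) | (v << 28) |
         (cpsr_rest & 0x0FFFFFFF);
}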
+ +u32 execute_spsr_restore_body(u32 address) +{ + set_cpu_mode(cpu_modes[reg[REG_CPSR] & 0x1F]); + if((io_registers[REG_IE] & io_registers[REG_IF]) && + io_registers[REG_IME] && ((reg[REG_CPSR] & 0x80) == 0)) + { + reg_mode[MODE_IRQ][6] = address + 4; + spsr[MODE_IRQ] = reg[REG_CPSR]; + reg[REG_CPSR] = 0xD2; + address = 0x00000018; + set_cpu_mode(MODE_IRQ); + } + + if(reg[REG_CPSR] & 0x20) + address |= 0x01; + + return address; +} + +/* Generate the opposite condition to skip the block */ +#define generate_condition_eq() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_z_cache, 0); \ + +#define generate_condition_ne() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_z_cache, 0); \ + +#define generate_condition_cs() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_c_cache, 0); \ + +#define generate_condition_cc() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_c_cache, 0); \ + +#define generate_condition_mi() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_n_cache, 0); \ + +#define generate_condition_pl() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_n_cache, 0); \ + +#define generate_condition_vs() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_v_cache, 0); \ + +#define generate_condition_vc() \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_v_cache, 0); \ + +#define generate_condition_hi() \ + aa64_emit_eori(reg_temp, reg_c_cache, 0, 0); /* imm=1 */ \ + aa64_emit_orr(reg_temp, reg_temp, reg_z_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_temp, 0); \ + +#define generate_condition_ls() \ + aa64_emit_eori(reg_temp, reg_c_cache, 0, 0); /* imm=1 */ \ + aa64_emit_orr(reg_temp, reg_temp, reg_z_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_temp, 0); \ + +#define generate_condition_ge() \ + aa64_emit_sub(reg_temp, reg_n_cache, reg_v_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_temp, 0); \ + +#define generate_condition_lt() \ + aa64_emit_sub(reg_temp, reg_n_cache, reg_v_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_temp, 0); \ + +#define generate_condition_gt() \ + aa64_emit_xor(reg_temp, reg_n_cache, reg_v_cache); \ + aa64_emit_orr(reg_temp, reg_temp, reg_z_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbnz(reg_temp, 0); \ + +#define generate_condition_le() \ + aa64_emit_xor(reg_temp, reg_n_cache, reg_v_cache); \ + aa64_emit_orr(reg_temp, reg_temp, reg_z_cache); \ + (backpatch_address) = translation_ptr; \ + aa64_emit_cbz(reg_temp, 0); \ + +#define generate_condition() \ + switch(condition) \ + { \ + case 0x0: \ + generate_condition_eq(); \ + break; \ + \ + case 0x1: \ + generate_condition_ne(); \ + break; \ + \ + case 0x2: \ + generate_condition_cs(); \ + break; \ + \ + case 0x3: \ + generate_condition_cc(); \ + break; \ + \ + case 0x4: \ + generate_condition_mi(); \ + break; \ + \ + case 0x5: \ + generate_condition_pl(); \ + break; \ + \ + case 0x6: \ + generate_condition_vs(); \ + break; \ + \ + case 0x7: \ + generate_condition_vc(); \ + break; \ + \ + case 0x8: \ + generate_condition_hi(); \ + break; \ + \ + case 0x9: \ + generate_condition_ls(); \ + break; \ + \ + case 0xA: \ + generate_condition_ge(); \ + break; \ + \ + case 0xB: \ + generate_condition_lt(); \ + break; \ + \ + case 0xC: \ + generate_condition_gt(); \ + break; \ + \ + case 0xD: \ + generate_condition_le(); \ + break; \ + \ + case 0xE: \ + break; \ + \ + case 
0xF: \ + break; \ + } \ + +#define generate_branch() \ +{ \ + if(condition == 0x0E) \ + { \ + generate_branch_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + } \ + else \ + { \ + generate_branch_no_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + } \ + block_exit_position++; \ +} + +// Flag generation, using the native CPU ALU flags +#define generate_op_logic_flags(_reg) \ + if(check_generate_n_flag) \ + { \ + aa64_emit_lsr(reg_n_cache, _reg, 31); \ + } \ + if(check_generate_z_flag) \ + { \ + aa64_emit_cmpi(_reg, 0); \ + aa64_emit_cset(reg_z_cache, ccode_eq); \ + } \ + +#define generate_op_arith_flags() \ + /* Assumes that state is in the flags */ \ + if(check_generate_c_flag) { \ + aa64_emit_cset(reg_c_cache, ccode_hs); \ + } \ + if(check_generate_v_flag) { \ + aa64_emit_cset(reg_v_cache, ccode_vs); \ + } \ + if(check_generate_n_flag) { \ + aa64_emit_cset(reg_n_cache, ccode_mi); \ + } \ + if(check_generate_z_flag) { \ + aa64_emit_cset(reg_z_cache, ccode_eq); \ + } \ + +#define load_c_flag() \ + aa64_emit_movne(reg_temp, 0); \ + aa64_emit_adds(reg_temp, reg_temp, reg_c_cache); \ + +// Muls instruction +#define generate_op_muls_reg(_rd, _rn, _rm) \ + aa64_emit_mul(_rd, _rn, _rm); \ + generate_op_logic_flags(_rd) \ + +// Immediate logical operations. Use native Z and N flag and CSET instruction. +#define generate_op_and_imm(_rd, _rn) \ + generate_logical_imm(and, _rd, _rn, imm) \ + +#define generate_op_orr_imm(_rd, _rn) \ + generate_logical_imm(orr, _rd, _rn, imm) \ + +#define generate_op_eor_imm(_rd, _rn) \ + generate_logical_imm(xor, _rd, _rn, imm) \ + +#define generate_op_bic_imm(_rd, _rn) \ + generate_logical_imm(bic, _rd, _rn, imm) \ + +#define generate_op_ands_imm(_rd, _rn) \ + generate_op_and_imm(_rd, _rn); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_orrs_imm(_rd, _rn) \ + generate_op_orr_imm(_rd, _rn); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_eors_imm(_rd, _rn) \ + generate_op_eor_imm(_rd, _rn); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_bics_imm(_rd, _rn) \ + generate_op_bic_imm(_rd, _rn); \ + generate_op_logic_flags(_rd) \ + +// Register logical operations. Uses also native flags. +#define generate_op_and_reg(_rd, _rn, _rm) \ + aa64_emit_and(_rd, _rn, _rm) \ + +#define generate_op_orr_reg(_rd, _rn, _rm) \ + aa64_emit_orr(_rd, _rn, _rm) \ + +#define generate_op_eor_reg(_rd, _rn, _rm) \ + aa64_emit_xor(_rd, _rn, _rm) \ + +#define generate_op_bic_reg(_rd, _rn, _rm) \ + aa64_emit_bic(_rd, _rn, _rm) \ + +#define generate_op_ands_reg(_rd, _rn, _rm) \ + aa64_emit_and(_rd, _rn, _rm); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_orrs_reg(_rd, _rn, _rm) \ + aa64_emit_orr(_rd, _rn, _rm); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_eors_reg(_rd, _rn, _rm) \ + aa64_emit_xor(_rd, _rn, _rm); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_bics_reg(_rd, _rn, _rm) \ + aa64_emit_bic(_rd, _rn, _rm); \ + generate_op_logic_flags(_rd) \ + +// Arithmetic reg-reg operations. 
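The adcs/sbcs/rscs helpers further below first use load_c_flag (defined above) to transfer the cached carry into the host's native C flag before emitting real ADCS/SBCS instructions: MOVN loads 0xFFFFFFFF into reg_temp, and the ADDS that follows produces a carry-out exactly when the cached carry is 1. A minimal plain-C model of that identity, for illustration only:

/* Why "movn temp, #0 ; adds temp, temp, c_cache" reproduces the cached
   carry in the host C flag: 0xFFFFFFFF + 1 wraps and carries out,
   0xFFFFFFFF + 0 does not. */
static inline u32 host_carry_after_load_c_flag(u32 cached_c) /* 0 or 1 */
{
  u64 sum = (u64)0xFFFFFFFFu + cached_c;
  return (u32)(sum >> 32); /* equals cached_c */
}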
+ +#define generate_op_add_reg(_rd, _rn, _rm) \ + aa64_emit_add(_rd, _rn, _rm) \ + +#define generate_op_sub_reg(_rd, _rn, _rm) \ + aa64_emit_sub(_rd, _rn, _rm) \ + +#define generate_op_rsb_reg(_rd, _rn, _rm) \ + aa64_emit_sub(_rd, _rm, _rn) \ + +#define generate_op_adds_reg(_rd, _rn, _rm) \ + aa64_emit_adds(_rd, _rn, _rm) \ + generate_op_arith_flags() \ + +#define generate_op_subs_reg(_rd, _rn, _rm) \ + aa64_emit_subs(_rd, _rn, _rm) \ + generate_op_arith_flags() \ + +#define generate_op_rsbs_reg(_rd, _rn, _rm) \ + aa64_emit_subs(_rd, _rm, _rn) \ + generate_op_arith_flags() \ + +#define generate_op_adc_reg(_rd, _rn, _rm) \ + aa64_emit_add(_rd, _rn, _rm); /* Two adds is faster */ \ + aa64_emit_add(_rd, _rd, reg_c_cache); \ + +#define generate_op_sbc_reg(_rd, _rn, _rm) \ + /* Rd = Rn - Rm + (C - 1) */ \ + aa64_emit_sub(_rd, _rn, _rm); \ + aa64_emit_subi(reg_temp, reg_c_cache, 1); \ + aa64_emit_add(_rd, _rd, reg_temp); \ + +#define generate_op_rsc_reg(_rd, _rn, _rm) \ + aa64_emit_sub(_rd, _rm, _rn); \ + aa64_emit_subi(reg_temp, reg_c_cache, 1); \ + aa64_emit_add(_rd, _rd, reg_temp); \ + +/* Must use native instruction to accurately calculate C/V flags */ +#define generate_op_adcs_reg(_rd, _rn, _rm) \ + load_c_flag(); \ + aa64_emit_adcs(_rd, _rn, _rm); \ + generate_op_arith_flags() \ + +#define generate_op_sbcs_reg(_rd, _rn, _rm) \ + load_c_flag(); \ + aa64_emit_sbcs(_rd, _rn, _rm); \ + generate_op_arith_flags() \ + +#define generate_op_rscs_reg(_rd, _rn, _rm) \ + load_c_flag(); \ + aa64_emit_sbcs(_rd, _rm, _rn); \ + generate_op_arith_flags() \ + + +#define generate_op_neg_reg(_rd, _rn, _rm) \ + generate_op_subs_reg(_rd, reg_zero, _rm) \ + +// Arithmetic immediate operations. Use native flags when needed (input). + +#define generate_op_add_imm(_rd, _rn) \ + generate_alu_imm(addi, add, _rd, _rn, imm) \ + +#define generate_op_adds_imm(_rd, _rn) \ + generate_alu_imm(addis, adds, _rd, _rn, imm) \ + generate_op_arith_flags(); \ + +#define generate_op_sub_imm(_rd, _rn) \ + generate_alu_imm(subi, sub, _rd, _rn, imm) \ + +#define generate_op_subs_imm(_rd, _rn) \ + generate_alu_imm(subis, subs, _rd, _rn, imm) \ + generate_op_arith_flags(); \ + +#define generate_op_rsb_imm(_rd, _rn) \ + if (imm) { \ + generate_load_imm(reg_temp, imm); \ + aa64_emit_sub(_rd, reg_temp, _rn) \ + } else { \ + aa64_emit_sub(_rd, reg_zero, _rn) \ + } \ + +#define generate_op_rsbs_imm(_rd, _rn) \ + if (imm) { \ + generate_load_imm(reg_temp, imm); \ + aa64_emit_subs(_rd, reg_temp, _rn) \ + } else { \ + aa64_emit_subs(_rd, reg_zero, _rn) \ + } \ + generate_op_arith_flags(); \ + + +#define generate_op_adc_imm(_rd, _rn) \ + if (imm) { \ + generate_alu_imm(addi, add, _rd, _rn, imm); \ + aa64_emit_add(_rd, _rd, reg_c_cache); \ + } else { \ + aa64_emit_add(_rd, _rn, reg_c_cache); \ + } \ + +#define generate_op_sbc_imm(_rd, _rn) \ + /* Rd = Rn - Imm - 1 + C = Rn - (Imm + 1) + C */ \ + generate_alu_imm(subi, sub, _rd, _rn, ((imm) + 1)); \ + aa64_emit_add(_rd, _rd, reg_c_cache); \ + +#define generate_op_rsc_imm(_rd, _rn) \ + /* Rd = Imm - Rn - 1 + C = (Imm - 1) - Rn + C */ \ + generate_load_imm(reg_temp, ((imm)-1)); \ + aa64_emit_sub(_rd, reg_temp, _rn) \ + aa64_emit_add(_rd, _rd, reg_c_cache); \ + +/* Uses native instructions when needed, for C/V accuracy */ +#define generate_op_adcs_imm(_rd, _rn) \ + if (imm) { \ + load_c_flag(); \ + generate_load_imm(reg_temp, (imm)); \ + aa64_emit_adcs(_rd, _rn, reg_temp); \ + } else { \ + aa64_emit_adds(_rd, _rn, reg_c_cache); \ + } \ + generate_op_arith_flags(); \ + +#define 
generate_op_sbcs_imm(_rd, _rn) \ + load_c_flag(); \ + if (imm) { \ + generate_load_imm(reg_temp, (imm)); \ + aa64_emit_sbcs(_rd, _rn, reg_temp); \ + } else { \ + aa64_emit_sbcs(_rd, _rn, reg_zero); \ + } \ + generate_op_arith_flags(); \ + +#define generate_op_rscs_imm(_rd, _rn) \ + load_c_flag(); \ + if (imm) { \ + generate_load_imm(reg_temp, (imm)); \ + aa64_emit_sbcs(_rd, reg_temp, _rn); \ + } else { \ + aa64_emit_sbcs(_rd, reg_zero, _rn); \ + } \ + generate_op_arith_flags(); \ + + +// Move operations, only logical flags +#define generate_op_mov_imm(_rd, _rn) \ + generate_load_imm(_rd, imm) \ + +#define generate_op_movs_imm(_rd, _rn) \ + generate_load_imm(_rd, imm) \ + aa64_emit_movlo(reg_n_cache, (imm) >> 31); \ + aa64_emit_movlo(reg_z_cache, (imm) ? 0 : 1); \ + +#define generate_op_mvn_imm(_rd, _rn) \ + generate_load_imm(_rd, (~imm)) \ + +#define generate_op_mvns_imm(_rd, _rn) \ + generate_load_imm(_rd, (~imm)); \ + aa64_emit_movlo(reg_n_cache, (~(imm)) >> 31); \ + aa64_emit_movlo(reg_z_cache, (~(imm)) ? 1 : 0); \ + +#define generate_op_mov_reg(_rd, _rn, _rm) \ + aa64_emit_mov(_rd, _rm) \ + +#define generate_op_movs_reg(_rd, _rn, _rm) \ + aa64_emit_addi(_rd, _rm, 0); \ + generate_op_logic_flags(_rd) \ + +#define generate_op_mvn_reg(_rd, _rn, _rm) \ + aa64_emit_orn(_rd, reg_zero, _rm) \ + +#define generate_op_mvns_reg(_rd, _rn, _rm) \ + aa64_emit_orn(_rd, reg_zero, _rm) \ + generate_op_logic_flags(_rd) \ + +// Testing/Comparison functions +#define generate_op_cmp_reg(_rd, _rn, _rm) \ + generate_op_subs_reg(reg_temp2, _rn, _rm) \ + +#define generate_op_cmn_reg(_rd, _rn, _rm) \ + generate_op_adds_reg(reg_temp2, _rn, _rm) \ + +#define generate_op_tst_reg(_rd, _rn, _rm) \ + generate_op_ands_reg(reg_temp2, _rn, _rm) \ + +#define generate_op_teq_reg(_rd, _rn, _rm) \ + generate_op_eors_reg(reg_temp2, _rn, _rm) \ + +#define generate_op_cmp_imm(_rd, _rn) \ + generate_op_subs_imm(reg_temp2, _rn) \ + +#define generate_op_cmn_imm(_rd, _rn) \ + generate_op_adds_imm(reg_temp2, _rn) \ + +#define generate_op_tst_imm(_rd, _rn) \ + generate_op_ands_imm(reg_temp2, _rn) \ + +#define generate_op_teq_imm(_rd, _rn) \ + generate_op_eors_imm(reg_temp2, _rn) \ + + +#define arm_generate_op_load_yes() \ + generate_load_reg_pc(reg_a1, rn, 8) \ + +#define arm_generate_op_load_no() \ + +#define arm_op_check_yes() \ + check_load_reg_pc(arm_reg_a1, rn, 8) \ + +#define arm_op_check_no() \ + +#define arm_generate_op_reg_flags(name, load_op) \ + arm_decode_data_proc_reg(opcode); \ + if(check_generate_c_flag) \ + { \ + generate_load_rm_sh(flags); \ + } \ + else \ + { \ + generate_load_rm_sh(no_flags); \ + } \ + \ + arm_op_check_##load_op(); \ + generate_op_##name##_reg(arm_to_a64_reg[rd], arm_to_a64_reg[rn], \ + arm_to_a64_reg[rm]) \ + +#define arm_generate_op_reg(name, load_op) \ + arm_decode_data_proc_reg(opcode); \ + generate_load_rm_sh(no_flags); \ + arm_op_check_##load_op(); \ + generate_op_##name##_reg(arm_to_a64_reg[rd], arm_to_a64_reg[rn], \ + arm_to_a64_reg[rm]) \ + +#define arm_generate_op_imm(name, load_op) \ + arm_decode_data_proc_imm(opcode); \ + ror(imm, imm, imm_ror); \ + arm_op_check_##load_op(); \ + generate_op_##name##_imm(arm_to_a64_reg[rd], arm_to_a64_reg[rn]) \ + +#define arm_generate_op_imm_flags(name, load_op) \ + arm_generate_op_imm(name, load_op) \ + +#define arm_data_proc(name, type, flags_op) \ +{ \ + arm_generate_op_##type(name, yes); \ + check_store_reg_pc_##flags_op(rd); \ +} \ + +#define arm_data_proc_test(name, type) \ +{ \ + arm_generate_op_##type(name, yes); \ +} \ + +#define 
arm_data_proc_unary(name, type, flags_op) \ +{ \ + arm_generate_op_##type(name, no); \ + check_store_reg_pc_##flags_op(rd); \ +} \ + +// 32 bit multiplication + +#define arm_multiply_flags_yes(_rd) \ + generate_op_logic_flags(_rd) \ + +#define arm_multiply_flags_no(_rd) \ + +#define arm_multiply_add_no() \ + aa64_emit_mul(arm_to_a64_reg[rd], arm_to_a64_reg[rm], arm_to_a64_reg[rs]); \ + +#define arm_multiply_add_yes() \ + aa64_emit_madd(arm_to_a64_reg[rd], arm_to_a64_reg[rn], \ + arm_to_a64_reg[rm], arm_to_a64_reg[rs]); \ + +#define arm_multiply(add_op, flags) \ +{ \ + arm_decode_multiply(); \ + arm_multiply_add_##add_op(); \ + arm_multiply_flags_##flags(arm_to_a64_reg[rd]); \ +} \ + +// 32x32 -> 64 multiplication (long mul/muladd) + +#define generate_multiply_s64() \ + aa64_emit_smaddl(reg_temp, reg_zero, arm_to_a64_reg[rm], arm_to_a64_reg[rs]) + +#define generate_multiply_u64() \ + aa64_emit_umaddl(reg_temp, reg_zero, arm_to_a64_reg[rm], arm_to_a64_reg[rs]) + +#define generate_multiply_s64_add() \ + aa64_emit_smaddl(reg_temp, reg_temp, arm_to_a64_reg[rm], arm_to_a64_reg[rs]) + +#define generate_multiply_u64_add() \ + aa64_emit_umaddl(reg_temp, reg_temp, arm_to_a64_reg[rm], arm_to_a64_reg[rs]) + +#define arm_multiply_long_flags_yes(_rdlo, _rdhi) \ + aa64_emit_orr(reg_z_cache, _rdlo, _rdhi); \ + aa64_emit_cmpi(reg_z_cache, 0); \ + aa64_emit_cset(reg_z_cache, ccode_eq); \ + aa64_emit_lsr(reg_n_cache, _rdhi, 31); \ + +#define arm_multiply_long_flags_no(_rdlo, _rdhi) \ + +#define arm_multiply_long_add_yes(name) \ + aa64_emit_merge_regs(reg_temp, arm_to_a64_reg[rdhi], arm_to_a64_reg[rdlo]); \ + generate_multiply_##name() \ + +#define arm_multiply_long_add_no(name) \ + generate_multiply_##name() \ + +#define arm_multiply_long(name, add_op, flags) \ +{ \ + arm_decode_multiply_long(); \ + arm_multiply_long_add_##add_op(name); \ + aa64_emit_andi64(arm_to_a64_reg[rdlo], reg_temp, 0, 31); \ + aa64_emit_lsr64(arm_to_a64_reg[rdhi], reg_temp, 32); \ + arm_multiply_long_flags_##flags(arm_to_a64_reg[rdlo], arm_to_a64_reg[rdhi]);\ +} \ + +#define arm_psr_read(op_type, psr_reg) \ + generate_function_call(execute_read_##psr_reg); \ + generate_store_reg(reg_res, rd) \ + +u32 execute_store_cpsr_body(u32 _cpsr, u32 store_mask, u32 address) +{ + reg[REG_CPSR] = _cpsr; + if(store_mask & 0xFF) + { + set_cpu_mode(cpu_modes[_cpsr & 0x1F]); + if((io_registers[REG_IE] & io_registers[REG_IF]) && + io_registers[REG_IME] && ((_cpsr & 0x80) == 0)) + { + reg_mode[MODE_IRQ][6] = address + 4; + spsr[MODE_IRQ] = _cpsr; + reg[REG_CPSR] = 0xD2; + set_cpu_mode(MODE_IRQ); + return 0x00000018; + } + } + + return 0; +} + +#define arm_psr_load_new_reg() \ + generate_load_reg(reg_a0, rm) \ + +#define arm_psr_load_new_imm() \ + generate_load_imm(reg_a0, imm) \ + +#define arm_psr_store(op_type, psr_reg) \ + arm_psr_load_new_##op_type(); \ + generate_load_imm(reg_a1, psr_masks[psr_field]); \ + generate_load_pc(reg_a2, (pc)); \ + generate_function_call(execute_store_##psr_reg) \ + +#define arm_psr(op_type, transfer_type, psr_reg) \ +{ \ + arm_decode_psr_##op_type(opcode); \ + arm_psr_##transfer_type(op_type, psr_reg); \ +} \ + +#define thumb_load_pc_pool_const(rd, value) \ + generate_load_imm(arm_to_a64_reg[rd], (value)); \ + +#define arm_access_memory_load(mem_type) \ + cycle_count += 2; \ + generate_load_pc(reg_a1, (pc + 8)); \ + generate_function_call(execute_load_##mem_type); \ + generate_store_reg(reg_res, rd); \ + check_store_reg_pc_no_flags(rd) \ + +#define arm_access_memory_store(mem_type) \ + cycle_count++; \ + 
generate_load_pc(reg_a2, (pc + 4)); \ + generate_load_reg_pc(reg_a1, rd, 12); \ + generate_function_call(execute_store_##mem_type) \ + +#define arm_access_memory_reg_pre_up() \ + aa64_emit_add(reg_a0, arm_to_a64_reg[rn], arm_to_a64_reg[rm]) \ + +#define arm_access_memory_reg_pre_down() \ + aa64_emit_sub(reg_a0, arm_to_a64_reg[rn], arm_to_a64_reg[rm]) \ + +#define arm_access_memory_reg_pre(adjust_dir) \ + check_load_reg_pc(arm_reg_a0, rn, 8); \ + arm_access_memory_reg_pre_##adjust_dir() \ + +#define arm_access_memory_reg_pre_wb(adjust_dir) \ + arm_access_memory_reg_pre(adjust_dir); \ + generate_store_reg(reg_a0, rn) \ + +#define arm_access_memory_reg_post_up() \ + aa64_emit_add(arm_to_a64_reg[rn], arm_to_a64_reg[rn], arm_to_a64_reg[rm]) \ + +#define arm_access_memory_reg_post_down() \ + aa64_emit_sub(arm_to_a64_reg[rn], arm_to_a64_reg[rn], arm_to_a64_reg[rm]) \ + +#define arm_access_memory_reg_post(adjust_dir) \ + generate_load_reg(reg_a0, rn); \ + arm_access_memory_reg_post_##adjust_dir() \ + +#define arm_access_memory_imm_pre_up() \ + aa64_emit_addi(reg_a0, arm_to_a64_reg[rn], offset) \ + +#define arm_access_memory_imm_pre_down() \ + aa64_emit_subi(reg_a0, arm_to_a64_reg[rn], offset) \ + +#define arm_access_memory_imm_pre(adjust_dir) \ + check_load_reg_pc(arm_reg_a0, rn, 8); \ + arm_access_memory_imm_pre_##adjust_dir() \ + +#define arm_access_memory_imm_pre_wb(adjust_dir) \ + arm_access_memory_imm_pre(adjust_dir); \ + generate_store_reg(reg_a0, rn) \ + +#define arm_access_memory_imm_post_up() \ + aa64_emit_addi(arm_to_a64_reg[rn], arm_to_a64_reg[rn], offset) \ + +#define arm_access_memory_imm_post_down() \ + aa64_emit_subi(arm_to_a64_reg[rn], arm_to_a64_reg[rn], offset) \ + +#define arm_access_memory_imm_post(adjust_dir) \ + generate_load_reg(reg_a0, rn); \ + arm_access_memory_imm_post_##adjust_dir() \ + +#define arm_data_trans_reg(adjust_op, adjust_dir) \ + arm_decode_data_trans_reg(); \ + generate_load_offset_sh(); \ + arm_access_memory_reg_##adjust_op(adjust_dir) \ + +#define arm_data_trans_imm(adjust_op, adjust_dir) \ + arm_decode_data_trans_imm(); \ + arm_access_memory_imm_##adjust_op(adjust_dir) \ + +#define arm_data_trans_half_reg(adjust_op, adjust_dir) \ + arm_decode_half_trans_r(); \ + arm_access_memory_reg_##adjust_op(adjust_dir) \ + +#define arm_data_trans_half_imm(adjust_op, adjust_dir) \ + arm_decode_half_trans_of(); \ + arm_access_memory_imm_##adjust_op(adjust_dir) \ + +#define arm_access_memory(access_type, direction, adjust_op, mem_type, \ + offset_type) \ +{ \ + arm_data_trans_##offset_type(adjust_op, direction); \ + arm_access_memory_##access_type(mem_type); \ +} \ + +#define word_bit_count(word) \ + (bit_count[word >> 8] + bit_count[word & 0xFF]) \ + +#define arm_block_memory_load() \ + generate_function_call(execute_aligned_load32); \ + generate_store_reg(reg_res, i) \ + +#define arm_block_memory_store() \ + generate_load_reg_pc(reg_a1, i, 8); \ + generate_function_call(execute_aligned_store32) \ + +#define arm_block_memory_final_load() \ + arm_block_memory_load() \ + +#define arm_block_memory_final_store() \ + generate_load_pc(reg_a2, (pc + 4)); \ + generate_load_reg(reg_a1, i) \ + generate_function_call(execute_store_u32); \ + +#define arm_block_memory_adjust_pc_store() \ + +#define arm_block_memory_adjust_pc_load() \ + if(reg_list & 0x8000) \ + { \ + generate_indirect_branch_arm(); \ + } \ + +#define arm_block_memory_offset_down_a() \ + aa64_emit_subi(reg_save0, base_reg, ((word_bit_count(reg_list)-1) * 4)) \ + +#define arm_block_memory_offset_down_b() \ + 
aa64_emit_subi(reg_save0, base_reg, (word_bit_count(reg_list) * 4)) \ + +#define arm_block_memory_offset_no() \ + aa64_emit_addi(reg_save0, base_reg, 0) \ + +#define arm_block_memory_offset_up() \ + aa64_emit_addi(reg_save0, base_reg, 4) \ + +#define arm_block_memory_writeback_down() \ + aa64_emit_subi(base_reg, base_reg, (word_bit_count(reg_list) * 4)) \ + +#define arm_block_memory_writeback_up() \ + aa64_emit_addi(base_reg, base_reg, (word_bit_count(reg_list) * 4)) \ + +#define arm_block_memory_writeback_no() + +// Only emit writeback if the register is not in the list + +#define arm_block_memory_writeback_load(writeback_type) \ + if(!((reg_list >> rn) & 0x01)) \ + { \ + arm_block_memory_writeback_##writeback_type(); \ + } \ + +#define arm_block_memory_writeback_store(writeback_type) \ + arm_block_memory_writeback_##writeback_type() \ + +#define arm_block_memory(access_type, offset_type, writeback_type, s_bit) \ +{ \ + arm_decode_block_trans(); \ + u32 i; \ + u32 offset = 0; \ + u32 base_reg = arm_to_a64_reg[rn]; \ + \ + arm_block_memory_offset_##offset_type(); \ + arm_block_memory_writeback_##access_type(writeback_type); \ + \ + { \ + aa64_emit_andi(reg_save0, reg_save0, 30, 29); /* clear 2 LSB */ \ + \ + for(i = 0; i < 16; i++) \ + { \ + if((reg_list >> i) & 0x01) \ + { \ + cycle_count++; \ + aa64_emit_addi(reg_a0, reg_save0, offset); \ + if(reg_list & ~((2 << i) - 1)) \ + { \ + arm_block_memory_##access_type(); \ + offset += 4; \ + } \ + else \ + { \ + arm_block_memory_final_##access_type(); \ + break; \ + } \ + } \ + } \ + \ + arm_block_memory_adjust_pc_##access_type(); \ + } \ +} \ + + +// ARM: rn *must* be different from rm and rd. rm *can* be the same as rd. + +#define arm_swap(type) \ +{ \ + arm_decode_swap(); \ + cycle_count += 3; \ + generate_load_reg(reg_a0, rn); \ + generate_function_call(execute_load_##type); \ + generate_load_reg(reg_a1, rm); \ + generate_store_reg(reg_res, rd); \ + generate_load_reg(reg_a0, rn); \ + generate_function_call(execute_store_##type); \ +} \ + +#define thumb_generate_op_load_yes(_rs) \ + generate_load_reg(reg_a1, _rs) \ + +#define thumb_generate_op_load_no(_rs) \ + +#define thumb_generate_op_reg(name, _rd, _rs, _rn) \ + generate_op_##name##_reg(arm_to_a64_reg[_rd], \ + arm_to_a64_reg[_rs], arm_to_a64_reg[_rn]) \ + +#define thumb_generate_op_imm(name, _rd, _rs, _rn) \ + generate_op_##name##_imm(arm_to_a64_reg[_rd], arm_to_a64_reg[_rs]) \ + +// Types: add_sub, add_sub_imm, alu_op, imm +// Affects N/Z/C/V flags + +#define thumb_data_proc(type, name, rn_type, _rd, _rs, _rn) \ +{ \ + thumb_decode_##type(); \ + thumb_generate_op_##rn_type(name, _rd, _rs, _rn); \ +} \ + +#define thumb_data_proc_test(type, name, rn_type, _rs, _rn) \ +{ \ + thumb_decode_##type(); \ + thumb_generate_op_##rn_type(name, 0, _rs, _rn); \ +} \ + +#define thumb_data_proc_unary(type, name, rn_type, _rd, _rn) \ +{ \ + thumb_decode_##type(); \ + thumb_generate_op_##rn_type(name, _rd, 0, _rn); \ +} \ + +#define check_store_reg_pc_thumb(_rd) \ + if(_rd == REG_PC) \ + { \ + generate_indirect_branch_cycle_update(thumb); \ + } \ + +#define thumb_data_proc_hi(name) \ +{ \ + thumb_decode_hireg_op(); \ + u32 dest_rd = rd; \ + check_load_reg_pc(arm_reg_a0, rs, 4); \ + check_load_reg_pc(arm_reg_a1, rd, 4); \ + generate_op_##name##_reg(arm_to_a64_reg[dest_rd], arm_to_a64_reg[rd], \ + arm_to_a64_reg[rs]); \ + check_store_reg_pc_thumb(dest_rd); \ +} \ + +#define thumb_data_proc_test_hi(name) \ +{ \ + thumb_decode_hireg_op(); \ + check_load_reg_pc(arm_reg_a0, rs, 4); \ + 
check_load_reg_pc(arm_reg_a1, rd, 4); \ + generate_op_##name##_reg(reg_temp, arm_to_a64_reg[rd], \ + arm_to_a64_reg[rs]); \ +} \ + +#define thumb_data_proc_mov_hi() \ +{ \ + thumb_decode_hireg_op(); \ + check_load_reg_pc(arm_reg_a0, rs, 4); \ + generate_mov(rd, rs); \ + check_store_reg_pc_thumb(rd); \ +} \ + +#define thumb_load_pc(_rd) \ +{ \ + thumb_decode_imm(); \ + generate_load_pc(arm_to_a64_reg[_rd], (((pc & ~2) + 4) + (imm * 4))); \ +} \ + +#define thumb_load_sp(_rd) \ +{ \ + thumb_decode_imm(); \ + aa64_emit_addi(arm_to_a64_reg[_rd], reg_r13, (imm * 4)); \ +} \ + +#define thumb_adjust_sp_up() \ + aa64_emit_addi(reg_r13, reg_r13, (imm * 4)); \ + +#define thumb_adjust_sp_down() \ + aa64_emit_subi(reg_r13, reg_r13, (imm * 4)); \ + +#define thumb_adjust_sp(direction) \ +{ \ + thumb_decode_add_sp(); \ + thumb_adjust_sp_##direction(); \ +} \ + +// Decode types: shift, alu_op +// Operation types: lsl, lsr, asr, ror +// Affects N/Z/C flags + +#define thumb_generate_shift_imm(name) \ + if(check_generate_c_flag) \ + { \ + generate_shift_imm_##name##_flags(rd, rs, imm); \ + } \ + else \ + { \ + generate_shift_imm_##name##_no_flags(rd, rs, imm); \ + } \ + if(rs != rd) \ + { \ + generate_mov(rd, rs); \ + } \ + +#define thumb_generate_shift_reg(name) \ +{ \ + u32 original_rd = rd; \ + if(check_generate_c_flag) \ + { \ + generate_shift_reg_##name##_flags(rd, rs); \ + } \ + else \ + { \ + generate_shift_reg_##name##_no_flags(rd, rs); \ + } \ + aa64_emit_addi(arm_to_a64_reg[original_rd], reg_a0, 0); \ +} \ + +#define thumb_shift(decode_type, op_type, value_type) \ +{ \ + thumb_decode_##decode_type(); \ + thumb_generate_shift_##value_type(op_type); \ + generate_op_logic_flags(arm_to_a64_reg[rd]); \ +} \ + +// Operation types: imm, mem_reg, mem_imm + +#define thumb_access_memory_load(mem_type, reg_rd) \ + cycle_count += 2; \ + generate_load_pc(reg_a1, (pc + 4)); \ + generate_function_call(execute_load_##mem_type); \ + generate_store_reg(reg_res, reg_rd) \ + +#define thumb_access_memory_store(mem_type, reg_rd) \ + cycle_count++; \ + generate_load_reg(reg_a1, reg_rd) \ + generate_load_pc(reg_a2, (pc + 2)); \ + generate_function_call(execute_store_##mem_type); \ + +#define thumb_access_memory_generate_address_pc_relative(offset, reg_rb, \ + reg_ro) \ + generate_load_pc(reg_a0, (offset)) \ + +#define thumb_access_memory_generate_address_reg_imm(offset, reg_rb, reg_ro) \ + aa64_emit_addi(reg_a0, arm_to_a64_reg[reg_rb], (offset)) \ + +#define thumb_access_memory_generate_address_reg_imm_sp(offset, reg_rb, reg_ro) \ + aa64_emit_addi(reg_a0, arm_to_a64_reg[reg_rb], (offset * 4)) \ + +#define thumb_access_memory_generate_address_reg_reg(offset, reg_rb, reg_ro) \ + aa64_emit_add(reg_a0, arm_to_a64_reg[reg_rb], arm_to_a64_reg[reg_ro]) \ + +#define thumb_access_memory(access_type, op_type, reg_rd, reg_rb, reg_ro, \ + address_type, offset, mem_type) \ +{ \ + thumb_decode_##op_type(); \ + thumb_access_memory_generate_address_##address_type(offset, reg_rb, \ + reg_ro); \ + thumb_access_memory_##access_type(mem_type, reg_rd); \ +} \ + + +#define thumb_block_address_preadjust_no(base_reg) \ + aa64_emit_addi(reg_save0, base_reg, 0) \ + +#define thumb_block_address_preadjust_down(base_reg) \ + aa64_emit_subi(reg_save0, base_reg, (bit_count[reg_list] * 4)); \ + aa64_emit_addi(base_reg, reg_save0, 0); \ + +#define thumb_block_address_preadjust_push_lr(base_reg) \ + aa64_emit_subi(reg_save0, base_reg, ((bit_count[reg_list] + 1) * 4)); \ + aa64_emit_addi(base_reg, reg_save0, 0); \ + +#define 
thumb_block_address_postadjust_no(base_reg) \ + +#define thumb_block_address_postadjust_up(base_reg) \ + aa64_emit_addi(base_reg, reg_save0, (bit_count[reg_list] * 4)); \ + +#define thumb_block_address_postadjust_pop_pc(base_reg) \ + aa64_emit_addi(base_reg, reg_save0, ((bit_count[reg_list] + 1) * 4)); \ + +#define thumb_block_address_postadjust_push_lr(base_reg) \ + +#define thumb_block_memory_load() \ + generate_function_call(execute_aligned_load32); \ + generate_store_reg(reg_res, i) \ + +#define thumb_block_memory_store() \ + generate_load_reg(reg_a1, i) \ + generate_function_call(execute_aligned_store32); \ + +#define thumb_block_memory_final_load() \ + thumb_block_memory_load() \ + +#define thumb_block_memory_final_store() \ + generate_load_pc(reg_a2, (pc + 2)); \ + generate_load_reg(reg_a1, i) \ + generate_function_call(execute_store_u32); \ + +#define thumb_block_memory_final_no(access_type) \ + thumb_block_memory_final_##access_type() \ + +#define thumb_block_memory_final_up(access_type) \ + thumb_block_memory_final_##access_type() \ + +#define thumb_block_memory_final_down(access_type) \ + thumb_block_memory_final_##access_type() \ + +#define thumb_block_memory_final_push_lr(access_type) \ + thumb_block_memory_##access_type() \ + +#define thumb_block_memory_final_pop_pc(access_type) \ + thumb_block_memory_##access_type() \ + +#define thumb_block_memory_extra_no() \ + +#define thumb_block_memory_extra_up() \ + +#define thumb_block_memory_extra_down() \ + +#define thumb_block_memory_extra_push_lr() \ + aa64_emit_addi(reg_a0, reg_save0, (bit_count[reg_list] * 4)); \ + generate_load_reg(reg_a1, REG_LR) \ + generate_function_call(execute_aligned_store32); \ + +#define thumb_block_memory_extra_pop_pc() \ + aa64_emit_addi(reg_a0, reg_save0, (bit_count[reg_list] * 4)); \ + generate_function_call(execute_aligned_load32); \ + generate_indirect_branch_cycle_update(thumb) \ + +#define thumb_block_memory(access_type, pre_op, post_op, arm_base_reg) \ +{ \ + thumb_decode_rlist(); \ + u32 i; \ + u32 offset = 0; \ + u32 base_reg = arm_to_a64_reg[arm_base_reg]; \ + \ + thumb_block_address_preadjust_##pre_op(base_reg); \ + thumb_block_address_postadjust_##post_op(base_reg); \ + \ + { \ + aa64_emit_andi(reg_save0, reg_save0, 30, 29); /* clear 2 LSB */ \ + \ + for(i = 0; i < 8; i++) \ + { \ + if((reg_list >> i) & 0x01) \ + { \ + cycle_count++; \ + aa64_emit_addi(reg_a0, reg_save0, offset); \ + if(reg_list & ~((2 << i) - 1)) \ + { \ + thumb_block_memory_##access_type(); \ + offset += 4; \ + } \ + else \ + { \ + thumb_block_memory_final_##post_op(access_type); \ + break; \ + } \ + } \ + } \ + \ + thumb_block_memory_extra_##post_op(); \ + } \ +} + +#define generate_branch_filler(condition_code, writeback_location) \ + (writeback_location) = translation_ptr; \ + aa64_emit_brcond(condition_code, 0); \ + + +#define thumb_conditional_branch(condition) \ +{ \ + generate_cycle_update(); \ + generate_condition_##condition(); \ + generate_branch_no_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + generate_branch_patch_conditional(backpatch_address, translation_ptr); \ + block_exit_position++; \ +} \ + +#define arm_conditional_block_header() \ + generate_cycle_update(); \ + generate_condition(); \ + +#define arm_b() \ + generate_branch() \ + +#define arm_bl() \ + generate_load_pc(reg_r14, (pc + 4)); \ + generate_branch() \ + +#define arm_bx() \ + arm_decode_branchx(opcode); \ + generate_load_reg(reg_a0, rn); \ + 
generate_indirect_branch_dual() \ + +#define arm_swi() \ + generate_load_pc(reg_a0, (pc + 4)); \ + generate_function_call(execute_swi); \ + generate_branch() \ + +#define thumb_b() \ + generate_branch_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + block_exit_position++ \ + +#define thumb_bl() \ + generate_load_pc(reg_r14, ((pc + 2) | 0x01)); \ + generate_branch_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + block_exit_position++ \ + +#define thumb_blh() \ +{ \ + thumb_decode_branch(); \ + generate_alu_imm(addi, add, reg_a0, reg_r14, (offset * 2)); \ + generate_load_pc(reg_r14, ((pc + 2) | 0x01)); \ + generate_indirect_branch_cycle_update(dual); \ + break; \ +} \ + +#define thumb_bx() \ +{ \ + thumb_decode_hireg_op(); \ + generate_load_reg_pc(reg_a0, rs, 4); \ + generate_indirect_branch_cycle_update(dual); \ +} \ + +#define thumb_process_cheats() \ + generate_function_call(a64_cheat_hook); + +#define arm_process_cheats() \ + generate_function_call(a64_cheat_hook); + +#ifdef TRACE_INSTRUCTIONS + void trace_instruction(u32 pc, u32 mode) + { + if (mode) + printf("Executed arm %x\n", pc); + else + printf("Executed thumb %x\n", pc); + #ifdef TRACE_REGISTERS + print_regs(); + #endif + } + + #define emit_trace_instruction(pc, mode) \ + emit_save_regs(); \ + generate_load_imm(reg_a0, pc); \ + generate_load_imm(reg_a1, mode); \ + generate_function_call(trace_instruction); \ + emit_restore_regs() + #define emit_trace_thumb_instruction(pc) emit_trace_instruction(pc, 0) + #define emit_trace_arm_instruction(pc) emit_trace_instruction(pc, 1) +#else + #define emit_trace_thumb_instruction(pc) + #define emit_trace_arm_instruction(pc) +#endif + +#define thumb_swi() \ + generate_load_pc(reg_a0, (pc + 2)); \ + generate_function_call(execute_swi); \ + generate_branch_cycle_update( \ + block_exits[block_exit_position].branch_source, \ + block_exits[block_exit_position].branch_target); \ + block_exit_position++ \ + +#define arm_hle_div(cpu_mode) \ + aa64_emit_sdiv(reg_r3, reg_r0, reg_r1); \ + aa64_emit_msub(reg_r1, reg_r0, reg_r1, reg_r3); \ + aa64_emit_mov(reg_r0, reg_r3); \ + aa64_emit_cmpi(reg_r3, 0); \ + aa64_emit_csneg(reg_r3, reg_r3, reg_r3, ccode_ge); \ + +#define arm_hle_div_arm(cpu_mode) \ + aa64_emit_sdiv(reg_r3, reg_r1, reg_r0); \ + aa64_emit_msub(reg_r1, reg_r1, reg_r0, reg_r3); \ + aa64_emit_mov(reg_r0, reg_r3); \ + aa64_emit_cmpi(reg_r3, 0); \ + aa64_emit_csneg(reg_r3, reg_r3, reg_r3, ccode_ge); \ + +#define generate_translation_gate(type) \ + generate_load_pc(reg_a0, pc); \ + generate_indirect_branch_no_cycle_update(type) \ + + +extern void* ldst_handler_functions[16*4 + 17*6]; +extern void* ldst_lookup_tables[16*4 + 17*6]; + + +void init_emitter() { + rom_cache_watermark = 0; + init_bios_hooks(); + + // Generate handler table + memcpy(ldst_lookup_tables, ldst_handler_functions, sizeof(ldst_lookup_tables)); +} + +u32 execute_arm_translate_internal(u32 cycles, void *regptr); + +u32 execute_arm_translate(u32 cycles) { + return execute_arm_translate_internal(cycles, &reg[0]); +} + +#endif + + diff --git a/arm/arm64_stub.S b/arm/arm64_stub.S new file mode 100644 index 0000000..66a87f8 --- /dev/null +++ b/arm/arm64_stub.S @@ -0,0 +1,705 @@ +# gameplaySP +# +# Copyright (C) 2021 David Guillen Fandos +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free 
Software Foundation; either version 2 of +# the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +#include "../gpsp_config.h" + +#define defsymbl(symbol) \ +.align 2; \ +.type symbol, %function ;\ +.global symbol ; \ +.global _##symbol ; \ +symbol: \ +_##symbol: + +.text +.align 2 + +#define REG_R0 (0 * 4) +#define REG_R1 (1 * 4) +#define REG_R2 (2 * 4) +#define REG_R3 (3 * 4) +#define REG_R4 (4 * 4) +#define REG_R5 (5 * 4) +#define REG_R6 (6 * 4) +#define REG_R7 (7 * 4) +#define REG_R8 (8 * 4) +#define REG_R9 (9 * 4) +#define REG_R10 (10 * 4) +#define REG_R11 (11 * 4) +#define REG_R12 (12 * 4) +#define REG_R13 (13 * 4) +#define REG_R14 (14 * 4) +#define REG_SP (13 * 4) +#define REG_LR (14 * 4) +#define REG_PC (15 * 4) +#define REG_CPSR (16 * 4) +#define CPU_MODE (17 * 4) +#define CPU_HALT_STATE (18 * 4) + +#define REG_N_FLAG (20 * 4) +#define REG_Z_FLAG (21 * 4) +#define REG_C_FLAG (22 * 4) +#define REG_V_FLAG (23 * 4) +#define CHANGED_PC_STATUS (24 * 4) +#define COMPLETED_FRAME (25 * 4) +#define OAM_UPDATED (26 * 4) +#define REG_SAVE (27 * 4) +#define REG_SAVE2 (28 * 4) +#define REG_SAVE3 (29 * 4) +#define REG_SAVE4 (30 * 4) +#define REG_SAVE5 (31 * 4) + +#define reg_base x20 +#define reg_cycles w21 + +#define reg_c_flag w22 +#define reg_v_flag w23 +#define reg_z_flag w24 +#define reg_n_flag w25 + + +// Memory offsets from reg_base to the different buffers +#define RDMAP_OFF -0xB9000 // 8K pointers (64KB) +#define IWRAM_OFF -0xA9000 // 32KB (double for shadow) +#define VRAM_OFF -0x99000 // 96KB +#define EWRAM_OFF -0x81000 // 256KB (double for shadow) +#define MEM_TBL_OFF -0x1000 // Some space for the tables +#define SPSR_RAM_OFF 0x100 +#define REGMODE_RAM_OFF 0x118 +#define OAM_RAM_OFF 0x200 +#define PAL_RAM_OFF 0x600 +#define IOREG_OFF 0xA00 +#define PALCNV_RAM_OFF 0xE00 + +// Used for SWI handling +#define MODE_SUPERVISOR 3 +#define SUPERVISOR_SPSR (SPSR_RAM_OFF + 3*4) // spsr[3] +#define SUPERVISOR_LR (REGMODE_RAM_OFF + (3 * (7 * 4)) + (6 * 4)) // reg_mode[3][6] + + +// Stores and restores registers to their register storage in RAM + +#define load_registers() ;\ + ldp w6, w7, [reg_base, #0] ;\ + ldp w8, w9, [reg_base, #8] ;\ + ldp w10, w11, [reg_base, #16] ;\ + ldp w12, w13, [reg_base, #24] ;\ + ldp w14, w15, [reg_base, #32] ;\ + ldp w16, w17, [reg_base, #40] ;\ + ldp w26, w27, [reg_base, #48] ;\ + ldr w28, [reg_base, #56] ;\ + +#define store_registers() ;\ + stp w6, w7, [reg_base, #0] ;\ + stp w8, w9, [reg_base, #8] ;\ + stp w10, w11, [reg_base, #16] ;\ + stp w12, w13, [reg_base, #24] ;\ + stp w14, w15, [reg_base, #32] ;\ + stp w16, w17, [reg_base, #40] ;\ + stp w26, w27, [reg_base, #48] ;\ + str w28, [reg_base, #56] ;\ + + +// Extracts flags from CPSR into the cache flag registers + +#define extract_flags_reg(tmpreg) ;\ + ubfx reg_n_flag, tmpreg, #31, #1 ;\ + ubfx reg_z_flag, tmpreg, #30, #1 ;\ + ubfx reg_c_flag, tmpreg, #29, #1 ;\ + ubfx reg_v_flag, tmpreg, #28, #1 ;\ + +#define extract_flags(tmpreg) ;\ + ldr tmpreg, [reg_base, #REG_CPSR] ;\ + extract_flags_reg(tmpreg) ;\ + +// Collects cache flag bits and consolidates them 
to the CPSR reg + +#define consolidate_flags(tmpreg) ;\ + ldr tmpreg, [reg_base, #REG_CPSR] ;\ + bfi tmpreg, reg_n_flag, #31, #1 ;\ + bfi tmpreg, reg_z_flag, #30, #1 ;\ + bfi tmpreg, reg_c_flag, #29, #1 ;\ + bfi tmpreg, reg_v_flag, #28, #1 ;\ + str tmpreg, [reg_base, #REG_CPSR] ;\ + + +// Update the GBA hardware (video, sound, input, etc) +// w0: current PC + +defsymbl(a64_update_gba) + str w0, [reg_base, #REG_PC] // update the PC value + str lr, [reg_base, #REG_SAVE] // Save LR for later if needed + + consolidate_flags(w0) // update the CPSR + store_registers() // save out registers + + bl update_gba // update GBA state + + ldr w1, [reg_base, #COMPLETED_FRAME] // return to main if new frame + cbnz w1, return_to_main + + // Resume execution (perhaps from a new PC) + mov reg_cycles, w0 // load new cycle count + extract_flags(w2) // reload flag cache bits + + ldr w0, [reg_base, #CHANGED_PC_STATUS] // see if PC has changed + cbnz w0, 1f // go start from new PC + + ldr lr, [reg_base, #REG_SAVE] // Restore return point + load_registers() // reload registers + ret // resume execution, no PC change + +1: // Resume from new PC + ldr w0, [reg_base, #REG_PC] // load new PC + tbnz w2, #5, 2f // CPSR.T means in thumb mode + + bl block_lookup_address_arm + load_registers() // reload registers + br x0 // jump to new ARM block +2: + bl block_lookup_address_thumb + load_registers() // reload registers + br x0 // jump to new Thumb block +.size a64_update_gba, .-a64_update_gba + + +// Cheat hooks for master function +// This is called whenever PC == cheats-master-function +// Just calls the C function to process cheats + +defsymbl(a64_cheat_hook) + store_registers() + str lr, [reg_base, #REG_SAVE] + bl process_cheats + ldr lr, [reg_base, #REG_SAVE] + load_registers() + ret + + +// These are branch stubs for performing indirect branches. They are not +// branched to with a link and don't return; instead they jump elsewhere. + +// Input: +// r0: PC to branch to + +defsymbl(a64_indirect_branch_arm) + store_registers() + bl block_lookup_address_arm + load_registers() + br x0 + +defsymbl(a64_indirect_branch_thumb) + store_registers() + bl block_lookup_address_thumb + load_registers() + br x0 + +defsymbl(a64_indirect_branch_dual) + store_registers() + bl block_lookup_address_dual + load_registers() + br x0 + + +// Read CPSR and SPSR values + +defsymbl(execute_read_cpsr) + consolidate_flags(w0) // Consolidate on ret value + ret + +defsymbl(execute_read_spsr) + ldr w1, [reg_base, #CPU_MODE] // read cpu mode to w1 + add x0, reg_base, #SPSR_RAM_OFF // ptr to spsr table + ldr w0, [x0, x1, lsl #2] // Read actual value from table + ret + + +// Update the cpsr. 
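+// Only the bits selected by the mask are replaced; the cached NZCV flag +// registers are refreshed here, and mode/IRQ side effects are handled in C +// by execute_store_cpsr_body, which returns the new PC if execution moves.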
+ +// Input: +// w0: new cpsr value +// w1: bitmask of which bits in cpsr to update +// w2: current PC + +defsymbl(execute_store_cpsr) + ldr w4, [reg_base, #REG_CPSR] // read current CPSR + and w3, w0, w1 // reg_flags = new_cpsr & store_mask + bic w4, w4, w1 // current_cpsr & ~store_mask + orr w0, w3, w4 // w0 = final CPSR value + extract_flags_reg(w0) // Update cached flags too + + str lr, [reg_base, #REG_SAVE] + store_registers() + bl execute_store_cpsr_body // Do the remaining work in C code + + cbnz w0, 1f // If PC has changed due to this + + ldr lr, [reg_base, #REG_SAVE] // Resume execution where we left it + load_registers() + ret + +1: + // Returned value contains the PC, resume execution there + bl block_lookup_address_arm + load_registers() + br x0 // Resume in the returned block +.size execute_store_cpsr, .-execute_store_cpsr + + +// Write to SPSR +// w0: new SPSR value +// w1: store mask + +defsymbl(execute_store_spsr) + ldr w2, [reg_base, #CPU_MODE] // read cpu mode to w2 + add x2, reg_base, x2, lsl #2 // calculate table offset + ldr w3, [x2, #SPSR_RAM_OFF] // Read actual value from table + + and w0, w0, w1 // new-spsr & mask + bic w3, w3, w1 // old-spsr & ~mask + orr w0, w0, w3 // final spsr value + + str w0, [x2, #SPSR_RAM_OFF] // Store new SPSR + ret +.size execute_store_spsr, .-execute_store_spsr + +// Restore the cpsr from the current mode's spsr. + +// Input: +// r0: current pc + +defsymbl(execute_spsr_restore) + ldr w1, [reg_base, #CPU_MODE] // w1 = cpu_mode + cbz w1, 1f // Ignore if in user mode + + lsl w2, w1, #2 // We access 32 bit words + add w2, w2, #SPSR_RAM_OFF + ldr w3, [reg_base, x2] // w3 = spsr[cpu_mode] + str w3, [reg_base, #REG_CPSR] // update CPSR with SPSR value + extract_flags_reg(w3) // update cached flag values + + // This function call will pass r0 (address) and return it. + str lr, [reg_base, #REG_SAVE] + store_registers() // save ARM registers + bl execute_spsr_restore_body + ldr lr, [reg_base, #REG_SAVE] + load_registers() + +1: + ret +.size execute_spsr_restore, .-execute_spsr_restore + + +// Set up the mode transition work for calling an SWI. 
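+// Mirrors ARM exception entry: the return address goes to the supervisor LR, +// the current CPSR is saved into the supervisor SPSR, and the mode switches +// to supervisor with IRQs masked before set_cpu_mode is called.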
+ +// Input: +// r0: current pc + +defsymbl(execute_swi) + str lr, [reg_base, #REG_SAVE] + str w0, [reg_base, #SUPERVISOR_LR] // Store next PC into supervisor LR + consolidate_flags(w1) // Calculate current CPSR flags + str w1, [reg_base, #SUPERVISOR_SPSR] // Store them in the SPSR + bic w1, w1, #0x3F // Clear mode bits + mov w2, #(0x13 | 0x80) // Set supervisor mode bits + orr w1, w1, w2 + str w1, [reg_base, #REG_CPSR] // Update CPSR with new value + store_registers() + mov w0, #MODE_SUPERVISOR + bl set_cpu_mode // Set supervisor mode + ldr lr, [reg_base, #REG_SAVE] + load_registers() + ret +.size execute_swi, .-execute_swi + +defsymbl(execute_arm_translate_internal) + // save registers that will be clobbered + sub sp, sp, #96 + stp x19, x20, [sp, #0] + stp x21, x22, [sp, #16] + stp x23, x24, [sp, #32] + stp x25, x26, [sp, #48] + stp x27, x28, [sp, #64] + stp x29, x30, [sp, #80] + + mov reg_cycles, w0 // load cycle counter + mov reg_base, x1 // init base_reg + + // Check whether the CPU is sleeping already, we should just wait for IRQs + ldr w1, [reg_base, #CPU_HALT_STATE] + cmp w1, #0 + bne alert_loop + + ldr w0, [reg_base, #REG_PC] // r0 = current pc + ldr w1, [reg_base, #REG_CPSR] // r1 = flags + tst w1, #0x20 // see if Thumb bit is set + extract_flags(w2) // load flags + + bne 1f // if so lookup thumb + + bl block_lookup_address_arm + load_registers() + br x0 // jump to first ARM block +1: + bl block_lookup_address_thumb + load_registers() + br x0 // jump to first Thumb block + + +// Epilogue to return to the main thread (whatever called execute_arm_translate) + +return_to_main: + // restore the saved regs and return + ldp x19, x20, [sp, #0] + ldp x21, x22, [sp, #16] + ldp x23, x24, [sp, #32] + ldp x25, x26, [sp, #48] + ldp x27, x28, [sp, #64] + ldp x29, x30, [sp, #80] + add sp, sp, #96 + ret + + +// Memory read stub routines + +#define execute_load_builder(load_type, ldop, ldmask, tblidx, ldfn) ;\ + ;\ +defsymbl(execute_load_##load_type) ;\ + tst w0, #(0xf0000000 | ldmask) ;\ + lsr w3, w0, #24 ;\ + csinc w3, wzr, w3, ne ;\ + add x4, reg_base, (MEM_TBL_OFF + tblidx*136) ;\ + ldr x3, [x4, x3, lsl #3] ;\ + br x3 ;\ + ;\ +ld_bios_##load_type: /* BIOS area, need to verify PC */;\ + lsr w3, w1, #24 /* Are we running the BIOS */;\ + cbnz w3, ld_slow_##load_type ;\ + and w0, w0, #(0x7fff) /* BIOS only 16 KB */;\ + add x3, reg_base, #(RDMAP_OFF) ;\ + ldr x3, [x3] /* x3 = bios mem buffer */;\ + ldop w0, [x3, x0] /* load actual value */;\ + ret ;\ + ;\ +ld_ewram_##load_type: /* EWRAM area */;\ + and w0, w0, #(0x3ffff) ;\ + add x3, reg_base, #EWRAM_OFF ;\ + ldop w0, [x3, x0] ;\ + ret ;\ + ;\ +ld_iwram_##load_type: /* IWRAM area */;\ + and w0, w0, #(0x7fff) ;\ + add x3, reg_base, #(IWRAM_OFF+0x8000) ;\ + ldop w0, [x3, x0] ;\ + ret ;\ + ;\ +ld_ioram_##load_type: /* I/O RAM area */;\ + and w0, w0, #(0x3ff) ;\ + add x3, reg_base, #(IOREG_OFF) ;\ + ldop w0, [x3, x0] ;\ + ret ;\ + ;\ +ld_palram_##load_type: /* PAL RAM area */;\ + and w0, w0, #(0x3ff) ;\ + add x3, reg_base, #(PAL_RAM_OFF) ;\ + ldop w0, [x3, x0] ;\ + ret ;\ + ;\ +ld_oamram_##load_type: /* OAM RAM area */;\ + and w0, w0, #(0x3ff) ;\ + add x3, reg_base, #(OAM_RAM_OFF) ;\ + ldop w0, [x3, x0] ;\ + ret ;\ + ;\ +ld_rdmap_##load_type: ;\ + lsr w4, w0, #15 /* Each block is 32KB */;\ + add x3, reg_base, #(RDMAP_OFF) ;\ + ldr x4, [x3, x4, lsl #3] /* x4 = table pointer */;\ + and w0, w0, #(0x7fff) /* 32KB pages */;\ + ldop w0, [x4, x0] /* load actual value */;\ + ret ;\ + ;\ +ld_slow_##load_type: /* Slow C path */;\ + str w1, [reg_base, #REG_PC] 
/* write out PC */;\ + str lr, [reg_base, #REG_SAVE] /* Save LR */;\ + store_registers() ;\ + bl ldfn ;\ + ldr lr, [reg_base, #REG_SAVE] ;\ + load_registers() ;\ + ret ;\ +.size execute_load_##load_type, .-execute_load_##load_type + +#define load_lookup_table(load_type, aload_type) ;\ + .quad ld_slow_##aload_type /* -1: Unaligned/Bad access */;\ + .quad ld_bios_##aload_type /* 0x00: BIOS */;\ + .quad ld_slow_##aload_type /* 0x01: Open bus */;\ + .quad ld_ewram_##load_type /* 0x02: ewram */;\ + .quad ld_iwram_##load_type /* 0x03: iwram */;\ + .quad ld_ioram_##load_type /* 0x04: I/O regs */;\ + .quad ld_palram_##load_type /* 0x05: palette RAM */;\ + .quad ld_rdmap_##load_type /* 0x06: vram */;\ + .quad ld_oamram_##load_type /* 0x07: oam ram */;\ + .quad ld_rdmap_##load_type /* 0x08: gamepak: ignore */;\ + .quad ld_rdmap_##load_type /* 0x09: gamepak: ignore */;\ + .quad ld_rdmap_##load_type /* 0x0A: gamepak: ignore */;\ + .quad ld_rdmap_##load_type /* 0x0B: gamepak: ignore */;\ + .quad ld_rdmap_##load_type /* 0x0C: gamepak: ignore */;\ + .quad ld_slow_##aload_type /* 0x0D: EEPROM */;\ + .quad ld_slow_##aload_type /* 0x0E: backup */;\ + .quad ld_slow_##aload_type /* 0x0F: ignore */;\ + +// Aligned load is a bit special +defsymbl(execute_aligned_load32) + tst w0, #(0xf0000000) + lsr w3, w0, #24 + csinc w3, wzr, w3, ne + add x4, reg_base, (MEM_TBL_OFF + 5*136) + ldr x3, [x4, x3, lsl #3] + br x3 +ld_slow_aligned_u32: // Slow C path for multiple loads + str lr, [reg_base, #REG_SAVE] // Save LR + store_registers() + bl read_memory32 + ldr lr, [reg_base, #REG_SAVE] + load_registers() + ret +ld_bios_aligned_u32: + and w0, w0, #(0x7fff) // Do not verify PC on purpose + add x3, reg_base, #(RDMAP_OFF) + ldr x3, [x3] + ldr w0, [x3, x0] + ret + + +execute_load_builder( u8, ldrb, 0, 0, read_memory8) +execute_load_builder( s8, ldrsb, 0, 1, read_memory8s) +execute_load_builder(u16, ldrh, 1, 2, read_memory16) +execute_load_builder(s16, ldrsh, 1, 3, read_memory16s) +execute_load_builder(u32, ldr, 3, 4, read_memory32) + + +// Prepares for a external store (calls C code) +#define store_align_8() and w1, w1, #0xff +#define store_align_16() and w1, w1, #0xffff; bic w0, w0, #1 +#define store_align_32() bic w0, w0, #3 + +// Write out to memory. 
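+// Addresses with a nonzero top nibble take the slow C path; otherwise bits +// 24-27 index a 16-entry per-size handler table, so EWRAM/IWRAM/VRAM/OAM +// stores stay inlined and I/O, palette and cartridge writes go through C.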
+ +// Input: +// w0: address +// w1: value +// w2: PC value + +#define execute_store_builder(store_type, str_op, str_op16, load_op, \ + stmask, stmask16, tblidx) ;\ + ;\ +defsymbl(execute_store_u##store_type) ;\ + lsr w4, w0, #28 ;\ + lsr w3, w0, #24 ;\ + cbnz w4, ext_store_u##store_type ;\ + add x4, reg_base, (MEM_TBL_OFF + 816 + tblidx*128) ;\ + ldr x3, [x4, x3, lsl #3] ;\ + br x3 ;\ + ;\ +ext_store_u##store_type: ;\ +ext_store_u##store_type##_safe: ;\ + str w2, [reg_base, #REG_PC] /* write out PC */;\ + str lr, [reg_base, #REG_SAVE] /* Preserve LR */;\ + store_align_##store_type() ;\ + store_registers() ;\ + bl write_memory##store_type ;\ + cbnz w0, write_epilogue /* handle additional write stuff */;\ + ldr lr, [reg_base, #REG_SAVE] ;\ + load_registers() ;\ + ret /* resume if no side effects */;\ + ;\ +ext_store_iwram_u##store_type: ;\ + and w0, w0, #(0x7fff & ~stmask) /* Mask to mirror memory (+align)*/;\ + add x3, reg_base, #(IWRAM_OFF+0x8000) /* x3 = iwram base */;\ + str_op w1, [x0, x3] /* store data */;\ + sub x3, x3, #0x8000 /* x3 = iwram smc base */;\ + load_op w1, [x0, x3] /* w1 = SMC sentinel */;\ + cbnz w1, 3f /* Check value, should be zero */;\ + ret /* return */;\ + ;\ +ext_store_ewram_u##store_type: ;\ + and w0, w0, #(0x3ffff & ~stmask) /* Mask to mirror memory (+align)*/;\ + add x3, reg_base, #EWRAM_OFF /* x3 = ewram base */;\ + str_op w1, [x0, x3] /* store data */;\ + add x3, x3, #0x40000 /* x3 = ewram smc base */;\ + load_op w1, [x0, x3] /* w1 = SMC sentinel */;\ + cbnz w1, 3f /* Check value, should be zero */;\ + ret /* return */;\ + ;\ +ext_store_vram_u##store_type: ;\ +ext_store_vram_u##store_type##_safe: ;\ + and w0, w0, #(0x1ffff & ~stmask16) /* Mask to mirror memory (+align)*/;\ + sub w3, w0, #0x8000 /* Mirrored addr for last bank */;\ + cmp w0, #0x18000 /* Check if exceeds 96KB */;\ + csel w0, w3, w0, cs /* If it does, pick the mirror */;\ + add x3, reg_base, #VRAM_OFF /* x3 = ewram base */;\ + str_op16 w1, [x0, x3] /* store data */;\ + ret /* return */;\ + ;\ +ext_store_oam_ram_u##store_type: ;\ +ext_store_oam_ram_u##store_type##_safe: ;\ + and w0, w0, #(0x3ff & ~stmask16) /* Mask to mirror memory (+align)*/;\ + add x3, reg_base, #OAM_RAM_OFF /* x3 = oam ram base */;\ + str_op16 w1, [x0, x3] /* store data */;\ + str w29, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\ + ret /* return */;\ + ;\ +3: ;\ + str w2, [reg_base, #REG_PC] /* write out PC */;\ + store_registers() /* store registers */;\ + consolidate_flags(w1) ;\ + b smc_write /* perform smc write */;\ +.size execute_store_u##store_type, .-execute_store_u##store_type + +// for ignored areas, just return +ext_store_ignore: + ret // return + +#define store_lookup_table(store_type) ;\ + .quad ext_store_ignore /* 0x00: BIOS, ignore */;\ + .quad ext_store_ignore /* 0x01: ignore */;\ + .quad ext_store_ewram_u##store_type /* 0x02: ewram */;\ + .quad ext_store_iwram_u##store_type /* 0x03: iwram */;\ + .quad ext_store_u##store_type /* 0x04: I/O regs */;\ + .quad ext_store_u##store_type /* 0x05: palette RAM */;\ + .quad ext_store_vram_u##store_type /* 0x06: vram */;\ + .quad ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\ + .quad ext_store_u##store_type /* 0x08: gamepak: ignore */;\ + .quad ext_store_u##store_type /* 0x09: gamepak: ignore */;\ + .quad ext_store_u##store_type /* 0x0A: gamepak: ignore */;\ + .quad ext_store_u##store_type /* 0x0B: gamepak: ignore */;\ + .quad ext_store_u##store_type /* 0x0C: gamepak: ignore */;\ + .quad ext_store_u##store_type /* 0x0D: EEPROM */;\ + .quad 
ext_store_u##store_type /* 0x0E: backup */;\ + .quad ext_store_ignore /* 0x0F: ignore */;\ + +execute_store_builder(8, strb, strh, ldrb, 0, 1, 0) +execute_store_builder(16, strh, strh, ldrh, 1, 1, 1) +execute_store_builder(32, str, str, ldr, 3, 3, 2) + +// This is a store that is executed in a strm case (so no SMC checks in-between) + +defsymbl(execute_aligned_store32) + lsr w4, w0, #28 + lsr w3, w0, #24 + cbnz w4, ext_store_u32 + add x4, reg_base, MEM_TBL_OFF + 816 + 3*128 + ldr x3, [x4, x3, lsl #3] + br x3 +ext_store_iwram_u32_safe: + and w0, w0, #(0x7fff) // Mask to mirror memory (no need to align!) + add x3, reg_base, #(IWRAM_OFF+0x8000) // x3 = iwram base + str w1, [x0, x3] // store data + ret // Return +ext_store_ewram_u32_safe: + and w0, w0, #(0x3ffff) // Mask to mirror memory (no need to align!) + add x3, reg_base, #(EWRAM_OFF) // x3 = ewram base + str w1, [x0, x3] // store data + ret // Return +.size execute_aligned_store32, .-execute_aligned_store32 + +// This is called whenever an external store with side effects was performed +write_epilogue: + consolidate_flags(w1) // update the CPSR before update + + cmp w0, #2 // see if the alert is due to SMC + beq smc_write // if so, goto SMC handler + +alert_loop: + bl update_gba // update GBA until CPU isn't halted + + ldr w1, [reg_base, #COMPLETED_FRAME] // Check whether a frame was completed + cbnz w1, return_to_main // and return to caller function. + + ldr w1, [reg_base, #CPU_HALT_STATE] // Check whether the CPU is halted + cbnz w1, alert_loop // and keep looping until it is + + mov reg_cycles, w0 // load new cycle count + ldr w0, [reg_base, #REG_PC] // load new PC + b lookup_pc // Resume execution at that PC + + +smc_write: + bl flush_translation_cache_ram + ldr w0, [reg_base, #REG_PC] // load "current new" PC + +// Resume execution at PC (at w0) +lookup_pc: + ldr w1, [reg_base, #REG_CPSR] // w1 = flags + extract_flags_reg(w1) + tbnz w1, #5, 2f // see if Thumb bit is set + + // Lookup and jump to the right mode block + bl block_lookup_address_arm + load_registers() + br x0 +2: + bl block_lookup_address_thumb + load_registers() + br x0 + +.data +.align 4 +defsymbl(ldst_handler_functions) + load_lookup_table(u8, u8) + load_lookup_table(s8, s8) + load_lookup_table(u16, u16) + load_lookup_table(s16, s16) + load_lookup_table(u32, u32) + load_lookup_table(u32, aligned_u32) + store_lookup_table(8) + store_lookup_table(16) + store_lookup_table(32) + store_lookup_table(32_safe) + +.bss +.align 4 + +defsymbl(memory_map_read) + .space 0x10000 +defsymbl(iwram) + .space 0x10000 +defsymbl(vram) + .space 0x18000 +defsymbl(ewram) + .space 0x80000 +defsymbl(ldst_lookup_tables) + .space 4096 +defsymbl(reg) + .space 0x100 +defsymbl(spsr) + .space 24 +defsymbl(reg_mode) + .space 196 + .space 36 // Padding +defsymbl(oam_ram) + .space 0x400 +defsymbl(palette_ram) + .space 0x400 +defsymbl(io_registers) + .space 0x400 +defsymbl(palette_ram_converted) + .space 0x400 + + diff --git a/cpu_threaded.c b/cpu_threaded.c index 0d3a989..7988493 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -218,6 +218,8 @@ extern u8 bit_count[256]; #include "mips/mips_emit.h" #elif defined(ARM_ARCH) #include "arm/arm_emit.h" +#elif defined(ARM64_ARCH) + #include "arm/arm64_emit.h" #else #include "x86/x86_emit.h" #endif @@ -243,7 +245,7 @@ extern u8 bit_count[256]; void platform_cache_sync(void *baseaddr, void *endptr) { ctr_flush_invalidate_cache(); } -#elif defined(ARM_ARCH) +#elif defined(ARM_ARCH) || defined(ARM64_ARCH) void platform_cache_sync(void *baseaddr, void 
*endptr) { __clear_cache(baseaddr, endptr); } diff --git a/jni/Android.mk b/jni/Android.mk index 1ece93b..f582726 100644 --- a/jni/Android.mk +++ b/jni/Android.mk @@ -12,6 +12,10 @@ ifeq ($(TARGET_ARCH),arm) COREFLAGS += -DARM_ARCH -DMMAP_JIT_CACHE CPU_ARCH := arm HAVE_DYNAREC := 1 +else ifeq ($(TARGET_ARCH),arm64) + COREFLAGS += -DARM64_ARCH -DMMAP_JIT_CACHE + CPU_ARCH := arm64 + HAVE_DYNAREC := 1 else ifeq ($(TARGET_ARCH),x86) COREFLAGS += -DMMAP_JIT_CACHE CPU_ARCH := x86_32 diff --git a/tests/Makefile b/tests/Makefile new file mode 100644 index 0000000..20a92d9 --- /dev/null +++ b/tests/Makefile @@ -0,0 +1,12 @@ + +ARMV8PFX=/opt/buildroot-armv8el-uclibc/bin/aarch64-buildroot-linux-uclibc + +all: + gcc -o arm64gen arm64gen.c -ggdb -I../arm/ + ./arm64gen > bytecode.bin + $(ARMV8PFX)-as -o bytecoderef.o arm64gen.S + $(ARMV8PFX)-objcopy -O binary bytecoderef.o bytecoderef.bin + @ cmp bytecoderef.bin bytecode.bin || echo "Bytecode mismatch" + @ cmp bytecoderef.bin bytecode.bin && echo "Test passed!" + + diff --git a/tests/arm64gen.S b/tests/arm64gen.S new file mode 100644 index 0000000..127463f --- /dev/null +++ b/tests/arm64gen.S @@ -0,0 +1,208 @@ + +b 16*4 +bl 16*4 + +b.eq 16*4 +b.ne 16*4 +b.hs 16*4 +b.lo 16*4 +b.mi 16*4 +b.pl 16*4 +b.vs 16*4 +b.vc 16*4 +b.hi 16*4 +b.ls 16*4 +b.ge 16*4 +b.lt 16*4 +b.gt 16*4 +b.le 16*4 +b.al 16*4 +b.nv 16*4 + +ldr w1, [x2, #64] +ldr w29, [x30, #64] +str w1, [x2, #64] +str w29, [x30, #64] + +mov w0, #0x1234 +mov w12, #0x5656 +mov w12, #0xFFFF + +movk w13, #0x9876, lsl #16 +movk w13, #0xFFFF, lsl #16 + +movz w13, #0xabcd, lsl #16 + +mov w14, #0xffff5555 + +add w11, w12, w13, lsl #0 +add w11, w12, w13, lsl #19 +add w11, w12, w13, lsl #31 + +add w1, w29, #0x123 +add w1, w29, #0xFFF +sub w1, w29, #0x123 +sub w1, w29, #0xFFF + +add w3, w30, #0x123000 +add w3, w30, #0xFFF000 +sub w3, w30, #0x123000 +sub w3, w30, #0xFFF000 + +adds w29, w30, #0x123 +adds w29, w30, #0xFFF +subs w29, w30, #0x123 +subs w29, w30, #0xFFF + +madd w2, w3, w4, w5 +madd w25, w26, w27, w28 +msub w2, w3, w4, w5 +msub w25, w26, w27, w28 + +smaddl x2, w3, w4, x5 +smaddl x25, w26, w27, x28 +umaddl x2, w3, w4, x5 +umaddl x25, w26, w27, x28 + +mul w1, w2, w3 +mul w27, w28, w29 + +ror w1, w2, #1 +ror w1, w2, #31 +ror w30, w29, #1 +ror w30, w29, #31 + +lsr w1, w2, #1 +lsr w1, w2, #31 +lsr w30, w29, #1 +lsr w30, w29, #31 + +lsl w1, w2, #1 +lsl w1, w2, #31 +lsl w30, w29, #1 +lsl w30, w29, #31 + +asr w1, w2, #1 +asr w1, w2, #31 +asr w30, w29, #1 +asr w30, w29, #31 + +lsr x1, x2, #1 +lsr x1, x2, #2 +lsr x1, x2, #62 +lsr x1, x2, #63 +lsr x30, x29, #1 +lsr x30, x29, #62 + +eor w3, w4, #1 +eor w3, w4, #(~1) +orr w3, w4, #1 +orr w3, w4, #(~1) +and w3, w4, #1 +and w3, w4, #(~3) + +and x3, x4, #0xffffffff +and x3, x4, #0x1 +and x1, x2, #1 +and x1, x2, #(~1) +and x1, x2, #0xffffffff + +mov w1, w2 +mov w30, wzr + +orr w1, w2, w3 +orr w29, w30, wzr +eor w1, w2, w3 +eor w29, w30, wzr +orn w1, w2, w3 +orn w29, w30, wzr +and w1, w2, w3 +and w29, w30, wzr +bic w1, w2, w3 +bic w29, w30, wzr +ands w1, w2, w3 +ands w29, w30, wzr + +tst w1, w2 +tst w25, wzr +cmp w1, #0 +cmp w30, #0 +cmp w1, #32 +cmp w30, #32 +cmp w1, #200 +cmp w30, #200 + +add w1, w2, w3 +add w29, w30, w28 +sub w1, w2, w3 +sub w29, w30, w28 +adc w1, w2, w3 +adc w29, w30, w28 +sbc w1, w2, w3 +sbc w29, w30, w28 +adds w1, w2, w3 +adds w29, w30, w28 +subs w1, w2, w3 +subs w29, w30, w28 +adcs w1, w2, w3 +adcs w29, w30, w28 +sbcs w1, w2, w3 +sbcs w29, w30, w28 + +tbz w20, #1, 63*4 +tbnz w20, #1, 63*4 +tbz w20, #0, 2*4 +tbnz w20, #7, 2*4 + +cbz w20, 63*4 +cbnz 
w20, 63*4 +cbz w20, 2*4 +cbnz w20, 2*4 + +csel w20, w24, w25, ne +csel w1, w2, w3, eq +csel w1, w20, wzr, lt +csel w1, wzr, wzr, gt + +csinc w20, w24, w25, ne +csinc w1, w2, w3, eq +csinc w1, w20, wzr, lt +csinc w1, wzr, wzr, gt + +csinv w20, w24, w25, ne +csinv w1, w2, w3, eq +csinv w1, w20, wzr, lt +csinv w1, wzr, wzr, gt + +csneg w20, w24, w25, ne +csneg w1, w2, w3, eq +csneg w1, w20, wzr, lt +csneg w1, wzr, wzr, gt + +cset w1, eq +cset w1, hs +cset w20, lo +csetm w1, hs +csetm w20, lo + +ubfx w1, w2, #8, #8 +ubfx w1, w2, #16, #16 +ubfx w1, wzr, #8, #24 +ubfx w1, wzr, #16, #16 + +rorv w1, w2, w3 +rorv w28, w29, w30 +lslv w1, w2, w3 +lslv w28, w29, w30 +lsrv w1, w2, w3 +lsrv w28, w29, w30 +asrv w1, w2, w3 +asrv w28, w29, w30 + +orr x1, x2, x3, lsl #32 +orr x25, x26, x27, lsl #32 + +sdiv w1, w2, w3 +sdiv w28, w29, w30 + + diff --git a/tests/arm64gen.c b/tests/arm64gen.c new file mode 100644 index 0000000..55b57d9 --- /dev/null +++ b/tests/arm64gen.c @@ -0,0 +1,223 @@ + +#define u32 uint32_t +#define u8 uint8_t + +#include <stdint.h> +#include <stdio.h> +#include "arm64_codegen.h" + +int main() { + u32 buffer[1024]; + u8 *translation_ptr = (u8*)&buffer[0]; + + aa64_emit_branch(16); + aa64_emit_brlink(16); + + aa64_emit_brcond(ccode_eq, 16); + aa64_emit_brcond(ccode_ne, 16); + aa64_emit_brcond(ccode_hs, 16); + aa64_emit_brcond(ccode_lo, 16); + aa64_emit_brcond(ccode_mi, 16); + aa64_emit_brcond(ccode_pl, 16); + aa64_emit_brcond(ccode_vs, 16); + aa64_emit_brcond(ccode_vc, 16); + aa64_emit_brcond(ccode_hi, 16); + aa64_emit_brcond(ccode_ls, 16); + aa64_emit_brcond(ccode_ge, 16); + aa64_emit_brcond(ccode_lt, 16); + aa64_emit_brcond(ccode_gt, 16); + aa64_emit_brcond(ccode_le, 16); + aa64_emit_brcond(ccode_al, 16); + aa64_emit_brcond(ccode_nv, 16); + + aa64_emit_ldr(1, 2, 16); + aa64_emit_ldr(29, 30, 16); + aa64_emit_str(1, 2, 16); + aa64_emit_str(29, 30, 16); + + aa64_emit_movlo(0, 0x1234); + aa64_emit_movlo(12, 0x5656); + aa64_emit_movlo(12, ~0); + + aa64_emit_movhi(13, 0x9876); + aa64_emit_movhi(13, ~0); + + aa64_emit_movhiz(13, 0xabcd); + + aa64_emit_movne(14, 0xAAAA); + + aa64_emit_add_lsl(11, 12, 13, 0); + aa64_emit_add_lsl(11, 12, 13, 19); + aa64_emit_add_lsl(11, 12, 13, 31); + + aa64_emit_addi(1, 29, 0x123); + aa64_emit_addi(1, 29, 0xFFF); + aa64_emit_subi(1, 29, 0x123); + aa64_emit_subi(1, 29, 0xFFF); + + aa64_emit_addi12(3, 30, 0x123); + aa64_emit_addi12(3, 30, 0xFFF); + aa64_emit_subi12(3, 30, 0x123); + aa64_emit_subi12(3, 30, 0xFFF); + + aa64_emit_addis(29, 30, 0x123); + aa64_emit_addis(29, 30, 0xFFF); + aa64_emit_subis(29, 30, 0x123); + aa64_emit_subis(29, 30, 0xFFF); + + aa64_emit_madd(2, 5, 3, 4); + aa64_emit_madd(25, 28, 26, 27); + aa64_emit_msub(2, 5, 3, 4); + aa64_emit_msub(25, 28, 26, 27); + + aa64_emit_smaddl(2, 5, 3, 4); + aa64_emit_smaddl(25, 28, 26, 27); + aa64_emit_umaddl(2, 5, 3, 4); + aa64_emit_umaddl(25, 28, 26, 27); + + aa64_emit_mul(1, 2, 3); + aa64_emit_mul(27, 28, 29); + + aa64_emit_ror(1, 2, 1); + aa64_emit_ror(1, 2, 31); + aa64_emit_ror(30, 29, 1); + aa64_emit_ror(30, 29, 31); + + aa64_emit_lsr(1, 2, 1); + aa64_emit_lsr(1, 2, 31); + aa64_emit_lsr(30, 29, 1); + aa64_emit_lsr(30, 29, 31); + + aa64_emit_lsl(1, 2, 1); + aa64_emit_lsl(1, 2, 31); + aa64_emit_lsl(30, 29, 1); + aa64_emit_lsl(30, 29, 31); + + aa64_emit_asr(1, 2, 1); + aa64_emit_asr(1, 2, 31); + aa64_emit_asr(30, 29, 1); + aa64_emit_asr(30, 29, 31); + + aa64_emit_lsr64(1, 2, 1); + aa64_emit_lsr64(1, 2, 2); + aa64_emit_lsr64(1, 2, 62); + aa64_emit_lsr64(1, 2, 63); + aa64_emit_lsr64(30, 29, 1); + aa64_emit_lsr64(30, 29, 62); + + 
aa64_emit_eori(3, 4, 0, 0); + aa64_emit_eori(3, 4, 31, 30); /* ~1 */ + aa64_emit_orri(3, 4, 0, 0); + aa64_emit_orri(3, 4, 31, 30); + aa64_emit_andi(3, 4, 0, 0); + aa64_emit_andi(3, 4, 30, 29); /* ~3 */ + + aa64_emit_andi64(3, 4, 0, 31); + aa64_emit_andi64(3, 4, 0, 0); + aa64_emit_andi64(1, 2, 0, 0); /* & 1 */ + aa64_emit_andi64(1, 2, 63, 62); /* & ~1 */ + aa64_emit_andi64(1, 2, 0, 31); /* & 0xffffffff */ + + aa64_emit_mov(1, 2); + aa64_emit_mov(30, 31); + + aa64_emit_orr(1, 2, 3); + aa64_emit_orr(29, 30, 31); + aa64_emit_xor(1, 2, 3); + aa64_emit_xor(29, 30, 31); + aa64_emit_orn(1, 2, 3); + aa64_emit_orn(29, 30, 31); + aa64_emit_and(1, 2, 3); + aa64_emit_and(29, 30, 31); + aa64_emit_bic(1, 2, 3); + aa64_emit_bic(29, 30, 31); + aa64_emit_ands(1, 2, 3); + aa64_emit_ands(29, 30, 31); + + aa64_emit_tst(1, 2); + aa64_emit_tst(25, 31); + + aa64_emit_cmpi(1, 0); + aa64_emit_cmpi(30, 0); + aa64_emit_cmpi(1, 32); + aa64_emit_cmpi(30, 32); + aa64_emit_cmpi(1, 200); + aa64_emit_cmpi(30, 200); + + aa64_emit_add(1, 2, 3); + aa64_emit_add(29, 30, 28); + aa64_emit_sub(1, 2, 3); + aa64_emit_sub(29, 30, 28); + aa64_emit_adc(1, 2, 3); + aa64_emit_adc(29, 30, 28); + aa64_emit_sbc(1, 2, 3); + aa64_emit_sbc(29, 30, 28); + aa64_emit_adds(1, 2, 3); + aa64_emit_adds(29, 30, 28); + aa64_emit_subs(1, 2, 3); + aa64_emit_subs(29, 30, 28); + aa64_emit_adcs(1, 2, 3); + aa64_emit_adcs(29, 30, 28); + aa64_emit_sbcs(1, 2, 3); + aa64_emit_sbcs(29, 30, 28); + + aa64_emit_tbz(20, 1, 63); + aa64_emit_tbnz(20, 1, 63); + aa64_emit_tbz(20, 0, 2); + aa64_emit_tbnz(20, 7, 2); + + aa64_emit_cbz(20, 63); + aa64_emit_cbnz(20, 63); + aa64_emit_cbz(20, 2); + aa64_emit_cbnz(20, 2); + + aa64_emit_csel(20, 24, 25, ccode_ne); + aa64_emit_csel(1, 2, 3, ccode_eq); + aa64_emit_csel(1, 20, 31, ccode_lt); + aa64_emit_csel(1, 31, 31, ccode_gt); + + aa64_emit_csinc(20, 24, 25, ccode_ne); + aa64_emit_csinc(1, 2, 3, ccode_eq); + aa64_emit_csinc(1, 20, 31, ccode_lt); + aa64_emit_csinc(1, 31, 31, ccode_gt); + + aa64_emit_csinv(20, 24, 25, ccode_ne); + aa64_emit_csinv(1, 2, 3, ccode_eq); + aa64_emit_csinv(1, 20, 31, ccode_lt); + aa64_emit_csinv(1, 31, 31, ccode_gt); + + aa64_emit_csneg(20, 24, 25, ccode_ne); + aa64_emit_csneg(1, 2, 3, ccode_eq); + aa64_emit_csneg(1, 20, 31, ccode_lt); + aa64_emit_csneg(1, 31, 31, ccode_gt); + + aa64_emit_cset(1, ccode_eq); + aa64_emit_cset(1, ccode_hs); + aa64_emit_cset(20, ccode_lo); + aa64_emit_csetm(1, ccode_hs); + aa64_emit_csetm(20, ccode_lo); + + aa64_emit_ubfx(1, 2, 8, 8); + aa64_emit_ubfx(1, 2, 16, 16); + aa64_emit_ubfx(1, 31, 8, 24); + aa64_emit_ubfx(1, 31, 16, 16); + + aa64_emit_rorv(1, 2, 3); + aa64_emit_rorv(28, 29, 30); + aa64_emit_lslv(1, 2, 3); + aa64_emit_lslv(28, 29, 30); + aa64_emit_lsrv(1, 2, 3); + aa64_emit_lsrv(28, 29, 30); + aa64_emit_asrv(1, 2, 3); + aa64_emit_asrv(28, 29, 30); + + aa64_emit_merge_regs(1, 3, 2); /* hi, lo */ + aa64_emit_merge_regs(25, 27, 26); + + aa64_emit_sdiv(1, 2, 3); + aa64_emit_sdiv(28, 29, 30); + + fwrite(buffer, 1, translation_ptr-(u8*)buffer, stdout); +} + +
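The test above only checks the emitted bytes against a reference assembler. The sketch below (illustrative only, not part of the patch) shows how the same aa64_emit_* macros would be driven at runtime: emit through a local translation_ptr into an executable buffer, then flush the instruction cache (the step platform_cache_sync/__clear_cache performs for the real JIT cache) before jumping to the code. The read/write/execute mmap and the hand-encoded RET word are assumptions for this demo; platforms enforcing W^X would need mprotect toggling or a dual mapping instead.

/* Minimal usage sketch, assuming an RWX mapping is available. */
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define u32 uint32_t
#define u8  uint8_t
#include "arm64_codegen.h"

int main(void) {
  u8 *cache = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (cache == MAP_FAILED)
    return 1;

  u8 *translation_ptr = cache;            /* the macros emit through this */
  aa64_emit_movlo(0, 0x1234);             /* mov w0, #0x1234 */
  *(u32 *)translation_ptr = 0xd65f03c0;   /* ret (hand-encoded, no macro) */
  translation_ptr += 4;

  /* Same requirement platform_cache_sync covers for the real JIT cache */
  __builtin___clear_cache((char *)cache, (char *)translation_ptr);

  printf("%x\n", ((u32 (*)(void))cache)());  /* prints 1234 */
  return 0;
}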