[aarch64] Adding new aarch64 dynarec!

This is based on the MIPS dynarec (more or less), with some borrowings
from the ARM one. In my testing it seems to be quite fast: faster than
the ARM dynarec on the A1, though not much faster than the interpreter
on an Android Snapdragon 845. Some optimizations are still missing at
the moment.

It seems to pass my test suite, and compatibility-wise it is very
similar to the ARM dynarec.
This commit is contained in:
David Guillen Fandos 2021-12-11 11:30:03 +01:00
parent 2419b77b28
commit bcd3d1ca29
10 changed files with 3354 additions and 11 deletions

View File

@ -349,6 +349,17 @@ else ifeq ($(platform), wii)
CFLAGS += -DGEKKO -DHW_RVL -mrvl -mcpu=750 -meabi -mhard-float -DMSB_FIRST -D__ppc__
STATIC_LINKING = 1
# aarch64 (armv8)
else ifeq ($(platform), arm64)
TARGET := $(TARGET_NAME)_libretro.so
SHARED := -shared -Wl,--version-script=link.T
fpic := -fPIC
CFLAGS += -fomit-frame-pointer -ffast-math
LDFLAGS += -Wl,--no-undefined
HAVE_DYNAREC := 1
MMAP_JIT_CACHE = 1
CPU_ARCH := arm64
# ARM
else ifneq (,$(findstring armv,$(platform)))
TARGET := $(TARGET_NAME)_libretro.so
@ -488,6 +499,7 @@ CFLAGS += -DMMAP_JIT_CACHE
endif
# Add -DTRACE_INSTRUCTIONS to trace instruction execution
# Can add -DTRACE_REGISTERS to additionally print register values
ifeq ($(DEBUG), 1)
OPTIMIZE := -O0 -g
else
@ -502,6 +514,8 @@ endif
ifeq ($(CPU_ARCH), arm)
DEFINES += -DARM_ARCH
else ifeq ($(CPU_ARCH), arm64)
DEFINES += -DARM64_ARCH
else ifeq ($(CPU_ARCH), mips)
DEFINES += -DMIPS_ARCH
else ifeq ($(CPU_ARCH), x86_32)

View File

@ -31,16 +31,15 @@ SOURCES_C += $(CORE_DIR)/cpu_threaded.c
endif
ifeq ($(HAVE_DYNAREC), 1)
ifeq ($(CPU_ARCH), x86_32)
SOURCES_ASM += $(CORE_DIR)/x86/x86_stub.S
endif
ifeq ($(CPU_ARCH), arm)
SOURCES_ASM += $(CORE_DIR)/arm/arm_stub.S
endif
ifeq ($(CPU_ARCH), mips)
SOURCES_ASM += $(CORE_DIR)/mips/mips_stub.S
endif
ifeq ($(CPU_ARCH), x86_32)
SOURCES_ASM += $(CORE_DIR)/x86/x86_stub.S
else ifeq ($(CPU_ARCH), arm)
SOURCES_ASM += $(CORE_DIR)/arm/arm_stub.S
else ifeq ($(CPU_ARCH), arm64)
SOURCES_ASM += $(CORE_DIR)/arm/arm64_stub.S
else ifeq ($(CPU_ARCH), mips)
SOURCES_ASM += $(CORE_DIR)/mips/mips_stub.S
endif
endif
ifeq ($(CPU_ARCH), arm)

297
arm/arm64_codegen.h Normal file
View File

@ -0,0 +1,297 @@
/* gameplaySP
*
* Copyright (C) 2021 David Guillen Fandos <david@davidgf.net>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
typedef enum
{
aa64_opcode_logic = 0x0A,
aa64_opcode_addsub = 0x0B,
aa64_opcode_adr = 0x10,
aa64_opcode_addsubi = 0x11,
aa64_opcode_movi = 0x12,
aa64_opcode_bfm = 0x13,
aa64_opcode_b = 0x14,
aa64_opcode_b2 = 0x15,
aa64_opcode_tbz = 0x16,
aa64_opcode_tbnz = 0x17,
aa64_opcode_memi = 0x19,
aa64_opcode_misc = 0x1A,
aa64_opcode_mul4 = 0x1B,
} aa64_opcode;
typedef enum
{
ccode_eq = 0x0, /* Equal Z == 1 */
ccode_ne = 0x1, /* Not Equal Z == 0 */
ccode_hs = 0x2, /* Carry Set C == 1 */
ccode_lo = 0x3, /* Carry Clear C == 0 */
ccode_mi = 0x4, /* Minus/Neg N == 1 */
ccode_pl = 0x5, /* Plus/Pos N == 0 */
ccode_vs = 0x6, /* Overflow V == 1 */
ccode_vc = 0x7, /* !Overflow V == 0 */
ccode_hi = 0x8, /* UGreatThan C && !Z */
ccode_ls = 0x9, /* ULessEqual !C || Z */
ccode_ge = 0xA, /* SGreatEqual N == V */
ccode_lt = 0xB, /* SLessThan N != V */
ccode_gt = 0xC, /* SLessThan !Z&N==V */
ccode_le = 0xD, /* SLessEqual Z|(N!=V) */
ccode_al = 0xE, /* Always */
ccode_nv = 0xF, /* Never */
} aa64_condcode;
#define aa64_br_offset(label) \
(((uintptr_t)(label) - (uintptr_t)(translation_ptr)) >> 2) \
#define aa64_br_offset_from(label, from) \
(((uintptr_t)(label) - (uintptr_t)(from)) >> 2) \
#define aa64_emit_inst(opcode, ope, rd, rs, extra) \
{ \
*((u32 *)translation_ptr) = (aa64_opcode_##opcode << 24) | ((ope) << 29) | \
((rs) << 5) | (rd) | (extra); \
translation_ptr += 4; \
}
#define aa64_emit_ldr(rv, rb, offset) \
aa64_emit_inst(memi, 5, rv, rb, (1 << 22) | ((offset) << 10)) \
#define aa64_emit_str(rv, rb, offset) \
aa64_emit_inst(memi, 5, rv, rb, (0 << 22) | ((offset) << 10)) \
#define aa64_emit_addshift(rd, rs, rm, st, sa) \
aa64_emit_inst(addsub, 0, rd, rs, ((rm) << 16) | ((st)<<22) | ((sa)<<10)) \
#define aa64_emit_add_lsl(rd, rs, rm, sa) \
aa64_emit_addshift(rd, rs, rm, 0, sa) \
#define aa64_emit_addi(rd, rs, imm) \
aa64_emit_inst(addsubi, 0, rd, rs, (imm) << 10) \
#define aa64_emit_addi12(rd, rs, imm) \
aa64_emit_inst(addsubi, 0, rd, rs, ((imm) << 10) | (1 << 22)) \
#define aa64_emit_addis(rd, rs, imm) \
aa64_emit_inst(addsubi, 1, rd, rs, (imm) << 10) \
#define aa64_emit_subi(rd, rs, imm) \
aa64_emit_inst(addsubi, 2, rd, rs, (imm) << 10) \
#define aa64_emit_subi12(rd, rs, imm) \
aa64_emit_inst(addsubi, 2, rd, rs, ((imm) << 10) | (1 << 22)) \
#define aa64_emit_subis(rd, rs, imm) \
aa64_emit_inst(addsubi, 3, rd, rs, (imm) << 10) \
/* rd = ra + rn * rm */
#define aa64_emit_madd(rd, ra, rn, rm) \
aa64_emit_inst(mul4, 0, rd, rn, ((ra) << 10) | ((rm) << 16)) \
/* rd = ra - rn * rm */
#define aa64_emit_msub(rd, ra, rn, rm) \
aa64_emit_inst(mul4, 0, rd, rn, ((ra) << 10) | ((rm) << 16) | 0x8000) \
#define aa64_emit_smaddl(rd, ra, rn, rm) \
aa64_emit_inst(mul4, 4, rd, rn, ((ra) << 10) | ((rm) << 16) | 0x200000) \
#define aa64_emit_umaddl(rd, ra, rn, rm) \
aa64_emit_inst(mul4, 4, rd, rn, ((ra) << 10) | ((rm) << 16) | 0xA00000) \
#define aa64_emit_mul(rd, rn, rm) \
aa64_emit_madd(rd, 31, rn, rm) \
// MovZ, clears the highest bits and sets the lower ones
#define aa64_emit_movlo(rd, imm) \
aa64_emit_inst(movi, 2, rd, 0, (((imm) & 0xffff) << 5) | (4 << 21)) \
// MovZ, clears the lowest bits and sets the higher ones
#define aa64_emit_movhiz(rd, imm) \
aa64_emit_inst(movi, 2, rd, 0, (((imm) & 0xffff) << 5) | (5 << 21)) \
// MovK, keeps the other (lower) bits
#define aa64_emit_movhi(rd, imm) \
aa64_emit_inst(movi, 3, rd, 0, (((imm) & 0xffff) << 5) | (5 << 21)) \
// MovN, moves the inverted immediate (for negative numbers)
#define aa64_emit_movne(rd, imm) \
aa64_emit_inst(movi, 0, rd, 0, (((imm) & 0xffff) << 5) | (4 << 21)) \
#define aa64_emit_branch(offset) \
aa64_emit_inst(b, 0, 0, 0, (((u32)(offset))) & 0x3ffffff) \
#define aa64_emit_branch_patch(ptr, offset) \
*(ptr) = (((*(ptr)) & 0xfc000000) | (((u32)(offset)) & 0x3ffffff)) \
#define aa64_emit_brcond(cond, offset) \
aa64_emit_inst(b, 2, cond, 0, ((((u32)(offset))) & 0x7ffff) << 5) \
#define aa64_emit_brcond_patch(ptr, offset) \
*(ptr) = (((*(ptr)) & 0xff00001f) | (((((u32)(offset))) & 0x7ffff) << 5)) \
#define aa64_emit_brlink(offset) \
aa64_emit_inst(b, 4, 0, 0, (((u32)(offset))) & 0x3ffffff) \
#define aa64_emit_extr(rd, rs, rm, amount) \
aa64_emit_inst(bfm, 0, rd, rs, (1 << 23) | ((amount) << 10) | ((rm) << 16)) \
#define aa64_emit_ror(rd, rs, amount) \
aa64_emit_extr(rd, rs, rs, amount) \
#define aa64_emit_lsr(rd, rs, amount) \
aa64_emit_inst(bfm, 2, rd, rs, (31 << 10) | ((amount) << 16)) \
#define aa64_emit_lsl(rd, rs, amount) \
aa64_emit_inst(bfm, 2, rd, rs, ((31-(amount)) << 10) | (((32-(amount)) & 31) << 16))
#define aa64_emit_asr(rd, rs, amount) \
aa64_emit_inst(bfm, 0, rd, rs, (31 << 10) | ((amount) << 16)) \
#define aa64_emit_lsr64(rd, rs, amount) \
aa64_emit_inst(bfm, 6, rd, rs, (1 << 22) | (63 << 10) | ((amount) << 16)) \
#define aa64_emit_eori(rd, rs, immr, imms) \
aa64_emit_inst(movi, 2, rd, rs, ((imms) << 10) | ((immr) << 16)) \
#define aa64_emit_orri(rd, rs, immr, imms) \
aa64_emit_inst(movi, 1, rd, rs, ((imms) << 10) | ((immr) << 16)) \
#define aa64_emit_andi(rd, rs, immr, imms) \
aa64_emit_inst(movi, 0, rd, rs, ((imms) << 10) | ((immr) << 16)) \
#define aa64_emit_andi64(rd, rs, immr, imms) \
aa64_emit_inst(movi, 4, rd, rs, (1 << 22) | ((imms) << 10) | ((immr) << 16))
#define aa64_emit_mov(rd, rs) \
aa64_emit_orr(rd, 31, rs) \
#define aa64_emit_orr(rd, rs, rm) \
aa64_emit_inst(logic, 1, rd, rs, ((rm) << 16)) \
#define aa64_emit_orn(rd, rs, rm) \
aa64_emit_inst(logic, 1, rd, rs, ((rm) << 16) | (1 << 21)) \
#define aa64_emit_and(rd, rs, rm) \
aa64_emit_inst(logic, 0, rd, rs, ((rm) << 16)) \
#define aa64_emit_ands(rd, rs, rm) \
aa64_emit_inst(logic, 3, rd, rs, ((rm) << 16)) \
#define aa64_emit_tst(rs, rm) \
aa64_emit_ands(31, rs, rm) \
#define aa64_emit_cmpi(rs, imm) \
aa64_emit_subis(31, rs, imm) \
#define aa64_emit_xor(rd, rs, rm) \
aa64_emit_inst(logic, 2, rd, rs, ((rm) << 16)) \
#define aa64_emit_bic(rd, rs, rm) \
aa64_emit_inst(logic, 0, rd, rs, ((rm) << 16) | (1 << 21)) \
#define aa64_emit_add(rd, rs, rm) \
aa64_emit_inst(addsub, 0, rd, rs, ((rm) << 16)) \
#define aa64_emit_sub(rd, rs, rm) \
aa64_emit_inst(addsub, 2, rd, rs, ((rm) << 16)) \
#define aa64_emit_adc(rd, rs, rm) \
aa64_emit_inst(misc, 0, rd, rs, ((rm) << 16)) \
#define aa64_emit_sbc(rd, rs, rm) \
aa64_emit_inst(misc, 2, rd, rs, ((rm) << 16)) \
#define aa64_emit_adds(rd, rs, rm) \
aa64_emit_inst(addsub, 1, rd, rs, ((rm) << 16)) \
#define aa64_emit_subs(rd, rs, rm) \
aa64_emit_inst(addsub, 3, rd, rs, ((rm) << 16)) \
#define aa64_emit_adcs(rd, rs, rm) \
aa64_emit_inst(misc, 1, rd, rs, ((rm) << 16)) \
#define aa64_emit_sbcs(rd, rs, rm) \
aa64_emit_inst(misc, 3, rd, rs, ((rm) << 16)) \
#define aa64_emit_adr(rd, offset) \
aa64_emit_inst(adr, (offset) & 3, rd, 0, ((offset) >> 2) & 0x7ffff) \
#define aa64_emit_tbz(rd, bitn, offset) \
aa64_emit_inst(tbz, 1, rd, 0, ((((u32)(offset)) & 0x3fff) << 5) | ((bitn) << 19))
#define aa64_emit_tbnz(rd, bitn, offset) \
aa64_emit_inst(tbnz, 1, rd, 0, ((((u32)(offset)) & 0x3fff) << 5) | ((bitn) << 19))
#define aa64_emit_cbz(rd, offset) \
aa64_emit_inst(b, 1, rd, 0, ((((u32)offset) & 0x7ffff)) << 5) \
#define aa64_emit_cbnz(rd, offset) \
aa64_emit_inst(b2, 1, rd, 0, ((((u32)offset) & 0x7ffff)) << 5) \
/* Misc Operations: Cond-select, Cond-Compare, ADC/SBC, CLZ/O, REV ... */
#define aa64_emit_csel(rd, rtrue, rfalse, cond) \
aa64_emit_inst(misc, 0, rd, rtrue, (1<<23)|((rfalse) << 16)|((cond) << 12)) \
#define aa64_emit_csinc(rd, rs, rm, cond) \
aa64_emit_inst(misc, 0, rd, rs, 0x800400 | ((rm) << 16) | ((cond) << 12)) \
#define aa64_emit_csinv(rd, rs, rm, cond) \
aa64_emit_inst(misc, 2, rd, rs, 0x800000 | ((rm) << 16) | ((cond) << 12)) \
#define aa64_emit_csneg(rd, rs, rm, cond) \
aa64_emit_inst(misc, 2, rd, rs, 0x800400 | ((rm) << 16) | ((cond) << 12)) \
#define aa64_emit_ubfm(rd, rs, imms, immr) \
aa64_emit_inst(bfm, 2, rd, rs, ((imms) << 10) | ((immr) << 16)) \
#define aa64_emit_ubfx(rd, rs, pos, size) \
aa64_emit_ubfm(rd, rs, pos + size - 1, pos) \
#define aa64_emit_cset(rd, cond) \
aa64_emit_csinc(rd, 31, 31, ((cond) ^ 1)) \
#define aa64_emit_csetm(rd, cond) \
aa64_emit_csinv(rd, 31, 31, ((cond) ^ 1)) \
#define aa64_emit_ccmpi(rn, immv, flags, cond) \
aa64_emit_inst(misc, 3, rn, flags, 0x400800 | ((immv)<<16) | ((cond)<<12)) \
#define aa64_emit_rorv(rd, rs, ra) \
aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02C00) \
#define aa64_emit_lslv(rd, rs, ra) \
aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02000) \
#define aa64_emit_lsrv(rd, rs, ra) \
aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02400) \
#define aa64_emit_asrv(rd, rs, ra) \
aa64_emit_inst(misc, 0, rd, rs, ((ra) << 16) | 0xC02800) \
#define aa64_emit_orr_shift64(rd, rs, rm, st, sa) \
aa64_emit_inst(logic, 5, rd, rs, ((rm) << 16) | ((st)<<22) | ((sa)<<10)) \
#define aa64_emit_merge_regs(rd, rhi, rlo) \
aa64_emit_orr_shift64(rd, rlo, rhi, 0, 32) \
#define aa64_emit_sdiv(rd, rs, rm) \
aa64_emit_inst(misc, 0, rd, rs, ((rm) << 16) | 0xC00C00) \

1879
arm/arm64_emit.h Normal file

File diff suppressed because it is too large Load Diff

705
arm/arm64_stub.S Normal file
View File

@ -0,0 +1,705 @@
# gameplaySP
#
# Copyright (C) 2021 David Guillen Fandos <david@davidgf.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of
# the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "../gpsp_config.h"
#define defsymbl(symbol) \
.align 2; \
.type symbol, %function ;\
.global symbol ; \
.global _##symbol ; \
symbol: \
_##symbol:
.text
.align 2
#define REG_R0 (0 * 4)
#define REG_R1 (1 * 4)
#define REG_R2 (2 * 4)
#define REG_R3 (3 * 4)
#define REG_R4 (4 * 4)
#define REG_R5 (5 * 4)
#define REG_R6 (6 * 4)
#define REG_R7 (7 * 4)
#define REG_R8 (8 * 4)
#define REG_R9 (9 * 4)
#define REG_R10 (10 * 4)
#define REG_R11 (11 * 4)
#define REG_R12 (12 * 4)
#define REG_R13 (13 * 4)
#define REG_R14 (14 * 4)
#define REG_SP (13 * 4)
#define REG_LR (14 * 4)
#define REG_PC (15 * 4)
#define REG_CPSR (16 * 4)
#define CPU_MODE (17 * 4)
#define CPU_HALT_STATE (18 * 4)
#define REG_N_FLAG (20 * 4)
#define REG_Z_FLAG (21 * 4)
#define REG_C_FLAG (22 * 4)
#define REG_V_FLAG (23 * 4)
#define CHANGED_PC_STATUS (24 * 4)
#define COMPLETED_FRAME (25 * 4)
#define OAM_UPDATED (26 * 4)
#define REG_SAVE (27 * 4)
#define REG_SAVE2 (28 * 4)
#define REG_SAVE3 (29 * 4)
#define REG_SAVE4 (30 * 4)
#define REG_SAVE5 (31 * 4)
#define reg_base x20
#define reg_cycles w21
#define reg_c_flag w22
#define reg_v_flag w23
#define reg_z_flag w24
#define reg_n_flag w25
// Memory offsets from reg_base to the different buffers
#define RDMAP_OFF -0xB9000 // 8K pointers (64KB)
#define IWRAM_OFF -0xA9000 // 32KB (double for shadow)
#define VRAM_OFF -0x99000 // 96KB
#define EWRAM_OFF -0x81000 // 256KB (double for shadow)
#define MEM_TBL_OFF -0x1000 // Some space for the tables
#define SPSR_RAM_OFF 0x100
#define REGMODE_RAM_OFF 0x118
#define OAM_RAM_OFF 0x200
#define PAL_RAM_OFF 0x600
#define IOREG_OFF 0xA00
#define PALCNV_RAM_OFF 0xE00
// Used for SWI handling
#define MODE_SUPERVISOR 3
#define SUPERVISOR_SPSR (SPSR_RAM_OFF + 3*4) // spsr[3]
#define SUPERVISOR_LR (REGMODE_RAM_OFF + (3 * (7 * 4)) + (6 * 4)) // reg_mode[3][6]
// Stores and restores registers to their register storage in RAM
#define load_registers() ;\
ldp w6, w7, [reg_base, #0] ;\
ldp w8, w9, [reg_base, #8] ;\
ldp w10, w11, [reg_base, #16] ;\
ldp w12, w13, [reg_base, #24] ;\
ldp w14, w15, [reg_base, #32] ;\
ldp w16, w17, [reg_base, #40] ;\
ldp w26, w27, [reg_base, #48] ;\
ldr w28, [reg_base, #56] ;\
#define store_registers() ;\
stp w6, w7, [reg_base, #0] ;\
stp w8, w9, [reg_base, #8] ;\
stp w10, w11, [reg_base, #16] ;\
stp w12, w13, [reg_base, #24] ;\
stp w14, w15, [reg_base, #32] ;\
stp w16, w17, [reg_base, #40] ;\
stp w26, w27, [reg_base, #48] ;\
str w28, [reg_base, #56] ;\
// Extracts flags from CPSR into the cache flag registers
#define extract_flags_reg(tmpreg) ;\
ubfx reg_n_flag, tmpreg, #31, #1 ;\
ubfx reg_z_flag, tmpreg, #30, #1 ;\
ubfx reg_c_flag, tmpreg, #29, #1 ;\
ubfx reg_v_flag, tmpreg, #28, #1 ;\
#define extract_flags(tmpreg) ;\
ldr tmpreg, [reg_base, #REG_CPSR] ;\
extract_flags_reg(tmpreg) ;\
// Collects cache flag bits and consolidates them to the CPSR reg
#define consolidate_flags(tmpreg) ;\
ldr tmpreg, [reg_base, #REG_CPSR] ;\
bfi tmpreg, reg_n_flag, #31, #1 ;\
bfi tmpreg, reg_z_flag, #30, #1 ;\
bfi tmpreg, reg_c_flag, #29, #1 ;\
bfi tmpreg, reg_v_flag, #28, #1 ;\
str tmpreg, [reg_base, #REG_CPSR] ;\
// Update the GBA hardware (video, sound, input, etc)
// w0: current PC
defsymbl(a64_update_gba)
str w0, [reg_base, #REG_PC] // update the PC value
str lr, [reg_base, #REG_SAVE] // Save LR for later if needed
consolidate_flags(w0) // update the CPSR
store_registers() // save out registers
bl update_gba // update GBA state
ldr w1, [reg_base, #COMPLETED_FRAME] // return to main if new frame
cbnz w1, return_to_main
// Resume execution (perhaps from a new PC)
mov reg_cycles, w0 // load new cycle count
extract_flags(w2) // reload flag cache bits
ldr w0, [reg_base, #CHANGED_PC_STATUS] // see if PC has change
cbnz w0, 1f // go start from new PC
ldr lr, [reg_base, #REG_SAVE] // Restore return point
load_registers() // reload registers
ret // resume execution, no PC change
1: // Resume from new PC
ldr w0, [reg_base, #REG_PC] // load new PC
tbnz w2, #5, 2f // CPSR.T means in thumb mode
bl block_lookup_address_arm
load_registers() // reload registers
br x0 // jump to new ARM block
2:
bl block_lookup_address_thumb
load_registers() // reload registers
br x0 // jump to new Thumb block
.size a64_update_gba, .-a64_update_gba
// Cheat hooks for master function
// This is called whenever PC == cheats-master-function
// Just calls the C function to process cheats
defsymbl(a64_cheat_hook)
store_registers()
str lr, [reg_base, #REG_SAVE]
bl process_cheats
ldr lr, [reg_base, #REG_SAVE]
load_registers()
ret
// These are b stubs for performing indirect branches. They are not
// linked to and don't return, instead they link elsewhere.
// Input:
// r0: PC to branch to
defsymbl(a64_indirect_branch_arm)
store_registers()
bl block_lookup_address_arm
load_registers()
br x0
defsymbl(a64_indirect_branch_thumb)
store_registers()
bl block_lookup_address_thumb
load_registers()
br x0
defsymbl(a64_indirect_branch_dual)
store_registers()
bl block_lookup_address_dual
load_registers()
br x0
// Read CPSR and SPSR values
defsymbl(execute_read_cpsr)
consolidate_flags(w0) // Consolidate on ret value
ret
defsymbl(execute_read_spsr)
ldr w1, [reg_base, #CPU_MODE] // read cpu mode to w1
add x0, reg_base, #SPSR_RAM_OFF // ptr to spsr table
ldr w0, [x0, x1, lsl #2] // Read actual value from trable
ret
// Update the cpsr.
// Input:
// w0: new cpsr value
// w1: bitmask of which bits in cpsr to update
// w2: current PC
defsymbl(execute_store_cpsr)
ldr w4, [reg_base, #REG_CPSR] // read current CPSR
and w3, w0, w1 // reg_flags = new_cpsr & store_mask
bic w4, w4, w1 // current_cpsr & ~store_mask
orr w0, w3, w4 // w3 = final CPSR value
extract_flags_reg(w0) // Update cached flags too
str lr, [reg_base, #REG_SAVE]
store_registers()
bl execute_store_cpsr_body // Do the remaining work in C mode
cbnz w0, 1f // If PC has changed due to this
ldr lr, [reg_base, #REG_SAVE] // Resume execution where we left it
load_registers()
ret
1:
// Returned value contains the PC, resume execution there
bl block_lookup_address_arm
load_registers()
br x0 // Resume in the returned block
.size execute_store_cpsr, .-execute_store_cpsr
// Write to SPSR
// w0: new SPSR value
// w1: store mask
defsymbl(execute_store_spsr)
ldr w2, [reg_base, #CPU_MODE] // read cpu mode to w1
add x2, reg_base, x2, lsl #2 // calculate table offset
ldr w3, [x2, #SPSR_RAM_OFF] // Read actual value from trable
and w0, w0, w1 // new-spsr & mask
bic w3, w3, w1 // old-spsr & ~mask
orr w0, w0, w3 // final spsr value
str w0, [x2, #SPSR_RAM_OFF] // Store new SPSR
ret
.size execute_store_spsr, .-execute_store_spsr
// Restore the cpsr from the mode spsr and mode shift.
// Input:
// r0: current pc
defsymbl(execute_spsr_restore)
ldr w1, [reg_base, #CPU_MODE] // w1 = cpu_mode
cbz w1, 1f // Ignore if in user mode
lsl w2, w1, #2 // We access 32 bit words
add w2, w2, #SPSR_RAM_OFF
ldr w3, [reg_base, x2] // w3 = spsr[cpu_mode]
str w3, [reg_base, #REG_CPSR] // update CPSR with SPSR value
extract_flags_reg(w3) // update cached flag values
// This function call will pass r0 (address) and return it.
str lr, [reg_base, #REG_SAVE]
store_registers() // save ARM registers
bl execute_spsr_restore_body
ldr lr, [reg_base, #REG_SAVE]
load_registers()
1:
ret
.size execute_spsr_restore, .-execute_spsr_restore
// Setup the mode transition work for calling an SWI.
// Input:
// r0: current pc
defsymbl(execute_swi)
str lr, [reg_base, #REG_SAVE]
str w0, [reg_base, #SUPERVISOR_LR] // Store next PC into supervisor LR
consolidate_flags(w1) // Calculate current CPSR flags
str w1, [reg_base, #SUPERVISOR_SPSR] // Store them in the SPSR
bic w1, w1, #0x3F // Clear mode bits
mov w2, #(0x13 | 0x80) // Set supervisor mode bits
orr w1, w1, w2
str w1, [reg_base, #REG_CPSR] // Update CPSR with new value
store_registers()
mov w0, #MODE_SUPERVISOR
bl set_cpu_mode // Set supervisor mode
ldr lr, [reg_base, #REG_SAVE]
load_registers()
ret
.size execute_swi, .-execute_swi
defsymbl(execute_arm_translate_internal)
// save registers that will be clobbered
sub sp, sp, #96
stp x19, x20, [sp, #0]
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
mov reg_cycles, w0 // load cycle counter
mov reg_base, x1 // init base_reg
// Check whether the CPU is sleeping already, we should just wait for IRQs
ldr w1, [reg_base, #CPU_HALT_STATE]
cmp w1, #0
bne alert_loop
ldr w0, [reg_base, #REG_PC] // r0 = current pc
ldr w1, [reg_base, #REG_CPSR] // r1 = flags
tst w1, #0x20 // see if Thumb bit is set
extract_flags(w2) // load flags
bne 1f // if so lookup thumb
bl block_lookup_address_arm
load_registers()
br x0 // jump to first ARM block
1:
bl block_lookup_address_thumb
load_registers()
br x0 // jump to first Thumb block
// Epilogue to return to the main thread (whatever called execute_arm_translate)
return_to_main:
// restore the saved regs and return
ldp x19, x20, [sp, #0]
ldp x21, x22, [sp, #16]
ldp x23, x24, [sp, #32]
ldp x25, x26, [sp, #48]
ldp x27, x28, [sp, #64]
ldp x29, x30, [sp, #80]
add sp, sp, #96
ret
// Memory read stub routines
#define execute_load_builder(load_type, ldop, ldmask, tblidx, ldfn) ;\
;\
defsymbl(execute_load_##load_type) ;\
tst w0, #(0xf0000000 | ldmask) ;\
lsr w3, w0, #24 ;\
csinc w3, wzr, w3, ne ;\
add x4, reg_base, (MEM_TBL_OFF + tblidx*136) ;\
ldr x3, [x4, x3, lsl #3] ;\
br x3 ;\
;\
ld_bios_##load_type: /* BIOS area, need to verify PC */;\
lsr w3, w1, #24 /* Are we running the BIOS */;\
cbnz w3, ld_slow_##load_type ;\
and w0, w0, #(0x7fff) /* BIOS only 16 KB */;\
add x3, reg_base, #(RDMAP_OFF) ;\
ldr x3, [x3] /* x3 = bios mem buffer */;\
ldop w0, [x3, x0] /* load actual value */;\
ret ;\
;\
ld_ewram_##load_type: /* EWRAM area */;\
and w0, w0, #(0x3ffff) ;\
add x3, reg_base, #EWRAM_OFF ;\
ldop w0, [x3, x0] ;\
ret ;\
;\
ld_iwram_##load_type: /* IWRAM area */;\
and w0, w0, #(0x7fff) ;\
add x3, reg_base, #(IWRAM_OFF+0x8000) ;\
ldop w0, [x3, x0] ;\
ret ;\
;\
ld_ioram_##load_type: /* I/O RAM area */;\
and w0, w0, #(0x3ff) ;\
add x3, reg_base, #(IOREG_OFF) ;\
ldop w0, [x3, x0] ;\
ret ;\
;\
ld_palram_##load_type: /* PAL RAM area */;\
and w0, w0, #(0x3ff) ;\
add x3, reg_base, #(PAL_RAM_OFF) ;\
ldop w0, [x3, x0] ;\
ret ;\
;\
ld_oamram_##load_type: /* OAM RAM area */;\
and w0, w0, #(0x3ff) ;\
add x3, reg_base, #(OAM_RAM_OFF) ;\
ldop w0, [x3, x0] ;\
ret ;\
;\
ld_rdmap_##load_type: ;\
lsr w4, w0, #15 /* Each block is 32KB */;\
add x3, reg_base, #(RDMAP_OFF) ;\
ldr x4, [x3, x4, lsl #3] /* x4 = table pointer */;\
and w0, w0, #(0x7fff) /* 32KB pages */;\
ldop w0, [x4, x0] /* load actual value */;\
ret ;\
;\
ld_slow_##load_type: /* Slow C path */;\
str w1, [reg_base, #REG_PC] /* write out PC */;\
str lr, [reg_base, #REG_SAVE] /* Save LR */;\
store_registers() ;\
bl ldfn ;\
ldr lr, [reg_base, #REG_SAVE] ;\
load_registers() ;\
ret ;\
.size execute_load_##load_type, .-execute_load_##load_type
#define load_lookup_table(load_type, aload_type) ;\
.quad ld_slow_##aload_type /* -1: Unaligned/Bad access */;\
.quad ld_bios_##aload_type /* 0x00: BIOS */;\
.quad ld_slow_##aload_type /* 0x01: Open bus */;\
.quad ld_ewram_##load_type /* 0x02: ewram */;\
.quad ld_iwram_##load_type /* 0x03: iwram */;\
.quad ld_ioram_##load_type /* 0x04: I/O regs */;\
.quad ld_palram_##load_type /* 0x05: palette RAM */;\
.quad ld_rdmap_##load_type /* 0x06: vram */;\
.quad ld_oamram_##load_type /* 0x07: oam ram */;\
.quad ld_rdmap_##load_type /* 0x08: gamepak: ignore */;\
.quad ld_rdmap_##load_type /* 0x09: gamepak: ignore */;\
.quad ld_rdmap_##load_type /* 0x0A: gamepak: ignore */;\
.quad ld_rdmap_##load_type /* 0x0B: gamepak: ignore */;\
.quad ld_rdmap_##load_type /* 0x0C: gamepak: ignore */;\
.quad ld_slow_##aload_type /* 0x0D: EEPROM */;\
.quad ld_slow_##aload_type /* 0x0E: backup */;\
.quad ld_slow_##aload_type /* 0x0F: ignore */;\
// Aligned load is a bit special
defsymbl(execute_aligned_load32)
tst w0, #(0xf0000000)
lsr w3, w0, #24
csinc w3, wzr, w3, ne
add x4, reg_base, (MEM_TBL_OFF + 5*136)
ldr x3, [x4, x3, lsl #3]
br x3
ld_slow_aligned_u32: // Slow C path for multiple loads
str lr, [reg_base, #REG_SAVE] // Save LR
store_registers()
bl read_memory32
ldr lr, [reg_base, #REG_SAVE]
load_registers()
ret
ld_bios_aligned_u32:
and w0, w0, #(0x7fff) // Do not verify PC on purpose
add x3, reg_base, #(RDMAP_OFF)
ldr x3, [x3]
ldr w0, [x3, x0]
ret
execute_load_builder( u8, ldrb, 0, 0, read_memory8)
execute_load_builder( s8, ldrsb, 0, 1, read_memory8s)
execute_load_builder(u16, ldrh, 1, 2, read_memory16)
execute_load_builder(s16, ldrsh, 1, 3, read_memory16s)
execute_load_builder(u32, ldr, 3, 4, read_memory32)
// Prepares for a external store (calls C code)
#define store_align_8() and w1, w1, #0xff
#define store_align_16() and w1, w1, #0xffff; bic w0, w0, #1
#define store_align_32() bic w0, w0, #3
// Write out to memory.
// Input:
// w0: address
// w1: value
// w2: PC value
#define execute_store_builder(store_type, str_op, str_op16, load_op, \
stmask, stmask16, tblidx) ;\
;\
defsymbl(execute_store_u##store_type) ;\
lsr w4, w0, #28 ;\
lsr w3, w0, #24 ;\
cbnz w4, ext_store_u##store_type ;\
add x4, reg_base, (MEM_TBL_OFF + 816 + tblidx*128) ;\
ldr x3, [x4, x3, lsl #3] ;\
br x3 ;\
;\
ext_store_u##store_type: ;\
ext_store_u##store_type##_safe: ;\
str w2, [reg_base, #REG_PC] /* write out PC */;\
str lr, [reg_base, #REG_SAVE] /* Preserve LR */;\
store_align_##store_type() ;\
store_registers() ;\
bl write_memory##store_type ;\
cbnz w0, write_epilogue /* handle additional write stuff */;\
ldr lr, [reg_base, #REG_SAVE] ;\
load_registers() ;\
ret /* resume if no side effects */;\
;\
ext_store_iwram_u##store_type: ;\
and w0, w0, #(0x7fff & ~stmask) /* Mask to mirror memory (+align)*/;\
add x3, reg_base, #(IWRAM_OFF+0x8000) /* x3 = iwram base */;\
str_op w1, [x0, x3] /* store data */;\
sub x3, x3, #0x8000 /* x3 = iwram smc base */;\
load_op w1, [x0, x3] /* w1 = SMC sentinel */;\
cbnz w1, 3f /* Check value, should be zero */;\
ret /* return */;\
;\
ext_store_ewram_u##store_type: ;\
and w0, w0, #(0x3ffff & ~stmask) /* Mask to mirror memory (+align)*/;\
add x3, reg_base, #EWRAM_OFF /* x3 = ewram base */;\
str_op w1, [x0, x3] /* store data */;\
add x3, x3, #0x40000 /* x3 = ewram smc base */;\
load_op w1, [x0, x3] /* w1 = SMC sentinel */;\
cbnz w1, 3f /* Check value, should be zero */;\
ret /* return */;\
;\
ext_store_vram_u##store_type: ;\
ext_store_vram_u##store_type##_safe: ;\
and w0, w0, #(0x1ffff & ~stmask16) /* Mask to mirror memory (+align)*/;\
sub w3, w0, #0x8000 /* Mirrored addr for last bank */;\
cmp w0, #0x18000 /* Check if exceeds 96KB */;\
csel w0, w3, w0, cs /* If it does, pick the mirror */;\
add x3, reg_base, #VRAM_OFF /* x3 = ewram base */;\
str_op16 w1, [x0, x3] /* store data */;\
ret /* return */;\
;\
ext_store_oam_ram_u##store_type: ;\
ext_store_oam_ram_u##store_type##_safe: ;\
and w0, w0, #(0x3ff & ~stmask16) /* Mask to mirror memory (+align)*/;\
add x3, reg_base, #OAM_RAM_OFF /* x3 = oam ram base */;\
str_op16 w1, [x0, x3] /* store data */;\
str w29, [reg_base, #OAM_UPDATED] /* write non zero to signal */;\
ret /* return */;\
;\
3: ;\
str w2, [reg_base, #REG_PC] /* write out PC */;\
store_registers() /* store registers */;\
consolidate_flags(w1) ;\
b smc_write /* perform smc write */;\
.size execute_store_u##store_type, .-execute_store_u##store_type
// for ignored areas, just return
ext_store_ignore:
ret // return
#define store_lookup_table(store_type) ;\
.quad ext_store_ignore /* 0x00: BIOS, ignore */;\
.quad ext_store_ignore /* 0x01: ignore */;\
.quad ext_store_ewram_u##store_type /* 0x02: ewram */;\
.quad ext_store_iwram_u##store_type /* 0x03: iwram */;\
.quad ext_store_u##store_type /* 0x04: I/O regs */;\
.quad ext_store_u##store_type /* 0x05: palette RAM */;\
.quad ext_store_vram_u##store_type /* 0x06: vram */;\
.quad ext_store_oam_ram_u##store_type /* 0x07: oam ram */;\
.quad ext_store_u##store_type /* 0x08: gamepak: ignore */;\
.quad ext_store_u##store_type /* 0x09: gamepak: ignore */;\
.quad ext_store_u##store_type /* 0x0A: gamepak: ignore */;\
.quad ext_store_u##store_type /* 0x0B: gamepak: ignore */;\
.quad ext_store_u##store_type /* 0x0C: gamepak: ignore */;\
.quad ext_store_u##store_type /* 0x0D: EEPROM */;\
.quad ext_store_u##store_type /* 0x0E: backup */;\
.quad ext_store_ignore /* 0x0F: ignore */;\
execute_store_builder(8, strb, strh, ldrb, 0, 1, 0)
execute_store_builder(16, strh, strh, ldrh, 1, 1, 1)
execute_store_builder(32, str, str, ldr, 3, 3, 2)
// This is a store that is executed in a strm case (so no SMC checks in-between)
defsymbl(execute_aligned_store32)
lsr w4, w0, #28
lsr w3, w0, #24
cbnz w4, ext_store_u32
add x4, reg_base, MEM_TBL_OFF + 816 + 3*128
ldr x3, [x4, x3, lsl #3]
br x3
ext_store_iwram_u32_safe:
and w0, w0, #(0x7fff) // Mask to mirror memory (no need to align!)
add x3, reg_base, #(IWRAM_OFF+0x8000) // x3 = iwram base
str w1, [x0, x3] // store data
ret // Return
ext_store_ewram_u32_safe:
and w0, w0, #(0x3ffff) // Mask to mirror memory (no need to align!)
add x3, reg_base, #(EWRAM_OFF) // x3 = ewram base
str w1, [x0, x3] // store data
ret // Return
.size execute_aligned_store32, .-execute_aligned_store32
// This is called whenever an external store with side effects was performed
write_epilogue:
consolidate_flags(w1) // update the CPSR before update
cmp w0, #2 // see if the alert is due to SMC
beq smc_write // if so, goto SMC handler
alert_loop:
bl update_gba // update GBA until CPU isn't halted
ldr w1, [reg_base, #COMPLETED_FRAME] // Check whether a frame was completed
cbnz w1, return_to_main // and return to caller function.
ldr w1, [reg_base, #CPU_HALT_STATE] // Check whether the CPU is halted
cbnz w1, alert_loop // and keep looping until it is
mov reg_cycles, w0 // load new cycle count
ldr w0, [reg_base, #REG_PC] // load new PC
b lookup_pc // Resume execution at that PC
smc_write:
bl flush_translation_cache_ram
ldr w0, [reg_base, #REG_PC] // load "current new" PC
// Resume execution at PC (at w0)
lookup_pc:
ldr w1, [reg_base, #REG_CPSR] // w1 = flags
extract_flags_reg(w1)
tbnz w1, #5, 2f // see if Thumb bit is set
// Lookup and jump to the right mode block
bl block_lookup_address_arm
load_registers()
br x0
2:
bl block_lookup_address_thumb
load_registers()
br x0
.data
.align 4
defsymbl(ldst_handler_functions)
load_lookup_table(u8, u8)
load_lookup_table(s8, s8)
load_lookup_table(u16, u16)
load_lookup_table(s16, s16)
load_lookup_table(u32, u32)
load_lookup_table(u32, aligned_u32)
store_lookup_table(8)
store_lookup_table(16)
store_lookup_table(32)
store_lookup_table(32_safe)
.bss
.align 4
defsymbl(memory_map_read)
.space 0x10000
defsymbl(iwram)
.space 0x10000
defsymbl(vram)
.space 0x18000
defsymbl(ewram)
.space 0x80000
defsymbl(ldst_lookup_tables)
.space 4096
defsymbl(reg)
.space 0x100
defsymbl(spsr)
.space 24
defsymbl(reg_mode)
.space 196
.space 36 // Padding
defsymbl(oam_ram)
.space 0x400
defsymbl(palette_ram)
.space 0x400
defsymbl(io_registers)
.space 0x400
defsymbl(palette_ram_converted)
.space 0x400

View File

@ -218,6 +218,8 @@ extern u8 bit_count[256];
#include "mips/mips_emit.h"
#elif defined(ARM_ARCH)
#include "arm/arm_emit.h"
#elif defined(ARM64_ARCH)
#include "arm/arm64_emit.h"
#else
#include "x86/x86_emit.h"
#endif
@ -243,7 +245,7 @@ extern u8 bit_count[256];
void platform_cache_sync(void *baseaddr, void *endptr) {
ctr_flush_invalidate_cache();
}
#elif defined(ARM_ARCH)
#elif defined(ARM_ARCH) || defined(ARM64_ARCH)
void platform_cache_sync(void *baseaddr, void *endptr) {
__clear_cache(baseaddr, endptr);
}

View File

@ -12,6 +12,10 @@ ifeq ($(TARGET_ARCH),arm)
COREFLAGS += -DARM_ARCH -DMMAP_JIT_CACHE
CPU_ARCH := arm
HAVE_DYNAREC := 1
else ifeq ($(TARGET_ARCH),arm64)
COREFLAGS += -DARM64_ARCH -DMMAP_JIT_CACHE
CPU_ARCH := arm64
HAVE_DYNAREC := 1
else ifeq ($(TARGET_ARCH),x86)
COREFLAGS += -DMMAP_JIT_CACHE
CPU_ARCH := x86_32

12
tests/Makefile Normal file
View File

@ -0,0 +1,12 @@
# Self-test for the aa64 emitter macros: build the generator, emit bytecode,
# assemble the reference listing with a real aarch64 toolchain, and compare
# the two outputs byte-for-byte.
#
# Fixes over the previous version: a mismatch now fails the target (the old
# `cmp ... || echo` swallowed the non-zero exit status, so `make` always
# succeeded), cmp runs only once, and phony/clean targets are declared.
ARMV8PFX=/opt/buildroot-armv8el-uclibc/bin/aarch64-buildroot-linux-uclibc

.PHONY: all clean

all:
	gcc -o arm64gen arm64gen.c -ggdb -I../arm/
	./arm64gen > bytecode.bin
	$(ARMV8PFX)-as -o bytecoderef.o arm64gen.S
	$(ARMV8PFX)-objcopy -O binary bytecoderef.o bytecoderef.bin
	@cmp bytecoderef.bin bytecode.bin && echo "Test passed!" || { echo "Bytecode mismatch"; exit 1; }

clean:
	rm -f arm64gen bytecode.bin bytecoderef.o bytecoderef.bin

208
tests/arm64gen.S Normal file
View File

@ -0,0 +1,208 @@
b 16*4
bl 16*4
b.eq 16*4
b.ne 16*4
b.hs 16*4
b.lo 16*4
b.mi 16*4
b.pl 16*4
b.vs 16*4
b.vc 16*4
b.hi 16*4
b.ls 16*4
b.ge 16*4
b.lt 16*4
b.gt 16*4
b.le 16*4
b.al 16*4
b.nv 16*4
ldr w1, [x2, #64]
ldr w29, [x30, #64]
str w1, [x2, #64]
str w29, [x30, #64]
mov w0, #0x1234
mov w12, #0x5656
mov w12, #0xFFFF
movk w13, #0x9876, lsl #16
movk w13, #0xFFFF, lsl #16
movz w13, #0xabcd, lsl #16
mov w14, #0xffff5555
add w11, w12, w13, lsl #0
add w11, w12, w13, lsl #19
add w11, w12, w13, lsl #31
add w1, w29, #0x123
add w1, w29, #0xFFF
sub w1, w29, #0x123
sub w1, w29, #0xFFF
add w3, w30, #0x123000
add w3, w30, #0xFFF000
sub w3, w30, #0x123000
sub w3, w30, #0xFFF000
adds w29, w30, #0x123
adds w29, w30, #0xFFF
subs w29, w30, #0x123
subs w29, w30, #0xFFF
madd w2, w3, w4, w5
madd w25, w26, w27, w28
msub w2, w3, w4, w5
msub w25, w26, w27, w28
smaddl x2, w3, w4, x5
smaddl x25, w26, w27, x28
umaddl x2, w3, w4, x5
umaddl x25, w26, w27, x28
mul w1, w2, w3
mul w27, w28, w29
ror w1, w2, #1
ror w1, w2, #31
ror w30, w29, #1
ror w30, w29, #31
lsr w1, w2, #1
lsr w1, w2, #31
lsr w30, w29, #1
lsr w30, w29, #31
lsl w1, w2, #1
lsl w1, w2, #31
lsl w30, w29, #1
lsl w30, w29, #31
asr w1, w2, #1
asr w1, w2, #31
asr w30, w29, #1
asr w30, w29, #31
lsr x1, x2, #1
lsr x1, x2, #2
lsr x1, x2, #62
lsr x1, x2, #63
lsr x30, x29, #1
lsr x30, x29, #62
eor w3, w4, #1
eor w3, w4, #(~1)
orr w3, w4, #1
orr w3, w4, #(~1)
and w3, w4, #1
and w3, w4, #(~3)
and x3, x4, #0xffffffff
and x3, x4, #0x1
and x1, x2, #1
and x1, x2, #(~1)
and x1, x2, #0xffffffff
mov w1, w2
mov w30, wzr
orr w1, w2, w3
orr w29, w30, wzr
eor w1, w2, w3
eor w29, w30, wzr
orn w1, w2, w3
orn w29, w30, wzr
and w1, w2, w3
and w29, w30, wzr
bic w1, w2, w3
bic w29, w30, wzr
ands w1, w2, w3
ands w29, w30, wzr
tst w1, w2
tst w25, wzr
cmp w1, #0
cmp w30, #0
cmp w1, #32
cmp w30, #32
cmp w1, #200
cmp w30, #200
add w1, w2, w3
add w29, w30, w28
sub w1, w2, w3
sub w29, w30, w28
adc w1, w2, w3
adc w29, w30, w28
sbc w1, w2, w3
sbc w29, w30, w28
adds w1, w2, w3
adds w29, w30, w28
subs w1, w2, w3
subs w29, w30, w28
adcs w1, w2, w3
adcs w29, w30, w28
sbcs w1, w2, w3
sbcs w29, w30, w28
tbz w20, #1, 63*4
tbnz w20, #1, 63*4
tbz w20, #0, 2*4
tbnz w20, #7, 2*4
cbz w20, 63*4
cbnz w20, 63*4
cbz w20, 2*4
cbnz w20, 2*4
csel w20, w24, w25, ne
csel w1, w2, w3, eq
csel w1, w20, wzr, lt
csel w1, wzr, wzr, gt
csinc w20, w24, w25, ne
csinc w1, w2, w3, eq
csinc w1, w20, wzr, lt
csinc w1, wzr, wzr, gt
csinv w20, w24, w25, ne
csinv w1, w2, w3, eq
csinv w1, w20, wzr, lt
csinv w1, wzr, wzr, gt
csneg w20, w24, w25, ne
csneg w1, w2, w3, eq
csneg w1, w20, wzr, lt
csneg w1, wzr, wzr, gt
cset w1, eq
cset w1, hs
cset w20, lo
csetm w1, hs
csetm w20, lo
ubfx w1, w2, #8, #8
ubfx w1, w2, #16, #16
ubfx w1, wzr, #8, #24
ubfx w1, wzr, #16, #16
rorv w1, w2, w3
rorv w28, w29, w30
lslv w1, w2, w3
lslv w28, w29, w30
lsrv w1, w2, w3
lsrv w28, w29, w30
asrv w1, w2, w3
asrv w28, w29, w30
orr x1, x2, x3, lsl #32
orr x25, x26, x27, lsl #32
sdiv w1, w2, w3
sdiv w28, w29, w30

223
tests/arm64gen.c Normal file
View File

@ -0,0 +1,223 @@
#define u32 uint32_t
#define u8 uint8_t
#include <stdio.h>
#include <stdint.h>
#include "arm64_codegen.h"
/* Emits one instance of every supported aa64_emit_* encoding into a buffer
 * and writes the raw bytes to stdout. The Makefile compares this output
 * byte-for-byte against arm64gen.S assembled by a reference assembler, so
 * the call order below must match that listing exactly.
 * Register number 31 encodes wzr/xzr in the macros below. */
int main() {
  u32 buffer[1024];                     /* scratch output buffer (4KB) */
  u8 *translation_ptr = (u8*)&buffer[0];  /* cursor advanced by the emit macros */

  /* Unconditional and conditional branches (all 16 condition codes) */
  aa64_emit_branch(16);
  aa64_emit_brlink(16);
  aa64_emit_brcond(ccode_eq, 16);
  aa64_emit_brcond(ccode_ne, 16);
  aa64_emit_brcond(ccode_hs, 16);
  aa64_emit_brcond(ccode_lo, 16);
  aa64_emit_brcond(ccode_mi, 16);
  aa64_emit_brcond(ccode_pl, 16);
  aa64_emit_brcond(ccode_vs, 16);
  aa64_emit_brcond(ccode_vc, 16);
  aa64_emit_brcond(ccode_hi, 16);
  aa64_emit_brcond(ccode_ls, 16);
  aa64_emit_brcond(ccode_ge, 16);
  aa64_emit_brcond(ccode_lt, 16);
  aa64_emit_brcond(ccode_gt, 16);
  aa64_emit_brcond(ccode_le, 16);
  aa64_emit_brcond(ccode_al, 16);
  aa64_emit_brcond(ccode_nv, 16);

  /* 32-bit loads/stores with immediate offset */
  aa64_emit_ldr(1, 2, 16);
  aa64_emit_ldr(29, 30, 16);
  aa64_emit_str(1, 2, 16);
  aa64_emit_str(29, 30, 16);

  /* Immediate moves (movlo/movhi pair to build 32-bit constants;
   * movne emits the inverted-immediate movn form) */
  aa64_emit_movlo(0, 0x1234);
  aa64_emit_movlo(12, 0x5656);
  aa64_emit_movlo(12, ~0);
  aa64_emit_movhi(13, 0x9876);
  aa64_emit_movhi(13, ~0);
  aa64_emit_movhiz(13, 0xabcd);
  aa64_emit_movne(14, 0xAAAA);          /* expands to w14 = 0xffff5555 */

  /* Add with shifted register */
  aa64_emit_add_lsl(11, 12, 13, 0);
  aa64_emit_add_lsl(11, 12, 13, 19);
  aa64_emit_add_lsl(11, 12, 13, 31);

  /* Add/sub immediate; the *i12 variants take a 12-bit-shifted immediate */
  aa64_emit_addi(1, 29, 0x123);
  aa64_emit_addi(1, 29, 0xFFF);
  aa64_emit_subi(1, 29, 0x123);
  aa64_emit_subi(1, 29, 0xFFF);
  aa64_emit_addi12(3, 30, 0x123);
  aa64_emit_addi12(3, 30, 0xFFF);
  aa64_emit_subi12(3, 30, 0x123);
  aa64_emit_subi12(3, 30, 0xFFF);
  aa64_emit_addis(29, 30, 0x123);
  aa64_emit_addis(29, 30, 0xFFF);
  aa64_emit_subis(29, 30, 0x123);
  aa64_emit_subis(29, 30, 0xFFF);

  /* Multiply-accumulate: note argument order is (rd, ra, rn, rm) */
  aa64_emit_madd(2, 5, 3, 4);
  aa64_emit_madd(25, 28, 26, 27);
  aa64_emit_msub(2, 5, 3, 4);
  aa64_emit_msub(25, 28, 26, 27);
  aa64_emit_smaddl(2, 5, 3, 4);
  aa64_emit_smaddl(25, 28, 26, 27);
  aa64_emit_umaddl(2, 5, 3, 4);
  aa64_emit_umaddl(25, 28, 26, 27);
  aa64_emit_mul(1, 2, 3);
  aa64_emit_mul(27, 28, 29);

  /* Shifts/rotates by immediate (32-bit) */
  aa64_emit_ror(1, 2, 1);
  aa64_emit_ror(1, 2, 31);
  aa64_emit_ror(30, 29, 1);
  aa64_emit_ror(30, 29, 31);
  aa64_emit_lsr(1, 2, 1);
  aa64_emit_lsr(1, 2, 31);
  aa64_emit_lsr(30, 29, 1);
  aa64_emit_lsr(30, 29, 31);
  aa64_emit_lsl(1, 2, 1);
  aa64_emit_lsl(1, 2, 31);
  aa64_emit_lsl(30, 29, 1);
  aa64_emit_lsl(30, 29, 31);
  aa64_emit_asr(1, 2, 1);
  aa64_emit_asr(1, 2, 31);
  aa64_emit_asr(30, 29, 1);
  aa64_emit_asr(30, 29, 31);

  /* 64-bit logical shift right by immediate */
  aa64_emit_lsr64(1, 2, 1);
  aa64_emit_lsr64(1, 2, 2);
  aa64_emit_lsr64(1, 2, 62);
  aa64_emit_lsr64(1, 2, 63);
  aa64_emit_lsr64(30, 29, 1);
  aa64_emit_lsr64(30, 29, 62);

  /* Logical ops with encoded bitmask immediates (immr/imms fields) */
  aa64_emit_eori(3, 4, 0, 0);
  aa64_emit_eori(3, 4, 31, 30); /* ~1 */
  aa64_emit_orri(3, 4, 0, 0);
  aa64_emit_orri(3, 4, 31, 30);
  aa64_emit_andi(3, 4, 0, 0);
  aa64_emit_andi(3, 4, 30, 29); /* ~3 */
  aa64_emit_andi64(3, 4, 0, 31);
  aa64_emit_andi64(3, 4, 0, 0);
  aa64_emit_andi64(1, 2, 0, 0); /* & 1 */
  aa64_emit_andi64(1, 2, 63, 62); /* & ~1 */
  aa64_emit_andi64(1, 2, 0, 31); /* & 0xffffffff */

  /* Register moves and logical register operations */
  aa64_emit_mov(1, 2);
  aa64_emit_mov(30, 31);
  aa64_emit_orr(1, 2, 3);
  aa64_emit_orr(29, 30, 31);
  aa64_emit_xor(1, 2, 3);
  aa64_emit_xor(29, 30, 31);
  aa64_emit_orn(1, 2, 3);
  aa64_emit_orn(29, 30, 31);
  aa64_emit_and(1, 2, 3);
  aa64_emit_and(29, 30, 31);
  aa64_emit_bic(1, 2, 3);
  aa64_emit_bic(29, 30, 31);
  aa64_emit_ands(1, 2, 3);
  aa64_emit_ands(29, 30, 31);
  aa64_emit_tst(1, 2);
  aa64_emit_tst(25, 31);

  /* Compare with immediate */
  aa64_emit_cmpi(1, 0);
  aa64_emit_cmpi(30, 0);
  aa64_emit_cmpi(1, 32);
  aa64_emit_cmpi(30, 32);
  aa64_emit_cmpi(1, 200);
  aa64_emit_cmpi(30, 200);

  /* Add/sub/with-carry register forms, with and without flag setting */
  aa64_emit_add(1, 2, 3);
  aa64_emit_add(29, 30, 28);
  aa64_emit_sub(1, 2, 3);
  aa64_emit_sub(29, 30, 28);
  aa64_emit_adc(1, 2, 3);
  aa64_emit_adc(29, 30, 28);
  aa64_emit_sbc(1, 2, 3);
  aa64_emit_sbc(29, 30, 28);
  aa64_emit_adds(1, 2, 3);
  aa64_emit_adds(29, 30, 28);
  aa64_emit_subs(1, 2, 3);
  aa64_emit_subs(29, 30, 28);
  aa64_emit_adcs(1, 2, 3);
  aa64_emit_adcs(29, 30, 28);
  aa64_emit_sbcs(1, 2, 3);
  aa64_emit_sbcs(29, 30, 28);

  /* Test-bit and compare-and-branch; offsets are in instruction words */
  aa64_emit_tbz(20, 1, 63);
  aa64_emit_tbnz(20, 1, 63);
  aa64_emit_tbz(20, 0, 2);
  aa64_emit_tbnz(20, 7, 2);
  aa64_emit_cbz(20, 63);
  aa64_emit_cbnz(20, 63);
  aa64_emit_cbz(20, 2);
  aa64_emit_cbnz(20, 2);

  /* Conditional select family */
  aa64_emit_csel(20, 24, 25, ccode_ne);
  aa64_emit_csel(1, 2, 3, ccode_eq);
  aa64_emit_csel(1, 20, 31, ccode_lt);
  aa64_emit_csel(1, 31, 31, ccode_gt);
  aa64_emit_csinc(20, 24, 25, ccode_ne);
  aa64_emit_csinc(1, 2, 3, ccode_eq);
  aa64_emit_csinc(1, 20, 31, ccode_lt);
  aa64_emit_csinc(1, 31, 31, ccode_gt);
  aa64_emit_csinv(20, 24, 25, ccode_ne);
  aa64_emit_csinv(1, 2, 3, ccode_eq);
  aa64_emit_csinv(1, 20, 31, ccode_lt);
  aa64_emit_csinv(1, 31, 31, ccode_gt);
  aa64_emit_csneg(20, 24, 25, ccode_ne);
  aa64_emit_csneg(1, 2, 3, ccode_eq);
  aa64_emit_csneg(1, 20, 31, ccode_lt);
  aa64_emit_csneg(1, 31, 31, ccode_gt);
  aa64_emit_cset(1, ccode_eq);
  aa64_emit_cset(1, ccode_hs);
  aa64_emit_cset(20, ccode_lo);
  aa64_emit_csetm(1, ccode_hs);
  aa64_emit_csetm(20, ccode_lo);

  /* Unsigned bitfield extract: (rd, rn, lsb, width) */
  aa64_emit_ubfx(1, 2, 8, 8);
  aa64_emit_ubfx(1, 2, 16, 16);
  aa64_emit_ubfx(1, 31, 8, 24);
  aa64_emit_ubfx(1, 31, 16, 16);

  /* Variable shifts/rotates */
  aa64_emit_rorv(1, 2, 3);
  aa64_emit_rorv(28, 29, 30);
  aa64_emit_lslv(1, 2, 3);
  aa64_emit_lslv(28, 29, 30);
  aa64_emit_lsrv(1, 2, 3);
  aa64_emit_lsrv(28, 29, 30);
  aa64_emit_asrv(1, 2, 3);
  aa64_emit_asrv(28, 29, 30);

  /* Merge two 32-bit registers into one 64-bit (orr xd, xlo, xhi, lsl #32) */
  aa64_emit_merge_regs(1, 3, 2); /* hi, lo */
  aa64_emit_merge_regs(25, 27, 26);

  /* Signed division */
  aa64_emit_sdiv(1, 2, 3);
  aa64_emit_sdiv(28, 29, 30);

  /* Dump the emitted bytecode to stdout for comparison by the Makefile */
  fwrite(buffer, 1, translation_ptr-(u8*)buffer, stdout);
}