From 64d0a066b4090d7fb36d492f5450b174332d9e25 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 3 Nov 2023 00:09:08 +0100 Subject: [PATCH] Move interpreter to use CXX so that we can simplify some bits --- Makefile.common | 17 +--- arm/video_blend.S | 193 ---------------------------------------------- cpu.c => cpu.cc | 15 ++-- cpu.h | 2 + cpu_threaded.c | 2 - savestate.h | 7 +- 6 files changed, 17 insertions(+), 219 deletions(-) delete mode 100644 arm/video_blend.S rename cpu.c => cpu.cc (99%) diff --git a/Makefile.common b/Makefile.common index f580c5d..4951422 100644 --- a/Makefile.common +++ b/Makefile.common @@ -4,10 +4,10 @@ INCFLAGS := -I$(CORE_DIR)/libretro -I$(LIBRETRO_COMM_DIR)/include -I$(CORE_DI SOURCES_ASM := $(CORE_DIR)/bios_data.S -SOURCES_CC := $(CORE_DIR)/video.cc +SOURCES_CC := $(CORE_DIR)/video.cc \ + $(CORE_DIR)/cpu.cc SOURCES_C := $(CORE_DIR)/main.c \ - $(CORE_DIR)/cpu.c \ $(CORE_DIR)/gba_memory.c \ $(CORE_DIR)/savestate.c \ $(CORE_DIR)/input.c \ @@ -28,10 +28,7 @@ SOURCES_C := $(CORE_DIR)/main.c \ $(LIBRETRO_COMM_DIR)/vfs/vfs_implementation.c ifeq ($(HAVE_DYNAREC), 1) -SOURCES_C += $(CORE_DIR)/cpu_threaded.c -endif - -ifeq ($(HAVE_DYNAREC), 1) + SOURCES_C += $(CORE_DIR)/cpu_threaded.c ifeq ($(CPU_ARCH), x86_32) SOURCES_ASM += $(CORE_DIR)/x86/x86_stub.S else ifeq ($(CPU_ARCH), arm) @@ -43,12 +40,4 @@ ifeq ($(HAVE_DYNAREC), 1) endif endif -ifeq ($(CPU_ARCH), arm) - -ifeq ($(CPU_ARCH_ARM_BLENDING_OPTS),1) -CFLAGS += -DARM_ARCH_BLENDING_OPTS -SOURCES_ASM += $(CORE_DIR)/arm/video_blend.S -endif - -endif diff --git a/arm/video_blend.S b/arm/video_blend.S deleted file mode 100644 index 9caaa33..0000000 --- a/arm/video_blend.S +++ /dev/null @@ -1,193 +0,0 @@ -@ Input: -@ r0 = screen_src_ptr -@ r1 = screen_dest_ptr -@ r2 = start -@ r3 = end - -6: -#ifdef __MACH__ - .word _io_registers - .word _palette_ram_converted -#else - .word io_registers - .word palette_ram_converted -#endif - .word 0x04000200 @ combine test mask - .word 0x07E0F81F @ clamp mask - .word 0x000003FE @ palette index mask - .word 0x08010020 @ saturation mask - -.align 2 -.globl expand_blend -.globl _expand_blend -expand_blend: -_expand_blend: - stmdb sp!, { r4, r5, r6, r9, r10, r11, r14 } - - add r0, r0, r2, lsl #2 @ screen_src_ptr += start - add r1, r1, r2, lsl #1 @ screen_dest_ptr += start - sub r2, r3, r2 @ r2 = end - start - ldr r3, 6b @ r3 = io_registers - ldrh r3, [r3, #0x52] @ r3 = bldalpha - mov r4, r3, lsr #8 @ r4 = bldalpha >> 8 - and r3, r3, #0x1F @ r3 = blend_a - and r4, r4, #0x1F @ r4 = blend_b - cmp r3, #16 @ if(blend_a > 16) - movgt r3, #16 @ blend_a = 16 - cmp r4, #16 @ if(blend_b > 16) - movgt r4, #16 @ blend_b = 16 - - ldr r14, 6b + 4 @ r14 = palette_ram_converted - ldr r12, 6b + 8 @ r12 = 0x04000200 - ldr r11, 6b + 12 @ r11 = 0x07E0F81F - ldr r10, 6b + 16 @ r10 = 0x000003FE - - add r5, r3, r4 @ r5 = blend_a + blend_b - cmp r5, #16 @ if((blend_a + blend_b) > 16) - bgt 3f @ goto loop w/saturation - - - @ loop w/o saturation -1: - ldr r5, [r0], #4 @ r5 = pixel_pair, screen_src_ptr++ - and r6, r5, r12 @ r6 = r5 & 0x04000200 - cmp r6, r12 @ if(r6 != 0x4000200) - bne 2f @ goto no_blend - - and r6, r10, r5, lsl #1 @ r6 = (pixel_pair & 0x1FF) << 1 - ldrh r6, [r14, r6] @ r6 = pixel_top - orr r6, r6, r6, lsl #16 @ r6 = pixel_top | (pixel_top << 16) - and r6, r6, r11 @ r6 = pixel_top_dilated - - and r5, r10, r5, lsr #15 @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1 - ldrh r5, [r14, r5] @ r5 = pixel_bottom - orr r5, r5, r5, lsl #16 @ r5 = pixel_bottom | (pixel_bottom << 16) - and r5, r5, r11 @ r5 = pixel_bottom_dilated - - mul r5, r4, r5 @ r5 = pixel_bottom * blend_b = bottom_mul - mla r5, r3, r6, r5 @ r5 = (pixel_top * blend_a) + bottom_mul - - and r5, r11, r5, lsr #4 @ r5 = (color_dilated >> 4) & 0x07E0F81F - orr r5, r5, r5, lsr #16 @ r5 = color_dilated | (color_dilated >> 16) - - strh r5, [r1], #2 @ *screen_dest_ptr = r5, screen_dest_ptr++ - subs r2, r2, #1 @ counter-- - bne 1b @ go again - - ldmia sp!, { r4, r5, r6, r9, r10, r11, pc } - -2: - and r5, r10, r5, lsl #1 @ r5 = (pixel_pair & 0x1FF) << 1 - ldrh r5, [r14, r5] @ r5 = pixel_top - strh r5, [r1], #2 @ *screen_dest_ptr = r5, screen_dest_ptr++ - - subs r2, r2, #1 @ counter-- - bne 1b @ go again - - ldmia sp!, { r4, r5, r6, r9, r10, r11, pc } - -@ loop w/saturation - -3: - ldr r9, 6b + 20 @ r9 = 0x08010020 - -4: - ldr r5, [r0], #4 @ r5 = pixel_pair, screen_src_ptr++ - and r6, r5, r12 @ r6 = r5 & 0x04000200 - cmp r6, r12 @ if(r6 != 0x4000200) - bne 5f @ goto no_blend - - and r6, r10, r5, lsl #1 @ r6 = (pixel_pair & 0x1FF) << 1 - ldrh r6, [r14, r6] @ r6 = pixel_top - orr r6, r6, r6, lsl #16 @ r6 = pixel_top | (pixel_top << 16) - and r6, r6, r11 @ r6 = pixel_top_dilated - - and r5, r10, r5, lsr #15 @ r5 = ((pixel_pair >> 16) & 0x1FF) << 1 - ldrh r5, [r14, r5] @ r5 = pixel_bottom - orr r5, r5, r5, lsl #16 @ r5 = pixel_bottom | (pixel_bottom << 16) - and r5, r5, r11 @ r5 = pixel_bottom_dilated - - mul r5, r4, r5 @ r5 = pixel_bottom * blend_b = bottom_mul - mla r5, r3, r6, r5 @ r5 = (pixel_top * blend_a) + bottom_mul - - and r6, r9, r5, lsr #4 @ r6 = saturation bits - orr r6, r6, r6, lsr #1 @ propogate saturation down msb - orr r6, r6, r6, lsr #2 @ propogate down next two bits - orr r6, r6, r6, lsr #3 @ propogate down next three bits - orr r5, r6, r5, lsr #4 @ mask over result w/saturation - - and r5, r11, r5 @ r5 = (color_dilated >> 4) & 0x07E0F81F - orr r5, r5, r5, lsr #16 @ r5 = color_dilated | (color_dilated >> 16) - strh r5, [r1], #2 @ *screen_dest_ptr = r5, screen_dest_ptr++ - - subs r2, r2, #1 @ counter-- - bne 4b @ go again - - ldmia sp!, { r4, r5, r6, r9, r10, r11, pc } - -5: - and r5, r10, r5, lsl #1 @ r5 = (pixel_pair & 0x1FF) << 1 - ldrh r5, [r14, r5] @ r5 = pixel_top - strh r5, [r1], #2 @ *screen_dest_ptr = r5, screen_dest_ptr++ - - subs r2, r2, #1 @ counter-- - bne 4b @ go again - - ldmia sp!, { r4, r5, r6, r9, r10, r11, pc } - - - -@ The following function isn't complete (only works on run multiples of 8), -@ but unfortunately I don't see much potential for actually being able to -@ use it.. - -#define expand_pixel_pair(reg, temp) ;\ - and temp, r3, reg, lsr #15 ;\ - ldrh temp, [r2, temp] ;\ - ;\ - and reg, r3, reg, lsl #1 ;\ - ldrh reg, [r2, reg] ;\ - ;\ - orr reg, reg, temp, lsl #16 ;\ - - -@ Input: -@ r0 = screen_ptr -@ r1 = start -@ r2 = end - -1: -#ifdef __MACH__ - .word _palette_ram_converted -#else - .word palette_ram_converted -#endif - .word 0x3FE - -.align 2 -.globl expand_normal -.globl _expand_normal -expand_normal: -_expand_normal: - stmdb sp!, { r4, r5, r6, r7, r14 } - - add r0, r0, r1, lsl #1 @ screen_ptr += start - sub r1, r2, r1 @ r1 = end - start - ldr r2, 1b @ r2 = palette_ram_converted - ldr r3, 1b + 4 @ r3 = 0x3FE - -2: - ldmia r0, { r4, r5, r6, r7 } - - expand_pixel_pair(r4, r14) - expand_pixel_pair(r5, r14) - expand_pixel_pair(r6, r14) - expand_pixel_pair(r7, r14) - - stmia r0!, { r4, r5, r6, r7 } - - subs r1, r1, #8 - bne 2b - - ldmia sp!, { r4, r5, r6, r7, pc } - diff --git a/cpu.c b/cpu.cc similarity index 99% rename from cpu.c rename to cpu.cc index 02e2593..eb66478 100644 --- a/cpu.c +++ b/cpu.cc @@ -21,9 +21,10 @@ // - stm reglist writeback when base is in the list needs adjustment // - block memory needs psr swapping and user mode reg swapping -#include "common.h" - -#include "cpu_instrument.h" +extern "C" { + #include "common.h" + #include "cpu_instrument.h" +} const u8 bit_count[256] = { @@ -1411,7 +1412,6 @@ u32 instruction_count = 0; void set_cpu_mode(cpu_mode_type new_mode) { - u32 i; cpu_mode_type cpu_mode = reg[CPU_MODE]; if(cpu_mode == new_mode) @@ -1419,7 +1419,7 @@ void set_cpu_mode(cpu_mode_type new_mode) if(new_mode == MODE_FIQ) { - for(i = 8; i < 15; i++) + for (u32 i = 8; i < 15; i++) REG_MODE(cpu_mode)[i - 8] = reg[i]; } else @@ -1430,7 +1430,7 @@ void set_cpu_mode(cpu_mode_type new_mode) if(cpu_mode == MODE_FIQ) { - for(i = 8; i < 15; i++) + for (u32 i = 8; i < 15; i++) reg[i] = REG_MODE(new_mode)[i - 8]; } else @@ -3667,10 +3667,9 @@ thumb_loop: void init_cpu(void) { // Initialize CPU registers - int i; memset(reg, 0, REG_USERDEF * sizeof(u32)); memset(reg_mode, 0, sizeof(reg_mode)); - for (i = 0; i < sizeof(spsr)/sizeof(spsr[0]); i++) + for (u32 i = 0; i < sizeof(spsr)/sizeof(spsr[0]); i++) spsr[i] = 0x00000010; reg[CPU_HALT_STATE] = CPU_ACTIVE; diff --git a/cpu.h b/cpu.h index ce3ef0f..dd6cb89 100644 --- a/cpu.h +++ b/cpu.h @@ -182,4 +182,6 @@ extern const u32 spsr_masks[4]; void init_cpu(void); void move_reg(); +extern const u8 bit_count[256]; + #endif diff --git a/cpu_threaded.c b/cpu_threaded.c index 20b4b80..99a0610 100644 --- a/cpu_threaded.c +++ b/cpu_threaded.c @@ -91,8 +91,6 @@ typedef struct u8 *branch_source; } block_exit_type; -extern u8 bit_count[256]; - // Div (6) and DivArm (7) #define is_div_swi(swinum) (((swinum) & 0xFE) == 0x06) diff --git a/savestate.h b/savestate.h index 8faf506..4eaa690 100644 --- a/savestate.h +++ b/savestate.h @@ -50,13 +50,16 @@ #define bson_write_int32array(p, key, arr, cnt) \ { \ + u32 _n; \ u32 *arrptr = (u32*)(arr); \ - int _n; \ *p++ = 0x4; \ bson_write_cstring(p, key); \ bson_write_u32(p, 5 + (cnt) * 8); \ for (_n = 0; _n < (cnt); _n++) { \ - char ak[3] = {'0'+(_n/10), '0'+(_n%10), 0}; \ + char ak[3] = { \ + (char)('0' + (_n/10)), \ + (char)('0' + (_n%10)), \ + 0 }; \ bson_write_int32(p, ak, arrptr[_n]); \ } \ *p++ = 0; \