From 663767b0786bf4202c748a6c0a70f2645ea72b8b Mon Sep 17 00:00:00 2001
From: David Guillen Fandos
Date: Tue, 7 Sep 2021 00:28:31 +0200
Subject: [PATCH] Checking in Normatt's BIOS source code for reference and development.

---
 bios/Makefile | 140 +++
 bios/source/core.s | 176 ++++
 bios/source/divide.s | 76 ++
 bios/source/helper.arm.c | 33 +
 bios/source/logo.c | 74 ++
 bios/source/logo_data.c | 187 ++++
 bios/source/logo_data.h | 32 +
 bios/source/reset.s | 62 ++
 bios/source/softwareinterrupts.c | 1528 ++++++++++++++++++++++++++++++
 9 files changed, 2308 insertions(+)
 create mode 100644 bios/Makefile
 create mode 100644 bios/source/core.s
 create mode 100644 bios/source/divide.s
 create mode 100644 bios/source/helper.arm.c
 create mode 100644 bios/source/logo.c
 create mode 100644 bios/source/logo_data.c
 create mode 100644 bios/source/logo_data.h
 create mode 100644 bios/source/reset.s
 create mode 100644 bios/source/softwareinterrupts.c

diff --git a/bios/Makefile b/bios/Makefile
new file mode 100644
index 0000000..4b1b8f1
--- /dev/null
+++ b/bios/Makefile
@@ -0,0 +1,140 @@
+#---------------------------------------------------------------------------------
+.SUFFIXES:
+#---------------------------------------------------------------------------------
+
+ifeq ($(strip $(DEVKITARM)),)
+$(error "Please set DEVKITARM in your environment. export DEVKITARM=<path to>devkitARM")
+endif
+
+include $(DEVKITARM)/gba_rules
+
+#---------------------------------------------------------------------------------
+# TARGET is the name of the output
+# BUILD is the directory where object files & intermediate files will be placed
+# SOURCES is a list of directories containing source code
+# DATA is a list of directories containing data files
+# INCLUDES is a list of directories containing header files
+# The build re-invokes this Makefile inside $(BUILD), where the "else" branch below applies
+#---------------------------------------------------------------------------------
+export TARGET := $(notdir $(CURDIR))
+BUILD := build
+SOURCES := source
+DATA := data
+INCLUDES := include
+
+
+#---------------------------------------------------------------------------------
+# options for code generation
+#---------------------------------------------------------------------------------
+ARCH := -mthumb-interwork
+
+CFLAGS := -g -Wall -O2\
+          -mcpu=arm7tdmi -mtune=arm7tdmi -fomit-frame-pointer\
+          -ffast-math -std=c99\
+          $(ARCH)
+
+CFLAGS += $(INCLUDE)
+
+CXXFLAGS := $(CFLAGS) -fno-rtti -fno-exceptions
+
+ASFLAGS := -g $(ARCH)
+LDFLAGS = -nostartfiles -Wl,-init=0x0 -g $(ARCH) -Wl,-Map,$(TARGET).map
+
+#---------------------------------------------------------------------------------
+# Setup some defines
+#---------------------------------------------------------------------------------
+# libtonc should be installed from devkitpro as well (DEVKITPRO must be set in the environment)
+LIBTONC := $(DEVKITPRO)/libtonc
+
+#---------------------------------------------------------------------------------
+# any extra libraries we wish to link with the project
+#---------------------------------------------------------------------------------
+LIBS := -ltonc
+
+#---------------------------------------------------------------------------------
+# list of directories containing libraries, this must be the top level containing
+# include and lib
+#---------------------------------------------------------------------------------
+LIBDIRS := $(LIBTONC)
+
+#---------------------------------------------------------------------------------
+# no real need to edit anything past this point unless you need to add additional
+# rules for different file extensions
+#---------------------------------------------------------------------------------
+ifneq ($(BUILD),$(notdir $(CURDIR)))
+#---------------------------------------------------------------------------------
+
+export OUTPUT := $(CURDIR)/$(TARGET)
+
+export VPATH := $(foreach dir,$(SOURCES),$(CURDIR)/$(dir)) \
+                $(foreach dir,$(DATA),$(CURDIR)/$(dir))
+
+export DEPSDIR := $(CURDIR)/$(BUILD)
+
+CFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.c)))
+CPPFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.cpp)))
+SFILES := $(foreach dir,$(SOURCES),$(notdir $(wildcard $(dir)/*.s)))
+BINFILES := $(foreach dir,$(DATA),$(notdir $(wildcard $(dir)/*.*)))
+
+#---------------------------------------------------------------------------------
+# use CXX for linking C++ projects, CC for standard C
+#---------------------------------------------------------------------------------
+ifeq ($(strip $(CPPFILES)),)
+#---------------------------------------------------------------------------------
+  export LD := $(CC)
+#---------------------------------------------------------------------------------
+else
+#---------------------------------------------------------------------------------
+  export LD := $(CXX)
+#---------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------
+
+export OFILES := $(addsuffix .o,$(BINFILES)) \
+                 $(CPPFILES:.cpp=.o) $(CFILES:.c=.o) $(SFILES:.s=.o)
+
+export INCLUDE := $(foreach dir,$(INCLUDES),-I$(CURDIR)/$(dir)) \
+                  $(foreach dir,$(LIBDIRS),-I$(dir)/include) \
+                  -I$(CURDIR)/$(BUILD)
+
+export LIBPATHS := $(foreach dir,$(LIBDIRS),-L$(dir)/lib)
+
+.PHONY: $(BUILD) clean all
+
+#---------------------------------------------------------------------------------
+all: $(BUILD)
+# Build inside $(BUILD) by re-running this Makefile there; $(MAKE) (not a literal "make") keeps -j/-n and the jobserver working
+$(BUILD):
+	@[ -d $@ ] || mkdir -p $@
+	@$(MAKE) --no-print-directory -C $(BUILD) -f $(CURDIR)/Makefile
+
+#---------------------------------------------------------------------------------
+clean:
+	@echo clean ...
+	@rm -fr $(BUILD) $(TARGET).dldi $(TARGET).elf
+
+
+#---------------------------------------------------------------------------------
+else
+
+DEPENDS := $(OFILES:.o=.d)
+
+#---------------------------------------------------------------------------------
+# main targets
+#---------------------------------------------------------------------------------
+../gba_bios.bin : $(OUTPUT).elf
+$(OUTPUT).elf : $(OFILES)
+
+# Flatten the ELF into a raw binary image padded to 0x4000 bytes
+#---------------------------------------------------------------------------------
+%.bin: %.elf
+	@$(OBJCOPY) --pad-to 0x4000 -O binary $< $@
+	@echo built ... $(notdir $@)
+
+
+-include $(DEPENDS)
+
+
+#---------------------------------------------------------------------------------------
+endif
+#---------------------------------------------------------------------------------------
diff --git a/bios/source/core.s b/bios/source/core.s
new file mode 100644
index 0000000..8e20ae3
--- /dev/null
+++ b/bios/source/core.s
@@ -0,0 +1,176 @@
+@---------------------------------------------------------------------------------
+    .section ".init"
+@---------------------------------------------------------------------------------
+    .global _start
+    .type _start STT_FUNC
+    .align 4
+    .arm
+
+@---------------------------------------------------------------------------------
+_start:
+@---------------------------------------------------------------------------------
+    b reset_vector              @ 0x00 Reset
+    b reserved_vector           @ 0x04 Undefined
+    b swi_vector                @ 0x08 SWI
+    b reserved_vector           @ 0x0C Abort Prefetch
+    b reserved_vector           @ 0x10 Abort Data
+    b reserved_vector           @ 0x14 Reserved
+    b irq_vector                @ 0x18
IRQ
+    b irq_vector                @ 0x1C FIQ
+
+@---------------------------------------------------------------------------------
+irq_vector:
+@---------------------------------------------------------------------------------
+    @ Save these registers, IRQ functions will be allowed to modify them w/o
+    @ saving.
+    stmdb sp!, { r0 - r3, r12, lr }
+
+    @ Pointer to IRQ handler is at 0x03FFFFFC (mirrored WRAM)
+    mov r0, #0x04000000
+
+    @ Store return address and branch to handler
+    mov lr, pc                  @ pc reads two instructions ahead, so lr = the ldmia below
+    ldr pc, [ r0, #-4 ]         @ 0x04000000 - 4 = 0x03FFFFFC
+
+    @ Return from IRQ
+    ldmia sp!, { r0 - r3, r12, lr }
+    subs pc, lr, #4
+
+@---------------------------------------------------------------------------------
+reset_vector: @This isn't required if not booting from bios
+@---------------------------------------------------------------------------------
+    mov r0, #0xDF               @ 0xDF = system mode (0x1F) with IRQ and FIQ masked (0xC0)
+    msr cpsr_cf, r0
+
+    @Disable Interrupts IME=0
+    mov r3, #0x04000000
+    strb r3, [r3,#0x208]        @ the low byte of r3 is 0
+
+    @Setup stacks
+    bl init
+
+    mov r2, #1                  @ IME=1
+    strb r2, [r3,#0x208]
+
+    ldr r0, =DrawLogo
+    ldr lr, =swi_SoftReset      @ DrawLogo returns into swi_SoftReset
+    bx r0
+
+@---------------------------------------------------------------------------------
+reserved_vector: @Lets just infinite loop for now
+@---------------------------------------------------------------------------------
+    b reserved_vector
+
+@ SWI calling convention:
+@ Parameters are passed in via r0 - r3
+@ Called SWI can modify r0 - r3 (and return things here), r12, and r14.
+@ They can't modify anything else.
+@---------------------------------------------------------------------------------
+swi_vector:
+@---------------------------------------------------------------------------------
+    @ Save these as temporaries
+    stmdb sp!, { r11, r12, lr }
+
+    @ Load comment from SWI instruction, which indicates which SWI
+    @ to use.
+    ldrb r12, [lr,#-2]          @ NOTE(review): index is not range-checked; the table below only has entries 0x00-0x2A
+    adr r11, swi_branch_table
+    ldr r12, [r11,r12,lsl#2]
+
+    @ Get SPSR, then enter system mode (the IRQ mask is inherited from the caller, see below)
+    MRS R11, SPSR
+    @ This must be stacked and not just saved, because otherwise SWI won't
+    @ be reentrant, which can happen if you're waiting for interrupts and the
+    @ interrupt handler triggers the SWI.
+    stmfd sp!, {r11}
+
+    @ New CPSR: keep only the caller's IRQ-disable bit (0x80) and select system mode (0x1F)
+    and r11, r11, #0x80
+    orr r11, r11, #0x1f
+    msr cpsr_cf, r11
+
+    @ We have to now save system-mode lr register as well (r2 and r3 are stacked with it)
+    stmfd sp!, {r2, r3, lr}
+
+    @ Set return address
+    adr lr, swi_complete
+    @ Branch to SWI handler
+    bx r12
+
+swi_complete:
+    @ Restore system mode lr and the r2/r3 saved above. NOTE(review): this overwrites anything a handler left in r2/r3 - confirm intended
+    ldmfd sp!, {r2, r3, lr}
+
+    @ Go back to supervisor mode to get back to that stack
+    mov r12, #0xD3              @ 0xD3 = supervisor mode (0x13) with IRQ and FIQ masked
+    msr cpsr_cf, r12
+
+    @ SPSR has to be restored because the transition to system mode broke it
+    ldmfd sp!, {r11}
+    msr spsr_cf, r11
+
+    @ Restore stuff we saved
+    ldmfd sp!, {r11,r12,lr}
+
+    @ Return from exception handler
+    movs pc, lr
+
+@---------------------------------------------------------------------------------
+swi_branch_table:
+@---------------------------------------------------------------------------------
+    .word swi_SoftReset             @ 0x00_SoftReset
+    .word swi_RegisterRamReset      @ 0x01_RegisterRAMReset
+    .word swi_Halt                  @ 0x02_Halt
+    .word swi_Stop                  @ 0x03_Stop
+    .word swi_IntrWait              @ 0x04_IntrWait
+    .word swi_VBlankIntrWait        @ 0x05_VBlankIntrWait
+    .word swi_Div                   @ 0x06_Div
+    .word swi_DivARM                @ 0x07_DivARM
+    .word swi_Sqrt                  @ 0x08_Sqrt
+    .word swi_ArcTan                @ 0x09_ArcTan
+    .word swi_ArcTan2               @ 0x0A_ArcTan2
+    .word swi_CpuSet                @ 0x0B_CPUSet
+    .word swi_CpuFastSet            @ 0x0C_CPUFastSet
+    .word swi_GetBiosChecksum       @ 0x0D_GetBiosChecksum
+    .word swi_BgAffineSet           @ 0x0E_BgAffineSet
+    .word swi_ObjAffineSet          @ 0x0F_ObjAffineSet
+    .word swi_BitUnPack             @ 0x10_BitUnPack
+    .word swi_LZ77UnCompWram        @ 0x11_LZ77UnCompWram
+    .word swi_LZ77UnCompVram        @ 0x12_LZ77UnCompVram
+    .word swi_HuffUnComp            @ 0x13_HuffUnComp
+    .word swi_RLUnCompWram          @ 0x14_RLUnCompWram
+    .word swi_RLUnCompVram          @ 0x15_RLUnCompVram
+    .word swi_Diff8bitUnFilterWram  @ 0x16_Diff8bitUnFilterWram
+    .word swi_Diff8bitUnFilterVram  @ 0x17_Diff8bitUnFilterVram
+    .word swi_Diff16bitUnFilter     @ 0x18_Diff16bitUnFilter
+    .word swi_Invalid               @ 0x19_SoundBiasChange
+    .word swi_Invalid               @ 0x1A_SoundDriverInit
+    .word swi_Invalid               @ 0x1B_SoundDriverMode
+    .word swi_Invalid               @ 0x1C_SoundDriverMain
+@   .word swi_SoundDriverMain       @ 0x1C_SoundDriverMain
+    .word swi_Invalid               @ 0x1D_SoundDriverVSync
+    .word swi_Invalid               @ 0x1E_SoundChannelClear
+    .word swi_MidiKey2Freq          @ 0x1F_MidiKey2Freq
+    .word swi_Invalid               @ 0x20_MusicPlayerOpen
+    .word swi_Invalid               @ 0x21_MusicPlayerStart
+    .word swi_Invalid               @ 0x22_MusicPlayerStop
+    .word swi_MusicPlayerContinue   @ 0x23_MusicPlayerContinue
+    .word swi_MusicPlayerFadeOut    @ 0x24_MusicPlayerFadeOut
+    .word swi_Invalid               @ 0x25_MultiBoot
+    .word swi_Invalid               @ 0x26_HardReset
+    .word swi_CustomHalt            @ 0x27_CustomHalt
+    .word swi_Invalid               @ 0x28_SoundDriverVSyncOff
+    .word swi_Invalid               @ 0x29_SoundDriverVSyncOn
+    .word swi_SoundGetJumpList      @ 0x2A_SoundGetJumpList
+
+@---------------------------------------------------------------------------------
+.global swi_Invalid
+.type swi_Invalid STT_FUNC
+swi_Invalid:
+@---------------------------------------------------------------------------------
+    @ Infinite loop for now
+    @b swi_Invalid
+
+    @ Do nothing for release builds
+    bx lr
+@---------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/bios/source/divide.s b/bios/source/divide.s
new file mode 100644
index 0000000..5950d84
--- /dev/null
+++ b/bios/source/divide.s
@@ -0,0 +1,76 @@
+
+@---------------------------------------------------------------------------------
+@ Setup some nicer names
+@---------------------------------------------------------------------------------
+numerator .req r0
+denominator .req r1
+accumulator .req r2
+current_bit .req r3
+
+numerator_signed .req
r12
+denominator_signed .req r3
+
+sign_flip .req r12
+
+result .req r0
+remainder .req r1
+result_abs .req r3
+
+temp .req r3
+
+@---------------------------------------------------------------------------------
+.global swi_DivARM
+.type swi_DivARM STT_FUNC
+swi_DivARM:     @ same as swi_Div with the operands swapped: r0 = denominator, r1 = numerator
+@---------------------------------------------------------------------------------
+    mov temp, numerator
+    mov numerator, denominator
+    mov denominator, temp
+    b swi_Div
+
+@---------------------------------------------------------------------------------
+.global swi_Div
+.type swi_Div STT_FUNC
+swi_Div:        @ in: r0 = numerator, r1 = denominator; out: r0 = quotient, r1 = remainder (sign of numerator), r3 = abs(quotient)
+@ See http://www.tofla.iconbar.com/tofla/arm/arm02/index.htm for more information
+@---------------------------------------------------------------------------------
+    @ Set if numerator is signed, and abs numerator
+    ands numerator_signed, numerator, #0x80000000
+    rsbmi numerator, numerator, #0
+
+    // Same with denominator
+    ands denominator_signed, denominator, #0x80000000
+    rsbmi denominator, denominator, #0
+
+    // sign_flip bit 31: sign(numerator) != sign(denominator); bit 30: numerator was negative
+    eor denominator_signed, denominator_signed, numerator_signed
+    orr sign_flip, denominator_signed, numerator_signed, lsr #1
+
+    mov accumulator, #0
+    mov current_bit, #1
+
+    // Shift denominator and current_bit left together until denominator > numerator.
+    // NOTE(review): a zero denominator never leaves this loop (0 is always <= numerator).
+    0:
+    cmp denominator, numerator
+    movls denominator, denominator, lsl #1
+    movls current_bit, current_bit, lsl #1
+    bls 0b
+
+    // Basically the grade-school algorithm, for unsigned integers in binary
+    1:
+    cmp numerator, denominator
+    subcs numerator, numerator, denominator
+    orrcs accumulator, accumulator, current_bit
+    movs current_bit, current_bit, lsr #1
+    movcc denominator, denominator, lsr #1
+    bcc 1b
+
+    mov remainder, numerator
+    mov result_abs, accumulator
+    mov result, accumulator
+
+    movs sign_flip, sign_flip, lsl #1       @ C = old bit 31 (signs differ), N = old bit 30 (numerator negative)
+    rsbcs result, result, #0                @ signs differed: negate the quotient
+    rsbmi remainder, remainder, #0          @ numerator was negative: negate the remainder
+    bx lr
diff --git a/bios/source/helper.arm.c b/bios/source/helper.arm.c
new file mode 100644
index 0000000..8edc170
--- /dev/null
+++
b/bios/source/helper.arm.c @@ -0,0 +1,33 @@ +/* + Custom GBA BIOS replacement + Copyright (c) 2002-2006 VBA Development Team + Copyright (c) 2006-2013 VBA-M Development Team + Copyright (c) 2013 Normmatt + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include + +u32 umul3232H32(u32 val, u32 val2) +{ + register u32 a __asm ("r2"); + register u32 b __asm ("r3"); + __asm ("umull %0, %1, %2, %3" : + "=r"(a), "=r"(b) : + "r"(val), "r"(val2) + ); + return b; +} \ No newline at end of file diff --git a/bios/source/logo.c b/bios/source/logo.c new file mode 100644 index 0000000..fb142be --- /dev/null +++ b/bios/source/logo.c @@ -0,0 +1,74 @@ +/* + Custom GBA BIOS replacement + Copyright (c) 2002-2006 VBA Development Team + Copyright (c) 2006-2013 VBA-M Development Team + Copyright (c) 2013 Normmatt + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include + +#include "logo_data.h" + +static inline void CPUWriteHalfWord(u32 adr, u16 val) +{ + *(u16*)adr = val; +} +static inline u16 CPUReadHalfWord(u32 adr) +{ + return *(u16*)adr; +} + +void memcpy_(u32 source, u32 dest, int count) +{ + // copy + while(count) { + CPUWriteHalfWord(dest, (source>0x0EFFFFFF ? 0x1CAD : CPUReadHalfWord(source))); + dest += 2; + source += 2; + count--; + } +} + +extern void swi_LZ77UnCompVram_(u32 source, u32 dest, int checkBios); +extern void swi_RegisterRamReset(u32 flags); + +/*----------------------------------------------------------------- +DrawLogo + Draws a custom logo and pauses for a second or two. +-----------------------------------------------------------------*/ +void DrawLogo() { + + // Load palette + memcpy_((u32)logo_dataPal, (u32)pal_bg_mem, logo_dataPalLen); + // Load tiles into CBB 0 + swi_LZ77UnCompVram_((u32)logo_dataTiles, (u32)&tile_mem[0][0], 0); + // Load map into SBB 30 + swi_LZ77UnCompVram_((u32)logo_dataMap, (u32)&se_mem[30][0], 0); + + // set up BG0 for a 4bpp 64x32t map, using + // using charblock 0 and screenblock 31 + REG_BG0CNT= BG_CBB(0) | BG_SBB(30) | BG_4BPP | BG_REG_32x32; + REG_DISPCNT= DCNT_MODE0 | DCNT_BG0; + + int countdown = 60*2; + while(countdown--) + { + vid_vsync(); + } + + swi_RegisterRamReset(0xFF); +} diff --git a/bios/source/logo_data.c b/bios/source/logo_data.c new file mode 100644 index 0000000..d1b0145 --- /dev/null +++ b/bios/source/logo_data.c @@ -0,0 +1,187 @@ + +//{{BLOCK(logo_data) + +//====================================================================== +// +// logo_data, 256x256@4, +// + palette 16 entries, not compressed +// + 120 tiles (t|f|p reduced) lz77 compressed +// + regular map (in SBBs), lz77 compressed, 32x32 +// Total size: 32 + 1720 + 496 = 
2248 +// +// Time-stamp: 2013-08-26, 19:29:30 +// Exported by Cearn's GBA Image Transmogrifier, v0.8.6 +// ( http://www.coranac.com/projects/#grit ) +// +//====================================================================== + +const unsigned short logo_dataTiles[860] __attribute__((aligned(4)))= +{ + 0x0010,0x000F,0x003C,0xF000,0xF001,0xF001,0x1001,0x410D, + 0x3376,0x8888,0x15F0,0x0770,0x8888,0x1FF0,0x1FA0,0x7810, + 0x0247,0x15F0,0x9200,0x0000,0x2000,0x00DA,0xA300,0x00FF, + 0x2020,0xFFEA,0x1F00,0x0095,0xB940,0x00ED,0xB910,0xFFFE, + 0xFEB7,0xFFFF,0xFD14,0xFFFF,0x0110,0x00EF,0xAB06,0x06FF, + 0xADFF,0xBA14,0x30ED,0x9010,0xCD05,0xAB0C,0x5699,0x4027, + 0xF058,0xFF1D,0x08FF,0x5555,0x5555,0x7750,0xEFFF,0xC0BD, + 0x4700,0x23B0,0x9995,0xEDCA,0x0000,0x1028,0x10A5,0x179B, + + 0xA000,0x5ACE,0x0901,0xFF00,0x4ACF,0x4B00,0x29CE,0x7E00, + 0xAE56,0x7F10,0x00EB,0x9387,0x0400,0xBFD0,0xA703,0xD000, + 0x005B,0xCFD4,0x0005,0x107B,0xF008,0xA801,0x0E10,0x0030, + 0x7013,0x1700,0x00B0,0x85A2,0x4300,0xFFD9,0x60FF,0xD800, + 0x00B1,0x80DB,0x0A00,0xFBAE,0xFFFF,0xFE3B,0x00FF,0x04DF, + 0xFFFF,0x00AF,0xBDFF,0x0401,0xFF00,0x003A,0x8D00,0x4400, + 0x1862,0x48F0,0x01F0,0x0000,0x00D0,0xF012,0x80C2,0xA003, + 0xDD21,0xCDDD,0x0105,0x1A42,0x01AE,0x8E46,0x4A01,0xF0BF, + + 0xB03E,0xD0CB,0xDB02,0xDD01,0x0BDD,0xFFFC,0x0CFF,0x0350, + 0xF0D6,0x5022,0x5009,0x6301,0xE1F0,0x501F,0x5509,0x4575, + 0x9B01,0x7DF0,0x0680,0x0140,0xA0A3,0x3FF0,0x60C0,0x013F, + 0xDF93,0x0004,0xA200,0x09FD,0x00FF,0xD910,0x2702,0xFB40, + 0x2500,0xC662,0x5741,0x3060,0xBFFF,0x0203,0x1A03,0x02A4, + 0x9E07,0x0B02,0x04CF,0x3F02,0xEA1A,0xFF0B,0x6CFF,0x01B3, + 0x705C,0x1000,0x59F0,0x30AA,0x010B,0xFF70,0x00D4,0xE91D, + 0x0F02,0xC8FA,0xB601,0x6300,0xFC40,0xDB00,0x00FE,0x5400, + + 0x0260,0x609B,0x8D02,0x305C,0xDF5F,0xB107,0x8F00,0x2002, + 0x01D3,0x00B6,0x9FFF,0x0330,0xC1F5,0xE08B,0x4203,0x02CB, + 0xDFD5,0xDA02,0x00BF,0x2A9A,0xC4BF,0x9601,0x0190,0x209A, + 0x9E11,0xAB1A,0x9A00,0x007D,0xBF9E,0x2903,0x00EF,0x000C, + 
0x006E,0xFE00,0x01BF,0xFB00,0x09EF,0x003C,0x00D7,0x917B, + 0xF08B,0x9103,0x109F,0x0021,0x4000,0xDBA7,0xB700,0xFFFD, + 0x9008,0xFFFD,0x009E,0x1B64,0xFFFA,0xFF20,0x5009,0x22F9, + 0x0122,0xFE00,0xDE10,0x15AB,0x5E03,0x129C,0xFFD9,0xEF4C, + + 0x7A01,0x00FF,0x2001,0x3E70,0x2222,0x0035,0x02B0,0x00EF, + 0x2A03,0x0700,0x008D,0x600B,0x10BF,0x500F,0x013F,0x4410, + 0x1BFF,0xB80A,0xFFFE,0x039C,0xCDA1,0xA403,0x02BE,0xFB99, + 0x2CFF,0xA200,0x5951,0x0013,0x0000,0xBF21,0x003A,0xFFFA, + 0xAF2D,0x0003,0x0A76,0x7A00,0x7E10,0x011D,0x8553,0x7F60, + 0xFF51,0x3AFF,0x9A01,0x11AA,0x580B,0x03BB,0x3466,0x6900, + 0x9C40,0x0000,0x15BA,0x8ABB,0x0301,0x3AFA,0x7F13,0x01EC, + 0x58C8,0x04B7,0x4007,0x0B04,0xB931,0x4453,0x0B00,0xCB92, + + 0x50BC,0x3803,0x04B5,0x031F,0x003F,0xBFFF,0xDA99,0x4BFF, + 0x8000,0xFD40,0xD951,0x004B,0x2220,0x08DF,0xD06A,0xF500, + 0x0300,0x009F,0xAF07,0x0B00,0x40BF,0x70D0,0x02BF,0xA810, + 0x6FFF,0xC0B6,0x1B00,0x3D01,0xBDFF,0xFFDB,0x37DF,0x8448, + 0xBF41,0x2000,0x0C02,0xABBB,0x9505,0x4B04,0x119D,0xA701, + 0x0497,0xCB82,0x8714,0x01A0,0xAE3F,0xAA32,0x0345,0xBA40, + 0x30CC,0xC8AB,0x9B04,0x9F44,0x9AFF,0xFEA9,0xFF0D,0x1002, + 0x64FA,0x141F,0x4B17,0x6602,0x0280,0xA31A,0x05CF,0xFB40, + + 0x07DF,0x0190,0x0960,0x74D8,0x213F,0x0112,0x10A6,0xCEEC, + 0x10AB,0xFF7D,0x29EF,0x7092,0x01FF,0x50E3,0x527F,0x989D, + 0x9999,0xB359,0xFF04,0x10AF,0x0103,0xAFBB,0x14D7,0x2320, + 0x0163,0xFFF0,0xBBB2,0xFFFB,0x05F3,0xF91B,0x0310,0x1350, + 0x5E22,0x8F12,0x9312,0xBBDF,0x5301,0xFF3C,0x20DF,0x5003, + 0x2313,0x137F,0xA063,0x0FFF,0xBBA0,0xFFDB,0xDE01,0x0320, + 0x1350,0x7F13,0x13DF,0x2063,0xEF03,0x3F20,0xAA42,0x1340, + 0x7F13,0x3A43,0x049C,0x00EE,0x4007,0xA303,0x044D,0x00CC, + + 0xACE9,0x2705,0x05B4,0xA12B,0x2F05,0x5722,0xFFFE,0x002A, + 0x3040,0x0803,0x7A03,0x030A,0x2B7E,0x03AB,0x6D82,0x8603, + 0x039D,0x9E8A,0x2731,0x0320,0x53DD,0x3367,0x506F,0xCF01, + 0x7F13,0x8713,0x1200,0x7F3B,0x53E9,0x2367,0x136F,0x2077, + 0x03EB,0x03F3,0x331F,0x68F7,0xF0BF,0x8003,0xA107,0x6305, + 
0xFC30,0x44EF,0x0508,0x4B83,0xB200,0xCF01,0xFD50,0xDF69, + 0x5A00,0x7706,0x06B3,0x707B,0x03FF,0x564B,0x034C,0x7C4F, + 0x5303,0x109C,0x0603,0x9C7F,0x06A8,0x7C82,0x8606,0x064C, + + 0x1C8A,0xFFFC,0xEF17,0xFD08,0x0300,0x00FE,0x0607,0x2096, + 0xEF07,0x0F10,0x1710,0xC705,0x3009,0x04E7,0xE0A3,0x0103, + 0xA503,0x3E04,0x02EF,0xEF72,0x06B9,0xBAD4,0x0370,0x10B0, + 0xEF0F,0xA004,0x5F13,0x0BFF,0x5000,0x00B1,0x0003,0x03F0, + 0x0B30,0x9999,0x1319,0xFD43,0x03F0,0x0B40,0x6B03,0x0307, + 0x03F0,0x0B50,0x1310,0xDF43,0x03F0,0x0B60,0x0408,0x05F3, + 0xF065,0x4003,0x130B,0xDD43,0x03F0,0x0780,0x076F,0xF083, + 0x6003,0xF90B,0x5F01,0xF871,0xE500,0x03F0,0x0B30,0x0000, + + 0x08F5,0x5903,0x08F2,0xF107,0x03F0,0x0F00,0x19FF,0xC504, + 0x496A,0xF104,0x1913,0x0869,0x691C,0x2408,0xB749,0x2508, + 0x1129,0x027D,0x7F78,0x0308,0x03F0,0x0B30,0x05BF,0xD6A3, + 0x6407,0x03F0,0x0770,0x4B93,0x03F0,0x5F83,0xF0FF,0xA303, + 0xF05F,0xC303,0xF05F,0x3603,0x43DF,0x0627,0x1C72,0x00FF, + 0x0380,0x1537,0x0897,0x0028,0xFAC5,0x5733,0x2753,0x2F13, + 0x3713,0x2718,0x065D,0x1BCE,0x43DA,0x0657,0xC4D9,0xE413, + 0x6F48,0xD620,0xFFE9,0xFF25,0x096D,0xAF23,0x0800,0x076F, + + 0x7718,0x04A4,0xAE53,0xA603,0x40FF,0x4406,0xD800,0xF8D9, + 0x17DF,0x1917,0xA713,0x2007,0x4AFF,0xDFF6,0x17E3,0xB0B8, + 0xB35F,0x4473,0x3444,0x6CD7,0x0316,0x17B6,0x90B7,0x9FF0, + 0xBF29,0x100C,0x0903,0x0CFA,0x0986,0x0C17,0x4443,0xA844, + 0x0596,0x1B03,0xA100,0xFFFE,0x209E,0xFEC9,0x0BEF,0x6000, + 0xDCBA,0x6606,0xA733,0x04CD,0x8073,0xCF06,0xDDDF,0xDFFE, + 0xFF4A,0x0DBD,0x017A,0x2344,0xD917,0xEF77,0x064C,0x63D3, + 0x4607,0x26D7,0x30DF,0x4444,0x3FA8,0x73C3,0xB0EC,0x607D, + + 0x93BF,0x427B,0x5FB1,0x73B3,0x4444,0xCADD,0xD3F2,0x4473, + 0x7FB0,0x73D3,0xA908,0xB843,0x8CDF,0x73B3,0x3444,0xB030, + 0xC3BF,0x4473,0xDA14,0xEBB8,0x73D3,0xC141,0xF35F,0x4073, + 0x1FB1,0x8009,0x4909,0x9009,0xFFFE,0x1009,0x9CC9,0x4D08, + 0x4460,0x5EB1,0xE309,0x3B0B,0x29EF,0x7104,0xFFFF,0xEDDE, + 0xAB0B,0x21BD,0x4335,0xA144,0x039F,0xAFFF,0xAF08,0x0BAF, + 
0x41D6,0x0AAF,0xAFF7,0x4442,0x2444,0x8DB9,0xD3FB,0xC073, + 0xC37F,0xF273,0xD35E,0x2073,0xDFB1,0x73C3,0x4470,0xC0D0, + + 0x01F0,0xA712,0x00C7,0x7100,0xFC03,0x2000,0xFFD9,0x0B10, + 0x1AAC,0xAA47,0xDE06,0x0AAE,0x5C57,0xA20C,0x0A0A,0x0367, + 0x08FD,0x2A2B,0x1777,0xF213,0x9232,0x079F,0x100B,0xE71C, + 0xA264,0xEB1C,0xF70C,0x8100,0xBFAA,0x16BE,0x0C96,0xADC3, + 0x0C16,0xACF6,0xFA1C,0xFF1C,0xC6A4,0x040D,0x0F1D,0x2000, + 0x02A7,0x30EF,0x2852,0x0087,0xEF59,0xAAAB,0xDC89,0xDDED, + 0xF08F,0x8C1F,0x1F6A,0xA8A6,0x1CAA,0xADD5,0xEF2F,0xBA00, + 0x6C00,0x0AA4,0x4050,0x1D19,0xAD77,0xDC09,0x0307,0xBFFF, + + 0x0039,0x39BD,0xC31B,0xF30A,0x08B4,0xBFD8,0x8A0B,0xA60C, + 0xFD00,0x00D0,0xD394,0x12F0,0x0770,0x09BA,0x31CC,0x7D44, + 0xF38F,0xC0FF,0xAFBD,0x1FE0,0xBDEF,0x559A,0x3445,0xF1D8, + 0x40B4,0x160A,0x12F0,0x01A0, +}; + +const unsigned short logo_dataMap[248] __attribute__((aligned(4)))= +{ + 0x0010,0x0008,0x003F,0xF000,0xF001,0xF001,0xF001,0xF001, + 0xF001,0xFF01,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0, + 0x01F0,0x01F0,0xF0FF,0xF001,0xF001,0xF001,0xF001,0xF001, + 0xF001,0x7001,0x1D07,0x0001,0xF002,0xF001,0x4001,0x030B, + 0x40A0,0x0401,0x0500,0x0600,0x0700,0x01F0,0xF0C1,0x4001, + 0x080B,0x0900,0x0A00,0x8440,0x0B00,0x0C00,0x0D00,0x0E00, + 0x0600,0x000F,0x0010,0xF011,0xA098,0x1205,0x0002,0x0013, + 0x0014,0x0015,0x1619,0x0008,0x0017,0x2018,0x1921,0x1A00, + + 0x0080,0x1B29,0x1C00,0x1D00,0x1E00,0x0000,0x001F,0x0020, + 0x0021,0x0022,0x2300,0x2400,0x2500,0x2600,0x0000,0x0027, + 0x0028,0x0029,0x002A,0x2B00,0x2C00,0x2D00,0x2E00,0x0000, + 0x002F,0x0030,0x0031,0x8832,0x5B00,0x0033,0x2034,0x3561, + 0x3600,0x2080,0x373F,0x3800,0x3900,0x3A00,0x0000,0x003B, + 0x003C,0x003D,0x003E,0x3F00,0x4000,0x4100,0x4200,0x0000, + 0x0043,0x0044,0x0045,0x0046,0x4700,0x4800,0x4900,0x4A00, + 0x0008,0x004B,0x204C,0x4D99,0x4E00,0x2080,0x4FA1,0x5000, + + 0x5100,0x5200,0x0000,0x0053,0x0054,0x0055,0x0056,0x5700, + 0x5800,0x5900,0x5A00,0x0000,0x005B,0x005C,0x005D,0x005E, + 
0x5F00,0x6000,0x6100,0x6200,0x0000,0x0063,0x0064,0x0065, + 0x0866,0x6700,0x6800,0xD900,0x0069,0x206A,0x6B00,0xE140, + 0x006C,0x006D,0x386E,0x6F00,0x01F0,0x01F0,0x0B40,0x0070, + 0x2071,0x7200,0x1F81,0x0073,0x0074,0xE375,0x01F0,0x01F0, + 0x0B40,0x0076,0xF177,0xF05D,0xFF01,0x01F0,0x01F0,0x01F0, + 0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0xF0FF,0xF001,0xF001, + + 0xF001,0xF001,0xF001,0xF001,0xF001,0xFF01,0x01F0,0x01F0, + 0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0xF0FF,0xF001, + 0xF001,0xF001,0xF001,0xF001,0xF001,0xF001,0xFF01,0x01F0, + 0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0xF0FF, + 0xF001,0xF001,0xF001,0xF001,0xF001,0xF001,0xF001,0xFF01, + 0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0,0x01F0, + 0xF0FE,0xF001,0xF001,0xF001,0xF001,0xF001,0x6001,0x0001, +}; + +const unsigned short logo_dataPal[16] __attribute__((aligned(4)))= +{ + 0x0000,0x0421,0x0842,0x0C63,0x1084,0x18C6,0x1CE7,0x2108, + 0x294A,0x2D6B,0x4631,0x6318,0x6F7B,0x77BD,0x7BDE,0x7FFF, +}; + +//}}BLOCK(logo_data) diff --git a/bios/source/logo_data.h b/bios/source/logo_data.h new file mode 100644 index 0000000..843917b --- /dev/null +++ b/bios/source/logo_data.h @@ -0,0 +1,32 @@ + +//{{BLOCK(logo_data) + +//====================================================================== +// +// logo_data, 256x256@4, +// + palette 16 entries, not compressed +// + 120 tiles (t|f|p reduced) lz77 compressed +// + regular map (in SBBs), lz77 compressed, 32x32 +// Total size: 32 + 1720 + 496 = 2248 +// +// Time-stamp: 2013-08-26, 19:29:30 +// Exported by Cearn's GBA Image Transmogrifier, v0.8.6 +// ( http://www.coranac.com/projects/#grit ) +// +//====================================================================== + +#ifndef GRIT_LOGO_DATA_H +#define GRIT_LOGO_DATA_H + +#define logo_dataTilesLen 1720 +extern const unsigned short logo_dataTiles[860]; + +#define logo_dataMapLen 496 +extern const unsigned short logo_dataMap[248]; + +#define logo_dataPalLen 32 +extern const unsigned short logo_dataPal[16]; + +#endif // 
GRIT_LOGO_DATA_H
+
+//}}BLOCK(logo_data)
diff --git a/bios/source/reset.s b/bios/source/reset.s
new file mode 100644
index 0000000..431ecd5
--- /dev/null
+++ b/bios/source/reset.s
@@ -0,0 +1,62 @@
+
+.arm
+@---------------------------------------------------------------------------------
+.global swi_SoftReset
+.type swi_SoftReset STT_FUNC
+swi_SoftReset:
+@---------------------------------------------------------------------------------
+    mov r3, #0x04000000
+    @ Read flag from 0x3007FFA (addressed here as r3 - 6)
+    @ 00h=8000000h (ROM), or 01h-FFh=2000000h (RAM)
+    @ This must be done before init because it gets cleared in init
+    ldrb r2, [r3,#-6]
+    bl init
+    cmp r2, #0                  @ flags stay valid below: nothing before the movne/moveq sets them
+    ldmdb r3, {r0-r12}          @ load r0-r12 from the 13 words just below r3, which init has zeroed
+    movne lr, #0x02000000
+    moveq lr, #0x08000000
+    mov r0, #0x1F               @ 0x1F = system mode, IRQ and FIQ unmasked
+    msr cpsr_cf, r0
+    mov r0, #0
+    bx lr
+
+@---------------------------------------------------------------------------------
+.global init
+init:
+@ Requires r3 to be set to 0x04000000 already. Uses r0 and r1 as scratch.
+@---------------------------------------------------------------------------------
+    @ Reset the stack locations
+    mov r0, #0xD3               @ supervisor mode (0x13), IRQ and FIQ masked
+    msr cpsr_cf, r0
+    ldr sp, Cpu_Stack_SVC
+    mov lr, #0
+    msr spsr_cf, lr
+    mov r0, #0xD2               @ IRQ mode (0x12), IRQ and FIQ masked
+    msr cpsr_cf, r0
+    ldr sp, Cpu_Stack_IRQ
+    mov lr, #0
+    msr spsr_cf, lr
+    mov r0, #0x5F               @ system mode (0x1F), FIQ masked, IRQ unmasked
+    msr cpsr_cf, r0
+    ldr sp, Cpu_Stack_USR
+
+    movs r0, #0
+    subs r1, r0, #0x200         @ r1 = -0x200, offset from r3; counts up to 0
+
+    @ Clear top 0x200 bytes of IWRAM 03007E00 -> 03008000
+
+init_loop:
+    str r0, [r3,r1]
+    adds r1, r1, #4
+    blt init_loop               @ loop while the offset is still negative
+
+    bx lr
+
+@---------------------------------------------------------------------------------
+@---------------------------------------------------------------------------------
+
+Cpu_Stack_USR: .word 0x03007F00
+Cpu_Stack_IRQ: .word 0x03007FA0
+Cpu_Stack_SVC: .word 0x03007FE0
+
+@---------------------------------------------------------------------------------
\ No newline at end of file
diff --git a/bios/source/softwareinterrupts.c b/bios/source/softwareinterrupts.c
new file mode 100644
index 0000000..89c3906
--- /dev/null
+++
b/bios/source/softwareinterrupts.c @@ -0,0 +1,1528 @@ +/* + Custom GBA BIOS replacement + Copyright (c) 2002-2006 VBA Development Team + Copyright (c) 2006-2013 VBA-M Development Team + Copyright (c) 2013 Normmatt + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +*/ + +#include + +#define HALTCNT (*(vu8*) 0x04000301) + +const s16 sineTable[256] = { + (s16)0x0000, (s16)0x0192, (s16)0x0323, (s16)0x04B5, (s16)0x0645, (s16)0x07D5, (s16)0x0964, (s16)0x0AF1, + (s16)0x0C7C, (s16)0x0E05, (s16)0x0F8C, (s16)0x1111, (s16)0x1294, (s16)0x1413, (s16)0x158F, (s16)0x1708, + (s16)0x187D, (s16)0x19EF, (s16)0x1B5D, (s16)0x1CC6, (s16)0x1E2B, (s16)0x1F8B, (s16)0x20E7, (s16)0x223D, + (s16)0x238E, (s16)0x24DA, (s16)0x261F, (s16)0x275F, (s16)0x2899, (s16)0x29CD, (s16)0x2AFA, (s16)0x2C21, + (s16)0x2D41, (s16)0x2E5A, (s16)0x2F6B, (s16)0x3076, (s16)0x3179, (s16)0x3274, (s16)0x3367, (s16)0x3453, + (s16)0x3536, (s16)0x3612, (s16)0x36E5, (s16)0x37AF, (s16)0x3871, (s16)0x392A, (s16)0x39DA, (s16)0x3A82, + (s16)0x3B20, (s16)0x3BB6, (s16)0x3C42, (s16)0x3CC5, (s16)0x3D3E, (s16)0x3DAE, (s16)0x3E14, (s16)0x3E71, + (s16)0x3EC5, (s16)0x3F0E, (s16)0x3F4E, (s16)0x3F84, (s16)0x3FB1, (s16)0x3FD3, (s16)0x3FEC, (s16)0x3FFB, + (s16)0x4000, (s16)0x3FFB, (s16)0x3FEC, (s16)0x3FD3, (s16)0x3FB1, (s16)0x3F84, (s16)0x3F4E, (s16)0x3F0E, + (s16)0x3EC5, (s16)0x3E71, 
(s16)0x3E14, (s16)0x3DAE, (s16)0x3D3E, (s16)0x3CC5, (s16)0x3C42, (s16)0x3BB6, + (s16)0x3B20, (s16)0x3A82, (s16)0x39DA, (s16)0x392A, (s16)0x3871, (s16)0x37AF, (s16)0x36E5, (s16)0x3612, + (s16)0x3536, (s16)0x3453, (s16)0x3367, (s16)0x3274, (s16)0x3179, (s16)0x3076, (s16)0x2F6B, (s16)0x2E5A, + (s16)0x2D41, (s16)0x2C21, (s16)0x2AFA, (s16)0x29CD, (s16)0x2899, (s16)0x275F, (s16)0x261F, (s16)0x24DA, + (s16)0x238E, (s16)0x223D, (s16)0x20E7, (s16)0x1F8B, (s16)0x1E2B, (s16)0x1CC6, (s16)0x1B5D, (s16)0x19EF, + (s16)0x187D, (s16)0x1708, (s16)0x158F, (s16)0x1413, (s16)0x1294, (s16)0x1111, (s16)0x0F8C, (s16)0x0E05, + (s16)0x0C7C, (s16)0x0AF1, (s16)0x0964, (s16)0x07D5, (s16)0x0645, (s16)0x04B5, (s16)0x0323, (s16)0x0192, + (s16)0x0000, (s16)0xFE6E, (s16)0xFCDD, (s16)0xFB4B, (s16)0xF9BB, (s16)0xF82B, (s16)0xF69C, (s16)0xF50F, + (s16)0xF384, (s16)0xF1FB, (s16)0xF074, (s16)0xEEEF, (s16)0xED6C, (s16)0xEBED, (s16)0xEA71, (s16)0xE8F8, + (s16)0xE783, (s16)0xE611, (s16)0xE4A3, (s16)0xE33A, (s16)0xE1D5, (s16)0xE075, (s16)0xDF19, (s16)0xDDC3, + (s16)0xDC72, (s16)0xDB26, (s16)0xD9E1, (s16)0xD8A1, (s16)0xD767, (s16)0xD633, (s16)0xD506, (s16)0xD3DF, + (s16)0xD2BF, (s16)0xD1A6, (s16)0xD095, (s16)0xCF8A, (s16)0xCE87, (s16)0xCD8C, (s16)0xCC99, (s16)0xCBAD, + (s16)0xCACA, (s16)0xC9EE, (s16)0xC91B, (s16)0xC851, (s16)0xC78F, (s16)0xC6D6, (s16)0xC626, (s16)0xC57E, + (s16)0xC4E0, (s16)0xC44A, (s16)0xC3BE, (s16)0xC33B, (s16)0xC2C2, (s16)0xC252, (s16)0xC1EC, (s16)0xC18F, + (s16)0xC13B, (s16)0xC0F2, (s16)0xC0B2, (s16)0xC07C, (s16)0xC04F, (s16)0xC02D, (s16)0xC014, (s16)0xC005, + (s16)0xC000, (s16)0xC005, (s16)0xC014, (s16)0xC02D, (s16)0xC04F, (s16)0xC07C, (s16)0xC0B2, (s16)0xC0F2, + (s16)0xC13B, (s16)0xC18F, (s16)0xC1EC, (s16)0xC252, (s16)0xC2C2, (s16)0xC33B, (s16)0xC3BE, (s16)0xC44A, + (s16)0xC4E0, (s16)0xC57E, (s16)0xC626, (s16)0xC6D6, (s16)0xC78F, (s16)0xC851, (s16)0xC91B, (s16)0xC9EE, + (s16)0xCACA, (s16)0xCBAD, (s16)0xCC99, (s16)0xCD8C, (s16)0xCE87, (s16)0xCF8A, (s16)0xD095, (s16)0xD1A6, + 
(s16)0xD2BF, (s16)0xD3DF, (s16)0xD506, (s16)0xD633, (s16)0xD767, (s16)0xD8A1, (s16)0xD9E1, (s16)0xDB26, + (s16)0xDC72, (s16)0xDDC3, (s16)0xDF19, (s16)0xE075, (s16)0xE1D5, (s16)0xE33A, (s16)0xE4A3, (s16)0xE611, + (s16)0xE783, (s16)0xE8F8, (s16)0xEA71, (s16)0xEBED, (s16)0xED6C, (s16)0xEEEF, (s16)0xF074, (s16)0xF1FB, + (s16)0xF384, (s16)0xF50F, (s16)0xF69C, (s16)0xF82B, (s16)0xF9BB, (s16)0xFB4B, (s16)0xFCDD, (s16)0xFE6E +}; + +const u8 ScaleTable[180] = { + (u8)0xE0, (u8)0xE1, (u8)0xE2, (u8)0xE3, (u8)0xE4, (u8)0xE5, (u8)0xE6, (u8)0xE7, (u8)0xE8, + (u8)0xE9, (u8)0xEA, (u8)0xEB, (u8)0xD0, (u8)0xD1, (u8)0xD2, (u8)0xD3, (u8)0xD4, (u8)0xD5, + (u8)0xD6, (u8)0xD7, (u8)0xD8, (u8)0xD9, (u8)0xDA, (u8)0xDB, (u8)0xC0, (u8)0xC1, (u8)0xC2, + (u8)0xC3, (u8)0xC4, (u8)0xC5, (u8)0xC6, (u8)0xC7, (u8)0xC8, (u8)0xC9, (u8)0xCA, (u8)0xCB, + (u8)0xB0, (u8)0xB1, (u8)0xB2, (u8)0xB3, (u8)0xB4, (u8)0xB5, (u8)0xB6, (u8)0xB7, (u8)0xB8, + (u8)0xB9, (u8)0xBA, (u8)0xBB, (u8)0xA0, (u8)0xA1, (u8)0xA2, (u8)0xA3, (u8)0xA4, (u8)0xA5, + (u8)0xA6, (u8)0xA7, (u8)0xA8, (u8)0xA9, (u8)0xAA, (u8)0xAB, (u8)0x90, (u8)0x91, (u8)0x92, + (u8)0x93, (u8)0x94, (u8)0x95, (u8)0x96, (u8)0x97, (u8)0x98, (u8)0x99, (u8)0x9A, (u8)0x9B, + (u8)0x80, (u8)0x81, (u8)0x82, (u8)0x83, (u8)0x84, (u8)0x85, (u8)0x86, (u8)0x87, (u8)0x88, + (u8)0x89, (u8)0x8A, (u8)0x8B, (u8)0x70, (u8)0x71, (u8)0x72, (u8)0x73, (u8)0x74, (u8)0x75, + (u8)0x76, (u8)0x77, (u8)0x78, (u8)0x79, (u8)0x7A, (u8)0x7B, (u8)0x60, (u8)0x61, (u8)0x62, + (u8)0x63, (u8)0x64, (u8)0x65, (u8)0x66, (u8)0x67, (u8)0x68, (u8)0x69, (u8)0x6A, (u8)0x6B, + (u8)0x50, (u8)0x51, (u8)0x52, (u8)0x53, (u8)0x54, (u8)0x55, (u8)0x56, (u8)0x57, (u8)0x58, + (u8)0x59, (u8)0x5A, (u8)0x5B, (u8)0x40, (u8)0x41, (u8)0x42, (u8)0x43, (u8)0x44, (u8)0x45, + (u8)0x46, (u8)0x47, (u8)0x48, (u8)0x49, (u8)0x4A, (u8)0x4B, (u8)0x30, (u8)0x31, (u8)0x32, + (u8)0x33, (u8)0x34, (u8)0x35, (u8)0x36, (u8)0x37, (u8)0x38, (u8)0x39, (u8)0x3A, (u8)0x3B, + (u8)0x20, (u8)0x21, (u8)0x22, (u8)0x23, (u8)0x24, (u8)0x25, 
(u8)0x26, (u8)0x27, (u8)0x28, + (u8)0x29, (u8)0x2A, (u8)0x2B, (u8)0x10, (u8)0x11, (u8)0x12, (u8)0x13, (u8)0x14, (u8)0x15, + (u8)0x16, (u8)0x17, (u8)0x18, (u8)0x19, (u8)0x1A, (u8)0x1B, (u8)0x00, (u8)0x01, (u8)0x02, + (u8)0x03, (u8)0x04, (u8)0x05, (u8)0x06, (u8)0x07, (u8)0x08, (u8)0x09, (u8)0xA, (u8)0xB +}; + +const u32 FreqTable[12] = { + (u32)0x80000000, + (u32)0x879C7C97, + (u32)0x8FACD61E, + (u32)0x9837F052, + (u32)0xA14517CC, + (u32)0xAADC0848, + (u32)0xB504F334, + (u32)0xBFC886BB, + (u32)0xCB2FF52A, + (u32)0xD744FCCB, + (u32)0xE411F03A, + (u32)0xF1A1BF39 +}; + +/*----------------------------------------------------------------- + Absolute Value from http://stackoverflow.com/questions/16499475/arm-assembly-absolute-value-function-are-two-or-three-lines-faster +-----------------------------------------------------------------*/ +s32 abs(s32 x){ + s32 signext = (x >= 0) ? 0 : -1; //This can be done with an ASR instruction + return (x + signext) ^ signext; +} + +/*----------------------------------------------------------------- + Square Root Method from http://www.finesse.demon.co.uk/steven/sqrt.html +-----------------------------------------------------------------*/ +#define iter1(N) \ + try = root + (1 << (N)); \ + if (n >= try << (N)) \ + { n -= try << (N); \ + root |= 2 << (N); \ + } + +u32 sqrt_ (u32 n) +{ + u32 root = 0, try; + iter1 (15); iter1 (14); iter1 (13); iter1 (12); + iter1 (11); iter1 (10); iter1 ( 9); iter1 ( 8); + iter1 ( 7); iter1 ( 6); iter1 ( 5); iter1 ( 4); + iter1 ( 3); iter1 ( 2); iter1 ( 1); iter1 ( 0); + return root >> 1; +} + +/*----------------------------------------------------------------- + Wrapper methods for VBA-M Methods +-----------------------------------------------------------------*/ +static inline u16 CPUReadByte(u32 adr) +{ + return *(u8*)adr; +} +static inline u16 CPUReadHalfWord(u32 adr) +{ + return *(u16*)adr; +} +static inline u32 CPUReadMemory(u32 adr) +{ + return *(u32*)adr; +} +static inline void CPUWriteMemory(u32 
adr, u32 val) +{ + *(u32*)adr = val; +} +static inline void CPUWriteHalfWord(u32 adr, u16 val) +{ + *(u16*)adr = val; +} +static inline void CPUWriteByte(u32 adr, u8 val) +{ + *(u8*)adr = val; +} +static inline void CPUUpdateRegister(u32 adr, u16 val) +{ + *(u16*)(0x04000000+adr) = val; +} + +u32 umul3232H32(u32 val, u32 val2); + +/*----------------------------------------------------------------- + Assembly method declarations +-----------------------------------------------------------------*/ +extern int swi_Div(u32 a, u32 b); //Returns result + +/*----------------------------------------------------------------- +0x27 - CustomHalt + Writes the 8bit parameter value to HALTCNT, below values are equivalent to Halt + and Stop/Sleep functions, other values reserved, purpose unknown. + 8bit parameter (00h=Halt, 80h=Stop) +-----------------------------------------------------------------*/ +void swi_CustomHalt(u8 val) { + HALTCNT = val; +} + +/*----------------------------------------------------------------- + 0x02 - Halt + Halts the CPU until an interrupt request occurs. The CPU is switched into low-power mode, + all other circuits (video, sound, timers, serial, keypad, system clock) are kept operating. + Halt mode is terminated when any enabled interrupts are requested, that is when (IE AND IF) + is not zero, the GBA locks up if that condition doesn't get true. + However, the state of CPUs IRQ disable bit in CPSR register, and the IME register are + don't care, Halt passes through even if either one has disabled interrupts. +-----------------------------------------------------------------*/ +void swi_Halt() { + swi_CustomHalt(0); +} + +/*----------------------------------------------------------------- +0x03 - Stop + Switches the GBA into very low power mode (to be used similar as a screen-saver). + The CPU, System Clock, Sound, Video, SIO-Shift Clock, DMAs, and Timers are stopped. 
+ Stop state can be terminated by the following interrupts only + (as far as enabled in IE register): Joypad, Game Pak, or General-Purpose-SIO. + + "The system clock is stopped so the IF flag is not set." + Preparation for Stop: + Disable Video before implementing Stop (otherwise Video just freezes, but still keeps consuming battery power). + Possibly required to disable Sound also? Obviously, it'd be also recommended to disable any external + hardware (such like Rumble or Infra-Red) as far as possible. +-----------------------------------------------------------------*/ +void swi_Stop() { + swi_CustomHalt(0x80); +} + +/*----------------------------------------------------------------- + Used by the IntrWait functions +-----------------------------------------------------------------*/ +bool CheckInterrupts(u32 waitFlags) +{ + REG_IME = 0; //Disable interrupts + u16 intFlags = *(vu16*)(0x04000000-8); //Get current flags + u16 flags = intFlags & waitFlags; + if(flags) + { + intFlags = (flags) ^ intFlags; + *(vu16*)(0x04000000-8) = intFlags; + } + REG_IME = 1; //Enable interrupts + + return flags; +} + +/*----------------------------------------------------------------- +0x04 - IntrWait + Continues to wait in Halt state until one (or more) of the specified interrupt(s) do occur. + The function forcefully sets IME=1. When using multiple interrupts at the same time, + this function is having less overhead than repeatedly calling the Halt function +-----------------------------------------------------------------*/ +void swi_IntrWait(bool discard, u32 waitFlags) +{ + if(discard) + { + CheckInterrupts(waitFlags); + } + + u32 val = 0; + do + { + HALTCNT = 0; + val = CheckInterrupts(waitFlags); + } + while(!val); +} + +/*----------------------------------------------------------------- +0x05 - VBlankIntrWait + Continues to wait in Halt state until one (or more) of the specified interrupt(s) do occur. + The function forcefully sets IME=1. 
When using multiple interrupts at the same time, + this function is having less overhead than repeatedly calling the Halt function +-----------------------------------------------------------------*/ +void swi_VBlankIntrWait() +{ + swi_IntrWait(true,1); +} + +/*----------------------------------------------------------------- +0x06 - Div + Signed Division, r0/r1. + + r0 signed 32bit Number + r1 signed 32bit Denom + + Return: + + r0 Number DIV Denom ;signed + r1 Number MOD Denom ;signed + r3 ABS (Number DIV Denom) ;unsigned + + For example, incoming -1234, 10 should return -123, -4, +123. + The function usually gets caught in an endless loop upon division by zero. +-----------------------------------------------------------------*/ +/*u32 swi_Div(s32 num, s32 denom) +{ + //Implemented in assembly +}*/ + +/*----------------------------------------------------------------- +0x07 - DivArm + Same as above (SWI 06h Div), but incoming parameters are exchanged, r1/r0 (r0=Denom, r1=number). + For compatibility with ARM's library. Slightly slower (3 clock cycles) than SWI 06h. +-----------------------------------------------------------------*/ +/*u32 swi_DivArm(s32 num, s32 denom) +{ + //Implemented in assembly +}*/ + +/*----------------------------------------------------------------- +0x08 - Sqrt + Calculate square root. +-----------------------------------------------------------------*/ +u32 swi_Sqrt(u32 input) +{ + return sqrt_(input); +} + +/*----------------------------------------------------------------- +0x09 - ArcTan + Calculates the arc tangent. 
+-----------------------------------------------------------------*/ +u32 swi_ArcTan(u32 input) +{ + s32 a = -(((s32)(input*input)) >> 14); + s32 b = ((0xA9 * a) >> 14) + 0x390; + b = ((b * a) >> 14) + 0x91C; + b = ((b * a) >> 14) + 0xFB6; + b = ((b * a) >> 14) + 0x16AA; + b = ((b * a) >> 14) + 0x2081; + b = ((b * a) >> 14) + 0x3651; + b = ((b * a) >> 14) + 0xA2F9; + a = ((s32)input * b) >> 16; + return (u32)a; +} + +/*----------------------------------------------------------------- +0x0A - ArcTan2 + Calculates the arc tangent after correction processing. +-----------------------------------------------------------------*/ +u32 swi_ArcTan2(s32 x, s32 y) +{ + u32 res = 0; + if (y == 0) { + res = ((x>>16) & 0x8000); + } else { + if (x == 0) { + res = ((y>>16) & 0x8000) + 0x4000; + } else { + if ((abs(x) > abs(y)) || ((abs(x) == abs(y)) && (!((x<0) && (y<0))))) { + u32 div = swi_Div(y << 14, (u32)x); + div = swi_ArcTan(div); + if (x < 0) + res = 0x8000 + div; + else + res = (((y>>16) & 0x8000)<<1) + div; + } else { + u32 div = swi_Div(x << 14, (u32)y); + div = swi_ArcTan(div); + res = (0x4000 + ((y>>16) & 0x8000)) - div; + } + } + } + return res; +} + +/*----------------------------------------------------------------- +0x0B - CpuSet + Memory copy/fill in units of 4 bytes or 2 bytes. + Memcopy is implemented as repeated LDMIA/STMIA [Rb]!,r3 or LDRH/STRH r3,[r0,r5] instructions. + Memfill as single LDMIA or LDRH followed by repeated STMIA [Rb]!,r3 or STRH r3,[r0,r5]. + The length must be a multiple of 4 bytes (32bit mode) or 2 bytes (16bit mode). + The (half)wordcount in r2 must be length/4 (32bit mode) or length/2 (16bit mode), + ie. length in word/halfword units rather than byte units. +-----------------------------------------------------------------*/ +void swi_CpuSet(u32 source, u32 dest, u32 cnt) +{ + if(((source & 0xe000000) == 0) || + ((source + (((cnt << 11)>>9) & 0x1fffff)) & 0xe000000) == 0) + return; + + int count = cnt & 0x1FFFFF; + + // 32-bit ? 
+ if((cnt >> 26) & 1) { + // needed for 32-bit mode! + source &= 0xFFFFFFFC; + dest &= 0xFFFFFFFC; + // fill ? + if((cnt >> 24) & 1) { + u32 value = (source>0x0EFFFFFF ? 0x1CAD1CAD : CPUReadMemory(source)); + while(count) { + CPUWriteMemory(dest, value); + dest += 4; + count--; + } + } else { + // copy + while(count) { + CPUWriteMemory(dest, (source>0x0EFFFFFF ? 0x1CAD1CAD : CPUReadMemory(source))); + dest += 4; + source += 4; + count--; + } + } + } else { + // 16-bit fill? + if((cnt >> 24) & 1) { + u16 value = (source>0x0EFFFFFF ? 0x1CAD : CPUReadHalfWord(source)); + while(count) { + CPUWriteHalfWord(dest, value); + dest += 2; + count--; + } + } else { + // copy + while(count) { + CPUWriteHalfWord(dest, (source>0x0EFFFFFF ? 0x1CAD : CPUReadHalfWord(source))); + dest += 2; + source += 2; + count--; + } + } + } +} + +/*----------------------------------------------------------------- +0x0C - CpuFastSet + Memory copy/fill in units of 32 bytes. Memcopy is implemented as repeated LDMIA/STMIA [Rb]!,r2-r9 instructions. + Memfill as single LDR followed by repeated STMIA [Rb]!,r2-r9. + After processing all 32-byte-blocks, the NDS additonally processes the remaining words as 4-byte blocks. + + The length is specifed as wordcount, ie. the number of bytes divided by 4. + On the GBA, the length must be a multiple of 8 words (32 bytes). +-----------------------------------------------------------------*/ +void swi_CpuFastSet(u32 source, u32 dest, u32 cnt) +{ + if((((u32)source & 0xe000000) == 0) || + (((u32)source + (((cnt << 11)>>9) & 0x1fffff)) & 0xe000000) == 0) + return; + + // needed for 32-bit mode! + source &= 0xFFFFFFFC; + dest &= 0xFFFFFFFC; + + int count = cnt & 0x1FFFFF; + + // fill? + if((cnt >> 24) & 1) { + u32 value = (source>0x0EFFFFFF ? 
0xBAFFFFFB : CPUReadMemory(source)); + while(count > 0) { + // BIOS always transfers 32 bytes at a time + for(int i = 0; i < 8; i++) { + CPUWriteMemory(dest, value); + dest += 4; + } + count -= 8; + } + } else { + // copy + while(count > 0) { + // BIOS always transfers 32 bytes at a time + for(int i = 0; i < 8; i++) { + CPUWriteMemory(dest, (source>0x0EFFFFFF ? 0xBAFFFFFB :CPUReadMemory(source))); + source += 4; + dest += 4; + } + count -= 8; + } + } +} + +/*----------------------------------------------------------------- +0x0D - GetBiosChecksum + Calculates the checksum of the BIOS ROM (by reading in 32bit units, and adding up these values). + IRQ and FIQ are disabled during execution. + The checksum is BAAE187Fh (GBA and GBA SP), or BAAE1880h (DS in GBA mode, whereas the only difference + is that the byte at [3F0Ch] is changed from 00h to 01h, otherwise the BIOS is 1:1 same as GBA BIOS, + it does even include multiboot code). +-----------------------------------------------------------------*/ +int swi_GetBiosChecksum() +{ + //TODO: Actually checksum this bios + //TODO: Make this bios checksum the same as official bios + return 0xBAAE187F; +} + +/*----------------------------------------------------------------- +0x0E - BgAffineSet + Used to calculate BG Rotation/Scaling parameters. 
+-----------------------------------------------------------------*/ +void swi_BgAffineSet(u32 src, u32 dest, u32 num) +{ + while(num--) { + s32 cx = CPUReadMemory(src); + src+=4; + s32 cy = CPUReadMemory(src); + src+=4; + s16 dispx = CPUReadHalfWord(src); + src+=2; + s16 dispy = CPUReadHalfWord(src); + src+=2; + s16 rx = CPUReadHalfWord(src); + src+=2; + s16 ry = CPUReadHalfWord(src); + src+=2; + u16 theta = CPUReadHalfWord(src)>>8; + src+=4; // keep structure alignment + s32 a = sineTable[(theta+0x40)&255]; + s32 b = sineTable[theta]; + + s16 dx = (rx * a)>>14; + s16 dmx = (rx * b)>>14; + s16 dy = (ry * b)>>14; + s16 dmy = (ry * a)>>14; + + CPUWriteHalfWord(dest, dx); + dest += 2; + CPUWriteHalfWord(dest, -dmx); + dest += 2; + CPUWriteHalfWord(dest, dy); + dest += 2; + CPUWriteHalfWord(dest, dmy); + dest += 2; + + s32 startx = cx - dx * dispx + dmx * dispy; + s32 starty = cy - dy * dispx - dmy * dispy; + + CPUWriteMemory(dest, startx); + dest += 4; + CPUWriteMemory(dest, starty); + dest += 4; + } +} + +/*----------------------------------------------------------------- +0x0F - ObjAffineSet + Calculates and sets the OBJ's affine parameters from the scaling ratio and angle of rotation. + The affine parameters are calculated from the parameters set in Srcp. + The four affine parameters are set every Offset bytes, starting from the Destp address. + If the Offset value is 2, the parameters are stored contiguously. If the value is 8, they match the structure of OAM. + When Srcp is arrayed, the calculation can be performed continuously by specifying Num. 
+-----------------------------------------------------------------*/ +void swi_ObjAffineSet(u32 src, u32 dest, int num, int offset) +{ + while(num--) { + s16 rx = CPUReadHalfWord(src); + src+=2; + s16 ry = CPUReadHalfWord(src); + src+=2; + u16 theta = CPUReadHalfWord(src)>>8; + src+=4; // keep structure alignment + + s32 a = (s32)sineTable[(theta+0x40)&255]; + s32 b = (s32)sineTable[theta]; + + s16 dx = ((s32)rx * a)>>14; + s16 dmx = ((s32)rx * b)>>14; + s16 dy = ((s32)ry * b)>>14; + s16 dmy = ((s32)ry * a)>>14; + + CPUWriteHalfWord(dest, dx); + dest += offset; + CPUWriteHalfWord(dest, -dmx); + dest += offset; + CPUWriteHalfWord(dest, dy); + dest += offset; + CPUWriteHalfWord(dest, dmy); + dest += offset; + } +} + +/*----------------------------------------------------------------- +0x10 - BitUnPack + Used to increase the color depth of bitmaps or tile data. For example, to convert a 1bit + monochrome font into 4bit or 8bit GBA tiles. + The Unpack Info is specified separately, allowing to convert the same source data into different formats. +-----------------------------------------------------------------*/ +void swi_BitUnPack(u32 source, u32 dest, u32 header) +{ + int len = CPUReadHalfWord(header); + // check address + if(((source & 0xe000000) == 0) || + ((source + len) & 0xe000000) == 0) + return; + + int bits = CPUReadByte(header+2); + int revbits = 8 - bits; + // u32 value = 0; + u32 base = CPUReadMemory(header+4); + bool addBase = (base & 0x80000000) ? 
true : false; + base &= 0x7fffffff; + int dataSize = CPUReadByte(header+3); + + int data = 0; + int bitwritecount = 0; + while(1) { + len -= 1; + if(len < 0) + break; + int mask = 0xff >> revbits; + u8 b = CPUReadByte(source); + source++; + int bitcount = 0; + while(1) { + if(bitcount >= 8) + break; + u32 d = b & mask; + u32 temp = d >> bitcount; + if(d || addBase) { + temp += base; + } + data |= temp << bitwritecount; + bitwritecount += dataSize; + if(bitwritecount >= 32) { + CPUWriteMemory(dest, data); + dest += 4; + data = 0; + bitwritecount = 0; + } + mask <<= bits; + bitcount += bits; + } + } +} + +/*----------------------------------------------------------------- +0x11 - LZ77UnCompWram + Expands LZ77-compressed data. The Wram function is faster, and writes in units of 8bits. + For the Vram function the destination must be halfword aligned, data is written in units of 16bits. + If the size of the compressed data is not a multiple of 4, please adjust it as much as possible + by padding with 0. + + Align the source address to a 4-Byte boundary. 
+
+  Stream format as decoded below: a 32-bit header whose upper 24 bits are the
+  decompressed size, followed by groups of one flag byte plus eight items.
+  Flag bits are consumed MSB first: 0 = one literal byte, 1 = a two-byte
+  back-reference (upper 4 bits: length-3, lower 12 bits: distance-1).
+-----------------------------------------------------------------*/
+void swi_LZ77UnCompWram(u32 source, u32 dest)
+{
+    const u32 header = CPUReadMemory(source);
+    source += 4;
+
+    if((source & 0xe000000) == 0)
+        return;
+    if(((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
+        return;
+
+    int remaining = header >> 8;
+
+    while(remaining > 0) {
+        u8 flags = CPUReadByte(source++);
+
+        // A zero flag byte simply yields eight literals through the same path.
+        for(int bit = 0; bit < 8; bit++, flags <<= 1) {
+            if(!(flags & 0x80)) {
+                // Literal byte.
+                CPUWriteByte(dest++, CPUReadByte(source++));
+                if(--remaining == 0)
+                    return;
+                continue;
+            }
+
+            // Back-reference into the data already written.
+            u16 token = CPUReadByte(source++) << 8;
+            token |= CPUReadByte(source++);
+            const int runLength = (token >> 12) + 3;
+            u32 window = dest - (token & 0x0FFF) - 1;
+
+            for(int n = 0; n < runLength; n++) {
+                CPUWriteByte(dest++, CPUReadByte(window++));
+                if(--remaining == 0)
+                    return;
+            }
+        }
+    }
+}
+
+/*-----------------------------------------------------------------
+0x12 - LZ77UnCompVram
+  Expands LZ77-compressed data. The Wram function is faster, and writes in units of 8bits.
+  For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
+  If the size of the compressed data is not a multiple of 4, please adjust it as much as possible
+  by padding with 0.
+
+  Align the source address to a 4-Byte boundary.
+
+  Same stream format as LZ77UnCompWram, but output bytes are collected in
+  pairs (low byte first) and stored with 16-bit writes.
+
+  source    - address of the compressed stream (32-bit header first)
+  dest      - output address
+  checkBios - non-zero enables the source-range guard
+-----------------------------------------------------------------*/
+void swi_LZ77UnCompVram_(u32 source, u32 dest, int checkBios)
+{
+    u32 header = CPUReadMemory(source);
+    source += 4;
+
+    if(checkBios && (((source & 0xe000000) == 0) ||
+        ((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0))
+        return;
+
+    // Halfword assembly state: byteCount bytes (0 or 1) are pending in
+    // writeValue, and byteShift is always 8 * byteCount.
+    int byteCount = 0;
+    int byteShift = 0;
+    u32 writeValue = 0;
+
+    // Decompressed size in bytes.
+    int len = header >> 8;
+
+    // NOTE: every exit below returns as soon as len reaches 0 without flushing,
+    // so with an odd len the final byte stays in writeValue and is not stored.
+    while(len > 0) {
+        // Flag byte: one bit per following item, MSB first.
+        u8 d = CPUReadByte(source++);
+
+        if(d) {
+            for(int i = 0; i < 8; i++) {
+                if(d & 0x80) {
+                    // Back-reference: upper 4 bits length-3, lower 12 bits distance-1.
+                    u16 data = CPUReadByte(source++) << 8;
+                    data |= CPUReadByte(source++);
+                    int length = (data >> 12) + 3;
+                    int offset = (data & 0x0FFF);
+                    // dest only advances on flushes, so the logical output
+                    // position is dest + byteCount.
+                    // NOTE(review): with offset == 0 and one byte pending this reads
+                    // the address of the byte that is still only in writeValue.
+                    u32 windowOffset = dest + byteCount - offset - 1;
+                    for(int i2 = 0; i2 < length; i2++) {
+                        writeValue |= (CPUReadByte(windowOffset++) << byteShift);
+                        byteShift += 8;
+                        byteCount++;
+
+                        // Two bytes collected: emit one halfword.
+                        if(byteCount == 2) {
+                            CPUWriteHalfWord(dest, writeValue);
+                            dest += 2;
+                            byteCount = 0;
+                            byteShift = 0;
+                            writeValue = 0;
+                        }
+                        len--;
+                        if(len == 0)
+                            return;
+                    }
+                } else {
+                    // Literal byte.
+                    writeValue |= (CPUReadByte(source++) << byteShift);
+                    byteShift += 8;
+                    byteCount++;
+                    if(byteCount == 2) {
+                        CPUWriteHalfWord(dest, writeValue);
+                        dest += 2;
+                        byteCount = 0;
+                        byteShift = 0;
+                        writeValue = 0;
+                    }
+                    len--;
+                    if(len == 0)
+                        return;
+                }
+                d <<= 1;
+            }
+        } else {
+            // All eight flags are 0: eight literal bytes.
+            for(int i = 0; i < 8; i++) {
+                writeValue |= (CPUReadByte(source++) << byteShift);
+                byteShift += 8;
+                byteCount++;
+                if(byteCount == 2) {
+                    CPUWriteHalfWord(dest, writeValue);
+                    dest += 2;
+                    byteShift = 0;
+                    byteCount = 0;
+                    writeValue = 0;
+                }
+                len--;
+                if(len == 0)
+                    return;
+            }
+        }
+    }
+}
+
+// SWI entry point: runs the decompressor with the source-range guard enabled.
+void swi_LZ77UnCompVram(u32 source, u32 dest)
+{
+    swi_LZ77UnCompVram_(source,dest,1);
+}
+
+/*-----------------------------------------------------------------
+0x13 - HuffUnComp
+  The decoder starts in root node, the separate bits in the bitstream specify if the next node is node0 or node1,
+  if that node is a data node, then the data is stored in memory, and the decoder is reset to the root
node.
+  The most often used data should be as close to the root node as possible. For example, the 4-byte string "Huff"
+  could be compressed to 6 bits: 10-11-0-0, with root.0 pointing directly to data "f", and root.1 pointing to a
+  child node, whose nodes point to data "H" and data "u".
+
+  Data is written in units of 32bits, if the size of the compressed data is not a multiple of 4,
+  please adjust it as much as possible by padding with 0.
+  Align the source address to a 4Byte boundary.
+
+  Layout as decoded below: 32-bit header (low nibble = symbol width, upper
+  24 bits = decompressed size), one tree-size byte, the tree table, then the
+  bitstream as 32-bit words consumed MSB first.
+  Node byte: bit 7 = child 0 is data, bit 6 = child 1 is data,
+  bits 0-5 = offset used to locate the child pair.
+-----------------------------------------------------------------*/
+void swi_HuffUnComp(u32 source, u32 dest)
+{
+    u32 header = CPUReadMemory(source);
+    source += 4;
+
+    if(((source & 0xe000000) == 0) ||
+        ((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
+        return;
+
+    u8 treeSize = CPUReadByte(source++);
+
+    // First byte after the size byte is the root node.
+    u32 treeStart = source;
+
+    // Skip the tree table to reach the bitstream.
+    source += ((treeSize+1)<<1)-1; // minus because we already skipped one byte
+
+    // Decompressed size in bytes; decremented by 4 on every word written.
+    int len = header >> 8;
+
+    // mask selects the current bit of the current 32-bit bitstream word.
+    u32 mask = 0x80000000;
+    u32 data = CPUReadMemory(source);
+    source += 4;
+
+    // pos is the byte offset of the current node pair relative to treeStart
+    // (0 means "at the root").
+    int pos = 0;
+    u8 rootNode = CPUReadByte(treeStart);
+    u8 currentNode = rootNode;
+    bool writeData = false;
+    // Output word assembly state (byteShift is always 8 * byteCount).
+    int byteShift = 0;
+    int byteCount = 0;
+    u32 writeValue = 0;
+
+    if((header & 0x0F) == 8) {
+        // 8-bit symbols: each decoded leaf is one output byte.
+        while(len > 0) {
+            // take left
+            if(pos == 0)
+                pos++;
+            else
+                pos += (((currentNode & 0x3F)+1)<<1);
+
+            if(data & mask) {
+                // right
+                if(currentNode & 0x40)
+                    writeData = true;
+                currentNode = CPUReadByte(treeStart+pos+1);
+            } else {
+                // left
+                if(currentNode & 0x80)
+                    writeData = true;
+                currentNode = CPUReadByte(treeStart+pos);
+            }
+
+            if(writeData) {
+                // currentNode now holds the symbol; append it, low byte first.
+                writeValue |= (currentNode << byteShift);
+                byteCount++;
+                byteShift += 8;
+
+                // Restart at the root for the next symbol.
+                pos = 0;
+                currentNode = rootNode;
+                writeData = false;
+
+                if(byteCount == 4) {
+                    byteCount = 0;
+                    byteShift = 0;
+                    CPUWriteMemory(dest, writeValue);
+                    writeValue = 0;
+                    dest += 4;
+                    len -= 4;
+                }
+            }
+            // Advance to the next bit; fetch a new word when exhausted.
+            mask >>= 1;
+            if(mask == 0) {
+                mask = 0x80000000;
+                data = CPUReadMemory(source);
+                source += 4;
+            }
+        }
+    } else {
+        // Any other width is treated as 4-bit symbols: two leaves form one
+        // output byte, the first one being the low nibble.
+        int halfLen = 0;
+        int value = 0;
+        while(len > 0) {
+            // take left
+            if(pos == 0)
+                pos++;
+            else
+                pos += (((currentNode & 0x3F)+1)<<1);
+
+            if((data & mask)) {
+                // right
+                if(currentNode & 0x40)
+                    writeData = true;
+                currentNode = CPUReadByte(treeStart+pos+1);
+            } else {
+                // left
+                if(currentNode & 0x80)
+                    writeData = true;
+                currentNode = CPUReadByte(treeStart+pos);
+            }
+
+            if(writeData) {
+                if(halfLen == 0)
+                    value |= currentNode;
+                else
+                    value |= (currentNode<<4);
+
+                halfLen += 4;
+                if(halfLen == 8) {
+                    // A full byte has been assembled from two nibbles.
+                    writeValue |= (value << byteShift);
+                    byteCount++;
+                    byteShift += 8;
+
+                    halfLen = 0;
+                    value = 0;
+
+                    if(byteCount == 4) {
+                        byteCount = 0;
+                        byteShift = 0;
+                        CPUWriteMemory(dest, writeValue);
+                        dest += 4;
+                        writeValue = 0;
+                        len -= 4;
+                    }
+                }
+                // Restart at the root for the next symbol.
+                pos = 0;
+                currentNode = rootNode;
+                writeData = false;
+            }
+            // Advance to the next bit; fetch a new word when exhausted.
+            mask >>= 1;
+            if(mask == 0) {
+                mask = 0x80000000;
+                data = CPUReadMemory(source);
+                source += 4;
+            }
+        }
+    }
+}
+
+/*-----------------------------------------------------------------
+0x14 - RLUnCompWram
+  Expands run-length compressed data. The Wram function is faster, and writes in units of 8bits.
+  For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
+  If the size of the compressed data is not a multiple of 4, please adjust it as much as possible by padding with 0.
+  Align the source address to a 4Byte boundary.
+
+  Stream format as decoded below: 32-bit header (upper 24 bits = decompressed
+  size), then blocks of one control byte plus payload:
+    bit 7 set   - one data byte follows, repeated (low 7 bits + 3) times
+    bit 7 clear - (low 7 bits + 1) literal bytes follow
+-----------------------------------------------------------------*/
+void swi_RLUnCompWram(u32 source, u32 dest)
+{
+    const u32 header = CPUReadMemory(source);
+    source += 4;
+
+    if((source & 0xe000000) == 0)
+        return;
+    if(((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
+        return;
+
+    int remaining = header >> 8;
+
+    while(remaining > 0) {
+        const u8 control = CPUReadByte(source++);
+        const int isRun = control & 0x80;
+        int count = control & 0x7F;
+        u8 fill = 0;
+
+        if(isRun) {
+            fill = CPUReadByte(source++);
+            count += 3;
+        } else {
+            count += 1;
+        }
+
+        for(; count > 0; count--) {
+            // Run blocks repeat `fill`; literal blocks consume the stream.
+            const u8 out = isRun ? fill : CPUReadByte(source++);
+            CPUWriteByte(dest++, out);
+            if(--remaining == 0)
+                return;
+        }
+    }
+}
+
+/*-----------------------------------------------------------------
+0x15 - RLUnCompVram
+  Expands run-length compressed data. The Wram function is faster, and writes in units of 8bits.
+  For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
+  If the size of the compressed data is not a multiple of 4, please adjust it as much as possible by padding with 0.
+  Align the source address to a 4Byte boundary.
+-----------------------------------------------------------------*/ +void swi_RLUnCompVram(u32 source, u32 dest) +{ + u32 header = CPUReadMemory(source & 0xFFFFFFFC); + source += 4; + + if(((source & 0xe000000) == 0) || + ((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0) + return; + + int len = header >> 8; + int byteCount = 0; + int byteShift = 0; + u32 writeValue = 0; + + while(len > 0) { + u8 d = CPUReadByte(source++); + int l = d & 0x7F; + if(d & 0x80) { + u8 data = CPUReadByte(source++); + l += 3; + for(int i = 0;i < l; i++) { + writeValue |= (data << byteShift); + byteShift += 8; + byteCount++; + + if(byteCount == 2) { + CPUWriteHalfWord(dest, writeValue); + dest += 2; + byteCount = 0; + byteShift = 0; + writeValue = 0; + } + len--; + if(len == 0) + return; + } + } else { + l++; + for(int i = 0; i < l; i++) { + writeValue |= (CPUReadByte(source++) << byteShift); + byteShift += 8; + byteCount++; + if(byteCount == 2) { + CPUWriteHalfWord(dest, writeValue); + dest += 2; + byteCount = 0; + byteShift = 0; + writeValue = 0; + } + len--; + if(len == 0) + return; + } + } + } +} + +/*----------------------------------------------------------------- +0x16 - Diff8bitUnFilterWram + These aren't actually real decompression functions, destination data will have exactly the same size as source data. + However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be: + + unfiltered: 10 11 12 13 14 15 16 17 18 19 + filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results. + Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively. + The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once, + the downside is that it is eventually slower as the 8bitWram function. 
+-----------------------------------------------------------------*/ +void swi_Diff8bitUnFilterWram(u32 source, u32 dest) +{ + u32 header = CPUReadMemory(source); + source += 4; + + if(((source & 0xe000000) == 0) || + (((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)) + return; + + int len = header >> 8; + + u8 data = CPUReadByte(source++); + CPUWriteByte(dest++, data); + len--; + + while(len > 0) { + u8 diff = CPUReadByte(source++); + data += diff; + CPUWriteByte(dest++, data); + len--; + } +} + +/*----------------------------------------------------------------- +0x17 - Diff8bitUnFilterVram + These aren't actually real decompression functions, destination data will have exactly the same size as source data. + However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be: + + unfiltered: 10 11 12 13 14 15 16 17 18 19 + filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1 + + In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results. + Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively. + The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once, + the downside is that it is eventually slower as the 8bitWram function. 
+
+  Implementation: same running 8-bit sum as the Wram variant, but results
+  are paired (low byte first) and stored with 16-bit writes. The loop runs
+  while at least two bytes remain, so an odd trailing byte is not written.
+-----------------------------------------------------------------*/
+void swi_Diff8bitUnFilterVram(u32 source, u32 dest)
+{
+    const u32 header = CPUReadMemory(source);
+    source += 4;
+
+    if((source & 0xe000000) == 0)
+        return;
+    if(((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
+        return;
+
+    int remaining = header >> 8;
+
+    // The seed byte becomes the low half of the first halfword.
+    u8 running = CPUReadByte(source++);
+    u16 pair = running;
+    int held = 1;           // bytes currently stored in `pair`
+
+    while(remaining >= 2) {
+        running += CPUReadByte(source++);
+        pair |= (running << (8 * held));
+        held++;
+
+        if(held == 2) {
+            CPUWriteHalfWord(dest, pair);
+            dest += 2;
+            remaining -= 2;
+            held = 0;
+            pair = 0;
+        }
+    }
+}
+
+/*-----------------------------------------------------------------
+0x18 - Diff16bitUnFilter
+  These aren't actually real decompression functions, destination data will have exactly the same size as source data.
+  However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be:
+
+  unfiltered: 10 11 12 13 14 15 16 17 18 19
+  filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1
+
+  In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results.
+  Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively.
+  The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once,
+  the downside is that it is eventually slower as the 8bitWram function.
+-----------------------------------------------------------------*/
+void swi_Diff16bitUnFilter(u32 source, u32 dest)
+{
+    //Header word: the byte count is taken from bits 8-31
+    const u32 header = CPUReadMemory(source);
+    source += 4;
+
+    //Do nothing when the start or the end of the source range has address bits 25-27 all clear
+    const u32 sourceEnd = source + ((header >> 8) & 0x1fffff);
+    if((source & 0xe000000) == 0 || (sourceEnd & 0xe000000) == 0)
+        return;
+
+    int remaining = header >> 8;
+
+    //The first halfword is copied unchanged and seeds the running sum
+    u16 value = CPUReadHalfWord(source);
+    source += 2;
+    CPUWriteHalfWord(dest, value);
+    dest += 2;
+
+    //Each following halfword is a delta added onto the previous output (wraps at 16 bits)
+    for(remaining -= 2; remaining >= 2; remaining -= 2) {
+        value += CPUReadHalfWord(source);
+        source += 2;
+        CPUWriteHalfWord(dest, value);
+        dest += 2;
+    }
+}
+
+/*-----------------------------------------------------------------
+0x1F - MidiKey2Freq
+    Calculates the value of the assignment to ((SoundArea)sa).vchn[x].fr when playing the wave data, wa,
+    with the interval (MIDI KEY) mk and the fine adjustment value (halftones=256) fp.
+-----------------------------------------------------------------*/
+
+typedef struct {
+    u16 Type;    //Always 0
+    u16 Stat;    //Loop Mode
+    u32 Freq;    //Frequency Q10
+    u32 Loop;    //Loop Start
+    u32 Size;    //Loop End/Length
+    s8 Data[1];  //PCM Data
+} WaveData;
+
+u32 swi_MidiKey2Freq(WaveData* wa, u32 mk, u32 fp)
+{
+    //Keys above 178 are clamped, and the fine adjustment is forced to 0xFF000000
+    if(mk > 178)
+    {
+        mk = 178;
+        fp = 0xFF000000;
+    }
+
+    //ScaleTable entry: low nibble indexes FreqTable, high nibble is a right-shift amount
+    u8 keyEntry = ScaleTable[mk];
+    u32 keyBase = FreqTable[(keyEntry & 0x0F)];
+    u32 keyFreq = keyBase >> (u32)(keyEntry / 16);
+
+    //Same lookup for the following key
+    u32 nextEntry = ScaleTable[mk+1];
+    u32 nextBase = FreqTable[(nextEntry & 0x0F)];
+    u32 nextFreq = nextBase >> (nextEntry / 16);
+
+    //Move from this key towards the next one by the fraction fp,
+    //then scale the result by the sample's own frequency
+    u32 fine = umul3232H32(nextFreq - keyFreq, fp);
+
+    return umul3232H32((fine + keyFreq), wa->Freq);
+}
+
+/*-----------------------------------------------------------------
+0x1C - SoundDriverMain
+    Main of the sound driver.
+    Call every 1/60 of a second. The flow of the process is to call SoundDriverVSync, which is explained later, immediately after the V-Blank interrupt.
+    After that, this routine is called after BG and OBJ processing is executed.
+-----------------------------------------------------------------*/
+void swi_SoundDriverMain()
+{
+    //Implementation notes:
+    // - The sound work area pointer is read from 0x03007FF0.
+    // - Nothing happens unless the first word of the work area equals 0x68736D53
+    //   (the bytes 'S','m','s','h' in memory order).
+    // - That word is incremented while the routine runs and restored on exit.
+    // - Comments of the form 0000xxxx / 1Exx are addresses noted by the original author.
+    u32 m4data = CPUReadMemory(0x03007FF0);
+    u32 m4dataOriginal = m4data; //base of the work area; m4data itself is advanced further down
+    u32 flag = CPUReadMemory(m4data);
+
+    if(flag == 0x68736D53) //Special engine flag
+    {
+        CPUWriteMemory(m4data,flag+1);
+
+        void (*functionPtr)(u32);
+        functionPtr = (void *)CPUReadMemory(m4data+0x20);
+
+        //If MPlayMain exists call it?
+        if(functionPtr)
+        {
+            u32 param = CPUReadMemory(m4data+0x24);
+
+            functionPtr(param);
+        }
+
+        functionPtr = (void *)CPUReadMemory(m4data+0x28);
+        //If CgbSound exists call it?
+        if(functionPtr)
+        {
+            u32 param = CPUReadMemory(m4data+0x24);
+
+            functionPtr(param);
+        }
+
+        //adr walks a buffer that starts 0x350 bytes into the work area
+        u32 adr = m4data + 0x350;
+        u32 freq = CPUReadMemory(m4data+0x10);
+        u8 unknown = CPUReadByte(m4data+0x04);
+
+        if(unknown > 0)
+        {
+            u8 unknown2 = CPUReadByte(m4data+0x0B);
+            unknown2 -= unknown-1;
+            adr += freq * unknown2;
+        }
+
+        //Distance between the two buffers that are processed in parallel (adr and adr+0x630)
+        u32 unknownConst = 0x630;
+        u8 unknown1 = CPUReadByte(m4data+0x05);
+        if(unknown1)
+        {
+            u32 temp = 0;
+            //misc crap from 1E20 to 1E70
+            if(unknown == 2)
+                temp = m4data+0x350;
+            else
+                temp = adr+freq;
+
+            int count = freq;
+
+            //Post-decrement test: the body runs count+1 times.
+            //adr is not advanced inside this loop, only temp is.
+            do
+            {
+                s32 unknown2;
+                s32 unknown3;
+
+                //Sum four signed samples: both buffers at adr and both buffers at temp
+                unknown2 = *(s8*)(adr+unknownConst);
+                unknown3 = *(s8*)(adr);
+                unknown2 += unknown3;
+
+                unknown3 = *(s8*)(temp+unknownConst);
+                unknown2 += unknown3;
+
+                unknown3 = *(s8*)(temp);
+                temp += 1;
+                unknown2 += unknown3;
+
+                unknown2 *= unknown1;
+
+                unknown2 >>= 9;
+
+                //If its negative
+                if(unknown2 & 0x80)
+                {
+                    unknown2+=1;
+                }
+
+                //Store new note?
+                *(s8*)(adr+unknownConst) = unknown2;
+                *(s8*)(adr) = unknown2;
+
+            } while (count--);
+        }
+
+        //1E74
+        //Zero-fill both buffers, one word at a time
+        u32 adr2 = unknownConst + adr;
+
+        u32 count = freq>>3;
+
+        //count is unsigned, so "<= 0" is only true for 0
+        if(count <= 0)
+        {
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+        }
+
+        count >>= 1;
+
+        if(count <= 0)
+        {
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+        }
+
+        //Four words per buffer per pass
+        do
+        {
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+
+            CPUWriteMemory(adr,0);
+            adr += 4;
+            CPUWriteMemory(adr2,0);
+            adr2 += 4;
+        } while(count-- > 0);
+
+        //unknown4 and unknown5 are read but not used yet
+        u32 unknown4 = CPUReadMemory(m4data+0x14);
+        u32 unknown5 = CPUReadMemory(m4data+0x18);
+        u8 loopcount = CPUReadByte(m4data+0x06);
+
+        //Why jump 0x50 forward?
+        //From here on m4data addresses 0x40-byte records that start 0x50 bytes into the work area
+        m4data += 0x50;
+
+        //Post-decrement test: loopcount+1 records are visited
+        do
+        {
+            //00001EAE
+            u32 unknown7 = CPUReadMemory(m4data+0x24);
+            u8 unknown8 = CPUReadByte(m4data);
+
+            //00001EBE
+            if(unknown8 & 0xC7)
+            {
+                if(!(unknown8 & 0x80))
+                {
+                    if(unknown8 & 0x40)
+                    {
+                        u8 unknown11 = 0x03;
+                        CPUWriteByte(m4data,unknown11);
+                        CPUWriteMemory(m4data+0x28,unknown7+0x10);
+                        CPUWriteMemory(m4data+0x18,CPUReadMemory(unknown7+0x0C));
+                        CPUWriteByte(m4data+0x09,0x00);
+                        CPUWriteMemory(m4data+0x1C,0x00);
+
+                        //00001EDE
+                        //unknown9 is read but not used yet
+                        u8 unknown9 = CPUReadByte(unknown7+0x03);
+
+                        //Always true here: bit 0x40 was already tested above
+                        if(unknown8 & 0xC0)
+                        {
+                            //00001EE6
+                            unknown11 = 0x13;
+                            CPUWriteByte(m4data,unknown11);
+                        }
+                        //jump to 1F46
+
+                        //00001F46
+                        u8 unknown10 = CPUReadByte(m4data+0x04);
+
+                        //unknown10 is a u8, so this branch is only taken when it equals 0xFF
+                        //0xFF might be an error code?
+                        if(unknown10 >= 0xFF)
+                        {
+                            unknown10 = 0xFF;
+                            unknown11--;
+                            CPUWriteByte(m4data,unknown11);
+                        }
+
+                        //00001F54
+                        //NOTE(review): the products below use the status byte (unknown11) and a left
+                        //shift by 4 - confirm both against the disassembly
+                        CPUWriteByte(m4data+0x09,unknown10);
+                        u32 unknown12 = (CPUReadByte(m4dataOriginal+0x07) + 1) * unknown11;
+                        u32 unknown13 = unknown12 << 4;
+                        unknown12 = unknown13 * CPUReadByte(m4data+0x02);
+                        unknown12 >>= 8;
+                        CPUWriteByte(m4data+0x0A,(u8)unknown12);
+
+                        //00001F68
+                        //Fix: store the value derived from the byte at +0x03 (unknown14); previously
+                        //unknown12 was written a second time and unknown14 was never used
+                        u32 unknown14 = (CPUReadByte(m4data+0x03) * unknown13) >> 8;
+                        CPUWriteByte(m4data+0x0B,(u8)unknown14);
+                    }
+                }
+            }
+            m4data += 0x40; //000020EA
+        } while (loopcount--); //000020E8
+
+        //000020F0
+        //Reset flag?
+        //Fix: restore the magic at the base of the work area, where it was checked and incremented
+        //on entry. m4data now points past the last record, so writing there left the flag at
+        //magic+1 and clobbered an unrelated word.
+        CPUWriteMemory(m4dataOriginal,0x68736D53);
+    }
+}
+
+/*-----------------------------------------------------------------
+0x23 - MusicPlayerContinue
+    Not implemented: empty stub, dst is ignored.
+-----------------------------------------------------------------*/
+void swi_MusicPlayerContinue(u32 dst)
+{
+}
+
+/*-----------------------------------------------------------------
+0x24 - MusicPlayerFadeOut
+    Not implemented: empty stub, dst is ignored.
+-----------------------------------------------------------------*/
+void swi_MusicPlayerFadeOut(u32 dst)
+{
+}
+
+/*-----------------------------------------------------------------
+0x2A - SoundGetJumpList
+    Receives pointers to 36 additional sound-related BIOS functions.
+    This implementation fills all 36 (0x24) word-sized slots at dst with the address of swi_Invalid.
+-----------------------------------------------------------------*/
+extern void swi_Invalid();
+void swi_SoundGetJumpList(u32 dst)
+{
+    //Dummy out the jump list by forcing all of them to return immediately
+    for(int i = 0; i < 0x24; i++) {
+        CPUWriteMemory(dst, (u32)&swi_Invalid);
+        dst += 4;
+    }
+}
+
+/*-----------------------------------------------------------------
+0x00 - SoftReset
+    Clears 200h bytes of RAM (containing stacks, and BIOS IRQ vector/flags), initializes system, supervisor, and irq stack pointers,
+    sets R0-R12, LR_svc, SPSR_svc, LR_irq, and SPSR_irq to zero, and enters system mode.
+
+     Host  sp_svc    sp_irq    sp_sys    zerofilled area       return address
+     GBA   3007FE0h  3007FA0h  3007F00h  [3007E00h..3007FFFh]  Flag[3007FFAh]
+
+    The GBA return address 8bit flag is interpreted as 00h=8000000h (ROM), or 01h-FFh=2000000h (RAM), entered in ARM state.
+    Note: The reset is applied only to the CPU that has executed the SWI (ie. on the NDS, the other CPU will remain unaffected).
+    Return: Does not return to calling procedure, instead, loads the above return address into R14, and then jumps to that address by a "BX R14" opcode.
+-----------------------------------------------------------------*/
+/*void swi_SoftReset()
+{
+    //Implemented in assembly
+}*/
+
+/*-----------------------------------------------------------------
+0x01 - RegisterRamReset
+    Resets the I/O registers and RAM specified in ResetFlags. However, it does not clear the CPU internal RAM area from 3007E00h-3007FFFh.
+
+     r0  ResetFlags
+          Bit   Expl.
+          0     Clear 256K on-board WRAM  ;-don't use when returning to WRAM
+          1     Clear 32K in-chip WRAM    ;-excluding last 200h bytes
+          2     Clear Palette
+          3     Clear VRAM
+          4     Clear OAM                 ;-zerofilled! does NOT disable OBJs!
+          5     Reset SIO registers       ;-switches to general purpose mode!
+          6     Reset Sound registers
+          7     Reset all other registers (except SIO, Sound)
+
+    Return: No return value.
+    Bug: LSBs of SIODATA32 are always destroyed, even if Bit5 of R0 was cleared.
+    The function always switches the screen into forced blank by setting DISPCNT=0080h (regardless of incoming R0, screen becomes white).
+-----------------------------------------------------------------*/
+//Bit 24 of the swi_CpuFastSet length/mode word, used below to request a fill.
+//Parenthesised so the macro expands safely next to any operator.
+#define COPY_MODE_FILL (1<<24)
+void swi_RegisterRamReset(u32 flags)
+{
+    // no need to trace here. this is only called directly from GBA.cpp
+    // to emulate bios initialization
+    // NOTE(review): the two lines above mention GBA.cpp, which is not among the files
+    // added with this source - the remark looks inherited; confirm and drop it.
+
+    //Source word for every fill below
+    u32 zero = 0;
+
+    //Done unconditionally, before any flag is looked at
+    //(the header above documents this as forcing DISPCNT=0080h)
+    CPUUpdateRegister(0x0, 0x80);
+
+    if(flags) {
+        if(flags & 0x01) {
+            // clear work RAM
+            swi_CpuFastSet((u32)&zero, 0x02000000, 0x40000/4 | COPY_MODE_FILL);
+        }
+        if(flags & 0x02) {
+            // clear internal RAM
+            swi_CpuFastSet((u32)&zero, 0x03000000, 0x7e00/4 | COPY_MODE_FILL); // don't clear 0x7e00-0x7fff
+        }
+        if(flags & 0x04) {
+            // clear palette RAM
+            swi_CpuFastSet((u32)&zero, 0x05000000, 0x400/4 | COPY_MODE_FILL);
+        }
+        if(flags & 0x08) {
+            // clear VRAM
+            swi_CpuFastSet((u32)&zero, 0x06000000, 0x18000/4 | COPY_MODE_FILL);
+        }
+        if(flags & 0x10) {
+            // clean OAM
+            swi_CpuFastSet((u32)&zero, 0x07000000, 0x400/4 | COPY_MODE_FILL);
+        }
+
+        if(flags & 0x80) {
+            // reset all other registers: zero four runs of 16bit registers,
+            // then write 0x100 to 0x20, 0x30, 0x26 and 0x36
+            // (presumably I/O offsets from 0x04000000 - TODO confirm against CPUUpdateRegister)
+            int i;
+            for(i = 0; i < 0x10; i++)
+                CPUUpdateRegister(0x200+i*2, 0);
+
+            for(i = 0; i < 0xF; i++)
+                CPUUpdateRegister(0x4+i*2, 0);
+
+            for(i = 0; i < 0x20; i++)
+                CPUUpdateRegister(0x20+i*2, 0);
+
+            for(i = 0; i < 0x18; i++)
+                CPUUpdateRegister(0xb0+i*2, 0);
+
+            CPUUpdateRegister(0x130, 0);
+            CPUUpdateRegister(0x20, 0x100);
+            CPUUpdateRegister(0x30, 0x100);
+            CPUUpdateRegister(0x26, 0x100);
+            CPUUpdateRegister(0x36, 0x100);
+        }
+
+        if(flags & 0x20) {
+            // reset SIO registers: zero 0x110-0x11E and 0x140-0x14C, set 0x134 to 0x8000
+            int i;
+            for(i = 0; i < 8; i++)
+                CPUUpdateRegister(0x110+i*2, 0);
+            CPUUpdateRegister(0x134, 0x8000);
+            for(i = 0; i < 7; i++)
+                CPUUpdateRegister(0x140+i*2, 0);
+        }
+
+        if(flags & 0x40) {
+            // reset sound registers
+            int i;
+            CPUWriteByte(0x4000084, 0);
+            CPUWriteByte(0x4000084, 0x80);
+            CPUWriteMemory(0x4000080, 0x880e0000);
+            CPUUpdateRegister(0x88, CPUReadHalfWord(0x4000088)&0x3ff);
+            // 0x90-0x9E are zeroed twice, once after writing 0x70 and once after writing 0
+            // to 0x4000070 (presumably the two wave RAM banks - TODO confirm)
+            CPUWriteByte(0x4000070, 0x70);
+            for(i = 0; i < 8; i++)
+                CPUUpdateRegister(0x90+i*2, 0);
+            CPUWriteByte(0x4000070, 0);
+            for(i = 0; i < 8; i++)
+                CPUUpdateRegister(0x90+i*2, 0);
+            CPUWriteByte(0x4000084, 0);
+        }
+    }
+}