1529 lines
47 KiB
C
1529 lines
47 KiB
C
/*
|
|
Custom GBA BIOS replacement
|
|
Copyright (c) 2002-2006 VBA Development Team
|
|
Copyright (c) 2006-2013 VBA-M Development Team
|
|
Copyright (c) 2013 Normmatt
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include <tonc.h>
|
|
|
|
#define HALTCNT (*(vu8*) 0x04000301)
|
|
|
|
const s16 sineTable[256] = {
|
|
(s16)0x0000, (s16)0x0192, (s16)0x0323, (s16)0x04B5, (s16)0x0645, (s16)0x07D5, (s16)0x0964, (s16)0x0AF1,
|
|
(s16)0x0C7C, (s16)0x0E05, (s16)0x0F8C, (s16)0x1111, (s16)0x1294, (s16)0x1413, (s16)0x158F, (s16)0x1708,
|
|
(s16)0x187D, (s16)0x19EF, (s16)0x1B5D, (s16)0x1CC6, (s16)0x1E2B, (s16)0x1F8B, (s16)0x20E7, (s16)0x223D,
|
|
(s16)0x238E, (s16)0x24DA, (s16)0x261F, (s16)0x275F, (s16)0x2899, (s16)0x29CD, (s16)0x2AFA, (s16)0x2C21,
|
|
(s16)0x2D41, (s16)0x2E5A, (s16)0x2F6B, (s16)0x3076, (s16)0x3179, (s16)0x3274, (s16)0x3367, (s16)0x3453,
|
|
(s16)0x3536, (s16)0x3612, (s16)0x36E5, (s16)0x37AF, (s16)0x3871, (s16)0x392A, (s16)0x39DA, (s16)0x3A82,
|
|
(s16)0x3B20, (s16)0x3BB6, (s16)0x3C42, (s16)0x3CC5, (s16)0x3D3E, (s16)0x3DAE, (s16)0x3E14, (s16)0x3E71,
|
|
(s16)0x3EC5, (s16)0x3F0E, (s16)0x3F4E, (s16)0x3F84, (s16)0x3FB1, (s16)0x3FD3, (s16)0x3FEC, (s16)0x3FFB,
|
|
(s16)0x4000, (s16)0x3FFB, (s16)0x3FEC, (s16)0x3FD3, (s16)0x3FB1, (s16)0x3F84, (s16)0x3F4E, (s16)0x3F0E,
|
|
(s16)0x3EC5, (s16)0x3E71, (s16)0x3E14, (s16)0x3DAE, (s16)0x3D3E, (s16)0x3CC5, (s16)0x3C42, (s16)0x3BB6,
|
|
(s16)0x3B20, (s16)0x3A82, (s16)0x39DA, (s16)0x392A, (s16)0x3871, (s16)0x37AF, (s16)0x36E5, (s16)0x3612,
|
|
(s16)0x3536, (s16)0x3453, (s16)0x3367, (s16)0x3274, (s16)0x3179, (s16)0x3076, (s16)0x2F6B, (s16)0x2E5A,
|
|
(s16)0x2D41, (s16)0x2C21, (s16)0x2AFA, (s16)0x29CD, (s16)0x2899, (s16)0x275F, (s16)0x261F, (s16)0x24DA,
|
|
(s16)0x238E, (s16)0x223D, (s16)0x20E7, (s16)0x1F8B, (s16)0x1E2B, (s16)0x1CC6, (s16)0x1B5D, (s16)0x19EF,
|
|
(s16)0x187D, (s16)0x1708, (s16)0x158F, (s16)0x1413, (s16)0x1294, (s16)0x1111, (s16)0x0F8C, (s16)0x0E05,
|
|
(s16)0x0C7C, (s16)0x0AF1, (s16)0x0964, (s16)0x07D5, (s16)0x0645, (s16)0x04B5, (s16)0x0323, (s16)0x0192,
|
|
(s16)0x0000, (s16)0xFE6E, (s16)0xFCDD, (s16)0xFB4B, (s16)0xF9BB, (s16)0xF82B, (s16)0xF69C, (s16)0xF50F,
|
|
(s16)0xF384, (s16)0xF1FB, (s16)0xF074, (s16)0xEEEF, (s16)0xED6C, (s16)0xEBED, (s16)0xEA71, (s16)0xE8F8,
|
|
(s16)0xE783, (s16)0xE611, (s16)0xE4A3, (s16)0xE33A, (s16)0xE1D5, (s16)0xE075, (s16)0xDF19, (s16)0xDDC3,
|
|
(s16)0xDC72, (s16)0xDB26, (s16)0xD9E1, (s16)0xD8A1, (s16)0xD767, (s16)0xD633, (s16)0xD506, (s16)0xD3DF,
|
|
(s16)0xD2BF, (s16)0xD1A6, (s16)0xD095, (s16)0xCF8A, (s16)0xCE87, (s16)0xCD8C, (s16)0xCC99, (s16)0xCBAD,
|
|
(s16)0xCACA, (s16)0xC9EE, (s16)0xC91B, (s16)0xC851, (s16)0xC78F, (s16)0xC6D6, (s16)0xC626, (s16)0xC57E,
|
|
(s16)0xC4E0, (s16)0xC44A, (s16)0xC3BE, (s16)0xC33B, (s16)0xC2C2, (s16)0xC252, (s16)0xC1EC, (s16)0xC18F,
|
|
(s16)0xC13B, (s16)0xC0F2, (s16)0xC0B2, (s16)0xC07C, (s16)0xC04F, (s16)0xC02D, (s16)0xC014, (s16)0xC005,
|
|
(s16)0xC000, (s16)0xC005, (s16)0xC014, (s16)0xC02D, (s16)0xC04F, (s16)0xC07C, (s16)0xC0B2, (s16)0xC0F2,
|
|
(s16)0xC13B, (s16)0xC18F, (s16)0xC1EC, (s16)0xC252, (s16)0xC2C2, (s16)0xC33B, (s16)0xC3BE, (s16)0xC44A,
|
|
(s16)0xC4E0, (s16)0xC57E, (s16)0xC626, (s16)0xC6D6, (s16)0xC78F, (s16)0xC851, (s16)0xC91B, (s16)0xC9EE,
|
|
(s16)0xCACA, (s16)0xCBAD, (s16)0xCC99, (s16)0xCD8C, (s16)0xCE87, (s16)0xCF8A, (s16)0xD095, (s16)0xD1A6,
|
|
(s16)0xD2BF, (s16)0xD3DF, (s16)0xD506, (s16)0xD633, (s16)0xD767, (s16)0xD8A1, (s16)0xD9E1, (s16)0xDB26,
|
|
(s16)0xDC72, (s16)0xDDC3, (s16)0xDF19, (s16)0xE075, (s16)0xE1D5, (s16)0xE33A, (s16)0xE4A3, (s16)0xE611,
|
|
(s16)0xE783, (s16)0xE8F8, (s16)0xEA71, (s16)0xEBED, (s16)0xED6C, (s16)0xEEEF, (s16)0xF074, (s16)0xF1FB,
|
|
(s16)0xF384, (s16)0xF50F, (s16)0xF69C, (s16)0xF82B, (s16)0xF9BB, (s16)0xFB4B, (s16)0xFCDD, (s16)0xFE6E
|
|
};
|
|
|
|
const u8 ScaleTable[180] = {
|
|
(u8)0xE0, (u8)0xE1, (u8)0xE2, (u8)0xE3, (u8)0xE4, (u8)0xE5, (u8)0xE6, (u8)0xE7, (u8)0xE8,
|
|
(u8)0xE9, (u8)0xEA, (u8)0xEB, (u8)0xD0, (u8)0xD1, (u8)0xD2, (u8)0xD3, (u8)0xD4, (u8)0xD5,
|
|
(u8)0xD6, (u8)0xD7, (u8)0xD8, (u8)0xD9, (u8)0xDA, (u8)0xDB, (u8)0xC0, (u8)0xC1, (u8)0xC2,
|
|
(u8)0xC3, (u8)0xC4, (u8)0xC5, (u8)0xC6, (u8)0xC7, (u8)0xC8, (u8)0xC9, (u8)0xCA, (u8)0xCB,
|
|
(u8)0xB0, (u8)0xB1, (u8)0xB2, (u8)0xB3, (u8)0xB4, (u8)0xB5, (u8)0xB6, (u8)0xB7, (u8)0xB8,
|
|
(u8)0xB9, (u8)0xBA, (u8)0xBB, (u8)0xA0, (u8)0xA1, (u8)0xA2, (u8)0xA3, (u8)0xA4, (u8)0xA5,
|
|
(u8)0xA6, (u8)0xA7, (u8)0xA8, (u8)0xA9, (u8)0xAA, (u8)0xAB, (u8)0x90, (u8)0x91, (u8)0x92,
|
|
(u8)0x93, (u8)0x94, (u8)0x95, (u8)0x96, (u8)0x97, (u8)0x98, (u8)0x99, (u8)0x9A, (u8)0x9B,
|
|
(u8)0x80, (u8)0x81, (u8)0x82, (u8)0x83, (u8)0x84, (u8)0x85, (u8)0x86, (u8)0x87, (u8)0x88,
|
|
(u8)0x89, (u8)0x8A, (u8)0x8B, (u8)0x70, (u8)0x71, (u8)0x72, (u8)0x73, (u8)0x74, (u8)0x75,
|
|
(u8)0x76, (u8)0x77, (u8)0x78, (u8)0x79, (u8)0x7A, (u8)0x7B, (u8)0x60, (u8)0x61, (u8)0x62,
|
|
(u8)0x63, (u8)0x64, (u8)0x65, (u8)0x66, (u8)0x67, (u8)0x68, (u8)0x69, (u8)0x6A, (u8)0x6B,
|
|
(u8)0x50, (u8)0x51, (u8)0x52, (u8)0x53, (u8)0x54, (u8)0x55, (u8)0x56, (u8)0x57, (u8)0x58,
|
|
(u8)0x59, (u8)0x5A, (u8)0x5B, (u8)0x40, (u8)0x41, (u8)0x42, (u8)0x43, (u8)0x44, (u8)0x45,
|
|
(u8)0x46, (u8)0x47, (u8)0x48, (u8)0x49, (u8)0x4A, (u8)0x4B, (u8)0x30, (u8)0x31, (u8)0x32,
|
|
(u8)0x33, (u8)0x34, (u8)0x35, (u8)0x36, (u8)0x37, (u8)0x38, (u8)0x39, (u8)0x3A, (u8)0x3B,
|
|
(u8)0x20, (u8)0x21, (u8)0x22, (u8)0x23, (u8)0x24, (u8)0x25, (u8)0x26, (u8)0x27, (u8)0x28,
|
|
(u8)0x29, (u8)0x2A, (u8)0x2B, (u8)0x10, (u8)0x11, (u8)0x12, (u8)0x13, (u8)0x14, (u8)0x15,
|
|
(u8)0x16, (u8)0x17, (u8)0x18, (u8)0x19, (u8)0x1A, (u8)0x1B, (u8)0x00, (u8)0x01, (u8)0x02,
|
|
(u8)0x03, (u8)0x04, (u8)0x05, (u8)0x06, (u8)0x07, (u8)0x08, (u8)0x09, (u8)0xA, (u8)0xB
|
|
};
|
|
|
|
const u32 FreqTable[12] = {
|
|
(u32)0x80000000,
|
|
(u32)0x879C7C97,
|
|
(u32)0x8FACD61E,
|
|
(u32)0x9837F052,
|
|
(u32)0xA14517CC,
|
|
(u32)0xAADC0848,
|
|
(u32)0xB504F334,
|
|
(u32)0xBFC886BB,
|
|
(u32)0xCB2FF52A,
|
|
(u32)0xD744FCCB,
|
|
(u32)0xE411F03A,
|
|
(u32)0xF1A1BF39
|
|
};
|
|
|
|
/*-----------------------------------------------------------------
|
|
Absolute Value from http://stackoverflow.com/questions/16499475/arm-assembly-absolute-value-function-are-two-or-three-lines-faster
|
|
-----------------------------------------------------------------*/
|
|
s32 abs(s32 x){
|
|
s32 signext = (x >= 0) ? 0 : -1; //This can be done with an ASR instruction
|
|
return (x + signext) ^ signext;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
Square Root Method from http://www.finesse.demon.co.uk/steven/sqrt.html
|
|
-----------------------------------------------------------------*/
|
|
#define iter1(N) \
|
|
try = root + (1 << (N)); \
|
|
if (n >= try << (N)) \
|
|
{ n -= try << (N); \
|
|
root |= 2 << (N); \
|
|
}
|
|
|
|
u32 sqrt_ (u32 n)
|
|
{
|
|
u32 root = 0, try;
|
|
iter1 (15); iter1 (14); iter1 (13); iter1 (12);
|
|
iter1 (11); iter1 (10); iter1 ( 9); iter1 ( 8);
|
|
iter1 ( 7); iter1 ( 6); iter1 ( 5); iter1 ( 4);
|
|
iter1 ( 3); iter1 ( 2); iter1 ( 1); iter1 ( 0);
|
|
return root >> 1;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
Wrapper methods for VBA-M Methods
|
|
-----------------------------------------------------------------*/
|
|
static inline u16 CPUReadByte(u32 adr)
|
|
{
|
|
return *(u8*)adr;
|
|
}
|
|
static inline u16 CPUReadHalfWord(u32 adr)
|
|
{
|
|
return *(u16*)adr;
|
|
}
|
|
static inline u32 CPUReadMemory(u32 adr)
|
|
{
|
|
return *(u32*)adr;
|
|
}
|
|
static inline void CPUWriteMemory(u32 adr, u32 val)
|
|
{
|
|
*(u32*)adr = val;
|
|
}
|
|
static inline void CPUWriteHalfWord(u32 adr, u16 val)
|
|
{
|
|
*(u16*)adr = val;
|
|
}
|
|
static inline void CPUWriteByte(u32 adr, u8 val)
|
|
{
|
|
*(u8*)adr = val;
|
|
}
|
|
static inline void CPUUpdateRegister(u32 adr, u16 val)
|
|
{
|
|
*(u16*)(0x04000000+adr) = val;
|
|
}
|
|
|
|
u32 umul3232H32(u32 val, u32 val2);
|
|
|
|
/*-----------------------------------------------------------------
|
|
Assembly method declarations
|
|
-----------------------------------------------------------------*/
|
|
extern int swi_Div(u32 a, u32 b); //Returns result
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x27 - CustomHalt
|
|
Writes the 8bit parameter value to HALTCNT, below values are equivalent to Halt
|
|
and Stop/Sleep functions, other values reserved, purpose unknown.
|
|
8bit parameter (00h=Halt, 80h=Stop)
|
|
-----------------------------------------------------------------*/
|
|
void swi_CustomHalt(u8 val) {
|
|
HALTCNT = val;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x02 - Halt
|
|
Halts the CPU until an interrupt request occurs. The CPU is switched into low-power mode,
|
|
all other circuits (video, sound, timers, serial, keypad, system clock) are kept operating.
|
|
Halt mode is terminated when any enabled interrupts are requested, that is when (IE AND IF)
|
|
is not zero, the GBA locks up if that condition doesn't get true.
|
|
However, the state of CPUs IRQ disable bit in CPSR register, and the IME register are
|
|
don't care, Halt passes through even if either one has disabled interrupts.
|
|
-----------------------------------------------------------------*/
|
|
void swi_Halt() {
|
|
swi_CustomHalt(0);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x03 - Stop
|
|
Switches the GBA into very low power mode (to be used similar as a screen-saver).
|
|
The CPU, System Clock, Sound, Video, SIO-Shift Clock, DMAs, and Timers are stopped.
|
|
Stop state can be terminated by the following interrupts only
|
|
(as far as enabled in IE register): Joypad, Game Pak, or General-Purpose-SIO.
|
|
|
|
"The system clock is stopped so the IF flag is not set."
|
|
Preparation for Stop:
|
|
Disable Video before implementing Stop (otherwise Video just freezes, but still keeps consuming battery power).
|
|
Possibly required to disable Sound also? Obviously, it'd be also recommended to disable any external
|
|
hardware (such like Rumble or Infra-Red) as far as possible.
|
|
-----------------------------------------------------------------*/
|
|
void swi_Stop() {
|
|
swi_CustomHalt(0x80);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
Used by the IntrWait functions
|
|
-----------------------------------------------------------------*/
|
|
bool CheckInterrupts(u32 waitFlags)
|
|
{
|
|
REG_IME = 0; //Disable interrupts
|
|
u16 intFlags = *(vu16*)(0x04000000-8); //Get current flags
|
|
u16 flags = intFlags & waitFlags;
|
|
if(flags)
|
|
{
|
|
intFlags = (flags) ^ intFlags;
|
|
*(vu16*)(0x04000000-8) = intFlags;
|
|
}
|
|
REG_IME = 1; //Enable interrupts
|
|
|
|
return flags;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x04 - IntrWait
|
|
Continues to wait in Halt state until one (or more) of the specified interrupt(s) do occur.
|
|
The function forcefully sets IME=1. When using multiple interrupts at the same time,
|
|
this function is having less overhead than repeatedly calling the Halt function
|
|
-----------------------------------------------------------------*/
|
|
void swi_IntrWait(bool discard, u32 waitFlags)
|
|
{
|
|
if(discard)
|
|
{
|
|
CheckInterrupts(waitFlags);
|
|
}
|
|
|
|
u32 val = 0;
|
|
do
|
|
{
|
|
HALTCNT = 0;
|
|
val = CheckInterrupts(waitFlags);
|
|
}
|
|
while(!val);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x05 - VBlankIntrWait
|
|
Continues to wait in Halt state until one (or more) of the specified interrupt(s) do occur.
|
|
The function forcefully sets IME=1. When using multiple interrupts at the same time,
|
|
this function is having less overhead than repeatedly calling the Halt function
|
|
-----------------------------------------------------------------*/
|
|
void swi_VBlankIntrWait()
|
|
{
|
|
swi_IntrWait(true,1);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x06 - Div
|
|
Signed Division, r0/r1.
|
|
|
|
r0 signed 32bit Number
|
|
r1 signed 32bit Denom
|
|
|
|
Return:
|
|
|
|
r0 Number DIV Denom ;signed
|
|
r1 Number MOD Denom ;signed
|
|
r3 ABS (Number DIV Denom) ;unsigned
|
|
|
|
For example, incoming -1234, 10 should return -123, -4, +123.
|
|
The function usually gets caught in an endless loop upon division by zero.
|
|
-----------------------------------------------------------------*/
|
|
/*u32 swi_Div(s32 num, s32 denom)
|
|
{
|
|
//Implemented in assembly
|
|
}*/
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x07 - DivArm
|
|
Same as above (SWI 06h Div), but incoming parameters are exchanged, r1/r0 (r0=Denom, r1=number).
|
|
For compatibility with ARM's library. Slightly slower (3 clock cycles) than SWI 06h.
|
|
-----------------------------------------------------------------*/
|
|
/*u32 swi_DivArm(s32 num, s32 denom)
|
|
{
|
|
//Implemented in assembly
|
|
}*/
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x08 - Sqrt
|
|
Calculate square root.
|
|
-----------------------------------------------------------------*/
|
|
u32 swi_Sqrt(u32 input)
|
|
{
|
|
return sqrt_(input);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x09 - ArcTan
|
|
Calculates the arc tangent.
|
|
-----------------------------------------------------------------*/
|
|
u32 swi_ArcTan(u32 input)
|
|
{
|
|
s32 a = -(((s32)(input*input)) >> 14);
|
|
s32 b = ((0xA9 * a) >> 14) + 0x390;
|
|
b = ((b * a) >> 14) + 0x91C;
|
|
b = ((b * a) >> 14) + 0xFB6;
|
|
b = ((b * a) >> 14) + 0x16AA;
|
|
b = ((b * a) >> 14) + 0x2081;
|
|
b = ((b * a) >> 14) + 0x3651;
|
|
b = ((b * a) >> 14) + 0xA2F9;
|
|
a = ((s32)input * b) >> 16;
|
|
return (u32)a;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0A - ArcTan2
|
|
Calculates the arc tangent after correction processing.
|
|
-----------------------------------------------------------------*/
|
|
u32 swi_ArcTan2(s32 x, s32 y)
|
|
{
|
|
u32 res = 0;
|
|
if (y == 0) {
|
|
res = ((x>>16) & 0x8000);
|
|
} else {
|
|
if (x == 0) {
|
|
res = ((y>>16) & 0x8000) + 0x4000;
|
|
} else {
|
|
if ((abs(x) > abs(y)) || ((abs(x) == abs(y)) && (!((x<0) && (y<0))))) {
|
|
u32 div = swi_Div(y << 14, (u32)x);
|
|
div = swi_ArcTan(div);
|
|
if (x < 0)
|
|
res = 0x8000 + div;
|
|
else
|
|
res = (((y>>16) & 0x8000)<<1) + div;
|
|
} else {
|
|
u32 div = swi_Div(x << 14, (u32)y);
|
|
div = swi_ArcTan(div);
|
|
res = (0x4000 + ((y>>16) & 0x8000)) - div;
|
|
}
|
|
}
|
|
}
|
|
return res;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0B - CpuSet
|
|
Memory copy/fill in units of 4 bytes or 2 bytes.
|
|
Memcopy is implemented as repeated LDMIA/STMIA [Rb]!,r3 or LDRH/STRH r3,[r0,r5] instructions.
|
|
Memfill as single LDMIA or LDRH followed by repeated STMIA [Rb]!,r3 or STRH r3,[r0,r5].
|
|
The length must be a multiple of 4 bytes (32bit mode) or 2 bytes (16bit mode).
|
|
The (half)wordcount in r2 must be length/4 (32bit mode) or length/2 (16bit mode),
|
|
ie. length in word/halfword units rather than byte units.
|
|
-----------------------------------------------------------------*/
|
|
void swi_CpuSet(u32 source, u32 dest, u32 cnt)
|
|
{
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + (((cnt << 11)>>9) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int count = cnt & 0x1FFFFF;
|
|
|
|
// 32-bit ?
|
|
if((cnt >> 26) & 1) {
|
|
// needed for 32-bit mode!
|
|
source &= 0xFFFFFFFC;
|
|
dest &= 0xFFFFFFFC;
|
|
// fill ?
|
|
if((cnt >> 24) & 1) {
|
|
u32 value = (source>0x0EFFFFFF ? 0x1CAD1CAD : CPUReadMemory(source));
|
|
while(count) {
|
|
CPUWriteMemory(dest, value);
|
|
dest += 4;
|
|
count--;
|
|
}
|
|
} else {
|
|
// copy
|
|
while(count) {
|
|
CPUWriteMemory(dest, (source>0x0EFFFFFF ? 0x1CAD1CAD : CPUReadMemory(source)));
|
|
dest += 4;
|
|
source += 4;
|
|
count--;
|
|
}
|
|
}
|
|
} else {
|
|
// 16-bit fill?
|
|
if((cnt >> 24) & 1) {
|
|
u16 value = (source>0x0EFFFFFF ? 0x1CAD : CPUReadHalfWord(source));
|
|
while(count) {
|
|
CPUWriteHalfWord(dest, value);
|
|
dest += 2;
|
|
count--;
|
|
}
|
|
} else {
|
|
// copy
|
|
while(count) {
|
|
CPUWriteHalfWord(dest, (source>0x0EFFFFFF ? 0x1CAD : CPUReadHalfWord(source)));
|
|
dest += 2;
|
|
source += 2;
|
|
count--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0C - CpuFastSet
|
|
Memory copy/fill in units of 32 bytes. Memcopy is implemented as repeated LDMIA/STMIA [Rb]!,r2-r9 instructions.
|
|
Memfill as single LDR followed by repeated STMIA [Rb]!,r2-r9.
|
|
After processing all 32-byte-blocks, the NDS additonally processes the remaining words as 4-byte blocks.
|
|
|
|
The length is specifed as wordcount, ie. the number of bytes divided by 4.
|
|
On the GBA, the length must be a multiple of 8 words (32 bytes).
|
|
-----------------------------------------------------------------*/
|
|
void swi_CpuFastSet(u32 source, u32 dest, u32 cnt)
|
|
{
|
|
if((((u32)source & 0xe000000) == 0) ||
|
|
(((u32)source + (((cnt << 11)>>9) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
// needed for 32-bit mode!
|
|
source &= 0xFFFFFFFC;
|
|
dest &= 0xFFFFFFFC;
|
|
|
|
int count = cnt & 0x1FFFFF;
|
|
|
|
// fill?
|
|
if((cnt >> 24) & 1) {
|
|
u32 value = (source>0x0EFFFFFF ? 0xBAFFFFFB : CPUReadMemory(source));
|
|
while(count > 0) {
|
|
// BIOS always transfers 32 bytes at a time
|
|
for(int i = 0; i < 8; i++) {
|
|
CPUWriteMemory(dest, value);
|
|
dest += 4;
|
|
}
|
|
count -= 8;
|
|
}
|
|
} else {
|
|
// copy
|
|
while(count > 0) {
|
|
// BIOS always transfers 32 bytes at a time
|
|
for(int i = 0; i < 8; i++) {
|
|
CPUWriteMemory(dest, (source>0x0EFFFFFF ? 0xBAFFFFFB :CPUReadMemory(source)));
|
|
source += 4;
|
|
dest += 4;
|
|
}
|
|
count -= 8;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0D - GetBiosChecksum
|
|
Calculates the checksum of the BIOS ROM (by reading in 32bit units, and adding up these values).
|
|
IRQ and FIQ are disabled during execution.
|
|
The checksum is BAAE187Fh (GBA and GBA SP), or BAAE1880h (DS in GBA mode, whereas the only difference
|
|
is that the byte at [3F0Ch] is changed from 00h to 01h, otherwise the BIOS is 1:1 same as GBA BIOS,
|
|
it does even include multiboot code).
|
|
-----------------------------------------------------------------*/
|
|
int swi_GetBiosChecksum()
|
|
{
|
|
//TODO: Actually checksum this bios
|
|
//TODO: Make this bios checksum the same as official bios
|
|
return 0xBAAE187F;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0E - BgAffineSet
|
|
Used to calculate BG Rotation/Scaling parameters.
|
|
-----------------------------------------------------------------*/
|
|
void swi_BgAffineSet(u32 src, u32 dest, u32 num)
|
|
{
|
|
while(num--) {
|
|
s32 cx = CPUReadMemory(src);
|
|
src+=4;
|
|
s32 cy = CPUReadMemory(src);
|
|
src+=4;
|
|
s16 dispx = CPUReadHalfWord(src);
|
|
src+=2;
|
|
s16 dispy = CPUReadHalfWord(src);
|
|
src+=2;
|
|
s16 rx = CPUReadHalfWord(src);
|
|
src+=2;
|
|
s16 ry = CPUReadHalfWord(src);
|
|
src+=2;
|
|
u16 theta = CPUReadHalfWord(src)>>8;
|
|
src+=4; // keep structure alignment
|
|
s32 a = sineTable[(theta+0x40)&255];
|
|
s32 b = sineTable[theta];
|
|
|
|
s16 dx = (rx * a)>>14;
|
|
s16 dmx = (rx * b)>>14;
|
|
s16 dy = (ry * b)>>14;
|
|
s16 dmy = (ry * a)>>14;
|
|
|
|
CPUWriteHalfWord(dest, dx);
|
|
dest += 2;
|
|
CPUWriteHalfWord(dest, -dmx);
|
|
dest += 2;
|
|
CPUWriteHalfWord(dest, dy);
|
|
dest += 2;
|
|
CPUWriteHalfWord(dest, dmy);
|
|
dest += 2;
|
|
|
|
s32 startx = cx - dx * dispx + dmx * dispy;
|
|
s32 starty = cy - dy * dispx - dmy * dispy;
|
|
|
|
CPUWriteMemory(dest, startx);
|
|
dest += 4;
|
|
CPUWriteMemory(dest, starty);
|
|
dest += 4;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x0F - ObjAffineSet
|
|
Calculates and sets the OBJ's affine parameters from the scaling ratio and angle of rotation.
|
|
The affine parameters are calculated from the parameters set in Srcp.
|
|
The four affine parameters are set every Offset bytes, starting from the Destp address.
|
|
If the Offset value is 2, the parameters are stored contiguously. If the value is 8, they match the structure of OAM.
|
|
When Srcp is arrayed, the calculation can be performed continuously by specifying Num.
|
|
-----------------------------------------------------------------*/
|
|
void swi_ObjAffineSet(u32 src, u32 dest, int num, int offset)
|
|
{
|
|
while(num--) {
|
|
s16 rx = CPUReadHalfWord(src);
|
|
src+=2;
|
|
s16 ry = CPUReadHalfWord(src);
|
|
src+=2;
|
|
u16 theta = CPUReadHalfWord(src)>>8;
|
|
src+=4; // keep structure alignment
|
|
|
|
s32 a = (s32)sineTable[(theta+0x40)&255];
|
|
s32 b = (s32)sineTable[theta];
|
|
|
|
s16 dx = ((s32)rx * a)>>14;
|
|
s16 dmx = ((s32)rx * b)>>14;
|
|
s16 dy = ((s32)ry * b)>>14;
|
|
s16 dmy = ((s32)ry * a)>>14;
|
|
|
|
CPUWriteHalfWord(dest, dx);
|
|
dest += offset;
|
|
CPUWriteHalfWord(dest, -dmx);
|
|
dest += offset;
|
|
CPUWriteHalfWord(dest, dy);
|
|
dest += offset;
|
|
CPUWriteHalfWord(dest, dmy);
|
|
dest += offset;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x10 - BitUnPack
|
|
Used to increase the color depth of bitmaps or tile data. For example, to convert a 1bit
|
|
monochrome font into 4bit or 8bit GBA tiles.
|
|
The Unpack Info is specified separately, allowing to convert the same source data into different formats.
|
|
-----------------------------------------------------------------*/
|
|
void swi_BitUnPack(u32 source, u32 dest, u32 header)
|
|
{
|
|
int len = CPUReadHalfWord(header);
|
|
// check address
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + len) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int bits = CPUReadByte(header+2);
|
|
int revbits = 8 - bits;
|
|
// u32 value = 0;
|
|
u32 base = CPUReadMemory(header+4);
|
|
bool addBase = (base & 0x80000000) ? true : false;
|
|
base &= 0x7fffffff;
|
|
int dataSize = CPUReadByte(header+3);
|
|
|
|
int data = 0;
|
|
int bitwritecount = 0;
|
|
while(1) {
|
|
len -= 1;
|
|
if(len < 0)
|
|
break;
|
|
int mask = 0xff >> revbits;
|
|
u8 b = CPUReadByte(source);
|
|
source++;
|
|
int bitcount = 0;
|
|
while(1) {
|
|
if(bitcount >= 8)
|
|
break;
|
|
u32 d = b & mask;
|
|
u32 temp = d >> bitcount;
|
|
if(d || addBase) {
|
|
temp += base;
|
|
}
|
|
data |= temp << bitwritecount;
|
|
bitwritecount += dataSize;
|
|
if(bitwritecount >= 32) {
|
|
CPUWriteMemory(dest, data);
|
|
dest += 4;
|
|
data = 0;
|
|
bitwritecount = 0;
|
|
}
|
|
mask <<= bits;
|
|
bitcount += bits;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x11 - LZ77UnCompWram
|
|
Expands LZ77-compressed data. The Wram function is faster, and writes in units of 8bits.
|
|
For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
|
|
If the size of the compressed data is not a multiple of 4, please adjust it as much as possible
|
|
by padding with 0.
|
|
|
|
Align the source address to a 4-Byte boundary.
|
|
-----------------------------------------------------------------*/
|
|
void swi_LZ77UnCompWram(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
|
|
while(len > 0) {
|
|
u8 d = CPUReadByte(source++);
|
|
|
|
if(d) {
|
|
for(int i = 0; i < 8; i++) {
|
|
if(d & 0x80) {
|
|
u16 data = CPUReadByte(source++) << 8;
|
|
data |= CPUReadByte(source++);
|
|
int length = (data >> 12) + 3;
|
|
int offset = (data & 0x0FFF);
|
|
u32 windowOffset = dest - offset - 1;
|
|
for(int i2 = 0; i2 < length; i2++) {
|
|
CPUWriteByte(dest++, CPUReadByte(windowOffset++));
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
} else {
|
|
CPUWriteByte(dest++, CPUReadByte(source++));
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
d <<= 1;
|
|
}
|
|
} else {
|
|
for(int i = 0; i < 8; i++) {
|
|
CPUWriteByte(dest++, CPUReadByte(source++));
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x12 - LZ77UnCompVram
|
|
Expands LZ77-compressed data. The Wram function is faster, and writes in units of 8bits.
|
|
For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
|
|
If the size of the compressed data is not a multiple of 4, please adjust it as much as possible
|
|
by padding with 0.
|
|
|
|
Align the source address to a 4-Byte boundary.
|
|
-----------------------------------------------------------------*/
|
|
void swi_LZ77UnCompVram_(u32 source, u32 dest, int checkBios)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(checkBios && (((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0))
|
|
return;
|
|
|
|
int byteCount = 0;
|
|
int byteShift = 0;
|
|
u32 writeValue = 0;
|
|
|
|
int len = header >> 8;
|
|
|
|
while(len > 0) {
|
|
u8 d = CPUReadByte(source++);
|
|
|
|
if(d) {
|
|
for(int i = 0; i < 8; i++) {
|
|
if(d & 0x80) {
|
|
u16 data = CPUReadByte(source++) << 8;
|
|
data |= CPUReadByte(source++);
|
|
int length = (data >> 12) + 3;
|
|
int offset = (data & 0x0FFF);
|
|
u32 windowOffset = dest + byteCount - offset - 1;
|
|
for(int i2 = 0; i2 < length; i2++) {
|
|
writeValue |= (CPUReadByte(windowOffset++) << byteShift);
|
|
byteShift += 8;
|
|
byteCount++;
|
|
|
|
if(byteCount == 2) {
|
|
CPUWriteHalfWord(dest, writeValue);
|
|
dest += 2;
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
writeValue = 0;
|
|
}
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
} else {
|
|
writeValue |= (CPUReadByte(source++) << byteShift);
|
|
byteShift += 8;
|
|
byteCount++;
|
|
if(byteCount == 2) {
|
|
CPUWriteHalfWord(dest, writeValue);
|
|
dest += 2;
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
writeValue = 0;
|
|
}
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
d <<= 1;
|
|
}
|
|
} else {
|
|
for(int i = 0; i < 8; i++) {
|
|
writeValue |= (CPUReadByte(source++) << byteShift);
|
|
byteShift += 8;
|
|
byteCount++;
|
|
if(byteCount == 2) {
|
|
CPUWriteHalfWord(dest, writeValue);
|
|
dest += 2;
|
|
byteShift = 0;
|
|
byteCount = 0;
|
|
writeValue = 0;
|
|
}
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void swi_LZ77UnCompVram(u32 source, u32 dest)
|
|
{
|
|
swi_LZ77UnCompVram_(source,dest,1);
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x13 - HuffUnComp
|
|
The decoder starts in root node, the separate bits in the bitstream specify if the next node is node0 or node1,
|
|
if that node is a data node, then the data is stored in memory, and the decoder is reset to the root node.
|
|
The most often used data should be as close to the root node as possible. For example, the 4-byte string "Huff"
|
|
could be compressed to 6 bits: 10-11-0-0, with root.0 pointing directly to data "f", and root.1 pointing to a
|
|
child node, whose nodes point to data "H" and data "u".
|
|
|
|
Data is written in units of 32bits, if the size of the compressed data is not a multiple of 4,
|
|
please adjust it as much as possible by padding with 0.
|
|
Align the source address to a 4Byte boundary.
|
|
-----------------------------------------------------------------*/
|
|
void swi_HuffUnComp(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
u8 treeSize = CPUReadByte(source++);
|
|
|
|
u32 treeStart = source;
|
|
|
|
source += ((treeSize+1)<<1)-1; // minus because we already skipped one byte
|
|
|
|
int len = header >> 8;
|
|
|
|
u32 mask = 0x80000000;
|
|
u32 data = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
int pos = 0;
|
|
u8 rootNode = CPUReadByte(treeStart);
|
|
u8 currentNode = rootNode;
|
|
bool writeData = false;
|
|
int byteShift = 0;
|
|
int byteCount = 0;
|
|
u32 writeValue = 0;
|
|
|
|
if((header & 0x0F) == 8) {
|
|
while(len > 0) {
|
|
// take left
|
|
if(pos == 0)
|
|
pos++;
|
|
else
|
|
pos += (((currentNode & 0x3F)+1)<<1);
|
|
|
|
if(data & mask) {
|
|
// right
|
|
if(currentNode & 0x40)
|
|
writeData = true;
|
|
currentNode = CPUReadByte(treeStart+pos+1);
|
|
} else {
|
|
// left
|
|
if(currentNode & 0x80)
|
|
writeData = true;
|
|
currentNode = CPUReadByte(treeStart+pos);
|
|
}
|
|
|
|
if(writeData) {
|
|
writeValue |= (currentNode << byteShift);
|
|
byteCount++;
|
|
byteShift += 8;
|
|
|
|
pos = 0;
|
|
currentNode = rootNode;
|
|
writeData = false;
|
|
|
|
if(byteCount == 4) {
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
CPUWriteMemory(dest, writeValue);
|
|
writeValue = 0;
|
|
dest += 4;
|
|
len -= 4;
|
|
}
|
|
}
|
|
mask >>= 1;
|
|
if(mask == 0) {
|
|
mask = 0x80000000;
|
|
data = CPUReadMemory(source);
|
|
source += 4;
|
|
}
|
|
}
|
|
} else {
|
|
int halfLen = 0;
|
|
int value = 0;
|
|
while(len > 0) {
|
|
// take left
|
|
if(pos == 0)
|
|
pos++;
|
|
else
|
|
pos += (((currentNode & 0x3F)+1)<<1);
|
|
|
|
if((data & mask)) {
|
|
// right
|
|
if(currentNode & 0x40)
|
|
writeData = true;
|
|
currentNode = CPUReadByte(treeStart+pos+1);
|
|
} else {
|
|
// left
|
|
if(currentNode & 0x80)
|
|
writeData = true;
|
|
currentNode = CPUReadByte(treeStart+pos);
|
|
}
|
|
|
|
if(writeData) {
|
|
if(halfLen == 0)
|
|
value |= currentNode;
|
|
else
|
|
value |= (currentNode<<4);
|
|
|
|
halfLen += 4;
|
|
if(halfLen == 8) {
|
|
writeValue |= (value << byteShift);
|
|
byteCount++;
|
|
byteShift += 8;
|
|
|
|
halfLen = 0;
|
|
value = 0;
|
|
|
|
if(byteCount == 4) {
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
CPUWriteMemory(dest, writeValue);
|
|
dest += 4;
|
|
writeValue = 0;
|
|
len -= 4;
|
|
}
|
|
}
|
|
pos = 0;
|
|
currentNode = rootNode;
|
|
writeData = false;
|
|
}
|
|
mask >>= 1;
|
|
if(mask == 0) {
|
|
mask = 0x80000000;
|
|
data = CPUReadMemory(source);
|
|
source += 4;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x14 - RLUnCompWram
|
|
Expands run-length compressed data. The Wram function is faster, and writes in units of 8bits.
|
|
For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
|
|
If the size of the compressed data is not a multiple of 4, please adjust it as much as possible by padding with 0.
|
|
Align the source address to a 4Byte boundary.
|
|
-----------------------------------------------------------------*/
|
|
void swi_RLUnCompWram(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
|
|
while(len > 0) {
|
|
u8 d = CPUReadByte(source++);
|
|
int l = d & 0x7F;
|
|
if(d & 0x80) {
|
|
u8 data = CPUReadByte(source++);
|
|
l += 3;
|
|
for(int i = 0;i < l; i++) {
|
|
CPUWriteByte(dest++, data);
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
} else {
|
|
l++;
|
|
for(int i = 0; i < l; i++) {
|
|
CPUWriteByte(dest++, CPUReadByte(source++));
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x15 - RLUnCompVram
|
|
Expands run-length compressed data. The Wram function is faster, and writes in units of 8bits.
|
|
For the Vram function the destination must be halfword aligned, data is written in units of 16bits.
|
|
If the size of the compressed data is not a multiple of 4, please adjust it as much as possible by padding with 0.
|
|
Align the source address to a 4Byte boundary.
|
|
-----------------------------------------------------------------*/
|
|
void swi_RLUnCompVram(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source & 0xFFFFFFFC);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
int byteCount = 0;
|
|
int byteShift = 0;
|
|
u32 writeValue = 0;
|
|
|
|
while(len > 0) {
|
|
u8 d = CPUReadByte(source++);
|
|
int l = d & 0x7F;
|
|
if(d & 0x80) {
|
|
u8 data = CPUReadByte(source++);
|
|
l += 3;
|
|
for(int i = 0;i < l; i++) {
|
|
writeValue |= (data << byteShift);
|
|
byteShift += 8;
|
|
byteCount++;
|
|
|
|
if(byteCount == 2) {
|
|
CPUWriteHalfWord(dest, writeValue);
|
|
dest += 2;
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
writeValue = 0;
|
|
}
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
} else {
|
|
l++;
|
|
for(int i = 0; i < l; i++) {
|
|
writeValue |= (CPUReadByte(source++) << byteShift);
|
|
byteShift += 8;
|
|
byteCount++;
|
|
if(byteCount == 2) {
|
|
CPUWriteHalfWord(dest, writeValue);
|
|
dest += 2;
|
|
byteCount = 0;
|
|
byteShift = 0;
|
|
writeValue = 0;
|
|
}
|
|
len--;
|
|
if(len == 0)
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x16 - Diff8bitUnFilterWram
|
|
These aren't actually real decompression functions, destination data will have exactly the same size as source data.
|
|
However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be:
|
|
|
|
unfiltered: 10 11 12 13 14 15 16 17 18 19
|
|
filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1
|
|
|
|
In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results.
|
|
Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively.
|
|
The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once,
|
|
the downside is that it is eventually slower as the 8bitWram function.
|
|
-----------------------------------------------------------------*/
|
|
void swi_Diff8bitUnFilterWram(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
(((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0))
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
|
|
u8 data = CPUReadByte(source++);
|
|
CPUWriteByte(dest++, data);
|
|
len--;
|
|
|
|
while(len > 0) {
|
|
u8 diff = CPUReadByte(source++);
|
|
data += diff;
|
|
CPUWriteByte(dest++, data);
|
|
len--;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x17 - Diff8bitUnFilterVram
|
|
These aren't actually real decompression functions, destination data will have exactly the same size as source data.
|
|
However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be:
|
|
|
|
unfiltered: 10 11 12 13 14 15 16 17 18 19
|
|
filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1
|
|
|
|
In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results.
|
|
Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively.
|
|
The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once,
|
|
the downside is that it is eventually slower as the 8bitWram function.
|
|
-----------------------------------------------------------------*/
|
|
void swi_Diff8bitUnFilterVram(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
|
|
u8 data = CPUReadByte(source++);
|
|
u16 writeData = data;
|
|
int shift = 8;
|
|
int bytes = 1;
|
|
|
|
while(len >= 2) {
|
|
u8 diff = CPUReadByte(source++);
|
|
data += diff;
|
|
writeData |= (data << shift);
|
|
bytes++;
|
|
shift += 8;
|
|
if(bytes == 2) {
|
|
CPUWriteHalfWord(dest, writeData);
|
|
dest += 2;
|
|
len -= 2;
|
|
bytes = 0;
|
|
writeData = 0;
|
|
shift = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x18 - Diff16bitUnFilter
|
|
These aren't actually real decompression functions, destination data will have exactly the same size as source data.
|
|
However, assume a bitmap or wave form to contain a stream of increasing numbers such like 10..19, the filtered/unfiltered data would be:
|
|
|
|
unfiltered: 10 11 12 13 14 15 16 17 18 19
|
|
filtered: 10 +1 +1 +1 +1 +1 +1 +1 +1 +1
|
|
|
|
In this case using filtered data (combined with actual compression algorithms) will obviously produce better compression results.
|
|
Data units may be either 8bit or 16bit used with Diff8bit or Diff16bit functions respectively.
|
|
The 8bitVram function allows to write to VRAM directly (which uses 16bit data bus) by writing two 8bit values at once,
|
|
the downside is that it is eventually slower as the 8bitWram function.
|
|
-----------------------------------------------------------------*/
|
|
void swi_Diff16bitUnFilter(u32 source, u32 dest)
|
|
{
|
|
u32 header = CPUReadMemory(source);
|
|
source += 4;
|
|
|
|
if(((source & 0xe000000) == 0) ||
|
|
((source + ((header >> 8) & 0x1fffff)) & 0xe000000) == 0)
|
|
return;
|
|
|
|
int len = header >> 8;
|
|
|
|
u16 data = CPUReadHalfWord(source);
|
|
source += 2;
|
|
CPUWriteHalfWord(dest, data);
|
|
dest += 2;
|
|
len -= 2;
|
|
|
|
while(len >= 2) {
|
|
u16 diff = CPUReadHalfWord(source);
|
|
source += 2;
|
|
data += diff;
|
|
CPUWriteHalfWord(dest, data);
|
|
dest += 2;
|
|
len -= 2;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x18 - Diff16bitUnFilter
|
|
Calculates the value of the assignment to ((SoundArea)sa).vchn[x].fr when playing the wave data, wa,
|
|
with the interval (MIDI KEY) mk and the fine adjustment value (halftones=256) fp.
|
|
-----------------------------------------------------------------*/
|
|
|
|
typedef struct {
|
|
u16 Type; //Always 0
|
|
u16 Stat; //Loop Mode
|
|
u32 Freq; //Frequency Q10
|
|
u32 Loop; //Loop Start
|
|
u32 Size; //Loop End/Length
|
|
s8 Data[1]; //PCM Data
|
|
} WaveData;
|
|
|
|
u32 swi_MidiKey2Freq(WaveData* wa, u32 mk, u32 fp)
|
|
{
|
|
if(mk > 178)
|
|
{
|
|
fp = 0xFF000000;
|
|
mk = 178;
|
|
}
|
|
|
|
u8 scale = ScaleTable[mk];
|
|
u32 freq = FreqTable[(scale & 0x0F)];
|
|
u32 hn = scale / 16;
|
|
u32 temp2 = freq >> hn;
|
|
|
|
u32 scale2 = ScaleTable[mk+1];
|
|
u32 freq2 = FreqTable[(scale2 & 0x0F)];
|
|
u32 hn2 = scale2 / 16;
|
|
u32 temp4 = freq2 >> hn2;
|
|
|
|
u32 diff = temp4 - temp2;
|
|
u32 temp6 = umul3232H32(diff,fp);
|
|
|
|
u32 wave_freq = wa->Freq;
|
|
|
|
u32 result = umul3232H32((temp6+temp2),wave_freq);
|
|
|
|
return result;
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x1C - SoundDriverMain
|
|
Main of the sound driver.
|
|
Call every 1/60 of a second. The flow of the process is to call SoundDriverVSync, which is explained later, immediately after the V-Blank interrupt.
|
|
After that, this routine is called after BG and OBJ processing is executed.
|
|
-----------------------------------------------------------------*/
|
|
void swi_SoundDriverMain()
|
|
{
|
|
u32 m4data = CPUReadMemory(0x03007FF0);
|
|
u32 m4dataOriginal = m4data;
|
|
u32 flag = CPUReadMemory(m4data);
|
|
|
|
if(flag == 0x68736D53) //Special engine flag
|
|
{
|
|
CPUWriteMemory(m4data,flag+1);
|
|
|
|
void (*functionPtr)(u32);
|
|
functionPtr = (void *)CPUReadMemory(m4data+0x20);
|
|
|
|
//If MPlayMain exists call it?
|
|
if(functionPtr)
|
|
{
|
|
u32 param = CPUReadMemory(m4data+0x24);
|
|
|
|
functionPtr(param);
|
|
}
|
|
|
|
functionPtr = (void *)CPUReadMemory(m4data+0x28);
|
|
//If CgbSound exists call it?
|
|
if(functionPtr)
|
|
{
|
|
u32 param = CPUReadMemory(m4data+0x24);
|
|
|
|
functionPtr(param);
|
|
}
|
|
|
|
u32 adr = m4data + 0x350;
|
|
u32 freq = CPUReadMemory(m4data+0x10);
|
|
u8 unknown = CPUReadByte(m4data+0x04);
|
|
|
|
if(unknown > 0)
|
|
{
|
|
u8 unknown2 = CPUReadByte(m4data+0x0B);
|
|
unknown2 -= unknown-1;
|
|
adr += freq * unknown2;
|
|
}
|
|
|
|
u32 unknownConst = 0x630;
|
|
u8 unknown1 = CPUReadByte(m4data+0x05);
|
|
if(unknown1)
|
|
{
|
|
u32 temp = 0;
|
|
//misc crap from 1E20 to 1E70
|
|
if(unknown == 2)
|
|
temp = m4data+0x350;
|
|
else
|
|
temp = adr+freq;
|
|
|
|
int count = freq;
|
|
|
|
do
|
|
{
|
|
s32 unknown2;
|
|
s32 unknown3;
|
|
|
|
unknown2 = *(s8*)(adr+unknownConst);
|
|
unknown3 = *(s8*)(adr);
|
|
unknown2 += unknown3;
|
|
|
|
unknown3 = *(s8*)(temp+unknownConst);
|
|
unknown2 += unknown3;
|
|
|
|
unknown3 = *(s8*)(temp);
|
|
temp += 1;
|
|
unknown2 += unknown3;
|
|
|
|
unknown2 *= unknown1;
|
|
|
|
unknown2 >>= 9;
|
|
|
|
//If its negative
|
|
if(unknown2 & 0x80)
|
|
{
|
|
unknown2+=1;
|
|
}
|
|
|
|
//Store new note?
|
|
*(s8*)(adr+unknownConst) = unknown2;
|
|
*(s8*)(adr) = unknown2;
|
|
|
|
} while (count--);
|
|
}
|
|
|
|
//1E74
|
|
u32 adr2 = unknownConst + adr;
|
|
|
|
u32 count = freq>>3;
|
|
|
|
if(count <= 0)
|
|
{
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
}
|
|
|
|
count >>= 1;
|
|
|
|
if(count <= 0)
|
|
{
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
}
|
|
|
|
do
|
|
{
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
|
|
CPUWriteMemory(adr,0);
|
|
adr += 4;
|
|
CPUWriteMemory(adr2,0);
|
|
adr2 += 4;
|
|
} while(count-- > 0);
|
|
|
|
u32 unknown4 = CPUReadMemory(m4data+0x14);
|
|
u32 unknown5 = CPUReadMemory(m4data+0x18);
|
|
u8 loopcount = CPUReadByte(m4data+0x06);
|
|
|
|
//Why jump 0x50 forward?
|
|
m4data += 0x50;
|
|
|
|
do
|
|
{
|
|
//00001EAE
|
|
u32 unknown7 = CPUReadMemory(m4data+0x24);
|
|
u8 unknown8 = CPUReadByte(m4data);
|
|
|
|
//00001EBE
|
|
if(unknown8 & 0xC7)
|
|
{
|
|
if(!(unknown8 & 0x80))
|
|
{
|
|
if(unknown8 & 0x40)
|
|
{
|
|
u8 unknown11 = 0x03;
|
|
CPUWriteByte(m4data,unknown11);
|
|
CPUWriteMemory(m4data+0x28,unknown7+0x10);
|
|
CPUWriteMemory(m4data+0x18,CPUReadMemory(unknown7+0x0C));
|
|
CPUWriteByte(m4data+0x09,0x00);
|
|
CPUWriteMemory(m4data+0x1C,0x00);
|
|
|
|
//00001EDE
|
|
u8 unknown9 = CPUReadByte(unknown7+0x03);
|
|
|
|
if(unknown8 & 0xC0)
|
|
{
|
|
//00001EE6
|
|
unknown11 = 0x13;
|
|
CPUWriteByte(m4data,unknown11);
|
|
}
|
|
//jump to 1F46
|
|
|
|
//00001F46
|
|
u8 unknown10 = CPUReadByte(m4data+0x04);
|
|
|
|
//WTF a byte is ALWAYS less than 0xFF
|
|
//0xFF might be an error code?
|
|
if(unknown10 >= 0xFF)
|
|
{
|
|
unknown10 = 0xFF;
|
|
unknown11--;
|
|
CPUWriteByte(m4data,unknown11);
|
|
}
|
|
|
|
//00001F54
|
|
CPUWriteByte(m4data+0x09,unknown10);
|
|
u32 unknown12 = (CPUReadByte(m4dataOriginal+0x07) + 1) * unknown11;
|
|
u32 unknown13 = unknown12 << 4;
|
|
unknown12 = unknown13 * CPUReadByte(m4data+0x02);
|
|
unknown12 >>= 8;
|
|
CPUWriteByte(m4data+0x0A,(u8)unknown12);
|
|
|
|
//00001F68
|
|
u32 unknown14 = (CPUReadByte(m4data+0x03) * unknown13) >> 8;
|
|
CPUWriteByte(m4data+0x0B,(u8)unknown12);
|
|
}
|
|
}
|
|
}
|
|
m4data += 0x40; //000020EA
|
|
} while (loopcount--); //000020E8
|
|
|
|
//000020F0
|
|
//Reset flag?
|
|
CPUWriteMemory(m4data,0x68736D53);
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x23 - MusicPlayerContinue
|
|
-----------------------------------------------------------------*/
|
|
void swi_MusicPlayerContinue(u32 dst)
|
|
{
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x24 - MusicPlayerFadeOut
|
|
-----------------------------------------------------------------*/
|
|
void swi_MusicPlayerFadeOut(u32 dst)
|
|
{
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x2A - SoundGetJumpList
|
|
Receives pointers to 36 additional sound-related BIOS functions.
|
|
-----------------------------------------------------------------*/
|
|
extern void swi_Invalid();
|
|
void swi_SoundGetJumpList(u32 dst)
|
|
{
|
|
//Dummy out the jump list by forcing all of them to return immediately
|
|
for(int i = 0; i < 0x24; i++) {
|
|
CPUWriteMemory(dst, (u32)&swi_Invalid);
|
|
dst += 4;
|
|
}
|
|
}
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x00 - SoftReset
|
|
Clears 200h bytes of RAM (containing stacks, and BIOS IRQ vector/flags), initializes system, supervisor, and irq stack pointers,
|
|
sets R0-R12, LR_svc, SPSR_svc, LR_irq, and SPSR_irq to zero, and enters system mode.
|
|
|
|
Host sp_svc sp_irq sp_sys zerofilled area return address
|
|
GBA 3007FE0h 3007FA0h 3007F00h [3007E00h..3007FFFh] Flag[3007FFAh]
|
|
|
|
The GBA return address 8bit flag is interpreted as 00h=8000000h (ROM), or 01h-FFh=2000000h (RAM), entered in ARM state.
|
|
Note: The reset is applied only to the CPU that has executed the SWI (ie. on the NDS, the other CPU will remain unaffected).
|
|
Return: Does not return to calling procedure, instead, loads the above return address into R14, and then jumps to that address by a "BX R14" opcode.
|
|
-----------------------------------------------------------------*/
|
|
/*void swi_SoftReset()
|
|
{
|
|
//Implemented in assembly
|
|
}*/
|
|
|
|
/*-----------------------------------------------------------------
|
|
0x01 - RegisterRamReset
|
|
Resets the I/O registers and RAM specified in ResetFlags. However, it does not clear the CPU internal RAM area from 3007E00h-3007FFFh.
|
|
|
|
r0 ResetFlags
|
|
Bit Expl.
|
|
0 Clear 256K on-board WRAM ;-don't use when returning to WRAM
|
|
1 Clear 32K in-chip WRAM ;-excluding last 200h bytes
|
|
2 Clear Palette
|
|
3 Clear VRAM
|
|
4 Clear OAM ;-zerofilled! does NOT disable OBJs!
|
|
5 Reset SIO registers ;-switches to general purpose mode!
|
|
6 Reset Sound registers
|
|
7 Reset all other registers (except SIO, Sound)
|
|
|
|
Return: No return value.
|
|
Bug: LSBs of SIODATA32 are always destroyed, even if Bit5 of R0 was cleared.
|
|
The function always switches the screen into forced blank by setting DISPCNT=0080h (regardless of incoming R0, screen becomes white).
|
|
-----------------------------------------------------------------*/
|
|
#define COPY_MODE_FILL 1<<24
|
|
void swi_RegisterRamReset(u32 flags)
|
|
{
|
|
// no need to trace here. this is only called directly from GBA.cpp
|
|
// to emulate bios initialization
|
|
|
|
u32 zero = 0;
|
|
CPUUpdateRegister(0x0, 0x80);
|
|
|
|
if(flags) {
|
|
if(flags & 0x01) {
|
|
// clear work RAM
|
|
swi_CpuFastSet((u32)&zero, 0x02000000, 0x40000/4 | COPY_MODE_FILL);
|
|
}
|
|
if(flags & 0x02) {
|
|
// clear internal RAM
|
|
swi_CpuFastSet((u32)&zero, 0x03000000, 0x7e00/4 | COPY_MODE_FILL); // don't clear 0x7e00-0x7fff
|
|
}
|
|
if(flags & 0x04) {
|
|
// clear palette RAM
|
|
swi_CpuFastSet((u32)&zero, 0x05000000, 0x400/4 | COPY_MODE_FILL);
|
|
}
|
|
if(flags & 0x08) {
|
|
// clear VRAM
|
|
swi_CpuFastSet((u32)&zero, 0x06000000, 0x18000/4 | COPY_MODE_FILL);
|
|
}
|
|
if(flags & 0x10) {
|
|
// clean OAM
|
|
swi_CpuFastSet((u32)&zero, 0x07000000, 0x400/4 | COPY_MODE_FILL);
|
|
}
|
|
|
|
if(flags & 0x80) {
|
|
int i;
|
|
for(i = 0; i < 0x10; i++)
|
|
CPUUpdateRegister(0x200+i*2, 0);
|
|
|
|
for(i = 0; i < 0xF; i++)
|
|
CPUUpdateRegister(0x4+i*2, 0);
|
|
|
|
for(i = 0; i < 0x20; i++)
|
|
CPUUpdateRegister(0x20+i*2, 0);
|
|
|
|
for(i = 0; i < 0x18; i++)
|
|
CPUUpdateRegister(0xb0+i*2, 0);
|
|
|
|
CPUUpdateRegister(0x130, 0);
|
|
CPUUpdateRegister(0x20, 0x100);
|
|
CPUUpdateRegister(0x30, 0x100);
|
|
CPUUpdateRegister(0x26, 0x100);
|
|
CPUUpdateRegister(0x36, 0x100);
|
|
}
|
|
|
|
if(flags & 0x20) {
|
|
int i;
|
|
for(i = 0; i < 8; i++)
|
|
CPUUpdateRegister(0x110+i*2, 0);
|
|
CPUUpdateRegister(0x134, 0x8000);
|
|
for(i = 0; i < 7; i++)
|
|
CPUUpdateRegister(0x140+i*2, 0);
|
|
}
|
|
|
|
if(flags & 0x40) {
|
|
int i;
|
|
CPUWriteByte(0x4000084, 0);
|
|
CPUWriteByte(0x4000084, 0x80);
|
|
CPUWriteMemory(0x4000080, 0x880e0000);
|
|
CPUUpdateRegister(0x88, CPUReadHalfWord(0x4000088)&0x3ff);
|
|
CPUWriteByte(0x4000070, 0x70);
|
|
for(i = 0; i < 8; i++)
|
|
CPUUpdateRegister(0x90+i*2, 0);
|
|
CPUWriteByte(0x4000070, 0);
|
|
for(i = 0; i < 8; i++)
|
|
CPUUpdateRegister(0x90+i*2, 0);
|
|
CPUWriteByte(0x4000084, 0);
|
|
}
|
|
}
|
|
}
|