link dolphin ar, ai, & mtx. Finish Dolphin SDK

This commit is contained in:
Cuyler36
2025-06-18 07:43:31 -04:00
parent 2893c0b33d
commit 8a2a1eb41a
11 changed files with 1810 additions and 32 deletions
+348
View File
@@ -0,0 +1,348 @@
#include <dolphin/ai.h>
#include <dolphin/gx.h>
#include <dolphin/hw_regs.h>
#include <dolphin/os.h>
#include <macros.h>
#include "gx/__gx.h"
// Stream-interrupt callback; __AISHandler calls it with the value of
// __AIRegs[2]. AIInit() resets it to NULL.
static AISCallback __AIS_Callback;
// DMA-interrupt callback; __AIDHandler calls it with no arguments.
// Replaced through AIRegisterDMACallback(); AIInit() resets it to NULL.
static AIDCallback __AID_Callback;
// Stack pointer handed to AIInit(). When non-NULL, __AIDHandler runs the DMA
// callback through __AICallbackStackSwitch instead of calling it directly.
static u8* __CallbackStack;
// Slot in which __AICallbackStackSwitch parks r1 while the callback runs on
// __CallbackStack.
static u8* __OldStack;
// TRUE once AIInit() has completed; AIInit() skips its body when already TRUE.
static BOOL __AI_init_flag;
// Timing values in OS ticks. All five are assigned in AIInit() (via
// OSNanosecondsToTicks) and read by __AI_SRC_INIT().
static OSTime bound_32KHz;
static OSTime bound_48KHz;
static OSTime min_wait;
static OSTime max_wait;
static OSTime buffer;
// Group of timestamps. In the code visible here only t_start and t_end are
// written (by __AI_SRC_INIT, DEBUG builds only).
struct STRUCT_TIMELOG {
OSTime t_start;
OSTime t1;
OSTime t2;
OSTime t3;
OSTime t4;
OSTime t_end;
};
#if DEBUG
// Timing record filled in by __AI_SRC_INIT; exists only in DEBUG builds.
struct STRUCT_TIMELOG profile;
#endif
// Forward declarations of the file-local helpers defined further down.
static void __AI_set_stream_sample_rate(u32 rate);
static void __AIDHandler(__OSInterrupt interrupt, OSContext* context);
static void __AISHandler(__OSInterrupt interrupt, OSContext* context);
static void __AICallbackStackSwitch(void* cb);
static void __AI_SRC_INIT(void);
// Installs `callback` as the AI DMA callback and returns the callback that
// was installed before the call. The store to __AID_Callback happens with
// interrupts disabled; the previous value is sampled just before that.
AIDCallback AIRegisterDMACallback(AIDCallback callback)
{
    AIDCallback previous = __AID_Callback;
    BOOL irq_state = OSDisableInterrupts();

    __AID_Callback = callback;
    OSRestoreInterrupts(irq_state);

    return previous;
}
// Programs the AI DMA address and length registers with interrupts disabled.
//   start_addr - split across two registers: bits 31..16 are OR'ed into
//                __DSPRegs[24], bits 15..0 into __DSPRegs[25].
//   length     - byte count, asserted to be a multiple of 32; it is written
//                to __DSPRegs[27] as a number of 32-byte units (length >> 5).
// The mask on __DSPRegs[27] keeps its current bit 15, which is the bit
// AIStartDMA() sets.
void AIInitDMA(u32 start_addr, u32 length)
{
BOOL old;
old = OSDisableInterrupts();
// Keep the register bits above bit 9, replace the rest with the high half.
__DSPRegs[24] = (__DSPRegs[24] & 0xFFFFFC00) | (start_addr >> 16);
// The mask keeps the current low five bits; the low half is OR'ed on top.
__DSPRegs[25] = (__DSPRegs[25] & 0xFFFF001F) | (start_addr & 0xFFFF);
// NOTE(review): the message names AIStartDMA although the check lives in
// AIInitDMA, and it runs after the address registers were already written.
ASSERTMSGLINE(0x12E, (length & 0x1F) == 0,
"AIStartDMA: length must be multiple of 32 bytes");
__DSPRegs[27] = (__DSPRegs[27] & 0xFFFF8000) | ((length >> 5) & 0xFFFF);
OSRestoreInterrupts(old);
}
// Sets bit 15 (0x8000) of __DSPRegs[27], leaving the other bits as read.
// AIInitDMA() writes the length into the low bits of the same register.
void AIStartDMA(void)
{
    __DSPRegs[27] |= 0x8000;
}
// Returns the current value of __AIRegs[2] (the register that
// AIResetStreamSampleCount's bit-5 write is named after).
u32 AIGetStreamSampleCount(void)
{
    u32 count = __AIRegs[2];

    return count;
}
// Sets bit 5 (0x20) of __AIRegs[0]; every other bit is written back as read.
void AIResetStreamSampleCount(void)
{
    __AIRegs[0] |= 0x20;
}
// Writes `trigger` to __AIRegs[3]; AIGetStreamTrigger() reads it back.
inline void AISetStreamTrigger(u32 trigger)
{
    __AIRegs[3] = trigger;
}
// Returns the current value of __AIRegs[3], the register written by
// AISetStreamTrigger().
u32 AIGetStreamTrigger(void)
{
    u32 trigger = __AIRegs[3];

    return trigger;
}
// Changes the stream play state (bit 0 of __AIRegs[0]); does nothing when
// `state` already matches AIGetStreamPlayState().
//
// When the stream sample-rate field reads 0 and the stream is being started,
// the change goes through a longer path: both stream volumes are saved and
// muted, __AI_SRC_INIT() runs with interrupts disabled, bit 5 and the play
// bit are set, and the saved volumes are restored afterwards.
//
// Fix: the volumes used to be saved crosswise (left from the right channel
// and vice versa), so the restore swapped the two channels. Each channel is
// now saved from and restored to itself, as AISetDSPSampleRate() and
// __AI_set_stream_sample_rate() already do.
void AISetStreamPlayState(u32 state)
{
    BOOL old;
    u8 vol_left;
    u8 vol_right;

    if (state == AIGetStreamPlayState()) {
        return;
    }

    if (AIGetStreamSampleRate() == 0 && state == AI_STREAM_START) {
        vol_left = AIGetStreamVolLeft();
        vol_right = AIGetStreamVolRight();

        // Mute while the sample-rate converter is being re-initialised.
        AISetStreamVolRight(0);
        AISetStreamVolLeft(0);

        old = OSDisableInterrupts();
        __AI_SRC_INIT();
        SET_REG_FIELD(0, __AIRegs[0], 1, 5, 1);
        SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
        OSRestoreInterrupts(old);

        AISetStreamVolLeft(vol_left);
        AISetStreamVolRight(vol_right);
        return;
    }

    SET_REG_FIELD(0x27F, __AIRegs[0], 1, 0, state);
}
// Returns bit 0 of __AIRegs[0], the bit AISetStreamPlayState() writes.
u32 AIGetStreamPlayState(void)
{
    u32 control = __AIRegs[0];

    return control & 1;
}
// Selects the DSP sample rate; a no-op when `rate` already equals
// AIGetDSPSampleRate().
//
// Bit 6 (0x40) of __AIRegs[0] is cleared first. For AI_SAMPLERATE_32KHZ the
// stream volumes, play state and stream sample rate are saved, the stream is
// muted, __AI_SRC_INIT() runs with interrupts disabled, the saved stream
// fields are written back, bit 6 is set, and the volumes are restored.
void AISetDSPSampleRate(u32 rate)
{
    BOOL irq_state;
    u32 play_state;
    u32 afr_state;
    u8 saved_left;
    u8 saved_right;

    if (rate == AIGetDSPSampleRate()) {
        return;
    }

    __AIRegs[0] &= ~0x40;
    if (rate != AI_SAMPLERATE_32KHZ) {
        return;
    }

    saved_left = AIGetStreamVolLeft();
    saved_right = AIGetStreamVolRight();
    play_state = AIGetStreamPlayState();
    afr_state = AIGetStreamSampleRate();

    AISetStreamVolLeft(0U);
    AISetStreamVolRight(0U);

    irq_state = OSDisableInterrupts();
    __AI_SRC_INIT();
    SET_REG_FIELD(0x2D8, __AIRegs[0], 1, 5, 1);
    SET_REG_FIELD(0x2D9, __AIRegs[0], 1, 1, afr_state);
    SET_REG_FIELD(0x2DA, __AIRegs[0], 1, 0, play_state);
    __AIRegs[0] |= 0x40;
    OSRestoreInterrupts(irq_state);

    AISetStreamVolLeft(saved_left);
    AISetStreamVolRight(saved_right);
}
// Returns the inverse of bit 6 of __AIRegs[0]: 0 when the bit is set,
// 1 when it is clear.
u32 AIGetDSPSampleRate(void)
{
    return GET_REG_FIELD(__AIRegs[0], 1, 6) ^ 1;
}
// Public setter for the stream sample rate. Only AI_SAMPLERATE_48KHZ is
// forwarded to __AI_set_stream_sample_rate(); any other value is ignored,
// with a report printed in DEBUG builds.
void AISetStreamSampleRate(u32 rate)
{
    if (rate != AI_SAMPLERATE_48KHZ) {
#if DEBUG
        OSReport("AISetStreamSampleRate(): OBSOLETED. Only 48KHz streaming from "
                 "disk is supported!\n");
#endif
        return;
    }

    __AI_set_stream_sample_rate(rate);
}
// Writes `rate` into the stream sample-rate field (bit 1 of __AIRegs[0]);
// a no-op when the field already holds that value.
//
// The play state and both stream volumes are saved and the stream muted.
// Bit 6 of __AIRegs[0] is remembered and cleared, __AI_SRC_INIT() runs with
// interrupts disabled, then bit 6 is OR'ed back, bit 5 is set and the rate is
// written. Play state and volumes are restored last.
static void __AI_set_stream_sample_rate(u32 rate)
{
    BOOL irq_state;
    u32 saved_play_state;
    u8 saved_left;
    u8 saved_right;
    u32 saved_bit6;

    if (rate == AIGetStreamSampleRate()) {
        return;
    }

    saved_play_state = AIGetStreamPlayState();
    saved_left = AIGetStreamVolLeft();
    saved_right = AIGetStreamVolRight();
    AISetStreamVolRight(0);
    AISetStreamVolLeft(0);

    saved_bit6 = __AIRegs[0] & 0x40;
    SET_REG_FIELD(0, __AIRegs[0], 1, 6, 0);

    irq_state = OSDisableInterrupts();
    __AI_SRC_INIT();
    __AIRegs[0] |= saved_bit6;
    SET_REG_FIELD(0x368, __AIRegs[0], 1, 5, 1);
    SET_REG_FIELD(0x369, __AIRegs[0], 1, 1, rate);
    OSRestoreInterrupts(irq_state);

    AISetStreamPlayState(saved_play_state);
    AISetStreamVolLeft(saved_left);
    AISetStreamVolRight(saved_right);
}
// Returns the one-bit field at bit 1 of __AIRegs[0].
u32 AIGetStreamSampleRate(void)
{
    return GET_REG_FIELD(__AIRegs[0], 1, 1);
}
// Stores `vol` in the 8-bit field at bits 0..7 of __AIRegs[1].
void AISetStreamVolLeft(u8 vol) { SET_REG_FIELD(0x3A3, __AIRegs[1], 8, 0, vol); }
// Returns the 8-bit field at bits 0..7 of __AIRegs[1].
u8 AIGetStreamVolLeft(void)
{
    return GET_REG_FIELD(__AIRegs[1], 8, 0);
}
// Stores `vol` in the 8-bit field at bits 8..15 of __AIRegs[1].
void AISetStreamVolRight(u8 vol) { SET_REG_FIELD(0x3CC, __AIRegs[1], 8, 8, vol); }
// Returns bits 8..15 of __AIRegs[1], the field AISetStreamVolRight() writes.
u8 AIGetStreamVolRight(void)
{
    return (__AIRegs[1] >> 8) & 0xFF;
}
// One-time initialisation of the AI module; later calls return immediately
// because __AI_init_flag is already TRUE.
//
//   stack - optional stack for the DMA callback. When non-NULL it must be
//           8-byte aligned (asserted); __AIDHandler then runs the callback
//           through __AICallbackStackSwitch. NULL means the callback is
//           called directly.
//
// Steps: compute the timing thresholds used by __AI_SRC_INIT(), mute the
// stream and clear its trigger and sample counter, select the stream and DSP
// sample rates, clear both callbacks, and install and unmask the handlers for
// interrupts 5 (__AIDHandler) and 8 (__AISHandler).
//
// Fix: the alignment assert used to test `!= 0`, which fired for aligned
// stacks and let misaligned ones through; it now tests `== 0`.
void AIInit(u8* stack)
{
    if (__AI_init_flag == TRUE) {
        return;
    }

    bound_32KHz = OSNanosecondsToTicks(31524);
    bound_48KHz = OSNanosecondsToTicks(42024);
    min_wait = OSNanosecondsToTicks(42000);
    max_wait = OSNanosecondsToTicks(63000);
    buffer = OSNanosecondsToTicks(3000);

    AISetStreamVolRight(0);
    AISetStreamVolLeft(0);
    AISetStreamTrigger(0);
    AIResetStreamSampleCount();
    __AI_set_stream_sample_rate(AI_SAMPLERATE_48KHZ);
    AISetDSPSampleRate(AI_SAMPLERATE_32KHZ);
#if DEBUG
    OSReport("AIInit(): DSP is 32KHz\n");
#endif

    __AIS_Callback = NULL;
    __AID_Callback = NULL;
    __CallbackStack = stack;
    if (stack) {
        ASSERTMSGLINE(0x444, ((u32)stack & 7) == 0,
                      "AIInit: stack must be 8-byte aligned");
    }

    __OSSetInterruptHandler(5, __AIDHandler);
    __OSUnmaskInterrupts(0x04000000);
    __OSSetInterruptHandler(8, __AISHandler);
    __OSUnmaskInterrupts(0x800000);

    __AI_init_flag = TRUE;
}
// Handler installed for interrupt 8 by AIInit(). Sets bit 3 of __AIRegs[0],
// switches to a scratch context, calls __AIS_Callback (if any) with the value
// of __AIRegs[2], then switches back to the interrupted context.
static void __AISHandler(__OSInterrupt interrupt, OSContext* context)
{
    OSContext scratch;

    __AIRegs[0] |= 8;

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AIS_Callback != NULL) {
        __AIS_Callback(__AIRegs[2]);
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
// Handler installed for interrupt 5 by AIInit(). Rewrites __DSPRegs[5] with
// bits 5 and 7 cleared and bit 3 set, switches to a scratch context, and
// invokes __AID_Callback if one is registered: through
// __AICallbackStackSwitch when a callback stack was given to AIInit(),
// directly otherwise. The interrupted context is restored on the way out.
static void __AIDHandler(__OSInterrupt interrupt, OSContext* context)
{
    OSContext scratch;
    u16 status;

    status = __DSPRegs[5];
    status = (status & ~0xA0) | 8;
    __DSPRegs[5] = status;

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AID_Callback != NULL) {
        if (__CallbackStack == NULL) {
            __AID_Callback();
        } else {
            __AICallbackStackSwitch(__AID_Callback);
        }
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
// Calls `cb` on the stack stored in __CallbackStack, then switches back.
// Sequence, as written below:
//   - save LR at 4(r1), open a 0x18-byte frame and save r31 in it;
//   - keep cb in r31 and store the current r1 into __OldStack;
//   - load r1 from __CallbackStack, subtract 8, and call cb through LR/blrl;
//   - reload r1 from __OldStack, restore LR from 0x1c(r1) (the slot written
//     at 4(r1) before the frame was opened) and r31, pop the frame, return.
// The instructions are only emitted when __MWERKS__ is defined.
static asm void __AICallbackStackSwitch(register void* cb)
{
#ifdef __MWERKS__ // clang-format off
nofralloc
mflr r0
stw r0, 0x4(r1)
stwu r1, -0x18(r1)
stw r31, 0x14(r1)
mr r31, r3
lis r5, __OldStack@ha
addi r5, r5, __OldStack@l
stw r1, 0x0(r5)
lis r5, __CallbackStack@ha
addi r5, r5, __CallbackStack@l
lwz r1, 0x0(r5)
subi r1, r1, 0x8
mtlr r31
blrl
lis r5, __OldStack@ha
addi r5, r5, __OldStack@l
lwz r1, 0x0(r5)
lwz r0, 0x1c(r1)
lwz r31, 0x14(r1)
addi r1, r1, 0x18
mtlr r0
blr
#endif // clang-format on
}
// Timing calibration loop run by the sample-rate/play-state setters, which
// call it with interrupts disabled.
//
// Each pass resets the sample counter (bit 5), starts the stream with the
// sample-rate field (bit 1) at 0 and timestamps the first change of
// __AIRegs[2], then repeats with the field at 1, and finally stops the
// stream. The time between the two changes (`diff`) selects the final delay:
//   diff <  bound_32KHz - buffer                          -> min_wait
//   bound_32KHz + buffer <= diff < bound_48KHz - buffer   -> max_wait
//   anything else                                         -> measure again
// After a successful pass the function busy-waits until `temp` ticks have
// passed since the second timestamp.
//
// t1, volume, walking and Init_Cnt are written but never read in this
// function. The loops spin on hardware state with no timeout.
void __AI_SRC_INIT(void)
{
OSTime rising_32khz = 0;
OSTime rising_48khz = 0;
OSTime diff = 0;
OSTime t1 = 0;
OSTime temp;
u32 temp0;
u32 temp1;
u32 done = 0;
u32 volume = 0;
u32 Init_Cnt = 0;
u32 walking = 0;
walking = 0;
Init_Cnt = 0;
temp = 0;
#if DEBUG
profile.t_start = OSGetTime();
#endif
while (!done) {
// First measurement: sample-rate field = 0, wait for the counter to move.
SET_REG_FIELD(0, __AIRegs[0], 1, 5, 1);
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 0);
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
temp0 = __AIRegs[2];
while (temp0 == __AIRegs[2]) { }
rising_32khz = OSGetTime();
// Second measurement: sample-rate field = 1, wait for the next change.
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 1);
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
temp1 = __AIRegs[2];
while (temp1 == __AIRegs[2]) { }
rising_48khz = OSGetTime();
diff = rising_48khz - rising_32khz;
// Put the rate field back to 0 and stop the stream before classifying.
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 0);
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_STOP);
if (diff < bound_32KHz - buffer) {
temp = min_wait;
done = 1;
Init_Cnt++;
} else if (diff >= bound_32KHz + buffer
&& diff < bound_48KHz - buffer) {
temp = max_wait;
done = 1;
Init_Cnt++;
} else {
// Measurement fell outside both windows: try again.
done = 0;
walking = 1;
Init_Cnt++;
}
}
// Hold until `temp` ticks after the second timestamp.
while (rising_48khz + temp > OSGetTime()) { }
#if DEBUG
profile.t_end = OSGetTime();
#endif
}
+13
View File
@@ -0,0 +1,13 @@
// Internal ARQ declarations shared between the AR/ARQ sources.
#ifndef _DOLPHIN_AR_INTERNAL_H_
#define _DOLPHIN_AR_INTERNAL_H_
#include <dolphin/ar.h>
// Starts the DMA for the head of the high-priority queue, if any, and makes
// that request the pending high-priority request.
void __ARQPopTaskQueueHi(void);
// Advances the low-priority queue by one chunk of at most the ARQ chunk size.
void __ARQServiceQueueLo(void);
// Empty callback stored in requests that were posted without one.
void __ARQCallbackHack(u32 pointerToARQRequest);
// DMA-completion routine that ARQInit() registers with ARRegisterDMACallback().
void __ARQInterruptServiceRoutine(void);
// NOTE(review): no definition of the two temp-queue functions is visible in
// this chunk; their behaviour is not documented here.
void __ARQInitTempQueue(void);
void __ARQPushTempQueue(struct ARQRequest* task);
#endif // _DOLPHIN_AR_INTERNAL_H_
+314
View File
@@ -0,0 +1,314 @@
#include <dolphin/ar.h>
// #include "fake_tgmath.h"
#include <dolphin.h>
#include <dolphin/base/PPCArch.h>
#include <string.h>
#include <macros.h>
#include "ar/__ar.h"
// DMA-completion callback invoked by __ARHandler; set through
// ARRegisterDMACallback() and cleared by ARInit().
static void (*__AR_Callback)();
// Total ARAM size determined by __ARChecksize(); returned by ARGetSize().
static u32 __AR_Size;
// Set to 0x1000000 by __ARChecksize().
static u32 __AR_InternalSize;
// Size added on top of the internal size, as detected by __ARChecksize().
static u32 __AR_ExpansionSize;
// Next free ARAM address handed out by ARAlloc(); starts at 0x4000 in ARInit().
static u32 __AR_StackPointer;
// Remaining number of block-length slots; decremented by ARAlloc().
static u32 __AR_FreeBlocks;
// Cursor into the caller-supplied array where ARAlloc() records block lengths.
static u32* __AR_BlockLength;
// 1 once ARInit() has completed.
static int __AR_init_flag;
// functions
static void __ARHandler(__OSInterrupt exception, struct OSContext* context);
static void __ARWaitForDMA(void);
static void __ARWriteDMA(u32 mmem_addr, u32 aram_addr, u32 length);
static void __ARReadDMA(u32 mmem_addr, u32 aram_addr, u32 length);
static void __ARChecksize(void);
// Installs `callback` as the ARAM DMA callback and returns the one that was
// installed before. The store happens with interrupts disabled; the previous
// value is sampled just before that.
ARQCallback ARRegisterDMACallback(ARQCallback callback)
{
    ARQCallback previous = __AR_Callback;
    int irq_state = OSDisableInterrupts();

    __AR_Callback = callback;
    OSRestoreInterrupts(irq_state);

    return previous;
}
// Programs the ARAM DMA registers with interrupts disabled.
//   type         - placed in bit 15 of DSP_ARAM_DMA_SIZE_HI (type << 15);
//                  the ARQ callers pass 0 with (source, dest) and non-zero
//                  with (dest, source) as the two addresses.
//   mainmem_addr - main-memory address, split into the MM_HI / MM_LO registers.
//   aram_addr    - ARAM address, split into the ARAM_HI / ARAM_LO registers.
//   length       - transfer length, split into the SIZE_HI / SIZE_LO registers.
// SIZE_LO is written last.
// Several expressions below rely on `&` binding tighter than `|`:
// `reg & MASK | value` means `(reg & MASK) | value`.
void ARStartDMA(u32 type, u32 mainmem_addr, u32 aram_addr, u32 length)
{
int old;
old = OSDisableInterrupts();
__DSPRegs[DSP_ARAM_DMA_MM_HI]
= (__DSPRegs[DSP_ARAM_DMA_MM_HI] & 0xFFFFFC00 | (mainmem_addr >> 0x10));
__DSPRegs[DSP_ARAM_DMA_MM_LO]
= (__DSPRegs[DSP_ARAM_DMA_MM_LO] & 0xFFFF001F | ((u16)mainmem_addr));
__DSPRegs[DSP_ARAM_DMA_ARAM_HI]
= (__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & 0xFFFFFC00 | (aram_addr >> 0x10));
__DSPRegs[DSP_ARAM_DMA_ARAM_LO]
= (__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & 0xFFFF001F | ((u16)aram_addr));
// Direction bit first, then the high part of the length, in the same register.
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= __DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x8000 | ((type << 0xF) & ~0x7FFF);
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= (__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & 0xFFFFFC00) | (length >> 0x10);
__DSPRegs[DSP_ARAM_DMA_SIZE_LO]
= (__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & 0xFFFF001F)
| (length & 0x0000FFFF);
OSRestoreInterrupts(old);
}
// Reserves `length` bytes at the current ARAM stack pointer and returns the
// start address of the reserved block. The block length is recorded in the
// next slot of the block-length array and the free-block count is
// decremented. All bookkeeping runs with interrupts disabled.
// The three preconditions (length a multiple of 32, enough ARAM left, a free
// block slot available) are checked by assertions only.
u32 ARAlloc(u32 length)
{
    u32 block_start;
    int irq_state = OSDisableInterrupts();

    ASSERTMSGLINE(0x17E, !(length & 0x1F),
                  "ARAlloc(): length is not multiple of 32bytes!");
    ASSERTMSGLINE(0x182, length <= (__AR_Size - __AR_StackPointer),
                  "ARAlloc(): Out of ARAM!");
    ASSERTMSGLINE(0x183, __AR_FreeBlocks, "ARAlloc(): No more free blocks!");

    block_start = __AR_StackPointer;
    __AR_StackPointer = block_start + length;

    *__AR_BlockLength++ = length;
    __AR_FreeBlocks--;

    OSRestoreInterrupts(irq_state);
    return block_start;
}
// Initialises the ARAM module once and returns the first allocatable ARAM
// address.
//   stack_index_addr - array in which ARAlloc() records block lengths.
//   num_entries      - number of slots available in that array.
// If the module is already initialised, 0x4000 is returned and nothing else
// happens. Otherwise, with interrupts disabled: the callback is cleared, the
// handler for interrupt 6 is installed and unmasked, the allocator state is
// reset, and __ARChecksize() determines the ARAM size.
u32 ARInit(u32* stack_index_addr, u32 num_entries)
{
    BOOL irq_state;

    if (__AR_init_flag != 1) {
        irq_state = OSDisableInterrupts();

        __AR_Callback = NULL;
        __OSSetInterruptHandler(6, __ARHandler);
        __OSUnmaskInterrupts(0x02000000);

        __AR_StackPointer = 0x4000;
        __AR_FreeBlocks = num_entries;
        __AR_BlockLength = stack_index_addr;

        // WHY? (reads the refresh register twice and writes the halves back)
        __DSPRegs[DSP_ARAM_REFRESH] = __DSPRegs[DSP_ARAM_REFRESH] & 0xff
            | __DSPRegs[DSP_ARAM_REFRESH] & ~0xff;

        __ARChecksize();
        __AR_init_flag = 1;

        OSRestoreInterrupts(irq_state);
        return __AR_StackPointer;
    }

    return 0x4000;
}
// Returns the constant 0x4000, the same value ARInit() uses as the initial
// ARAM stack pointer.
u32 ARGetBaseAddress(void)
{
    return 0x4000;
}
// Returns the ARAM size stored by __ARChecksize().
u32 ARGetSize(void)
{
    return __AR_Size;
}
// Handler installed for interrupt 6 by ARInit(). Rewrites DSP_CONTROL_STATUS
// with bits 3 and 7 cleared and bit 5 set, switches to a scratch context,
// calls __AR_Callback if one is registered, then restores the interrupted
// context.
static void __ARHandler(__OSInterrupt exception, struct OSContext* context)
{
    struct OSContext scratch;
    u16 status;

    status = __DSPRegs[DSP_CONTROL_STATUS];
    status = (status & ~0x88) | 0x20;
    __DSPRegs[DSP_CONTROL_STATUS] = (status);

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AR_Callback != NULL) {
        __AR_Callback();
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
// Rewrites DSP_CONTROL_STATUS with bits 3 and 7 cleared and bit 5 set, the
// same register update __ARHandler performs.
static void __ARClearInterrupt(void)
{
    u16 status = __DSPRegs[DSP_CONTROL_STATUS];

    status = (status & ~0x88) | 0x20;
    __DSPRegs[DSP_CONTROL_STATUS] = status;
}
// Spins until bit 9 (0x200) of DSP_CONTROL_STATUS reads as zero.
// There is no timeout.
static void __ARWaitForDMA(void)
{
    while (__DSPRegs[DSP_CONTROL_STATUS] & 0x200) {
    }
}
// Blocking DMA helper used by __ARChecksize(): programs the ARAM DMA
// registers with bit 15 of DSP_ARAM_DMA_SIZE_HI cleared (the opposite of
// __ARReadDMA), waits for the busy bit to clear, then clears the interrupt
// flag. Unlike ARStartDMA(), it does not disable interrupts itself.
//   mmem_addr - main-memory address of the buffer.
//   aram_addr - ARAM address.
//   length    - transfer length in bytes.
static void __ARWriteDMA(u32 mmem_addr, u32 aram_addr, u32 length)
{
// Main mem address
__DSPRegs[DSP_ARAM_DMA_MM_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_MM_HI] & ~0x03ff)
| (u16)(mmem_addr >> 16));
__DSPRegs[DSP_ARAM_DMA_MM_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_MM_LO] & ~0xffe0)
| (u16)(mmem_addr & 0xffff));
// ARAM address
__DSPRegs[DSP_ARAM_DMA_ARAM_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & ~0x03ff)
| (u16)(aram_addr >> 16));
__DSPRegs[DSP_ARAM_DMA_ARAM_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & ~0xffe0)
| (u16)(aram_addr & 0xffff));
// DMA buffer size
// Bit 15 cleared here; __ARReadDMA sets it instead.
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= (u16)(__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x8000);
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x03ff)
| (u16)(length >> 16));
__DSPRegs[DSP_ARAM_DMA_SIZE_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & ~0xffe0)
| (u16)(length & 0xffff));
// Block until the transfer is reported finished, then acknowledge it.
__ARWaitForDMA();
__ARClearInterrupt();
}
// Blocking DMA helper used by __ARChecksize(): identical to __ARWriteDMA
// except that bit 15 of DSP_ARAM_DMA_SIZE_HI is set instead of cleared.
// Programs the registers, waits for the busy bit to clear, then clears the
// interrupt flag.
//   mmem_addr - main-memory address of the buffer.
//   aram_addr - ARAM address.
//   length    - transfer length in bytes.
static void __ARReadDMA(u32 mmem_addr, u32 aram_addr, u32 length)
{
// Main mem address
__DSPRegs[DSP_ARAM_DMA_MM_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_MM_HI] & ~0x03ff)
| (u16)(mmem_addr >> 16));
__DSPRegs[DSP_ARAM_DMA_MM_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_MM_LO] & ~0xffe0)
| (u16)(mmem_addr & 0xffff));
// ARAM address
__DSPRegs[DSP_ARAM_DMA_ARAM_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & ~0x03ff)
| (u16)(aram_addr >> 16));
__DSPRegs[DSP_ARAM_DMA_ARAM_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & ~0xffe0)
| (u16)(aram_addr & 0xffff));
// DMA buffer size
// Bit 15 set here; __ARWriteDMA clears it instead.
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= (u16)(__DSPRegs[DSP_ARAM_DMA_SIZE_HI] | 0x8000);
__DSPRegs[DSP_ARAM_DMA_SIZE_HI]
= (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x03ff)
| (u16)(length >> 16));
__DSPRegs[DSP_ARAM_DMA_SIZE_LO]
= (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & ~0xffe0)
| (u16)(length & 0xffff));
// Block until the transfer is reported finished, then acknowledge it.
__ARWaitForDMA();
__ARClearInterrupt();
}
// Determines the total ARAM size and stores it in __AR_Size and at physical
// address 0xD0 (uncached); called from ARInit().
//
// The internal size is fixed at 0x1000000. Expansion memory is probed by
// writing a dummy pattern at five addresses above the internal size, writing
// a test pattern at the base of that region, and reading the probe addresses
// back in turn. The first address that returns the test pattern selects the
// expansion size and the mode bits:
//   +0x200000  -> 0x200000  (mode unchanged)
//   +0x1000000 -> 0x400000  (mode |= 0x08)
//   +0x200     -> 0x800000  (mode |= 0x10)
//   +0x400000  -> 0x1000000 (mode |= 0x18)
//   none       -> 0x2000000 (mode |= 0x20)
// If the base read-back itself does not return the test pattern, the
// expansion size stays 0 and DSP_ARAM_SIZE is not rewritten.
static void __ARChecksize(void)
{
// 63-byte pads leave room for one 32-byte block at any 32-byte-aligned
// address inside them (up to 31 bytes of padding plus 32 bytes of data).
u8 test_data_pad[63];
u8 dummy_data_pad[63];
u8 buffer_pad[63];
u32* test_data;
u32* dummy_data;
u32* buffer;
u16 ARAM_mode;
u32 ARAM_size;
u32 i;
// Wait for bit 0 of DSP_ARAM_MODE to become set.
do {
} while (!(__DSPRegs[DSP_ARAM_MODE] & 1));
ARAM_mode = 3;
ARAM_size = __AR_InternalSize = 0x1000000;
__DSPRegs[DSP_ARAM_SIZE]
= ((__DSPRegs[DSP_ARAM_SIZE] & 0xFFFFFFC0) | ARAM_mode) | 0x20;
test_data = (void*)ALIGN_NEXT((u32)test_data_pad, 0x20);
dummy_data = (void*)ALIGN_NEXT((u32)dummy_data_pad, 0x20);
buffer = (void*)ALIGN_NEXT((u32)buffer_pad, 0x20);
for (i = 0; i < 8; i++) {
test_data[i] = 0xDEADBEEF;
dummy_data[i] = 0xBAD0BAD0;
}
DCFlushRange(test_data, 0x20);
DCFlushRange(dummy_data, 0x20);
__AR_ExpansionSize = 0;
// Fill every probe address with the dummy pattern first.
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x0, 0x20U);
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x200000, 0x20U);
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x1000000, 0x20U);
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x200, 0x20U);
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x400000, 0x20U);
memset(buffer, 0, 0x20);
DCFlushRange(buffer, 0x20);
// Write the test pattern at the base of the expansion region and read it back.
__ARWriteDMA((u32)test_data, ARAM_size, 0x20U);
DCInvalidateRange(buffer, 0x20);
__ARReadDMA((u32)buffer, ARAM_size, 0x20U);
PPCSync();
if (*buffer == *test_data) {
// Read each probe address in turn; only the first word is compared.
memset(buffer, 0, 0x20);
DCFlushRange(buffer, 0x20);
__ARReadDMA((u32)buffer, ARAM_size + 0x200000, 0x20U);
PPCSync();
if (*buffer == *test_data) {
ARAM_size += 0x200000;
__AR_ExpansionSize = 0x200000;
} else {
memset(buffer, 0, 0x20);
DCFlushRange(buffer, 0x20);
__ARReadDMA((u32)buffer, ARAM_size + 0x01000000, 0x20U);
PPCSync();
if (*buffer == *test_data) {
ARAM_mode |= 8;
ARAM_size += 0x400000;
__AR_ExpansionSize = 0x400000;
} else {
memset(buffer, 0, 0x20);
DCFlushRange(buffer, 0x20);
__ARReadDMA((u32)buffer, ARAM_size + 0x200, 0x20U);
PPCSync();
if (*buffer == *test_data) {
ARAM_mode |= 0x10;
ARAM_size += 0x800000;
__AR_ExpansionSize = 0x800000;
} else {
memset(buffer, 0, 0x20);
DCFlushRange(buffer, 0x20);
__ARReadDMA((u32)buffer, ARAM_size + 0x400000, 0x20U);
PPCSync();
if (*buffer == *test_data) {
ARAM_mode |= 0x18;
ARAM_size += 0x01000000;
__AR_ExpansionSize = 0x1000000;
} else {
ARAM_mode |= 0x20;
ARAM_size += 0x02000000;
__AR_ExpansionSize = 0x2000000;
}
}
}
}
// Commit the detected mode bits.
__DSPRegs[DSP_ARAM_SIZE]
= ((u16)(__DSPRegs[DSP_ARAM_SIZE] & 0xFFFFFFC0) | ARAM_mode);
}
// Publish the total size at physical address 0xD0 and in __AR_Size.
*(u32*)OSPhysicalToUncached(0xD0) = ARAM_size;
__AR_Size = ARAM_size;
}
+150
View File
@@ -0,0 +1,150 @@
#include <dolphin.h>
#include <dolphin/ar.h>
#include <macros.h>
#include "ar/__ar.h"
// Head and tail of the singly linked high-priority request queue.
static struct ARQRequest* __ARQRequestQueueHi;
static struct ARQRequest* __ARQRequestTailHi;
// Head and tail of the singly linked low-priority request queue.
static struct ARQRequest* __ARQRequestQueueLo;
static struct ARQRequest* __ARQRequestTailLo;
// Request whose DMA is currently in flight for each priority, or NULL.
static struct ARQRequest* __ARQRequestPendingHi;
static struct ARQRequest* __ARQRequestPendingLo;
// Callback that __ARQInterruptServiceRoutine runs on the next DMA completion.
// The low-priority one is only set once the final chunk has been started.
static ARQCallback __ARQCallbackHi;
static ARQCallback __ARQCallbackLo;
// Maximum bytes per low-priority DMA; ARQInit() sets it to 0x1000.
static u32 __ARQChunkSize;
// 1 once ARQInit() has completed.
static int __ARQ_init_flag;
inline void __ARQPopTaskQueueHi(void)
{
if (__ARQRequestQueueHi) {
if (__ARQRequestQueueHi->type == 0) {
ARStartDMA(__ARQRequestQueueHi->type, __ARQRequestQueueHi->source,
__ARQRequestQueueHi->dest, __ARQRequestQueueHi->length);
} else {
ARStartDMA(__ARQRequestQueueHi->type, __ARQRequestQueueHi->dest,
__ARQRequestQueueHi->source,
__ARQRequestQueueHi->length);
}
__ARQCallbackHi = __ARQRequestQueueHi->callback;
__ARQRequestPendingHi = __ARQRequestQueueHi;
__ARQRequestQueueHi = __ARQRequestQueueHi->next;
}
}
void __ARQServiceQueueLo(void)
{
if (__ARQRequestPendingLo == 0 && __ARQRequestQueueLo) {
__ARQRequestPendingLo = __ARQRequestQueueLo;
__ARQRequestQueueLo = __ARQRequestQueueLo->next;
}
if (__ARQRequestPendingLo) {
if (__ARQRequestPendingLo->length <= __ARQChunkSize) {
if (__ARQRequestPendingLo->type == 0) {
ARStartDMA(
__ARQRequestPendingLo->type, __ARQRequestPendingLo->source,
__ARQRequestPendingLo->dest, __ARQRequestPendingLo->length);
} else {
ARStartDMA(__ARQRequestPendingLo->type,
__ARQRequestPendingLo->dest,
__ARQRequestPendingLo->source,
__ARQRequestPendingLo->length);
}
__ARQCallbackLo = __ARQRequestPendingLo->callback;
} else if (__ARQRequestPendingLo->type == 0) {
ARStartDMA(__ARQRequestPendingLo->type,
__ARQRequestPendingLo->source,
__ARQRequestPendingLo->dest, __ARQChunkSize);
} else {
ARStartDMA(__ARQRequestPendingLo->type, __ARQRequestPendingLo->dest,
__ARQRequestPendingLo->source, __ARQChunkSize);
}
__ARQRequestPendingLo->length -= __ARQChunkSize;
__ARQRequestPendingLo->source += __ARQChunkSize;
__ARQRequestPendingLo->dest += __ARQChunkSize;
}
}
// Do-nothing callback; ARQPostRequest() stores it in requests that were
// posted with a NULL callback.
void __ARQCallbackHack(u32 unused)
{
}
// DMA-completion routine registered by ARQInit(). Runs the armed callback
// (high priority first, otherwise low priority) with the pending request's
// address and clears that pending slot, then starts the next transfer: the
// next high-priority request if any, otherwise the next low-priority chunk.
void __ARQInterruptServiceRoutine()
{
    if (__ARQCallbackHi != NULL) {
        __ARQCallbackHi((u32)__ARQRequestPendingHi);
        __ARQRequestPendingHi = NULL;
        __ARQCallbackHi = NULL;
    } else if (__ARQCallbackLo != NULL) {
        __ARQCallbackLo((u32)__ARQRequestPendingLo);
        __ARQRequestPendingLo = NULL;
        __ARQCallbackLo = NULL;
    }

    __ARQPopTaskQueueHi();

    if (__ARQRequestPendingHi == NULL) {
        __ARQServiceQueueLo();
    }
}
// One-time ARQ initialisation; later calls return immediately. Empties both
// queues, sets the chunk size to 0x1000, registers
// __ARQInterruptServiceRoutine as the ARAM DMA callback, and clears the
// pending requests and armed callbacks.
void ARQInit(void)
{
    if (__ARQ_init_flag == 1) {
        return;
    }

    __ARQRequestQueueHi = __ARQRequestQueueLo = NULL;
    __ARQChunkSize = 0x1000;

    ARRegisterDMACallback(__ARQInterruptServiceRoutine);

    __ARQRequestPendingHi = NULL;
    __ARQRequestPendingLo = NULL;
    __ARQCallbackHi = NULL;
    __ARQCallbackLo = NULL;

    __ARQ_init_flag = 1;
}
// Fills in `request` and appends it to the queue selected by `priority`.
//   request  - caller-owned request record; linked into the queue as-is.
//   owner    - stored in the request unchanged.
//   type     - ARQ_TYPE_MRAM_TO_ARAM or ARQ_TYPE_ARAM_TO_MRAM (asserted).
//   priority - ARQ_PRIORITY_LOW or ARQ_PRIORITY_HIGH (asserted); any other
//              value leaves the request unqueued.
//   source, dest, length - transfer parameters; length is asserted to be a
//              multiple of ARQ_DMA_ALIGNMENT.
//   callback - completion callback; NULL is replaced by __ARQCallbackHack.
// Queue manipulation runs with interrupts disabled. If no request of either
// priority is pending, the next transfer is started right away.
void ARQPostRequest(struct ARQRequest* request, u32 owner, u32 type,
                    u32 priority, u32 source, u32 dest, u32 length,
                    ARQCallback callback)
{
    int irq_state;

    ASSERTLINE(0x1A9, request);
    ASSERTLINE(0x1AA, (type == ARQ_TYPE_MRAM_TO_ARAM)
                          || (type == ARQ_TYPE_ARAM_TO_MRAM));
    ASSERTLINE(0x1AB, (priority == ARQ_PRIORITY_LOW)
                          || (priority == ARQ_PRIORITY_HIGH));
    ASSERTLINE(0x1AE, (length % ARQ_DMA_ALIGNMENT) == 0);

    request->next = NULL;
    request->owner = owner;
    request->type = type;
    request->source = source;
    request->dest = dest;
    request->length = length;
    request->callback = callback ? callback : __ARQCallbackHack;

    irq_state = OSDisableInterrupts();

    if (priority == ARQ_PRIORITY_LOW) {
        if (__ARQRequestQueueLo) {
            __ARQRequestTailLo->next = request;
        } else {
            __ARQRequestQueueLo = request;
        }
        __ARQRequestTailLo = request;
    } else if (priority == ARQ_PRIORITY_HIGH) {
        if (__ARQRequestQueueHi) {
            __ARQRequestTailHi->next = request;
        } else {
            __ARQRequestQueueHi = request;
        }
        __ARQRequestTailHi = request;
    }

    // Nothing in flight: start the next transfer immediately.
    if (__ARQRequestPendingHi == NULL && __ARQRequestPendingLo == NULL) {
        __ARQPopTaskQueueHi();
        if (__ARQRequestPendingHi == NULL) {
            __ARQServiceQueueLo();
        }
    }

    OSRestoreInterrupts(irq_state);
}
+563
View File
@@ -0,0 +1,563 @@
#include <dolphin.h>
#include <dolphin/mtx.h>
#include <macros.h>
// Value passed as the last operand of the psq_l / psq_st instructions below.
#define qr0 0
// unsorted externs
extern f32 sinf(f32);
extern f32 cosf(f32);
extern f32 tanf(f32);
// Constant pair (0.0f, 1.0f); PSMTXConcat loads it into f31 and uses it to
// add each row's fourth element of mA into the result.
// NOTE(review): the original comment labelled this ".sbss", but the object
// has an initializer, so it would normally be placed in an initialized-data
// section. Confirm against the link map.
static float Unit01[2] = { 0.0f, 1.0f };
// Writes the identity into the 3x4 matrix `mtx`: 1.0f on the diagonal and
// 0.0f everywhere else, including the fourth column.
// Fix: the fourth column (mtx[i][3]) used to be left untouched, so stale
// values survived; PSMTXIdentity in this file writes all twelve elements.
// The first use of 1.0f still precedes the first use of 0.0f.
// MEME: if this function is not here, 0.0f and 1.0f have wrong order in .sdata2
void C_MTXIdentity(GC_Mtx mtx)
{
    mtx[0][0] = 1.0f;
    mtx[0][1] = 0.0f;
    mtx[0][2] = 0.0f;
    mtx[0][3] = 0.0f;
    mtx[1][0] = 0.0f;
    mtx[1][1] = 1.0f;
    mtx[1][2] = 0.0f;
    mtx[1][3] = 0.0f;
    mtx[2][0] = 0.0f;
    mtx[2][1] = 0.0f;
    mtx[2][2] = 1.0f;
    mtx[2][3] = 0.0f;
}
// Paired-single version of the identity: writes all 48 bytes of `m` with six
// 8-byte stores.
//   c_01 = (0, 1) is stored at offset 16 -> m[1][0], m[1][1]
//   c_10 = (1, 0) is stored at offsets 0 and 40 -> m[0][0..1], m[2][2..3]
//   (0, 0) is stored at offsets 8, 24 and 32.
// The stores are only emitted when __MWERKS__ is defined.
void PSMTXIdentity(register GC_Mtx m)
{
register f32 c_zero = 0.0f;
register f32 c_one = 1.0f;
register f32 c_01;
register f32 c_10;
#ifdef __MWERKS__ // clang-format off
asm {
psq_st c_zero, 8(m), 0, qr0
ps_merge01 c_01, c_zero, c_one
psq_st c_zero, 24(m), 0, qr0
ps_merge10 c_10, c_one, c_zero
psq_st c_zero, 32(m), 0, qr0
psq_st c_01, 16(m), 0, qr0
psq_st c_10, 0(m), 0, qr0
psq_st c_10, 40(m), 0, qr0
}
#endif // clang-format on
}
// Copies a matrix from `src` to `dst` as six paired-single load/store pairs
// at byte offsets 0, 8, 16, 24, 32 and 40 (48 bytes in total). Each pair is
// loaded and stored before the next one is loaded.
asm void PSMTXCopy(const register GC_Mtx src, register GC_Mtx dst)
{
#ifdef __MWERKS__ // clang-format off
psq_l f0, 0(src), 0, qr0
psq_st f0, 0(dst), 0, qr0
psq_l f1, 8(src), 0, qr0
psq_st f1, 8(dst), 0, qr0
psq_l f2, 16(src), 0, qr0
psq_st f2, 16(dst), 0, qr0
psq_l f3, 24(src), 0, qr0
psq_st f3, 24(dst), 0, qr0
psq_l f4, 32(src), 0, qr0
psq_st f4, 32(dst), 0, qr0
psq_l f5, 40(src), 0, qr0
psq_st f5, 40(dst), 0, qr0
#endif // clang-format on
}
// Computes mAB = mA * mB for 3x4 matrices using paired-single arithmetic.
// Each output row is accumulated as
//   row_i(mAB) = mA[i][0]*row_0(mB) + mA[i][1]*row_1(mB) + mA[i][2]*row_2(mB)
// and mA[i][3] is then added to the row's last element by multiplying it with
// the (0, 1) pair loaded from Unit01.
// All of mA and mB is loaded into registers before the first store to mAB.
// f14, f15 and f31 are saved in a 64-byte stack frame and restored before
// returning. The function manages its own frame (nofralloc) and ends with an
// explicit blr.
asm void PSMTXConcat(const register GC_Mtx mA, const register GC_Mtx mB, register GC_Mtx mAB)
{
#ifdef __MWERKS__ // clang-format off
nofralloc
stwu r1, -64(r1)
psq_l f0, 0(mA), 0, qr0
stfd f14, 8(r1)
psq_l f6, 0(mB), 0, qr0
lis r6, Unit01@ha
psq_l f7, 8(mB), 0, qr0
stfd f15, 16(r1)
addi r6, r6, Unit01@l
stfd f31, 40(r1)
psq_l f8, 16(mB), 0, qr0
ps_muls0 f12, f6, f0
psq_l f2, 16(mA), 0, qr0
ps_muls0 f13, f7, f0
psq_l f31, 0(r6), 0, qr0
ps_muls0 f14, f6, f2
psq_l f9, 24(mB), 0, qr0
ps_muls0 f15, f7, f2
psq_l f1, 8(mA), 0, qr0
ps_madds1 f12, f8, f0, f12
psq_l f3, 24(mA), 0, qr0
ps_madds1 f14, f8, f2, f14
psq_l f10, 32(mB), 0, qr0
ps_madds1 f13, f9, f0, f13
psq_l f11, 40(mB), 0, qr0
ps_madds1 f15, f9, f2, f15
psq_l f4, 32(mA), 0, qr0
psq_l f5, 40(mA), 0, qr0
ps_madds0 f12, f10, f1, f12
ps_madds0 f13, f11, f1, f13
ps_madds0 f14, f10, f3, f14
ps_madds0 f15, f11, f3, f15
psq_st f12, 0(mAB), 0, qr0
ps_muls0 f2, f6, f4
ps_madds1 f13, f31, f1, f13
ps_muls0 f0, f7, f4
psq_st f14, 16(mAB), 0, qr0
ps_madds1 f15, f31, f3, f15
psq_st f13, 8(mAB), 0, qr0
ps_madds1 f2, f8, f4, f2
ps_madds1 f0, f9, f4, f0
ps_madds0 f2, f10, f5, f2
lfd f14, 8(r1)
psq_st f15, 24(mAB), 0, qr0
ps_madds0 f0, f11, f5, f0
psq_st f2, 32(mAB), 0, qr0
ps_madds1 f0, f31, f5, f0
lfd f15, 16(r1)
psq_st f0, 40(mAB), 0, qr0
lfd f31, 40(r1)
addi r1, r1, 64
blr
#endif // clang-format on
}
// Inverts the 3x4 matrix `src` into `inv`.
// Returns 0 (r3) without writing `inv` when the computed determinant of the
// upper-left 3x3 block does not compare unequal to zero, and 1 after writing
// all twelve elements otherwise.
// The reciprocal of the determinant is obtained with fres and refined by one
// step of the form 2*e - det*e*e before the cofactors are scaled. The fourth
// column of the result is built from the scaled cofactors and the fourth
// column of `src` (offsets 12, 28, 44), negated by the ps_nmadd instructions.
// No explicit blr follows the last store; the zero-determinant path returns
// through its own blr.
asm u32 PSMTXInverse(const register GC_Mtx src, register GC_Mtx inv)
{
#ifdef __MWERKS__ // clang-format off
psq_l f0, 0(src), 1, qr0
psq_l f1, 4(src), 0, qr0
psq_l f2, 16(src), 1, qr0
ps_merge10 f6, f1, f0
psq_l f3, 20(src), 0, qr0
psq_l f4, 32(src), 1, qr0
ps_merge10 f7, f3, f2
psq_l f5, 36(src), 0, qr0
ps_mul f11, f3, f6
ps_mul f13, f5, f7
ps_merge10 f8, f5, f4
ps_msub f11, f1, f7, f11
ps_mul f12, f1, f8
ps_msub f13, f3, f8, f13
ps_mul f10, f3, f4
ps_msub f12, f5, f6, f12
ps_mul f9, f0, f5
ps_mul f8, f1, f2
ps_sub f6, f6, f6
ps_msub f10, f2, f5, f10
ps_mul f7, f0, f13
ps_msub f9, f1, f4, f9
ps_madd f7, f2, f12, f7
ps_msub f8, f0, f3, f8
ps_madd f7, f4, f11, f7
ps_cmpo0 cr0, f7, f6
bne skip_return
li r3, 0
blr
skip_return:
fres f0, f7
ps_add f6, f0, f0
ps_mul f5, f0, f0
ps_nmsub f0, f7, f5, f6
lfs f1, 12(src)
ps_muls0 f13, f13, f0
lfs f2, 28(src)
ps_muls0 f12, f12, f0
lfs f3, 44(src)
ps_muls0 f11, f11, f0
ps_merge00 f5, f13, f12
ps_muls0 f10, f10, f0
ps_merge11 f4, f13, f12
ps_muls0 f9, f9, f0
psq_st f5, 0(inv), 0, qr0
ps_mul f6, f13, f1
psq_st f4, 16(inv), 0, qr0
ps_muls0 f8, f8, f0
ps_madd f6, f12, f2, f6
psq_st f10, 32(inv), 1, qr0
ps_nmadd f6, f11, f3, f6
psq_st f9, 36(inv), 1, qr0
ps_mul f7, f10, f1
ps_merge00 f5, f11, f6
psq_st f8, 40(inv), 1, qr0
ps_merge11 f4, f11, f6
psq_st f5, 8(inv), 0, qr0
ps_madd f7, f9, f2, f7
psq_st f4, 24(inv), 0, qr0
ps_nmadd f7, f8, f3, f7
li r3, 1
psq_st f7, 44(inv), 1, qr0
#endif // clang-format on
}
// Builds a rotation matrix about the given principal axis from an angle in
// radians: computes the sine and cosine of `rad` and forwards them to
// PSMTXRotTrig().
void PSMTXRotRad(GC_Mtx m, char axis, f32 rad)
{
    f32 sin_a;
    f32 cos_a;

    sin_a = sinf(rad);
    cos_a = cosf(rad);

    PSMTXRotTrig(m, axis, sin_a, cos_a);
}
// Builds a 3x4 rotation matrix about a principal axis from a sine/cosine pair.
//   axis - 'x', 'y' or 'z'; the `ori axis, axis, 0x20` sets bit 5, so the
//          upper-case letters select the same cases. Any other value
//          branches straight to _end and leaves `m` untouched.
//   sinA, cosA - sine and cosine of the rotation angle.
// Resulting rows, as written by the stores in each case:
//   x: [1 0 0 0] [0 c -s 0] [0 s c 0]
//   y: [c 0 s 0] [0 1 0 0] [-s 0 c 0]
//   z: [c -s 0 0] [s c 0 0] [0 0 1 0]
void PSMTXRotTrig(register GC_Mtx m, register char axis, register f32 sinA,
register f32 cosA)
{
register f32 fc0;
register f32 fc1;
register f32 nsinA;
register f32 fw0, fw1, fw2, fw3;
fc0 = 0.0f;
fc1 = 1.0f;
#ifdef __MWERKS__ // clang-format off
asm {
ori axis, axis, 0x20
ps_neg nsinA, sinA
cmplwi axis, 'x'
beq _case_x
cmplwi axis, 'y'
beq _case_y
cmplwi axis, 'z'
beq _case_z
b _end
_case_x:
psq_st fc1, 0(m), 1, qr0
psq_st fc0, 4(m), 0, qr0
ps_merge00 fw0, sinA, cosA
psq_st fc0, 12(m), 0, qr0
ps_merge00 fw1, cosA, nsinA
psq_st fc0, 28(m), 0, qr0
psq_st fc0, 44(m), 1, qr0
psq_st fw0, 36(m), 0, qr0
psq_st fw1, 20(m), 0, qr0
b _end;
_case_y:
ps_merge00 fw0, cosA, fc0
ps_merge00 fw1, fc0, fc1
psq_st fc0, 24(m), 0, qr0
psq_st fw0, 0(m), 0, qr0
ps_merge00 fw2, nsinA, fc0
ps_merge00 fw3, sinA, fc0
psq_st fw0, 40(m), 0, qr0
psq_st fw1, 16(m), 0, qr0
psq_st fw3, 8(m), 0, qr0
psq_st fw2, 32(m), 0, qr0
b _end;
_case_z:
psq_st fc0, 8(m), 0, qr0
ps_merge00 fw0, sinA, cosA
ps_merge00 fw2, cosA, nsinA
psq_st fc0, 24(m), 0, qr0
psq_st fc0, 32(m), 0, qr0
ps_merge00 fw1, fc1, fc0
psq_st fw0, 16(m), 0, qr0
psq_st fw2, 0(m), 0, qr0
psq_st fw1, 40(m), 0, qr0
_end:
}
#endif // clang-format on
}
// Empty placeholder: the body contains no statements, and nothing in the
// code visible here calls it. PSMTXRotAxisRad below does the work inline.
static inline void __PSMTXRotAxisRadInternal(register GC_Mtx m,
const register Vec* axis,
register f32 sT, register f32 cT)
{
}
// Builds a 3x4 rotation matrix for a rotation of `rad` radians about an
// arbitrary axis.
//   m    - destination matrix; all 48 bytes are written by six paired stores.
//   axis - rotation axis; it is normalised into a local copy with
//          PSVECNormalize() before use, so the caller's vector is not modified.
//   rad  - angle in radians.
// The C part computes sin(rad), cos(rad) and 1 - cos(rad). The asm block then
// combines them with the normalised axis components. Inside the asm block the
// `rad` register is reused as scratch for the axis x/y pair once the angle is
// no longer needed.
void PSMTXRotAxisRad(register GC_Mtx m, const Vec* axis, register f32 rad)
{
register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
register f32 sT;
register f32 cT;
register f32 oneMinusCosT;
register f32 zero;
Vec axisNormalized;
register Vec* axisNormalizedPtr;
zero = 0.0f;
axisNormalizedPtr = &axisNormalized;
sT = sinf(rad);
cT = cosf(rad);
oneMinusCosT = 1.0f - cT;
PSVECNormalize(axis, axisNormalizedPtr);
// `rad` reused here -- absolutely disgusting.
// Also might've been an inline as in prime.
#ifdef __MWERKS__ // clang-format off
asm {
psq_l rad, 0x0(axisNormalizedPtr), 0, qr0
lfs tmp1, 0x8(axisNormalizedPtr)
ps_merge00 tmp0, cT, cT
ps_muls0 tmp4, rad, oneMinusCosT
ps_muls0 tmp5, tmp1, oneMinusCosT
ps_muls1 tmp3, tmp4, rad
ps_muls0 tmp2, tmp4, rad
ps_muls0 rad, rad, sT
ps_muls0 tmp4, tmp4, tmp1
fnmsubs tmp6, tmp1, sT, tmp3
fmadds tmp7, tmp1, sT, tmp3
ps_neg tmp9, rad
ps_sum0 tmp8, tmp4, zero, rad
ps_sum0 tmp2, tmp2, tmp6, tmp0
ps_sum1 tmp3, tmp0, tmp7, tmp3
ps_sum0 tmp6, tmp9, zero, tmp4
ps_sum0 tmp9, tmp4, tmp4, tmp9
psq_st tmp8, 0x8(m), 0, qr0
ps_muls0 tmp5, tmp5, tmp1
psq_st tmp2, 0x0(m), 0, qr0
ps_sum1 tmp4, rad, tmp9, tmp4
psq_st tmp3, 0x10(m), 0, qr0
ps_sum0 tmp5, tmp5, zero, tmp0
psq_st tmp6, 0x18(m), 0, qr0
psq_st tmp4, 0x20(m), 0, qr0
psq_st tmp5, 0x28(m), 0, qr0
}
#endif // clang-format on
}
// Builds a 3x4 translation matrix: identity in the 3x3 part with
// (xT, yT, zT) in the fourth column.
// Store map: xT -> offset 12, yT -> 28, zT -> 44; 1.0 -> offsets 0, 20, 40;
// 0.0 -> offsets 4, 8 (pair), 16, 24, 32, 36 (pair).
void PSMTXTrans(register GC_Mtx m, register f32 xT, register f32 yT,
register f32 zT)
{
register f32 c0 = 0.0F;
register f32 c1 = 1.0F;
#ifdef __MWERKS__ // clang-format off
asm {
stfs xT, 12(m)
stfs yT, 28(m)
psq_st c0, 4(m), 0, qr0
psq_st c0, 32(m), 0, qr0
stfs c0, 16(m)
stfs c1, 20(m)
stfs c0, 24(m)
stfs c1, 40(m)
stfs zT, 44(m)
stfs c1, 0(m)
}
#endif // clang-format on
}
// Copies `src` to `dst` while adding (xT, yT, zT) to the fourth column.
// Each ps_sum1 keeps the first element of the loaded pair and replaces the
// second with translation + second element, so only the elements at offsets
// 12, 28 and 44 change. All six pairs are loaded from `src` before the first
// store to `dst`.
asm void PSMTXTransApply(const register GC_Mtx src, register GC_Mtx dst, register f32 xT,
register f32 yT, register f32 zT)
{
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l fp4, 0(src), 0, qr0
psq_l fp5, 8(src), 0, qr0
psq_l fp7, 24(src), 0, qr0
psq_l fp8, 40(src), 0, qr0
ps_sum1 fp5, xT, fp5, fp5
psq_l fp6, 16(src), 0, qr0
ps_sum1 fp7, yT, fp7, fp7
psq_l fp9, 32(src), 0, qr0
ps_sum1 fp8, zT, fp8, fp8
psq_st fp4, 0(dst), 0, qr0
psq_st fp5, 8(dst), 0, qr0
psq_st fp6, 16(dst), 0, qr0
psq_st fp7, 24(dst), 0, qr0
psq_st fp9, 32(dst), 0, qr0
psq_st fp8, 40(dst), 0, qr0
blr
#endif // clang-format on
}
// Builds a 3x4 scale matrix: xS, yS, zS on the diagonal (offsets 0, 20, 40)
// and 0.0 in every other element, including the fourth column.
void PSMTXScale(register GC_Mtx m, register f32 xS, register f32 yS,
register f32 zS)
{
register f32 c0 = 0.0F;
#ifdef __MWERKS__ // clang-format off
asm {
stfs xS, 0(m)
psq_st c0, 4(m), 0, 0
psq_st c0, 12(m), 0, 0
stfs yS, 20(m)
psq_st c0, 24(m), 0, 0
psq_st c0, 32(m), 0, 0
stfs zS, 40(m)
stfs c0, 44(m)
}
#endif // clang-format on
}
// Writes `src` with each row scaled into `dst`: row 0 (offsets 0, 8) is
// multiplied by xS, row 1 (offsets 16, 24) by yS, and row 2 (offsets 32, 40)
// by zS. All four elements of a row are scaled, including the fourth column.
// Loads and stores are interleaved: the first store to `dst` is issued
// before the last pair of `src` (offset 40) has been loaded.
asm void PSMTXScaleApply(const register GC_Mtx src, register GC_Mtx dst, register f32 xS,
register f32 yS, register f32 zS)
{
#ifdef __MWERKS__ // clang-format off
nofralloc
psq_l fp4, 0(src), 0, 0
psq_l fp5, 8(src), 0, 0
ps_muls0 fp4, fp4, xS
psq_l fp6, 16(src), 0, 0
ps_muls0 fp5, fp5, xS
psq_l fp7, 24(src), 0, 0
ps_muls0 fp6, fp6, yS
psq_l fp8, 32(src), 0, 0
psq_st fp4, 0(dst), 0, 0
ps_muls0 fp7, fp7, yS
psq_l fp2, 40(src), 0, 0
psq_st fp5, 8(dst), 0, 0
ps_muls0 fp8, fp8, zS
psq_st fp6, 16(dst), 0, 0
ps_muls0 fp2, fp2, zS
psq_st fp7, 24(dst), 0, 0
psq_st fp8, 32(dst), 0, 0
psq_st fp2, 40(dst), 0, 0
blr
#endif // clang-format on
}
// Builds a 3x4 matrix from the quaternion `q`, which is read as two pairs of
// floats at offsets 0 and 8.
// `scale` is derived from the sum of the four squared components: fres gives
// a reciprocal estimate, one refinement step of the form e*(2 - sum*e) is
// applied, and the result is doubled. The quaternion therefore does not need
// to be pre-normalised, but an all-zero quaternion has a zero sum.
// The fourth column is written as zero (offsets 12 and 44 directly, offset 28
// as the second half of the pair stored at 24).
// c_zero and c_two are derived from c_one inside the asm block (1-1 and 1+1).
void PSMTXQuat(register GC_Mtx m, const register PSQuaternion* q)
{
register f32 c_zero, c_one, c_two, scale;
register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
c_one = 1.0F;
#ifdef __MWERKS__ // clang-format off
asm {
psq_l tmp0, 0(q), 0, 0
psq_l tmp1, 8(q), 0, 0
fsubs c_zero, c_one, c_one
fadds c_two, c_one, c_one
ps_mul tmp2, tmp0, tmp0
ps_merge10 tmp5, tmp0, tmp0
ps_madd tmp4, tmp1, tmp1, tmp2
ps_mul tmp3, tmp1, tmp1
ps_sum0 scale, tmp4, tmp4, tmp4
ps_muls1 tmp7, tmp5, tmp1
fres tmp9, scale
ps_sum1 tmp4, tmp3, tmp4, tmp2
ps_nmsub scale, scale, tmp9, c_two
ps_muls1 tmp6, tmp1, tmp1
ps_mul scale, tmp9, scale
ps_sum0 tmp2, tmp2, tmp2, tmp2
fmuls scale, scale, c_two
ps_madd tmp8, tmp0, tmp5, tmp6
ps_msub tmp6, tmp0, tmp5, tmp6
psq_st c_zero, 12(m), 1, 0
ps_nmsub tmp2, tmp2, scale, c_one
ps_nmsub tmp4, tmp4, scale, c_one
psq_st c_zero, 44(m), 1, 0
ps_mul tmp8, tmp8, scale
ps_mul tmp6, tmp6, scale
psq_st tmp2, 40(m), 1, 0
ps_madds0 tmp5, tmp0, tmp1, tmp7
ps_merge00 tmp1, tmp8, tmp4
ps_nmsub tmp7, tmp7, c_two, tmp5
ps_merge10 tmp0, tmp4, tmp6
psq_st tmp1, 16(m), 0, 0
ps_mul tmp5, tmp5, scale
ps_mul tmp7, tmp7, scale
psq_st tmp0, 0(m), 0, 0
psq_st tmp5, 8(m), 1, 0
ps_merge10 tmp3, tmp7, c_zero
ps_merge01 tmp9, tmp7, tmp5
psq_st tmp3, 24(m), 0, 0
psq_st tmp9, 32(m), 0, 0
}
#endif // clang-format on
}
// Builds a 3x4 view matrix for a camera at `camPos` looking at `target`.
//   m      - destination matrix.
//   camPos - camera position.
//   camUp  - camera up direction (need not be orthogonal to the view
//            direction; a corrected up vector is derived below).
//   target - point being looked at.
// The basis is built as: back = normalize(camPos - target),
// right = normalize(camUp x back), up = back x right. Each row holds one
// basis vector followed by minus the dot product of that vector with camPos.
void C_MTXLookAt(GC_Mtx m, const Vec* camPos, const Vec* camUp, const Vec* target)
{
    Vec back;
    Vec right;
    Vec up;

    ASSERTMSGLINE(2105, m, "MTXLookAt(): NULL MtxPtr 'm' ");
    ASSERTMSGLINE(2106, camPos, "MTXLookAt(): NULL VecPtr 'camPos' ");
    ASSERTMSGLINE(2107, camUp, "MTXLookAt(): NULL VecPtr 'camUp' ");
    ASSERTMSGLINE(2108, target, "MTXLookAt(): NULL Point3dPtr 'target' ");

    // Vector from the target towards the camera.
    back.x = camPos->x - target->x;
    back.y = camPos->y - target->y;
    back.z = camPos->z - target->z;
    VECNormalize(&back, &back);

    VECCrossProduct(camUp, &back, &right);
    VECNormalize(&right, &right);

    VECCrossProduct(&back, &right, &up);

    // Row 0: right vector.
    m[0][0] = right.x;
    m[0][1] = right.y;
    m[0][2] = right.z;
    m[0][3] = -((camPos->z * right.z) + ((camPos->x * right.x) + (camPos->y * right.y)));

    // Row 1: corrected up vector.
    m[1][0] = up.x;
    m[1][1] = up.y;
    m[1][2] = up.z;
    m[1][3] = -((camPos->z * up.z) + ((camPos->x * up.x) + (camPos->y * up.y)));

    // Row 2: back vector.
    m[2][0] = back.x;
    m[2][1] = back.y;
    m[2][2] = back.z;
    m[2][3] = -((camPos->z * back.z) + ((camPos->x * back.x) + (camPos->y * back.y)));
}
// Fills the 3x4 matrix `m` with a frustum-style projection for texture
// (light) coordinates.
//   t, b, l, r     - top, bottom, left and right extents of the near plane.
//   n              - near distance.
//   scaleS, scaleT - scale factors applied to rows 0 and 1.
//   transS, transT - offsets subtracted in the third column of rows 0 and 1.
// Row 2 is (0, 0, -1, 0). r == l or t == b leads to a division by zero.
void C_MTXLightFrustum(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 scaleS,
                       f32 scaleT, f32 transS, f32 transT)
{
    f32 inv_width = 1 / (r - l);
    f32 inv_height = 1 / (t - b);

    m[0][0] = (scaleS * (2 * n * inv_width));
    m[0][1] = 0;
    m[0][2] = (scaleS * (inv_width * (r + l))) - transS;
    m[0][3] = 0;

    m[1][0] = 0;
    m[1][1] = (scaleT * (2 * n * inv_height));
    m[1][2] = (scaleT * (inv_height * (t + b))) - transT;
    m[1][3] = 0;

    m[2][0] = 0;
    m[2][1] = 0;
    m[2][2] = -1;
    m[2][3] = 0;
}
// Fills the three rows of m with a perspective-style projection whose S and T
// outputs are scaled by scaleS/scaleT and offset by transS/transT.
//
//   fovY      - vertical field of view in degrees (it is multiplied by
//               0.017453293f, i.e. pi/180, before being passed to tanf).
//   aspect    - divisor applied to the row 0 scale term.
//   scaleS/T  - scale applied to the S (row 0) and T (row 1) terms.
//   transS/T  - values negated into the third column of rows 0 and 1.
//
// Row 2 is the constant (0, 0, -1, 0).
void C_MTXLightPerspective(GC_Mtx m, f32 fovY, f32 aspect, f32 scaleS, f32 scaleT,
                           f32 transS, f32 transT)
{
    f32 halfAngle;
    f32 cotHalf;

    // Half the field of view, converted from degrees to radians.
    halfAngle = 0.5f * fovY;
    halfAngle = halfAngle * 0.017453293f;
    cotHalf = 1.0f / tanf(halfAngle);

    // Row 0: S coordinate.
    m[0][0] = scaleS * (cotHalf / aspect);
    m[0][1] = 0.0f;
    m[0][2] = -transS;
    m[0][3] = 0.0f;

    // Row 1: T coordinate.
    m[1][0] = 0.0f;
    m[1][1] = cotHalf * scaleT;
    m[1][2] = -transT;
    m[1][3] = 0.0f;

    // Row 2: fixed (0, 0, -1, 0).
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = -1.0f;
    m[2][3] = 0.0f;
}
// Fills the three rows of m with an orthographic-style projection whose S and
// T outputs are scaled by scaleS/scaleT and offset by transS/transT.
//
//   t, b, l, r  - top, bottom, left and right bounds of the volume.
//   scaleS/T    - scale applied to the S (row 0) and T (row 1) terms.
//   transS/T    - translation added in the fourth column of rows 0 and 1.
//
// Row 2 is the constant (0, 0, 0, 1). r == l or t == b divides by zero;
// no check is performed here.
void C_MTXLightOrtho(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 scaleS, f32 scaleT,
                     f32 transS, f32 transT)
{
    f32 invWidth;
    f32 invHeight;

    // Row 0: S coordinate.
    invWidth = 1.0f / (r - l);
    m[0][0] = 2.0f * invWidth * scaleS;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = transS + scaleS * (invWidth * -(r + l));

    // Row 1: T coordinate.
    invHeight = 1.0f / (t - b);
    m[1][0] = 0.0f;
    m[1][1] = 2.0f * invHeight * scaleT;
    m[1][2] = 0.0f;
    m[1][3] = transT + scaleT * (invHeight * -(t + b));

    // Row 2: fixed (0, 0, 0, 1).
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    m[2][2] = 0.0f;
    m[2][3] = 1.0f;
}
+88
View File
@@ -0,0 +1,88 @@
#include <dolphin.h>
#include <dolphin/mtx.h>
#include <macros.h>
extern f32 tanf(f32);
// NOTE: this is not present in SMS but needed for .sdata2 to match
// stolen from prime
// Writes a 4x4 perspective projection for the frustum described by its
// top/bottom/left/right bounds and the near/far distances n and f.
//
// The depth row is (0, 0, -n/(f-n), -(f*n)/(f-n)) and the last row is
// (0, 0, -1, 0). r == l, t == b or f == n divides by zero; no check is
// performed here.
void C_MTXFrustum(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
{
    f32 recip;

    // Row 0: horizontal scale and off-centre term.
    recip = 1.0f / (r - l);
    m[0][0] = 2 * n * recip;
    m[0][1] = 0.0f;
    m[0][2] = (r + l) * recip;
    m[0][3] = 0.0f;

    // Row 1: vertical scale and off-centre term.
    recip = 1.0f / (t - b);
    m[1][0] = 0.0f;
    m[1][1] = 2 * n * recip;
    m[1][2] = (t + b) * recip;
    m[1][3] = 0.0f;

    // Row 2: depth mapping.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    recip = 1.0f / (f - n);
    m[2][2] = -n * recip;
    m[2][3] = -(f * n) * recip;

    // Row 3: fixed (0, 0, -1, 0).
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = -1.0f;
    m[3][3] = 0.0f;
}
// Writes a 4x4 perspective projection from a vertical field of view.
//
//   fovY    - vertical field of view in degrees (it is multiplied by
//             0.017453293f, i.e. pi/180, before being passed to tanf).
//   aspect  - divisor applied to the horizontal scale term.
//   n, f    - near and far distances; f == n divides by zero (unchecked).
//
// The depth row is (0, 0, -n/(f-n), -(f*n)/(f-n)) and the last row is
// (0, 0, -1, 0).
//
// NOTE(review): m is declared GC_Mtx, yet row 3 is written below, while the
// sibling C_MTXFrustum takes Mtx44. If GC_Mtx is the 3x4 type, this parameter
// should presumably be Mtx44 as well -- confirm against the header.
void C_MTXPerspective(GC_Mtx m, f32 fovY, f32 aspect, f32 n, f32 f)
{
    f32 halfAngle;
    f32 cotHalf;
    f32 invDepth;

    // Half the field of view, converted from degrees to radians.
    halfAngle = 0.5f * fovY;
    halfAngle = halfAngle * 0.017453293f;
    cotHalf = 1.0f / tanf(halfAngle);

    // Row 0: horizontal scale.
    m[0][0] = cotHalf / aspect;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = 0.0f;

    // Row 1: vertical scale.
    m[1][0] = 0.0f;
    m[1][1] = cotHalf;
    m[1][2] = 0.0f;
    m[1][3] = 0.0f;

    // Row 2: depth mapping.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    invDepth = 1.0f / (f - n);
    m[2][2] = -n * invDepth;
    m[2][3] = invDepth * -(f * n);

    // Row 3: fixed (0, 0, -1, 0).
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = -1.0f;
    m[3][3] = 0.0f;
}
// Writes a 4x4 orthographic projection for the volume bounded by
// top/bottom/left/right and the near/far distances n and f.
//
// The depth row is (0, 0, -1/(f-n), -f/(f-n)) and the last row is
// (0, 0, 0, 1). r == l, t == b or f == n divides by zero; no check is
// performed here.
//
// NOTE(review): m is declared GC_Mtx, yet row 3 is written below, while the
// sibling C_MTXFrustum takes Mtx44. If GC_Mtx is the 3x4 type, this parameter
// should presumably be Mtx44 as well -- confirm against the header.
void C_MTXOrtho(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
{
    f32 recip;

    // Row 0: horizontal scale and offset.
    recip = 1.0f / (r - l);
    m[0][0] = 2.0f * recip;
    m[0][1] = 0.0f;
    m[0][2] = 0.0f;
    m[0][3] = recip * -(r + l);

    // Row 1: vertical scale and offset.
    recip = 1.0f / (t - b);
    m[1][0] = 0.0f;
    m[1][1] = 2.0f * recip;
    m[1][2] = 0.0f;
    m[1][3] = recip * -(t + b);

    // Row 2: depth mapping.
    m[2][0] = 0.0f;
    m[2][1] = 0.0f;
    recip = 1.0f / (f - n);
    m[2][2] = -1.0f * recip;
    m[2][3] = -f * recip;

    // Row 3: fixed (0, 0, 0, 1).
    m[3][0] = 0.0f;
    m[3][1] = 0.0f;
    m[3][2] = 0.0f;
    m[3][3] = 1.0f;
}
+103
View File
@@ -0,0 +1,103 @@
#include <dolphin.h>
#include <dolphin/mtx.h>
#define qr0 0
// Paired-single point transform: dst = m * (src.x, src.y, src.z, 1).
//
// Only byte offsets 0..47 of m are read (three groups of 16 bytes, one per
// output component). Each group is dotted with (x, y, z, 1), so the fourth
// column acts as a translation.
// Both halves of src are loaded into f0/f1 before the first store to dst, so
// dst may be the same object as src.
// NOTE(review): m is typed Mtx44 here although only three rows are read, and
// the sibling PSMTXMultVecArray takes GC_Mtx -- confirm the intended type.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSMTXMultVec(const register Mtx44 m, const register Vec* src, register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
nofralloc
// f0 = (x, y); f1 = (z, 1.0): a one-element psq_l puts 1.0 in the second slot.
psq_l f0, Vec.x(src), 0, qr0
psq_l f2, 0(m), 0, qr0
psq_l f1, Vec.z(src), 1, qr0
// Row 0: f5 = (m00*x + m02*z, m01*y + m03*1); ps_sum0 adds the two slots.
ps_mul f4, f2, f0
psq_l f3, 8(m), 0, qr0
ps_madd f5, f3, f1, f4
psq_l f8, 16(m), 0, qr0
// The third operand of ps_sum0 only feeds slot 1, which is never stored.
ps_sum0 f6, f5, f6, f5
psq_l f9, 24(m), 0, qr0
// Row 1, same pattern, interleaved with the row 0 store.
ps_mul f10, f8, f0
psq_st f6, Vec.x(dst), 1, qr0
ps_madd f11, f9, f1, f10
psq_l f2, 32(m), 0, qr0
ps_sum0 f12, f11, f12, f11
psq_l f3, 40(m), 0, qr0
// Row 2, same pattern, interleaved with the row 1 store.
ps_mul f4, f2, f0
psq_st f12, Vec.y(dst), 1, qr0
ps_madd f5, f3, f1, f4
ps_sum0 f6, f5, f6, f5
psq_st f6, Vec.z(dst), 1, qr0
blr
#endif // clang-format on
}
// Paired-single batch transform: for each of `count` consecutive vectors
// starting at srcBase, writes m * (x, y, z, 1) to the matching slot at dstBase.
//
// Only byte offsets 0..47 of m are read. The first two rows are re-packed
// into column pairs so that x and y of the result are produced together in
// one register; z is produced separately from the third row.
// The loop is software-pipelined: each iteration finishes and stores vector i
// while loading and starting vector i+1, and the code after the loop finishes
// the last vector.
// The update-form loads/stores walk srcBase/dstBase in steps of 4 and 8 bytes;
// this assumes Vec is three consecutive f32 (12 bytes) -- TODO confirm.
// NOTE(review): CTR is loaded with count - 1 and tested by bdnz at the bottom
// of a loop whose body always runs once, so count < 2 does not appear to be
// supported -- confirm callers guarantee count >= 2.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSMTXMultVecArray(const register GC_Mtx m, const register Vec* srcBase,
register Vec* dstBase, register u32 count)
{
#ifdef __MWERKS__ // clang-format off
// Load rows 0 and 1 and merge them into column pairs:
// f0 = (m00, m10), f1 = (m01, m11), f2 = (m02, m12), f3 = (m03, m13).
psq_l f13, 0x0(m), 0, qr0
psq_l f12, 0x10(m), 0, qr0
subi count, count, 0x1
psq_l f11, 0x8(m), 0, qr0
ps_merge00 f0, f13, f12
// Bias dstBase by -4 so the first update-form store lands on dstBase itself.
subi dstBase, dstBase, 0x4
psq_l f10, 0x18(m), 0, qr0
ps_merge11 f1, f13, f12
mtctr count
// Row 2 stays in row order: f4 = (m20, m21), f5 = (m22, m23).
psq_l f4, 0x20(m), 0, qr0
ps_merge00 f2, f11, f10
psq_l f5, 0x28(m), 0, qr0
ps_merge11 f3, f11, f10
// Prologue: load the first vector and start its products.
psq_l f6, Vec.x(srcBase), 0, qr0
psq_lu f7, Vec.z(srcBase), 1, qr0
ps_madds0 f8, f0, f6, f3
ps_mul f9, f4, f6
ps_madds1 f8, f1, f6, f8
ps_madd f10, f5, f7, f9
loop:
// Load the next vector's (x, y) while finishing the current (x, y) result.
psq_lu f6, Vec.y(srcBase), 0, qr0
ps_madds0 f12, f2, f7, f8
psq_lu f7, Vec.z(srcBase), 1, qr0
// Current z result: sum of the two slots of f10.
ps_sum0 f13, f10, f9, f10
ps_madds0 f8, f0, f6, f3
ps_mul f9, f4, f6
psq_stu f12, 0x4(dstBase), 0, qr0
ps_madds1 f8, f1, f6, f8
psq_stu f13, 0x8(dstBase), 1, qr0
ps_madd f10, f5, f7, f9
bdnz loop
// Epilogue: finish and store the last vector.
ps_madds0 f12, f2, f7, f8
ps_sum0 f13, f10, f9, f10
psq_stu f12, 0x4(dstBase), 0, qr0
psq_stu f13, 0x8(dstBase), 1, qr0
#endif // clang-format on
}
// Paired-single transform by the upper-left 3x3 part of m only:
// dst.x = m00*x + m01*y + m02*z, and likewise for rows 1 and 2.
//
// The fourth column of each row is loaded alongside the third, but it only
// ever reaches the second register slot, which the single-element stores do
// not write; no translation is applied.
// Both halves of src are loaded before the first store to dst, so dst may be
// the same object as src.
// NOTE(review): m is typed Mtx44 here although only three rows are read --
// confirm the intended type.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSMTXMultVecSR(const register Mtx44 m, const register Vec* src, register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
// f6 = (x, y); f8/f10/f12 = per-row (mi0*x, mi1*y).
psq_l f0, 0x0(m), 0, qr0
psq_l f6, Vec.x(src), 0, qr0
psq_l f2, 0x10(m), 0, qr0
ps_mul f8, f0, f6
psq_l f4, 0x20(m), 0, qr0
ps_mul f10, f2, f6
// f7 = (z, 1.0).
psq_l f7, Vec.z(src), 1, qr0
ps_mul f12, f4, f6
psq_l f3, 0x18(m), 0, qr0
// Fold each row's two products into slot 0.
ps_sum0 f8, f8, f8, f8
psq_l f5, 0x28(m), 0, qr0
ps_sum0 f10, f10, f10, f10
psq_l f1, 0x8(m), 0, qr0
ps_sum0 f12, f12, f12, f12
// Add mi2*z in slot 0 and store one component per row.
ps_madd f9, f1, f7, f8
psq_st f9, Vec.x(dst), 1, qr0
ps_madd f11, f3, f7, f10
psq_st f11, Vec.y(dst), 1, qr0
ps_madd f13, f5, f7, f12
psq_st f13, Vec.z(dst), 1, qr0
#endif // clang-format on
}
+171
View File
@@ -0,0 +1,171 @@
#include <dolphin.h>
#include <dolphin/mtx.h>
#define qr0 0
// Paired-single vector add: c = a + b, component-wise.
// x and y are processed as one pair, z as a single element.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSVECAdd(register Vec* a, register Vec* b, register Vec* c)
{
#ifdef __MWERKS__ // clang-format off
// (c.x, c.y) = (a.x, a.y) + (b.x, b.y)
psq_l f2, Vec.x(a), 0, qr0
psq_l f4, Vec.x(b), 0, qr0
ps_add f6, f2, f4
psq_st f6, Vec.x(c), 0, qr0
// c.z = a.z + b.z (single-element load and store)
psq_l f3, Vec.z(a), 1, qr0
psq_l f5, Vec.z(b), 1, qr0
ps_add f7, f3, f5
psq_st f7, Vec.z(c), 1, qr0
#endif // clang-format on
}
// Paired-single vector subtract: c = a - b, component-wise.
// x and y are processed as one pair, z as a single element.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSVECSubtract(register Vec* a, register Vec* b, register Vec* c)
{
#ifdef __MWERKS__ // clang-format off
// (c.x, c.y) = (a.x, a.y) - (b.x, b.y)
psq_l f2, Vec.x(a), 0, qr0
psq_l f4, Vec.x(b), 0, qr0
ps_sub f6, f2, f4
psq_st f6, Vec.x(c), 0, qr0
// c.z = a.z - b.z (single-element load and store)
psq_l f3, Vec.z(a), 1, qr0
psq_l f5, Vec.z(b), 1, qr0
ps_sub f7, f3, f5
psq_st f7, Vec.z(c), 1, qr0
#endif // clang-format on
}
// Paired-single vector scale: dst = src * mult, component-wise.
// All of src is loaded before the first store, so dst may be the same object
// as src.
// NOTE(review): the body multiplies by f1 directly rather than naming `mult`;
// presumably mult arrives in f1 as the first floating-point argument --
// confirm against the calling convention.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSVECScale(register Vec* src, register Vec* dst, register f32 mult)
{
#ifdef __MWERKS__ // clang-format off
psq_l f0, Vec.x(src), 0, qr0
psq_l f2, Vec.z(src), 1, qr0
// (dst.x, dst.y) = (src.x, src.y) * f1
ps_muls0 f0, f0, f1
psq_st f0, Vec.x(dst), 0, qr0
// dst.z = src.z * f1
ps_muls0 f0, f2, f1
psq_st f0, Vec.z(dst), 1, qr0
#endif // clang-format on
}
// Paired-single vector normalize: dst = vec1 / |vec1|.
//
// The reciprocal length comes from the hardware estimate (frsqrte) refined by
// one Newton-Raphson step, r' = 0.5 * r * (3 - r * r * sqsum), so the result
// is an approximation rather than an exactly rounded quotient.
// No special handling of a zero-length input is visible here.
// All of vec1 is loaded before the first store, so dst may be the same object
// as vec1.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
void PSVECNormalize(const register Vec* vec1, register Vec* dst)
{
// Constants for the Newton-Raphson refinement.
register float c_half = 0.5f;
register float c_three = 3.0f;
register float v1_xy;
register float v1_z;
register float xx_zz;
register float xx_yy;
register float sqsum;
register float rsqrt;
register float nwork0;
register float nwork1;
#ifdef __MWERKS__ // clang-format off
asm
{
// v1_xy = (x, y); v1_z = (z, 1.0).
psq_l v1_xy, Vec.x(vec1), 0, qr0
ps_mul xx_yy, v1_xy, v1_xy
psq_l v1_z, Vec.z(vec1), 1, qr0
// Slot 0 of xx_zz = z*z + x*x; adding y*y (slot 1 of xx_yy) gives |v|^2.
ps_madd xx_zz, v1_z, v1_z, xx_yy
ps_sum0 sqsum, xx_zz, v1_z, xx_yy
// Estimate 1/sqrt(|v|^2), then refine it once.
frsqrte rsqrt, sqsum
fmuls nwork0, rsqrt, rsqrt
fmuls nwork1, rsqrt, c_half
fnmsubs nwork0, nwork0, sqsum, c_three
fmuls rsqrt, nwork0, nwork1
// Scale each component by the refined reciprocal length.
ps_muls0 v1_xy, v1_xy, rsqrt
psq_st v1_xy, Vec.x(dst), 0, qr0
ps_muls0 v1_z, v1_z, rsqrt
psq_st v1_z, Vec.z(dst), 1, qr0
}
#endif // clang-format on
}
// Paired-single vector length: computes sqrt(x*x + y*y + z*z) of v as
// |v|^2 * (1 / sqrt(|v|^2)).
//
// The reciprocal square root is the hardware estimate (frsqrte) refined by
// one Newton-Raphson step, so the result is an approximation.
// The result is left in f1, presumably the float return register -- confirm
// against the calling convention.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm float PSVECMag(register Vec* v)
{
#ifdef __MWERKS__ // clang-format off
// f0 = (x*x, y*y); slot 0 of f1 accumulates z*z + x*x, then + y*y.
psq_l f0, Vec.x(v), 0, qr0
ps_mul f0, f0, f0
lfs f1, Vec.z(v)
ps_madd f1, f1, f1, f0
lfs f4, 0.5f
ps_sum0 f1, f1, f0, f0
// f0 = refined 1/sqrt(f1): 0.5 * r * (3 - r*r*f1).
frsqrte f0, f1
lfs f3, 3.0f
fmuls f2, f0, f0
fmuls f0, f0, f4
fnmsubs f2, f2, f1, f3
fmuls f0, f2, f0
// Keep the refined value only if it compares >= 0; otherwise fall back to
// f1 itself. Presumably this guards the zero-length case, where the
// refinement does not yield a usable number -- confirm.
fsel f0, f0, f0, f1
fmuls f1, f1, f0
#endif // clang-format on
}
// Paired-single dot product: computes
// vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z.
//
// The (y, z) pair load at Vec.y assumes y and z are adjacent f32 fields of
// Vec -- TODO confirm.
// The result is left in f1, presumably the float return register -- confirm
// against the calling convention.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm f32 PSVECDotProduct(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
// f2 = (y1*y2, z1*z2)
psq_l f2, Vec.y(vec1), 0, qr0
psq_l f3, Vec.y(vec2), 0, qr0
ps_mul f2, f2, f3
// Slot 0 of f3 = x1*x2 + y1*y2
psq_l f5, Vec.x(vec1), 0, qr0
psq_l f4, Vec.x(vec2), 0, qr0
ps_madd f3, f5, f4, f2
// f1 = (x1*x2 + y1*y2) + z1*z2
ps_sum0 f1, f3, f2, f2
#endif // clang-format on
}
// Paired-single cross product: dst = vec1 x vec2, i.e.
//   dst.x = a.y*b.z - a.z*b.y
//   dst.y = a.z*b.x - a.x*b.z
//   dst.z = a.x*b.y - a.y*b.x
// with a = vec1 and b = vec2.
//
// Both inputs are fully loaded before the first store, so dst may be the same
// object as vec1 or vec2.
// The pair store at Vec.y assumes y and z are adjacent f32 fields of Vec, and
// the products using f2/f3 rely on lfs placing the value in both register
// slots -- TODO confirm both.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm void PSVECCrossProduct(register Vec* vec1, register Vec* vec2,
register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
// f1 = (b.x, b.y); f2 = a.z; f0 = (a.x, a.y); f6 = (b.y, b.x); f3 = b.z.
psq_l f1, Vec.x(vec2), 0, qr0
lfs f2, Vec.z(vec1)
psq_l f0, Vec.x(vec1), 0, qr0
ps_merge10 f6, f1, f1
lfs f3, Vec.z(vec2)
ps_mul f4, f1, f2
ps_muls0 f7, f1, f0
// f5 = (a.x*b.z - b.x*a.z, a.y*b.z - b.y*a.z)
ps_msub f5, f0, f3, f4
// Slot 1 of f8 = a.y*b.x - b.y*a.x
ps_msub f8, f0, f6, f7
// dst.x is slot 1 of f5; dst.y and dst.z are the negated remaining terms.
ps_merge11 f9, f5, f5
ps_merge01 f10, f5, f8
psq_st f9, Vec.x(dst), 1, qr0
ps_neg f10, f10
psq_st f10, Vec.y(dst), 0, qr0
#endif // clang-format on
}
// Paired-single squared distance: computes dx*dx + dy*dy + dz*dz, where
// d = vec1 - vec2.
//
// The (y, z) pair load at Vec.y assumes y and z are adjacent f32 fields of
// Vec -- TODO confirm.
// The result is left in f1, presumably the float return register -- confirm
// against the calling convention.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm f32 PSVECSquareDistance(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
// f2 = (dy, dz), squared a few instructions later.
psq_l f0, Vec.y(vec1), 0, qr0
psq_l f1, Vec.y(vec2), 0, qr0
ps_sub f2, f0, f1
// f0 = (dx, dy)
psq_l f0, Vec.x(vec1), 0, qr0
psq_l f1, Vec.x(vec2), 0, qr0
ps_mul f2, f2, f2
ps_sub f0, f0, f1
// Slot 0 of f1 = dx*dx + dy*dy, then + dz*dz.
ps_madd f1, f0, f0, f2
ps_sum0 f1, f1, f2, f2
#endif // clang-format on
}
// Paired-single distance: computes sqrt(dx*dx + dy*dy + dz*dz), where
// d = vec1 - vec2, as dist^2 * (1 / sqrt(dist^2)).
//
// The reciprocal square root is the hardware estimate (frsqrte) refined by
// one Newton-Raphson step, so the result is an approximation.
// The (y, z) pair load at Vec.y assumes y and z are adjacent f32 fields of
// Vec -- TODO confirm.
// The result is left in f1, presumably the float return register -- confirm
// against the calling convention.
// Assumes the quantization register selected by qr0 is set up for plain,
// unscaled floats -- TODO confirm.
asm f32 PSVECDistance(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
// f2 = (dy, dz), squared a few instructions later; f0 = (dx, dy).
psq_l f0, Vec.y(vec1), 0, qr0
psq_l f1, Vec.y(vec2), 0, qr0
ps_sub f2, f0, f1
psq_l f0, Vec.x(vec1), 0, qr0
psq_l f1, Vec.x(vec2), 0, qr0
ps_mul f2, f2, f2
ps_sub f0, f0, f1
lfs f3, 0.5f
// Slot 0 of f0 = dx*dx + dy*dy, then + dz*dz.
ps_madd f0, f0, f0, f2
ps_sum0 f0, f0, f2, f2
lfs f4, 3.0f
// f1 = refined 1/sqrt(f0): 0.5 * r * (3 - r*r*f0).
frsqrte f1, f0
fmuls f2, f1, f1
fmuls f1, f1, f3
fnmsubs f2, f2, f0, f4
fmuls f1, f2, f1
// Keep the refined value only if it compares >= 0; otherwise fall back to
// f0 itself. Presumably this guards the zero-distance case, where the
// refinement does not yield a usable number -- confirm.
fsel f1, f1, f1, f0
fmuls f1, f0, f1
#endif // clang-format on
}