mirror of
https://github.com/ACreTeam/ac-decomp
synced 2026-05-23 06:34:18 -04:00
link dolphin ar, ai, & mtx. Finish Dolphin SDK
This commit is contained in:
@@ -0,0 +1,348 @@
|
||||
#include <dolphin/ai.h>
|
||||
#include <dolphin/gx.h>
|
||||
#include <dolphin/hw_regs.h>
|
||||
#include <dolphin/os.h>
|
||||
#include <macros.h>
|
||||
|
||||
#include "gx/__gx.h"
|
||||
|
||||
static AISCallback __AIS_Callback;
|
||||
static AIDCallback __AID_Callback;
|
||||
static u8* __CallbackStack;
|
||||
static u8* __OldStack;
|
||||
static BOOL __AI_init_flag;
|
||||
static OSTime bound_32KHz;
|
||||
static OSTime bound_48KHz;
|
||||
static OSTime min_wait;
|
||||
static OSTime max_wait;
|
||||
static OSTime buffer;
|
||||
|
||||
struct STRUCT_TIMELOG {
|
||||
OSTime t_start;
|
||||
OSTime t1;
|
||||
OSTime t2;
|
||||
OSTime t3;
|
||||
OSTime t4;
|
||||
OSTime t_end;
|
||||
};
|
||||
|
||||
#if DEBUG
|
||||
struct STRUCT_TIMELOG profile;
|
||||
#endif
|
||||
|
||||
static void __AI_set_stream_sample_rate(u32 rate);
|
||||
static void __AIDHandler(__OSInterrupt interrupt, OSContext* context);
|
||||
static void __AISHandler(__OSInterrupt interrupt, OSContext* context);
|
||||
static void __AICallbackStackSwitch(void* cb);
|
||||
static void __AI_SRC_INIT(void);
|
||||
|
||||
// Replaces the callback that __AIDHandler invokes and hands back the one that
// was installed before. Passing NULL leaves no callback registered.
// The previous value is sampled first; only the store is performed with
// interrupts disabled.
AIDCallback AIRegisterDMACallback(AIDCallback callback)
{
    AIDCallback previous = __AID_Callback;
    BOOL enabled = OSDisableInterrupts();

    __AID_Callback = callback;
    OSRestoreInterrupts(enabled);

    return previous;
}
|
||||
|
||||
// Programs the AI DMA source address and block count without starting the
// transfer (AIStartDMA sets the start bit afterwards).
//   start_addr - written as a high half (__DSPRegs[24]) and low half
//                (__DSPRegs[25]); the low 5 bits of the low register are kept.
//   length     - byte count; asserted to be a multiple of 32 and stored as
//                length / 32 in __DSPRegs[27].
// The three register updates run with interrupts disabled.
void AIInitDMA(u32 start_addr, u32 length)
{
    BOOL enabled = OSDisableInterrupts();

    __DSPRegs[24] = (__DSPRegs[24] & 0xFFFFFC00) | (start_addr >> 16);
    __DSPRegs[25] = (__DSPRegs[25] & 0xFFFF001F) | (start_addr & 0xFFFF);

    ASSERTMSGLINE(0x12E, (length & 0x1F) == 0,
                  "AIStartDMA: length must be multiple of 32 bytes");

    // Bit 15 of this register is the start flag and is preserved here.
    __DSPRegs[27] = (__DSPRegs[27] & 0xFFFF8000) | ((length >> 5) & 0xFFFF);

    OSRestoreInterrupts(enabled);
}
|
||||
|
||||
// Sets bit 15 of __DSPRegs[27], the register AIInitDMA loads with the block
// count, leaving the other bits as they were.
void AIStartDMA(void)
{
    __DSPRegs[27] |= 0x8000;
}
|
||||
|
||||
u32 AIGetStreamSampleCount(void) {
|
||||
return __AIRegs[2];
|
||||
}
|
||||
|
||||
void AIResetStreamSampleCount(void)
|
||||
{
|
||||
__AIRegs[0] = (__AIRegs[0] & ~0x20) | 0x20;
|
||||
}
|
||||
|
||||
// Stores `trigger` into __AIRegs[3]; AIGetStreamTrigger reads the same
// register back.
inline void AISetStreamTrigger(u32 trigger)
{
    __AIRegs[3] = trigger;
}
|
||||
|
||||
u32 AIGetStreamTrigger(void) {
|
||||
return __AIRegs[3];
|
||||
}
|
||||
|
||||
// Changes the stream play state (bit 0 of __AIRegs[0]).
//   state - requested play state, e.g. AI_STREAM_START.
// Nothing happens when the requested state is already active.
//
// Starting playback while the stream sample-rate field reads 0 takes a special
// path: both stream volumes are muted, __AI_SRC_INIT() is run with interrupts
// disabled, the sample counter is reset (bit 5), playback is started and the
// volumes are then put back on the channel they were read from.
void AISetStreamPlayState(u32 state)
{
    BOOL old;
    u8 vol_left;
    u8 vol_right;

    if (state == AIGetStreamPlayState()) {
        return;
    }

    if (AIGetStreamSampleRate() == 0 && state == AI_STREAM_START) {
        // Remember each channel's own volume so the restore below does not
        // exchange left and right.
        vol_left = AIGetStreamVolLeft();
        vol_right = AIGetStreamVolRight();
        AISetStreamVolRight(0);
        AISetStreamVolLeft(0);

        old = OSDisableInterrupts();
        __AI_SRC_INIT();
        SET_REG_FIELD(0, __AIRegs[0], 1, 5, 1);
        SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
        OSRestoreInterrupts(old);

        AISetStreamVolLeft(vol_left);
        AISetStreamVolRight(vol_right);
        return;
    }

    SET_REG_FIELD(0x27F, __AIRegs[0], 1, 0, state);
}
|
||||
|
||||
// Returns bit 0 of __AIRegs[0], the stream play-state bit.
u32 AIGetStreamPlayState(void)
{
    const u32 ctrl = __AIRegs[0];

    return ctrl & 1;
}
|
||||
|
||||
// Selects the DSP sample rate.
//   rate - requested rate; compared against AIGetDSPSampleRate() and
//          AI_SAMPLERATE_32KHZ.
// When the rate actually changes, bit 6 of __AIRegs[0] is cleared first. For
// AI_SAMPLERATE_32KHZ the stream is additionally muted, __AI_SRC_INIT() is run
// with interrupts disabled, the saved stream rate / play state are written
// back, bit 6 is set, and the stream volumes are restored.
void AISetDSPSampleRate(u32 rate)
{
    BOOL enabled;
    u32 saved_play_state;
    u32 saved_stream_rate;
    u8 saved_left;
    u8 saved_right;

    if (rate == AIGetDSPSampleRate()) {
        return;
    }

    __AIRegs[0] = (__AIRegs[0] & 0xFFFFFFBF);

    if (rate != AI_SAMPLERATE_32KHZ) {
        return;
    }

    // Snapshot everything __AI_SRC_INIT() disturbs.
    saved_left = AIGetStreamVolLeft();
    saved_right = AIGetStreamVolRight();
    saved_play_state = AIGetStreamPlayState();
    saved_stream_rate = AIGetStreamSampleRate();

    AISetStreamVolLeft(0U);
    AISetStreamVolRight(0U);

    enabled = OSDisableInterrupts();
    __AI_SRC_INIT();
    SET_REG_FIELD(0x2D8, __AIRegs[0], 1, 5, 1);
    SET_REG_FIELD(0x2D9, __AIRegs[0], 1, 1, saved_stream_rate);
    SET_REG_FIELD(0x2DA, __AIRegs[0], 1, 0, saved_play_state);
    __AIRegs[0] |= 0x40;
    OSRestoreInterrupts(enabled);

    AISetStreamVolLeft(saved_left);
    AISetStreamVolRight(saved_right);
}
|
||||
|
||||
// Returns the inverse of bit 6 of __AIRegs[0]: 1 when the bit is clear,
// 0 when it is set.
u32 AIGetDSPSampleRate(void)
{
    return GET_REG_FIELD(__AIRegs[0], 1, 6) ^ 1;
}
|
||||
|
||||
// Public stream sample-rate setter. Only AI_SAMPLERATE_48KHZ is forwarded to
// __AI_set_stream_sample_rate(); any other value is ignored, with a notice
// printed in DEBUG builds.
void AISetStreamSampleRate(u32 rate)
{
    if (rate != AI_SAMPLERATE_48KHZ) {
#if DEBUG
        OSReport("AISetStreamSampleRate(): OBSOLETED. Only 48KHz streaming from "
                 "disk is supported!\n");
#endif
        return;
    }

    __AI_set_stream_sample_rate(rate);
}
|
||||
|
||||
// Writes `rate` into the stream sample-rate field (bit 1 of __AIRegs[0]) when
// it differs from the current value.
// The change is wrapped in the same sequence used elsewhere in this file:
// mute the stream, remember and clear bit 6, run __AI_SRC_INIT() with
// interrupts disabled, put bit 6 back, reset the sample counter (bit 5), then
// restore play state and volumes.
static void __AI_set_stream_sample_rate(u32 rate)
{
    BOOL enabled;
    u32 saved_play_state;
    u8 saved_left;
    u8 saved_right;
    u32 saved_bit6;

    if (rate == AIGetStreamSampleRate()) {
        return;
    }

    saved_play_state = AIGetStreamPlayState();
    saved_left = AIGetStreamVolLeft();
    saved_right = AIGetStreamVolRight();
    AISetStreamVolRight(0);
    AISetStreamVolLeft(0);

    saved_bit6 = __AIRegs[0] & 0x40;
    SET_REG_FIELD(0, __AIRegs[0], 1, 6, 0);

    enabled = OSDisableInterrupts();
    __AI_SRC_INIT();
    __AIRegs[0] |= saved_bit6;
    SET_REG_FIELD(0x368, __AIRegs[0], 1, 5, 1);
    SET_REG_FIELD(0x369, __AIRegs[0], 1, 1, rate);
    OSRestoreInterrupts(enabled);

    AISetStreamPlayState(saved_play_state);
    AISetStreamVolLeft(saved_left);
    AISetStreamVolRight(saved_right);
}
|
||||
|
||||
// Returns the 1-bit stream sample-rate field (bit 1 of __AIRegs[0]).
u32 AIGetStreamSampleRate(void)
{
    return GET_REG_FIELD(__AIRegs[0], 1, 1);
}
|
||||
|
||||
// Stores `vol` in the 8-bit field at bit 0 of __AIRegs[1] (left stream
// volume).
void AISetStreamVolLeft(u8 vol)
{
    SET_REG_FIELD(0x3A3, __AIRegs[1], 8, 0, vol);
}
|
||||
|
||||
// Reads back the 8-bit field at bit 0 of __AIRegs[1] (left stream volume).
u8 AIGetStreamVolLeft(void)
{
    return GET_REG_FIELD(__AIRegs[1], 8, 0);
}
|
||||
|
||||
// Stores `vol` in the 8-bit field at bit 8 of __AIRegs[1] (right stream
// volume).
void AISetStreamVolRight(u8 vol)
{
    SET_REG_FIELD(0x3CC, __AIRegs[1], 8, 8, vol);
}
|
||||
|
||||
// Reads back bits 8..15 of __AIRegs[1] (right stream volume).
u8 AIGetStreamVolRight(void)
{
    return (__AIRegs[1] >> 8) & 0xFF;
}
|
||||
|
||||
// One-time initialisation of the AI driver; later calls return immediately
// because __AI_init_flag is already TRUE.
//   stack - optional stack for the DMA callback (may be NULL). When non-NULL
//           it must be 8-byte aligned; __AIDHandler then runs the registered
//           callback through __AICallbackStackSwitch on this stack.
// Steps: convert the timing constants used by __AI_SRC_INIT to ticks, mute the
// stream, clear the trigger and sample counter, select 48KHz streaming and a
// 32KHz DSP rate, clear both callbacks, and install/unmask the two interrupt
// handlers.
void AIInit(u8* stack)
{
    if (__AI_init_flag == TRUE) {
        return;
    }

    bound_32KHz = OSNanosecondsToTicks(31524);
    bound_48KHz = OSNanosecondsToTicks(42024);
    min_wait = OSNanosecondsToTicks(42000);
    max_wait = OSNanosecondsToTicks(63000);
    buffer = OSNanosecondsToTicks(3000);

    AISetStreamVolRight(0);
    AISetStreamVolLeft(0);
    AISetStreamTrigger(0);
    AIResetStreamSampleCount();
    __AI_set_stream_sample_rate(AI_SAMPLERATE_48KHZ);
    AISetDSPSampleRate(AI_SAMPLERATE_32KHZ);
#if DEBUG
    OSReport("AIInit(): DSP is 32KHz\n");
#endif

    __AIS_Callback = NULL;
    __AID_Callback = NULL;
    __CallbackStack = stack;
    if (stack) {
        // The assertion must hold for an aligned stack: low three bits clear.
        ASSERTMSGLINE(0x444, ((u32)stack & 7) == 0,
                      "AIInit: stack must be 8-byte aligned");
    }

    __OSSetInterruptHandler(5, __AIDHandler);
    __OSUnmaskInterrupts(0x04000000);
    __OSSetInterruptHandler(8, __AISHandler);
    __OSUnmaskInterrupts(0x800000);

    __AI_init_flag = TRUE;
}
|
||||
|
||||
// Interrupt handler installed by AIInit for interrupt 8.
// Sets bit 3 of __AIRegs[0] (presumably the interrupt acknowledge - confirm
// against the hardware docs), then runs the registered stream callback, if
// any, with the current sample counter while a scratch OSContext is current.
// The interrupted `context` is made current again before returning.
static void __AISHandler(__OSInterrupt interrupt, OSContext* context)
{
    OSContext scratch;

    __AIRegs[0] |= 8;

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AIS_Callback) {
        __AIS_Callback(__AIRegs[2]);
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
|
||||
|
||||
// Interrupt handler installed by AIInit for interrupt 5.
// Rewrites __DSPRegs[5] with bits 5 and 7 cleared and bit 3 set (presumably
// acknowledging only the AI DMA interrupt - confirm against the hardware
// docs), then runs the registered DMA callback while a scratch OSContext is
// current. If AIInit was given a callback stack the call goes through
// __AICallbackStackSwitch, otherwise the callback is invoked directly.
static void __AIDHandler(__OSInterrupt interrupt, OSContext* context)
{
    OSContext scratch;
    u16 status;

    status = __DSPRegs[5];
    status = (status & ~0xA0) | 8;
    __DSPRegs[5] = status;

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AID_Callback) {
        if (__CallbackStack) {
            __AICallbackStackSwitch(__AID_Callback);
        } else {
            __AID_Callback();
        }
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
|
||||
|
||||
// Calls `cb` (arriving in r3) with r1 pointing into the stack supplied to
// AIInit, then switches back to the original stack.
// The body is only emitted for Metrowerks compilers.
static asm void __AICallbackStackSwitch(register void* cb)
{
#ifdef __MWERKS__ // clang-format off
    nofralloc

    // Prologue: save LR in the caller's frame, open a 0x18-byte frame and
    // keep the callback pointer in r31.
    mflr    r0
    stw     r0, 0x4(r1)
    stwu    r1, -0x18(r1)
    stw     r31, 0x14(r1)
    mr      r31, r3

    // __OldStack = r1
    lis     r5, __OldStack@ha
    addi    r5, r5, __OldStack@l
    stw     r1, 0x0(r5)

    // r1 = __CallbackStack - 8
    lis     r5, __CallbackStack@ha
    addi    r5, r5, __CallbackStack@l
    lwz     r1, 0x0(r5)
    subi    r1, r1, 0x8

    // Indirect call to the callback.
    mtlr    r31
    blrl

    // r1 = __OldStack
    lis     r5, __OldStack@ha
    addi    r5, r5, __OldStack@l
    lwz     r1, 0x0(r5)

    // Epilogue: 0x1c(r1) is the LR slot written by the prologue (0x18 + 0x4).
    lwz     r0, 0x1c(r1)
    lwz     r31, 0x14(r1)
    addi    r1, r1, 0x18
    mtlr    r0
    blr
#endif // clang-format on
}
|
||||
|
||||
void __AI_SRC_INIT(void)
|
||||
{
|
||||
OSTime rising_32khz = 0;
|
||||
OSTime rising_48khz = 0;
|
||||
OSTime diff = 0;
|
||||
OSTime t1 = 0;
|
||||
OSTime temp;
|
||||
u32 temp0;
|
||||
u32 temp1;
|
||||
u32 done = 0;
|
||||
u32 volume = 0;
|
||||
u32 Init_Cnt = 0;
|
||||
u32 walking = 0;
|
||||
|
||||
walking = 0;
|
||||
Init_Cnt = 0;
|
||||
temp = 0;
|
||||
|
||||
#if DEBUG
|
||||
profile.t_start = OSGetTime();
|
||||
#endif
|
||||
|
||||
while (!done) {
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 5, 1);
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 0);
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
|
||||
temp0 = __AIRegs[2];
|
||||
while (temp0 == __AIRegs[2]) { }
|
||||
rising_32khz = OSGetTime();
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 1);
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_START);
|
||||
temp1 = __AIRegs[2];
|
||||
while (temp1 == __AIRegs[2]) { }
|
||||
rising_48khz = OSGetTime();
|
||||
diff = rising_48khz - rising_32khz;
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 1, 0);
|
||||
SET_REG_FIELD(0, __AIRegs[0], 1, 0, AI_STREAM_STOP);
|
||||
if (diff < bound_32KHz - buffer) {
|
||||
temp = min_wait;
|
||||
done = 1;
|
||||
Init_Cnt++;
|
||||
} else if (diff >= bound_32KHz + buffer
|
||||
&& diff < bound_48KHz - buffer) {
|
||||
temp = max_wait;
|
||||
done = 1;
|
||||
Init_Cnt++;
|
||||
} else {
|
||||
done = 0;
|
||||
walking = 1;
|
||||
Init_Cnt++;
|
||||
}
|
||||
}
|
||||
while (rising_48khz + temp > OSGetTime()) { }
|
||||
#if DEBUG
|
||||
profile.t_end = OSGetTime();
|
||||
#endif
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
#ifndef _DOLPHIN_AR_INTERNAL_H_
#define _DOLPHIN_AR_INTERNAL_H_

#include <dolphin/ar.h>

// Internal ARQ (ARAM request queue) entry points.

// Queue servicing.
void __ARQPopTaskQueueHi(void);
void __ARQServiceQueueLo(void);

// Default completion callback and the DMA-done service routine.
void __ARQCallbackHack(u32 pointerToARQRequest);
void __ARQInterruptServiceRoutine(void);

// Temporary-queue helpers.
void __ARQInitTempQueue(void);
void __ARQPushTempQueue(struct ARQRequest* task);

#endif // _DOLPHIN_AR_INTERNAL_H_
|
||||
@@ -0,0 +1,314 @@
|
||||
#include <dolphin/ar.h>
|
||||
// #include "fake_tgmath.h"
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/base/PPCArch.h>
|
||||
#include <string.h>
|
||||
#include <macros.h>
|
||||
|
||||
#include "ar/__ar.h"
|
||||
|
||||
static void (*__AR_Callback)();
|
||||
static u32 __AR_Size;
|
||||
static u32 __AR_InternalSize;
|
||||
static u32 __AR_ExpansionSize;
|
||||
static u32 __AR_StackPointer;
|
||||
static u32 __AR_FreeBlocks;
|
||||
static u32* __AR_BlockLength;
|
||||
static int __AR_init_flag;
|
||||
|
||||
// functions
|
||||
static void __ARHandler(__OSInterrupt exception, struct OSContext* context);
|
||||
static void __ARWaitForDMA(void);
|
||||
static void __ARWriteDMA(u32 mmem_addr, u32 aram_addr, u32 length);
|
||||
static void __ARReadDMA(u32 mmem_addr, u32 aram_addr, u32 length);
|
||||
static void __ARChecksize(void);
|
||||
|
||||
// Replaces the callback that __ARHandler invokes and returns the one that was
// installed before. The previous value is sampled first; only the store is
// performed with interrupts disabled.
ARQCallback ARRegisterDMACallback(ARQCallback callback)
{
    ARQCallback previous = __AR_Callback;
    int enabled = OSDisableInterrupts();

    __AR_Callback = callback;
    OSRestoreInterrupts(enabled);

    return previous;
}
|
||||
|
||||
// Programs the ARAM DMA registers with interrupts disabled.
//   type         - transfer direction; placed in bit 15 of the SIZE_HI
//                  register (ARQ passes 0 for main memory -> ARAM).
//   mainmem_addr - main-memory address, split into HI/LO halves.
//   aram_addr    - ARAM address, split into HI/LO halves.
//   length       - byte count, split into HI/LO halves.
// The low 5 bits of every LO register and the upper bits of every HI register
// are preserved. The function does not wait for completion.
void ARStartDMA(u32 type, u32 mainmem_addr, u32 aram_addr, u32 length)
{
    const int enabled = OSDisableInterrupts();

    // Main-memory address.
    __DSPRegs[DSP_ARAM_DMA_MM_HI]
        = ((__DSPRegs[DSP_ARAM_DMA_MM_HI] & 0xFFFFFC00) | (mainmem_addr >> 0x10));
    __DSPRegs[DSP_ARAM_DMA_MM_LO]
        = ((__DSPRegs[DSP_ARAM_DMA_MM_LO] & 0xFFFF001F) | ((u16)mainmem_addr));

    // ARAM address.
    __DSPRegs[DSP_ARAM_DMA_ARAM_HI]
        = ((__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & 0xFFFFFC00) | (aram_addr >> 0x10));
    __DSPRegs[DSP_ARAM_DMA_ARAM_LO]
        = ((__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & 0xFFFF001F) | ((u16)aram_addr));

    // Direction bit first, then the length halves (separate register writes).
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI]
        = (__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x8000) | ((type << 0xF) & ~0x7FFF);
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI]
        = (__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & 0xFFFFFC00) | (length >> 0x10);
    __DSPRegs[DSP_ARAM_DMA_SIZE_LO]
        = (__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & 0xFFFF001F) | (length & 0x0000FFFF);

    OSRestoreInterrupts(enabled);
}
|
||||
|
||||
// Stack-style ARAM allocator.
//   length - bytes to reserve; asserted to be a multiple of 32 and to fit in
//            the space left between __AR_StackPointer and __AR_Size.
// Returns the ARAM address of the new block (the stack pointer before the
// bump). The block length is appended to the caller-supplied table registered
// in ARInit and one free-block slot is consumed. Runs with interrupts
// disabled.
u32 ARAlloc(u32 length)
{
    u32 block_addr;
    const int enabled = OSDisableInterrupts();

    ASSERTMSGLINE(0x17E, !(length & 0x1F),
                  "ARAlloc(): length is not multiple of 32bytes!");
    ASSERTMSGLINE(0x182, length <= (__AR_Size - __AR_StackPointer),
                  "ARAlloc(): Out of ARAM!");
    ASSERTMSGLINE(0x183, __AR_FreeBlocks, "ARAlloc(): No more free blocks!");

    block_addr = __AR_StackPointer;
    __AR_StackPointer += length;

    *__AR_BlockLength = length;
    __AR_BlockLength++;
    __AR_FreeBlocks--;

    OSRestoreInterrupts(enabled);

    return block_addr;
}
|
||||
|
||||
// Initialises the ARAM driver.
//   stack_index_addr - caller-owned table that ARAlloc fills with block
//                      lengths.
//   num_entries      - number of blocks that table can record.
// Returns the ARAM allocation base (0x4000). If the driver is already
// initialised the function returns 0x4000 immediately and changes nothing.
// Otherwise, with interrupts disabled, it clears the DMA callback, installs
// and unmasks __ARHandler on interrupt 6, resets the allocator state and runs
// __ARChecksize().
u32 ARInit(u32* stack_index_addr, u32 num_entries)
{
    BOOL enabled;

    if (__AR_init_flag == 1) {
        return 0x4000;
    }

    enabled = OSDisableInterrupts();

    __AR_Callback = NULL;
    __OSSetInterruptHandler(6, __ARHandler);
    __OSUnmaskInterrupts(0x02000000);

    __AR_StackPointer = 0x4000;
    __AR_FreeBlocks = num_entries;
    __AR_BlockLength = stack_index_addr;

    // Rewrites the refresh register with its own low and high bytes
    // recombined; the value is unchanged, but the register is read twice and
    // written once.
    __DSPRegs[DSP_ARAM_REFRESH] = (__DSPRegs[DSP_ARAM_REFRESH] & 0xff)
                                | (__DSPRegs[DSP_ARAM_REFRESH] & ~0xff);

    __ARChecksize();
    __AR_init_flag = 1;

    OSRestoreInterrupts(enabled);

    return __AR_StackPointer;
}
|
||||
|
||||
// Returns the fixed ARAM allocation base, 0x4000 (the value ARInit loads into
// the allocator's stack pointer).
u32 ARGetBaseAddress(void)
{
    return 0x4000;
}
|
||||
|
||||
// Returns the total ARAM size recorded by __ARChecksize().
u32 ARGetSize(void)
{
    return __AR_Size;
}
|
||||
|
||||
// Interrupt handler installed by ARInit for interrupt 6.
// Rewrites DSP_CONTROL_STATUS with bits 3 and 7 cleared and bit 5 set
// (presumably acknowledging only the ARAM interrupt - confirm against the
// hardware docs), then runs the registered DMA callback, if any, while a
// scratch OSContext is current. The interrupted `context` is made current
// again before returning.
static void __ARHandler(__OSInterrupt exception, struct OSContext* context)
{
    struct OSContext scratch;
    u16 status;

    status = __DSPRegs[DSP_CONTROL_STATUS];
    status = (status & ~0x88) | 0x20;
    __DSPRegs[DSP_CONTROL_STATUS] = (status);

    OSClearContext(&scratch);
    OSSetCurrentContext(&scratch);

    if (__AR_Callback) {
        __AR_Callback();
    }

    OSClearContext(&scratch);
    OSSetCurrentContext(context);
}
|
||||
|
||||
static void __ARClearInterrupt(void)
|
||||
{
|
||||
u16 tmp;
|
||||
tmp = __DSPRegs[DSP_CONTROL_STATUS];
|
||||
tmp = tmp & ~0x88 | 0x20;
|
||||
__DSPRegs[DSP_CONTROL_STATUS] = tmp;
|
||||
}
|
||||
|
||||
static void __ARWaitForDMA(void)
|
||||
{
|
||||
while (__DSPRegs[DSP_CONTROL_STATUS] & 0x200)
|
||||
;
|
||||
}
|
||||
|
||||
// Blocking main-memory -> ARAM transfer used by __ARChecksize.
//   mmem_addr - source address in main memory.
//   aram_addr - destination address in ARAM.
//   length    - byte count.
// Programs the DMA registers with the direction bit (bit 15 of SIZE_HI)
// cleared, spins in __ARWaitForDMA() and finally calls __ARClearInterrupt().
static void __ARWriteDMA(u32 mmem_addr, u32 aram_addr, u32 length)
{
    // Main-memory address halves.
    __DSPRegs[DSP_ARAM_DMA_MM_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_MM_HI] & ~0x03ff)
                                          | (u16)(mmem_addr >> 16));
    __DSPRegs[DSP_ARAM_DMA_MM_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_MM_LO] & ~0xffe0)
                                          | (u16)(mmem_addr & 0xffff));

    // ARAM address halves.
    __DSPRegs[DSP_ARAM_DMA_ARAM_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & ~0x03ff)
                                            | (u16)(aram_addr >> 16));
    __DSPRegs[DSP_ARAM_DMA_ARAM_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & ~0xffe0)
                                            | (u16)(aram_addr & 0xffff));

    // Direction: bit 15 cleared.
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI] = (u16)(__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x8000);

    // Length halves.
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x03ff)
                                            | (u16)(length >> 16));
    __DSPRegs[DSP_ARAM_DMA_SIZE_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & ~0xffe0)
                                            | (u16)(length & 0xffff));

    __ARWaitForDMA();
    __ARClearInterrupt();
}
|
||||
|
||||
// Blocking ARAM -> main-memory transfer used by __ARChecksize.
//   mmem_addr - destination address in main memory.
//   aram_addr - source address in ARAM.
//   length    - byte count.
// Programs the DMA registers with the direction bit (bit 15 of SIZE_HI) set,
// spins in __ARWaitForDMA() and finally calls __ARClearInterrupt().
static void __ARReadDMA(u32 mmem_addr, u32 aram_addr, u32 length)
{
    // Main-memory address halves.
    __DSPRegs[DSP_ARAM_DMA_MM_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_MM_HI] & ~0x03ff)
                                          | (u16)(mmem_addr >> 16));
    __DSPRegs[DSP_ARAM_DMA_MM_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_MM_LO] & ~0xffe0)
                                          | (u16)(mmem_addr & 0xffff));

    // ARAM address halves.
    __DSPRegs[DSP_ARAM_DMA_ARAM_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_HI] & ~0x03ff)
                                            | (u16)(aram_addr >> 16));
    __DSPRegs[DSP_ARAM_DMA_ARAM_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_ARAM_LO] & ~0xffe0)
                                            | (u16)(aram_addr & 0xffff));

    // Direction: bit 15 set.
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI] = (u16)(__DSPRegs[DSP_ARAM_DMA_SIZE_HI] | 0x8000);

    // Length halves.
    __DSPRegs[DSP_ARAM_DMA_SIZE_HI] = (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_HI] & ~0x03ff)
                                            | (u16)(length >> 16));
    __DSPRegs[DSP_ARAM_DMA_SIZE_LO] = (u16)((__DSPRegs[DSP_ARAM_DMA_SIZE_LO] & ~0xffe0)
                                            | (u16)(length & 0xffff));

    __ARWaitForDMA();
    __ARClearInterrupt();
}
|
||||
|
||||
static void __ARChecksize(void)
|
||||
{
|
||||
u8 test_data_pad[63];
|
||||
u8 dummy_data_pad[63];
|
||||
u8 buffer_pad[63];
|
||||
u32* test_data;
|
||||
u32* dummy_data;
|
||||
u32* buffer;
|
||||
u16 ARAM_mode;
|
||||
u32 ARAM_size;
|
||||
u32 i;
|
||||
|
||||
do {
|
||||
} while (!(__DSPRegs[DSP_ARAM_MODE] & 1));
|
||||
|
||||
ARAM_mode = 3;
|
||||
ARAM_size = __AR_InternalSize = 0x1000000;
|
||||
|
||||
__DSPRegs[DSP_ARAM_SIZE]
|
||||
= ((__DSPRegs[DSP_ARAM_SIZE] & 0xFFFFFFC0) | ARAM_mode) | 0x20;
|
||||
|
||||
test_data = (void*)ALIGN_NEXT((u32)test_data_pad, 0x20);
|
||||
dummy_data = (void*)ALIGN_NEXT((u32)dummy_data_pad, 0x20);
|
||||
buffer = (void*)ALIGN_NEXT((u32)buffer_pad, 0x20);
|
||||
for (i = 0; i < 8; i++) {
|
||||
test_data[i] = 0xDEADBEEF;
|
||||
dummy_data[i] = 0xBAD0BAD0;
|
||||
}
|
||||
|
||||
DCFlushRange(test_data, 0x20);
|
||||
DCFlushRange(dummy_data, 0x20);
|
||||
|
||||
__AR_ExpansionSize = 0;
|
||||
|
||||
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x0, 0x20U);
|
||||
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x200000, 0x20U);
|
||||
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x1000000, 0x20U);
|
||||
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x200, 0x20U);
|
||||
__ARWriteDMA((u32)dummy_data, ARAM_size + 0x400000, 0x20U);
|
||||
|
||||
memset(buffer, 0, 0x20);
|
||||
DCFlushRange(buffer, 0x20);
|
||||
|
||||
__ARWriteDMA((u32)test_data, ARAM_size, 0x20U);
|
||||
DCInvalidateRange(buffer, 0x20);
|
||||
|
||||
__ARReadDMA((u32)buffer, ARAM_size, 0x20U);
|
||||
PPCSync();
|
||||
|
||||
if (*buffer == *test_data) {
|
||||
memset(buffer, 0, 0x20);
|
||||
DCFlushRange(buffer, 0x20);
|
||||
|
||||
__ARReadDMA((u32)buffer, ARAM_size + 0x200000, 0x20U);
|
||||
PPCSync();
|
||||
|
||||
if (*buffer == *test_data) {
|
||||
ARAM_size += 0x200000;
|
||||
__AR_ExpansionSize = 0x200000;
|
||||
} else {
|
||||
memset(buffer, 0, 0x20);
|
||||
DCFlushRange(buffer, 0x20);
|
||||
|
||||
__ARReadDMA((u32)buffer, ARAM_size + 0x01000000, 0x20U);
|
||||
PPCSync();
|
||||
|
||||
if (*buffer == *test_data) {
|
||||
ARAM_mode |= 8;
|
||||
ARAM_size += 0x400000;
|
||||
__AR_ExpansionSize = 0x400000;
|
||||
} else {
|
||||
memset(buffer, 0, 0x20);
|
||||
DCFlushRange(buffer, 0x20);
|
||||
|
||||
__ARReadDMA((u32)buffer, ARAM_size + 0x200, 0x20U);
|
||||
PPCSync();
|
||||
|
||||
if (*buffer == *test_data) {
|
||||
ARAM_mode |= 0x10;
|
||||
ARAM_size += 0x800000;
|
||||
__AR_ExpansionSize = 0x800000;
|
||||
} else {
|
||||
memset(buffer, 0, 0x20);
|
||||
DCFlushRange(buffer, 0x20);
|
||||
|
||||
__ARReadDMA((u32)buffer, ARAM_size + 0x400000, 0x20U);
|
||||
PPCSync();
|
||||
|
||||
if (*buffer == *test_data) {
|
||||
ARAM_mode |= 0x18;
|
||||
ARAM_size += 0x01000000;
|
||||
__AR_ExpansionSize = 0x1000000;
|
||||
} else {
|
||||
ARAM_mode |= 0x20;
|
||||
ARAM_size += 0x02000000;
|
||||
__AR_ExpansionSize = 0x2000000;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__DSPRegs[DSP_ARAM_SIZE]
|
||||
= ((u16)(__DSPRegs[DSP_ARAM_SIZE] & 0xFFFFFFC0) | ARAM_mode);
|
||||
}
|
||||
*(u32*)OSPhysicalToUncached(0xD0) = ARAM_size;
|
||||
__AR_Size = ARAM_size;
|
||||
}
|
||||
@@ -0,0 +1,150 @@
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/ar.h>
|
||||
#include <macros.h>
|
||||
|
||||
#include "ar/__ar.h"
|
||||
|
||||
static struct ARQRequest* __ARQRequestQueueHi;
|
||||
static struct ARQRequest* __ARQRequestTailHi;
|
||||
static struct ARQRequest* __ARQRequestQueueLo;
|
||||
static struct ARQRequest* __ARQRequestTailLo;
|
||||
static struct ARQRequest* __ARQRequestPendingHi;
|
||||
static struct ARQRequest* __ARQRequestPendingLo;
|
||||
static ARQCallback __ARQCallbackHi;
|
||||
static ARQCallback __ARQCallbackLo;
|
||||
static u32 __ARQChunkSize;
|
||||
static int __ARQ_init_flag;
|
||||
|
||||
inline void __ARQPopTaskQueueHi(void)
|
||||
{
|
||||
if (__ARQRequestQueueHi) {
|
||||
if (__ARQRequestQueueHi->type == 0) {
|
||||
ARStartDMA(__ARQRequestQueueHi->type, __ARQRequestQueueHi->source,
|
||||
__ARQRequestQueueHi->dest, __ARQRequestQueueHi->length);
|
||||
} else {
|
||||
ARStartDMA(__ARQRequestQueueHi->type, __ARQRequestQueueHi->dest,
|
||||
__ARQRequestQueueHi->source,
|
||||
__ARQRequestQueueHi->length);
|
||||
}
|
||||
__ARQCallbackHi = __ARQRequestQueueHi->callback;
|
||||
__ARQRequestPendingHi = __ARQRequestQueueHi;
|
||||
__ARQRequestQueueHi = __ARQRequestQueueHi->next;
|
||||
}
|
||||
}
|
||||
|
||||
void __ARQServiceQueueLo(void)
|
||||
{
|
||||
if (__ARQRequestPendingLo == 0 && __ARQRequestQueueLo) {
|
||||
__ARQRequestPendingLo = __ARQRequestQueueLo;
|
||||
__ARQRequestQueueLo = __ARQRequestQueueLo->next;
|
||||
}
|
||||
if (__ARQRequestPendingLo) {
|
||||
if (__ARQRequestPendingLo->length <= __ARQChunkSize) {
|
||||
if (__ARQRequestPendingLo->type == 0) {
|
||||
ARStartDMA(
|
||||
__ARQRequestPendingLo->type, __ARQRequestPendingLo->source,
|
||||
__ARQRequestPendingLo->dest, __ARQRequestPendingLo->length);
|
||||
} else {
|
||||
ARStartDMA(__ARQRequestPendingLo->type,
|
||||
__ARQRequestPendingLo->dest,
|
||||
__ARQRequestPendingLo->source,
|
||||
__ARQRequestPendingLo->length);
|
||||
}
|
||||
__ARQCallbackLo = __ARQRequestPendingLo->callback;
|
||||
} else if (__ARQRequestPendingLo->type == 0) {
|
||||
ARStartDMA(__ARQRequestPendingLo->type,
|
||||
__ARQRequestPendingLo->source,
|
||||
__ARQRequestPendingLo->dest, __ARQChunkSize);
|
||||
} else {
|
||||
ARStartDMA(__ARQRequestPendingLo->type, __ARQRequestPendingLo->dest,
|
||||
__ARQRequestPendingLo->source, __ARQChunkSize);
|
||||
}
|
||||
__ARQRequestPendingLo->length -= __ARQChunkSize;
|
||||
__ARQRequestPendingLo->source += __ARQChunkSize;
|
||||
__ARQRequestPendingLo->dest += __ARQChunkSize;
|
||||
}
|
||||
}
|
||||
|
||||
// No-op completion callback; ARQPostRequest installs it when the caller
// passes a NULL callback.
void __ARQCallbackHack(u32 unused)
{
}
|
||||
|
||||
void __ARQInterruptServiceRoutine()
|
||||
{
|
||||
if (__ARQCallbackHi) {
|
||||
__ARQCallbackHi((u32)__ARQRequestPendingHi);
|
||||
__ARQRequestPendingHi = NULL;
|
||||
__ARQCallbackHi = NULL;
|
||||
} else if (__ARQCallbackLo) {
|
||||
__ARQCallbackLo((u32)__ARQRequestPendingLo);
|
||||
__ARQRequestPendingLo = NULL;
|
||||
__ARQCallbackLo = NULL;
|
||||
}
|
||||
__ARQPopTaskQueueHi();
|
||||
if (__ARQRequestPendingHi == 0) {
|
||||
__ARQServiceQueueLo();
|
||||
}
|
||||
}
|
||||
|
||||
void ARQInit(void)
|
||||
{
|
||||
if (__ARQ_init_flag != 1) {
|
||||
__ARQRequestQueueHi = __ARQRequestQueueLo = NULL;
|
||||
__ARQChunkSize = 0x1000;
|
||||
ARRegisterDMACallback(__ARQInterruptServiceRoutine);
|
||||
__ARQRequestPendingHi = NULL;
|
||||
__ARQRequestPendingLo = NULL;
|
||||
__ARQCallbackHi = NULL;
|
||||
__ARQCallbackLo = NULL;
|
||||
__ARQ_init_flag = 1;
|
||||
}
|
||||
}
|
||||
|
||||
void ARQPostRequest(struct ARQRequest* request, u32 owner, u32 type,
|
||||
u32 priority, u32 source, u32 dest, u32 length,
|
||||
ARQCallback callback)
|
||||
{
|
||||
int level;
|
||||
|
||||
ASSERTLINE(0x1A9, request);
|
||||
ASSERTLINE(0x1AA, (type == ARQ_TYPE_MRAM_TO_ARAM)
|
||||
|| (type == ARQ_TYPE_ARAM_TO_MRAM));
|
||||
ASSERTLINE(0x1AB, (priority == ARQ_PRIORITY_LOW)
|
||||
|| (priority == ARQ_PRIORITY_HIGH));
|
||||
ASSERTLINE(0x1AE, (length % ARQ_DMA_ALIGNMENT) == 0);
|
||||
request->next = NULL;
|
||||
request->owner = owner;
|
||||
request->type = type;
|
||||
request->source = source;
|
||||
request->dest = dest;
|
||||
request->length = length;
|
||||
if (callback) {
|
||||
request->callback = callback;
|
||||
} else {
|
||||
request->callback = __ARQCallbackHack;
|
||||
}
|
||||
level = OSDisableInterrupts();
|
||||
switch (priority) {
|
||||
case ARQ_PRIORITY_LOW:
|
||||
if (__ARQRequestQueueLo) {
|
||||
__ARQRequestTailLo->next = request;
|
||||
} else {
|
||||
__ARQRequestQueueLo = request;
|
||||
}
|
||||
__ARQRequestTailLo = request;
|
||||
break;
|
||||
case ARQ_PRIORITY_HIGH:
|
||||
if (__ARQRequestQueueHi) {
|
||||
__ARQRequestTailHi->next = request;
|
||||
} else {
|
||||
__ARQRequestQueueHi = request;
|
||||
}
|
||||
__ARQRequestTailHi = request;
|
||||
break;
|
||||
}
|
||||
if ((__ARQRequestPendingHi == 0) && (__ARQRequestPendingLo == 0)) {
|
||||
__ARQPopTaskQueueHi();
|
||||
if (__ARQRequestPendingHi == 0) {
|
||||
__ARQServiceQueueLo();
|
||||
}
|
||||
}
|
||||
OSRestoreInterrupts(level);
|
||||
}
|
||||
@@ -0,0 +1,563 @@
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/mtx.h>
|
||||
#include <macros.h>
|
||||
|
||||
#define qr0 0
|
||||
|
||||
// unsorted externs
|
||||
extern f32 sinf(f32);
|
||||
extern f32 cosf(f32);
|
||||
extern f32 tanf(f32);
|
||||
|
||||
// initialized data, not .sbss: the {0.0f, 1.0f} pair loaded by PSMTXConcat
|
||||
static float Unit01[2] = { 0.0f, 1.0f };
|
||||
|
||||
// MEME: if this function is not here, 0.0f and 1.0f have wrong order in .sdata2
|
||||
// Sets `mtx` to the identity transform: ones on the diagonal of the 3x3 part
// and zeros everywhere else, including the fourth column, so all twelve
// elements are written (the same 48 bytes PSMTXIdentity stores).
// 1.0f is still the first constant referenced and 0.0f the second.
void C_MTXIdentity(GC_Mtx mtx)
{
    mtx[0][0] = 1.0f;
    mtx[0][1] = 0.0f;
    mtx[0][2] = 0.0f;
    mtx[0][3] = 0.0f;

    mtx[1][0] = 0.0f;
    mtx[1][1] = 1.0f;
    mtx[1][2] = 0.0f;
    mtx[1][3] = 0.0f;

    mtx[2][0] = 0.0f;
    mtx[2][1] = 0.0f;
    mtx[2][2] = 1.0f;
    mtx[2][3] = 0.0f;
}
|
||||
|
||||
void PSMTXIdentity(register GC_Mtx m)
|
||||
{
|
||||
register f32 c_zero = 0.0f;
|
||||
register f32 c_one = 1.0f;
|
||||
register f32 c_01;
|
||||
register f32 c_10;
|
||||
|
||||
#ifdef __MWERKS__ // clang-format off
|
||||
asm {
|
||||
psq_st c_zero, 8(m), 0, qr0
|
||||
ps_merge01 c_01, c_zero, c_one
|
||||
psq_st c_zero, 24(m), 0, qr0
|
||||
ps_merge10 c_10, c_one, c_zero
|
||||
psq_st c_zero, 32(m), 0, qr0
|
||||
psq_st c_01, 16(m), 0, qr0
|
||||
psq_st c_10, 0(m), 0, qr0
|
||||
psq_st c_10, 40(m), 0, qr0
|
||||
}
|
||||
#endif // clang-format on
|
||||
}
|
||||
|
||||
// Copies 48 bytes from `src` to `dst` as six paired-single load/store pairs,
// in ascending address order. The body is only emitted for Metrowerks
// compilers.
asm void PSMTXCopy(const register GC_Mtx src, register GC_Mtx dst)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f0, 0(src), 0, qr0
    psq_st  f0, 0(dst), 0, qr0

    psq_l   f1, 8(src), 0, qr0
    psq_st  f1, 8(dst), 0, qr0

    psq_l   f2, 16(src), 0, qr0
    psq_st  f2, 16(dst), 0, qr0

    psq_l   f3, 24(src), 0, qr0
    psq_st  f3, 24(dst), 0, qr0

    psq_l   f4, 32(src), 0, qr0
    psq_st  f4, 32(dst), 0, qr0

    psq_l   f5, 40(src), 0, qr0
    psq_st  f5, 40(dst), 0, qr0
#endif // clang-format on
}
|
||||
|
||||
// Paired-single matrix concatenation: reads `mA` and `mB` and writes the
// 48-byte result to `mAB`. All loads from both inputs are issued before the
// first store to `mAB`.
// Uses a 64-byte stack frame to preserve f14, f15 and f31, and loads the
// Unit01 {0, 1} pair into f31. The body is only emitted for Metrowerks
// compilers; instruction order is kept exactly as scheduled.
asm void PSMTXConcat(const register GC_Mtx mA, const register GC_Mtx mB, register GC_Mtx mAB)
{
#ifdef __MWERKS__ // clang-format off
    nofralloc

    stwu        r1, -64(r1)             // open frame
    psq_l       f0, 0(mA), 0, qr0
    stfd        f14, 8(r1)              // save f14
    psq_l       f6, 0(mB), 0, qr0
    lis         r6, Unit01@ha
    psq_l       f7, 8(mB), 0, qr0
    stfd        f15, 16(r1)             // save f15
    addi        r6, r6, Unit01@l
    stfd        f31, 40(r1)             // save f31
    psq_l       f8, 16(mB), 0, qr0
    ps_muls0    f12, f6, f0
    psq_l       f2, 16(mA), 0, qr0
    ps_muls0    f13, f7, f0
    psq_l       f31, 0(r6), 0, qr0      // f31 = Unit01 pair
    ps_muls0    f14, f6, f2
    psq_l       f9, 24(mB), 0, qr0
    ps_muls0    f15, f7, f2
    psq_l       f1, 8(mA), 0, qr0
    ps_madds1   f12, f8, f0, f12
    psq_l       f3, 24(mA), 0, qr0
    ps_madds1   f14, f8, f2, f14
    psq_l       f10, 32(mB), 0, qr0
    ps_madds1   f13, f9, f0, f13
    psq_l       f11, 40(mB), 0, qr0
    ps_madds1   f15, f9, f2, f15
    psq_l       f4, 32(mA), 0, qr0
    psq_l       f5, 40(mA), 0, qr0
    ps_madds0   f12, f10, f1, f12
    ps_madds0   f13, f11, f1, f13
    ps_madds0   f14, f10, f3, f14
    ps_madds0   f15, f11, f3, f15
    psq_st      f12, 0(mAB), 0, qr0     // result bytes 0..7
    ps_muls0    f2, f6, f4
    ps_madds1   f13, f31, f1, f13
    ps_muls0    f0, f7, f4
    psq_st      f14, 16(mAB), 0, qr0    // result bytes 16..23
    ps_madds1   f15, f31, f3, f15
    psq_st      f13, 8(mAB), 0, qr0     // result bytes 8..15
    ps_madds1   f2, f8, f4, f2
    ps_madds1   f0, f9, f4, f0
    ps_madds0   f2, f10, f5, f2
    lfd         f14, 8(r1)              // restore f14
    psq_st      f15, 24(mAB), 0, qr0    // result bytes 24..31
    ps_madds0   f0, f11, f5, f0
    psq_st      f2, 32(mAB), 0, qr0     // result bytes 32..39
    ps_madds1   f0, f31, f5, f0
    lfd         f15, 16(r1)             // restore f15
    psq_st      f0, 40(mAB), 0, qr0     // result bytes 40..47
    lfd         f31, 40(r1)             // restore f31
    addi        r1, r1, 64              // close frame
    blr
#endif // clang-format on
}
|
||||
|
||||
// Paired-single matrix inverse of `src`, written to `inv`.
// Returns 0 without writing `inv` when the value accumulated in f7
// (presumably the determinant of the 3x3 part - confirm) compares equal to
// zero; otherwise returns 1 after storing all twelve result elements.
// The reciprocal comes from `fres` followed by one refinement step
// (2*x - d*x*x). The body is only emitted for Metrowerks compilers;
// instruction order is kept exactly as scheduled.
asm u32 PSMTXInverse(const register GC_Mtx src, register GC_Mtx inv)
{
#ifdef __MWERKS__ // clang-format off
    // Load the 3x3 part: first element of each row alone, then the next two.
    psq_l       f0, 0(src), 1, qr0
    psq_l       f1, 4(src), 0, qr0
    psq_l       f2, 16(src), 1, qr0
    ps_merge10  f6, f1, f0
    psq_l       f3, 20(src), 0, qr0
    psq_l       f4, 32(src), 1, qr0
    ps_merge10  f7, f3, f2
    psq_l       f5, 36(src), 0, qr0

    // Products/differences that form f8..f13, with f7 accumulating the value
    // tested against zero below.
    ps_mul      f11, f3, f6
    ps_mul      f13, f5, f7
    ps_merge10  f8, f5, f4
    ps_msub     f11, f1, f7, f11
    ps_mul      f12, f1, f8
    ps_msub     f13, f3, f8, f13
    ps_mul      f10, f3, f4
    ps_msub     f12, f5, f6, f12
    ps_mul      f9, f0, f5
    ps_mul      f8, f1, f2
    ps_sub      f6, f6, f6              // f6 = f6 - f6
    ps_msub     f10, f2, f5, f10
    ps_mul      f7, f0, f13
    ps_msub     f9, f1, f4, f9
    ps_madd     f7, f2, f12, f7
    ps_msub     f8, f0, f3, f8
    ps_madd     f7, f4, f11, f7

    // Early out with 0 when f7 == f6.
    ps_cmpo0    cr0, f7, f6
    bne         skip_return
    li          r3, 0
    blr

skip_return:
    // f0 = refined reciprocal of f7.
    fres        f0, f7
    ps_add      f6, f0, f0
    ps_mul      f5, f0, f0
    ps_nmsub    f0, f7, f5, f6

    // Scale by the reciprocal, fold in the fourth column of src
    // (offsets 12, 28, 44) and store the result.
    lfs         f1, 12(src)
    ps_muls0    f13, f13, f0
    lfs         f2, 28(src)
    ps_muls0    f12, f12, f0
    lfs         f3, 44(src)
    ps_muls0    f11, f11, f0
    ps_merge00  f5, f13, f12
    ps_muls0    f10, f10, f0
    ps_merge11  f4, f13, f12
    ps_muls0    f9, f9, f0
    psq_st      f5, 0(inv), 0, qr0
    ps_mul      f6, f13, f1
    psq_st      f4, 16(inv), 0, qr0
    ps_muls0    f8, f8, f0
    ps_madd     f6, f12, f2, f6
    psq_st      f10, 32(inv), 1, qr0
    ps_nmadd    f6, f11, f3, f6
    psq_st      f9, 36(inv), 1, qr0
    ps_mul      f7, f10, f1
    ps_merge00  f5, f11, f6
    psq_st      f8, 40(inv), 1, qr0
    ps_merge11  f4, f11, f6
    psq_st      f5, 8(inv), 0, qr0
    ps_madd     f7, f9, f2, f7
    psq_st      f4, 24(inv), 0, qr0
    ps_nmadd    f7, f8, f3, f7
    li          r3, 1
    psq_st      f7, 44(inv), 1, qr0
#endif // clang-format on
}
|
||||
|
||||
// Builds a rotation matrix in `m` for an angle given in radians.
//   axis - axis selector, passed through to PSMTXRotTrig.
//   rad  - rotation angle; its sine and cosine are computed here (sine
//          first) and handed to PSMTXRotTrig.
void PSMTXRotRad(GC_Mtx m, char axis, f32 rad)
{
    f32 sin_rad;
    f32 cos_rad;

    sin_rad = sinf(rad);
    cos_rad = cosf(rad);

    PSMTXRotTrig(m, axis, sin_rad, cos_rad);
}
|
||||
|
||||
// Builds a rotation matrix in `m` from a precomputed sine and cosine.
//   axis - 'x', 'y' or 'z'; bit 0x20 is OR-ed in first, so the upper-case
//          letters select the same case. Any other value leaves `m`
//          untouched.
//   sinA, cosA - sine and cosine of the rotation angle.
// Each case fills the whole 48-byte matrix with paired-single stores. The asm
// body is only emitted for Metrowerks compilers.
void PSMTXRotTrig(register GC_Mtx m, register char axis, register f32 sinA,
                  register f32 cosA)
{
    register f32 fc0;
    register f32 fc1;
    register f32 nsinA;
    register f32 fw0, fw1, fw2, fw3;

    fc0 = 0.0f;
    fc1 = 1.0f;

#ifdef __MWERKS__ // clang-format off
    asm {
        ori         axis, axis, 0x20        // fold 'X'/'Y'/'Z' onto 'x'/'y'/'z'
        ps_neg      nsinA, sinA             // nsinA = -sinA
        cmplwi      axis, 'x'
        beq         _case_x
        cmplwi      axis, 'y'
        beq         _case_y
        cmplwi      axis, 'z'
        beq         _case_z
        b           _end

    _case_x:
        psq_st      fc1, 0(m), 1, qr0
        psq_st      fc0, 4(m), 0, qr0
        ps_merge00  fw0, sinA, cosA         // fw0 = (sinA, cosA)
        psq_st      fc0, 12(m), 0, qr0
        ps_merge00  fw1, cosA, nsinA        // fw1 = (cosA, -sinA)
        psq_st      fc0, 28(m), 0, qr0
        psq_st      fc0, 44(m), 1, qr0
        psq_st      fw0, 36(m), 0, qr0
        psq_st      fw1, 20(m), 0, qr0
        b           _end;

    _case_y:
        ps_merge00  fw0, cosA, fc0          // fw0 = (cosA, 0)
        ps_merge00  fw1, fc0, fc1           // fw1 = (0, 1)
        psq_st      fc0, 24(m), 0, qr0
        psq_st      fw0, 0(m), 0, qr0
        ps_merge00  fw2, nsinA, fc0         // fw2 = (-sinA, 0)
        ps_merge00  fw3, sinA, fc0          // fw3 = (sinA, 0)
        psq_st      fw0, 40(m), 0, qr0
        psq_st      fw1, 16(m), 0, qr0
        psq_st      fw3, 8(m), 0, qr0
        psq_st      fw2, 32(m), 0, qr0
        b           _end;

    _case_z:
        psq_st      fc0, 8(m), 0, qr0
        ps_merge00  fw0, sinA, cosA         // fw0 = (sinA, cosA)
        ps_merge00  fw2, cosA, nsinA        // fw2 = (cosA, -sinA)
        psq_st      fc0, 24(m), 0, qr0
        psq_st      fc0, 32(m), 0, qr0
        ps_merge00  fw1, fc1, fc0           // fw1 = (1, 0)
        psq_st      fw0, 16(m), 0, qr0
        psq_st      fw2, 0(m), 0, qr0
        psq_st      fw1, 40(m), 0, qr0

    _end:
    }
#endif // clang-format on
}
|
||||
|
||||
static inline void __PSMTXRotAxisRadInternal(register GC_Mtx m,
|
||||
const register Vec* axis,
|
||||
register f32 sT, register f32 cT)
|
||||
{
|
||||
}
|
||||
|
||||
// Build in `m` a rotation of `rad` radians about an arbitrary axis.
//
// m    - destination matrix; six paired stores cover byte offsets 0x00..0x2F.
// axis - rotation axis; it is normalized into a local copy first via
//        PSVECNormalize, so the caller's vector is not modified here.
// rad  - rotation angle in radians (fed to sinf/cosf).
//
// With (x, y, z) the normalized axis, s = sin, c = cos and t = 1 - c, the
// stores write the usual axis-angle rotation, e.g. x*x*t + c in the first
// element and z*z*t + c at offset 0x28, with 0 in the fourth float of each
// row. The [row][col] reading assumes rows of four f32 -- TODO confirm.
void PSMTXRotAxisRad(register GC_Mtx m, const Vec* axis, register f32 rad)
{
    register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
    register f32 tmp5, tmp6, tmp7, tmp8, tmp9;

    register f32 sT;
    register f32 cT;
    register f32 oneMinusCosT;
    register f32 zero;
    Vec axisNormalized;
    register Vec* axisNormalizedPtr;

    zero = 0.0f;
    axisNormalizedPtr = &axisNormalized;
    sT = sinf(rad);
    cT = cosf(rad);
    oneMinusCosT = 1.0f - cT;

    PSVECNormalize(axis, axisNormalizedPtr);

    // `rad` reused here -- absolutely disgusting. From this point on it no
    // longer holds the angle: it is overwritten with the axis x/y pair.
    // Also might've been an inline as in prime.
#ifdef __MWERKS__ // clang-format off
    asm {
        psq_l       rad, 0x0(axisNormalizedPtr), 0, qr0  // (x, y)
        lfs         tmp1, 0x8(axisNormalizedPtr)         // z
        ps_merge00  tmp0, cT, cT                         // (c, c)
        ps_muls0    tmp4, rad, oneMinusCosT              // (x*t, y*t)
        ps_muls0    tmp5, tmp1, oneMinusCosT             // z*t
        ps_muls1    tmp3, tmp4, rad                      // (x*y*t, y*y*t)
        ps_muls0    tmp2, tmp4, rad                      // (x*x*t, x*y*t)
        ps_muls0    rad, rad, sT                         // (x*s, y*s)
        ps_muls0    tmp4, tmp4, tmp1                     // (x*z*t, y*z*t)
        fnmsubs     tmp6, tmp1, sT, tmp3                 // x*y*t - z*s
        fmadds      tmp7, tmp1, sT, tmp3                 // x*y*t + z*s
        ps_neg      tmp9, rad                            // (-x*s, -y*s)
        ps_sum0     tmp8, tmp4, zero, rad
        ps_sum0     tmp2, tmp2, tmp6, tmp0
        ps_sum1     tmp3, tmp0, tmp7, tmp3
        ps_sum0     tmp6, tmp9, zero, tmp4
        ps_sum0     tmp9, tmp4, tmp4, tmp9
        psq_st      tmp8, 0x8(m), 0, qr0                 // row 0, cols 2-3
        ps_muls0    tmp5, tmp5, tmp1                     // z*z*t
        psq_st      tmp2, 0x0(m), 0, qr0                 // row 0, cols 0-1
        ps_sum1     tmp4, rad, tmp9, tmp4
        psq_st      tmp3, 0x10(m), 0, qr0                // row 1, cols 0-1
        ps_sum0     tmp5, tmp5, zero, tmp0
        psq_st      tmp6, 0x18(m), 0, qr0                // row 1, cols 2-3
        psq_st      tmp4, 0x20(m), 0, qr0                // row 2, cols 0-1
        psq_st      tmp5, 0x28(m), 0, qr0                // row 2, cols 2-3
    }
#endif // clang-format on
}
|
||||
|
||||
// Fill `m` with a pure translation matrix.
//
// m          - destination matrix; all 12 floats at byte offsets 0..44 are
//              written.
// xT, yT, zT - translation components, stored at byte offsets 12, 28 and 44.
//
// The floats at offsets 0, 20 and 40 receive 1.0 and every other float
// receives 0.0. Read as rows of four f32 this is an identity 3x3 part with
// the translation in the fourth column -- layout assumed, TODO confirm.
void PSMTXTrans(register GC_Mtx m, register f32 xT, register f32 yT,
                register f32 zT)
{
    register f32 c0 = 0.0F;
    register f32 c1 = 1.0F;
#ifdef __MWERKS__ // clang-format off
    asm {
        stfs    xT, 12(m)
        stfs    yT, 28(m)
        psq_st  c0, 4(m), 0, qr0    // two zeros at offsets 4 and 8
        psq_st  c0, 32(m), 0, qr0   // two zeros at offsets 32 and 36
        stfs    c0, 16(m)
        stfs    c1, 20(m)
        stfs    c0, 24(m)
        stfs    c1, 40(m)
        stfs    zT, 44(m)
        stfs    c1, 0(m)
    }
#endif // clang-format on
}
|
||||
|
||||
// Copy `src` to `dst` while adding a translation to the last float of each
// 16-byte row.
//
// src        - source matrix, read as six float pairs at offsets 0..40.
// dst        - destination matrix, written as six float pairs at offsets
//              0..40. Every load is issued before the first store.
// xT, yT, zT - amounts added to the floats at byte offsets 12, 28 and 44.
//
// Each ps_sum1 keeps the first float of the pair and replaces the second
// with (translation + second float). The other pairs are copied unchanged.
asm void PSMTXTransApply(const register GC_Mtx src, register GC_Mtx dst, register f32 xT,
                         register f32 yT, register f32 zT)
{
#ifdef __MWERKS__ // clang-format off
    nofralloc
    psq_l       fp4, 0(src), 0, qr0
    psq_l       fp5, 8(src), 0, qr0
    psq_l       fp7, 24(src), 0, qr0
    psq_l       fp8, 40(src), 0, qr0
    ps_sum1     fp5, xT, fp5, fp5       // second float += xT
    psq_l       fp6, 16(src), 0, qr0
    ps_sum1     fp7, yT, fp7, fp7       // second float += yT
    psq_l       fp9, 32(src), 0, qr0
    ps_sum1     fp8, zT, fp8, fp8       // second float += zT

    psq_st      fp4, 0(dst), 0, qr0
    psq_st      fp5, 8(dst), 0, qr0
    psq_st      fp6, 16(dst), 0, qr0
    psq_st      fp7, 24(dst), 0, qr0
    psq_st      fp9, 32(dst), 0, qr0
    psq_st      fp8, 40(dst), 0, qr0
    blr
#endif // clang-format on
}
|
||||
|
||||
// Fill `m` with a pure scale matrix.
//
// m          - destination matrix; all 12 floats at byte offsets 0..44 are
//              written.
// xS, yS, zS - scale factors, stored at byte offsets 0, 20 and 40.
//
// Every other float is set to 0.0, using paired stores where two adjacent
// zeros are needed. The quantization register is given as the literal 0
// here rather than through a qr0 name.
void PSMTXScale(register GC_Mtx m, register f32 xS, register f32 yS,
                register f32 zS)
{
    register f32 c0 = 0.0F;
#ifdef __MWERKS__ // clang-format off
    asm {
        stfs    xS, 0(m)
        psq_st  c0, 4(m), 0, 0      // zeros at offsets 4 and 8
        psq_st  c0, 12(m), 0, 0     // zeros at offsets 12 and 16
        stfs    yS, 20(m)
        psq_st  c0, 24(m), 0, 0     // zeros at offsets 24 and 28
        psq_st  c0, 32(m), 0, 0     // zeros at offsets 32 and 36
        stfs    zS, 40(m)
        stfs    c0, 44(m)
    }
#endif // clang-format on
}
|
||||
|
||||
// Write to `dst` the matrix `src` with each 16-byte row multiplied by one
// scale factor.
//
// src        - source matrix, read as six float pairs at offsets 0..40.
// dst        - destination matrix, written as six float pairs at offsets
//              0..40.
// xS, yS, zS - factors applied to the floats at offsets 0..15, 16..31 and
//              32..47 respectively. All four floats of a row are scaled,
//              including the last one.
//
// Loads, multiplies and stores are interleaved: the first store to `dst`
// is issued before the final load from `src` (offset 40).
asm void PSMTXScaleApply(const register GC_Mtx src, register GC_Mtx dst, register f32 xS,
                         register f32 yS, register f32 zS)
{
#ifdef __MWERKS__ // clang-format off
    nofralloc
    psq_l       fp4, 0(src), 0, 0
    psq_l       fp5, 8(src), 0, 0
    ps_muls0    fp4, fp4, xS
    psq_l       fp6, 16(src), 0, 0
    ps_muls0    fp5, fp5, xS
    psq_l       fp7, 24(src), 0, 0
    ps_muls0    fp6, fp6, yS
    psq_l       fp8, 32(src), 0, 0
    psq_st      fp4, 0(dst), 0, 0
    ps_muls0    fp7, fp7, yS
    psq_l       fp2, 40(src), 0, 0
    psq_st      fp5, 8(dst), 0, 0
    ps_muls0    fp8, fp8, zS
    psq_st      fp6, 16(dst), 0, 0
    ps_muls0    fp2, fp2, zS
    psq_st      fp7, 24(dst), 0, 0
    psq_st      fp8, 32(dst), 0, 0
    psq_st      fp2, 40(dst), 0, 0
    blr
#endif // clang-format on
}
|
||||
|
||||
void PSMTXQuat(register GC_Mtx m, const register PSQuaternion* q)
|
||||
{
|
||||
register f32 c_zero, c_one, c_two, scale;
|
||||
register f32 tmp0, tmp1, tmp2, tmp3, tmp4;
|
||||
register f32 tmp5, tmp6, tmp7, tmp8, tmp9;
|
||||
|
||||
c_one = 1.0F;
|
||||
#ifdef __MWERKS__ // clang-format off
|
||||
asm {
|
||||
psq_l tmp0, 0(q), 0, 0
|
||||
psq_l tmp1, 8(q), 0, 0
|
||||
fsubs c_zero, c_one, c_one
|
||||
fadds c_two, c_one, c_one
|
||||
ps_mul tmp2, tmp0, tmp0
|
||||
ps_merge10 tmp5, tmp0, tmp0
|
||||
ps_madd tmp4, tmp1, tmp1, tmp2
|
||||
ps_mul tmp3, tmp1, tmp1
|
||||
ps_sum0 scale, tmp4, tmp4, tmp4
|
||||
ps_muls1 tmp7, tmp5, tmp1
|
||||
fres tmp9, scale
|
||||
ps_sum1 tmp4, tmp3, tmp4, tmp2
|
||||
ps_nmsub scale, scale, tmp9, c_two
|
||||
ps_muls1 tmp6, tmp1, tmp1
|
||||
ps_mul scale, tmp9, scale
|
||||
ps_sum0 tmp2, tmp2, tmp2, tmp2
|
||||
fmuls scale, scale, c_two
|
||||
ps_madd tmp8, tmp0, tmp5, tmp6
|
||||
ps_msub tmp6, tmp0, tmp5, tmp6
|
||||
psq_st c_zero, 12(m), 1, 0
|
||||
ps_nmsub tmp2, tmp2, scale, c_one
|
||||
ps_nmsub tmp4, tmp4, scale, c_one
|
||||
psq_st c_zero, 44(m), 1, 0
|
||||
ps_mul tmp8, tmp8, scale
|
||||
ps_mul tmp6, tmp6, scale
|
||||
psq_st tmp2, 40(m), 1, 0
|
||||
ps_madds0 tmp5, tmp0, tmp1, tmp7
|
||||
ps_merge00 tmp1, tmp8, tmp4
|
||||
ps_nmsub tmp7, tmp7, c_two, tmp5
|
||||
ps_merge10 tmp0, tmp4, tmp6
|
||||
psq_st tmp1, 16(m), 0, 0
|
||||
ps_mul tmp5, tmp5, scale
|
||||
ps_mul tmp7, tmp7, scale
|
||||
psq_st tmp0, 0(m), 0, 0
|
||||
psq_st tmp5, 8(m), 1, 0
|
||||
ps_merge10 tmp3, tmp7, c_zero
|
||||
ps_merge01 tmp9, tmp7, tmp5
|
||||
psq_st tmp3, 24(m), 0, 0
|
||||
psq_st tmp9, 32(m), 0, 0
|
||||
}
|
||||
#endif // clang-format on
|
||||
}
|
||||
|
||||
// Build a camera (view) matrix from a position, an up hint and a target.
//
// m      - destination matrix; rows 0..2, columns 0..3 are written.
// camPos - camera position.
// camUp  - approximate up direction; only used to derive the right vector.
// target - point the camera looks at.
//
// vLook is the normalized direction from the target towards the camera,
// vRight is normalize(camUp x vLook) and vUp is vLook x vRight. The three
// vectors become the matrix rows, and the fourth column of each row is the
// negated dot product of that row with camPos. The asserts reject NULL
// arguments. Degenerate input (camPos == target, or camUp parallel to the
// look direction) is not checked.
void C_MTXLookAt(GC_Mtx m, const Vec* camPos, const Vec* camUp, const Vec* target) {
    Vec vLook;
    Vec vRight;
    Vec vUp;

    ASSERTMSGLINE(2105, m, "MTXLookAt(): NULL MtxPtr 'm' ");
    ASSERTMSGLINE(2106, camPos, "MTXLookAt(): NULL VecPtr 'camPos' ");
    ASSERTMSGLINE(2107, camUp, "MTXLookAt(): NULL VecPtr 'camUp' ");
    ASSERTMSGLINE(2108, target, "MTXLookAt(): NULL Point3dPtr 'target' ");

    // Look vector points from the target back to the camera.
    vLook.x = camPos->x - target->x;
    vLook.y = camPos->y - target->y;
    vLook.z = camPos->z - target->z;
    VECNormalize(&vLook, &vLook);
    VECCrossProduct(camUp, &vLook, &vRight);
    VECNormalize(&vRight, &vRight);
    VECCrossProduct(&vLook, &vRight, &vUp);
    m[0][0] = vRight.x;
    m[0][1] = vRight.y;
    m[0][2] = vRight.z;
    m[0][3] = -((camPos->z * vRight.z) + ((camPos->x * vRight.x) + (camPos->y * vRight.y)));
    m[1][0] = vUp.x;
    m[1][1] = vUp.y;
    m[1][2] = vUp.z;
    m[1][3] = -((camPos->z * vUp.z) + ((camPos->x * vUp.x) + (camPos->y * vUp.y)));
    m[2][0] = vLook.x;
    m[2][1] = vLook.y;
    m[2][2] = vLook.z;
    m[2][3] = -((camPos->z * vLook.z) + ((camPos->x * vLook.x) + (camPos->y * vLook.y)));
}
|
||||
|
||||
// Build a frustum-style projection matrix with an extra scale and offset
// folded into rows 0 and 1.
//
// m              - destination matrix; rows 0..2, columns 0..3 are written.
// t, b, l, r     - top, bottom, left and right extents of the near window.
// n              - near-plane distance.
// scaleS, scaleT - factors multiplied into rows 0 and 1.
// transS, transT - offsets subtracted in column 2 of rows 0 and 1.
//
// Row 2 is (0, 0, -1, 0). No guard exists for r == l or t == b, which makes
// the reciprocal a division by zero.
void C_MTXLightFrustum(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 scaleS,
                       f32 scaleT, f32 transS, f32 transT)
{
    f32 _tmp;

    _tmp = 1 / (r - l);
    m[0][0] = (scaleS * (2 * n * _tmp));
    m[0][1] = 0;
    m[0][2] = (scaleS * (_tmp * (r + l))) - transS;
    m[0][3] = 0;
    _tmp = 1 / (t - b);
    m[1][0] = 0;
    m[1][1] = (scaleT * (2 * n * _tmp));
    m[1][2] = (scaleT * (_tmp * (t + b))) - transT;
    m[1][3] = 0;
    m[2][0] = 0;
    m[2][1] = 0;
    m[2][2] = -1;
    m[2][3] = 0;
}
|
||||
|
||||
// Build a perspective-style projection matrix with an extra scale and
// offset folded into rows 0 and 1.
//
// m              - destination matrix; rows 0..2, columns 0..3 are written.
// fovY           - full vertical field of view. It is halved and multiplied
//                  by 0.017453293f (pi / 180) before tanf, i.e. degrees.
// aspect         - divides the cotangent for row 0 (width / height ratio).
// scaleS, scaleT - factors multiplied into m[0][0] and m[1][1].
// transS, transT - stored negated in column 2 of rows 0 and 1.
//
// Row 2 is (0, 0, -1, 0). aspect == 0 and a zero tangent are not guarded.
void C_MTXLightPerspective(GC_Mtx m, f32 fovY, f32 aspect, f32 scaleS, f32 scaleT,
                           f32 transS, f32 transT)
{
    f32 angle;
    f32 cot;

    angle = (0.5f * fovY);
    angle = angle * 0.017453293f;
    cot = 1 / tanf(angle);
    m[0][0] = (scaleS * (cot / aspect));
    m[0][1] = 0;
    m[0][2] = -transS;
    m[0][3] = 0;
    m[1][0] = 0;
    m[1][1] = (cot * scaleT);
    m[1][2] = -transT;
    m[1][3] = 0;
    m[2][0] = 0;
    m[2][1] = 0;
    m[2][2] = -1;
    m[2][3] = 0;
}
|
||||
|
||||
// Build an orthographic-style projection matrix with an extra scale and
// offset folded into rows 0 and 1.
//
// m              - destination matrix; rows 0..2, columns 0..3 are written.
// t, b, l, r     - top, bottom, left and right extents of the view volume.
// scaleS, scaleT - factors multiplied into rows 0 and 1.
// transS, transT - offsets added in column 3 of rows 0 and 1.
//
// Row 2 is (0, 0, 0, 1). No guard exists for r == l or t == b, which makes
// the reciprocal a division by zero.
void C_MTXLightOrtho(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 scaleS, f32 scaleT,
                     f32 transS, f32 transT)
{
    f32 _tmp;

    _tmp = 1 / (r - l);
    m[0][0] = (2 * _tmp * scaleS);
    m[0][1] = 0;
    m[0][2] = 0;
    m[0][3] = (transS + (scaleS * (_tmp * -(r + l))));
    _tmp = 1 / (t - b);
    m[1][0] = 0;
    m[1][1] = (2 * _tmp * scaleT);
    m[1][2] = 0;
    m[1][3] = (transT + (scaleT * (_tmp * -(t + b))));
    m[2][0] = 0;
    m[2][1] = 0;
    m[2][2] = 0;
    m[2][3] = 1;
}
|
||||
@@ -0,0 +1,88 @@
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/mtx.h>
|
||||
#include <macros.h>
|
||||
|
||||
extern f32 tanf(f32);
|
||||
|
||||
// NOTE: this is not present in SMS but needed for .sdata2 to match
|
||||
// stolen from prime
|
||||
void C_MTXFrustum(Mtx44 m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
|
||||
{
|
||||
f32 tmp;
|
||||
|
||||
tmp = 1.0f / (r - l);
|
||||
m[0][0] = (2 * n) * tmp;
|
||||
m[0][1] = 0.0f;
|
||||
m[0][2] = (r + l) * tmp;
|
||||
m[0][3] = 0.0f;
|
||||
|
||||
tmp = 1.0f / (t - b);
|
||||
m[1][0] = 0.0f;
|
||||
m[1][1] = (2 * n) * tmp;
|
||||
m[1][2] = (t + b) * tmp;
|
||||
m[1][3] = 0.0f;
|
||||
|
||||
m[2][0] = 0.0f;
|
||||
m[2][1] = 0.0f;
|
||||
|
||||
tmp = 1.0f / (f - n);
|
||||
m[2][2] = -(n)*tmp;
|
||||
m[2][3] = -(f * n) * tmp;
|
||||
m[3][0] = 0.0f;
|
||||
m[3][1] = 0.0f;
|
||||
m[3][2] = -1.0f;
|
||||
m[3][3] = 0.0f;
|
||||
}
|
||||
|
||||
// Build a perspective projection matrix from a field of view and aspect
// ratio.
//
// m      - destination matrix; rows 0..3, columns 0..3 are written.
// fovY   - full vertical field of view. It is halved and multiplied by
//          0.017453293f (pi / 180) before tanf, i.e. degrees.
// aspect - divides the cotangent for m[0][0] (width / height ratio).
// n, f   - near and far plane distances.
//
// Row 2 holds -n/(f-n) and -(f*n)/(f-n), and row 3 is (0, 0, -1, 0).
// aspect == 0, f == n and a zero tangent are not guarded.
// NOTE(review): the parameter is declared GC_Mtx but row 3 is written, and
// C_MTXFrustum takes Mtx44 for the same shape. Callers presumably pass a
// 4x4 matrix -- confirm the typedefs and consider declaring this as Mtx44.
void C_MTXPerspective(GC_Mtx m, f32 fovY, f32 aspect, f32 n, f32 f)
{
    f32 angle;
    f32 cot;
    f32 tmp;

    angle = (0.5f * fovY);
    angle = angle * 0.017453293f;
    cot = 1 / tanf(angle);
    m[0][0] = (cot / aspect);
    m[0][1] = 0;
    m[0][2] = 0;
    m[0][3] = 0;
    m[1][0] = 0;
    m[1][1] = (cot);
    m[1][2] = 0;
    m[1][3] = 0;
    m[2][0] = 0;
    m[2][1] = 0;
    tmp = 1 / (f - n);
    m[2][2] = (-n * tmp);
    m[2][3] = (tmp * -(f * n));
    m[3][0] = 0;
    m[3][1] = 0;
    m[3][2] = -1;
    m[3][3] = 0;
}
|
||||
|
||||
// Build an orthographic projection matrix from view-volume extents.
//
// m          - destination matrix; rows 0..3, columns 0..3 are written.
// t, b, l, r - top, bottom, left and right extents of the view volume.
// n, f       - near and far plane distances.
//
// Row 2 holds -1/(f-n) and -f/(f-n), and row 3 is (0, 0, 0, 1).
// No guard exists for r == l, t == b or f == n, each of which makes a
// reciprocal a division by zero.
// NOTE(review): the parameter is declared GC_Mtx but row 3 is written, and
// C_MTXFrustum takes Mtx44 for the same shape. Callers presumably pass a
// 4x4 matrix -- confirm the typedefs and consider declaring this as Mtx44.
void C_MTXOrtho(GC_Mtx m, f32 t, f32 b, f32 l, f32 r, f32 n, f32 f)
{
    f32 tmp;

    tmp = 1 / (r - l);
    m[0][0] = 2 * tmp;
    m[0][1] = 0;
    m[0][2] = 0;
    m[0][3] = (tmp * -(r + l));
    tmp = 1 / (t - b);
    m[1][0] = 0;
    m[1][1] = 2 * tmp;
    m[1][2] = 0;
    m[1][3] = (tmp * -(t + b));
    m[2][0] = 0;
    m[2][1] = 0;
    tmp = 1 / (f - n);
    m[2][2] = (-1 * tmp);
    m[2][3] = (-f * tmp);
    m[3][0] = 0;
    m[3][1] = 0;
    m[3][2] = 0;
    m[3][3] = 1;
}
|
||||
@@ -0,0 +1,103 @@
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/mtx.h>
|
||||
|
||||
#define qr0 0
|
||||
|
||||
// Transform the point `src` by `m`, translation included, and store the
// result in `dst`.
//
// m   - matrix, read as six float pairs at byte offsets 0..40 (three
//       16-byte rows).
// src - input vector; x/y are loaded as a pair and z as a single float.
// dst - output vector; x, y and z are stored one float at a time.
//
// The single-float load of z is relied on to put 1.0 in the second lane,
// which is what multiplies the fourth float of each row into the sum.
// dst.x is stored before the third row is loaded and before src is last
// used from registers, so src and dst may be the same vector. That is a
// reading of the instruction order, not a documented guarantee.
// NOTE(review): declared with Mtx44 although only three rows are read --
// confirm the intended parameter type.
asm void PSMTXMultVec(const register Mtx44 m, const register Vec* src, register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
    nofralloc
    psq_l       f0, Vec.x(src), 0, qr0      // (x, y)
    psq_l       f2, 0(m), 0, qr0
    psq_l       f1, Vec.z(src), 1, qr0      // (z, 1)
    ps_mul      f4, f2, f0
    psq_l       f3, 8(m), 0, qr0
    ps_madd     f5, f3, f1, f4
    psq_l       f8, 16(m), 0, qr0
    ps_sum0     f6, f5, f6, f5              // row 0 result in lane 0
    psq_l       f9, 24(m), 0, qr0
    ps_mul      f10, f8, f0
    psq_st      f6, Vec.x(dst), 1, qr0
    ps_madd     f11, f9, f1, f10
    psq_l       f2, 32(m), 0, qr0
    ps_sum0     f12, f11, f12, f11          // row 1 result in lane 0
    psq_l       f3, 40(m), 0, qr0
    ps_mul      f4, f2, f0
    psq_st      f12, Vec.y(dst), 1, qr0
    ps_madd     f5, f3, f1, f4
    ps_sum0     f6, f5, f6, f5              // row 2 result in lane 0
    psq_st      f6, Vec.z(dst), 1, qr0
    blr
#endif // clang-format on
}
|
||||
|
||||
// Transform an array of vectors by `m`, writing the results to a second
// array.
//
// m       - matrix, read once up front as six float pairs (offsets 0..0x28).
// srcBase - first input vector; advanced with update-form loads.
// dstBase - first output vector; biased by -4 so the update-form stores
//           land on x/y and then z of each element.
// count   - number of vectors.
//
// The loop is software-pipelined: the first vector is loaded and partly
// computed before `loop`, each iteration finishes one vector while starting
// the next, and the last vector is finished after the loop.
// CTR is loaded with count - 1 and the loop body runs before the first
// bdnz, so count must be at least 2. With count == 1 the counter would wrap
// instead of terminating, and count == 0 is likewise unsupported.
// No nofralloc/blr here: the function return is left to the compiler.
asm void PSMTXMultVecArray(const register GC_Mtx m, const register Vec* srcBase,
                           register Vec* dstBase, register u32 count)
{
#ifdef __MWERKS__ // clang-format off
    psq_l       f13, 0x0(m), 0, qr0
    psq_l       f12, 0x10(m), 0, qr0
    subi        count, count, 0x1           // loop runs count - 1 times
    psq_l       f11, 0x8(m), 0, qr0
    ps_merge00  f0, f13, f12
    subi        dstBase, dstBase, 0x4       // bias for the psq_stu pair
    psq_l       f10, 0x18(m), 0, qr0
    ps_merge11  f1, f13, f12

    mtctr       count
    psq_l       f4, 0x20(m), 0, qr0
    ps_merge00  f2, f11, f10
    psq_l       f5, 0x28(m), 0, qr0
    ps_merge11  f3, f11, f10
    psq_l       f6, Vec.x(srcBase), 0, qr0
    psq_lu      f7, Vec.z(srcBase), 1, qr0
    ps_madds0   f8, f0, f6, f3
    ps_mul      f9, f4, f6
    ps_madds1   f8, f1, f6, f8
    ps_madd     f10, f5, f7, f9

loop:
    psq_lu      f6, Vec.y(srcBase), 0, qr0  // next vector x/y
    ps_madds0   f12, f2, f7, f8
    psq_lu      f7, Vec.z(srcBase), 1, qr0  // next vector z
    ps_sum0     f13, f10, f9, f10
    ps_madds0   f8, f0, f6, f3
    ps_mul      f9, f4, f6
    psq_stu     f12, 0x4(dstBase), 0, qr0   // previous result x/y
    ps_madds1   f8, f1, f6, f8
    psq_stu     f13, 0x8(dstBase), 1, qr0   // previous result z
    ps_madd     f10, f5, f7, f9
    bdnz        loop

    // Finish the last vector started by the final loop iteration.
    ps_madds0   f12, f2, f7, f8
    ps_sum0     f13, f10, f9, f10
    psq_stu     f12, 0x4(dstBase), 0, qr0
    psq_stu     f13, 0x8(dstBase), 1, qr0
#endif // clang-format on
}
|
||||
|
||||
// Transform `src` by the first three floats of each 16-byte row of `m`,
// without adding the fourth float, and store the result in `dst`.
//
// m   - matrix, read as six float pairs at byte offsets 0..0x28.
// src - input vector; x/y are loaded as a pair and z as a single float.
// dst - output vector; x, y and z are stored one float at a time.
//
// For each row the x and y products are summed into lane 0 with ps_sum0,
// then the z product is added by ps_madd. Only lane 0 is stored, so the
// fourth float of the row never reaches the output.
// All loads from src finish before the first store to dst.
// No nofralloc/blr here: the function return is left to the compiler.
// NOTE(review): declared with Mtx44 although only three rows are read --
// confirm the intended parameter type.
asm void PSMTXMultVecSR(const register Mtx44 m, const register Vec* src, register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
    psq_l       f0, 0x0(m), 0, qr0
    psq_l       f6, Vec.x(src), 0, qr0      // (x, y)
    psq_l       f2, 0x10(m), 0, qr0
    ps_mul      f8, f0, f6
    psq_l       f4, 0x20(m), 0, qr0
    ps_mul      f10, f2, f6
    psq_l       f7, Vec.z(src), 1, qr0      // z
    ps_mul      f12, f4, f6
    psq_l       f3, 0x18(m), 0, qr0
    ps_sum0     f8, f8, f8, f8
    psq_l       f5, 0x28(m), 0, qr0
    ps_sum0     f10, f10, f10, f10
    psq_l       f1, 0x8(m), 0, qr0
    ps_sum0     f12, f12, f12, f12
    ps_madd     f9, f1, f7, f8
    psq_st      f9, Vec.x(dst), 1, qr0
    ps_madd     f11, f3, f7, f10
    psq_st      f11, Vec.y(dst), 1, qr0
    ps_madd     f13, f5, f7, f12
    psq_st      f13, Vec.z(dst), 1, qr0
#endif // clang-format on
}
|
||||
@@ -0,0 +1,171 @@
|
||||
#include <dolphin.h>
|
||||
#include <dolphin/mtx.h>
|
||||
|
||||
#define qr0 0
|
||||
|
||||
// Component-wise vector addition: c = a + b.
//
// a, b - input vectors.
// c    - output vector. Its x/y pair is stored before the z components of
//        a and b are loaded.
//
// x and y are processed as one float pair, z as a single float.
asm void PSVECAdd(register Vec* a, register Vec* b, register Vec* c)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f2, Vec.x(a), 0, qr0
    psq_l   f4, Vec.x(b), 0, qr0
    ps_add  f6, f2, f4
    psq_st  f6, Vec.x(c), 0, qr0
    psq_l   f3, Vec.z(a), 1, qr0
    psq_l   f5, Vec.z(b), 1, qr0
    ps_add  f7, f3, f5
    psq_st  f7, Vec.z(c), 1, qr0
#endif // clang-format on
}
|
||||
|
||||
// Component-wise vector subtraction: c = a - b.
//
// a - minuend vector.
// b - subtrahend vector.
// c - output vector. Its x/y pair is stored before the z components of
//     a and b are loaded.
//
// x and y are processed as one float pair, z as a single float.
asm void PSVECSubtract(register Vec* a, register Vec* b, register Vec* c)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f2, Vec.x(a), 0, qr0
    psq_l   f4, Vec.x(b), 0, qr0
    ps_sub  f6, f2, f4
    psq_st  f6, Vec.x(c), 0, qr0
    psq_l   f3, Vec.z(a), 1, qr0
    psq_l   f5, Vec.z(b), 1, qr0
    ps_sub  f7, f3, f5
    psq_st  f7, Vec.z(c), 1, qr0
#endif // clang-format on
}
|
||||
|
||||
// Multiply every component of `src` by a scalar and store the result in
// `dst`.
//
// src  - input vector; both loads happen before the first store.
// dst  - output vector.
// mult - scale factor. The body reads it through the hard-coded register
//        f1 rather than by name, so it relies on `mult` arriving in f1.
//        NOTE(review): presumably guaranteed by the calling convention for
//        the first float argument -- confirm.
asm void PSVECScale(register Vec* src, register Vec* dst, register f32 mult)
{
#ifdef __MWERKS__ // clang-format off
    psq_l       f0, Vec.x(src), 0, qr0
    psq_l       f2, Vec.z(src), 1, qr0
    ps_muls0    f0, f0, f1
    psq_st      f0, Vec.x(dst), 0, qr0
    ps_muls0    f0, f2, f1
    psq_st      f0, Vec.z(dst), 1, qr0
#endif // clang-format on
}
|
||||
|
||||
// Scale `vec1` to unit length and store the result in `dst`.
//
// vec1 - input vector; both loads happen before the first store.
// dst  - output vector.
//
// The squared length is fed to frsqrte for a reciprocal square-root
// estimate, which is refined by one Newton-Raphson step:
// r' = 0.5 * r * (3 - r*r*sqsum). Each component is then multiplied by r'.
// A zero-length input is not special-cased.
void PSVECNormalize(const register Vec* vec1, register Vec* dst)
{
    register float c_half = 0.5f;
    register float c_three = 3.0f;
    register float v1_xy;
    register float v1_z;
    register float xx_zz;
    register float xx_yy;
    register float sqsum;
    register float rsqrt;
    register float nwork0;
    register float nwork1;

#ifdef __MWERKS__ // clang-format off
    asm
    {
        psq_l       v1_xy, Vec.x(vec1), 0, qr0
        ps_mul      xx_yy, v1_xy, v1_xy             // (x*x, y*y)
        psq_l       v1_z, Vec.z(vec1), 1, qr0
        ps_madd     xx_zz, v1_z, v1_z, xx_yy        // lane 0: z*z + x*x
        ps_sum0     sqsum, xx_zz, v1_z, xx_yy       // lane 0: x*x + y*y + z*z
        frsqrte     rsqrt, sqsum
        fmuls       nwork0, rsqrt, rsqrt
        fmuls       nwork1, rsqrt, c_half
        fnmsubs     nwork0, nwork0, sqsum, c_three  // 3 - r*r*sqsum
        fmuls       rsqrt, nwork0, nwork1
        ps_muls0    v1_xy, v1_xy, rsqrt
        psq_st      v1_xy, Vec.x(dst), 0, qr0
        ps_muls0    v1_z, v1_z, rsqrt
        psq_st      v1_z, Vec.z(dst), 1, qr0
    }
#endif // clang-format on
}
|
||||
|
||||
// Return the length of vector `v`.
//
// v - input vector; read only.
//
// The squared length is computed, frsqrte gives a reciprocal square-root
// estimate, and one Newton-Raphson step refines it with the constants 0.5
// and 3.0. The result left in f1 is sqsum * refined estimate.
// The fsel replaces the refined estimate with the squared length whenever
// the estimate is not >= 0.
// NOTE(review): this is presumably the zero-length guard, turning a NaN
// estimate into a 0 * 0 product -- confirm.
// No nofralloc/blr here: the function return is left to the compiler.
asm float PSVECMag(register Vec* v)
{
#ifdef __MWERKS__ // clang-format off
    psq_l       f0, Vec.x(v), 0, qr0
    ps_mul      f0, f0, f0              // (x*x, y*y)
    lfs         f1, Vec.z(v)
    ps_madd     f1, f1, f1, f0          // lane 0: z*z + x*x
    lfs         f4, 0.5f
    ps_sum0     f1, f1, f0, f0          // lane 0: x*x + y*y + z*z
    frsqrte     f0, f1
    lfs         f3, 3.0f
    fmuls       f2, f0, f0
    fmuls       f0, f0, f4
    fnmsubs     f2, f2, f1, f3          // 3 - r*r*sqsum
    fmuls       f0, f2, f0
    fsel        f0, f0, f0, f1          // keep f0 if f0 >= 0, else use f1
    fmuls       f1, f1, f0
#endif // clang-format on
}
|
||||
|
||||
// Return the dot product of `vec1` and `vec2`.
//
// vec1, vec2 - input vectors; read only.
//
// The (y, z) pairs are multiplied first, the (x, y) pairs are then
// multiply-added on top, and ps_sum0 combines x1*x2 + y1*y2 with z1*z2
// into lane 0 of f1, the return register.
// No nofralloc/blr here: the function return is left to the compiler.
asm f32 PSVECDotProduct(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f2, Vec.y(vec1), 0, qr0     // (y1, z1)
    psq_l   f3, Vec.y(vec2), 0, qr0     // (y2, z2)
    ps_mul  f2, f2, f3                  // (y1*y2, z1*z2)
    psq_l   f5, Vec.x(vec1), 0, qr0     // (x1, y1)
    psq_l   f4, Vec.x(vec2), 0, qr0     // (x2, y2)
    ps_madd f3, f5, f4, f2              // lane 0: x1*x2 + y1*y2
    ps_sum0 f1, f3, f2, f2              // lane 0: + z1*z2
#endif // clang-format on
}
|
||||
|
||||
// Compute the cross product dst = vec1 x vec2.
//
// vec1, vec2 - input vectors; all loads finish before the first store, so
//              dst may be the same vector as either input.
// dst        - output vector; x is stored as a single float, then y/z as
//              a pair.
//
// The y and z results are first formed with the opposite sign and flipped
// by the final ps_neg.
// No nofralloc/blr here: the function return is left to the compiler.
asm void PSVECCrossProduct(register Vec* vec1, register Vec* vec2,
                           register Vec* dst)
{
#ifdef __MWERKS__ // clang-format off
    psq_l       f1, Vec.x(vec2), 0, qr0     // (x2, y2)
    lfs         f2, Vec.z(vec1)             // z1
    psq_l       f0, Vec.x(vec1), 0, qr0     // (x1, y1)
    ps_merge10  f6, f1, f1                  // (y2, x2)
    lfs         f3, Vec.z(vec2)             // z2
    ps_mul      f4, f1, f2
    ps_muls0    f7, f1, f0
    ps_msub     f5, f0, f3, f4
    ps_msub     f8, f0, f6, f7
    ps_merge11  f9, f5, f5                  // lane 0: y1*z2 - z1*y2
    ps_merge01  f10, f5, f8
    psq_st      f9, Vec.x(dst), 1, qr0
    ps_neg      f10, f10
    psq_st      f10, Vec.y(dst), 0, qr0
#endif // clang-format on
}
|
||||
|
||||
// Return the squared distance between `vec1` and `vec2`.
//
// vec1, vec2 - input vectors; read only.
//
// The (y, z) differences are squared first, the squared (x, y) differences
// are multiply-added on top, and ps_sum0 combines dx*dx + dy*dy with dz*dz
// into lane 0 of f1, the return register.
// No nofralloc/blr here: the function return is left to the compiler.
asm f32 PSVECSquareDistance(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f0, Vec.y(vec1), 0, qr0
    psq_l   f1, Vec.y(vec2), 0, qr0
    ps_sub  f2, f0, f1                  // (dy, dz)
    psq_l   f0, Vec.x(vec1), 0, qr0
    psq_l   f1, Vec.x(vec2), 0, qr0
    ps_mul  f2, f2, f2                  // (dy*dy, dz*dz)
    ps_sub  f0, f0, f1                  // (dx, dy)
    ps_madd f1, f0, f0, f2              // lane 0: dx*dx + dy*dy
    ps_sum0 f1, f1, f2, f2              // lane 0: + dz*dz
#endif // clang-format on
}
|
||||
|
||||
// Return the distance between `vec1` and `vec2`.
//
// vec1, vec2 - input vectors; read only.
//
// The squared distance is built from the (y, z) and (x, y) difference
// pairs. frsqrte then gives a reciprocal square-root estimate, which one
// Newton-Raphson step refines with the constants 0.5 and 3.0. The result
// left in f1 is squared distance * refined estimate.
// The fsel replaces the refined estimate with the squared distance
// whenever the estimate is not >= 0.
// NOTE(review): this is presumably the guard for identical points, turning
// a NaN estimate into a 0 * 0 product -- confirm.
// No nofralloc/blr here: the function return is left to the compiler.
asm f32 PSVECDistance(register Vec* vec1, register Vec* vec2)
{
#ifdef __MWERKS__ // clang-format off
    psq_l   f0, Vec.y(vec1), 0, qr0
    psq_l   f1, Vec.y(vec2), 0, qr0
    ps_sub  f2, f0, f1                  // (dy, dz)
    psq_l   f0, Vec.x(vec1), 0, qr0
    psq_l   f1, Vec.x(vec2), 0, qr0
    ps_mul  f2, f2, f2                  // (dy*dy, dz*dz)
    ps_sub  f0, f0, f1                  // (dx, dy)
    lfs     f3, 0.5f
    ps_madd f0, f0, f0, f2              // lane 0: dx*dx + dy*dy
    ps_sum0 f0, f0, f2, f2              // lane 0: + dz*dz
    lfs     f4, 3.0f
    frsqrte f1, f0
    fmuls   f2, f1, f1
    fmuls   f1, f1, f3
    fnmsubs f2, f2, f0, f4              // 3 - r*r*sqsum
    fmuls   f1, f2, f1
    fsel    f1, f1, f1, f0              // keep f1 if f1 >= 0, else use f0
    fmuls   f1, f0, f1
#endif // clang-format on
}
|
||||
Reference in New Issue
Block a user