/* Copyright (C) 2014-2018 fincs This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. */ #include #include #include #include #include typedef u32 C3D_IVec; typedef union { struct { float w; ///< W-component float z; ///< Z-component float y; ///< Y-component float x; ///< X-component }; float c[4]; } C3D_FVec; typedef union { C3D_FVec r[4]; ///< Rows are vectors float m[4*4]; ///< Raw access } C3D_Mtx; typedef struct { u32 flags[2]; int attrCount; } C3D_AttrInfo; static void AttrInfo_Init(C3D_AttrInfo* info); static int AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count); static C3D_AttrInfo* C3D_GetAttrInfo(void); typedef struct { u32 offset; u32 flags[2]; } C3D_BufCfg; typedef struct { void* data; GPU_TEXCOLOR fmt : 4; size_t size : 28; union { u32 dim; struct { u16 height; u16 width; }; }; u32 param; u32 border; union { u32 lodParam; struct { u16 lodBias; u8 maxLevel; u8 minLevel; }; }; } C3D_Tex; static void C3D_TexBind(int unitId, C3D_Tex* tex); static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset); static void C3D_CullFace(GPU_CULLMODE mode); static void C3D_StencilTest(void); static void C3D_StencilOp(void); static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref); static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask); static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref); static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha); static void C3D_ColorLogicOp(GPU_LOGICOP op); static void C3D_FragOpMode(GPU_FRAGOPMODE mode); static void C3D_FragOpShadow(float scale, float bias); #define C3D_DEFAULT_CMDBUF_SIZE 0x40000 enum { C3D_UNSIGNED_BYTE = 0, C3D_UNSIGNED_SHORT = 1, }; static bool C3D_Init(size_t cmdBufSize); static void C3D_Fini(void); static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h); static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom); static void C3D_DrawElements(GPU_Primitive_t primitive, int count); // Immediate-mode vertex submission static void C3D_ImmDrawBegin(GPU_Primitive_t primitive); static void C3D_ImmSendAttrib(float x, float y, float z, float w); static void C3D_ImmDrawEnd(void); typedef struct { u32 data[128]; } C3D_FogLut; static inline float FogLut_CalcZ(float depth, float near, float far) { return far*near/(depth*(far-near)+near); } static void FogLut_FromArray(C3D_FogLut* lut, const float data[129]); typedef struct { void* colorBuf; void* depthBuf; u16 width; u16 height; GPU_COLORBUF colorFmt; GPU_DEPTHBUF depthFmt; u8 colorMask : 4; u8 depthMask : 4; } C3D_FrameBuf; // Flags for C3D_FrameBufClear typedef enum { C3D_CLEAR_COLOR = BIT(0), C3D_CLEAR_DEPTH = BIT(1), C3D_CLEAR_ALL = C3D_CLEAR_COLOR | C3D_CLEAR_DEPTH, } C3D_ClearBits; static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt); static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt); static void C3D_SetFrameBuf(C3D_FrameBuf* fb); static void C3D_FrameBufClear(C3D_FrameBuf* fb, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth); static void C3D_FrameBufTransfer(C3D_FrameBuf* fb, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags); typedef struct C3D_RenderTarget_tag C3D_RenderTarget; struct C3D_RenderTarget_tag { C3D_FrameBuf frameBuf; bool used, linked; gfxScreen_t screen; gfx3dSide_t side; u32 transferFlags; }; // Flags for C3D_FrameBegin enum { C3D_FRAME_NONBLOCK = BIT(1), // Return false instead of waiting if the GPU is busy }; static bool C3D_FrameBegin(u8 flags); static bool C3D_FrameDrawOn(C3D_RenderTarget* target); static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags); static inline void C3D_RenderTargetClear(C3D_RenderTarget* target, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth) { C3D_FrameBufClear(&target->frameBuf, clearBits, clearColor, clearDepth); } typedef struct { u16 srcRgb, srcAlpha; union { u32 opAll; struct { u32 opRgb:12, opAlpha:12; }; }; u16 funcRgb, funcAlpha; u32 color; u16 scaleRgb, scaleAlpha; } C3D_TexEnv; static inline void C3D_TexEnvInit(C3D_TexEnv* env) { env->srcRgb = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0); env->srcAlpha = GPU_TEVSOURCES(GPU_PREVIOUS, 0, 0); env->opAll = 0; env->funcRgb = GPU_REPLACE; env->funcAlpha = GPU_REPLACE; env->color = 0xFFFFFFFF; env->scaleRgb = GPU_TEVSCALE_1; env->scaleAlpha = GPU_TEVSCALE_1; } void Mtx_Multiply(C3D_Mtx* out, const C3D_Mtx* a, const C3D_Mtx* b) { // http://www.wolframalpha.com/input/?i={{a,b,c,d},{e,f,g,h},{i,j,k,l},{m,n,o,p}}{{α,β,γ,δ},{ε,θ,ι,κ},{λ,μ,ν,ξ},{ο,π,ρ,σ}} int i, j; for (j = 0; j < 4; ++j) for (i = 0; i < 4; ++i) out->r[j].c[i] = a->r[j].x*b->r[0].c[i] + a->r[j].y*b->r[1].c[i] + a->r[j].z*b->r[2].c[i] + a->r[j].w*b->r[3].c[i]; } typedef struct { u32 fragOpMode; u32 fragOpShadow; u32 zScale, zOffset; GPU_CULLMODE cullMode; bool zBuffer, earlyDepth; GPU_EARLYDEPTHFUNC earlyDepthFunc; u32 earlyDepthRef; u32 alphaTest; u32 stencilMode, stencilOp; u32 depthTest; u32 alphaBlend; GPU_LOGICOP clrLogicOp; } C3D_Effect; typedef struct { gxCmdQueue_s gxQueue; u32* cmdBuf; size_t cmdBufSize; u32 flags; C3D_AttrInfo attrInfo; C3D_Effect effect; u32 texConfig; C3D_Tex* tex[3]; u32 texEnvBuf, texEnvBufClr; C3D_FrameBuf fb; u32 viewport[5]; u32 scissor[3]; } C3D_Context; enum { C3DiF_Active = BIT(0), C3DiF_DrawUsed = BIT(1), C3DiF_AttrInfo = BIT(2), C3DiF_Effect = BIT(4), C3DiF_FrameBuf = BIT(5), C3DiF_Viewport = BIT(6), C3DiF_Scissor = BIT(7), C3DiF_TexEnvBuf = BIT(9), C3DiF_TexStatus = BIT(14), C3DiF_Gas = BIT(18), C3DiF_Reset = BIT(19), #define C3DiF_Tex(n) BIT(23+(n)) C3DiF_TexAll = 7 << 23, }; static C3D_Context __C3D_Context; static inline C3D_Context* C3Di_GetContext(void) { extern C3D_Context __C3D_Context; return &__C3D_Context; } static inline bool addrIsVRAM(const void* addr) { u32 vaddr = (u32)addr; return vaddr >= OS_VRAM_VADDR && vaddr < OS_VRAM_VADDR + OS_VRAM_SIZE; } static inline vramAllocPos addrGetVRAMBank(const void* addr) { u32 vaddr = (u32)addr; return vaddr < OS_VRAM_VADDR + OS_VRAM_SIZE/2 ? VRAM_ALLOC_A : VRAM_ALLOC_B; } static void C3Di_UpdateContext(void); static void C3Di_AttrInfoBind(C3D_AttrInfo* info); static void C3Di_FrameBufBind(C3D_FrameBuf* fb); static void C3Di_TexEnvBind(int id, C3D_TexEnv* env); static void C3Di_SetTex(int unit, C3D_Tex* tex); static void C3Di_EffectBind(C3D_Effect* effect); static bool C3Di_SplitFrame(u32** pBuf, u32* pSize); static void C3Di_RenderQueueInit(void); static void C3Di_RenderQueueExit(void); static void C3Di_RenderQueueWaitDone(void); static void AttrInfo_Init(C3D_AttrInfo* info) { memset(info, 0, sizeof(*info)); info->flags[1] = 0xFFF << 16; } static int AttrInfo_AddLoader(C3D_AttrInfo* info, int regId, GPU_FORMATS format, int count) { if (info->attrCount == 12) return -1; int id = info->attrCount++; if (id < 8) info->flags[0] |= GPU_ATTRIBFMT(id, count, format); else info->flags[1] |= GPU_ATTRIBFMT(id-8, count, format); info->flags[1] = (info->flags[1] &~ (0xF0000000 | BIT(id+16))) | (id << 28); return id; } static C3D_AttrInfo* C3D_GetAttrInfo(void) { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_AttrInfo; return &ctx->attrInfo; } static void C3Di_AttrInfoBind(C3D_AttrInfo* info) { GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFERS_FORMAT_LOW, (u32*)info->flags, sizeof(info->flags)/sizeof(u32)); } #define BUFFER_BASE_PADDR 0x18000000 static void C3D_DrawElements(GPU_Primitive_t primitive, int count) { C3Di_UpdateContext(); // Set primitive type GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive != GPU_TRIANGLES ? primitive : GPU_GEOMETRY_PRIM); // Start a new primitive (breaks off a triangle strip/fan) GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1); // Number of vertices GPUCMD_AddWrite(GPUREG_NUMVERTICES, count); // Enable triangle element drawing mode if necessary GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0x100); GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0x100); // Enable drawing mode GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0); // Trigger element drawing GPUCMD_AddWrite(GPUREG_DRAWELEMENTS, 1); // Go back to configuration mode GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1); // Disable triangle element drawing mode if necessary GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG, 2, 0); GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 2, 0); // Clear the post-vertex cache GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1); GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0); GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 0x8, 0); C3Di_GetContext()->flags |= C3DiF_DrawUsed; } static inline C3D_Effect* getEffect() { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_Effect; return &ctx->effect; } static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset) { C3D_Effect* e = getEffect(); e->zBuffer = bIsZBuffer; e->zScale = f32tof24(zScale); e->zOffset = f32tof24(zOffset); } static void C3D_CullFace(GPU_CULLMODE mode) { C3D_Effect* e = getEffect(); e->cullMode = mode; } static void C3D_StencilTest(void) { C3D_Effect* e = getEffect(); e->stencilMode = false | (GPU_ALWAYS << 4) | (0xFF << 24); } static void C3D_StencilOp(void) { C3D_Effect* e = getEffect(); e->stencilOp = GPU_STENCIL_KEEP | (GPU_STENCIL_KEEP << 4) | (GPU_STENCIL_KEEP << 8); } static void C3D_EarlyDepthTest(bool enable, GPU_EARLYDEPTHFUNC function, u32 ref) { C3D_Effect* e = getEffect(); e->earlyDepth = enable; e->earlyDepthFunc = function; e->earlyDepthRef = ref; } static void C3D_DepthTest(bool enable, GPU_TESTFUNC function, GPU_WRITEMASK writemask) { C3D_Effect* e = getEffect(); e->depthTest = (!!enable) | ((function & 7) << 4) | (writemask << 8); } static void C3D_AlphaTest(bool enable, GPU_TESTFUNC function, int ref) { C3D_Effect* e = getEffect(); e->alphaTest = (!!enable) | ((function & 7) << 4) | (ref << 8); } static void C3D_AlphaBlend(GPU_BLENDEQUATION colorEq, GPU_BLENDEQUATION alphaEq, GPU_BLENDFACTOR srcClr, GPU_BLENDFACTOR dstClr, GPU_BLENDFACTOR srcAlpha, GPU_BLENDFACTOR dstAlpha) { C3D_Effect* e = getEffect(); e->alphaBlend = colorEq | (alphaEq << 8) | (srcClr << 16) | (dstClr << 20) | (srcAlpha << 24) | (dstAlpha << 28); e->fragOpMode &= ~0xFF00; e->fragOpMode |= 0x0100; } static void C3D_ColorLogicOp(GPU_LOGICOP op) { C3D_Effect* e = getEffect(); e->fragOpMode &= ~0xFF00; e->clrLogicOp = op; } static void C3D_FragOpMode(GPU_FRAGOPMODE mode) { C3D_Effect* e = getEffect(); e->fragOpMode &= ~0xFF00FF; e->fragOpMode |= 0xE40000 | mode; } static void C3D_FragOpShadow(float scale, float bias) { C3D_Effect* e = getEffect(); e->fragOpShadow = f32tof16(scale+bias) | (f32tof16(-scale)<<16); } static void C3Di_EffectBind(C3D_Effect* e) { GPUCMD_AddWrite(GPUREG_DEPTHMAP_ENABLE, e->zBuffer ? 1 : 0); GPUCMD_AddWrite(GPUREG_FACECULLING_CONFIG, e->cullMode & 0x3); GPUCMD_AddIncrementalWrites(GPUREG_DEPTHMAP_SCALE, (u32*)&e->zScale, 2); GPUCMD_AddIncrementalWrites(GPUREG_FRAGOP_ALPHA_TEST, (u32*)&e->alphaTest, 4); GPUCMD_AddMaskedWrite(GPUREG_GAS_DELTAZ_DEPTH, 0x8, (u32)GPU_MAKEGASDEPTHFUNC((e->depthTest>>4)&7) << 24); GPUCMD_AddWrite(GPUREG_BLEND_COLOR, 0); GPUCMD_AddWrite(GPUREG_BLEND_FUNC, e->alphaBlend); GPUCMD_AddWrite(GPUREG_LOGIC_OP, e->clrLogicOp); GPUCMD_AddMaskedWrite(GPUREG_COLOR_OPERATION, 7, e->fragOpMode); GPUCMD_AddWrite(GPUREG_FRAGOP_SHADOW, e->fragOpShadow); GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_TEST1, 1, e->earlyDepth ? 1 : 0); GPUCMD_AddWrite(GPUREG_EARLYDEPTH_TEST2, e->earlyDepth ? 1 : 0); GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_FUNC, 1, e->earlyDepthFunc); GPUCMD_AddMaskedWrite(GPUREG_EARLYDEPTH_DATA, 0x7, e->earlyDepthRef); } static void FogLut_FromArray(C3D_FogLut* lut, const float data[129]) { int i; for (i = 0; i < 128; i ++) { float cur = data[i + 0]; float next = data[i + 1]; float diff = next - cur; u32 val = 0; if (cur > 0.0f) { cur *= 0x800; val = (cur < 0x800) ? (u32)cur : 0x7FF; } u32 val2 = 0; if (diff != 0.0f) { diff *= 0x800; if (diff < -0x1000) diff = -0x1000; else if (diff > 0xFFF) diff = 0xFFF; val2 = (s32)diff & 0x1FFF; } lut->data[i] = val2 | (val << 13); } } static const u8 colorFmtSizes[] = {2,1,0,0,0}; static const u8 depthFmtSizes[] = {0,0,1,2}; static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt) { u32 size = width*height; return size*(2+colorFmtSizes[fmt]); } static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt) { u32 size = width*height; return size*(2+depthFmtSizes[fmt]); } static void C3D_SetFrameBuf(C3D_FrameBuf* fb) { C3D_Context* ctx = C3Di_GetContext(); if (!(ctx->flags & C3DiF_Active)) return; if (fb != &ctx->fb) memcpy(&ctx->fb, fb, sizeof(*fb)); ctx->flags |= C3DiF_FrameBuf; } static void C3Di_FrameBufBind(C3D_FrameBuf* fb) { u32 param[4] = { 0, 0, 0, 0 }; GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1); param[0] = osConvertVirtToPhys(fb->depthBuf) >> 3; param[1] = osConvertVirtToPhys(fb->colorBuf) >> 3; param[2] = 0x01000000 | (((u32)(fb->height-1) & 0xFFF) << 12) | (fb->width & 0xFFF); GPUCMD_AddIncrementalWrites(GPUREG_DEPTHBUFFER_LOC, param, 3); GPUCMD_AddWrite(GPUREG_RENDERBUF_DIM, param[2]); GPUCMD_AddWrite(GPUREG_DEPTHBUFFER_FORMAT, fb->depthFmt); GPUCMD_AddWrite(GPUREG_COLORBUFFER_FORMAT, colorFmtSizes[fb->colorFmt] | ((u32)fb->colorFmt << 16)); GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_BLOCK32, 0); // Enable or disable color/depth buffers param[0] = param[1] = fb->colorBuf ? fb->colorMask : 0; param[2] = param[3] = fb->depthBuf ? fb->depthMask : 0; GPUCMD_AddIncrementalWrites(GPUREG_COLORBUFFER_READ, param, 4); } static void C3D_FrameBufClear(C3D_FrameBuf* frameBuf, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth) { u32 size = (u32)frameBuf->width * frameBuf->height; u32 cfs = colorFmtSizes[frameBuf->colorFmt]; u32 dfs = depthFmtSizes[frameBuf->depthFmt]; void* colorBufEnd = (u8*)frameBuf->colorBuf + size*(2+cfs); void* depthBufEnd = (u8*)frameBuf->depthBuf + size*(2+dfs); if (clearBits & C3D_CLEAR_COLOR) { if (clearBits & C3D_CLEAR_DEPTH) GX_MemoryFill( (u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8), (u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8)); else GX_MemoryFill( (u32*)frameBuf->colorBuf, clearColor, (u32*)colorBufEnd, BIT(0) | (cfs << 8), NULL, 0, NULL, 0); } else GX_MemoryFill( (u32*)frameBuf->depthBuf, clearDepth, (u32*)depthBufEnd, BIT(0) | (dfs << 8), NULL, 0, NULL, 0); } static void C3D_FrameBufTransfer(C3D_FrameBuf* frameBuf, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags) { u32* outputFrameBuf = (u32*)gfxGetFramebuffer(screen, side, NULL, NULL); u32 dim = GX_BUFFER_DIM((u32)frameBuf->width, (u32)frameBuf->height); GX_DisplayTransfer((u32*)frameBuf->colorBuf, dim, outputFrameBuf, dim, transferFlags); } static void C3D_ImmDrawBegin(GPU_Primitive_t primitive) { C3Di_UpdateContext(); // Set primitive type GPUCMD_AddMaskedWrite(GPUREG_PRIMITIVE_CONFIG, 2, primitive); // Start a new primitive (breaks off a triangle strip/fan) GPUCMD_AddWrite(GPUREG_RESTART_PRIMITIVE, 1); // Enable vertex submission mode GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 1); // Enable drawing mode GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 0); // Begin immediate-mode vertex submission GPUCMD_AddWrite(GPUREG_FIXEDATTRIB_INDEX, 0xF); } static inline void write24(u8* p, u32 val) { p[0] = val; p[1] = val>>8; p[2] = val>>16; } static void C3D_ImmSendAttrib(float x, float y, float z, float w) { union { u32 packed[3]; struct { u8 x[3]; u8 y[3]; u8 z[3]; u8 w[3]; }; } param; // Convert the values to float24 write24(param.x, f32tof24(x)); write24(param.y, f32tof24(y)); write24(param.z, f32tof24(z)); write24(param.w, f32tof24(w)); // Reverse the packed words u32 p = param.packed[0]; param.packed[0] = param.packed[2]; param.packed[2] = p; // Send the attribute GPUCMD_AddIncrementalWrites(GPUREG_FIXEDATTRIB_DATA0, param.packed, 3); } static void C3D_ImmDrawEnd(void) { // Go back to configuration mode GPUCMD_AddMaskedWrite(GPUREG_START_DRAW_FUNC0, 1, 1); // Disable vertex submission mode GPUCMD_AddMaskedWrite(GPUREG_GEOSTAGE_CONFIG2, 1, 0); // Clear the post-vertex cache GPUCMD_AddWrite(GPUREG_VTX_FUNC, 1); C3Di_GetContext()->flags |= C3DiF_DrawUsed; } static C3D_RenderTarget *linkedTarget[3]; static bool inFrame; static bool swapPending, isTopStereo; static void onQueueFinish(gxCmdQueue_s* queue) { if (swapPending) { gfxScreenSwapBuffers(GFX_TOP, isTopStereo); gfxScreenSwapBuffers(GFX_BOTTOM, false); } } static bool C3Di_WaitAndClearQueue(s64 timeout) { gxCmdQueue_s* queue = &C3Di_GetContext()->gxQueue; if (!gxCmdQueueWait(queue, timeout)) return false; gxCmdQueueStop(queue); gxCmdQueueClear(queue); return true; } static void C3Di_RenderQueueInit(void) { C3D_Context* ctx = C3Di_GetContext(); GX_BindQueue(&ctx->gxQueue); gxCmdQueueSetCallback(&ctx->gxQueue, onQueueFinish, NULL); gxCmdQueueRun(&ctx->gxQueue); } static void C3Di_RenderQueueExit(void) { C3Di_WaitAndClearQueue(-1); gxCmdQueueSetCallback(&C3Di_GetContext()->gxQueue, NULL, NULL); GX_BindQueue(NULL); } static void C3Di_RenderQueueWaitDone(void) { C3Di_WaitAndClearQueue(-1); } static bool C3D_FrameBegin(u8 flags) { inFrame = true; return true; } static bool C3D_FrameDrawOn(C3D_RenderTarget* target) { if (!inFrame) return false; target->used = true; C3D_SetFrameBuf(&target->frameBuf); return true; } static void C3D_FrameFinish(u8 flags) { C3D_Context* ctx = C3Di_GetContext(); if (!inFrame) return; u32 *cmdBuf, cmdBufSize; if (C3Di_SplitFrame(&cmdBuf, &cmdBufSize)) GX_ProcessCommandList(cmdBuf, cmdBufSize*4, flags); GPUCMD_SetBuffer(NULL, 0, 0); inFrame = false; // Flush the entire linear memory if the user did not explicitly mandate to flush the command list if (!(flags & GX_CMDLIST_FLUSH)) { extern u32 __ctru_linear_heap; extern u32 __ctru_linear_heap_size; GSPGPU_FlushDataCache((void*)__ctru_linear_heap, __ctru_linear_heap_size); } C3D_RenderTarget* target; isTopStereo = false; swapPending = true; for (int i = 2; i >= 0; i --) { target = linkedTarget[i]; if (!target || !target->used) continue; target->used = false; C3D_FrameBufTransfer(&target->frameBuf, target->screen, target->side, target->transferFlags); if (target->screen == GFX_TOP && target->side == GFX_RIGHT) isTopStereo = true; } gxCmdQueueRun(&ctx->gxQueue); } static void C3D_FrameEnd(u8 flags) { C3D_Context* ctx = C3Di_GetContext(); C3Di_WaitAndClearQueue((flags & C3D_FRAME_NONBLOCK) ? 0 : -1); GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0); } static void C3D_RenderTargetInit(C3D_RenderTarget* target, int width, int height) { memset(target, 0, sizeof(C3D_RenderTarget)); C3D_FrameBuf* fb = &target->frameBuf; fb->width = width; fb->height = height; } static void C3D_RenderTargetColor(C3D_RenderTarget* target, GPU_COLORBUF fmt) { C3D_FrameBuf* fb = &target->frameBuf; size_t colorSize = C3D_CalcColorBufSize(fb->width, fb->height, fmt); void* colorBuf = vramAlloc(colorSize); if (!colorBuf) return; fb->colorBuf = colorBuf; fb->colorFmt = fmt; fb->colorMask = 0xF; } static void C3D_RenderTargetDepth(C3D_RenderTarget* target, GPU_DEPTHBUF fmt) { C3D_FrameBuf* fb = &target->frameBuf; size_t depthSize = C3D_CalcDepthBufSize(fb->width, fb->height, fmt); void* depthBuf = NULL; vramAllocPos vramBank = addrGetVRAMBank(fb->colorBuf); depthBuf = vramAllocAt(depthSize, vramBank ^ VRAM_ALLOC_ANY); // Attempt opposite bank first... if (!depthBuf) depthBuf = vramAllocAt(depthSize, vramBank); // ... if that fails, attempt same bank if (!depthBuf) return; fb->depthBuf = depthBuf; fb->depthFmt = fmt; fb->depthMask = fmt == GPU_RB_DEPTH24_STENCIL8 ? 0x3 : 0x2; } static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags) { int id = 0; if (screen==GFX_BOTTOM) id = 2; else if (side==GFX_RIGHT) id = 1; if (linkedTarget[id]) { linkedTarget[id]->linked = false; if (!inFrame) C3Di_WaitAndClearQueue(-1); } linkedTarget[id] = target; target->linked = true; target->transferFlags = transferFlags; target->screen = screen; target->side = side; } static void C3Di_TexEnvBind(int id, C3D_TexEnv* env) { if (id >= 4) id += 2; GPUCMD_AddIncrementalWrites(GPUREG_TEXENV0_SOURCE + id*8, (u32*)env, sizeof(C3D_TexEnv)/sizeof(u32)); } static void C3D_TexBind(int unitId, C3D_Tex* tex) { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_Tex(unitId); ctx->tex[unitId] = tex; } static void C3Di_SetTex(int unit, C3D_Tex* tex) { u32 reg[10]; u32 regcount = 5; reg[0] = tex->border; reg[1] = tex->dim; reg[2] = tex->param; reg[3] = tex->lodParam; reg[4] = osConvertVirtToPhys(tex->data) >> 3; switch (unit) { case 0: GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT0_BORDER_COLOR, reg, regcount); GPUCMD_AddWrite(GPUREG_TEXUNIT0_TYPE, tex->fmt); break; case 1: GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT1_BORDER_COLOR, reg, 5); GPUCMD_AddWrite(GPUREG_TEXUNIT1_TYPE, tex->fmt); break; case 2: GPUCMD_AddIncrementalWrites(GPUREG_TEXUNIT2_BORDER_COLOR, reg, 5); GPUCMD_AddWrite(GPUREG_TEXUNIT2_TYPE, tex->fmt); break; } } static void C3Di_OnRestore(void) { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_AttrInfo | C3DiF_Effect | C3DiF_FrameBuf | C3DiF_Viewport | C3DiF_Scissor | C3DiF_TexAll | C3DiF_TexEnvBuf | C3DiF_Gas | C3DiF_Reset; } #define GXQUEUE_MAX_ENTRIES 32 static gxCmdEntry_s queue_entries[GXQUEUE_MAX_ENTRIES]; static bool C3D_Init(size_t cmdBufSize) { int i; C3D_Context* ctx = C3Di_GetContext(); cmdBufSize = (cmdBufSize + 0xF) &~ 0xF; // 0x10-byte align ctx->cmdBufSize = cmdBufSize/4; ctx->cmdBuf = (u32*)linearAlloc(cmdBufSize); if (!ctx->cmdBuf) return false; ctx->gxQueue.maxEntries = GXQUEUE_MAX_ENTRIES; ctx->gxQueue.entries = queue_entries; ctx->flags = C3DiF_Active | C3DiF_TexEnvBuf | C3DiF_Effect | C3DiF_TexStatus | C3DiF_TexAll | C3DiF_Reset; // TODO: replace with direct struct access C3D_DepthMap(true, -1.0f, 0.0f); C3D_CullFace(GPU_CULL_BACK_CCW); C3D_StencilTest(); C3D_StencilOp(); C3D_EarlyDepthTest(false, GPU_EARLYDEPTH_GREATER, 0); C3D_DepthTest(true, GPU_GREATER, GPU_WRITE_ALL); C3D_AlphaTest(false, GPU_ALWAYS, 0x00); C3D_AlphaBlend(GPU_BLEND_ADD, GPU_BLEND_ADD, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA, GPU_SRC_ALPHA, GPU_ONE_MINUS_SRC_ALPHA); C3D_FragOpMode(GPU_FRAGOPMODE_GL); C3D_FragOpShadow(0.0, 1.0); ctx->texConfig = BIT(12); ctx->texEnvBuf = 0; ctx->texEnvBufClr = 0xFFFFFFFF; for (i = 0; i < 3; i ++) ctx->tex[i] = NULL; C3Di_RenderQueueInit(); GPUCMD_SetBuffer(ctx->cmdBuf, ctx->cmdBufSize, 0); return true; } static void C3D_SetViewport(u32 x, u32 y, u32 w, u32 h) { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_Viewport | C3DiF_Scissor; ctx->viewport[0] = f32tof24(w / 2.0f); ctx->viewport[1] = f32tof31(2.0f / w) << 1; ctx->viewport[2] = f32tof24(h / 2.0f); ctx->viewport[3] = f32tof31(2.0f / h) << 1; ctx->viewport[4] = (y << 16) | (x & 0xFFFF); ctx->scissor[0] = GPU_SCISSOR_DISABLE; } static void C3D_SetScissor(GPU_SCISSORMODE mode, u32 left, u32 top, u32 right, u32 bottom) { C3D_Context* ctx = C3Di_GetContext(); ctx->flags |= C3DiF_Scissor; ctx->scissor[0] = mode; if (mode == GPU_SCISSOR_DISABLE) return; ctx->scissor[1] = (top << 16) | (left & 0xFFFF); ctx->scissor[2] = ((bottom-1) << 16) | ((right-1) & 0xFFFF); } static void C3Di_Reset(C3D_Context* ctx) { // Reset texture environment C3D_TexEnv texEnv; C3D_TexEnvInit(&texEnv); for (int i = 0; i < 6; i++) { C3Di_TexEnvBind(i, &texEnv); } // Reset lighting GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE0, false); GPUCMD_AddWrite(GPUREG_LIGHTING_ENABLE1, true); // Reset attirubte buffer info C3D_BufCfg buffers[12] = { 0 }; GPUCMD_AddWrite(GPUREG_ATTRIBBUFFERS_LOC, BUFFER_BASE_PADDR >> 3); GPUCMD_AddIncrementalWrites(GPUREG_ATTRIBBUFFER0_OFFSET, (u32*)buffers, 12 * 3); } static void C3Di_UpdateFramebuffer(C3D_Context* ctx) { if (ctx->flags & C3DiF_DrawUsed) { ctx->flags &= ~C3DiF_DrawUsed; GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1); GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1); } C3Di_FrameBufBind(&ctx->fb); } static void C3Di_UpdateContext(void) { int i; C3D_Context* ctx = C3Di_GetContext(); if (ctx->flags & C3DiF_Reset) { ctx->flags &= ~C3DiF_Reset; C3Di_Reset(ctx); } if (ctx->flags & C3DiF_FrameBuf) { ctx->flags &= ~C3DiF_FrameBuf; C3Di_UpdateFramebuffer(ctx); } if (ctx->flags & C3DiF_Viewport) { ctx->flags &= ~C3DiF_Viewport; GPUCMD_AddIncrementalWrites(GPUREG_VIEWPORT_WIDTH, ctx->viewport, 4); GPUCMD_AddWrite(GPUREG_VIEWPORT_XY, ctx->viewport[4]); } if (ctx->flags & C3DiF_Scissor) { ctx->flags &= ~C3DiF_Scissor; GPUCMD_AddIncrementalWrites(GPUREG_SCISSORTEST_MODE, ctx->scissor, 3); } if (ctx->flags & C3DiF_AttrInfo) { ctx->flags &= ~C3DiF_AttrInfo; C3Di_AttrInfoBind(&ctx->attrInfo); } if (ctx->flags & C3DiF_Effect) { ctx->flags &= ~C3DiF_Effect; C3Di_EffectBind(&ctx->effect); } if (ctx->flags & C3DiF_TexAll) { u32 units = 0; for (i = 0; i < 3; i ++) { if (ctx->tex[i]) { units |= BIT(i); if (ctx->flags & C3DiF_Tex(i)) C3Di_SetTex(i, ctx->tex[i]); } } // Enable texture units and clear texture cache ctx->texConfig &= ~7; ctx->texConfig |= units | BIT(16); ctx->flags &= ~C3DiF_TexAll; ctx->flags |= C3DiF_TexStatus; } if (ctx->flags & C3DiF_TexStatus) { ctx->flags &= ~C3DiF_TexStatus; GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0xB, ctx->texConfig); // Clear texture cache if requested *after* configuring texture units if (ctx->texConfig & BIT(16)) { ctx->texConfig &= ~BIT(16); GPUCMD_AddMaskedWrite(GPUREG_TEXUNIT_CONFIG, 0x4, BIT(16)); } GPUCMD_AddWrite(GPUREG_TEXUNIT0_SHADOW, BIT(0)); } if (ctx->flags & C3DiF_TexEnvBuf) { ctx->flags &= ~C3DiF_TexEnvBuf; GPUCMD_AddMaskedWrite(GPUREG_TEXENV_UPDATE_BUFFER, 0x7, ctx->texEnvBuf); GPUCMD_AddWrite(GPUREG_TEXENV_BUFFER_COLOR, ctx->texEnvBufClr); } } static bool C3Di_SplitFrame(u32** pBuf, u32* pSize) { C3D_Context* ctx = C3Di_GetContext(); if (!gpuCmdBufOffset) return false; // Nothing was drawn if (ctx->flags & C3DiF_DrawUsed) { ctx->flags &= ~C3DiF_DrawUsed; GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_FLUSH, 1); GPUCMD_AddWrite(GPUREG_FRAMEBUFFER_INVALIDATE, 1); GPUCMD_AddWrite(GPUREG_EARLYDEPTH_CLEAR, 1); } GPUCMD_Split(pBuf, pSize); return true; } static void C3D_Fini(void) { C3D_Context* ctx = C3Di_GetContext(); if (!(ctx->flags & C3DiF_Active)) return; C3Di_RenderQueueExit(); linearFree(ctx->cmdBuf); ctx->flags = 0; }