From 09361154834a54be19cc94866e657641be180b05 Mon Sep 17 00:00:00 2001
From: Luke Street
Date: Thu, 4 Jun 2026 23:27:30 -0600
Subject: [PATCH] Optimize display lists in J3DShapeDraw

This is a stop-gap until DL optimization is upstreamed to Aurora.
---
Review notes (this section is not part of the commit message):

- Line breaks, indentation and blank context lines in this patch were
  reconstructed from a whitespace-collapsed copy; verify the context lines
  against the tree before applying.
- Text that was written in angle brackets was lost from that copy. The
  std::vector template arguments are restored as <u8>, as implied by the
  const u8* data passed to and from them. The targets of the bare #include
  lines and the sender address could not be recovered and are left as found.
- triangulate_draw() now declines GX_TRIANGLES draws whose vertex count is not
  a multiple of 3, so a partial trailing triangle can no longer misalign the
  triangles of the draws merged after it.
- get_command_size() computes the XF register count as u32 so a length field
  of 0xFFFF cannot wrap the count to 0.
- countVertex() reads the vertex count with read_be16() and advances by the
  size get_command_size() already computed.
- optimize_display_list() shares one pass-through path for every command that
  is not merged and reuses a single scratch buffer for triangulated vertices.

 .../JSystem/J3DGraphBase/J3DShapeDraw.h       |   4 +
 .../JSystem/src/J3DGraphBase/J3DShapeDraw.cpp | 378 +++++++++++++++++-
 .../src/J3DGraphLoader/J3DShapeFactory.cpp    |   7 +-
 3 files changed, 384 insertions(+), 5 deletions(-)

diff --git a/libs/JSystem/include/JSystem/J3DGraphBase/J3DShapeDraw.h b/libs/JSystem/include/JSystem/J3DGraphBase/J3DShapeDraw.h
index e6e8d61786..feff2b4991 100644
--- a/libs/JSystem/include/JSystem/J3DGraphBase/J3DShapeDraw.h
+++ b/libs/JSystem/include/JSystem/J3DGraphBase/J3DShapeDraw.h
@@ -1,6 +1,7 @@
 #ifndef J3DSHAPEDRAW_H
 #define J3DSHAPEDRAW_H
 
+#include
 #include
 
 /**
@@ -12,6 +13,9 @@ public:
     u32 countVertex(u32);
     void addTexMtxIndexInDL(u32, u32, u32);
     J3DShapeDraw(u8 const*, u32);
+#if TARGET_PC
+    J3DShapeDraw(u8 const*, u32, const GXVtxDescList*);
+#endif
     void draw() const;
     virtual ~J3DShapeDraw();
 
diff --git a/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp b/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp
index a1bd69438c..b40f577d79 100644
--- a/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp
+++ b/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp
@@ -1,15 +1,342 @@
-#include "JSystem/JSystem.h" // IWYU pragma: keep
+#include "JSystem/JSystem.h" // IWYU pragma: keep
+#include
+#include
+#include
 
 #include "JSystem/J3DGraphBase/J3DShapeDraw.h"
 #include "JSystem/JKernel/JKRHeap.h"
-#include
-#include
-#include
+
+#if TARGET_PC
+#include
+#include
+#include
+#include "dusk/logging.h"
+
+namespace {
+
+// Reads a big-endian 16-bit value from data[0..1].
+u16 read_be16(const u8* data) {
+    return (u16(data[0]) << 8) | data[1];
+}
+
+// Appends value to out as two bytes, most significant byte first.
+void append_be16(std::vector<u8>& out, u16 value) {
+    out.push_back(value >> 8);
+    out.push_back(value & 0xFF);
+}
+
+// Appends size bytes starting at data to out.
+void append_bytes(std::vector<u8>& out, const u8* data, u32 size) {
+    out.insert(out.end(), data, data + size);
+}
+
+// True for attributes in the GX_VA_PNMTXIDX..GX_VA_TEX7MTXIDX range.
+bool is_matrix_idx_attr(GXAttr attr) {
+    return attr >= GX_VA_PNMTXIDX && attr <= GX_VA_TEX7MTXIDX;
+}
+
+// True when opcode is one of the primitive draw commands.
+// Callers pass the command byte masked with GX_OPCODE_MASK.
+bool is_draw_opcode(u8 opcode) {
+    return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP ||
+           opcode == GX_TRIANGLEFAN || opcode == GX_LINES || opcode == GX_LINESTRIP ||
+           opcode == GX_POINTS;
+}
+
+// True for the draw opcodes that triangulate_draw() knows how to convert.
+bool is_mergeable_draw_opcode(u8 opcode) {
+    return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP ||
+           opcode == GX_TRIANGLEFAN;
+}
+
+// Sums the per-vertex byte size described by vtxDesc (terminated by GX_VA_NULL)
+// into stride. Returns false when an attribute cannot be sized or when the
+// resulting stride is zero.
+bool calc_vtx_stride(const GXVtxDescList* vtxDesc, u32& stride) {
+    stride = 0;
+    for (; vtxDesc->attr != GX_VA_NULL; vtxDesc++) {
+        switch (vtxDesc->type) {
+        case GX_NONE:
+            break;
+        case GX_DIRECT:
+            // Direct data is only sized for matrix indices (one byte each).
+            if (!is_matrix_idx_attr(vtxDesc->attr)) {
+                return false;
+            }
+            stride += 1;
+            break;
+        case GX_INDEX8:
+            stride += 1;
+            break;
+        case GX_INDEX16:
+            stride += 2;
+            break;
+        default:
+            return false;
+        }
+    }
+    return stride != 0;
+}
+
+// Computes the byte length of the command at dlStart[offset] into cmdSize.
+// stride is the per-vertex size used to size draw commands. Returns false when
+// offset is out of range, the opcode is not recognized, or the command would
+// extend past dlSize.
+bool get_command_size(const u8* dlStart, u32 dlSize, u32 offset, u32 stride, u32& cmdSize) {
+    if (offset >= dlSize) {
+        return false;
+    }
+
+    const u8 cmd = dlStart[offset];
+    const u8 opcode = cmd & GX_OPCODE_MASK;
+    switch (opcode) {
+    case GX_NOP:
+    case GX_CMD_INVL_VC:
+        cmdSize = 1;
+        return true;
+    case (GX_LOAD_BP_REG & GX_OPCODE_MASK):
+        cmdSize = 5;
+        return offset + cmdSize <= dlSize;
+    case GX_LOAD_CP_REG:
+        cmdSize = 6;
+        return offset + cmdSize <= dlSize;
+    case GX_LOAD_XF_REG: {
+        if (offset + 5 > dlSize) {
+            return false;
+        }
+        // Kept as u32 so a length field of 0xFFFF cannot wrap the count to 0.
+        const u32 count = u32(read_be16(dlStart + offset + 1)) + 1;
+        cmdSize = 5 + count * 4;
+        return offset + cmdSize <= dlSize;
+    }
+    case GX_LOAD_INDX_A:
+    case GX_LOAD_INDX_B:
+    case GX_LOAD_INDX_C:
+    case GX_LOAD_INDX_D:
+        cmdSize = 5;
+        return offset + cmdSize <= dlSize;
+    case GX_CMD_CALL_DL:
+        cmdSize = 9;
+        return offset + cmdSize <= dlSize;
+    default:
+        if (is_draw_opcode(opcode)) {
+            if (offset + 3 > dlSize) {
+                return false;
+            }
+            const u16 vtxCount = read_be16(dlStart + offset + 1);
+            cmdSize = 3 + vtxCount * stride;
+            return offset + cmdSize <= dlSize;
+        }
+        return false;
+    }
+}
+
+// A GX_TRIANGLES draw being accumulated from consecutive source draws.
+struct MergeRun {
+    u8 cmd = 0;                // command byte written when the run is flushed
+    u16 vtxCount = 0;          // number of vertices held in `vertices`
+    std::vector<u8> vertices;  // raw vertex bytes
+};
+
+// Writes run to out as one draw command (command byte, big-endian vertex count,
+// vertex bytes) and resets it. Does nothing when the run is empty.
+void flush_merge_run(std::vector<u8>& out, MergeRun& run) {
+    if (run.vtxCount == 0) {
+        return;
+    }
+
+    out.push_back(run.cmd);
+    append_be16(out, run.vtxCount);
+    append_bytes(out, run.vertices.data(), run.vertices.size());
+    run.vertices.clear();
+    run.vtxCount = 0;
+}
+
+// Appends vertex number idx (stride bytes per vertex) from vertices to out.
+void append_vertex(std::vector<u8>& out, const u8* vertices, u32 stride, u16 idx) {
+    append_bytes(out, vertices + idx * stride, stride);
+}
+
+// Expands one draw of vtxCount vertices into an independent triangle list
+// appended to out. Returns false, leaving out untouched, when the opcode is not
+// supported or the vertex count does not form whole primitives; the caller
+// then copies the original command through unchanged.
+bool triangulate_draw(
+    std::vector<u8>& out, u8 opcode, const u8* vertices, u32 stride, u16 vtxCount) {
+    switch (opcode) {
+    case GX_TRIANGLES:
+        // A trailing partial triangle would shift the triangle boundaries of
+        // every draw merged after this one, so such draws are not converted.
+        if ((vtxCount % 3) != 0) {
+            return false;
+        }
+        append_bytes(out, vertices, vtxCount * stride);
+        return true;
+    case GX_TRIANGLEFAN:
+        if (vtxCount < 3) {
+            return false;
+        }
+        for (u16 v = 2; v < vtxCount; v++) {
+            append_vertex(out, vertices, stride, 0);
+            append_vertex(out, vertices, stride, v - 1);
+            append_vertex(out, vertices, stride, v);
+        }
+        return true;
+    case GX_TRIANGLESTRIP:
+        if (vtxCount < 3) {
+            return false;
+        }
+        for (u16 v = 2; v < vtxCount; v++) {
+            // Alternate triangles list their first two vertices in swapped order.
+            if ((v & 1) == 0) {
+                append_vertex(out, vertices, stride, v - 2);
+                append_vertex(out, vertices, stride, v - 1);
+            } else {
+                append_vertex(out, vertices, stride, v - 1);
+                append_vertex(out, vertices, stride, v - 2);
+            }
+            append_vertex(out, vertices, stride, v);
+        }
+        return true;
+    case GX_QUADS:
+        if ((vtxCount & 3) != 0) {
+            return false;
+        }
+        // Each quad (a, b, c, d) becomes triangles (a, b, c) and (c, d, a).
+        for (u16 v = 0; v < vtxCount; v += 4) {
+            append_vertex(out, vertices, stride, v);
+            append_vertex(out, vertices, stride, v + 1);
+            append_vertex(out, vertices, stride, v + 2);
+            append_vertex(out, vertices, stride, v + 2);
+            append_vertex(out, vertices, stride, v + 3);
+            append_vertex(out, vertices, stride, v);
+        }
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Appends triangle-list vertices to run. The run is flushed to out when the
+// command byte changes or when it reaches 0xFFFF vertices, the largest count
+// the 16-bit field can hold.
+void append_triangles_to_run(
+    std::vector<u8>& out, MergeRun& run, u8 cmd, const std::vector<u8>& vertices, u32 stride) {
+    u32 offset = 0;
+    u32 remaining = vertices.size() / stride;
+    while (remaining != 0) {
+        if (run.vtxCount != 0 && run.cmd != cmd) {
+            flush_merge_run(out, run);
+        }
+
+        if (run.vtxCount == 0) {
+            run.cmd = cmd;
+        }
+
+        // 0xFFFF is a multiple of 3, so a full run ends on a triangle boundary.
+        u32 available = 0xFFFF - run.vtxCount;
+        if (available == 0) {
+            flush_merge_run(out, run);
+            continue;
+        }
+
+        u32 toCopy = std::min(remaining, available);
+        append_bytes(run.vertices, vertices.data() + offset * stride, toCopy * stride);
+        run.vtxCount += toCopy;
+        offset += toCopy;
+        remaining -= toCopy;
+
+        if (run.vtxCount == 0xFFFF) {
+            flush_merge_run(out, run);
+        }
+    }
+}
+
+// Rewrites the display list into out: commands whose opcode is GX_NOP are
+// dropped and consecutive convertible draws are merged into GX_TRIANGLES runs.
+// Everything else is copied through unchanged and ends the current run.
+// Returns false when a command cannot be parsed.
+bool optimize_display_list(const u8* dlStart, u32 dlSize, u32 stride, std::vector<u8>& out) {
+    MergeRun run;
+    std::vector<u8> triangles;  // scratch buffer reused for every draw
+    out.reserve(dlSize);
+
+    for (u32 offset = 0; offset < dlSize;) {
+        u32 cmdSize = 0;
+        if (!get_command_size(dlStart, dlSize, offset, stride, cmdSize)) {
+            return false;
+        }
+
+        const u8 cmd = dlStart[offset];
+        const u8 opcode = cmd & GX_OPCODE_MASK;
+        if (opcode == GX_NOP) {
+            offset += cmdSize;
+            continue;
+        }
+
+        bool converted = false;
+        if (is_mergeable_draw_opcode(opcode)) {
+            const u16 vtxCount = read_be16(dlStart + offset + 1);
+            const u8* vertices = dlStart + offset + 3;
+            triangles.clear();
+            converted = triangulate_draw(triangles, opcode, vertices, stride, vtxCount);
+        }
+
+        if (converted) {
+            append_triangles_to_run(
+                out, run, (GX_TRIANGLES | (cmd & GX_VAT_MASK)), triangles, stride);
+        } else {
+            // Non-draw commands and draws that were not converted pass through.
+            flush_merge_run(out, run);
+            append_bytes(out, dlStart + offset, cmdSize);
+        }
+        offset += cmdSize;
+    }
+
+    flush_merge_run(out, run);
+    return true;
+}
+
+// Copies size bytes of data into a newly allocated buffer whose length is
+// ALIGN_NEXT(size, 0x20), zero-fills the padding, and stores the buffer and its
+// padded length in displayList / displayListSize.
+void set_display_list_copy(void*& displayList, u32& displayListSize, const u8* data, u32 size) {
+    const u32 alignedSize = ALIGN_NEXT(size, 0x20);
+    u8* newDL = JKR_NEW_ARRAY_ARGS(u8, alignedSize, 0x20);
+    if (size != 0) {
+        std::memcpy(newDL, data, size);
+    }
+    for (u32 i = size; i < alignedSize; i++) {
+        newDL[i] = 0;
+    }
+
+    displayList = newDL;
+    displayListSize = alignedSize;
+    DCStoreRange(newDL, displayListSize);
+}
+
+} // namespace
+#endif
 
 u32 J3DShapeDraw::countVertex(u32 stride) {
     u32 count = 0;
     u8* dlStart = (u8*)getDisplayList();
 
+#if TARGET_PC
+    // Walk every command so that non-draw commands are skipped, not treated
+    // as the end of the list.
+    const u32 dlSize = getDisplayListSize();
+    for (u32 offset = 0; offset < dlSize;) {
+        u32 cmdSize = 0;
+        if (!get_command_size(dlStart, dlSize, offset, stride, cmdSize)) {
+            break;
+        }
+        if (is_draw_opcode(dlStart[offset] & GX_OPCODE_MASK)) {
+            count += read_be16(dlStart + offset + 1);
+        }
+        offset += cmdSize;
+    }
+#else
     for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) {
         u8 cmd = *(u8*)dl;
         dl++;
@@ -20,6 +347,7 @@ u32 J3DShapeDraw::countVertex(u32 stride) {
         count += vtxNum;
         dl = (u8*)dl + stride * vtxNum;
     }
+#endif
 
     return count;
 }
@@ -34,13 +362,32 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
     u8* newDL = newDLStart;
 
     for (; (oldDL - oldDLStart) < mDisplayListSize;) {
+#if TARGET_PC
+        u32 oldOffset = oldDL - oldDLStart;
+        u32 cmdSize = 0;
+        if (!get_command_size(oldDLStart, mDisplayListSize, oldOffset, stride, cmdSize)) {
+            memcpy(newDL, oldDL, mDisplayListSize - oldOffset);
+            newDL += mDisplayListSize - oldOffset;
+            break;
+        }
+#endif
         // Copy command
         u8 cmd = *(u8*)oldDL;
         oldDL++;
         *newDL++ = cmd;
 
+#if TARGET_PC
+        u8 opcode = cmd & GX_OPCODE_MASK;
+        if (!is_draw_opcode(opcode)) {
+            memcpy(newDL, oldDL, cmdSize - 1);
+            oldDL += cmdSize - 1;
+            newDL += cmdSize - 1;
+            continue;
+        }
+#else
         if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
             break;
+#endif
 
         // Copy count
         int vtxNum = *(u16*)oldDL;
@@ -71,11 +418,34 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
 }
 
 J3DShapeDraw::J3DShapeDraw(const u8* displayList, u32 displayListSize) {
+#if TARGET_PC
+    set_display_list_copy(mDisplayList, mDisplayListSize, displayList, displayListSize);
+#else
     mDisplayList = (void*)displayList;
     mDisplayListSize = displayListSize;
+#endif
 }
 
+#if TARGET_PC
+// Builds the shape from an optimized copy of the display list. Falls back to a
+// plain copy when the vertex stride cannot be derived from vtxDesc or the
+// display list cannot be parsed.
+J3DShapeDraw::J3DShapeDraw(
+    const u8* displayList, u32 displayListSize, const GXVtxDescList* vtxDesc) {
+    u32 stride = 0;
+    std::vector<u8> optimized;
+    if (calc_vtx_stride(vtxDesc, stride) &&
+        optimize_display_list(displayList, displayListSize, stride, optimized))
+    {
+        set_display_list_copy(mDisplayList, mDisplayListSize, optimized.data(), optimized.size());
+    } else {
+        set_display_list_copy(mDisplayList, mDisplayListSize, displayList, displayListSize);
+    }
+}
+#endif
+
 void J3DShapeDraw::draw() const {
+    ZoneScoped;
     GXCallDisplayList(mDisplayList, mDisplayListSize);
 }
 
diff --git a/libs/JSystem/src/J3DGraphLoader/J3DShapeFactory.cpp b/libs/JSystem/src/J3DGraphLoader/J3DShapeFactory.cpp
index f283cb8932..b8b3b14c36 100644
--- a/libs/JSystem/src/J3DGraphLoader/J3DShapeFactory.cpp
+++ b/libs/JSystem/src/J3DGraphLoader/J3DShapeFactory.cpp
@@ -132,7 +132,12 @@ J3DShapeDraw* J3DShapeFactory::newShapeDraw(int shapeNo, int mtxGroupNo) const {
     const J3DShapeInitData& shapeInitData = mShapeInitData[mIndexTable[shapeNo]];
     const J3DShapeDrawInitData& drawInitData =
         (&mDrawInitData[shapeInitData.mDrawInitDataIndex])[mtxGroupNo];
+#if TARGET_PC
+    shapeDraw = JKR_NEW J3DShapeDraw(&mDisplayListData[drawInitData.mDisplayListIndex], drawInitData.mDisplayListSize,
+                                     getVtxDescList(shapeNo));
+#else
     shapeDraw = JKR_NEW J3DShapeDraw(&mDisplayListData[drawInitData.mDisplayListIndex], drawInitData.mDisplayListSize);
+#endif
     J3D_ASSERT_ALLOCMEM(193, shapeDraw);
     return shapeDraw;
 }
@@ -154,7 +159,7 @@ s32 J3DShapeFactory::calcSize(int shapeNo, u32 flag) {
 
     for (u32 i = 0; i < mtxGroupNo; i++) {
         size += calcSizeShapeMtx(flag, shapeNo, i);
-        size += 0x0C;
+        size += sizeof(J3DShapeDraw);
     }
 
     return size;