mirror of
https://github.com/TwilitRealm/dusklight
synced 2026-06-09 03:59:34 -04:00
Optimize display lists in J3DShapeDraw
This is a stop-gap until DL optimization is upstreamed to Aurora.
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#ifndef J3DSHAPEDRAW_H
|
||||
#define J3DSHAPEDRAW_H
|
||||
|
||||
#include <gx.h>
|
||||
#include <types.h>
|
||||
|
||||
/**
|
||||
@@ -12,6 +13,9 @@ public:
|
||||
u32 countVertex(u32);
|
||||
void addTexMtxIndexInDL(u32, u32, u32);
|
||||
J3DShapeDraw(u8 const*, u32);
|
||||
#if TARGET_PC
|
||||
J3DShapeDraw(u8 const*, u32, const GXVtxDescList*);
|
||||
#endif
|
||||
void draw() const;
|
||||
|
||||
virtual ~J3DShapeDraw();
|
||||
|
||||
@@ -1,15 +1,310 @@
|
||||
#include "JSystem/JSystem.h" // IWYU pragma: keep
|
||||
#include "JSystem/JSystem.h" // IWYU pragma: keep
|
||||
|
||||
#include <cstring>
|
||||
#include <gx.h>
|
||||
#include <stdint.h>
|
||||
#include "JSystem/J3DGraphBase/J3DShapeDraw.h"
|
||||
#include "JSystem/JKernel/JKRHeap.h"
|
||||
#include <cstring>
|
||||
#include <stdint.h>
|
||||
#include <gx.h>
|
||||
|
||||
#if TARGET_PC
|
||||
#include <algorithm>
|
||||
#include <tracy/Tracy.hpp>
|
||||
#include <vector>
|
||||
#include "dusk/logging.h"
|
||||
|
||||
namespace {
|
||||
|
||||
u16 read_be16(const u8* data) {
|
||||
return (u16(data[0]) << 8) | data[1];
|
||||
}
|
||||
|
||||
void append_be16(std::vector<u8>& out, u16 value) {
|
||||
out.push_back(value >> 8);
|
||||
out.push_back(value & 0xFF);
|
||||
}
|
||||
|
||||
void append_bytes(std::vector<u8>& out, const u8* data, u32 size) {
|
||||
out.insert(out.end(), data, data + size);
|
||||
}
|
||||
|
||||
bool is_matrix_idx_attr(GXAttr attr) {
|
||||
return attr >= GX_VA_PNMTXIDX && attr <= GX_VA_TEX7MTXIDX;
|
||||
}
|
||||
|
||||
bool is_draw_opcode(u8 opcode) {
|
||||
return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP ||
|
||||
opcode == GX_TRIANGLEFAN || opcode == GX_LINES || opcode == GX_LINESTRIP ||
|
||||
opcode == GX_POINTS;
|
||||
}
|
||||
|
||||
bool is_mergeable_draw_opcode(u8 opcode) {
|
||||
return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP ||
|
||||
opcode == GX_TRIANGLEFAN;
|
||||
}
|
||||
|
||||
bool calc_vtx_stride(const GXVtxDescList* vtxDesc, u32& stride) {
|
||||
stride = 0;
|
||||
for (; vtxDesc->attr != GX_VA_NULL; vtxDesc++) {
|
||||
switch (vtxDesc->type) {
|
||||
case GX_NONE:
|
||||
break;
|
||||
case GX_DIRECT:
|
||||
if (!is_matrix_idx_attr(vtxDesc->attr)) {
|
||||
return false;
|
||||
}
|
||||
stride += 1;
|
||||
break;
|
||||
case GX_INDEX8:
|
||||
stride += 1;
|
||||
break;
|
||||
case GX_INDEX16:
|
||||
stride += 2;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return stride != 0;
|
||||
}
|
||||
|
||||
bool get_command_size(const u8* dlStart, u32 dlSize, u32 offset, u32 stride, u32& cmdSize) {
|
||||
if (offset >= dlSize) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u8 cmd = dlStart[offset];
|
||||
const u8 opcode = cmd & GX_OPCODE_MASK;
|
||||
switch (opcode) {
|
||||
case GX_NOP:
|
||||
case GX_CMD_INVL_VC:
|
||||
cmdSize = 1;
|
||||
return true;
|
||||
case (GX_LOAD_BP_REG & GX_OPCODE_MASK):
|
||||
cmdSize = 5;
|
||||
return offset + cmdSize <= dlSize;
|
||||
case GX_LOAD_CP_REG:
|
||||
cmdSize = 6;
|
||||
return offset + cmdSize <= dlSize;
|
||||
case GX_LOAD_XF_REG: {
|
||||
if (offset + 5 > dlSize) {
|
||||
return false;
|
||||
}
|
||||
const u16 count = read_be16(dlStart + offset + 1) + 1;
|
||||
cmdSize = 5 + count * 4;
|
||||
return offset + cmdSize <= dlSize;
|
||||
}
|
||||
case GX_LOAD_INDX_A:
|
||||
case GX_LOAD_INDX_B:
|
||||
case GX_LOAD_INDX_C:
|
||||
case GX_LOAD_INDX_D:
|
||||
cmdSize = 5;
|
||||
return offset + cmdSize <= dlSize;
|
||||
case GX_CMD_CALL_DL:
|
||||
cmdSize = 9;
|
||||
return offset + cmdSize <= dlSize;
|
||||
default:
|
||||
if (is_draw_opcode(opcode)) {
|
||||
if (offset + 3 > dlSize) {
|
||||
return false;
|
||||
}
|
||||
const u16 vtxCount = read_be16(dlStart + offset + 1);
|
||||
cmdSize = 3 + vtxCount * stride;
|
||||
return offset + cmdSize <= dlSize;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
struct MergeRun {
|
||||
u8 cmd = 0;
|
||||
u16 vtxCount = 0;
|
||||
std::vector<u8> vertices;
|
||||
};
|
||||
|
||||
void flush_merge_run(std::vector<u8>& out, MergeRun& run) {
|
||||
if (run.vtxCount == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
out.push_back(run.cmd);
|
||||
append_be16(out, run.vtxCount);
|
||||
append_bytes(out, run.vertices.data(), run.vertices.size());
|
||||
run.vertices.clear();
|
||||
run.vtxCount = 0;
|
||||
}
|
||||
|
||||
void append_vertex(std::vector<u8>& out, const u8* vertices, u32 stride, u16 idx) {
|
||||
append_bytes(out, vertices + idx * stride, stride);
|
||||
}
|
||||
|
||||
bool triangulate_draw(
|
||||
std::vector<u8>& out, u8 opcode, const u8* vertices, u32 stride, u16 vtxCount) {
|
||||
switch (opcode) {
|
||||
case GX_TRIANGLES:
|
||||
append_bytes(out, vertices, vtxCount * stride);
|
||||
return true;
|
||||
case GX_TRIANGLEFAN:
|
||||
if (vtxCount < 3) {
|
||||
return false;
|
||||
}
|
||||
for (u16 v = 2; v < vtxCount; v++) {
|
||||
append_vertex(out, vertices, stride, 0);
|
||||
append_vertex(out, vertices, stride, v - 1);
|
||||
append_vertex(out, vertices, stride, v);
|
||||
}
|
||||
return true;
|
||||
case GX_TRIANGLESTRIP:
|
||||
if (vtxCount < 3) {
|
||||
return false;
|
||||
}
|
||||
for (u16 v = 2; v < vtxCount; v++) {
|
||||
if ((v & 1) == 0) {
|
||||
append_vertex(out, vertices, stride, v - 2);
|
||||
append_vertex(out, vertices, stride, v - 1);
|
||||
} else {
|
||||
append_vertex(out, vertices, stride, v - 1);
|
||||
append_vertex(out, vertices, stride, v - 2);
|
||||
}
|
||||
append_vertex(out, vertices, stride, v);
|
||||
}
|
||||
return true;
|
||||
case GX_QUADS:
|
||||
if ((vtxCount & 3) != 0) {
|
||||
return false;
|
||||
}
|
||||
for (u16 v = 0; v < vtxCount; v += 4) {
|
||||
append_vertex(out, vertices, stride, v);
|
||||
append_vertex(out, vertices, stride, v + 1);
|
||||
append_vertex(out, vertices, stride, v + 2);
|
||||
append_vertex(out, vertices, stride, v + 2);
|
||||
append_vertex(out, vertices, stride, v + 3);
|
||||
append_vertex(out, vertices, stride, v);
|
||||
}
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void append_triangles_to_run(
|
||||
std::vector<u8>& out, MergeRun& run, u8 cmd, const std::vector<u8>& vertices, u32 stride) {
|
||||
u32 offset = 0;
|
||||
u32 remaining = vertices.size() / stride;
|
||||
while (remaining != 0) {
|
||||
if (run.vtxCount != 0 && run.cmd != cmd) {
|
||||
flush_merge_run(out, run);
|
||||
}
|
||||
|
||||
if (run.vtxCount == 0) {
|
||||
run.cmd = cmd;
|
||||
}
|
||||
|
||||
u32 available = 0xFFFF - run.vtxCount;
|
||||
if (available == 0) {
|
||||
flush_merge_run(out, run);
|
||||
continue;
|
||||
}
|
||||
|
||||
u32 toCopy = std::min(remaining, available);
|
||||
append_bytes(run.vertices, vertices.data() + offset * stride, toCopy * stride);
|
||||
run.vtxCount += toCopy;
|
||||
offset += toCopy;
|
||||
remaining -= toCopy;
|
||||
|
||||
if (run.vtxCount == 0xFFFF) {
|
||||
flush_merge_run(out, run);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool optimize_display_list(const u8* dlStart, u32 dlSize, u32 stride, std::vector<u8>& out) {
|
||||
MergeRun run;
|
||||
out.reserve(dlSize);
|
||||
|
||||
for (u32 offset = 0; offset < dlSize;) {
|
||||
u32 cmdSize = 0;
|
||||
if (!get_command_size(dlStart, dlSize, offset, stride, cmdSize)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const u8 cmd = dlStart[offset];
|
||||
const u8 opcode = cmd & GX_OPCODE_MASK;
|
||||
if (opcode == GX_NOP) {
|
||||
offset += cmdSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_draw_opcode(opcode)) {
|
||||
flush_merge_run(out, run);
|
||||
append_bytes(out, dlStart + offset, cmdSize);
|
||||
offset += cmdSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!is_mergeable_draw_opcode(opcode)) {
|
||||
flush_merge_run(out, run);
|
||||
append_bytes(out, dlStart + offset, cmdSize);
|
||||
offset += cmdSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
const u16 vtxCount = read_be16(dlStart + offset + 1);
|
||||
const u8* vertices = dlStart + offset + 3;
|
||||
std::vector<u8> triangles;
|
||||
if (!triangulate_draw(triangles, opcode, vertices, stride, vtxCount)) {
|
||||
flush_merge_run(out, run);
|
||||
append_bytes(out, dlStart + offset, cmdSize);
|
||||
offset += cmdSize;
|
||||
continue;
|
||||
}
|
||||
|
||||
append_triangles_to_run(out, run, (GX_TRIANGLES | (cmd & GX_VAT_MASK)), triangles, stride);
|
||||
offset += cmdSize;
|
||||
}
|
||||
|
||||
flush_merge_run(out, run);
|
||||
return true;
|
||||
}
|
||||
|
||||
void set_display_list_copy(void*& displayList, u32& displayListSize, const u8* data, u32 size) {
|
||||
const u32 alignedSize = ALIGN_NEXT(size, 0x20);
|
||||
u8* newDL = JKR_NEW_ARRAY_ARGS(u8, alignedSize, 0x20);
|
||||
if (size != 0) {
|
||||
std::memcpy(newDL, data, size);
|
||||
}
|
||||
for (u32 i = size; i < alignedSize; i++) {
|
||||
newDL[i] = 0;
|
||||
}
|
||||
|
||||
displayList = newDL;
|
||||
displayListSize = alignedSize;
|
||||
DCStoreRange(newDL, displayListSize);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
u32 J3DShapeDraw::countVertex(u32 stride) {
|
||||
u32 count = 0;
|
||||
u8* dlStart = (u8*)getDisplayList();
|
||||
|
||||
#if TARGET_PC
|
||||
for (u32 offset = 0; offset < getDisplayListSize();) {
|
||||
u8 cmd = dlStart[offset];
|
||||
u8 opcode = cmd & GX_OPCODE_MASK;
|
||||
u32 cmdSize = 0;
|
||||
if (!get_command_size(dlStart, getDisplayListSize(), offset, stride, cmdSize)) {
|
||||
break;
|
||||
}
|
||||
if (!is_draw_opcode(opcode)) {
|
||||
offset += cmdSize;
|
||||
continue;
|
||||
}
|
||||
int vtxNum = be16(*reinterpret_cast<u16*>(dlStart + offset + 1));
|
||||
count += vtxNum;
|
||||
offset += 3 + stride * vtxNum;
|
||||
}
|
||||
#else
|
||||
for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) {
|
||||
u8 cmd = *(u8*)dl;
|
||||
dl++;
|
||||
@@ -20,6 +315,7 @@ u32 J3DShapeDraw::countVertex(u32 stride) {
|
||||
count += vtxNum;
|
||||
dl = (u8*)dl + stride * vtxNum;
|
||||
}
|
||||
#endif
|
||||
|
||||
return count;
|
||||
}
|
||||
@@ -34,13 +330,32 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
|
||||
u8* newDL = newDLStart;
|
||||
|
||||
for (; (oldDL - oldDLStart) < mDisplayListSize;) {
|
||||
#if TARGET_PC
|
||||
u32 oldOffset = oldDL - oldDLStart;
|
||||
u32 cmdSize = 0;
|
||||
if (!get_command_size(oldDLStart, mDisplayListSize, oldOffset, stride, cmdSize)) {
|
||||
memcpy(newDL, oldDL, mDisplayListSize - oldOffset);
|
||||
newDL += mDisplayListSize - oldOffset;
|
||||
break;
|
||||
}
|
||||
#endif
|
||||
// Copy command
|
||||
u8 cmd = *(u8*)oldDL;
|
||||
oldDL++;
|
||||
*newDL++ = cmd;
|
||||
|
||||
#if TARGET_PC
|
||||
u8 opcode = cmd & GX_OPCODE_MASK;
|
||||
if (!is_draw_opcode(opcode)) {
|
||||
memcpy(newDL, oldDL, cmdSize - 1);
|
||||
oldDL += cmdSize - 1;
|
||||
newDL += cmdSize - 1;
|
||||
continue;
|
||||
}
|
||||
#else
|
||||
if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
|
||||
break;
|
||||
#endif
|
||||
|
||||
// Copy count
|
||||
int vtxNum = *(u16*)oldDL;
|
||||
@@ -71,11 +386,31 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
|
||||
}
|
||||
|
||||
J3DShapeDraw::J3DShapeDraw(const u8* displayList, u32 displayListSize) {
|
||||
#if TARGET_PC
|
||||
set_display_list_copy(mDisplayList, mDisplayListSize, displayList, displayListSize);
|
||||
#else
|
||||
mDisplayList = (void*)displayList;
|
||||
mDisplayListSize = displayListSize;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if TARGET_PC
|
||||
J3DShapeDraw::J3DShapeDraw(
|
||||
const u8* displayList, u32 displayListSize, const GXVtxDescList* vtxDesc) {
|
||||
u32 stride = 0;
|
||||
std::vector<u8> optimized;
|
||||
if (calc_vtx_stride(vtxDesc, stride) &&
|
||||
optimize_display_list(displayList, displayListSize, stride, optimized))
|
||||
{
|
||||
set_display_list_copy(mDisplayList, mDisplayListSize, optimized.data(), optimized.size());
|
||||
} else {
|
||||
set_display_list_copy(mDisplayList, mDisplayListSize, displayList, displayListSize);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void J3DShapeDraw::draw() const {
|
||||
ZoneScoped;
|
||||
GXCallDisplayList(mDisplayList, mDisplayListSize);
|
||||
}
|
||||
|
||||
|
||||
@@ -132,7 +132,12 @@ J3DShapeDraw* J3DShapeFactory::newShapeDraw(int shapeNo, int mtxGroupNo) const {
|
||||
const J3DShapeInitData& shapeInitData = mShapeInitData[mIndexTable[shapeNo]];
|
||||
const J3DShapeDrawInitData& drawInitData =
|
||||
(&mDrawInitData[shapeInitData.mDrawInitDataIndex])[mtxGroupNo];
|
||||
#if TARGET_PC
|
||||
shapeDraw = JKR_NEW J3DShapeDraw(&mDisplayListData[drawInitData.mDisplayListIndex], drawInitData.mDisplayListSize,
|
||||
getVtxDescList(shapeNo));
|
||||
#else
|
||||
shapeDraw = JKR_NEW J3DShapeDraw(&mDisplayListData[drawInitData.mDisplayListIndex], drawInitData.mDisplayListSize);
|
||||
#endif
|
||||
J3D_ASSERT_ALLOCMEM(193, shapeDraw);
|
||||
return shapeDraw;
|
||||
}
|
||||
@@ -154,7 +159,7 @@ s32 J3DShapeFactory::calcSize(int shapeNo, u32 flag) {
|
||||
|
||||
for (u32 i = 0; i < mtxGroupNo; i++) {
|
||||
size += calcSizeShapeMtx(flag, shapeNo, i);
|
||||
size += 0x0C;
|
||||
size += sizeof(J3DShapeDraw);
|
||||
}
|
||||
|
||||
return size;
|
||||
|
||||
Reference in New Issue
Block a user