J3DShapeDraw and J3DSkinDeform OK (#2908)

This commit is contained in:
LagoLunatic 2025-12-02 18:38:12 -05:00 committed by GitHub
parent 9a69fa38c7
commit cabc703969
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 28 additions and 30 deletions

View File

@@ -1235,7 +1235,7 @@ config.libs = [
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DTexture.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DTexture.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DPacket.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DPacket.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShapeMtx.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShapeMtx.cpp"),
Object(NonMatching, "JSystem/J3DGraphBase/J3DShapeDraw.cpp"), Object(MatchingFor(ALL_GCN, "ShieldD"), "JSystem/J3DGraphBase/J3DShapeDraw.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShape.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShape.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DMaterial.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DMaterial.cpp"),
Object(Equivalent, "JSystem/J3DGraphBase/J3DMatBlock.cpp"), # virtual function order Object(Equivalent, "JSystem/J3DGraphBase/J3DMatBlock.cpp"), # virtual function order
@@ -1254,7 +1254,7 @@ config.libs = [
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DModel.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DModel.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DAnimation.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DAnimation.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAnm.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAnm.cpp"),
Object(NonMatching, "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DCluster.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DCluster.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DJoint.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DJoint.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAttach.cpp"), Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAttach.cpp"),

View File

@@ -347,10 +347,6 @@ int J3DSkinDeform::initMtxIndexArray(J3DModelData* pModelData) {
return kJ3DError_Success; return kJ3DError_Success;
} }
// NONMATCHING - instruction ordering/optimization issue, matches debug
// the compiler needs to delay adding +3 to dl until the end of the while loop for the function to match
// but instead it puts the +3 at the start of the for loop and reworks the other instructions
// can get a 99.93% match on retail by moving where dl is incremented, but it seems fake as it breaks debug, and introduces an operand swap on src
void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) { void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) {
J3D_ASSERT_NULLPTR(740, pModelData != NULL); J3D_ASSERT_NULLPTR(740, pModelData != NULL);
for (u16 i = 0; i < pModelData->getShapeNum(); i++) { for (u16 i = 0; i < pModelData->getShapeNum(); i++) {
@@ -389,7 +385,7 @@ void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) {
memcpy(dst, src + 1, (int)(vtxSize - 1)); // The -1 is to remove GX_VA_PNMTXIDX memcpy(dst, src + 1, (int)(vtxSize - 1)); // The -1 is to remove GX_VA_PNMTXIDX
dst += (int)(vtxSize - 1); dst += (int)(vtxSize - 1);
} }
dl += vtxSize * vtxCount; dl = (u8*)dl + vtxSize * vtxCount;
} }
int dlistSize = ((int)dst - (int)displayListStart + 0x1f) & ~0x1f; int dlistSize = ((int)dst - (int)displayListStart + 0x1f) & ~0x1f;

View File

@@ -8,55 +8,57 @@
u32 J3DShapeDraw::countVertex(u32 stride) { u32 J3DShapeDraw::countVertex(u32 stride) {
u32 count = 0; u32 count = 0;
uintptr_t dlStart = (uintptr_t)getDisplayList(); u8* dlStart = (u8*)getDisplayList();
for (u8* dl = (u8*)dlStart; ((uintptr_t)dl - dlStart) < getDisplayListSize();) { for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) {
if (*dl != GX_TRIANGLEFAN && *dl != GX_TRIANGLESTRIP) u8 cmd = *(u8*)dl;
dl++;
if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
break; break;
u16 vtxNum = *((u16*)(dl + 1)); int vtxNum = *((u16*)(dl));
dl += 2;
count += vtxNum; count += vtxNum;
dl += stride * vtxNum; dl = (u8*)dl + stride * vtxNum;
dl += 3;
} }
return count; return count;
} }
// NONMATCHING regalloc
void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
u32 byteNum = countVertex(stride); u32 byteNum = countVertex(stride);
u32 newSize = ALIGN_NEXT(mDisplayListSize + byteNum, 0x20); u32 oldSize = mDisplayListSize;
u32 newSize = ALIGN_NEXT(oldSize + byteNum, 0x20);
u8* newDLStart = new (0x20) u8[newSize]; u8* newDLStart = new (0x20) u8[newSize];
u8* oldDLStart = getDisplayList(); u8* oldDLStart = (u8*)mDisplayList;
u8* oldDL = oldDLStart; u8* oldDL = oldDLStart;
u8* newDL = newDLStart; u8* newDL = newDLStart;
for (; (oldDL - oldDLStart) < mDisplayListSize;) { for (; (oldDL - oldDLStart) < mDisplayListSize;) {
// Copy command // Copy command
u8 h = *oldDL; u8 cmd = *(u8*)oldDL;
*newDL++ = h; oldDL++;
*newDL++ = cmd;
if (h != GX_TRIANGLEFAN && h != GX_TRIANGLESTRIP) if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
break; break;
// Copy count // Copy count
// regalloc (I suspect there's a way to shove this in a u16 temp without an mr) int vtxNum = *(u16*)oldDL;
s32 vtxNum = *((u16*)(oldDL + 1)); oldDL += 2;
*((u16*)newDL) = vtxNum; *(u16*)newDL = vtxNum;
newDL += 2; newDL += 2;
for (s32 i = 0; i < vtxNum; i++) { for (int i = 0; i < vtxNum; i++) {
u8* oldDLVtx = &oldDL[stride * i + 3]; u8* oldDLVtx = &oldDL[stride * i];
u8 pnmtxidx = *oldDLVtx; u8 pnmtxidx = *oldDLVtx;
memcpy(newDL, oldDLVtx, attrOffs); memcpy(newDL, oldDLVtx, (int)attrOffs);
newDL += attrOffs; newDL += attrOffs;
*newDL++ = valueBase + pnmtxidx; *newDL++ = valueBase + pnmtxidx;
memcpy(newDL, oldDLVtx + attrOffs, stride - attrOffs); memcpy(newDL, oldDLVtx + attrOffs, stride - attrOffs);
newDL += (stride - attrOffs); newDL += (stride - attrOffs);
} }
oldDL += stride * vtxNum; oldDL = (u8*)oldDL + stride * vtxNum;
oldDL += 3;
} }
u32 realSize = ALIGN_NEXT((uintptr_t)newDL - (uintptr_t)newDLStart, 0x20); u32 realSize = ALIGN_NEXT((uintptr_t)newDL - (uintptr_t)newDLStart, 0x20);

View File

@@ -70,10 +70,10 @@ struct TAsinAcosTable {
namespace JMath { namespace JMath {
TSinCosTable<13, f32> sincosTable_; TSinCosTable<13, f32> sincosTable_ ATTRIBUTE_ALIGN(32);
TAtanTable atanTable_; TAtanTable atanTable_ ATTRIBUTE_ALIGN(32);
TAsinAcosTable asinAcosTable_; TAsinAcosTable asinAcosTable_ ATTRIBUTE_ALIGN(32);
} // namespace JMath } // namespace JMath