J3DShapeDraw and J3DSkinDeform OK (#2908)

This commit is contained in:
LagoLunatic 2025-12-02 18:38:12 -05:00 committed by GitHub
parent 9a69fa38c7
commit cabc703969
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 28 additions and 30 deletions

View File

@@ -1235,7 +1235,7 @@ config.libs = [
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DTexture.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DPacket.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShapeMtx.cpp"),
Object(NonMatching, "JSystem/J3DGraphBase/J3DShapeDraw.cpp"),
Object(MatchingFor(ALL_GCN, "ShieldD"), "JSystem/J3DGraphBase/J3DShapeDraw.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DShape.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphBase/J3DMaterial.cpp"),
Object(Equivalent, "JSystem/J3DGraphBase/J3DMatBlock.cpp"), # virtual function order
@@ -1254,7 +1254,7 @@ config.libs = [
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DModel.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DAnimation.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAnm.cpp"),
Object(NonMatching, "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DSkinDeform.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DCluster.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DJoint.cpp"),
Object(MatchingFor(ALL_GCN), "JSystem/J3DGraphAnimator/J3DMaterialAttach.cpp"),

View File

@@ -347,10 +347,6 @@ int J3DSkinDeform::initMtxIndexArray(J3DModelData* pModelData) {
return kJ3DError_Success;
}
// NONMATCHING - instruction ordering/optimization issue; matches on debug.
// For the function to match, the compiler needs to delay adding +3 to dl until the end of the while loop,
// but instead it puts the +3 at the start of the for loop and reworks the other instructions.
// A 99.93% match on retail is possible by moving where dl is incremented, but that seems fake: it breaks debug and introduces an operand swap on src.
void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) {
J3D_ASSERT_NULLPTR(740, pModelData != NULL);
for (u16 i = 0; i < pModelData->getShapeNum(); i++) {
@@ -389,7 +385,7 @@ void J3DSkinDeform::changeFastSkinDL(J3DModelData* pModelData) {
memcpy(dst, src + 1, (int)(vtxSize - 1)); // The -1 is to remove GX_VA_PNMTXIDX
dst += (int)(vtxSize - 1);
}
dl += vtxSize * vtxCount;
dl = (u8*)dl + vtxSize * vtxCount;
}
int dlistSize = ((int)dst - (int)displayListStart + 0x1f) & ~0x1f;

View File

@@ -8,55 +8,57 @@
u32 J3DShapeDraw::countVertex(u32 stride) {
u32 count = 0;
uintptr_t dlStart = (uintptr_t)getDisplayList();
u8* dlStart = (u8*)getDisplayList();
for (u8* dl = (u8*)dlStart; ((uintptr_t)dl - dlStart) < getDisplayListSize();) {
if (*dl != GX_TRIANGLEFAN && *dl != GX_TRIANGLESTRIP)
for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) {
u8 cmd = *(u8*)dl;
dl++;
if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
break;
u16 vtxNum = *((u16*)(dl + 1));
int vtxNum = *((u16*)(dl));
dl += 2;
count += vtxNum;
dl += stride * vtxNum;
dl += 3;
dl = (u8*)dl + stride * vtxNum;
}
return count;
}
// NONMATCHING regalloc
void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) {
u32 byteNum = countVertex(stride);
u32 newSize = ALIGN_NEXT(mDisplayListSize + byteNum, 0x20);
u32 oldSize = mDisplayListSize;
u32 newSize = ALIGN_NEXT(oldSize + byteNum, 0x20);
u8* newDLStart = new (0x20) u8[newSize];
u8* oldDLStart = getDisplayList();
u8* oldDLStart = (u8*)mDisplayList;
u8* oldDL = oldDLStart;
u8* newDL = newDLStart;
for (; (oldDL - oldDLStart) < mDisplayListSize;) {
// Copy command
u8 h = *oldDL;
*newDL++ = h;
u8 cmd = *(u8*)oldDL;
oldDL++;
*newDL++ = cmd;
if (h != GX_TRIANGLEFAN && h != GX_TRIANGLESTRIP)
if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP)
break;
// Copy count
// regalloc (I suspect there's a way to shove this in a u16 temp without an mr)
s32 vtxNum = *((u16*)(oldDL + 1));
*((u16*)newDL) = vtxNum;
int vtxNum = *(u16*)oldDL;
oldDL += 2;
*(u16*)newDL = vtxNum;
newDL += 2;
for (s32 i = 0; i < vtxNum; i++) {
u8* oldDLVtx = &oldDL[stride * i + 3];
for (int i = 0; i < vtxNum; i++) {
u8* oldDLVtx = &oldDL[stride * i];
u8 pnmtxidx = *oldDLVtx;
memcpy(newDL, oldDLVtx, attrOffs);
memcpy(newDL, oldDLVtx, (int)attrOffs);
newDL += attrOffs;
*newDL++ = valueBase + pnmtxidx;
memcpy(newDL, oldDLVtx + attrOffs, stride - attrOffs);
newDL += (stride - attrOffs);
}
oldDL += stride * vtxNum;
oldDL += 3;
oldDL = (u8*)oldDL + stride * vtxNum;
}
u32 realSize = ALIGN_NEXT((uintptr_t)newDL - (uintptr_t)newDLStart, 0x20);

View File

@@ -70,10 +70,10 @@ struct TAsinAcosTable {
namespace JMath {
TSinCosTable<13, f32> sincosTable_;
TSinCosTable<13, f32> sincosTable_ ATTRIBUTE_ALIGN(32);
TAtanTable atanTable_;
TAtanTable atanTable_ ATTRIBUTE_ALIGN(32);
TAsinAcosTable asinAcosTable_;
TAsinAcosTable asinAcosTable_ ATTRIBUTE_ALIGN(32);
} // namespace JMath