From 00707024bbc95e97f6a9a5506ed5bdc655d3071d Mon Sep 17 00:00:00 2001 From: Luke Street Date: Sat, 13 Jun 2026 10:40:57 -0600 Subject: [PATCH] Update aurora & flower/grass draw batching --- extern/aurora | 2 +- files.cmake | 2 + include/d/actor/d_flower.h | 10 + include/d/actor/d_grass.h | 8 + libs/JSystem/src/J3DGraphBase/J3DShape.cpp | 4 +- .../JSystem/src/J3DGraphBase/J3DShapeDraw.cpp | 348 +++------------- src/d/actor/d_flower.inc | 378 ++++++++++++++++++ src/d/actor/d_grass.inc | 356 +++++++++++++++++ src/dusk/batch.cpp | 72 ++++ src/dusk/batch.hpp | 25 ++ 10 files changed, 909 insertions(+), 296 deletions(-) create mode 100644 src/dusk/batch.cpp create mode 100644 src/dusk/batch.hpp diff --git a/extern/aurora b/extern/aurora index 5143394381..ad55eedb31 160000 --- a/extern/aurora +++ b/extern/aurora @@ -1 +1 @@ -Subproject commit 514339438178ef2bed1b14e5149d90ece0c6e0cc +Subproject commit ad55eedb3121e540b44fbabf8e9f68bfa55ba182 diff --git a/files.cmake b/files.cmake index 99ccfc23c9..a920db2e4f 100644 --- a/files.cmake +++ b/files.cmake @@ -1419,6 +1419,8 @@ set(DUSK_FILES src/dusk/dvd_asset.cpp src/d/actor/d_a_alink_dusk.cpp src/dusk/asserts.cpp + src/dusk/batch.cpp + src/dusk/batch.hpp src/dusk/config.cpp src/dusk/crash_handler.cpp src/dusk/crash_reporting.cpp diff --git a/include/d/actor/d_flower.h b/include/d/actor/d_flower.h index cd505335a8..f63847956a 100644 --- a/include/d/actor/d_flower.h +++ b/include/d/actor/d_flower.h @@ -4,6 +4,10 @@ #include "JSystem/J3DGraphBase/J3DPacket.h" #include "SSystem/SComponent/c_xyz.h" +#if TARGET_PC +#include "dusk/batch.hpp" +#endif + class cCcD_Obj; class dCcMassS_HitInf; class fopAc_ac_c; @@ -107,6 +111,12 @@ public: #if TARGET_PC TGXTexObj mTexObj_l_J_Ohana00_64TEX; TGXTexObj mTexObj_l_J_Ohana01_64128_0419TEX; + + dusk::batch::LeafTemplate mTplHana00; // l_J_hana00DL + dusk::batch::LeafTemplate mTplHana00Cut; // l_J_hana00_cDL + dusk::batch::LeafTemplate mTplHana01; // l_J_hana01DL + dusk::batch::LeafTemplate 
mTplHana01Cut00; // l_J_hana01_c_00DL + dusk::batch::LeafTemplate mTplHana01Cut; // l_J_hana01_c_01DL #endif }; // Size: 0x12A54 diff --git a/include/d/actor/d_grass.h b/include/d/actor/d_grass.h index 47b948679d..b7cc3d7d95 100644 --- a/include/d/actor/d_grass.h +++ b/include/d/actor/d_grass.h @@ -4,6 +4,10 @@ #include "JSystem/J3DGraphBase/J3DPacket.h" #include "SSystem/SComponent/c_xyz.h" +#if TARGET_PC +#include "../../../src/dusk/batch.hpp" +#endif + class cCcD_Obj; class csXyz; class dCcMassS_HitInf; @@ -110,6 +114,10 @@ public: #if TARGET_PC TGXTexObj mTexObj_l_M_Hijiki00TEX; TGXTexObj mTexObj_l_M_kusa05_RGBATEX; + + dusk::batch::LeafTemplate mTplKusa9q; // l_M_Kusa_9qDL + dusk::batch::LeafTemplate mTplKusa9qCut; // l_M_Kusa_9q_cDL + dusk::batch::LeafTemplate mTplTengusa; // l_M_TenGusaDL #endif }; // Size: 0x1D718 diff --git a/libs/JSystem/src/J3DGraphBase/J3DShape.cpp b/libs/JSystem/src/J3DGraphBase/J3DShape.cpp index fc7ac5b727..4b407d1e05 100644 --- a/libs/JSystem/src/J3DGraphBase/J3DShape.cpp +++ b/libs/JSystem/src/J3DGraphBase/J3DShape.cpp @@ -136,8 +136,8 @@ void J3DLoadCPCmd(u8 addr, u32 val) { #if TARGET_PC static void J3DLoadArrayBasePtr(GXAttr attr, void* data, u32 size, bool le) { u32 idx = (attr == GX_VA_NBT) ? 1 : (attr - GX_VA_POS); - GXCmd1u8(GX_LOAD_AURORA); - GXCmd1u16(GX_LOAD_AURORA_ARRAYBASE | idx); + GXCmd1u8(GX_AURORA); + GXCmd1u16(GX_AURORA_LOAD_ARRAYBASE | idx); GXCmd1u64((u64)data); GXCmd1u32(size); GXCmd1u8(le ? 
1 : 0); diff --git a/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp b/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp index b40f577d79..e9abdc85f1 100644 --- a/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp +++ b/libs/JSystem/src/J3DGraphBase/J3DShapeDraw.cpp @@ -7,265 +7,11 @@ #include "JSystem/JKernel/JKRHeap.h" #if TARGET_PC -#include +#include #include -#include -#include "dusk/logging.h" namespace { -u16 read_be16(const u8* data) { - return (u16(data[0]) << 8) | data[1]; -} - -void append_be16(std::vector& out, u16 value) { - out.push_back(value >> 8); - out.push_back(value & 0xFF); -} - -void append_bytes(std::vector& out, const u8* data, u32 size) { - out.insert(out.end(), data, data + size); -} - -bool is_matrix_idx_attr(GXAttr attr) { - return attr >= GX_VA_PNMTXIDX && attr <= GX_VA_TEX7MTXIDX; -} - -bool is_draw_opcode(u8 opcode) { - return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP || - opcode == GX_TRIANGLEFAN || opcode == GX_LINES || opcode == GX_LINESTRIP || - opcode == GX_POINTS; -} - -bool is_mergeable_draw_opcode(u8 opcode) { - return opcode == GX_QUADS || opcode == GX_TRIANGLES || opcode == GX_TRIANGLESTRIP || - opcode == GX_TRIANGLEFAN; -} - -bool calc_vtx_stride(const GXVtxDescList* vtxDesc, u32& stride) { - stride = 0; - for (; vtxDesc->attr != GX_VA_NULL; vtxDesc++) { - switch (vtxDesc->type) { - case GX_NONE: - break; - case GX_DIRECT: - if (!is_matrix_idx_attr(vtxDesc->attr)) { - return false; - } - stride += 1; - break; - case GX_INDEX8: - stride += 1; - break; - case GX_INDEX16: - stride += 2; - break; - default: - return false; - } - } - return stride != 0; -} - -bool get_command_size(const u8* dlStart, u32 dlSize, u32 offset, u32 stride, u32& cmdSize) { - if (offset >= dlSize) { - return false; - } - - const u8 cmd = dlStart[offset]; - const u8 opcode = cmd & GX_OPCODE_MASK; - switch (opcode) { - case GX_NOP: - case GX_CMD_INVL_VC: - cmdSize = 1; - return true; - case (GX_LOAD_BP_REG & GX_OPCODE_MASK): - 
cmdSize = 5; - return offset + cmdSize <= dlSize; - case GX_LOAD_CP_REG: - cmdSize = 6; - return offset + cmdSize <= dlSize; - case GX_LOAD_XF_REG: { - if (offset + 5 > dlSize) { - return false; - } - const u16 count = read_be16(dlStart + offset + 1) + 1; - cmdSize = 5 + count * 4; - return offset + cmdSize <= dlSize; - } - case GX_LOAD_INDX_A: - case GX_LOAD_INDX_B: - case GX_LOAD_INDX_C: - case GX_LOAD_INDX_D: - cmdSize = 5; - return offset + cmdSize <= dlSize; - case GX_CMD_CALL_DL: - cmdSize = 9; - return offset + cmdSize <= dlSize; - default: - if (is_draw_opcode(opcode)) { - if (offset + 3 > dlSize) { - return false; - } - const u16 vtxCount = read_be16(dlStart + offset + 1); - cmdSize = 3 + vtxCount * stride; - return offset + cmdSize <= dlSize; - } - return false; - } -} - -struct MergeRun { - u8 cmd = 0; - u16 vtxCount = 0; - std::vector vertices; -}; - -void flush_merge_run(std::vector& out, MergeRun& run) { - if (run.vtxCount == 0) { - return; - } - - out.push_back(run.cmd); - append_be16(out, run.vtxCount); - append_bytes(out, run.vertices.data(), run.vertices.size()); - run.vertices.clear(); - run.vtxCount = 0; -} - -void append_vertex(std::vector& out, const u8* vertices, u32 stride, u16 idx) { - append_bytes(out, vertices + idx * stride, stride); -} - -bool triangulate_draw( - std::vector& out, u8 opcode, const u8* vertices, u32 stride, u16 vtxCount) { - switch (opcode) { - case GX_TRIANGLES: - append_bytes(out, vertices, vtxCount * stride); - return true; - case GX_TRIANGLEFAN: - if (vtxCount < 3) { - return false; - } - for (u16 v = 2; v < vtxCount; v++) { - append_vertex(out, vertices, stride, 0); - append_vertex(out, vertices, stride, v - 1); - append_vertex(out, vertices, stride, v); - } - return true; - case GX_TRIANGLESTRIP: - if (vtxCount < 3) { - return false; - } - for (u16 v = 2; v < vtxCount; v++) { - if ((v & 1) == 0) { - append_vertex(out, vertices, stride, v - 2); - append_vertex(out, vertices, stride, v - 1); - } else { - 
append_vertex(out, vertices, stride, v - 1); - append_vertex(out, vertices, stride, v - 2); - } - append_vertex(out, vertices, stride, v); - } - return true; - case GX_QUADS: - if ((vtxCount & 3) != 0) { - return false; - } - for (u16 v = 0; v < vtxCount; v += 4) { - append_vertex(out, vertices, stride, v); - append_vertex(out, vertices, stride, v + 1); - append_vertex(out, vertices, stride, v + 2); - append_vertex(out, vertices, stride, v + 2); - append_vertex(out, vertices, stride, v + 3); - append_vertex(out, vertices, stride, v); - } - return true; - default: - return false; - } -} - -void append_triangles_to_run( - std::vector& out, MergeRun& run, u8 cmd, const std::vector& vertices, u32 stride) { - u32 offset = 0; - u32 remaining = vertices.size() / stride; - while (remaining != 0) { - if (run.vtxCount != 0 && run.cmd != cmd) { - flush_merge_run(out, run); - } - - if (run.vtxCount == 0) { - run.cmd = cmd; - } - - u32 available = 0xFFFF - run.vtxCount; - if (available == 0) { - flush_merge_run(out, run); - continue; - } - - u32 toCopy = std::min(remaining, available); - append_bytes(run.vertices, vertices.data() + offset * stride, toCopy * stride); - run.vtxCount += toCopy; - offset += toCopy; - remaining -= toCopy; - - if (run.vtxCount == 0xFFFF) { - flush_merge_run(out, run); - } - } -} - -bool optimize_display_list(const u8* dlStart, u32 dlSize, u32 stride, std::vector& out) { - MergeRun run; - out.reserve(dlSize); - - for (u32 offset = 0; offset < dlSize;) { - u32 cmdSize = 0; - if (!get_command_size(dlStart, dlSize, offset, stride, cmdSize)) { - return false; - } - - const u8 cmd = dlStart[offset]; - const u8 opcode = cmd & GX_OPCODE_MASK; - if (opcode == GX_NOP) { - offset += cmdSize; - continue; - } - - if (!is_draw_opcode(opcode)) { - flush_merge_run(out, run); - append_bytes(out, dlStart + offset, cmdSize); - offset += cmdSize; - continue; - } - - if (!is_mergeable_draw_opcode(opcode)) { - flush_merge_run(out, run); - append_bytes(out, dlStart + 
offset, cmdSize); - offset += cmdSize; - continue; - } - - const u16 vtxCount = read_be16(dlStart + offset + 1); - const u8* vertices = dlStart + offset + 3; - std::vector triangles; - if (!triangulate_draw(triangles, opcode, vertices, stride, vtxCount)) { - flush_merge_run(out, run); - append_bytes(out, dlStart + offset, cmdSize); - offset += cmdSize; - continue; - } - - append_triangles_to_run(out, run, (GX_TRIANGLES | (cmd & GX_VAT_MASK)), triangles, stride); - offset += cmdSize; - } - - flush_merge_run(out, run); - return true; -} - void set_display_list_copy(void*& displayList, u32& displayListSize, const u8* data, u32 size) { const u32 alignedSize = ALIGN_NEXT(size, 0x20); u8* newDL = JKR_NEW_ARRAY_ARGS(u8, alignedSize, 0x20); @@ -289,20 +35,11 @@ u32 J3DShapeDraw::countVertex(u32 stride) { u8* dlStart = (u8*)getDisplayList(); #if TARGET_PC - for (u32 offset = 0; offset < getDisplayListSize();) { - u8 cmd = dlStart[offset]; - u8 opcode = cmd & GX_OPCODE_MASK; - u32 cmdSize = 0; - if (!get_command_size(dlStart, getDisplayListSize(), offset, stride, cmdSize)) { - break; + aurora::gx::dl::Reader reader{dlStart, getDisplayListSize(), static_cast(stride)}; + while (const auto cmd = reader.next()) { + if (cmd->kind != aurora::gx::dl::Command::Kind::Passthrough) { + count += cmd->draw.vtxCount; } - if (!is_draw_opcode(opcode)) { - offset += cmdSize; - continue; - } - int vtxNum = be16(*reinterpret_cast(dlStart + offset + 1)); - count += vtxNum; - offset += 3 + stride * vtxNum; } #else for (u8* dl = dlStart; (dl - dlStart) < getDisplayListSize();) { @@ -320,6 +57,53 @@ u32 J3DShapeDraw::countVertex(u32 stride) { return count; } +#if TARGET_PC +void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { + u32 byteNum = countVertex(stride); + u32 oldSize = mDisplayListSize; + u32 newSize = ALIGN_NEXT(oldSize + byteNum, 0x20); + u8* newDLStart = JKR_NEW_ARRAY_ARGS(u8, newSize, 0x20); + u8* oldDLStart = (u8*)mDisplayList; + u8* newDL = newDLStart; + 
+ aurora::gx::dl::Reader reader{oldDLStart, mDisplayListSize, static_cast(stride)}; + while (const auto cmd = reader.next()) { + if (cmd->kind == aurora::gx::dl::Command::Kind::Passthrough) { + std::memcpy(newDL, cmd->data, cmd->size); + newDL += cmd->size; + continue; + } + + const auto& draw = cmd->draw; + const u32 headerSize = draw.vertices - cmd->data; + std::memcpy(newDL, cmd->data, headerSize); + newDL += headerSize; + + for (u32 i = 0; i < draw.vtxCount; i++) { + const u8* oldVtx = draw.vertices + stride * i; + u8 pnmtxidx = oldVtx[0]; + std::memcpy(newDL, oldVtx, attrOffs); + newDL += attrOffs; + *newDL++ = valueBase + pnmtxidx; + std::memcpy(newDL, oldVtx + attrOffs, stride - attrOffs); + newDL += stride - attrOffs; + } + } + if (reader.failed()) { + // preserve the remainder untouched + std::memcpy(newDL, oldDLStart + reader.pos(), mDisplayListSize - reader.pos()); + newDL += mDisplayListSize - reader.pos(); + } + + u32 realSize = ALIGN_NEXT((uintptr_t)newDL - (uintptr_t)newDLStart, 0x20); + for (; (newDL - newDLStart) < newSize; newDL++) + *newDL = 0; + + mDisplayListSize = realSize; + mDisplayList = newDLStart; + DCStoreRange(newDLStart, mDisplayListSize); +} +#else void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { u32 byteNum = countVertex(stride); u32 oldSize = mDisplayListSize; @@ -330,32 +114,13 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { u8* newDL = newDLStart; for (; (oldDL - oldDLStart) < mDisplayListSize;) { -#if TARGET_PC - u32 oldOffset = oldDL - oldDLStart; - u32 cmdSize = 0; - if (!get_command_size(oldDLStart, mDisplayListSize, oldOffset, stride, cmdSize)) { - memcpy(newDL, oldDL, mDisplayListSize - oldOffset); - newDL += mDisplayListSize - oldOffset; - break; - } -#endif // Copy command u8 cmd = *(u8*)oldDL; oldDL++; *newDL++ = cmd; -#if TARGET_PC - u8 opcode = cmd & GX_OPCODE_MASK; - if (!is_draw_opcode(opcode)) { - memcpy(newDL, oldDL, cmdSize - 1); - oldDL += cmdSize 
- 1; - newDL += cmdSize - 1; - continue; - } -#else if (cmd != GX_TRIANGLEFAN && cmd != GX_TRIANGLESTRIP) break; -#endif // Copy count int vtxNum = *(u16*)oldDL; @@ -384,6 +149,7 @@ void J3DShapeDraw::addTexMtxIndexInDL(u32 stride, u32 attrOffs, u32 valueBase) { mDisplayList = newDLStart; DCStoreRange(newDLStart, mDisplayListSize); } +#endif J3DShapeDraw::J3DShapeDraw(const u8* displayList, u32 displayListSize) { #if TARGET_PC @@ -397,12 +163,8 @@ J3DShapeDraw::J3DShapeDraw(const u8* displayList, u32 displayListSize) { #if TARGET_PC J3DShapeDraw::J3DShapeDraw( const u8* displayList, u32 displayListSize, const GXVtxDescList* vtxDesc) { - u32 stride = 0; - std::vector optimized; - if (calc_vtx_stride(vtxDesc, stride) && - optimize_display_list(displayList, displayListSize, stride, optimized)) - { - set_display_list_copy(mDisplayList, mDisplayListSize, optimized.data(), optimized.size()); + if (const auto optimized = aurora::gx::dl::optimize(displayList, displayListSize, vtxDesc)) { + set_display_list_copy(mDisplayList, mDisplayListSize, optimized->data(), optimized->size()); } else { set_display_list_copy(mDisplayList, mDisplayListSize, displayList, displayListSize); } diff --git a/src/d/actor/d_flower.inc b/src/d/actor/d_flower.inc index e5f055fb16..54b1e022cc 100644 --- a/src/d/actor/d_flower.inc +++ b/src/d/actor/d_flower.inc @@ -15,6 +15,12 @@ const u16 l_J_Ohana00_64TEX__height = 63; using GameVersion = dusk::version::GameVersion; static u8* l_J_Ohana00_64TEX_get() { static u8 buf[0x800]; static bool _ = (dusk::LoadArchivedRelAsset(buf, 'AMEM', "d_a_grass.rel", {{GameVersion::GcnUsa, 0x9060}, {GameVersion::GcnPal, 0x9060}}, 0x800), true); return buf; } #define l_J_Ohana00_64TEX (l_J_Ohana00_64TEX_get()) + +// from d_grass.inc +static MtxP get_model_mtx(Mtx modelMtx, Mtx storage); +static void transform_positions( + const dusk::batch::LeafTemplate& tpl, const Vec* posArray, const Mtx mtx, Vec* xfPos); +static void split_batch(u32& emitted, u32 vtxCount); #else 
#include "assets/l_J_Ohana00_64TEX.h" #endif @@ -588,6 +594,12 @@ dFlower_packet_c::dFlower_packet_c() { GXInitTexObj(&mTexObj_l_J_Ohana01_64128_0419TEX, l_J_Ohana01_64128_0419TEX, l_J_Ohana01_64128_0419TEX__width + 1, l_J_Ohana01_64128_0419TEX__height + 1, GX_TF_CMPR, GX_MIRROR, GX_MIRROR, GX_FALSE ); + + dusk::batch::decode_leaf_template(l_J_hana00DL, 0x140, mTplHana00); + dusk::batch::decode_leaf_template(l_J_hana00_cDL, 0xC0, mTplHana00Cut); + dusk::batch::decode_leaf_template(l_J_hana01DL, 0x120, mTplHana01); + dusk::batch::decode_leaf_template(l_J_hana01_c_00DL, 0xC0, mTplHana01Cut00); + dusk::batch::decode_leaf_template(l_J_hana01_c_01DL, 0x120, mTplHana01Cut); #endif m_deleteRoom = &dFlower_packet_c::deleteRoom; @@ -597,6 +609,371 @@ dFlower_packet_c::dFlower_packet_c() { #endif } +#if TARGET_PC +static void batch_setup_tev(u32 lightMask) { + GXSetCullMode(GX_CULL_NONE); + + GXSetNumChans(2); + GXSetChanCtrl(GX_COLOR0, GX_FALSE, GX_SRC_REG, GX_SRC_VTX, 0, GX_DF_NONE, GX_AF_NONE); + GXSetChanCtrl(GX_COLOR1, GX_TRUE, GX_SRC_VTX, GX_SRC_REG, lightMask, GX_DF_CLAMP, GX_AF_SPOT); + + GXSetNumTevStages(3); + + GXSetTevOrder(GX_TEVSTAGE0, GX_TEXCOORD_NULL, GX_TEXMAP_NULL, GX_COLOR1A1); + GXSetTevColorIn(GX_TEVSTAGE0, GX_CC_ZERO, GX_CC_ZERO, GX_CC_ZERO, GX_CC_RASC); + GXSetTevColorOp(GX_TEVSTAGE0, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_1, GX_TRUE, GX_TEVPREV); + GXSetTevAlphaIn(GX_TEVSTAGE0, GX_CA_ZERO, GX_CA_ZERO, GX_CA_ZERO, GX_CA_ZERO); + GXSetTevAlphaOp(GX_TEVSTAGE0, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_1, GX_TRUE, GX_TEVPREV); + + GXSetTevOrder(GX_TEVSTAGE1, GX_TEXCOORD_NULL, GX_TEXMAP_NULL, GX_COLOR0A0); + GXSetTevColorIn(GX_TEVSTAGE1, GX_CC_ZERO, GX_CC_CPREV, GX_CC_RASC, GX_CC_ZERO); + GXSetTevColorOp(GX_TEVSTAGE1, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_1, GX_TRUE, GX_TEVPREV); + GXSetTevAlphaIn(GX_TEVSTAGE1, GX_CA_ZERO, GX_CA_ZERO, GX_CA_ZERO, GX_CA_ZERO); + GXSetTevAlphaOp(GX_TEVSTAGE1, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_1, GX_TRUE, GX_TEVPREV); + + 
GXSetTevOrder(GX_TEVSTAGE2, GX_TEXCOORD0, GX_TEXMAP0, GX_COLOR_NULL); + GXSetTevColorIn(GX_TEVSTAGE2, GX_CC_ZERO, GX_CC_TEXC, GX_CC_CPREV, GX_CC_C0); + GXSetTevColorOp(GX_TEVSTAGE2, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_4, GX_TRUE, GX_TEVPREV); + GXSetTevAlphaIn(GX_TEVSTAGE2, GX_CA_ZERO, GX_CA_ZERO, GX_CA_ZERO, GX_CA_TEXA); + GXSetTevAlphaOp(GX_TEVSTAGE2, GX_TEV_ADD, GX_TB_ZERO, GX_CS_SCALE_1, GX_TRUE, GX_TEVPREV); +} + +static GXColor hana00_amb_color(const dFlower_data_c* flower, const dKy_tevstr_c* tevstr) { + GXColor amb = {0, 0, 0, 0xFF}; + if (DEBUG && g_kankyoHIO.navy.grass_adjust_ON != 0) { + amb.r = g_kankyoHIO.navy.grass_ambcol.r * 2; + amb.g = g_kankyoHIO.navy.grass_ambcol.g * 2; + amb.b = g_kankyoHIO.navy.grass_ambcol.b * 2; + } else { + amb.r = (flower->field_0x04 & 0x1F) * 2; + amb.g = ((flower->field_0x04 >> 5) & 0x1F) * 2; + amb.b = ((flower->field_0x04 >> 0xA) & 0x1F) * 2; + } + + if (daPy_py_c::checkNowWolfPowerUp()) { + f32 ambRate = g_env_light.bg_amb_col[0].r / 255.0f; + f32 col = (((flower->field_0x04 & 0x1F) * 2 + 0x10)); + amb.r = col * (ambRate * 4.0f); + + ambRate = g_env_light.bg_amb_col[0].g / 255.0f; + f32 col2 = (((flower->field_0x04 >> 5) & 0x1F) * 2 + 0x10); + amb.g = col2 * (4.0f * ambRate); + + ambRate = g_env_light.bg_amb_col[0].b / 255.0f; + f32 col3 = (((flower->field_0x04 >> 10) & 0x1F) * 2 + 0x10); + amb.b = col3 * (4.0f * ambRate); + } + + if (amb.r == 0x3E) { + amb.r = tevstr->AmbCol.r; + } + + if (amb.g == 0x3E) { + amb.g = tevstr->AmbCol.g; + } + + if (amb.b == 0x3E) { + amb.b = tevstr->AmbCol.b; + } + + return amb; +} + +static GXColor hana01_amb_color(int idx, const dKy_tevstr_c* tevstr) { + f32 rRate = tevstr->AmbCol.r * 0.03125f; + if (rRate > 1.0f) { + rRate = 1.0f; + } + + f32 gRate = tevstr->AmbCol.g * 0.03125f; + if (gRate > 1.0f) { + gRate = 1.0f; + } + + f32 bRate = tevstr->AmbCol.b * 0.03125f; + if (bRate > 1.0f) { + bRate = 1.0f; + } + + GXColor amb = {1, 1, 1, 1}; + + GXColor sub; + sub.r = -0.4f * 
tevstr->AmbCol.r * rRate; + sub.g = -0.4f * tevstr->AmbCol.g * gRate; + sub.b = -0.4f * tevstr->AmbCol.b * bRate; + + switch (idx & 7) { + case 0: + amb.r = tevstr->AmbCol.r + sub.r; + amb.g = tevstr->AmbCol.g; + amb.b = tevstr->AmbCol.b; + break; + case 1: + amb.r = tevstr->AmbCol.r; + amb.g = tevstr->AmbCol.g + sub.g; + amb.b = tevstr->AmbCol.b; + break; + case 2: + amb.r = tevstr->AmbCol.r; + amb.g = tevstr->AmbCol.g; + amb.b = tevstr->AmbCol.b + sub.b; + break; + case 3: + amb.r = tevstr->AmbCol.r + sub.r; + amb.g = tevstr->AmbCol.g + sub.g; + amb.b = tevstr->AmbCol.b; + break; + case 4: + amb.r = tevstr->AmbCol.r; + amb.g = tevstr->AmbCol.g + sub.g; + amb.b = tevstr->AmbCol.b + sub.b; + break; + case 5: + amb.r = tevstr->AmbCol.r + sub.r; + amb.g = tevstr->AmbCol.g; + amb.b = tevstr->AmbCol.b + sub.b; + break; + case 6: + amb.r = tevstr->AmbCol.r + sub.r; + amb.g = tevstr->AmbCol.g + sub.g; + amb.b = tevstr->AmbCol.b + sub.b; + break; + case 7: + break; + } + + if (daPy_py_c::checkNowWolfPowerUp()) { + f32 ambRate = g_env_light.bg_amb_col[0].r / 255.0f; + amb.r = (amb.r + 8) * (6.0f * ambRate); + + ambRate = g_env_light.bg_amb_col[0].g / 255.0f; + amb.g = (amb.g + 8) * (6.0f * ambRate); + + ambRate = g_env_light.bg_amb_col[0].b / 255.0f; + amb.b = (amb.b + 8) * (6.0f * ambRate); + } + + amb.a = 0xFF; + return amb; +} + +static void flower_emit(const dusk::batch::LeafTemplate& tpl, const Vec* xformedPos, GXColor amb) { + for (u32 i = 0; i < tpl.vtxCount; i++) { + const dusk::batch::LeafTemplate::Vtx& v = tpl.vtx[i]; + const Vec& p = xformedPos[v.pos]; + GXPosition3f32(p.x, p.y, p.z); + GXNormal1x8(v.nrm); + GXColor1x8(v.clr); + GXColor4u8(amb.r, amb.g, amb.b, amb.a); + GXTexCoord1x8(v.tex); + } +} + +void dFlower_packet_c::draw() { + ZoneScoped; + dScnKy_env_light_c* kankyo = dKy_getEnvlight(); + j3dSys.reinitGX(); + + GXSetNumIndStages(0); + dKy_setLight_again(); + GXClearVtxDesc(); + GXSetVtxDesc(GX_VA_POS, GX_INDEX8); + GXSetVtxDesc(GX_VA_NRM, GX_INDEX8); + 
GXSetVtxDesc(GX_VA_CLR0, GX_INDEX8); + GXSetVtxDesc(GX_VA_TEX0, GX_INDEX8); + GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_POS, GX_POS_XYZ, GX_F32, 0); + GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_NRM, GX_NRM_XYZ, GX_F32, 0); + GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0); + GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_TEX0, GX_TEX_ST, GX_F32, 0); + GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_POS, GX_POS_XYZ, GX_F32, 0); + GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_NRM, GX_NRM_XYZ, GX_F32, 0); + GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0); + GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_CLR1, GX_CLR_RGBA, GX_RGBA8, 0); + GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_TEX0, GX_TEX_ST, GX_F32, 0); + GXSETARRAY(GX_VA_POS, &l_flowerPos, sizeof(l_flowerPos), sizeof(Vec), true); + GXSETARRAY(GX_VA_NRM, &l_flowerNormal, sizeof(l_flowerNormal), sizeof(Vec), true); + GXSETARRAY(GX_VA_CLR0, &l_flowerColor, sizeof(l_flowerColor), sizeof(GXColor), true); + GXSETARRAY(GX_VA_TEX0, &l_flowerTexCoord, sizeof(l_flowerTexCoord), 8, true); + + static GXVtxDescList vtxDescList[] = { + {GX_VA_POS, GX_DIRECT}, + {GX_VA_NRM, GX_INDEX8}, + {GX_VA_CLR0, GX_INDEX8}, + {GX_VA_CLR1, GX_DIRECT}, + {GX_VA_TEX0, GX_INDEX8}, + {GX_VA_NULL, GX_NONE}, + }; + static Vec xfPos[256]; + Mtx identity; + MTXIdentity(identity); + + // --- hana00 --- + for (int i = 0; i < 64; i++) { + dFlower_data_c* first = m_room[i].getData(); + if (first == nullptr || !dComIfGp_roomControl_checkStatusFlag(i, 0x10)) { + continue; + } + + dKy_tevstr_c* tevstr = dComIfGp_roomControl_getTevStr(i); + int lightCount = 6; + + if (dComIfGp_roomControl_getStatusRoomDt(i) != nullptr) { + lightCount = dComIfGp_roomControl_getStatusRoomDt(i)->getLightVecInfoNum(); + } + + if (dKy_SunMoon_Light_Check() && lightCount < 2) { + lightCount = 2; + } + + for (int j = 0; j < 6; j++) { + if (kankyo->field_0x0c18[j].field_0x26 == 1) { + lightCount++; + } + } + + if (lightCount <= 2) { + GXCallDisplayList(l_matLight4DL, 0x80); + } else { + GXCallDisplayList(l_matDL, 0x80); 
+ } + + GXSetTevColorS10(GX_TEVREG0, {0, 0, 0, 0}); + dKy_Global_amb_set(tevstr); + dKy_GxFog_tevstr_set(tevstr); + dKy_setLight_nowroom_grass(tevstr->room_no, 1.0f); + + GXLoadTexObj(&mTexObj_l_J_Ohana00_64TEX, GX_TEXMAP0); + batch_setup_tev(lightCount <= 2 ? (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4) : + (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4 | + GX_LIGHT5 | GX_LIGHT6 | GX_LIGHT7)); + GXSetVtxDescv(vtxDescList); + GXLoadPosMtxImm(identity, GX_PNMTX0); + GXLoadNrmMtxImm(j3dSys.getViewMtx(), 0); + + for (int bucket = 0; bucket < 2; bucket++) { + const bool cut = bucket != 0; + const dusk::batch::LeafTemplate& tpl = cut ? mTplHana00Cut : mTplHana00; + + bool open = false; + u32 emitted = 0; + for (dFlower_data_c* flower = first; flower != nullptr; flower = flower->mp_next) { + if (cLib_checkBit(flower->m_state, 4) || + cLib_checkBit(flower->m_state, 0x40)) + { + continue; + } + if ((cLib_checkBit(flower->m_state, 8) != 0) != cut) { + continue; + } + + if (!open) { + GXBegin(GX_TRIANGLES, GX_VTXFMT1, GX_AUTO); + open = true; + } + split_batch(emitted, tpl.vtxCount); + + Mtx interpMtx; + MtxP mtx = get_model_mtx(flower->m_modelMtx, interpMtx); + transform_positions(tpl, reinterpret_cast(l_flowerPos), mtx, xfPos); + flower_emit(tpl, xfPos, hana00_amb_color(flower, tevstr)); + } + if (open) { + GXEnd(); + } + } + } + + // --- hana01 --- + GXSETARRAY(GX_VA_POS, mp_pos, sizeof(l_flowerPos2), sizeof(Vec), true); + GXSETARRAY(GX_VA_NRM, &l_flowerNormal2, sizeof(l_flowerNormal2), sizeof(Vec), true); + GXSETARRAY(GX_VA_CLR0, mp_colors, sizeof(l_flowerColor2), sizeof(GXColor), true); + GXSETARRAY(GX_VA_TEX0, mp_texCoords, sizeof(l_flowerTexCoord2), 8, true); + + for (int i = 0; i < 64; i++) { + dFlower_data_c* first = m_room[i].getData(); + if (first == NULL) { + continue; + } + + dKy_tevstr_c* tevstr = dComIfGp_roomControl_getTevStr(i); + int lightCount = 6; + + if (dComIfGp_roomControl_getStatusRoomDt(i) != NULL) { + lightCount = 
dComIfGp_roomControl_getStatusRoomDt(i)->getLightVecInfoNum(); + } + +#if DEBUG + if (g_kankyoHIO.light.m_HOSTIO_setting != 0) { + lightCount = g_kankyoHIO.dungeonLight.usedLights; + } +#endif + + if (dKy_SunMoon_Light_Check() == TRUE && lightCount < 2) { + lightCount = 2; + } + + if (lightCount <= 2) { + GXCallDisplayList(mp_mat2Light4DL, m_mat2Light4DL_size); + } else { + GXCallDisplayList(mp_mat2DL, m_mat2DL_size); + } + + GXSetTevColorS10(GX_TEVREG0, {0, 0, 0, 0}); + dKy_Global_amb_set(tevstr); + dKy_GxFog_tevstr_set(tevstr); + dKy_setLight_nowroom_grass(tevstr->room_no, 1.0f); + + GXLoadTexObj(&mTexObj_l_J_Ohana01_64128_0419TEX, GX_TEXMAP0); + batch_setup_tev(lightCount <= 2 ? (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4) : + (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4 | + GX_LIGHT5 | GX_LIGHT6 | GX_LIGHT7)); + GXSetVtxDescv(vtxDescList); + GXLoadPosMtxImm(identity, GX_PNMTX0); + GXLoadNrmMtxImm(j3dSys.getViewMtx(), 0); + + const dusk::batch::LeafTemplate* const buckets[3] = { + &mTplHana01, &mTplHana01Cut00, &mTplHana01Cut}; + for (int bucket = 0; bucket < 3; bucket++) { + const dusk::batch::LeafTemplate& tpl = *buckets[bucket]; + + bool open = false; + u32 emitted = 0; + int idx = 0; + for (dFlower_data_c* flower = first; flower != NULL; flower = flower->mp_next, idx++) { + if (cLib_checkBit(flower->m_state, 4) || + !cLib_checkBit(flower->m_state, 0x40)) + { + continue; + } + const int flowerBucket = cLib_checkBit(flower->m_state, 8) ? 2 : + cLib_checkBit(flower->m_state, 0x10) ? 
1 : + 0; + if (flowerBucket != bucket) { + continue; + } + + if (!open) { + GXBegin(GX_TRIANGLES, GX_VTXFMT1, GX_AUTO); + open = true; + } + split_batch(emitted, tpl.vtxCount); + + Mtx interpMtx; + MtxP mtx = get_model_mtx(flower->m_modelMtx, interpMtx); + transform_positions(tpl, mp_pos, mtx, xfPos); + flower_emit(tpl, xfPos, hana01_amb_color(idx, tevstr)); + } + if (open) { + GXEnd(); + } + } + } + + GXSetNumTevStages(1); + GXSetNumChans(1); + J3DShape::resetVcdVatCache(); +} +#else void dFlower_packet_c::draw() { ZoneScoped; dScnKy_env_light_c* kankyo = dKy_getEnvlight(); @@ -886,6 +1263,7 @@ void dFlower_packet_c::draw() { J3DShape::resetVcdVatCache(); } +#endif void dFlower_packet_c::calc() { dFlower_anm_c* anm_p = getAnm(); diff --git a/src/d/actor/d_grass.inc b/src/d/actor/d_grass.inc index 8b94368a34..9e8c360518 100644 --- a/src/d/actor/d_grass.inc +++ b/src/d/actor/d_grass.inc @@ -512,11 +512,366 @@ dGrass_packet_c::dGrass_packet_c() { m_Mkusa_9q_cDL_size = 0xC0; field_0x1d714 = 0; +#if TARGET_PC + dusk::batch::decode_leaf_template(mp_Mkusa_9q_DL, m_Mkusa_9q_DL_size, mTplKusa9q); + dusk::batch::decode_leaf_template(mp_Mkusa_9q_cDL, m_Mkusa_9q_cDL_size, mTplKusa9qCut); + dusk::batch::decode_leaf_template(l_M_TenGusaDL, 0xC0, mTplTengusa); +#endif + OS_REPORT("草群メモリ=%fK\n", 117.7734375f); m_deleteRoom = &dGrass_packet_c::deleteRoom; } +#if TARGET_PC +static MtxP get_model_mtx(Mtx modelMtx, Mtx storage) { + if (dusk::frame_interp::lookup_replacement(modelMtx, storage)) { + cMtx_concat(j3dSys.getViewMtx(), storage, storage); + return storage; + } + return modelMtx; +} + +static void transform_positions( + const dusk::batch::LeafTemplate& tpl, const Vec* posArray, const Mtx mtx, Vec* xfPos) { + for (u32 i = 0; i < tpl.posRefCount; i++) { + const u8 idx = tpl.posRefs[i]; + MTXMultVec(mtx, &posArray[idx], &xfPos[idx]); + } +} + +static void split_batch(u32& emitted, u32 vtxCount) { + if (emitted + vtxCount > 0xFFFF) { + GXEnd(); + GXBegin(GX_TRIANGLES, 
GX_VTXFMT1, GX_AUTO);
+        emitted = 0;
+    }
+    emitted += vtxCount;
+}
+// Per-blade ambient: unpacks 5-bit RGB from m_addCol (x2), then applies wolf/room/position scaling.
+static GXColor blade_amb_color(const dGrass_data_c* blade, const dKy_tevstr_c* tevstr) {
+    GXColor amb;
+    amb.a = 0;
+
+#if DEBUG
+    if (g_kankyoHIO.navy.grass_adjust_ON) {
+        amb.r = g_kankyoHIO.navy.grass_ambcol.r * 2;
+        amb.g = g_kankyoHIO.navy.grass_ambcol.g * 2;
+        amb.b = g_kankyoHIO.navy.grass_ambcol.b * 2;
+        return amb;
+    }
+#endif
+
+    amb.r = (blade->m_addCol & 0x1F) * 2;
+    amb.g = ((blade->m_addCol >> 5) & 0x1F) * 2;
+    amb.b = ((blade->m_addCol >> 0xA) & 0x1F) * 2;
+
+    if (daPy_py_c::checkNowWolfPowerUp()) {
+        f32 ambRate = g_env_light.bg_amb_col[0].r / 255.0f;
+        f32 col = (((blade->m_addCol & 0x1F) * 2 + 0x10));
+        amb.r = col * (ambRate * 4.0f);
+
+        ambRate = g_env_light.bg_amb_col[0].g / 255.0f;
+        f32 col2 = (((blade->m_addCol >> 5) & 0x1F) * 2 + 0x10);
+        amb.g = col2 * (4.0f * ambRate);
+
+        ambRate = g_env_light.bg_amb_col[0].b / 255.0f;
+        f32 col3 = (((blade->m_addCol >> 10) & 0x1F) * 2 + 0x10);
+        amb.b = col3 * (4.0f * ambRate);
+    }
+
+    f32 roomAmbScale = 1.0f - (static_cast(blade->m_pos.x) & 0xFF) * 0.001953125f; // NOTE(review): static_cast target types are missing on these three lines; restore from upstream
+    f32 colScale = 1.1f - (static_cast(static_cast(blade->m_pos.x)) & 0xFF) / 2000.0f;
+    colScale -= (static_cast(blade->m_pos.z) & 0xFF) / 2000.0f;
+
+    if (colScale > 1.0f) {
+        colScale = 1.0f;
+    }
+
+    if (amb.r == 0x3E) { // 0x3E is the doubled 5-bit maximum (0x1F * 2)
+        amb.r = tevstr->AmbCol.r * roomAmbScale; // take the room ambient instead
+    } else {
+        amb.r = amb.r * colScale;
+    }
+
+    if (amb.g == 0x3E) {
+        amb.g = tevstr->AmbCol.g * roomAmbScale;
+    } else {
+        amb.g = amb.g * colScale;
+    }
+
+    if (amb.b == 0x3E) {
+        amb.b = tevstr->AmbCol.b * roomAmbScale;
+    } else {
+        amb.b = amb.b * colScale;
+    }
+
+    return amb;
+}
+// Emits one blade: direct position/color, indexed normal/texcoord; alpha comes from colors[v.clr].
+static void blade_emit(const dusk::batch::LeafTemplate& tpl, const Vec* xformedPos,
+                       const GXColor* colors, GXColor amb) {
+    for (u32 i = 0; i < tpl.vtxCount; i++) {
+        const dusk::batch::LeafTemplate::Vtx& v = tpl.vtx[i];
+        const Vec& p = xformedPos[v.pos];
+        GXPosition3f32(p.x, p.y, p.z);
+        GXNormal1x8(v.nrm);
+        GXColor4u8(amb.r, amb.g, amb.b, colors[v.clr].a);
+        GXTexCoord1x8(v.tex);
+    }
+}
+// PC path: batches each room's blades into four GXBegin/GXEnd runs; regrowing blades use DLs.
+void dGrass_packet_c::draw() {
+    ZoneScoped;
+    dScnKy_env_light_c* kankyo = dKy_getEnvlight();
+
+    j3dSys.reinitGX();
+    GXSetNumIndStages(0);
+    dKy_setLight_again();
+    GXClearVtxDesc();
+
+    static GXVtxDescList l_vtxDescList[] = { // all-indexed layout used by the display-list path
+        {GX_VA_POS, GX_INDEX8},
+        {GX_VA_NRM, GX_INDEX8},
+        {GX_VA_CLR0, GX_INDEX8},
+        {GX_VA_TEX0, GX_INDEX8},
+        {GX_VA_NULL, GX_NONE},
+    };
+
+    static GXVtxDescList l_batchVtxDescList[] = { // batched layout: position and color are sent directly
+        {GX_VA_POS, GX_DIRECT},
+        {GX_VA_NRM, GX_INDEX8},
+        {GX_VA_CLR0, GX_DIRECT},
+        {GX_VA_TEX0, GX_INDEX8},
+        {GX_VA_NULL, GX_NONE},
+    };
+
+    GXSetVtxDescv(l_vtxDescList);
+    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_POS, GX_POS_XYZ, GX_F32, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_NRM, GX_NRM_XYZ, GX_F32, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT0, GX_VA_TEX0, GX_TEX_ST, GX_F32, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_POS, GX_POS_XYZ, GX_F32, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_NRM, GX_NRM_XYZ, GX_F32, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_CLR0, GX_CLR_RGBA, GX_RGBA8, 0);
+    GXSetVtxAttrFmt(GX_VTXFMT1, GX_VA_TEX0, GX_TEX_ST, GX_F32, 0);
+    GXSETARRAY(GX_VA_POS, mp_pos, sizeof(l_pos), sizeof(Vec), true);
+    GXSETARRAY(GX_VA_NRM, mp_normal, sizeof(l_normal), sizeof(Vec), true);
+    GXSETARRAY(GX_VA_CLR0, mp_colors, sizeof(l_color), sizeof(GXColor), true);
+    GXSETARRAY(GX_VA_TEX0, mp_texCoords, sizeof(l_texCoord), 8, true);
+
+    GXColorS10 reg1 = {0, 0, 0, 0};
+
+    // daytime "shine" alpha curve (TEVREG1 alpha)
+    f32 daytime = g_env_light.getDaytime();
+    f32 ratio;
+    f32 shine;
+    if (daytime >= 90.0f && daytime < 135.0f) {
+        ratio = 1.0f - (0.022222223f * (135.0f - daytime));
+        shine = 100.0f - (18.0f * ratio);
+    } else if (daytime >= 135.0f && daytime < 225.0f) {
+        ratio = 1.0f - (0.011111111f * (225.0f - daytime));
+        shine = 82.0f - (25.0f * ratio);
+    } else if (daytime >= 225.0f && daytime < 270.0f) {
+        ratio = 1.0f - (0.022222223f * (270.0f - daytime));
+        shine = 57.0f - (-25.0f * ratio);
+    } else if (daytime >= 270.0f && daytime < 315.0f) {
+        ratio = (1.0f - (0.022222223f * (315.0f - daytime)));
+        shine = 82.0f - (-18.0f * ratio);
+    } else {
+        shine = 100.0f;
+    }
+
+#if DEBUG
+    if (g_kankyoHIO.navy.grass_shine_value != 0.0f) {
+        shine = g_kankyoHIO.navy.grass_shine_value;
+    }
+#endif
+
+    static Vec xfPos[256]; // scratch handed to transform_positions and read back by blade_emit
+    Mtx identity;
+    PSMTXIdentity(identity);
+
+    for (int i = 0; i < 64; i++) {
+        dGrass_data_c* first = m_room[i].getData();
+        if (first == NULL || !dComIfGp_roomControl_checkStatusFlag(i, 0x10)) {
+            continue;
+        }
+
+        int lightCount = 6;
+        dKy_tevstr_c* tevstr = dComIfGp_roomControl_getTevStr(i);
+
+        f32 lightInf = g_env_light.grass_light_inf_rate * g_env_light.bg_light_influence;
+        lightInf += 0.5f * (1.0f - lightInf);
+
+        J3DLightInfo* lightInfo = tevstr->mLights[0].getLightInfo();
+        reg1.r = lightInfo->mColor.r * lightInf;
+        reg1.g = lightInfo->mColor.g * lightInf;
+        reg1.b = lightInfo->mColor.b * lightInf;
+        reg1.a = shine;
+        if (memcmp(dComIfGp_getStartStageName(), "D_MN01", 6) == 0) {
+            reg1.r = 0;
+            reg1.g = 0x1E;
+            reg1.b = 5;
+            reg1.a = 0x50;
+        }
+        GFSetTevColorS10(GX_TEVREG1, reg1);
+
+        if (dComIfGp_roomControl_getStatusRoomDt(i) != nullptr) {
+            lightCount = dComIfGp_roomControl_getStatusRoomDt(i)->getLightVecInfoNum();
+        }
+
+#if DEBUG
+        if (g_kankyoHIO.light.m_HOSTIO_setting != 0) {
+            lightCount = g_kankyoHIO.dungeonLight.usedLights;
+        }
+#endif
+
+        if (dKy_SunMoon_Light_Check() == TRUE && lightCount < 2) {
+            lightCount = 2;
+        }
+
+        for (int j = 0; j < 6; j++) {
+            if (kankyo->field_0x0c18[j].field_0x26 == 1) {
+                lightCount++;
+            }
+        }
+
+        // room-level setup (material chosen from the first blade in the room's list)
+        if (first->field_0x05 <= 3 || first->field_0x05 >= 10) {
+            GXLoadTexObj(&mTexObj_l_M_kusa05_RGBATEX, GX_TEXMAP0);
+            if (lightCount <= 2) { // same threshold as the batched and regrowing paths below
+                GXCallDisplayList(mp_kusa9q_14_DL, m_kusa9q_DL_14_size);
+            } else {
+                GXCallDisplayList(mp_kusa9q_DL, m_kusa9q_DL_size);
+            }
+        } else {
+            GXLoadTexObj(&mTexObj_l_M_Hijiki00TEX, GX_TEXMAP0);
+            GXCallDisplayList(l_Tengusa_matDL, 0xA0);
+        }
+
+        GFSetTevColorS10(GX_TEVREG2, {0, 0, 0, 0});
+
+        dKy_Global_amb_set(tevstr);
+        dKy_GfFog_tevstr_set(tevstr);
+        dKy_setLight_nowroom_grass(tevstr->room_no, 0.0f);
+
+        GXSetVtxDescv(l_batchVtxDescList);
+        GXLoadPosMtxImm(identity, GX_PNMTX0); // identity: blade_emit sends already-transformed positions
+        GXLoadNrmMtxImm(j3dSys.getViewMtx(), 0);
+
+        // buckets: (kusa05 vs tengusa) x (standing vs cut)
+        bool hasRegrowing = false;
+        for (int bucket = 0; bucket < 4; bucket++) {
+            const bool kusaTex = bucket < 2;
+            const bool cut = (bucket & 1) != 0;
+            const dusk::batch::LeafTemplate& tpl =
+                cut ? mTplKusa9qCut : (kusaTex ? mTplKusa9q : mTplTengusa);
+
+            bool open = false;
+            u32 emitted = 0;
+            for (dGrass_data_c* blade = first; blade != NULL; blade = blade->mp_next) {
+                if (cLib_checkBit(blade->field_0x01, 2)) {
+                    continue; // clipped
+                }
+                if (blade->field_0x02 < -1) {
+                    hasRegrowing = true; // drawn after the buckets
+                    continue;
+                }
+                const bool bladeKusaTex = blade->field_0x05 <= 3 || blade->field_0x05 >= 10;
+                if (bladeKusaTex != kusaTex || (blade->field_0x02 < 0) != cut) {
+                    continue;
+                }
+
+                if (!open) { // first matching blade: bind this bucket's material and open the batch
+                    if (kusaTex) {
+                        GXLoadTexObj(&mTexObj_l_M_kusa05_RGBATEX, GX_TEXMAP0);
+                        if (lightCount <= 2) {
+                            GXCallDisplayList(mp_kusa9q_14_DL, m_kusa9q_DL_14_size);
+                        } else {
+                            GXCallDisplayList(mp_kusa9q_DL, m_kusa9q_DL_size);
+                        }
+                    } else {
+                        GXLoadTexObj(&mTexObj_l_M_Hijiki00TEX, GX_TEXMAP0);
+                        GXCallDisplayList(l_Tengusa_matDL, 0xA0);
+                    }
+                    // change amb_src to GX_SRC_VTX
+                    const u32 lightMask =
+                        (kusaTex && lightCount <= 2)
+                            ? (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4)
+                            : (GX_LIGHT1 | GX_LIGHT2 | GX_LIGHT3 | GX_LIGHT4 | GX_LIGHT5 |
+                               GX_LIGHT6 | GX_LIGHT7);
+                    GXSetChanCtrl(GX_COLOR0, GX_TRUE, GX_SRC_VTX, GX_SRC_REG, lightMask,
+                                  GX_DF_CLAMP, GX_AF_SPOT);
+                    reg1.a = cut ? 0 : shine;
+                    GFSetTevColorS10(GX_TEVREG1, reg1);
+                    GXBegin(GX_TRIANGLES, GX_VTXFMT1, GX_AUTO);
+                    open = true;
+                }
+
+                split_batch(emitted, tpl.vtxCount);
+
+                Mtx interpMtx;
+                MtxP mtx = get_model_mtx(blade->m_modelMtx, interpMtx);
+                transform_positions(tpl, mp_pos, mtx, xfPos);
+                blade_emit(tpl, xfPos, mp_colors, blade_amb_color(blade, tevstr));
+            }
+            if (open) {
+                GXEnd();
+            }
+        }
+
+        // regrowing blades have per-blade TEVREG2 alpha
+        // draw them with the original immediate path
+        if (hasRegrowing) {
+            GXSetVtxDescv(l_vtxDescList);
+            for (dGrass_data_c* blade = first; blade != NULL; blade = blade->mp_next) {
+                if (blade->field_0x02 >= -1 || cLib_checkBit(blade->field_0x01, 2)) {
+                    continue;
+                }
+
+                const bool kusaTex = blade->field_0x05 <= 3 || blade->field_0x05 >= 10;
+                if (kusaTex) {
+                    GXLoadTexObj(&mTexObj_l_M_kusa05_RGBATEX, GX_TEXMAP0);
+                    if (lightCount <= 2) {
+                        GXCallDisplayList(mp_kusa9q_14_DL, m_kusa9q_DL_14_size);
+                    } else {
+                        GXCallDisplayList(mp_kusa9q_DL, m_kusa9q_DL_size);
+                    }
+                } else {
+                    GXLoadTexObj(&mTexObj_l_M_Hijiki00TEX, GX_TEXMAP0);
+                    GXCallDisplayList(l_Tengusa_matDL, 0xA0);
+                }
+
+                reg1.a = 0;
+                GFSetTevColorS10(GX_TEVREG1, reg1);
+                GXSetChanAmbColor(GX_COLOR0A0, blade_amb_color(blade, tevstr));
+
+                Mtx modelMtx;
+                GXLoadPosMtxImm(get_model_mtx(blade->m_modelMtx, modelMtx), GX_PNMTX0);
+                GXLoadNrmMtxImm(j3dSys.getViewMtx(), 0);
+
+                GFSetTevColorS10(GX_TEVREG2, // NOTE(review): static_cast target type is missing on the next line; restore from upstream
+                                 {0, 0, 0, static_cast(-0x100 - (blade->field_0x02 << 8) / 40)});
+
+                if (blade->field_0x02 != -2) {
+                    if (kusaTex) {
+                        GXCallDisplayList(mp_Mkusa_9q_DL, m_Mkusa_9q_DL_size);
+                    } else {
+                        GXCallDisplayList(l_M_TenGusaDL, 0xC0);
+                    }
+                } else {
+                    GXCallDisplayList(mp_Mkusa_9q_cDL, m_Mkusa_9q_cDL_size);
+                }
+
+                GFSetTevColorS10(GX_TEVREG2, {0, 0, 0, 0});
+            }
+        }
+    }
+
+    J3DShape::resetVcdVatCache();
+}
+#else
 void dGrass_packet_c::draw() {
     ZoneScoped;
     dScnKy_env_light_c* kankyo = dKy_getEnvlight();
@@ -811,6 +1166,7 @@ void dGrass_packet_c::draw() {
 
     J3DShape::resetVcdVatCache();
 }
+#endif
 
 void dGrass_packet_c::calc() {
     cXyz* temp_r29 = dKyw_get_wind_vec();
diff --git a/src/dusk/batch.cpp b/src/dusk/batch.cpp
new file mode 100644
index 0000000000..04cba52b0d
--- /dev/null
+++ b/src/dusk/batch.cpp
@@ -0,0 +1,72 @@
+#include "dusk/batch.hpp"
+#include "dusk/logging.h"
+
+#include // NOTE(review): header name is missing on this and the next line; restore from upstream
+#include
+
+namespace dusk::batch {
+// Decodes an indexed leaf display list into a flat triangle list plus its unique position indices.
+void decode_leaf_template(const u8* dl, u32 size, LeafTemplate& out) {
+    out.vtxCount = 0;
+    out.posRefCount = 0;
+    bool posSeen[256] = {}; // one flag per possible u8 position index
+
+    static constexpr GXVtxDescList kLeafDesc[] = {
+        {GX_VA_POS, GX_INDEX8},
+        {GX_VA_NRM, GX_INDEX8},
+        {GX_VA_CLR0, GX_INDEX8},
+        {GX_VA_TEX0, GX_INDEX8},
+        {GX_VA_NULL, GX_NONE},
+    };
+
+    aurora::gx::dl::Reader reader{dl, size, kLeafDesc};
+    while (const auto cmd = reader.next()) {
+        if (cmd->kind == aurora::gx::dl::Command::Kind::Passthrough) {
+            if (cmd->data[0] != GX_NOP) { // only NOP passthrough is tolerated
+                DuskLog.fatal("decode_leaf_template: unexpected opcode {:#x}", cmd->data[0]);
+            }
+            continue;
+        }
+        if (cmd->kind != aurora::gx::dl::Command::Kind::Draw) {
+            DuskLog.fatal("decode_leaf_template: unexpected pre-optimized draw");
+        }
+
+        const auto& draw = cmd->draw;
+        bool overflow = false;
+        const bool expanded =
+            aurora::gx::dl::expand_triangles(draw.prim, draw.vtxCount, [&](u16 i0, u16 i1, u16 i2) {
+                if (overflow || out.vtxCount + 3 > LeafTemplate::kMaxVtx) {
+                    overflow = true; // sticky: later triangles of this draw are skipped too
+                    return;
+                }
+                for (const u16 elem : {i0, i1, i2}) {
+                    LeafTemplate::Vtx& v = out.vtx[out.vtxCount++];
+                    v.pos = draw.attr_idx(elem, GX_VA_POS);
+                    v.nrm = draw.attr_idx(elem, GX_VA_NRM);
+                    v.clr = draw.attr_idx(elem, GX_VA_CLR0);
+                    v.tex = draw.attr_idx(elem, GX_VA_TEX0);
+                    if (!posSeen[v.pos]) { // first use of this position index
+                        posSeen[v.pos] = true;
+                        if (out.posRefCount >= LeafTemplate::kMaxPosRefs) {
+                            overflow = true;
+                            return;
+                        }
+                        out.posRefs[out.posRefCount++] = v.pos;
+                    }
+                }
+            });
+        if (!expanded) {
+            DuskLog.fatal("decode_leaf_template: untriangulable draw (prim {:#x}, {} verts)",
+                          static_cast(draw.prim), draw.vtxCount); // NOTE(review): static_cast target type is missing; restore from upstream
+        }
+        if (overflow) {
+            DuskLog.fatal("decode_leaf_template: template overflow ({} verts, {} positions)",
+                          out.vtxCount, out.posRefCount);
+        }
+    }
+    if (reader.failed()) {
+        DuskLog.fatal("decode_leaf_template: failed to walk display list");
+    }
+}
+
+} // namespace dusk::batch
diff --git a/src/dusk/batch.hpp b/src/dusk/batch.hpp
new file mode 100644
index 0000000000..2569f27761
--- /dev/null
+++ b/src/dusk/batch.hpp
@@ -0,0 +1,25 @@
+#pragma once
+
+#include // NOTE(review): header name is missing here; restore from upstream
+
+namespace dusk::batch {
+// Triangle-list template decoded from a leaf display list (filled by decode_leaf_template).
+struct LeafTemplate {
+    static constexpr u32 kMaxVtx = 192;
+    static constexpr u32 kMaxPosRefs = 64;
+
+    struct Vtx { // 8-bit indices into the position / normal / color / texcoord arrays
+        u8 pos;
+        u8 nrm;
+        u8 clr;
+        u8 tex;
+    };
+    Vtx vtx[kMaxVtx]; // three consecutive entries per triangle
+    u16 vtxCount = 0; // number of valid entries in vtx
+    u8 posRefs[kMaxPosRefs]; // distinct position indices, in first-use order
+    u8 posRefCount = 0; // number of valid entries in posRefs
+};
+// Fills `out` from the `size`-byte display list `dl`; logs fatally on input it cannot decode.
+void decode_leaf_template(const u8* dl, u32 size, LeafTemplate& out);
+
+} // namespace dusk