From aa23ae244fff6fb8761c5c949c3d2bec5f7065cb Mon Sep 17 00:00:00 2001 From: Lurs <2795933+Lurs@users.noreply.github.com> Date: Sun, 26 Apr 2026 11:38:14 +0200 Subject: [PATCH] Lurs' squashed commits * first tphd wip * fix CMakeLists.txt after rebase * fix mipmapping (I hope) thanks to decaf-emu. Skipped a few textures in favor of GC assets and added new HD asset file formats * added third hook into dusk for second JKRMemArchive constructor (e.g. for sign textures). skip texture load for textures with imageoffset = 0 to get STG.arc loaded instead. And small refactorings/rebasings. added a few parameters in logging --- .gitmodules | 2 +- CMakeLists.txt | 22 +- extern/aurora | 2 +- files.cmake | 12 + include/dusk/settings.h | 3 + .../J3DGraphLoader/J3DMaterialFactory.h | 31 + .../src/J3DGraphLoader/J3DMaterialFactory.cpp | 12 + .../src/J3DGraphLoader/J3DModelLoader.cpp | 25 +- .../J3DGraphLoader/J3DModelLoaderCalcSize.cpp | 11 +- libs/JSystem/src/JKernel/JKRArchivePub.cpp | 13 + libs/JSystem/src/JKernel/JKRMemArchive.cpp | 13 + src/d/actor/d_a_bg_obj.cpp | 12 + src/d/d_resorce.cpp | 27 + src/dusk/file_select.cpp | 17 + src/dusk/file_select.hpp | 3 + src/dusk/settings.cpp | 6 + src/dusk/tphd/AddrLib.cpp | 435 ++++++++++++ src/dusk/tphd/AddrLib.hpp | 57 ++ src/dusk/tphd/GtxParser.cpp | 89 +++ src/dusk/tphd/GtxParser.hpp | 79 +++ src/dusk/tphd/HdAssetLayer.cpp | 617 ++++++++++++++++++ src/dusk/tphd/HdAssetLayer.hpp | 30 + src/dusk/tphd/TphdPack.cpp | 120 ++++ src/dusk/tphd/TphdPack.hpp | 59 ++ src/f_op/f_op_actor_mng.cpp | 5 +- src/m_Do/m_Do_dvd_thread.cpp | 12 + src/m_Do/m_Do_machine.cpp | 14 + src/m_Do/m_Do_main.cpp | 20 + 28 files changed, 1740 insertions(+), 8 deletions(-) create mode 100644 src/dusk/tphd/AddrLib.cpp create mode 100644 src/dusk/tphd/AddrLib.hpp create mode 100644 src/dusk/tphd/GtxParser.cpp create mode 100644 src/dusk/tphd/GtxParser.hpp create mode 100644 src/dusk/tphd/HdAssetLayer.cpp create mode 100644 src/dusk/tphd/HdAssetLayer.hpp create mode 
100644 src/dusk/tphd/TphdPack.cpp create mode 100644 src/dusk/tphd/TphdPack.hpp diff --git a/.gitmodules b/.gitmodules index b386c1754a..7dcdc438ef 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,3 @@ [submodule "extern/aurora"] path = extern/aurora - url = https://github.com/encounter/aurora.git + url = https://github.com/Lurs/aurora.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 6956574919..b00aa1201f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -101,6 +101,10 @@ endif () set(AURORA_ENABLE_DVD ON CACHE BOOL "Enable DVD API support" FORCE) set(AURORA_ENABLE_CARD ON CACHE BOOL "Enable CARD API support" FORCE) set(AURORA_ENABLE_RMLUI ON CACHE BOOL "Enable RmlUi UI support" FORCE) +# Force Freetype (pulled by aurora/RmlUi) to use its bundled mini-zlib. +# Otherwise its find_package(ZLIB) latches onto our FetchContent'd shared zlib, +# duplicating inflate*/inflateInit2_ symbols against zlibstatic. +set(FT_DISABLE_ZLIB TRUE CACHE BOOL "" FORCE) add_subdirectory(extern/aurora EXCLUDE_FROM_ALL) target_compile_definitions(aurora_mtx PRIVATE MTX_USE_PS=1) @@ -115,6 +119,7 @@ else () set(DUSK_ENABLE_UPDATE_CHECKER_DEFAULT ON) endif () option(DUSK_ENABLE_UPDATE_CHECKER "Enable update checking support" ${DUSK_ENABLE_UPDATE_CHECKER_DEFAULT}) +option(DUSK_TPHD "Enable Twilight Princess HD asset support" ON) if(ANDROID) set(DUSK_MOVIE_SUPPORT OFF) @@ -218,7 +223,6 @@ elseif (MSVC) add_compile_options($<$:/utf-8>) endif () - include(FetchContent) # Declare all dependencies first so CMake can download them in parallel @@ -234,7 +238,17 @@ FetchContent_Declare(json URL_HASH SHA256=42f6e95cad6ec532fd372391373363b62a14af6d771056dbfc86160e6dfff7aa DOWNLOAD_EXTRACT_TIMESTAMP TRUE ) -FetchContent_MakeAvailable(cxxopts json) +message(STATUS "dusk: Fetching zlib") +FetchContent_Declare(zlib + URL https://github.com/madler/zlib/releases/download/v1.3.2/zlib-1.3.2.tar.gz + URL_HASH SHA256=bb329a0a2cd0274d05519d61c667c062e06990d72e125ee2dfa8de64f0119d16 + 
DOWNLOAD_EXTRACT_TIMESTAMP TRUE +) +FetchContent_MakeAvailable(cxxopts json zlib) + +if (NOT TARGET zlibstatic) + set(ZLIB_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE) +endif () if (DUSK_ENABLE_SENTRY_NATIVE) message(STATUS "dusk: Fetching sentry-native") @@ -293,6 +307,9 @@ source_group("dolzel" FILES ${DOLZEL_FILES} ${Z2AUDIOLIB_FILES} ${REL_FILES}) source_group("dusk" FILES ${DUSK_FILES} ${DUSK_HTTP_BACKEND_FILES}) set(GAME_COMPILE_DEFS TARGET_PC WIDESCREEN_SUPPORT=1 AVOID_UB=1 VERSION=0 MTX_USE_PS=1) +if (DUSK_TPHD) + list(APPEND GAME_COMPILE_DEFS DUSK_TPHD=1) +endif () set(GAME_INCLUDE_DIRS include @@ -310,6 +327,7 @@ set(GAME_LIBS aurora::core aurora::gx aurora::gd aurora::si aurora::vi aurora::p Threads::Threads) list(APPEND GAME_LIBS libzstd_static) +list(APPEND GAME_LIBS zlibstatic) if (DUSK_ENABLE_SENTRY_NATIVE) list(APPEND GAME_LIBS sentry) diff --git a/extern/aurora b/extern/aurora index 17be93f0ae..2355678c4f 160000 --- a/extern/aurora +++ b/extern/aurora @@ -1 +1 @@ -Subproject commit 17be93f0ae011fc3202e87e3f2efda4aae250fa5 +Subproject commit 2355678c4f8a543325b83622aeab4a262566ee04 diff --git a/files.cmake b/files.cmake index 53aa7a2636..bb6c76cb1b 100644 --- a/files.cmake +++ b/files.cmake @@ -246,6 +246,18 @@ set(DOLZEL_FILES if(DEBUG) list(APPEND DOLZEL_FILES src/d/d_event_debug.cpp) endif(DEBUG) +if (DUSK_TPHD) + list(APPEND DOLZEL_FILES + src/dusk/tphd/TphdPack.hpp + src/dusk/tphd/TphdPack.cpp + src/dusk/tphd/GtxParser.hpp + src/dusk/tphd/GtxParser.cpp + src/dusk/tphd/AddrLib.hpp + src/dusk/tphd/AddrLib.cpp + src/dusk/tphd/HdAssetLayer.hpp + src/dusk/tphd/HdAssetLayer.cpp + ) +endif(DUSK_TPHD) set(Z2AUDIOLIB_FILES src/Z2AudioLib/Z2Calc.cpp diff --git a/include/dusk/settings.h b/include/dusk/settings.h index 78c5540e58..56d2450263 100644 --- a/include/dusk/settings.h +++ b/include/dusk/settings.h @@ -175,6 +175,9 @@ struct UserSettings { ConfigVar checkForUpdates; ConfigVar cardFileType; ConfigVar enableAdvancedSettings; +#if DUSK_TPHD + ConfigVar 
hdContentPath; // path to TP-HD decrypted "content" folder +#endif } backend; }; diff --git a/libs/JSystem/include/JSystem/J3DGraphLoader/J3DMaterialFactory.h b/libs/JSystem/include/JSystem/J3DGraphLoader/J3DMaterialFactory.h index 9bdaba4655..2820ca1e90 100644 --- a/libs/JSystem/include/JSystem/J3DGraphLoader/J3DMaterialFactory.h +++ b/libs/JSystem/include/JSystem/J3DGraphLoader/J3DMaterialFactory.h @@ -85,6 +85,33 @@ struct J3DDisplayListInit { /* 0x4 */ BE(u32) field_0x4; }; // size 8 +#if DUSK_TPHD +// MAT4 material-entry blocks have 2 trailing bytes per entry compared to MAT3. +// This strided view skips over the extras at indexing time, leaving the +// in-memory data untouched. +class J3DMaterialInitDataView { +public: + J3DMaterialInitDataView() : mpData(NULL), mStride(sizeof(J3DMaterialInitData)) {} + + void set(const void* data, u32 stride) { + mpData = (u8*)data; + mStride = stride; + } + + J3DMaterialInitData& operator[](int idx) { + return *(J3DMaterialInitData*)(mpData + (idx * mStride)); + } + + J3DMaterialInitData& operator[](int idx) const { + return *(J3DMaterialInitData*)(mpData + (idx * mStride)); + } + +private: + u8* mpData; + u32 mStride; +}; +#endif + struct J3DTexCoord2Info; class J3DCurrentMtxInfo; @@ -146,7 +173,11 @@ public: u8 getMaterialMode(int idx) const { return mpMaterialInitData[mpMaterialID[idx]].mMaterialMode; } /* 0x00 */ u16 mMaterialNum; +#if DUSK_TPHD + /* 0x04 */ J3DMaterialInitDataView mpMaterialInitData; +#else /* 0x04 */ J3DMaterialInitData* mpMaterialInitData; +#endif /* 0x08 */ BE(u16)* mpMaterialID; /* 0x0C */ J3DIndInitData* mpIndInitData; /* 0x10 */ GXColor* mpMatColor; diff --git a/libs/JSystem/src/J3DGraphLoader/J3DMaterialFactory.cpp b/libs/JSystem/src/J3DGraphLoader/J3DMaterialFactory.cpp index 7aa4d31cd4..5aabde57aa 100644 --- a/libs/JSystem/src/J3DGraphLoader/J3DMaterialFactory.cpp +++ b/libs/JSystem/src/J3DGraphLoader/J3DMaterialFactory.cpp @@ -13,7 +13,15 @@ 
J3DMaterialFactory::J3DMaterialFactory(J3DMaterialBlock const& i_block) { mMaterialNum = i_block.mMaterialNum; +#if DUSK_TPHD + u32 material_init_data_size = sizeof(J3DMaterialInitData); + if (i_block.mBlockType == 'MAT4') { + material_init_data_size += sizeof(u16); + } + mpMaterialInitData.set(JSUConvertOffsetToPtr(&i_block, i_block.mpMaterialInitData), material_init_data_size); +#else mpMaterialInitData = JSUConvertOffsetToPtr(&i_block, i_block.mpMaterialInitData); +#endif mpMaterialID = JSUConvertOffsetToPtr(&i_block, i_block.mpMaterialID); if (i_block.mpIndInitData != (uintptr_t)NULL && (uintptr_t)i_block.mpIndInitData - (uintptr_t)i_block.mpNameTable > 4) { mpIndInitData = JSUConvertOffsetToPtr(&i_block, i_block.mpIndInitData); @@ -54,7 +62,11 @@ J3DMaterialFactory::J3DMaterialFactory(J3DMaterialBlock const& i_block) { J3DMaterialFactory::J3DMaterialFactory(J3DMaterialDLBlock const& i_block) { mMaterialNum = i_block.mMaterialNum; +#if DUSK_TPHD + mpMaterialInitData.set(NULL, sizeof(J3DMaterialInitData)); +#else mpMaterialInitData = NULL; +#endif mpDisplayListInit = JSUConvertOffsetToPtr(&i_block, i_block.mpDisplayListInit); mpPatchingInfo = JSUConvertOffsetToPtr(&i_block, i_block.mpPatchingInfo); mpCurrentMtxInfo = JSUConvertOffsetToPtr(&i_block, i_block.mpCurrentMtxInfo); diff --git a/libs/JSystem/src/J3DGraphLoader/J3DModelLoader.cpp b/libs/JSystem/src/J3DGraphLoader/J3DModelLoader.cpp index c3c72310a0..a5409cd137 100644 --- a/libs/JSystem/src/J3DGraphLoader/J3DModelLoader.cpp +++ b/libs/JSystem/src/J3DGraphLoader/J3DModelLoader.cpp @@ -103,6 +103,9 @@ J3DModelData* J3DModelLoader::load(void const* i_data, u32 i_flags) { readJoint((J3DJointBlock*)block); break; case 'MAT3': +#if DUSK_TPHD + case 'MAT4': // TODO: Real MAT4 support +#endif readMaterial((J3DMaterialBlock*)block, (s32)i_flags); break; case 'MAT2': @@ -147,6 +150,9 @@ J3DMaterialTable* J3DModelLoader::loadMaterialTable(void const* i_data) { for (u32 block_no = 0; block_no < data->mBlockNum; 
block_no++) { switch (block->mBlockType) { case 'MAT3': +#if DUSK_TPHD + case 'MAT4': // TODO: Real MAT4 support +#endif readMaterialTable((J3DMaterialBlock*)block, flags); break; case 'MAT2': @@ -212,6 +218,9 @@ J3DModelData* J3DModelLoader::loadBinaryDisplayList(void const* i_data, u32 i_fl modifyMaterial(i_flags); break; case 'MAT3': +#if DUSK_TPHD + case 'MAT4': // TODO: Real MAT4 support +#endif flags = 0x50100000; flags |= (i_flags & 0x3000000); mpMaterialBlock = (J3DMaterialBlock*)block; @@ -308,8 +317,22 @@ static GXVtxAttrFmtList getFmt(GXVtxAttrFmtList* i_fmtList, GXAttr i_attr) { return *i_fmtList; } } - +#if DUSK_TPHD + // HD BMDs occasionally have vertex arrays without a format entry. Mirror + // the GC runtime: fall back to J3DSys::initGX defaults. + GXVtxAttrFmtList def{}; + def.attr = i_attr; + def.frac = 0; + if (i_attr == GX_VA_POS) { def.cnt = GX_POS_XYZ; def.type = GX_F32; } + else if (i_attr == GX_VA_NRM) { def.cnt = GX_NRM_XYZ; def.type = GX_F32; } + else if (i_attr == GX_VA_NBT) { def.cnt = GX_NRM_NBT; def.type = GX_F32; } + else if (i_attr >= GX_VA_CLR0 && + i_attr <= GX_VA_CLR1) { def.cnt = GX_CLR_RGBA; def.type = GX_RGBA8; } + else { def.cnt = GX_TEX_ST; def.type = GX_F32; } + return def; +#else OSPanic(__FILE__, __LINE__, "Unable to find vertex attribute format!"); +#endif } #endif diff --git a/libs/JSystem/src/J3DGraphLoader/J3DModelLoaderCalcSize.cpp b/libs/JSystem/src/J3DGraphLoader/J3DModelLoaderCalcSize.cpp index 6f4a86aa83..eea522eb72 100644 --- a/libs/JSystem/src/J3DGraphLoader/J3DModelLoaderCalcSize.cpp +++ b/libs/JSystem/src/J3DGraphLoader/J3DModelLoaderCalcSize.cpp @@ -16,7 +16,8 @@ u16 J3DModelLoader::countMaterialNum(const void* stream) { const J3DModelBlock* block = header->mBlocks; for (int i = 0; i < header->mBlockNum; i++) { - if (block->mBlockType == 'MAT3') { + // TODO: Real MAT4 support + if (block->mBlockType == 'MAT3' || block->mBlockType == 'MAT4') { const J3DMaterialBlock* materialBlock = (const 
J3DMaterialBlock*)block; return materialBlock->mMaterialNum; } @@ -45,6 +46,8 @@ u32 J3DModelLoader::calcLoadSize(void const* stream, u32 flags_) { size += calcSizeJoint((const J3DJointBlock*)nextBlock); break; case 'MAT3': + case 'MAT4': + // TODO: Real MAT4 support size += calcSizeMaterial((const J3DMaterialBlock*)nextBlock, flags); break; case 'SHP1': @@ -86,6 +89,8 @@ u32 J3DModelLoader::calcLoadMaterialTableSize(const void* stream) { for (u32 i = 0; i < header->mBlockNum; i++) { switch (nextBlock->mBlockType) { case 'MAT3': + case 'MAT4': + // TODO: Real MAT4 support size += calcSizeMaterialTable((const J3DMaterialBlock*)nextBlock, flags); break; case 'TEX1': @@ -135,7 +140,9 @@ u32 J3DModelLoader::calcLoadBinaryDisplayListSize(const void* stream, u32 flags) case 'MDL3': size += calcSizeMaterialDL((const J3DMaterialDLBlock*)nextBlock, flags); break; - case 'MAT3': { + case 'MAT3': + case 'MAT4': { + // TODO: Real MAT4 support u32 flags2 = (J3DMLF_21 | J3DMLF_Material_PE_Full | J3DMLF_Material_Color_LightOn); flags2 |= (u32)flags & (J3DMLF_Material_UseIndirect | J3DMLF_26); mpMaterialBlock = (const J3DMaterialBlock*)nextBlock; diff --git a/libs/JSystem/src/JKernel/JKRArchivePub.cpp b/libs/JSystem/src/JKernel/JKRArchivePub.cpp index 731a049457..b4f5077e17 100644 --- a/libs/JSystem/src/JKernel/JKRArchivePub.cpp +++ b/libs/JSystem/src/JKernel/JKRArchivePub.cpp @@ -9,6 +9,10 @@ #include "JSystem/JKernel/JKRMemArchive.h" #include "JSystem/JUtility/JUTAssert.h" +#if TARGET_PC && DUSK_TPHD +#include "dusk/tphd/HdAssetLayer.hpp" +#endif + JKRArchive* JKRArchive::check_mount_already(s32 entryNum, JKRHeap* heap) { if (heap == NULL) { heap = JKRGetCurrentHeap(); @@ -29,6 +33,15 @@ JKRArchive* JKRArchive::check_mount_already(s32 entryNum, JKRHeap* heap) { JKRArchive* JKRArchive::mount(const char* path, EMountMode mountMode, JKRHeap* heap, EMountDirection mountDirection) { +#if TARGET_PC && DUSK_TPHD + // TPHD arc redirect. 
+ if (path != NULL) { + if (auto hdBuf = dusk::tphd::tryLoadHdArchive(path)) { + return mount((*hdBuf)->data(), heap, mountDirection); + } + } +#endif + s32 entryNum = DVDConvertPathToEntrynum(path); if (entryNum < 0) return NULL; diff --git a/libs/JSystem/src/JKernel/JKRMemArchive.cpp b/libs/JSystem/src/JKernel/JKRMemArchive.cpp index d3b17d8a63..984b2f0ccc 100644 --- a/libs/JSystem/src/JKernel/JKRMemArchive.cpp +++ b/libs/JSystem/src/JKernel/JKRMemArchive.cpp @@ -10,10 +10,23 @@ #include #include "os_report.h" +#if DUSK_TPHD +#include "dusk/tphd/HdAssetLayer.hpp" +#endif + JKRMemArchive::JKRMemArchive(s32 entryNum, JKRArchive::EMountDirection mountDirection) : JKRArchive(entryNum, MOUNT_MEM) { mIsMounted = false; mMountDirection = mountDirection; +#if DUSK_TPHD + // TPHD arc redirect by entrynum. + if (const auto* hd = dusk::tphd::getHdBytesForEntryNum(entryNum)) { + if (!open(const_cast(hd->data()), static_cast(hd->size()), + JKRMEMBREAK_FLAG_UNKNOWN0)) { + return; + } + } else +#endif if (!open(entryNum, mMountDirection)) { return; } diff --git a/src/d/actor/d_a_bg_obj.cpp b/src/d/actor/d_a_bg_obj.cpp index 7d4adb9e47..fb07a6de9c 100644 --- a/src/d/actor/d_a_bg_obj.cpp +++ b/src/d/actor/d_a_bg_obj.cpp @@ -143,7 +143,11 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) { default: // Invalid data block type OS_REPORT_ERROR("データブロックタイプが不正です<%d>\n", block_type); +#if DUSK_TPHD + return 0; // HD-port: skip actor instead of asserting. 
+#else JUT_ASSERT(527, FALSE); +#endif } if (block_type == 0) { @@ -184,7 +188,11 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) { default: // Invalid data block type OS_REPORT_ERROR("データブロックタイプが不正です<%d>\n", block_type); +#if DUSK_TPHD + return 0; +#else JUT_ASSERT(570, FALSE); +#endif } if (block_type == 0) { @@ -225,8 +233,12 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) { default: // "Data Block type invalid<%d>\n" OSReport_Error("データブロックタイプが不正です<%d>\n", block_type); +#if DUSK_TPHD + return 0; +#else JUT_ASSERT(619, FALSE); break; +#endif } if (block_type == 0) { diff --git a/src/d/d_resorce.cpp b/src/d/d_resorce.cpp index bb982c4afb..8042af2eb8 100644 --- a/src/d/d_resorce.cpp +++ b/src/d/d_resorce.cpp @@ -23,6 +23,10 @@ #ifndef __MWERKS__ #include "dusk/extras.h" #include "dusk/logging.h" +#if DUSK_TPHD +#include "dusk/tphd/HdAssetLayer.hpp" +#include +#endif #endif dRes_info_c::dRes_info_c() { @@ -643,7 +647,20 @@ int dRes_info_c::setRes() { } #if DEBUG +#if DUSK_TPHD + // HD-redirected buffers live outside the JKR heap. Use the + // registered arc-range size; getSize() would return 0/undefined. 
+ void* mArcHdr = ((JKRMemArchive*)mArchive)->mArcHeader; + size_t arcSize = 0; + if (size_t hdRem = 0; aurora::gfx::hd_find_arc_range(mArcHdr, &hdRem)) { + arcSize = hdRem; + } else { + arcSize = JKRGetRootHeap()->getSize(mArcHdr); + } + mSize = arcSize + JKRGetMemBlockSize(NULL, mDataHeap); +#else mSize = JKRGetRootHeap()->getSize(((JKRMemArchive*)mArchive)->mArcHeader) + JKRGetMemBlockSize(NULL, mDataHeap); +#endif if (data_8074C6C0_debug) { JKRExpHeap* zeldaHeap = mDoExt_getZeldaHeap(); OSReport("\e[33mdRes_info_c::setRes <使用=%08x(work:%08x) 連続空き=%08x 残り空き=%08x (%3d) %s.arc\n\e[m", mSize, r28, zeldaHeap->getFreeSize(), zeldaHeap->getTotalFreeSize(), getResNum(), this); @@ -1020,6 +1037,16 @@ int dRes_control_c::setObjectRes(char const* i_arcName, void* i_archiveRes, u32 return 0; } +#if DUSK_TPHD + // HD hook for second JKRMemArchive constructor (see below) + const std::string hdPath = std::format("/res/Object/{}.arc", i_arcName); + if (auto hd = dusk::tphd::tryLoadHdArchive(hdPath)) { + DuskLog.info("[TPHD] setObjectRes redirect: {} -> HD ({} bytes)", + i_arcName, (*hd)->size()); + i_archiveRes = const_cast((*hd)->data()); + i_bufferSize = static_cast((*hd)->size()); + } +#endif JKRMemArchive* memArchive = JKR_NEW JKRMemArchive(i_archiveRes, i_bufferSize, JKRMEMBREAK_FLAG_UNKNOWN0); if (memArchive == NULL || !memArchive->isMounted()) { return 0; diff --git a/src/dusk/file_select.cpp b/src/dusk/file_select.cpp index fcda233c56..ec388f86cd 100644 --- a/src/dusk/file_select.cpp +++ b/src/dusk/file_select.cpp @@ -88,4 +88,21 @@ void ShowFileSelect(FileCallback callback, void* userdata, SDL_Window* window, default_location, allow_many); #endif } + +void ShowFolderSelect(FileCallback callback, void* userdata, SDL_Window* window, + const char* default_location) { + if (callback == nullptr) { + return; + } + +#if USE_IOS_DIALOG + // iOS doesn't expose a folder picker — report unsupported. 
+ callback(userdata, nullptr, "Folder selection is not supported on this platform"); +#else + auto state = std::make_unique(); + state->callback = callback; + state->userdata = userdata; + SDL_ShowOpenFolderDialog(&onSDLDialogFinished, state.release(), window, default_location, false); +#endif +} } // namespace dusk diff --git a/src/dusk/file_select.hpp b/src/dusk/file_select.hpp index 175c5aa086..97c739b67a 100644 --- a/src/dusk/file_select.hpp +++ b/src/dusk/file_select.hpp @@ -12,4 +12,7 @@ void ShowFileSelect(FileCallback callback, void* userdata, SDL_Window* window, const SDL_DialogFileFilter* filters, int nfilters, const char* default_location, bool allow_many); +void ShowFolderSelect(FileCallback callback, void* userdata, SDL_Window* window, + const char* default_location); + } // namespace dusk diff --git a/src/dusk/settings.cpp b/src/dusk/settings.cpp index aa31540898..e2bfb01e3c 100644 --- a/src/dusk/settings.cpp +++ b/src/dusk/settings.cpp @@ -122,6 +122,9 @@ UserSettings g_userSettings = { .checkForUpdates {"backend.checkForUpdates", true}, .cardFileType {"backend.cardFileType", static_cast(CARD_GCIFOLDER)}, .enableAdvancedSettings {"backend.enableAdvancedSettings", false}, +#if DUSK_TPHD + .hdContentPath {"backend.hdContentPath", ""}, +#endif } }; @@ -226,6 +229,9 @@ void registerSettings() { Register(g_userSettings.backend.checkForUpdates); Register(g_userSettings.backend.cardFileType); Register(g_userSettings.backend.enableAdvancedSettings); +#if DUSK_TPHD + Register(g_userSettings.backend.hdContentPath); +#endif } // Transient settings diff --git a/src/dusk/tphd/AddrLib.cpp b/src/dusk/tphd/AddrLib.cpp new file mode 100644 index 0000000000..2bd8755bab --- /dev/null +++ b/src/dusk/tphd/AddrLib.cpp @@ -0,0 +1,435 @@ +/* + * Ported from decaf-emu/addrlib (https://github.com/decaf-emu/addrlib), + * src/r600/r600addrlib.{cpp,h} and src/core/addrlib.cpp. + * + * Original AMD copyright header: + * + * Copyright (C) 2014 Advanced Micro Devices, Inc. 
+ * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS + * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * -------------------------------------------------------------------------- + * + * This is a minimal extraction of decaf-emu's R600AddrLib hardcoded for + * the Wii-U R700-class GPU (mPipes=2, mBanks=4, group/swap/split sizes). + * The R600 class hierarchy is collapsed to free functions; only the surface + * address paths needed for GTX texture deswizzling are kept. + */ + +#include "AddrLib.hpp" + +#include +#include + +namespace dusk::tphd::addrlib { + +// ---- Wii-U R700 hardware constants ---------------------------------------- +// Match decaf's R600AddrLib state after DecodeGbRegs for the Wii-U register +// configuration: pipes=2, banks=4, group=256B, row=2KB, swap=256B, split=2KB. 
+static constexpr u32 kPipes = 2; +static constexpr u32 kBanks = 4; +static constexpr u32 kPipeInterleaveBytes = 256; +static constexpr u32 kRowSize = 2048; +static constexpr u32 kSwapSize = 256; +static constexpr u32 kSplitSize = 2048; +// Wii-U does not enable the optimal bank-swap heuristic. +static constexpr bool kOptimalBankSwap = false; + +// ---- Decaf addrcommon.h constants ----------------------------------------- +static constexpr u32 kMicroTileWidth = 8; +static constexpr u32 kMicroTileHeight = 8; +static constexpr u32 kMicroTilePixels = kMicroTileWidth * kMicroTileHeight; +static constexpr u32 kThickTileThickness = 4; + +static constexpr u32 BITS_TO_BYTES(u32 v) { return (v + 7) / 8; } +static constexpr u32 _BIT(u32 v, u32 b) { return (v >> b) & 1; } + +static u32 Log2(u32 v) { + u32 r = 0; + while (v > 1) { v >>= 1; ++r; } + return r; +} + +// ---- Tile-mode classification --------------------------------------------- + +static u32 ComputeSurfaceThickness(TileMode tm) { + switch (tm) { + case TileMode::Tiled1DThick: + case TileMode::Tiled2DThick: + case TileMode::Tiled2BThick: + case TileMode::Tiled3DThick: + case TileMode::Tiled3BThick: + return 4u; + default: + return 1u; + } +} + +static bool IsThickMacroTiled(TileMode tm) { + switch (tm) { + case TileMode::Tiled2DThick: + case TileMode::Tiled2BThick: + case TileMode::Tiled3DThick: + case TileMode::Tiled3BThick: + return true; + default: + return false; + } +} + +static bool IsBankSwappedTileMode(TileMode tm) { + switch (tm) { + case TileMode::Tiled2BThin1: + case TileMode::Tiled2BThin2: + case TileMode::Tiled2BThin4: + case TileMode::Tiled2BThick: + case TileMode::Tiled3BThin1: + case TileMode::Tiled3BThick: + return true; + default: + return false; + } +} + +// AddrTileType: 0=Displayable, 1=NonDisplayable, 2=DepthSampleOrder, 3=Thick. 
+// Wii-U GTX color textures empirically use the Displayable (bpp-switched) +// microtile layout, depth surfaces use the simple x/y-interleave pattern +// that AMD calls NonDisplayable. (Same convention as Cemu's port.) +static u32 GetTileType(bool isDepth) { + return isDepth ? 1u /* NonDisplayable */ : 0u /* Displayable */; +} + +static u32 ComputeSurfaceRotationFromTileMode(TileMode tm) { + switch (tm) { + case TileMode::Tiled2DThin1: + case TileMode::Tiled2DThin2: + case TileMode::Tiled2DThin4: + case TileMode::Tiled2DThick: + case TileMode::Tiled2BThin1: + case TileMode::Tiled2BThin2: + case TileMode::Tiled2BThin4: + case TileMode::Tiled2BThick: + return kPipes * ((kBanks >> 1) - 1); + case TileMode::Tiled3DThin1: + case TileMode::Tiled3DThick: + case TileMode::Tiled3BThin1: + case TileMode::Tiled3BThick: + return (kPipes >= 4) ? ((kPipes >> 1) - 1) : 1; + default: + return 0; + } +} + +static u32 ComputeMacroTileAspectRatio(TileMode tm) { + switch (tm) { + case TileMode::Tiled2BThin1: + case TileMode::Tiled3DThin1: + case TileMode::Tiled3BThin1: + return 1; + case TileMode::Tiled2DThin2: + case TileMode::Tiled2BThin2: + return 2; + case TileMode::Tiled2DThin4: + case TileMode::Tiled2BThin4: + return 4; + default: + return 1; + } +} + +// ---- Pixel-index-within-microtile ----------------------------------------- + +static u32 ComputePixelIndexWithinMicroTile(u32 x, u32 y, u32 z, u32 bpp, + TileMode tm, u32 tileType) { + u32 b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0, b8 = 0; + const u32 x0 = _BIT(x, 0), x1 = _BIT(x, 1), x2 = _BIT(x, 2); + const u32 y0 = _BIT(y, 0), y1 = _BIT(y, 1), y2 = _BIT(y, 2); + const u32 z0 = _BIT(z, 0), z1 = _BIT(z, 1), z2 = _BIT(z, 2); + const u32 thickness = ComputeSurfaceThickness(tm); + + if (tileType == 3 /* Thick */) { + b0 = x0; b1 = y0; b2 = z0; b3 = x1; + b4 = y1; b5 = z1; b6 = x2; b7 = y2; + } else if (tileType == 1 /* NonDisplayable */) { + b0 = x0; b1 = y0; b2 = x1; + b3 = y1; b4 = x2; b5 = y2; + } else { 
+ switch (bpp) { + case 8: + b0 = x0; b1 = x1; b2 = x2; b3 = y1; b4 = y0; b5 = y2; break; + case 16: + b0 = x0; b1 = x1; b2 = x2; b3 = y0; b4 = y1; b5 = y2; break; + case 64: + b0 = x0; b1 = y0; b2 = x1; b3 = x2; b4 = y1; b5 = y2; break; + case 128: + b0 = y0; b1 = x0; b2 = x1; b3 = x2; b4 = y1; b5 = y2; break; + case 32: + case 96: + default: + b0 = x0; b1 = x1; b2 = y0; b3 = x2; b4 = y1; b5 = y2; break; + } + } + if (tileType != 3 && thickness > 1) { + b6 = z0; b7 = z1; + } + if (thickness == 8) { + b8 = z2; + } + return (b0) | (b1 << 1) | (b2 << 2) | (b3 << 3) | (b4 << 4) | + (b5 << 5) | (b6 << 6) | (b7 << 7) | (b8 << 8); +} + +// ---- Pipe / Bank from coord (no rotation) --------------------------------- +// Hardcoded for Wii-U: pipes=2, banks=4. + +static u32 ComputePipeFromCoordWoRotation(u32 x, u32 y) { + return (_BIT(y, 3) ^ _BIT(x, 3)) & 1; +} + +static u32 ComputeBankFromCoordWoRotation(u32 x, u32 y) { + const u32 ty = y / kPipes; + const u32 ty4 = _BIT(ty, 4); + const u32 ty3 = _BIT(ty, 3); + const u32 x3 = _BIT(x, 3); + const u32 x4 = _BIT(x, 4); + u32 b0 = (ty4 ^ x3); + if (kOptimalBankSwap && kPipes == 8) { + b0 ^= _BIT(x, 5); + } + const u32 b1 = (ty3 ^ x4); + return b0 | (b1 << 1); +} + +// ---- Bank-swapped width --------------------------------------------------- + +static u32 ComputeSurfaceBankSwappedWidth(TileMode tm, u32 bpp, u32 numSamples, + u32 pitch) { + if (!IsBankSwappedTileMode(tm)) return 0; + + u32 slicesPerTile = 1; + const u32 bytesPerSample = 8 * bpp; + const u32 samplesPerTile = bytesPerSample ? 
(kSplitSize / bytesPerSample) : 0; + if (samplesPerTile != 0) { + slicesPerTile = std::max(1u, numSamples / samplesPerTile); + } + if (IsThickMacroTiled(tm)) { + numSamples = 4; + } + const u32 bytesPerTileSlice = numSamples * bytesPerSample / slicesPerTile; + const u32 factor = ComputeMacroTileAspectRatio(tm); + const u32 swapTiles = std::max(1u, (kSwapSize >> 1) / bpp); + const u32 swapWidth = swapTiles * 8 * kBanks; + const u32 heightBytes = numSamples * factor * kPipes * bpp / slicesPerTile; + const u32 swapMax = kPipes * kBanks * kRowSize / heightBytes; + const u32 swapMin = kPipeInterleaveBytes * 8 * kBanks / bytesPerTileSlice; + + u32 bankSwapWidth = std::min(swapMax, std::max(swapMin, swapWidth)); + while (bankSwapWidth >= 2 * pitch) { + bankSwapWidth >>= 1; + } + return bankSwapWidth; +} + +// ---- Surface-address from coord ------------------------------------------- + +static u64 ComputeSurfaceAddrFromCoordMicroTiled(u32 x, u32 y, u32 slice, + u32 bpp, u32 pitch, u32 height, + TileMode tm, bool isDepth) { + const u64 microTileThickness = (tm == TileMode::Tiled1DThick) ? 
4u : 1u; + const u64 microTileBytes = + BITS_TO_BYTES(static_cast(kMicroTilePixels * microTileThickness * bpp)); + const u64 microTilesPerRow = pitch / kMicroTileWidth; + const u64 microTileIndexX = x / kMicroTileWidth; + const u64 microTileIndexY = y / kMicroTileHeight; + const u64 microTileIndexZ = slice / microTileThickness; + + const u64 microTileOffset = microTileBytes * + (microTileIndexX + microTileIndexY * microTilesPerRow); + const u64 sliceBytes = + BITS_TO_BYTES(static_cast(pitch * height * microTileThickness * bpp)); + const u64 sliceOffset = microTileIndexZ * sliceBytes; + const u64 pixelIndex = + ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tm, GetTileType(isDepth)); + const u64 pixelOffset = (bpp * pixelIndex) / 8; + + return pixelOffset + microTileOffset + sliceOffset; +} + +static u64 ComputeSurfaceAddrFromCoordMacroTiled(u32 x, u32 y, u32 slice, u32 sample, + u32 bpp, u32 pitch, u32 height, + u32 numSamples, TileMode tm, + bool isDepth, u32 pipeSwizzle, + u32 bankSwizzle) { + const u64 numPipes = kPipes; + const u64 numBanks = kBanks; + const u64 numGroupBits = Log2(kPipeInterleaveBytes); + const u64 numPipeBits = Log2(kPipes); + const u64 numBankBits = Log2(kBanks); + + const u64 microTileThickness = ComputeSurfaceThickness(tm); + const u64 microTileBits = kMicroTilePixels * microTileThickness * bpp * numSamples; + const u64 microTileBytes = microTileBits / 8; + + const u64 pixelIndex = + ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tm, GetTileType(isDepth)); + + u64 sampleOffset, pixelOffset; + if (isDepth) { + sampleOffset = bpp * sample; + pixelOffset = numSamples * bpp * pixelIndex; + } else { + sampleOffset = sample * (microTileBits / numSamples); + pixelOffset = bpp * pixelIndex; + } + u64 elemOffset = pixelOffset + sampleOffset; + + const u64 bytesPerSample = microTileBytes / numSamples; + u64 sampleSlice = 0; + u64 numSampleSplits = 1; + if (numSamples > 1 && microTileBytes > kSplitSize) { + const u64 samplesPerSlice = 
kSplitSize / bytesPerSample; + numSampleSplits = numSamples / samplesPerSlice; + numSamples = static_cast(samplesPerSlice); + const u64 tileSliceBits = microTileBits / numSampleSplits; + sampleSlice = elemOffset / tileSliceBits; + elemOffset %= tileSliceBits; + } + elemOffset /= 8; + + u64 pipe = ComputePipeFromCoordWoRotation(x, y); + u64 bank = ComputeBankFromCoordWoRotation(x, y); + u64 bankPipe = pipe + numPipes * bank; + const u64 rotation = ComputeSurfaceRotationFromTileMode(tm); + const u64 swizzle = pipeSwizzle + numPipes * bankSwizzle; + u64 sliceIn = slice; + if (IsThickMacroTiled(tm)) { + sliceIn /= kThickTileThickness; + } + bankPipe ^= numPipes * sampleSlice * ((numBanks >> 1) + 1) ^ (swizzle + sliceIn * rotation); + bankPipe %= numPipes * numBanks; + pipe = bankPipe % numPipes; + bank = bankPipe / numPipes; + + const u64 sliceBytes = + BITS_TO_BYTES(static_cast(pitch * height * microTileThickness * bpp * numSamples)); + const u64 sliceOffset = sliceBytes * + ((sampleSlice + numSampleSplits * slice) / microTileThickness); + + u64 macroTilePitch = 8 * numBanks; + u64 macroTileHeight = 8 * numPipes; + switch (tm) { + case TileMode::Tiled2DThin2: + case TileMode::Tiled2BThin2: + macroTilePitch /= 2; + macroTileHeight *= 2; + break; + case TileMode::Tiled2DThin4: + case TileMode::Tiled2BThin4: + macroTilePitch /= 4; + macroTileHeight *= 4; + break; + default: + break; + } + const u64 macroTilesPerRow = pitch / macroTilePitch; + const u64 macroTileBytes = + BITS_TO_BYTES(static_cast(numSamples * microTileThickness * bpp * + macroTileHeight * macroTilePitch)); + const u64 macroTileIndexX = x / macroTilePitch; + const u64 macroTileIndexY = y / macroTileHeight; + const u64 macroTileOffset = macroTileBytes * + (macroTileIndexX + macroTilesPerRow * macroTileIndexY); + + if (IsBankSwappedTileMode(tm)) { + static constexpr u32 bankSwapOrder[] = { 0, 1, 3, 2, 6, 7, 5, 4, 0, 0 }; + const u32 bankSwapWidth = + ComputeSurfaceBankSwappedWidth(tm, bpp, numSamples, 
pitch); + const u64 swapIndex = (bankSwapWidth != 0) + ? (macroTilePitch * macroTileIndexX / bankSwapWidth) : 0; + bank ^= bankSwapOrder[swapIndex & (kBanks - 1)]; + } + + const u64 groupMask = (1u << numGroupBits) - 1; + const u64 totalOffset = elemOffset + + ((macroTileOffset + sliceOffset) >> (numBankBits + numPipeBits)); + const u64 offsetHigh = (totalOffset & ~groupMask) << (numBankBits + numPipeBits); + const u64 offsetLow = totalOffset & groupMask; + const u64 bankBits = bank << (numPipeBits + numGroupBits); + const u64 pipeBits = pipe << numGroupBits; + return bankBits | pipeBits | offsetLow | offsetHigh; +} + +// ---- High-level deswizzle ------------------------------------------------- + +std::vector deswizzle(const SurfaceDesc& desc, std::span tiledBytes) { + // For BCN formats addrlib operates on block coordinates; bpp is bits per + // 4x4 block (e.g. 64 for BC1). Reduce width/height to block extents. + const u32 blockWidth = desc.isBcn ? (desc.width + 3) / 4 : desc.width; + const u32 blockHeight = desc.isBcn ? (desc.height + 3) / 4 : desc.height; + + const u32 bytesPerElement = desc.bpp / 8; + const u32 linearStride = blockWidth * bytesPerElement; + std::vector linear(static_cast(linearStride) * blockHeight, 0); + + const u32 pipeSwizzle = (desc.swizzle >> 8) & 1; + const u32 bankSwizzle = (desc.swizzle >> 9) & 3; + + // Linear tile modes: trivial copy honoring pitch. 
+ if (desc.tileMode == TileMode::LinearGeneral || + desc.tileMode == TileMode::LinearAligned) { + for (u32 y = 0; y < blockHeight; ++y) { + const u32 srcOff = y * desc.pitch * bytesPerElement; + if (srcOff + linearStride > tiledBytes.size()) break; + std::memcpy(linear.data() + y * linearStride, + tiledBytes.data() + srcOff, linearStride); + } + return linear; + } + + const bool microTiled = + (desc.tileMode == TileMode::Tiled1DThin1 || + desc.tileMode == TileMode::Tiled1DThick); + + for (u32 y = 0; y < blockHeight; ++y) { + for (u32 x = 0; x < blockWidth; ++x) { + u64 srcOff; + if (microTiled) { + srcOff = ComputeSurfaceAddrFromCoordMicroTiled( + x, y, /*slice*/ 0, desc.bpp, desc.pitch, blockHeight, + desc.tileMode, desc.isDepth); + } else { + srcOff = ComputeSurfaceAddrFromCoordMacroTiled( + x, y, /*slice*/ 0, /*sample*/ 0, desc.bpp, desc.pitch, + blockHeight, /*numSamples*/ 1, desc.tileMode, desc.isDepth, + pipeSwizzle, bankSwizzle); + } + if (srcOff + bytesPerElement > tiledBytes.size()) continue; + const u32 dstOff = (y * blockWidth + x) * bytesPerElement; + std::memcpy(linear.data() + dstOff, + tiledBytes.data() + srcOff, bytesPerElement); + } + } + return linear; +} + +} // namespace dusk::tphd::addrlib diff --git a/src/dusk/tphd/AddrLib.hpp b/src/dusk/tphd/AddrLib.hpp new file mode 100644 index 0000000000..49368572b8 --- /dev/null +++ b/src/dusk/tphd/AddrLib.hpp @@ -0,0 +1,57 @@ +// Ported from decaf-emu/addrlib (https://github.com/decaf-emu/addrlib), +// which is itself derived from AMD's address library. +// Copyright (c) 2014 Advanced Micro Devices, Inc. All Rights Reserved. +// Licensed under the AMD MIT-style license; see the AMD copyright header in +// AddrLib.cpp. +// +// Minimal R600/R700 surface-address port sufficient for deswizzling Wii-U +// GTX textures at load time. Hardcoded for Wii-U HW configuration: +// pipes = 2, banks = 4, pipe interleave = 256B, +// row size = 2KB, sample split = 2KB, swap size = 256B. 
+ +#ifndef DUSK_TPHD_ADDRLIB_HPP +#define DUSK_TPHD_ADDRLIB_HPP + +#include +#include + +#include + +namespace dusk::tphd::addrlib { + +enum class TileMode : u32 { + LinearGeneral = 0, + LinearAligned = 1, + Tiled1DThin1 = 2, + Tiled1DThick = 3, + Tiled2DThin1 = 4, + Tiled2DThin2 = 5, + Tiled2DThin4 = 6, + Tiled2DThick = 7, + Tiled2BThin1 = 8, + Tiled2BThin2 = 9, + Tiled2BThin4 = 10, + Tiled2BThick = 11, + Tiled3DThin1 = 12, + Tiled3DThick = 13, + Tiled3BThin1 = 14, + Tiled3BThick = 15, +}; + +struct SurfaceDesc { + u32 width; // pixels (or BCN blocks) + u32 height; // pixels (or BCN blocks) + u32 pitch; // pixels (or BCN blocks) + u32 bpp; // bits per pixel (or per 4x4 BCN block, e.g. 64 for BC1) + TileMode tileMode; + u32 swizzle; // GTX swizzle field; pipe = (>>8)&1, bank = (>>9)&3 + bool isBcn; + bool isDepth; +}; + +// Deswizzle a single surface mip level into a row-major linear buffer. +std::vector deswizzle(const SurfaceDesc& desc, std::span tiledBytes); + +} // namespace dusk::tphd::addrlib + +#endif diff --git a/src/dusk/tphd/GtxParser.cpp b/src/dusk/tphd/GtxParser.cpp new file mode 100644 index 0000000000..1aaaeecf3d --- /dev/null +++ b/src/dusk/tphd/GtxParser.cpp @@ -0,0 +1,89 @@ +#include "GtxParser.hpp" + +#include + +#include "dusk/endian.h" + +namespace dusk::tphd { + +namespace { + +constexpr u32 kBlockTypeEOF = 0x01; +constexpr u32 kBlockTypeSurface = 0x0B; +constexpr u32 kBlockTypeImage = 0x0C; +constexpr u32 kBlockTypeMipChain = 0x0D; + +} + +std::vector parseGtx(std::span gtx) { + std::vector out; + + if (gtx.size() < sizeof(Gfx2Header) || + std::memcmp(gtx.data(), "Gfx2", 4) != 0) { + return out; + } + const auto* fileHdr = reinterpret_cast(gtx.data()); + const u32 headerSize = fileHdr->headerSize; + if (headerSize > gtx.size()) { + return out; + } + + GtxSurface* current = nullptr; + size_t off = headerSize; + + while (off + sizeof(Gfx2BlockHeader) <= gtx.size()) { + const auto* blk = reinterpret_cast(gtx.data() + off); + if 
(std::memcmp(blk->magic, "BLK{", 4) != 0) { + break; + } + const u32 blockHdrSize = blk->headerSize; + const u32 blockType = blk->blockType; + const u32 blockDataSz = blk->blockDataSize; + + if (blockHdrSize < sizeof(Gfx2BlockHeader) || + off + blockHdrSize + blockDataSz > gtx.size()) { + break; + } + const u8* body = gtx.data() + off + blockHdrSize; + + switch (blockType) { + case kBlockTypeSurface: { + if (blockDataSz < sizeof(Gx2SurfaceBody)) break; + const auto* sb = reinterpret_cast(body); + GtxSurface s{}; + s.format = sb->format; + s.width = sb->width; + s.height = sb->height; + s.depth = sb->depth; + s.mipCount = sb->mipCount; + s.aa = sb->aa; + s.use = sb->use; + s.imgSize = sb->imgSize; + s.mipSize = sb->mipSize; + s.tileMode = sb->tileMode; + s.swizzle = sb->swizzle; + s.pitch = sb->pitch; + for (u32 i = 0; i < 13; ++i) { + s.mipOffsets[i] = sb->mipOffsets[i]; + } + out.push_back(s); + current = &out.back(); + break; + } + case kBlockTypeImage: + if (current) current->baseData = gtx.subspan(off + blockHdrSize, blockDataSz); + break; + case kBlockTypeMipChain: + if (current) current->mipData = gtx.subspan(off + blockHdrSize, blockDataSz); + break; + case kBlockTypeEOF: + return out; + default: + break; + } + off += blockHdrSize + blockDataSz; + } + return out; +} + +} diff --git a/src/dusk/tphd/GtxParser.hpp b/src/dusk/tphd/GtxParser.hpp new file mode 100644 index 0000000000..a33f3e6a12 --- /dev/null +++ b/src/dusk/tphd/GtxParser.hpp @@ -0,0 +1,79 @@ +#ifndef DUSK_TPHD_GTX_PARSER_HPP +#define DUSK_TPHD_GTX_PARSER_HPP + +#include +#include +#include +#include + +#include + +#include "dusk/endian.h" + +namespace dusk::tphd { + +// On-disk GX2 file header. Followed by a stream of BLK{ blocks. +struct Gfx2Header { + /* 0x00 */ char magic[4]; // "Gfx2" + /* 0x04 */ BE(u32) headerSize; +}; + +// Common 0x20-byte header on every BLK{ block. 
+struct Gfx2BlockHeader {
+    /* 0x00 */ char magic[4]; // "BLK{"
+    // Size of this header; parseGtx treats the block body as starting this
+    // many bytes after the start of the block.
+    /* 0x04 */ BE(u32) headerSize;
+    /* 0x08 */ BE(u32) versionMajor;
+    /* 0x0C */ BE(u32) versionMinor;
+    // Block kind; parseGtx handles 0x01 (EOF), 0x0B (surface), 0x0C (image)
+    // and 0x0D (mip chain) and skips every other value.
+    /* 0x10 */ BE(u32) blockType;
+    // Number of body bytes that follow the header.
+    /* 0x14 */ BE(u32) blockDataSize;
+    /* 0x18 */ BE(u32) ident;
+    /* 0x1C */ BE(u32) flags;
+};
+// Pins the on-disk layout: the parser reads this struct straight out of the
+// file bytes via reinterpret_cast.
+static_assert(sizeof(Gfx2BlockHeader) == 0x20);
+
+// On-disk surface-info block body (the GX2 surface descriptor layout).
+// Every field is stored big-endian; BE(u32) comes from dusk/endian.h.
+struct Gx2SurfaceBody {
+    /* 0x00 */ BE(u32) dim;
+    /* 0x04 */ BE(u32) width;
+    /* 0x08 */ BE(u32) height;
+    /* 0x0C */ BE(u32) depth;
+    /* 0x10 */ BE(u32) mipCount;
+    /* 0x14 */ BE(u32) format;
+    /* 0x18 */ BE(u32) aa;
+    /* 0x1C */ BE(u32) use;
+    /* 0x20 */ BE(u32) imgSize;
+    // imgPtr / mipPtr are not read by parseGtx; the pixel data comes from the
+    // image and mip-chain blocks that follow the surface block instead.
+    /* 0x24 */ BE(u32) imgPtr;
+    /* 0x28 */ BE(u32) mipSize;
+    /* 0x2C */ BE(u32) mipPtr;
+    /* 0x30 */ BE(u32) tileMode;
+    /* 0x34 */ BE(u32) swizzle;
+    /* 0x38 */ BE(u32) alignment;
+    /* 0x3C */ BE(u32) pitch;
+    /* 0x40 */ BE(u32) mipOffsets[13];
+};
+// 0x40 bytes of scalar fields + 13 * 4 bytes of mip offsets.
+static_assert(sizeof(Gx2SurfaceBody) == 0x74);
+
+// Parsed form of one surface: plain u32 copies of the Gx2SurfaceBody fields
+// that parseGtx reads, plus views of the associated pixel data.
+struct GtxSurface {
+    u32 width;
+    u32 height;
+    u32 depth;
+    u32 mipCount;
+    u32 format; // GX2 surface format code (0x31 BC1, 0x1A RGBA8, ...)
+ u32 aa; + u32 use; + u32 tileMode; + u32 swizzle; + u32 pitch; + u32 imgSize; // base level size (bytes) + u32 mipSize; // mip chain size (bytes, levels 1..N-1) + std::array mipOffsets; + + std::span baseData; // into the owning GTX buffer + std::span mipData; // into the owning GTX buffer +}; + +std::vector parseGtx(std::span gtxBytes); + +} + +#endif diff --git a/src/dusk/tphd/HdAssetLayer.cpp b/src/dusk/tphd/HdAssetLayer.cpp new file mode 100644 index 0000000000..36f9a254e3 --- /dev/null +++ b/src/dusk/tphd/HdAssetLayer.cpp @@ -0,0 +1,617 @@ +#include "HdAssetLayer.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "JSystem/J3DGraphLoader/J3DModelLoader.h" +#include "JSystem/JKernel/JKRArchive.h" +#include "JSystem/JKernel/JKRDecomp.h" +#include "JSystem/JUtility/JUTTexture.h" +#include "dusk/endian.h" +#include "dusk/logging.h" +#include "AddrLib.hpp" +#include "GtxParser.hpp" +#include "TphdPack.hpp" + +static aurora::Module HdLog("dusk::tphd::hd"); + +namespace dusk::tphd { + +namespace { + +std::filesystem::path g_contentPath; +std::mutex g_cacheMutex; + +// Heap-allocated, never freed — these must outlive g_dComIfG_gameInfo's +// static destructor which holds JKRArchives referencing these bytes. +std::list>& g_mountBuffers() { + static auto* p = new std::list>{}; + return *p; +} +std::unordered_map*>& g_entryNumToBytes() { + static auto* p = new std::unordered_map*>{}; + return *p; +} + +bool endsWithSuffix(std::string_view s, std::string_view suffix) { + return s.size() >= suffix.size() && + s.compare(s.size() - suffix.size(), suffix.size(), suffix) == 0; +} + +// On-disk Yaz0 file header. +struct Yaz0Header { + /* 0x00 */ char magic[4]; // "Yaz0" + /* 0x04 */ BE(u32) decompressedSize; + /* 0x08 */ u8 pad[8]; +}; +static_assert(sizeof(Yaz0Header) == 0x10); + +// If `bytes` is a Yaz0 stream, return the inflated payload; otherwise nullopt. 
+std::optional<std::vector<u8>> tryDecodeYaz0(std::span<const u8> bytes) {
+    if (bytes.size() < sizeof(Yaz0Header) ||
+        std::memcmp(bytes.data(), "Yaz0", 4) != 0) {
+        return std::nullopt;
+    }
+    // NOTE(review): the output size is taken from the file header as-is and
+    // the compressed length is not passed to decodeSZS — presumably HD content
+    // is trusted; confirm before feeding this untrusted data.
+    const auto* hdr = reinterpret_cast<const Yaz0Header*>(bytes.data());
+    const u32 expandedSize = hdr->decompressedSize;
+    std::vector<u8> decoded(expandedSize);
+    JKRDecomp::decodeSZS(const_cast<u8*>(bytes.data()), decoded.data(),
+                         expandedSize, 0);
+    return decoded;
+}
+
+// Reads the whole file at `path` into memory. Returns std::nullopt when the
+// file cannot be opened, its size cannot be determined, or the read comes up
+// short. The FILE handle is closed on every return path.
+std::optional<std::vector<u8>> readWholeFile(const std::filesystem::path& path) {
+    std::FILE* f = std::fopen(path.string().c_str(), "rb");
+    if (!f) return std::nullopt;
+
+    // Size the buffer via seek/tell. Both seeks are checked: after a failed
+    // seek, ftell() and the following fread() would work from an unknown
+    // file position.
+    long len = -1;
+    if (std::fseek(f, 0, SEEK_END) == 0) {
+        len = std::ftell(f);
+    }
+    if (len < 0 || std::fseek(f, 0, SEEK_SET) != 0) {
+        std::fclose(f);
+        return std::nullopt;
+    }
+
+    std::vector<u8> buf(static_cast<size_t>(len));
+    const size_t got = std::fread(buf.data(), 1, buf.size(), f);
+    std::fclose(f);
+    if (got != buf.size()) return std::nullopt;
+    return buf;
+}
+
+// Extract the path portion under "res/" from JSystem's absolute path.
+// Example: "/arcName/res/Stage/D_SB10/R00_00.arc" -> "res/Stage/D_SB10/R00_00.arc"
+// Returns an empty view when the path contains no "res/". The result is a
+// view into `gcPath`, so it is only valid while that storage is.
+std::string_view extractResPath(std::string_view gcPath) {
+    const auto p = gcPath.find("res/");
+    if (p == std::string_view::npos) return {};
+    return gcPath.substr(p);
+}
+
+// Case-insensitive ASCII suffix match — RARC archives lowercase filenames
+// at build time, but our HD pack.gz preserves the original Wii-U authoring
+// camelCase. Example: RARC has "coverbg.bti", pack has "coverBG.bti.gtx".
+bool endsWithSuffixCI(std::string_view s, std::string_view suffix) {
+    if (s.size() < suffix.size()) return false;
+    // Folds A-Z only; every other byte compares exactly.
+    auto toLower = [](unsigned char c) -> unsigned char {
+        return (c >= 'A' && c <= 'Z') ? static_cast<unsigned char>(c + ('a' - 'A')) : c;
+    };
+    const char* a = s.data() + (s.size() - suffix.size());
+    for (size_t i = 0; i < suffix.size(); ++i) {
+        if (toLower(a[i]) != toLower(suffix[i])) return false;
+    }
+    return true;
+}
+
+// Match an arc-relative path (e.g. "bmdr/model.bmd") against the Gfx2 entries
+// in the HD pack, which look like "tex/.../<arcRelPath>.gtx".
+const TmpkEntry* findGtxBySuffix(const TphdPack& pack, std::string_view arcRelPath) {
+    // The leading '/' anchors the match on a whole path component, so
+    // "xmodel.bmd.gtx" does not match "model.bmd".
+    const std::string tail = "/" + std::string(arcRelPath) + ".gtx";
+    for (const auto& e : pack.entries()) {
+        // Only entries whose payload starts with the "Gfx2" magic qualify.
+        if (e.data.size() < 4 || std::memcmp(e.data.data(), "Gfx2", 4) != 0) continue;
+        if (endsWithSuffixCI(e.name, tail)) return &e;
+    }
+    // No entry matched.
+    return nullptr;
+}
+
+// Post-deswizzle CPU expansions to RGBA8. Used for formats whose HD layout
+// can't be directly sampled with a GPU view swizzle (IA4 nibble unpack,
+// RGB565 16-bit), and as a fallback if R8_PC/RG8_PC view swizzle isn't
+// available. GC sampling semantics: I8 -> (I,I,I,I); IA4/IA8 -> (I,I,I,A).
+
+// Expands `width` x `height` RGB565 pixels from `in` to RGBA8 with alpha 0xFF.
+// The result always holds width * height * 4 bytes; pixels for which `in` is
+// too short are left zero-filled.
+std::vector<u8> expandR5G6B5toRgba8(std::span<const u8> in, u32 width, u32 height) {
+    const size_t pixelCount = static_cast<size_t>(width) * height;
+    std::vector<u8> out(pixelCount * 4);
+    for (size_t i = 0; i < pixelCount && (i * 2 + 1) < in.size(); ++i) {
+        // GX2 stores RGB565 pixel data in GPU-native LE. Assemble the value
+        // byte-wise so the decode does not depend on host endianness.
+        const u16 px = static_cast<u16>(in[i * 2] | (in[i * 2 + 1] << 8));
+        const u8 r5 = static_cast<u8>((px >> 11) & 0x1F);
+        const u8 g6 = static_cast<u8>((px >> 5) & 0x3F);
+        const u8 b5 = static_cast<u8>(px & 0x1F);
+        // Widen to 8 bits by replicating the top bits into the low bits, so
+        // full-scale input maps to 0xFF.
+        out[i * 4 + 0] = static_cast<u8>((r5 << 3) | (r5 >> 2));
+        out[i * 4 + 1] = static_cast<u8>((g6 << 2) | (g6 >> 4));
+        out[i * 4 + 2] = static_cast<u8>((b5 << 3) | (b5 >> 2));
+        out[i * 4 + 3] = 0xFF;
+    }
+    return out;
+}
+
+// IA4: high nibble = A, low nibble = I (matches aurora's GC IA4 decoder).
+std::vector<u8> expandIA4toRgba8(std::span<const u8> in, u32 width, u32 height) {
+    // One input byte per pixel. The result always holds width * height * 4
+    // bytes; pixels for which `in` is too short are left zero-filled.
+    const size_t pixelCount = static_cast<size_t>(width) * height;
+    std::vector<u8> out(pixelCount * 4);
+    for (size_t i = 0; i < pixelCount && i < in.size(); ++i) {
+        const u8 b = in[i];
+        // Replicate each nibble into both halves of its byte (0xN -> 0xNN).
+        const u8 A = static_cast<u8>((b & 0xF0) | (b >> 4));
+        const u8 I = static_cast<u8>(((b & 0x0F) << 4) | (b & 0x0F));
+        out[i * 4 + 0] = I;
+        out[i * 4 + 1] = I;
+        out[i * 4 + 2] = I;
+        out[i * 4 + 3] = A;
+    }
+    return out;
+}
+
+// CPU-side conversion applied to a surface after deswizzling.
+enum class Expansion {
+    None,
+    R5G6B5_to_RGBA8,
+    IA4_to_RGBA8,
+};
+
+// One row of the GX2 -> PC format table.
+struct Gx2FormatMapping {
+    u32 gx2Format;       // GX2 surface format
+    u8 newGxFormat;      // Aurora PC-target format
+    u32 bpp;             // Deswizzle bits-per-pixel (per pixel, or per 4x4 block for BCn)
+    bool isBcn;
+    Expansion expansion; // Optional post-deswizzle CPU expansion
+};
+
+// I8/IA8 pass through as R8_PC/RG8_PC (aurora applies .rrrr/.rrrg view
+// swizzle on the GPU side — a quarter / half the VRAM of CPU-expanded RGBA8).
+// IA4 + RGB565 need CPU expansion (nibble / 16-bit unpack). CMPR stays
+// BC1_PC (compressed on the GPU).
+constexpr Gx2FormatMapping kFormatMap[] = { + // gx2 fmt PC target bpp isBcn expansion + { 0x01 /* I8 */, 0x41 /* R8_PC */, 8, false, Expansion::None }, + { 0x02 /* IA4 */, 0x46 /* RGBA8_PC */, 8, false, Expansion::IA4_to_RGBA8 }, + { 0x07 /* IA8 */, 0x43 /* RG8_PC */, 16, false, Expansion::None }, + { 0x08 /* RGB565 */, 0x46 /* RGBA8_PC */, 16, false, Expansion::R5G6B5_to_RGBA8 }, + { 0x1A /* RGBA8 */, 0x46 /* RGBA8_PC */, 32, false, Expansion::None }, + { 0x31 /* CMPR */, 0x4E /* BC1_PC */, 64, true, Expansion::None }, +}; + +const Gx2FormatMapping* findFormatMapping(u32 gx2Format) { + for (const auto& m : kFormatMap) { + if (m.gx2Format == gx2Format) return &m; + } + return nullptr; +} + +std::vector applyExpansion(Expansion exp, std::vector linear, u32 w, u32 h) { + switch (exp) { + case Expansion::R5G6B5_to_RGBA8: return expandR5G6B5toRgba8(linear, w, h); + case Expansion::IA4_to_RGBA8: return expandIA4toRgba8(linear, w, h); + case Expansion::None: break; + } + return linear; +} + +// Per-mip tile-mode + pitch. Demote rule mirrored from decaf-emu's +// R600AddrLib::ComputeSurfaceMipLevelTileMode (MIT, AMD-derived) — see +// AddrLib.cpp header for the full copyright notice. +// +// R700 macro-tile size: 32 × 16 elements (BCN element = 4×4 block). +// Mips below that are demoted to Tiled1DThin1 (microtile-only, 8-element +// align). +struct MipLevelDesc { + u32 width; + u32 height; + u32 pitch; + addrlib::TileMode tileMode; +}; + +MipLevelDesc mipLevelDesc(const GtxSurface& s, u32 level, bool isBcn, u32 bpp) { + MipLevelDesc d{}; + d.width = std::max(1u, s.width >> level); + d.height = std::max(1u, s.height >> level); + d.tileMode = static_cast(s.tileMode); + + if (level == 0) { + d.pitch = s.pitch; + return d; + } + + if (d.tileMode == addrlib::TileMode::Tiled2DThin1 || + d.tileMode == addrlib::TileMode::Tiled2BThin1) { + // Mirror decaf's widthAlignFactor: when one microtile is smaller than + // the pipe interleave (256 B), the demote threshold scales up. 
+ const u32 microTileBytes = (bpp * 64) / 8; + const u32 widthAlignFactor = (microTileBytes <= 256) ? 256 / microTileBytes : 1; + const u32 demoteWidth = widthAlignFactor * 32; + const u32 wElem = isBcn ? (d.width + 3) / 4 : d.width; + const u32 hElem = isBcn ? (d.height + 3) / 4 : d.height; + if (wElem < demoteWidth || hElem < 16) { + d.tileMode = addrlib::TileMode::Tiled1DThin1; + } + } + + const bool is1D = (d.tileMode == addrlib::TileMode::Tiled1DThin1 || + d.tileMode == addrlib::TileMode::Tiled1DThick); + const u32 alignment = is1D ? 8u : 32u; + u32 levelPitch = std::max(1u, s.pitch >> level); + levelPitch = ((levelPitch + alignment - 1) / alignment) * alignment; + d.pitch = levelPitch; + return d; +} + +// Slice the bytes for a single mip level. Wii-U quirk: mipOffsets[0] is +// often image_size, not a mipData offset. Level 1 +// always starts at 0 in mipData; level >= 2 uses mipOffsets[level - 1]. +std::span mipLevelData(const GtxSurface& s, u32 level) { + if (level == 0) return s.baseData; + if (level >= s.mipCount) return {}; + + u32 start = 0; + if (level >= 2 && level - 1 < s.mipOffsets.size()) { + start = s.mipOffsets[level - 1]; + } + if (start >= s.mipData.size()) return {}; + + u32 end = static_cast(s.mipData.size()); + if (level + 1 < s.mipCount && level < s.mipOffsets.size()) { + const u32 next = s.mipOffsets[level]; + if (next > start && next <= s.mipData.size()) end = next; + } + return s.mipData.subspan(start, end - start); +} + +struct DeswizzleResult { + std::vector bytes; + u32 mipCount; +}; + +DeswizzleResult deswizzleAllMips(const Gx2FormatMapping& m, const GtxSurface& s) { + DeswizzleResult out{}; + const u32 maxLevels = std::min(s.mipCount, 13u); + for (u32 level = 0; level < maxLevels; ++level) { + const std::span slice = mipLevelData(s, level); + if (slice.empty()) break; + + const MipLevelDesc lvl = mipLevelDesc(s, level, m.isBcn, m.bpp); + const addrlib::SurfaceDesc desc{ + .width = lvl.width, + .height = lvl.height, + .pitch = 
lvl.pitch, + .bpp = m.bpp, + .tileMode = lvl.tileMode, + .swizzle = s.swizzle, + .isBcn = m.isBcn, + .isDepth = false, + }; + + auto linear = applyExpansion(m.expansion, + addrlib::deswizzle(desc, slice), + lvl.width, lvl.height); + out.bytes.insert(out.bytes.end(), linear.begin(), linear.end()); + out.mipCount = level + 1; + } + return out; +} + +void registerHdSurface(const Gx2FormatMapping& m, const GtxSurface& s, + const void* pixelPtr, std::string_view gtxName, + u32 surfaceIdx) { + auto decoded = deswizzleAllMips(m, s); + + HdLog.info("HD reg: ptr={} fmt=0x{:02X} {}x{} mips={}/{} bytes={} gtx={}[{}]", + pixelPtr, m.newGxFormat, s.width, s.height, + decoded.mipCount, s.mipCount, decoded.bytes.size(), + gtxName, surfaceIdx); + + aurora::gfx::HdReplacement r; + r.bytes = std::move(decoded.bytes); + r.width = s.width; + r.height = s.height; + r.gxFormat = m.newGxFormat; + r.mipCount = std::max(decoded.mipCount, 1u); + aurora::gfx::hd_register_replacement(pixelPtr, std::move(r)); +} + +// Lightweight RARC walker that returns per-file offsets without copying +// arc bytes — we need absolute pointers into the cached HD arc bytes +// (stable address) to match what the game later passes to GXInitTexObj. +struct ArcFileInfo { + std::string path; // e.g. 
"bmdr/model.bmd" + u32 dataOffset; // absolute offset from arc base + u32 dataSize; +}; + +std::vector parseRarcFiles(std::span arc) { + std::vector out; + if (arc.size() < 0x40 || std::memcmp(arc.data(), "RARC", 4) != 0) return out; + + constexpr size_t kMetaBase = sizeof(SArcHeader); // = 0x20 + if (arc.size() < kMetaBase + sizeof(SArcDataInfo)) return out; + + const auto* hdr = reinterpret_cast(arc.data()); + const auto* dataInfo = reinterpret_cast(arc.data() + kMetaBase); + + const u32 nodeCount = dataInfo->num_nodes; + const size_t nodeTbl = dataInfo->node_offset + kMetaBase; + const size_t fileTbl = dataInfo->file_entry_offset + kMetaBase; + const size_t strTbl = dataInfo->string_table_offset + kMetaBase; + const size_t dataBase = kMetaBase + hdr->file_data_offset; + + auto readStringAt = [&](u32 offset) -> std::string { + const u8* start = arc.data() + strTbl + offset; + const u8* bufferEnd = arc.data() + arc.size(); + if (start >= bufferEnd) return {}; + + const void* nul = std::memchr(start, 0, + static_cast(bufferEnd - start)); + const u8* terminator = nul ? static_cast(nul) : bufferEnd; + return std::string(reinterpret_cast(start), + static_cast(terminator - start)); + }; + + const auto* nodes = reinterpret_cast( + arc.data() + nodeTbl); + const auto* files = reinterpret_cast( + arc.data() + fileTbl); + + for (u32 ni = 0; ni < nodeCount; ++ni) { + const auto& node = nodes[ni]; + const std::string dirName = readStringAt(node.name_offset); + const u16 fc = node.num_entries; + const u32 firstIdx = node.first_file_index; + const bool isRoot = (ni == 0); + + for (u32 fi = 0; fi < fc; ++fi) { + const auto& entry = files[firstIdx + fi]; + const u32 typeFlagsAndName = entry.type_flags_and_name_offset; + const u8 typeFlags = static_cast(typeFlagsAndName >> 24); + // Bit 0x01 = file, 0x02 = directory. We only want files. + if ((typeFlags & 0x03) != 0x01) continue; + + std::string fname = readStringAt(typeFlagsAndName & 0xFFFFFF); + if (fname == "." 
|| fname == "..") continue; + + out.push_back({ + (!isRoot && !dirName.empty()) + ? dirName + "/" + fname + : std::move(fname), + static_cast(dataBase + entry.data_offset), + entry.data_size, + }); + } + } + return out; +} + +// Absolute offset of slot `slotIdx`'s BTI header within a BMD's TEX1 block. +// Returns 0 on failure (the TEX1 table never sits at offset 0, so 0 is a +// safe sentinel). +u32 bmdSlotBtiOffset(std::span bmd, u32 slotIdx) { + constexpr size_t kBlocksOffset = offsetof(J3DModelFileData, mBlocks); // = 0x20 + if (bmd.size() < kBlocksOffset || + std::memcmp(bmd.data(), "J3D2", 4) != 0) return 0; + + const auto* fileData = reinterpret_cast(bmd.data()); + const u32 numSections = fileData->mBlockNum; + size_t pos = kBlocksOffset; + + for (u32 i = 0; i < numSections && pos + sizeof(J3DModelBlock) <= bmd.size(); ++i) { + const auto* blk = reinterpret_cast(bmd.data() + pos); + const u32 blockSize = blk->mBlockSize; + if (blk->mBlockType == 'TEX1') { + const auto* tex1 = reinterpret_cast(bmd.data() + pos); + const u16 numTex = tex1->mTextureNum; + if (slotIdx >= numTex) return 0; + const size_t btiAbs = pos + static_cast(tex1->mpTextureRes) + slotIdx * 0x20; + if (btiAbs + 0x20 > bmd.size()) return 0; + return static_cast(btiAbs); + } + if (blockSize == 0) break; + pos += blockSize; + } + return 0; +} + +// Walk the HD arc, pair BMDs with their pack.gz GTX entries, deswizzle each +// HD surface, and register the decoded bytes with aurora under the absolute +// pointer that GXInitTexObj will later receive. +// +// arcBytes must be the STABLE cache vector — its data() must not move after +// this call, or aurora's pointer lookups will miss. +void registerHdTexturesForArc(std::vector& arcBytes, + const std::vector& files, + const TphdPack& pack, + std::string_view arcLabel) { + size_t bmdReg = 0; + size_t btiReg = 0; + + // Phase A: per-slot textures inside BMD/BDL models. 
+ for (const auto& f : files) { + if (!endsWithSuffix(f.path, ".bmd") && !endsWithSuffix(f.path, ".bdl")) continue; + + const TmpkEntry* gtx = findGtxBySuffix(pack, f.path); + if (!gtx) continue; + + std::span bmdBytes(arcBytes.data() + f.dataOffset, f.dataSize); + auto surfaces = parseGtx(gtx->data); + + for (u32 i = 0; i < surfaces.size(); ++i) { + const auto& s = surfaces[i]; + if (s.baseData.empty()) continue; + + const Gx2FormatMapping* m = findFormatMapping(s.format); + if (!m) continue; + + // HD-stub BMDs collapse every BTI's imageOffset to the same + // pixel address. Rewrite each to be slot-unique so our pointer + // map doesn't overwrite. + const u32 btiAbs = bmdSlotBtiOffset(bmdBytes, i); + if (btiAbs == 0) continue; + + auto* timg = reinterpret_cast( + arcBytes.data() + f.dataOffset + btiAbs); + if (timg->imageOffset == 0) { + HdLog.debug("Skip cross-arc placeholder slot {} in {}: " + "imageOffset==0", + i, gtx->name); + continue; + } + + const u32 newImgOff = 0x20 + i * 0x20; + timg->imageOffset = static_cast(newImgOff); + registerHdSurface(*m, s, + arcBytes.data() + f.dataOffset + btiAbs + newImgOff, + gtx->name, i); + ++bmdReg; + } + } + + // Phase B: standalone .bti files. Each BTI is its own arc entry; the + // game loads it via JUTTexture (or similar) which calls GXInitTexObj + // with `(u8*)resTIMG + imageOffset`. Register that exact pointer. + for (const auto& f : files) { + if (!endsWithSuffix(f.path, ".bti")) continue; + if (f.dataSize < 0x20) continue; + + const TmpkEntry* gtx = findGtxBySuffix(pack, f.path); + if (!gtx) continue; + + auto surfaces = parseGtx(gtx->data); + if (surfaces.empty()) continue; + const auto& s = surfaces[0]; + if (s.baseData.empty()) continue; + + const Gx2FormatMapping* m = findFormatMapping(s.format); + if (!m) continue; + + // HD-stub BTIs put garbage in imageOffset. Write 0x20 so BOTH + // consumer paths land on the same address (JUTTexture::storeTIMG and + // direct-access helpers like dKyr_set_btitex_common). 
Both compute + // i_img + 0x20, matching where we register below. + auto* timg = reinterpret_cast(arcBytes.data() + f.dataOffset); + timg->imageOffset = 0x20; + registerHdSurface(*m, s, arcBytes.data() + f.dataOffset + 0x20, + gtx->name, 0); + ++btiReg; + } + + HdLog.info("registerHdTextures[{}]: {} BMD-slot, {} standalone-BTI replacements", + arcLabel, bmdReg, btiReg); +} + +} + +void setHdContentPath(std::filesystem::path contentPath) { + g_contentPath = std::move(contentPath); + std::lock_guard lk(g_cacheMutex); + g_mountBuffers().clear(); + g_entryNumToBytes().clear(); + aurora::gfx::hd_clear_replacements(); + aurora::gfx::hd_clear_arc_ranges(); + HdLog.info("HD content path set to: {}", + g_contentPath.empty() ? "(disabled)" : g_contentPath.string()); +} + +// HD arcs whose Wii-U layouts don't match the GC UI pipeline. +static constexpr std::string_view kHdSkipList[] = { + "res/Layout/button.arc", + "res/Layout/Title2D.arc", + "res/Layout/main2D.arc", +}; + +std::optional*> tryLoadHdArchive(std::string_view gcPath) { + if (g_contentPath.empty()) return std::nullopt; + + std::string_view resPath = extractResPath(gcPath); + if (resPath.empty()) return std::nullopt; + + for (auto skip : kHdSkipList) { + if (resPath == skip) return std::nullopt; + } + + std::filesystem::path hdArcPath = g_contentPath / std::string(resPath); + if (!std::filesystem::exists(hdArcPath)) { + return std::nullopt; // no HD override — vanilla GC path + } + + auto hdBytesOpt = readWholeFile(hdArcPath); + if (!hdBytesOpt) { + HdLog.warn("HD arc read failed: {}", hdArcPath.string()); + return std::nullopt; + } + + if (auto inflated = tryDecodeYaz0(*hdBytesOpt)) { + HdLog.info("HD arc Yaz0-decompressed: {} -> {} bytes", + hdArcPath.filename().string(), inflated->size()); + hdBytesOpt = std::move(inflated); + } + + auto hdFiles = parseRarcFiles(std::span( + hdBytesOpt->data(), hdBytesOpt->size())); + const bool hasReplaceableContent = std::any_of(hdFiles.begin(), hdFiles.end(), + [](const 
ArcFileInfo& f) { + return endsWithSuffix(f.path, ".bmd") || + endsWithSuffix(f.path, ".bdl") || + endsWithSuffix(f.path, ".bti") || + endsWithSuffix(f.path, ".bfn") || + endsWithSuffix(f.path, ".brfnt"); + }); + if (!hasReplaceableContent) { + HdLog.info("HD arc {} has no replaceable assets — skipping", + hdArcPath.filename().string()); + return std::nullopt; + } + + // Sidecar pack.gz holds the HD textures. + auto hdPackPath = hdArcPath; + hdPackPath.replace_extension(".pack.gz"); + std::optional hdPack; + if (std::filesystem::exists(hdPackPath)) { + hdPack = TphdPack::loadFromFile(hdPackPath); + if (!hdPack) HdLog.warn("HD pack failed to load: {}", hdPackPath.string()); + } + + // std::list keeps element addresses stable for aurora's pointer map. + std::vector* mountBytes; + std::string filename = hdArcPath.filename().string(); + { + std::lock_guard lk(g_cacheMutex); + g_mountBuffers().emplace_back(std::move(*hdBytesOpt)); + mountBytes = &g_mountBuffers().back(); + } + + HdLog.info("HD arc mount buffer allocated: {} at {} ({} bytes, pack.gz={})", + filename, static_cast(mountBytes->data()), + mountBytes->size(), hdPack ? "yes" : "no"); + + aurora::gfx::hd_register_arc_range(mountBytes->data(), mountBytes->size(), + filename); + if (hdPack) { + registerHdTexturesForArc(*mountBytes, hdFiles, *hdPack, filename); + } + + return mountBytes; +} + +void registerHdBytesForEntryNum(s32 entryNum, const std::vector* bytes) { + if (entryNum < 0 || bytes == nullptr) return; + std::lock_guard lk(g_cacheMutex); + g_entryNumToBytes()[entryNum] = bytes; +} + +const std::vector* getHdBytesForEntryNum(s32 entryNum) { + if (entryNum < 0) return nullptr; + std::lock_guard lk(g_cacheMutex); + auto it = g_entryNumToBytes().find(entryNum); + return (it != g_entryNumToBytes().end()) ? 
it->second : nullptr; +} + +} diff --git a/src/dusk/tphd/HdAssetLayer.hpp b/src/dusk/tphd/HdAssetLayer.hpp new file mode 100644 index 0000000000..8361bf7e87 --- /dev/null +++ b/src/dusk/tphd/HdAssetLayer.hpp @@ -0,0 +1,30 @@ +#ifndef DUSK_TPHD_HD_ASSET_LAYER_HPP +#define DUSK_TPHD_HD_ASSET_LAYER_HPP + +#include +#include +#include +#include + +#include + +namespace dusk::tphd { + +// Configure the base directory for HD asset overrides. `contentPath` should +// point at a Wii-U `content/` directory (the parent of `res/`). Empty path +// disables HD overrides. +void setHdContentPath(std::filesystem::path contentPath); + +// Returns a pointer to the cached HD archive bytes if an HD variant exists +// for the requested GC path, or std::nullopt otherwise. Caller must not +// outlive the next setHdContentPath() call. +std::optional*> tryLoadHdArchive(std::string_view gcPath); + +// HD bytes lookup by DVD entry number, used by JKRMemArchive's entryNum +// constructor to substitute HD content. +void registerHdBytesForEntryNum(s32 entryNum, const std::vector* bytes); +const std::vector* getHdBytesForEntryNum(s32 entryNum); + +} + +#endif diff --git a/src/dusk/tphd/TphdPack.cpp b/src/dusk/tphd/TphdPack.cpp new file mode 100644 index 0000000000..d4301c55be --- /dev/null +++ b/src/dusk/tphd/TphdPack.cpp @@ -0,0 +1,120 @@ +#include "TphdPack.hpp" + +#include + +#include +#include + +#include "dusk/endian.h" +#include "dusk/logging.h" + +static aurora::Module TphdLog("dusk::tphd"); + +namespace dusk::tphd { + +namespace { + +std::optional> readFile(const std::filesystem::path& path) { + std::FILE* f = std::fopen(path.string().c_str(), "rb"); + if (!f) return std::nullopt; + std::fseek(f, 0, SEEK_END); + long len = std::ftell(f); + std::fseek(f, 0, SEEK_SET); + if (len < 0) { std::fclose(f); return std::nullopt; } + std::vector buf(static_cast(len)); + size_t got = std::fread(buf.data(), 1, buf.size(), f); + std::fclose(f); + if (got != buf.size()) return std::nullopt; + return 
buf;
+}
+
+}
+
+// Inflates a gzip stream held entirely in memory.
+// The output buffer is sized from the ISIZE trailer (uncompressed length
+// modulo 2^32); a stream that does not finish within that buffer is rejected.
+// Returns std::nullopt when the input is too short to be a gzip member, lacks
+// the gzip magic, is too large for zlib's uInt counters, or fails to inflate
+// to completion.
+std::optional<std::vector<u8>> decompressGzip(std::span<const u8> in) {
+    // A gzip member is at least a 10-byte header plus an 8-byte trailer.
+    if (in.size() < 18) return std::nullopt;
+    if (in[0] != 0x1F || in[1] != 0x8B) return std::nullopt;
+
+    // zlib counts input in uInt; refuse input that would be silently truncated.
+    const auto availIn = static_cast<uInt>(in.size());
+    if (availIn != in.size()) return std::nullopt;
+
+    // ISIZE is stored little-endian in the last four bytes. Decode it
+    // byte-wise so big-endian hosts read the same value.
+    const u8* tail = in.data() + in.size() - 4;
+    const u32 isize = static_cast<u32>(tail[0]) | (static_cast<u32>(tail[1]) << 8) |
+                      (static_cast<u32>(tail[2]) << 16) | (static_cast<u32>(tail[3]) << 24);
+    std::vector<u8> out(isize);
+
+    z_stream strm{};
+    strm.next_in = const_cast<Bytef*>(in.data());
+    strm.avail_in = availIn;
+    strm.next_out = out.data();
+    strm.avail_out = static_cast<uInt>(out.size());
+
+    // windowBits 15 + 16 selects gzip framing instead of raw zlib.
+    if (inflateInit2(&strm, 15 + 16) != Z_OK) return std::nullopt;
+    const int rc = inflate(&strm, Z_FINISH);
+    inflateEnd(&strm);
+    if (rc != Z_STREAM_END) return std::nullopt;
+    return out;
+}
+
+// Parses the TMPK directory at the start of `in`. The returned entries are
+// views into `in` (name and data are not copied), so they are valid only
+// while the underlying buffer is. Returns an empty vector when the header is
+// missing, the magic is wrong, or the entry table does not fit; individual
+// entries that point outside the buffer are skipped.
+std::vector<TmpkEntry> parseTmpk(std::span<const u8> in) {
+    std::vector<TmpkEntry> out;
+    if (in.size() < sizeof(TmpkRawHeader)) return out;
+
+    const auto* hdr = reinterpret_cast<const TmpkRawHeader*>(in.data());
+    if (std::memcmp(hdr->magic, "TMPK", 4) != 0) return out;
+
+    // Compare by division so a huge `count` cannot wrap the multiplication
+    // on targets where size_t is 32 bits.
+    const u32 count = hdr->count;
+    if (count > (in.size() - sizeof(TmpkRawHeader)) / sizeof(TmpkRawEntry)) return out;
+
+    const auto* entries = reinterpret_cast<const TmpkRawEntry*>(
+        in.data() + sizeof(TmpkRawHeader));
+
+    out.reserve(count);
+    for (u32 i = 0; i < count; ++i) {
+        const u32 nameOff = entries[i].nameOff;
+        const u32 dataOff = entries[i].dataOff;
+        const u32 dataSize = entries[i].dataSize;
+        const u32 flags = entries[i].flags;
+
+        // Skip entries that point outside the buffer. dataOff + dataSize is
+        // deliberately not computed: both are u32, so the sum can wrap past
+        // zero and let an out-of-range entry through to subspan().
+        if (nameOff >= in.size() || dataOff > in.size() ||
+            dataSize > in.size() - dataOff) {
+            continue;
+        }
+
+        const char* nameStart = reinterpret_cast<const char*>(in.data() + nameOff);
+        size_t maxLen = in.size() - nameOff;
+        const void* nul = std::memchr(nameStart, 0, maxLen);
+        size_t nameLen = nul ?
static_cast(static_cast(nul) - nameStart) + : maxLen; + + out.push_back({ + std::string_view(nameStart, nameLen), + in.subspan(dataOff, dataSize), + flags, + }); + } + return out; +} + +std::optional TphdPack::loadFromMemory(std::span gzipBytes) { + auto inflated = decompressGzip(gzipBytes); + if (!inflated) return std::nullopt; + + TphdPack p; + p.m_buffer = std::move(*inflated); + p.m_entries = parseTmpk(std::span(p.m_buffer.data(), p.m_buffer.size())); + if (p.m_entries.empty() && !p.m_buffer.empty()) { + TphdLog.warn("TMPK parse yielded 0 entries (buffer size {})", p.m_buffer.size()); + } + return p; +} + +std::optional TphdPack::loadFromFile(const std::filesystem::path& path) { + auto raw = readFile(path); + if (!raw) { + TphdLog.error("Failed to read {}", path.string()); + return std::nullopt; + } + return loadFromMemory(*raw); +} + +const TmpkEntry* TphdPack::find(std::string_view name) const { + for (const auto& e : m_entries) { + if (e.name == name) return &e; + } + return nullptr; +} + +} diff --git a/src/dusk/tphd/TphdPack.hpp b/src/dusk/tphd/TphdPack.hpp new file mode 100644 index 0000000000..fdc6272ac3 --- /dev/null +++ b/src/dusk/tphd/TphdPack.hpp @@ -0,0 +1,59 @@ +#ifndef DUSK_TPHD_TPHD_PACK_HPP +#define DUSK_TPHD_TPHD_PACK_HPP + +#include +#include +#include +#include +#include + +#include + +#include "dusk/endian.h" + +namespace dusk::tphd { + +// On-disk TMPK layout. +struct TmpkRawHeader { + /* 0x00 */ char magic[4]; // "TMPK" + /* 0x04 */ BE(u32) count; + /* 0x08 */ u8 pad[8]; +}; +static_assert(sizeof(TmpkRawHeader) == 0x10); + +struct TmpkRawEntry { + /* 0x00 */ BE(u32) nameOff; + /* 0x04 */ BE(u32) dataOff; + /* 0x08 */ BE(u32) dataSize; + /* 0x0C */ BE(u32) flags; +}; +static_assert(sizeof(TmpkRawEntry) == 0x10); + +// Parsed TMPK entry: a view into the inflated pack buffer. 
+struct TmpkEntry { + std::string_view name; + std::span data; + u32 flags; +}; + +class TphdPack { +public: + static std::optional loadFromFile(const std::filesystem::path& path); + static std::optional loadFromMemory(std::span gzipBytes); + + const std::vector& entries() const { return m_entries; } + const TmpkEntry* find(std::string_view name) const; + +private: + TphdPack() = default; + + std::vector m_buffer; + std::vector m_entries; +}; + +std::optional> decompressGzip(std::span in); +std::vector parseTmpk(std::span in); + +} + +#endif diff --git a/src/f_op/f_op_actor_mng.cpp b/src/f_op/f_op_actor_mng.cpp index 208e868db2..25214c93bc 100644 --- a/src/f_op/f_op_actor_mng.cpp +++ b/src/f_op/f_op_actor_mng.cpp @@ -730,7 +730,10 @@ u8 var_r30 = fopAcM::HeapAdjustEntry; #endif u32 size = i_size & 0xFFFFFF; -#if TARGET_PC +#if DUSK_TPHD + // With TP-HD asset overlays, individual BMDs can be 5-10x their GC originals. + size *= 8; +#elif TARGET_PC size *= 2; #endif bool result = fopAcM_entrySolidHeap_(i_actor, i_heapCallback, size); diff --git a/src/m_Do/m_Do_dvd_thread.cpp b/src/m_Do/m_Do_dvd_thread.cpp index 08197ed30b..1851b14669 100644 --- a/src/m_Do/m_Do_dvd_thread.cpp +++ b/src/m_Do/m_Do_dvd_thread.cpp @@ -16,6 +16,10 @@ #include "m_Do/m_Do_ext.h" #include "os_report.h" +#if DUSK_TPHD +#include "dusk/tphd/HdAssetLayer.hpp" +#endif + s32 mDoDvdThd::main(void* param_0) { JKRThread(OSGetCurrentThread(), 0); #if TARGET_PC @@ -107,6 +111,14 @@ static s32 my_DVDConvertPathToEntrynum(char const* path) { JUT_WARN(437, "can\'t open:[%s]\n", path); } } +#endif +#if DUSK_TPHD + // TPHD arc redirect: cache HD bytes by entry number. 
+    if (entrynum >= 0 && path != NULL) {
+        if (auto hdBytes = dusk::tphd::tryLoadHdArchive(path)) {
+            dusk::tphd::registerHdBytesForEntryNum(entrynum, *hdBytes);
+        }
+    }
 #endif
     return entrynum;
 }
diff --git a/src/m_Do/m_Do_machine.cpp b/src/m_Do/m_Do_machine.cpp
index a8f4f6140f..db6beeacfd 100644
--- a/src/m_Do/m_Do_machine.cpp
+++ b/src/m_Do/m_Do_machine.cpp
@@ -813,6 +813,16 @@ int mDoMch_Create() {
     gameHeapSize += 0x200000;
     gameHeapSize += 0x100000;
     dynamicLinkHeapSize = 0x180000;
+#if DUSK_TPHD
+    // HD assets ship much larger archives/actors (CMPR texture injection
+    // pushes stage BMDs past 8 MB and Link's Kmdl past 5 MB). Parent arena is
+    // 1 GB on PC, distribute generously across every heap that holds model
+    // or animation data at runtime.
+    archiveHeapSize += 0x08000000;      // +128 MB (large RARCs)
+    gameHeapSize += 0x10000000;         // +256 MB (parent of per-actor heaps like "Alink original")
+    j2dHeapSize += 0x01000000;          // +16 MB  (UI textures)
+    dynamicLinkHeapSize += 0x01000000;  // +16 MB
+#endif
 
 #if !DEBUG
     // Fakematch because the heap sizes differ between debug and retail.
@@ -862,7 +872,11 @@ int mDoMch_Create() {
 #endif
     archiveHeapSize *= 2;
     j2dHeapSize *= 2;
+#if DUSK_TPHD
+    gameHeapSize *= 2;  // NOTE(review): presumably 2x suffices because TPHD already adds 256 MB above - confirm
+#else
     gameHeapSize *= 20; // NOTE: increased from 2 to 20 to try to solve heap alloc crashes. maybe do a better fix later
+#endif
 #endif
 
     JFWSystem::setSysHeapSize(arenaSize);
diff --git a/src/m_Do/m_Do_main.cpp b/src/m_Do/m_Do_main.cpp
index 8719998bd5..ad6604e9c7 100644
--- a/src/m_Do/m_Do_main.cpp
+++ b/src/m_Do/m_Do_main.cpp
@@ -79,6 +79,10 @@
 #include "dusk/settings.h"
 #include "dusk/version.hpp"
 #include "dusk/discord_presence.hpp"
+#if DUSK_TPHD
+#include "dusk/tphd/HdAssetLayer.hpp"
+#include "dusk/tphd/TphdPack.hpp"
+#endif
 #include "tracy/Tracy.hpp"
 #include "f_pc/f_pc_draw.h"
 #include "tracy/Tracy.hpp"
@@ -569,7 +573,14 @@ int game_main(int argc, char* argv[]) {
     config.desiredBackend = ResolveDesiredBackend(parsed_arg_options);
     config.logCallback = &aurora_log_callback;
     config.logLevel = startupLogLevel;
+    // 256 MB is GC-sized. HD-asset injection (HD BMDs + HD pixel buffers
+    // registered via aurora::gfx::hd_register_replacement) blows past
+    // that for stages+Link, so bump to 1 GB when TPHD is on.
+#if DUSK_TPHD
+    config.mem1Size = 1024 * 1024 * 1024;
+#else
     config.mem1Size = 256 * 1024 * 1024;
+#endif
     config.mem2Size = 24 * 1024 * 1024;
     config.allowJoystickBackgroundEvents = true;
     config.imGuiInitCallback = &aurora_imgui_init_callback;
@@ -694,6 +705,15 @@ int game_main(int argc, char* argv[]) {
 
     OSInit();
 
+#if DUSK_TPHD
+    {
+        const std::string& hdPath = dusk::getSettings().backend.hdContentPath;
+        if (!hdPath.empty()) {
+            dusk::tphd::setHdContentPath(hdPath);
+        }
+    }
+#endif
+
     mDoMain::sPowerOnTime = OSGetTime();
 
     // Reset Data