Lurs' squashed commits

* first tphd wip

* fix CMakeLists.txt after rebase

* fix mipmapping (I hope) thanks to decaf-emu. Skipped a few textures in favor of GC assets and added new HD asset file formats

* added third hook into dusk for second JKRMemArchive constructor (e.g. for sign textures). skip texture load for textures with imageoffset = 0 to get STG.arc loaded instead. And small refactorings/rebasings. added a few parameters in logging
This commit is contained in:
Lurs
2026-04-26 11:38:14 +02:00
committed by Irastris
parent 81c7213993
commit aa23ae244f
28 changed files with 1740 additions and 8 deletions
+1 -1
View File
@@ -1,3 +1,3 @@
[submodule "extern/aurora"]
path = extern/aurora
url = https://github.com/encounter/aurora.git
url = https://github.com/Lurs/aurora.git
+20 -2
View File
@@ -101,6 +101,10 @@ endif ()
set(AURORA_ENABLE_DVD ON CACHE BOOL "Enable DVD API support" FORCE)
set(AURORA_ENABLE_CARD ON CACHE BOOL "Enable CARD API support" FORCE)
set(AURORA_ENABLE_RMLUI ON CACHE BOOL "Enable RmlUi UI support" FORCE)
# Force Freetype (pulled by aurora/RmlUi) to use its bundled mini-zlib.
# Otherwise its find_package(ZLIB) latches onto our FetchContent'd shared zlib,
# duplicating inflate*/inflateInit2_ symbols against zlibstatic.
set(FT_DISABLE_ZLIB TRUE CACHE BOOL "" FORCE)
add_subdirectory(extern/aurora EXCLUDE_FROM_ALL)
target_compile_definitions(aurora_mtx PRIVATE MTX_USE_PS=1)
@@ -115,6 +119,7 @@ else ()
set(DUSK_ENABLE_UPDATE_CHECKER_DEFAULT ON)
endif ()
option(DUSK_ENABLE_UPDATE_CHECKER "Enable update checking support" ${DUSK_ENABLE_UPDATE_CHECKER_DEFAULT})
option(DUSK_TPHD "Enable Twilight Princess HD asset support" ON)
if(ANDROID)
set(DUSK_MOVIE_SUPPORT OFF)
@@ -218,7 +223,6 @@ elseif (MSVC)
add_compile_options($<$<COMPILE_LANGUAGE:C,CXX>:/utf-8>)
endif ()
include(FetchContent)
# Declare all dependencies first so CMake can download them in parallel
@@ -234,7 +238,17 @@ FetchContent_Declare(json
URL_HASH SHA256=42f6e95cad6ec532fd372391373363b62a14af6d771056dbfc86160e6dfff7aa
DOWNLOAD_EXTRACT_TIMESTAMP TRUE
)
FetchContent_MakeAvailable(cxxopts json)
message(STATUS "dusk: Fetching zlib")
FetchContent_Declare(zlib
  URL https://github.com/madler/zlib/releases/download/v1.3.2/zlib-1.3.2.tar.gz
  URL_HASH SHA256=bb329a0a2cd0274d05519d61c667c062e06990d72e125ee2dfa8de64f0119d16
  DOWNLOAD_EXTRACT_TIMESTAMP TRUE
)
# zlib reads its options while its CMakeLists.txt is processed inside
# FetchContent_MakeAvailable(), so the override has to be in the cache *before*
# that call. Setting it afterwards (and only when zlibstatic is missing, which
# is never the case once zlib has been added) had no effect.
# The guard keeps us from touching the option when some parent project has
# already provided a zlibstatic target.
if (NOT TARGET zlibstatic)
  set(ZLIB_BUILD_EXAMPLES OFF CACHE BOOL "" FORCE)
endif ()
FetchContent_MakeAvailable(cxxopts json zlib)
if (DUSK_ENABLE_SENTRY_NATIVE)
message(STATUS "dusk: Fetching sentry-native")
@@ -293,6 +307,9 @@ source_group("dolzel" FILES ${DOLZEL_FILES} ${Z2AUDIOLIB_FILES} ${REL_FILES})
source_group("dusk" FILES ${DUSK_FILES} ${DUSK_HTTP_BACKEND_FILES})
set(GAME_COMPILE_DEFS TARGET_PC WIDESCREEN_SUPPORT=1 AVOID_UB=1 VERSION=0 MTX_USE_PS=1)
if (DUSK_TPHD)
list(APPEND GAME_COMPILE_DEFS DUSK_TPHD=1)
endif ()
set(GAME_INCLUDE_DIRS
include
@@ -310,6 +327,7 @@ set(GAME_LIBS aurora::core aurora::gx aurora::gd aurora::si aurora::vi aurora::p
Threads::Threads)
list(APPEND GAME_LIBS libzstd_static)
list(APPEND GAME_LIBS zlibstatic)
if (DUSK_ENABLE_SENTRY_NATIVE)
list(APPEND GAME_LIBS sentry)
+1 -1
+12
View File
@@ -246,6 +246,18 @@ set(DOLZEL_FILES
if(DEBUG)
list(APPEND DOLZEL_FILES src/d/d_event_debug.cpp)
endif(DEBUG)
# Twilight Princess HD asset support: only compiled in when DUSK_TPHD is ON.
# Headers are listed next to their sources so they show up in IDE projects.
if (DUSK_TPHD)
  list(APPEND DOLZEL_FILES
    src/dusk/tphd/TphdPack.hpp     src/dusk/tphd/TphdPack.cpp
    src/dusk/tphd/GtxParser.hpp    src/dusk/tphd/GtxParser.cpp
    src/dusk/tphd/AddrLib.hpp      src/dusk/tphd/AddrLib.cpp
    src/dusk/tphd/HdAssetLayer.hpp src/dusk/tphd/HdAssetLayer.cpp
  )
endif ()
set(Z2AUDIOLIB_FILES
src/Z2AudioLib/Z2Calc.cpp
+3
View File
@@ -175,6 +175,9 @@ struct UserSettings {
ConfigVar<bool> checkForUpdates;
ConfigVar<int> cardFileType;
ConfigVar<bool> enableAdvancedSettings;
#if DUSK_TPHD
ConfigVar<std::string> hdContentPath; // path to TP-HD decrypted "content" folder
#endif
} backend;
};
@@ -85,6 +85,33 @@ struct J3DDisplayListInit {
/* 0x4 */ BE(u32) field_0x4;
}; // size 8
#if DUSK_TPHD
// A MAT4 block stores every material entry with two extra trailing bytes
// compared to MAT3. Instead of repacking the block, this view addresses the
// entries with a caller-supplied byte stride, so the loaded data itself is
// never modified.
class J3DMaterialInitDataView {
public:
    J3DMaterialInitDataView() : mpData(NULL), mStride(sizeof(J3DMaterialInitData)) {}

    // Points the view at `data`, whose entries start `stride` bytes apart.
    void set(const void* data, u32 stride) {
        mStride = stride;
        mpData = (u8*)data;
    }

    // Entry `idx` of the block. No bounds or null check is performed.
    J3DMaterialInitData& operator[](int idx) { return entryAt(idx); }
    J3DMaterialInitData& operator[](int idx) const { return entryAt(idx); }

private:
    // Shared implementation of both subscript overloads.
    J3DMaterialInitData& entryAt(int idx) const {
        u8* const entry = mpData + (idx * mStride);
        return *(J3DMaterialInitData*)entry;
    }

    u8* mpData;
    u32 mStride;
};
#endif
struct J3DTexCoord2Info;
class J3DCurrentMtxInfo;
@@ -146,7 +173,11 @@ public:
u8 getMaterialMode(int idx) const { return mpMaterialInitData[mpMaterialID[idx]].mMaterialMode; }
/* 0x00 */ u16 mMaterialNum;
#if DUSK_TPHD
/* 0x04 */ J3DMaterialInitDataView mpMaterialInitData;
#else
/* 0x04 */ J3DMaterialInitData* mpMaterialInitData;
#endif
/* 0x08 */ BE(u16)* mpMaterialID;
/* 0x0C */ J3DIndInitData* mpIndInitData;
/* 0x10 */ GXColor* mpMatColor;
@@ -13,7 +13,15 @@
J3DMaterialFactory::J3DMaterialFactory(J3DMaterialBlock const& i_block) {
mMaterialNum = i_block.mMaterialNum;
#if DUSK_TPHD
u32 material_init_data_size = sizeof(J3DMaterialInitData);
if (i_block.mBlockType == 'MAT4') {
material_init_data_size += sizeof(u16);
}
mpMaterialInitData.set(JSUConvertOffsetToPtr<void>(&i_block, i_block.mpMaterialInitData), material_init_data_size);
#else
mpMaterialInitData = JSUConvertOffsetToPtr<J3DMaterialInitData>(&i_block, i_block.mpMaterialInitData);
#endif
mpMaterialID = JSUConvertOffsetToPtr<BE(u16)>(&i_block, i_block.mpMaterialID);
if (i_block.mpIndInitData != (uintptr_t)NULL && (uintptr_t)i_block.mpIndInitData - (uintptr_t)i_block.mpNameTable > 4) {
mpIndInitData = JSUConvertOffsetToPtr<J3DIndInitData>(&i_block, i_block.mpIndInitData);
@@ -54,7 +62,11 @@ J3DMaterialFactory::J3DMaterialFactory(J3DMaterialBlock const& i_block) {
J3DMaterialFactory::J3DMaterialFactory(J3DMaterialDLBlock const& i_block) {
mMaterialNum = i_block.mMaterialNum;
#if DUSK_TPHD
mpMaterialInitData.set(NULL, sizeof(J3DMaterialInitData));
#else
mpMaterialInitData = NULL;
#endif
mpDisplayListInit = JSUConvertOffsetToPtr<J3DDisplayListInit>(&i_block, i_block.mpDisplayListInit);
mpPatchingInfo = JSUConvertOffsetToPtr<J3DPatchingInfo>(&i_block, i_block.mpPatchingInfo);
mpCurrentMtxInfo = JSUConvertOffsetToPtr<J3DCurrentMtxInfo>(&i_block, i_block.mpCurrentMtxInfo);
@@ -103,6 +103,9 @@ J3DModelData* J3DModelLoader::load(void const* i_data, u32 i_flags) {
readJoint((J3DJointBlock*)block);
break;
case 'MAT3':
#if DUSK_TPHD
case 'MAT4': // TODO: Real MAT4 support
#endif
readMaterial((J3DMaterialBlock*)block, (s32)i_flags);
break;
case 'MAT2':
@@ -147,6 +150,9 @@ J3DMaterialTable* J3DModelLoader::loadMaterialTable(void const* i_data) {
for (u32 block_no = 0; block_no < data->mBlockNum; block_no++) {
switch (block->mBlockType) {
case 'MAT3':
#if DUSK_TPHD
case 'MAT4': // TODO: Real MAT4 support
#endif
readMaterialTable((J3DMaterialBlock*)block, flags);
break;
case 'MAT2':
@@ -212,6 +218,9 @@ J3DModelData* J3DModelLoader::loadBinaryDisplayList(void const* i_data, u32 i_fl
modifyMaterial(i_flags);
break;
case 'MAT3':
#if DUSK_TPHD
case 'MAT4': // TODO: Real MAT4 support
#endif
flags = 0x50100000;
flags |= (i_flags & 0x3000000);
mpMaterialBlock = (J3DMaterialBlock*)block;
@@ -308,8 +317,22 @@ static GXVtxAttrFmtList getFmt(GXVtxAttrFmtList* i_fmtList, GXAttr i_attr) {
return *i_fmtList;
}
}
#if DUSK_TPHD
// HD BMDs occasionally have vertex arrays without a format entry. Mirror
// the GC runtime: fall back to J3DSys::initGX defaults.
GXVtxAttrFmtList def{};
def.attr = i_attr;
def.frac = 0;
if (i_attr == GX_VA_POS) { def.cnt = GX_POS_XYZ; def.type = GX_F32; }
else if (i_attr == GX_VA_NRM) { def.cnt = GX_NRM_XYZ; def.type = GX_F32; }
else if (i_attr == GX_VA_NBT) { def.cnt = GX_NRM_NBT; def.type = GX_F32; }
else if (i_attr >= GX_VA_CLR0 &&
i_attr <= GX_VA_CLR1) { def.cnt = GX_CLR_RGBA; def.type = GX_RGBA8; }
else { def.cnt = GX_TEX_ST; def.type = GX_F32; }
return def;
#else
OSPanic(__FILE__, __LINE__, "Unable to find vertex attribute format!");
#endif
}
#endif
@@ -16,7 +16,8 @@ u16 J3DModelLoader::countMaterialNum(const void* stream) {
const J3DModelBlock* block = header->mBlocks;
for (int i = 0; i < header->mBlockNum; i++)
{
if (block->mBlockType == 'MAT3') {
// TODO: Real MAT4 support
if (block->mBlockType == 'MAT3' || block->mBlockType == 'MAT4') {
const J3DMaterialBlock* materialBlock = (const J3DMaterialBlock*)block;
return materialBlock->mMaterialNum;
}
@@ -45,6 +46,8 @@ u32 J3DModelLoader::calcLoadSize(void const* stream, u32 flags_) {
size += calcSizeJoint((const J3DJointBlock*)nextBlock);
break;
case 'MAT3':
case 'MAT4':
// TODO: Real MAT4 support
size += calcSizeMaterial((const J3DMaterialBlock*)nextBlock, flags);
break;
case 'SHP1':
@@ -86,6 +89,8 @@ u32 J3DModelLoader::calcLoadMaterialTableSize(const void* stream) {
for (u32 i = 0; i < header->mBlockNum; i++) {
switch (nextBlock->mBlockType) {
case 'MAT3':
case 'MAT4':
// TODO: Real MAT4 support
size += calcSizeMaterialTable((const J3DMaterialBlock*)nextBlock, flags);
break;
case 'TEX1':
@@ -135,7 +140,9 @@ u32 J3DModelLoader::calcLoadBinaryDisplayListSize(const void* stream, u32 flags)
case 'MDL3':
size += calcSizeMaterialDL((const J3DMaterialDLBlock*)nextBlock, flags);
break;
case 'MAT3': {
case 'MAT3':
case 'MAT4': {
// TODO: Real MAT4 support
u32 flags2 = (J3DMLF_21 | J3DMLF_Material_PE_Full | J3DMLF_Material_Color_LightOn);
flags2 |= (u32)flags & (J3DMLF_Material_UseIndirect | J3DMLF_26);
mpMaterialBlock = (const J3DMaterialBlock*)nextBlock;
@@ -9,6 +9,10 @@
#include "JSystem/JKernel/JKRMemArchive.h"
#include "JSystem/JUtility/JUTAssert.h"
#if TARGET_PC && DUSK_TPHD
#include "dusk/tphd/HdAssetLayer.hpp"
#endif
JKRArchive* JKRArchive::check_mount_already(s32 entryNum, JKRHeap* heap) {
if (heap == NULL) {
heap = JKRGetCurrentHeap();
@@ -29,6 +33,15 @@ JKRArchive* JKRArchive::check_mount_already(s32 entryNum, JKRHeap* heap) {
JKRArchive* JKRArchive::mount(const char* path, EMountMode mountMode, JKRHeap* heap,
EMountDirection mountDirection) {
#if TARGET_PC && DUSK_TPHD
// TPHD arc redirect.
if (path != NULL) {
if (auto hdBuf = dusk::tphd::tryLoadHdArchive(path)) {
return mount((*hdBuf)->data(), heap, mountDirection);
}
}
#endif
s32 entryNum = DVDConvertPathToEntrynum(path);
if (entryNum < 0)
return NULL;
@@ -10,10 +10,23 @@
#include <stdint.h>
#include "os_report.h"
#if DUSK_TPHD
#include "dusk/tphd/HdAssetLayer.hpp"
#endif
JKRMemArchive::JKRMemArchive(s32 entryNum, JKRArchive::EMountDirection mountDirection)
: JKRArchive(entryNum, MOUNT_MEM) {
mIsMounted = false;
mMountDirection = mountDirection;
#if DUSK_TPHD
// TPHD arc redirect by entrynum.
if (const auto* hd = dusk::tphd::getHdBytesForEntryNum(entryNum)) {
if (!open(const_cast<u8*>(hd->data()), static_cast<u32>(hd->size()),
JKRMEMBREAK_FLAG_UNKNOWN0)) {
return;
}
} else
#endif
if (!open(entryNum, mMountDirection)) {
return;
}
+12
View File
@@ -143,7 +143,11 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) {
default:
// Invalid data block type
OS_REPORT_ERROR("データブロックタイプが不正です<%d>\n", block_type);
#if DUSK_TPHD
return 0; // HD-port: skip actor instead of asserting.
#else
JUT_ASSERT(527, FALSE);
#endif
}
if (block_type == 0) {
@@ -184,7 +188,11 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) {
default:
// Invalid data block type
OS_REPORT_ERROR("データブロックタイプが不正です<%d>\n", block_type);
#if DUSK_TPHD
return 0;
#else
JUT_ASSERT(570, FALSE);
#endif
}
if (block_type == 0) {
@@ -225,8 +233,12 @@ bool daBgObj_c::spec_data_c::Set(void* i_ptr) {
default:
// "Data Block type invalid<%d>\n"
OSReport_Error("データブロックタイプが不正です<%d>\n", block_type);
#if DUSK_TPHD
return 0;
#else
JUT_ASSERT(619, FALSE);
break;
#endif
}
if (block_type == 0) {
+27
View File
@@ -23,6 +23,10 @@
#ifndef __MWERKS__
#include "dusk/extras.h"
#include "dusk/logging.h"
#if DUSK_TPHD
#include "dusk/tphd/HdAssetLayer.hpp"
#include <aurora/hd_texture.hpp>
#endif
#endif
dRes_info_c::dRes_info_c() {
@@ -643,7 +647,20 @@ int dRes_info_c::setRes() {
}
#if DEBUG
#if DUSK_TPHD
// HD-redirected buffers live outside the JKR heap. Use the
// registered arc-range size; getSize() would return 0/undefined.
void* mArcHdr = ((JKRMemArchive*)mArchive)->mArcHeader;
size_t arcSize = 0;
if (size_t hdRem = 0; aurora::gfx::hd_find_arc_range(mArcHdr, &hdRem)) {
arcSize = hdRem;
} else {
arcSize = JKRGetRootHeap()->getSize(mArcHdr);
}
mSize = arcSize + JKRGetMemBlockSize(NULL, mDataHeap);
#else
mSize = JKRGetRootHeap()->getSize(((JKRMemArchive*)mArchive)->mArcHeader) + JKRGetMemBlockSize(NULL, mDataHeap);
#endif
if (data_8074C6C0_debug) {
JKRExpHeap* zeldaHeap = mDoExt_getZeldaHeap();
OSReport("\e[33mdRes_info_c::setRes <使用=%08x(work:%08x) 連続空き=%08x 残り空き=%08x (%3d) %s.arc\n\e[m", mSize, r28, zeldaHeap->getFreeSize(), zeldaHeap->getTotalFreeSize(), getResNum(), this);
@@ -1020,6 +1037,16 @@ int dRes_control_c::setObjectRes(char const* i_arcName, void* i_archiveRes, u32
return 0;
}
#if DUSK_TPHD
// HD hook for second JKRMemArchive constructor (see below)
const std::string hdPath = std::format("/res/Object/{}.arc", i_arcName);
if (auto hd = dusk::tphd::tryLoadHdArchive(hdPath)) {
DuskLog.info("[TPHD] setObjectRes redirect: {} -> HD ({} bytes)",
i_arcName, (*hd)->size());
i_archiveRes = const_cast<u8*>((*hd)->data());
i_bufferSize = static_cast<u32>((*hd)->size());
}
#endif
JKRMemArchive* memArchive = JKR_NEW JKRMemArchive(i_archiveRes, i_bufferSize, JKRMEMBREAK_FLAG_UNKNOWN0);
if (memArchive == NULL || !memArchive->isMounted()) {
return 0;
+17
View File
@@ -88,4 +88,21 @@ void ShowFileSelect(FileCallback callback, void* userdata, SDL_Window* window,
default_location, allow_many);
#endif
}
// Opens the platform "choose a folder" dialog and reports the outcome through
// `callback`. Calling it without a callback does nothing.
void ShowFolderSelect(FileCallback callback, void* userdata, SDL_Window* window,
                      const char* default_location) {
    if (callback != nullptr) {
#if USE_IOS_DIALOG
        // iOS doesn't expose a folder picker — report unsupported.
        callback(userdata, nullptr, "Folder selection is not supported on this platform");
#else
        auto pending = std::make_unique<SDLDialogCallbackState>();
        pending->userdata = userdata;
        pending->callback = callback;
        // Ownership of the state leaves this function here; presumably
        // onSDLDialogFinished frees it — confirm against its definition.
        SDL_ShowOpenFolderDialog(&onSDLDialogFinished, pending.release(), window,
                                 default_location, false);
#endif
    }
}
} // namespace dusk
+3
View File
@@ -12,4 +12,7 @@ void ShowFileSelect(FileCallback callback, void* userdata, SDL_Window* window,
const SDL_DialogFileFilter* filters, int nfilters, const char* default_location,
bool allow_many);
void ShowFolderSelect(FileCallback callback, void* userdata, SDL_Window* window,
const char* default_location);
} // namespace dusk
+6
View File
@@ -122,6 +122,9 @@ UserSettings g_userSettings = {
.checkForUpdates {"backend.checkForUpdates", true},
.cardFileType {"backend.cardFileType", static_cast<int>(CARD_GCIFOLDER)},
.enableAdvancedSettings {"backend.enableAdvancedSettings", false},
#if DUSK_TPHD
.hdContentPath {"backend.hdContentPath", ""},
#endif
}
};
@@ -226,6 +229,9 @@ void registerSettings() {
Register(g_userSettings.backend.checkForUpdates);
Register(g_userSettings.backend.cardFileType);
Register(g_userSettings.backend.enableAdvancedSettings);
#if DUSK_TPHD
Register(g_userSettings.backend.hdContentPath);
#endif
}
// Transient settings
+435
View File
@@ -0,0 +1,435 @@
/*
* Ported from decaf-emu/addrlib (https://github.com/decaf-emu/addrlib),
* src/r600/r600addrlib.{cpp,h} and src/core/addrlib.cpp.
*
* Original AMD copyright header:
*
* Copyright (C) 2014 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*
* --------------------------------------------------------------------------
*
* This is a minimal extraction of decaf-emu's R600AddrLib hardcoded for
* the Wii-U R700-class GPU (mPipes=2, mBanks=4, group/swap/split sizes).
* The R600 class hierarchy is collapsed to free functions; only the surface
* address paths needed for GTX texture deswizzling are kept.
*/
#include "AddrLib.hpp"
#include <algorithm>
#include <cstring>
namespace dusk::tphd::addrlib {
// ---- Wii-U R700 hardware constants ----------------------------------------
// Match decaf's R600AddrLib state after DecodeGbRegs for the Wii-U register
// configuration: pipes=2, banks=4, group=256B, row=2KB, swap=256B, split=2KB.
static constexpr u32 kPipes = 2;
static constexpr u32 kBanks = 4;
static constexpr u32 kPipeInterleaveBytes = 256;
static constexpr u32 kRowSize = 2048;
static constexpr u32 kSwapSize = 256;
static constexpr u32 kSplitSize = 2048;
// Wii-U does not enable the optimal bank-swap heuristic.
static constexpr bool kOptimalBankSwap = false;
// ---- Decaf addrcommon.h constants -----------------------------------------
// A micro tile covers 8x8 elements; "thick" tiles additionally span 4 slices.
static constexpr u32 kMicroTileWidth = 8;
static constexpr u32 kMicroTileHeight = 8;
static constexpr u32 kMicroTilePixels = kMicroTileWidth * kMicroTileHeight;
static constexpr u32 kThickTileThickness = 4;
// Converts a bit count to the number of bytes needed to hold it (rounds up).
static constexpr u32 BITS_TO_BYTES(u32 v) { return (v + 7) / 8; }
// Extracts bit `b` of `v` as 0 or 1.
// NOTE(review): names starting with an underscore followed by an upper-case
// letter are reserved to the implementation in C++; consider renaming.
static constexpr u32 _BIT(u32 v, u32 b) { return (v >> b) & 1; }
// Integer base-2 logarithm, rounded down. Yields 0 for both v == 0 and v == 1.
static u32 Log2(u32 v) {
    u32 exponent = 0;
    for (; v > 1; v >>= 1) {
        ++exponent;
    }
    return exponent;
}
// ---- Tile-mode classification ---------------------------------------------
// Number of slices one micro tile spans: 4 for every "Thick" mode, else 1.
static u32 ComputeSurfaceThickness(TileMode tm) {
    const bool thick = tm == TileMode::Tiled1DThick ||
                       tm == TileMode::Tiled2DThick ||
                       tm == TileMode::Tiled2BThick ||
                       tm == TileMode::Tiled3DThick ||
                       tm == TileMode::Tiled3BThick;
    return thick ? 4u : 1u;
}
// True for the macro-tiled thick modes (2D/2B/3D/3B Thick). Tiled1DThick is
// micro-tiled and therefore not included.
static bool IsThickMacroTiled(TileMode tm) {
    return tm == TileMode::Tiled2DThick ||
           tm == TileMode::Tiled2BThick ||
           tm == TileMode::Tiled3DThick ||
           tm == TileMode::Tiled3BThick;
}
// True for the "2B"/"3B" tile modes, i.e. the ones that apply bank swapping.
static bool IsBankSwappedTileMode(TileMode tm) {
    return tm == TileMode::Tiled2BThin1 ||
           tm == TileMode::Tiled2BThin2 ||
           tm == TileMode::Tiled2BThin4 ||
           tm == TileMode::Tiled2BThick ||
           tm == TileMode::Tiled3BThin1 ||
           tm == TileMode::Tiled3BThick;
}
// Selects the AddrTileType used for the micro-tile pixel layout
// (0=Displayable, 1=NonDisplayable, 2=DepthSampleOrder, 3=Thick).
// Wii-U GTX colour textures empirically use the Displayable (bpp-switched)
// layout, while depth surfaces use the plain x/y-interleave pattern AMD calls
// NonDisplayable. (Same convention as Cemu's port.)
static u32 GetTileType(bool isDepth) {
    if (isDepth) {
        return 1u;  // NonDisplayable
    }
    return 0u;  // Displayable
}
// Per-slice bank/pipe rotation for a tile mode; 0 for modes that are neither
// 2D/2B nor 3D/3B macro-tiled.
static u32 ComputeSurfaceRotationFromTileMode(TileMode tm) {
    u32 rotation = 0;
    switch (tm) {
    case TileMode::Tiled2DThin1:
    case TileMode::Tiled2DThin2:
    case TileMode::Tiled2DThin4:
    case TileMode::Tiled2DThick:
    case TileMode::Tiled2BThin1:
    case TileMode::Tiled2BThin2:
    case TileMode::Tiled2BThin4:
    case TileMode::Tiled2BThick:
        rotation = kPipes * ((kBanks >> 1) - 1);
        break;
    case TileMode::Tiled3DThin1:
    case TileMode::Tiled3DThick:
    case TileMode::Tiled3BThin1:
    case TileMode::Tiled3BThick:
        rotation = (kPipes >= 4) ? ((kPipes >> 1) - 1) : 1;
        break;
    default:
        break;
    }
    return rotation;
}
// Macro-tile aspect ratio: 2 for the "Thin2" modes, 4 for the "Thin4" modes,
// and 1 for everything else.
static u32 ComputeMacroTileAspectRatio(TileMode tm) {
    if (tm == TileMode::Tiled2DThin2 || tm == TileMode::Tiled2BThin2) {
        return 2;
    }
    if (tm == TileMode::Tiled2DThin4 || tm == TileMode::Tiled2BThin4) {
        return 4;
    }
    return 1;
}
// ---- Pixel-index-within-microtile -----------------------------------------
// Builds the index of an element inside its micro tile by interleaving the
// low three bits of x, y and z. Which coordinate bit lands in which index bit
// depends on the tile type and, for the remaining (Displayable) case, on bpp.
//   x, y, z   element coordinates; only bits 0..2 of each are used
//   bpp       bits per element; selects the Displayable bit order
//   tm        tile mode; only its thickness (1 or 4) matters here
//   tileType  AddrTileType value (1 = NonDisplayable, 3 = Thick, else Displayable)
// Returns the bit-interleaved index (b0 is the least significant bit).
static u32 ComputePixelIndexWithinMicroTile(u32 x, u32 y, u32 z, u32 bpp,
TileMode tm, u32 tileType) {
u32 b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0, b6 = 0, b7 = 0, b8 = 0;
const u32 x0 = _BIT(x, 0), x1 = _BIT(x, 1), x2 = _BIT(x, 2);
const u32 y0 = _BIT(y, 0), y1 = _BIT(y, 1), y2 = _BIT(y, 2);
const u32 z0 = _BIT(z, 0), z1 = _BIT(z, 1), z2 = _BIT(z, 2);
const u32 thickness = ComputeSurfaceThickness(tm);
if (tileType == 3 /* Thick */) {
// Thick tiling interleaves all three axes: x0 y0 z0 x1 y1 z1 x2 y2.
b0 = x0; b1 = y0; b2 = z0; b3 = x1;
b4 = y1; b5 = z1; b6 = x2; b7 = y2;
} else if (tileType == 1 /* NonDisplayable */) {
// Plain x/y interleave, independent of bpp.
b0 = x0; b1 = y0; b2 = x1;
b3 = y1; b4 = x2; b5 = y2;
} else {
// Displayable: the bit order is switched on the element size.
switch (bpp) {
case 8:
b0 = x0; b1 = x1; b2 = x2; b3 = y1; b4 = y0; b5 = y2; break;
case 16:
b0 = x0; b1 = x1; b2 = x2; b3 = y0; b4 = y1; b5 = y2; break;
case 64:
b0 = x0; b1 = y0; b2 = x1; b3 = x2; b4 = y1; b5 = y2; break;
case 128:
b0 = y0; b1 = x0; b2 = x1; b3 = x2; b4 = y1; b5 = y2; break;
case 32:
case 96:
default:
b0 = x0; b1 = x1; b2 = y0; b3 = x2; b4 = y1; b5 = y2; break;
}
}
// Non-thick tile types on a thick surface append the slice bits on top.
if (tileType != 3 && thickness > 1) {
b6 = z0; b7 = z1;
}
// ComputeSurfaceThickness() only ever returns 1 or 4, so this branch is
// not reachable in this port.
if (thickness == 8) {
b8 = z2;
}
return (b0) | (b1 << 1) | (b2 << 2) | (b3 << 3) | (b4 << 4) |
(b5 << 5) | (b6 << 6) | (b7 << 7) | (b8 << 8);
}
// ---- Pipe / Bank from coord (no rotation) ---------------------------------
// Hardcoded for Wii-U: pipes=2, banks=4.
// The pipe index is bit 3 of x XOR bit 3 of y.
static u32 ComputePipeFromCoordWoRotation(u32 x, u32 y) {
    const u32 xBit3 = _BIT(x, 3);
    const u32 yBit3 = _BIT(y, 3);
    return (yBit3 ^ xBit3) & 1;
}
// Two-bit bank index derived from x and from y divided by the pipe count.
static u32 ComputeBankFromCoordWoRotation(u32 x, u32 y) {
    const u32 tileRow = y / kPipes;

    u32 lowBit = _BIT(tileRow, 4) ^ _BIT(x, 3);
    if (kOptimalBankSwap && kPipes == 8) {
        lowBit ^= _BIT(x, 5);
    }
    const u32 highBit = _BIT(tileRow, 3) ^ _BIT(x, 4);

    return (highBit << 1) | lowBit;
}
// ---- Bank-swapped width ---------------------------------------------------
// Width of one bank-swap stripe for the "2B"/"3B" tile modes, halved until it
// is below twice the surface pitch.
//   tm          tile mode; anything that is not bank-swapped yields 0
//   bpp         bits per element
//   numSamples  samples per element (forced to 4 for thick macro tiles)
//   pitch       surface pitch the result is clamped against
// Returns 0 when no bank swapping applies or when the inputs are degenerate;
// the caller treats 0 as "no swap".
static u32 ComputeSurfaceBankSwappedWidth(TileMode tm, u32 bpp, u32 numSamples,
                                          u32 pitch) {
    if (!IsBankSwappedTileMode(tm)) return 0;
    // Degenerate descriptors (these values come from file data): bpp == 0
    // would divide by zero below, and pitch == 0 would keep the halving loop
    // spinning forever because 0 >= 0 always holds for unsigned values.
    if (bpp == 0 || pitch == 0) return 0;

    u32 slicesPerTile = 1;
    const u32 bytesPerSample = 8 * bpp;
    const u32 samplesPerTile = bytesPerSample ? (kSplitSize / bytesPerSample) : 0;
    if (samplesPerTile != 0) {
        slicesPerTile = std::max<u32>(1u, numSamples / samplesPerTile);
    }
    if (IsThickMacroTiled(tm)) {
        numSamples = 4;
    }
    const u32 bytesPerTileSlice = numSamples * bytesPerSample / slicesPerTile;
    const u32 factor = ComputeMacroTileAspectRatio(tm);
    const u32 swapTiles = std::max<u32>(1u, (kSwapSize >> 1) / bpp);
    const u32 swapWidth = swapTiles * 8 * kBanks;
    const u32 heightBytes = numSamples * factor * kPipes * bpp / slicesPerTile;
    // Both values are divisors below; they become 0 for numSamples == 0 or
    // when the 32-bit products above wrap.
    if (bytesPerTileSlice == 0 || heightBytes == 0) return 0;

    const u32 swapMax = kPipes * kBanks * kRowSize / heightBytes;
    const u32 swapMin = kPipeInterleaveBytes * 8 * kBanks / bytesPerTileSlice;
    u32 bankSwapWidth = std::min(swapMax, std::max(swapMin, swapWidth));
    // Compare in 64 bits so that 2 * pitch cannot wrap around to a tiny value.
    const u64 pitchLimit = 2 * static_cast<u64>(pitch);
    while (bankSwapWidth >= pitchLimit) {
        bankSwapWidth >>= 1;
    }
    return bankSwapWidth;
}
// ---- Surface-address from coord -------------------------------------------
// Byte offset of element (x, y, slice) inside a micro-tiled (1D) surface.
//   bpp     bits per element
//   pitch   surface pitch in elements; pitch / 8 micro tiles make up one row
//   height  surface height in elements (only feeds the per-slice size)
//   tm      tile mode; Tiled1DThick makes a micro tile 4 slices deep
//   isDepth selects the NonDisplayable micro-tile layout via GetTileType()
// The result is the sum of the slice offset, the offset of the containing
// micro tile, and the element's offset inside that micro tile.
static u64 ComputeSurfaceAddrFromCoordMicroTiled(u32 x, u32 y, u32 slice,
u32 bpp, u32 pitch, u32 height,
TileMode tm, bool isDepth) {
const u64 microTileThickness = (tm == TileMode::Tiled1DThick) ? 4u : 1u;
// NOTE: the bit counts below are narrowed to u32 before being converted
// to bytes.
const u64 microTileBytes =
BITS_TO_BYTES(static_cast<u32>(kMicroTilePixels * microTileThickness * bpp));
const u64 microTilesPerRow = pitch / kMicroTileWidth;
const u64 microTileIndexX = x / kMicroTileWidth;
const u64 microTileIndexY = y / kMicroTileHeight;
const u64 microTileIndexZ = slice / microTileThickness;
const u64 microTileOffset = microTileBytes *
(microTileIndexX + microTileIndexY * microTilesPerRow);
const u64 sliceBytes =
BITS_TO_BYTES(static_cast<u32>(pitch * height * microTileThickness * bpp));
const u64 sliceOffset = microTileIndexZ * sliceBytes;
const u64 pixelIndex =
ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tm, GetTileType(isDepth));
// Element offset inside the micro tile, converted from bits to bytes.
const u64 pixelOffset = (bpp * pixelIndex) / 8;
return pixelOffset + microTileOffset + sliceOffset;
}
// Byte offset of element (x, y, slice, sample) inside a macro-tiled (2D/3D)
// surface.
//   bpp          bits per element
//   pitch/height surface extents in elements
//   numSamples   samples per element; must be non-zero (it is a divisor)
//   tm           tile mode (thickness, rotation, macro-tile shape, bank swap)
//   isDepth      selects the sample/pixel ordering and the micro-tile layout
//   pipeSwizzle, bankSwizzle  per-surface swizzle folded into the bank/pipe
// The address is assembled from: the element offset inside its micro tile,
// the pipe and bank selected from the coordinates (then swizzled, rotated and
// optionally bank-swapped), and the macro-tile plus slice offset.
static u64 ComputeSurfaceAddrFromCoordMacroTiled(u32 x, u32 y, u32 slice, u32 sample,
u32 bpp, u32 pitch, u32 height,
u32 numSamples, TileMode tm,
bool isDepth, u32 pipeSwizzle,
u32 bankSwizzle) {
const u64 numPipes = kPipes;
const u64 numBanks = kBanks;
const u64 numGroupBits = Log2(kPipeInterleaveBytes);
const u64 numPipeBits = Log2(kPipes);
const u64 numBankBits = Log2(kBanks);
const u64 microTileThickness = ComputeSurfaceThickness(tm);
const u64 microTileBits = kMicroTilePixels * microTileThickness * bpp * numSamples;
const u64 microTileBytes = microTileBits / 8;
const u64 pixelIndex =
ComputePixelIndexWithinMicroTile(x, y, slice, bpp, tm, GetTileType(isDepth));
// Offsets are kept in bits until the division by 8 further down.
u64 sampleOffset, pixelOffset;
if (isDepth) {
// Depth: the samples of one pixel are stored next to each other.
sampleOffset = bpp * sample;
pixelOffset = numSamples * bpp * pixelIndex;
} else {
// Colour: each sample occupies its own plane of the micro tile.
sampleOffset = sample * (microTileBits / numSamples);
pixelOffset = bpp * pixelIndex;
}
u64 elemOffset = pixelOffset + sampleOffset;
const u64 bytesPerSample = microTileBytes / numSamples;
u64 sampleSlice = 0;
u64 numSampleSplits = 1;
// Multisampled micro tiles larger than the split size are broken into
// several sample slices; numSamples is reduced to the per-slice count.
if (numSamples > 1 && microTileBytes > kSplitSize) {
const u64 samplesPerSlice = kSplitSize / bytesPerSample;
numSampleSplits = numSamples / samplesPerSlice;
numSamples = static_cast<u32>(samplesPerSlice);
const u64 tileSliceBits = microTileBits / numSampleSplits;
sampleSlice = elemOffset / tileSliceBits;
elemOffset %= tileSliceBits;
}
elemOffset /= 8;
u64 pipe = ComputePipeFromCoordWoRotation(x, y);
u64 bank = ComputeBankFromCoordWoRotation(x, y);
u64 bankPipe = pipe + numPipes * bank;
const u64 rotation = ComputeSurfaceRotationFromTileMode(tm);
const u64 swizzle = pipeSwizzle + numPipes * bankSwizzle;
u64 sliceIn = slice;
if (IsThickMacroTiled(tm)) {
sliceIn /= kThickTileThickness;
}
// Fold sample slice, surface swizzle and slice rotation into the combined
// bank/pipe index, then split it back into pipe and bank.
bankPipe ^= numPipes * sampleSlice * ((numBanks >> 1) + 1) ^ (swizzle + sliceIn * rotation);
bankPipe %= numPipes * numBanks;
pipe = bankPipe % numPipes;
bank = bankPipe / numPipes;
const u64 sliceBytes =
BITS_TO_BYTES(static_cast<u32>(pitch * height * microTileThickness * bpp * numSamples));
const u64 sliceOffset = sliceBytes *
((sampleSlice + numSampleSplits * slice) / microTileThickness);
// Base macro tile is (8 * banks) x (8 * pipes) elements; the Thin2/Thin4
// modes trade width for height by their aspect ratio.
u64 macroTilePitch = 8 * numBanks;
u64 macroTileHeight = 8 * numPipes;
switch (tm) {
case TileMode::Tiled2DThin2:
case TileMode::Tiled2BThin2:
macroTilePitch /= 2;
macroTileHeight *= 2;
break;
case TileMode::Tiled2DThin4:
case TileMode::Tiled2BThin4:
macroTilePitch /= 4;
macroTileHeight *= 4;
break;
default:
break;
}
const u64 macroTilesPerRow = pitch / macroTilePitch;
const u64 macroTileBytes =
BITS_TO_BYTES(static_cast<u32>(numSamples * microTileThickness * bpp *
macroTileHeight * macroTilePitch));
const u64 macroTileIndexX = x / macroTilePitch;
const u64 macroTileIndexY = y / macroTileHeight;
const u64 macroTileOffset = macroTileBytes *
(macroTileIndexX + macroTilesPerRow * macroTileIndexY);
if (IsBankSwappedTileMode(tm)) {
static constexpr u32 bankSwapOrder[] = { 0, 1, 3, 2, 6, 7, 5, 4, 0, 0 };
const u32 bankSwapWidth =
ComputeSurfaceBankSwappedWidth(tm, bpp, numSamples, pitch);
// A swap width of 0 is treated as "no swap" instead of dividing by it.
const u64 swapIndex = (bankSwapWidth != 0)
? (macroTilePitch * macroTileIndexX / bankSwapWidth) : 0;
bank ^= bankSwapOrder[swapIndex & (kBanks - 1)];
}
// Final layout, low to high: offset within the pipe-interleave group,
// pipe bits, bank bits, then the remaining high offset bits.
const u64 groupMask = (1u << numGroupBits) - 1;
const u64 totalOffset = elemOffset +
((macroTileOffset + sliceOffset) >> (numBankBits + numPipeBits));
const u64 offsetHigh = (totalOffset & ~groupMask) << (numBankBits + numPipeBits);
const u64 offsetLow = totalOffset & groupMask;
const u64 bankBits = bank << (numPipeBits + numGroupBits);
const u64 pipeBits = pipe << numGroupBits;
return bankBits | pipeBits | offsetLow | offsetHigh;
}
// ---- High-level deswizzle -------------------------------------------------
std::vector<u8> deswizzle(const SurfaceDesc& desc, std::span<const u8> tiledBytes) {
// For BCN formats addrlib operates on block coordinates; bpp is bits per
// 4x4 block (e.g. 64 for BC1). Reduce width/height to block extents.
const u32 blockWidth = desc.isBcn ? (desc.width + 3) / 4 : desc.width;
const u32 blockHeight = desc.isBcn ? (desc.height + 3) / 4 : desc.height;
const u32 bytesPerElement = desc.bpp / 8;
const u32 linearStride = blockWidth * bytesPerElement;
std::vector<u8> linear(static_cast<size_t>(linearStride) * blockHeight, 0);
const u32 pipeSwizzle = (desc.swizzle >> 8) & 1;
const u32 bankSwizzle = (desc.swizzle >> 9) & 3;
// Linear tile modes: trivial copy honoring pitch.
if (desc.tileMode == TileMode::LinearGeneral ||
desc.tileMode == TileMode::LinearAligned) {
for (u32 y = 0; y < blockHeight; ++y) {
const u32 srcOff = y * desc.pitch * bytesPerElement;
if (srcOff + linearStride > tiledBytes.size()) break;
std::memcpy(linear.data() + y * linearStride,
tiledBytes.data() + srcOff, linearStride);
}
return linear;
}
const bool microTiled =
(desc.tileMode == TileMode::Tiled1DThin1 ||
desc.tileMode == TileMode::Tiled1DThick);
for (u32 y = 0; y < blockHeight; ++y) {
for (u32 x = 0; x < blockWidth; ++x) {
u64 srcOff;
if (microTiled) {
srcOff = ComputeSurfaceAddrFromCoordMicroTiled(
x, y, /*slice*/ 0, desc.bpp, desc.pitch, blockHeight,
desc.tileMode, desc.isDepth);
} else {
srcOff = ComputeSurfaceAddrFromCoordMacroTiled(
x, y, /*slice*/ 0, /*sample*/ 0, desc.bpp, desc.pitch,
blockHeight, /*numSamples*/ 1, desc.tileMode, desc.isDepth,
pipeSwizzle, bankSwizzle);
}
if (srcOff + bytesPerElement > tiledBytes.size()) continue;
const u32 dstOff = (y * blockWidth + x) * bytesPerElement;
std::memcpy(linear.data() + dstOff,
tiledBytes.data() + srcOff, bytesPerElement);
}
}
return linear;
}
} // namespace dusk::tphd::addrlib
+57
View File
@@ -0,0 +1,57 @@
// Ported from decaf-emu/addrlib (https://github.com/decaf-emu/addrlib),
// which is itself derived from AMD's address library.
// Copyright (c) 2014 Advanced Micro Devices, Inc. All Rights Reserved.
// Licensed under the AMD MIT-style license; see the AMD copyright header in
// AddrLib.cpp.
//
// Minimal R600/R700 surface-address port sufficient for deswizzling Wii-U
// GTX textures at load time. Hardcoded for Wii-U HW configuration:
// pipes = 2, banks = 4, pipe interleave = 256B,
// row size = 2KB, sample split = 2KB, swap size = 256B.
#ifndef DUSK_TPHD_ADDRLIB_HPP
#define DUSK_TPHD_ADDRLIB_HPP
#include <span>
#include <vector>
#include <dolphin/types.h>
namespace dusk::tphd::addrlib {
// Surface tiling layouts understood by deswizzle().
// NOTE(review): the numeric values presumably mirror the raw tileMode field
// stored in GTX surfaces — confirm against the code that fills SurfaceDesc.
enum class TileMode : u32 {
LinearGeneral = 0,
LinearAligned = 1,
Tiled1DThin1 = 2,
Tiled1DThick = 3,
Tiled2DThin1 = 4,
Tiled2DThin2 = 5,
Tiled2DThin4 = 6,
Tiled2DThick = 7,
Tiled2BThin1 = 8,
Tiled2BThin2 = 9,
Tiled2BThin4 = 10,
Tiled2BThick = 11,
Tiled3DThin1 = 12,
Tiled3DThick = 13,
Tiled3BThin1 = 14,
Tiled3BThick = 15,
};
// Description of one surface level handed to deswizzle().
struct SurfaceDesc {
u32 width; // pixels; deswizzle() rounds up to 4x4-block extents when isBcn
u32 height; // pixels; deswizzle() rounds up to 4x4-block extents when isBcn
u32 pitch; // row pitch in elements (pixels, or 4x4 blocks when isBcn)
u32 bpp; // bits per pixel (or per 4x4 BCN block, e.g. 64 for BC1)
TileMode tileMode;
u32 swizzle; // GTX swizzle field; pipe = (>>8)&1, bank = (>>9)&3
bool isBcn; // true for block-compressed formats (width/height are divided by 4)
bool isDepth; // true selects the depth-surface micro-tile layout
};
// Deswizzle a single surface mip level into a row-major linear buffer.
// Elements whose tiled source address falls outside tiledBytes are left as 0.
std::vector<u8> deswizzle(const SurfaceDesc& desc, std::span<const u8> tiledBytes);
} // namespace dusk::tphd::addrlib
#endif
+89
View File
@@ -0,0 +1,89 @@
#include "GtxParser.hpp"
#include <cstring>
#include "dusk/endian.h"
namespace dusk::tphd {
namespace {
// Values of Gfx2BlockHeader::blockType that parseGtx() acts on; every other
// block type is skipped.
constexpr u32 kBlockTypeEOF = 0x01; // ends parsing
constexpr u32 kBlockTypeSurface = 0x0B; // surface description, starts a new GtxSurface
constexpr u32 kBlockTypeImage = 0x0C; // base-level data for the current surface
constexpr u32 kBlockTypeMipChain = 0x0D; // mip data for the current surface
}
// Parses a GTX ("Gfx2") container and returns every surface found in it.
//   gtx  the complete file contents; the returned surfaces' baseData/mipData
//        are sub-spans of this buffer and do not own their bytes
// Each surface block starts a new GtxSurface; following image and mip-chain
// blocks attach their payload to the most recent surface. Parsing stops at the
// EOF block, at the first block without the "BLK{" magic, or at the first
// block whose declared extent does not fit into the buffer. An empty vector is
// returned when the file header is missing or invalid.
std::vector<GtxSurface> parseGtx(std::span<const u8> gtx) {
    std::vector<GtxSurface> out;
    if (gtx.size() < sizeof(Gfx2Header) ||
        std::memcmp(gtx.data(), "Gfx2", 4) != 0) {
        return out;
    }
    const auto* fileHdr = reinterpret_cast<const Gfx2Header*>(gtx.data());
    const u32 headerSize = fileHdr->headerSize;
    if (headerSize > gtx.size()) {
        return out;
    }

    // Surface that subsequent image / mip blocks belong to. It is re-pointed
    // after every push_back, so it never refers to reallocated storage.
    GtxSurface* current = nullptr;
    size_t off = headerSize;  // invariant: off <= gtx.size()
    while (gtx.size() - off >= sizeof(Gfx2BlockHeader)) {
        const auto* blk = reinterpret_cast<const Gfx2BlockHeader*>(gtx.data() + off);
        if (std::memcmp(blk->magic, "BLK{", 4) != 0) {
            break;
        }
        const u32 blockHdrSize = blk->headerSize;
        const u32 blockType = blk->blockType;
        const u32 blockDataSz = blk->blockDataSize;

        // Validate the block extent by subtraction so that the check cannot
        // wrap around on platforms where size_t is 32 bits wide.
        const size_t remaining = gtx.size() - off;
        if (blockHdrSize < sizeof(Gfx2BlockHeader) || blockHdrSize > remaining ||
            blockDataSz > remaining - blockHdrSize) {
            break;
        }
        const u8* body = gtx.data() + off + blockHdrSize;

        switch (blockType) {
        case kBlockTypeSurface: {
            if (blockDataSz < sizeof(Gx2SurfaceBody)) {
                // The surface cannot be decoded. Forget the previous surface
                // as well, otherwise the image / mip blocks that follow would
                // overwrite the data of an unrelated surface.
                current = nullptr;
                break;
            }
            const auto* sb = reinterpret_cast<const Gx2SurfaceBody*>(body);
            GtxSurface s{};
            s.format = sb->format;
            s.width = sb->width;
            s.height = sb->height;
            s.depth = sb->depth;
            s.mipCount = sb->mipCount;
            s.aa = sb->aa;
            s.use = sb->use;
            s.imgSize = sb->imgSize;
            s.mipSize = sb->mipSize;
            s.tileMode = sb->tileMode;
            s.swizzle = sb->swizzle;
            s.pitch = sb->pitch;
            for (u32 i = 0; i < 13; ++i) {
                s.mipOffsets[i] = sb->mipOffsets[i];
            }
            out.push_back(s);
            current = &out.back();
            break;
        }
        case kBlockTypeImage:
            if (current) current->baseData = gtx.subspan(off + blockHdrSize, blockDataSz);
            break;
        case kBlockTypeMipChain:
            if (current) current->mipData = gtx.subspan(off + blockHdrSize, blockDataSz);
            break;
        case kBlockTypeEOF:
            return out;
        default:
            break;
        }
        off += static_cast<size_t>(blockHdrSize) + blockDataSz;
    }
    return out;
}
}
+79
View File
@@ -0,0 +1,79 @@
#ifndef DUSK_TPHD_GTX_PARSER_HPP
#define DUSK_TPHD_GTX_PARSER_HPP
#include <array>
#include <optional>
#include <span>
#include <vector>
#include <dolphin/types.h>
#include "dusk/endian.h"
namespace dusk::tphd {
// On-disk GX2 file header. Followed by a stream of BLK{ blocks.
// Only the leading fields that parseGtx() reads are declared; `headerSize` is
// used as the offset of the first block.
struct Gfx2Header {
/* 0x00 */ char magic[4]; // "Gfx2"
/* 0x04 */ BE(u32) headerSize;
};
// Common 0x20-byte header on every BLK{ block.
// parseGtx() uses headerSize, blockType and blockDataSize; the block body
// starts `headerSize` bytes after the start of the block header.
struct Gfx2BlockHeader {
/* 0x00 */ char magic[4]; // "BLK{"
/* 0x04 */ BE(u32) headerSize;
/* 0x08 */ BE(u32) versionMajor;
/* 0x0C */ BE(u32) versionMinor;
/* 0x10 */ BE(u32) blockType;
/* 0x14 */ BE(u32) blockDataSize;
/* 0x18 */ BE(u32) ident;
/* 0x1C */ BE(u32) flags;
};
static_assert(sizeof(Gfx2BlockHeader) == 0x20);
// On-disk surface-info block body (the GX2 surface descriptor layout).
// All fields are stored big-endian; imgPtr/mipPtr/alignment/dim are declared
// for layout only and are not copied into GtxSurface.
struct Gx2SurfaceBody {
/* 0x00 */ BE(u32) dim;
/* 0x04 */ BE(u32) width;
/* 0x08 */ BE(u32) height;
/* 0x0C */ BE(u32) depth;
/* 0x10 */ BE(u32) mipCount;
/* 0x14 */ BE(u32) format;
/* 0x18 */ BE(u32) aa;
/* 0x1C */ BE(u32) use;
/* 0x20 */ BE(u32) imgSize;
/* 0x24 */ BE(u32) imgPtr;
/* 0x28 */ BE(u32) mipSize;
/* 0x2C */ BE(u32) mipPtr;
/* 0x30 */ BE(u32) tileMode;
/* 0x34 */ BE(u32) swizzle;
/* 0x38 */ BE(u32) alignment;
/* 0x3C */ BE(u32) pitch;
/* 0x40 */ BE(u32) mipOffsets[13];
};
static_assert(sizeof(Gx2SurfaceBody) == 0x74);
// Host-endian copy of one parsed surface plus views of its pixel data.
// baseData/mipData stay empty when the file has no matching image/mip block.
struct GtxSurface {
u32 width;
u32 height;
u32 depth;
u32 mipCount;
u32 format; // GX2 surface format code (0x31 BC1, 0x1A RGBA8, ...)
u32 aa;
u32 use;
u32 tileMode;
u32 swizzle;
u32 pitch;
u32 imgSize; // base level size (bytes)
u32 mipSize; // mip chain size (bytes, levels 1..N-1)
std::array<u32, 13> mipOffsets;
std::span<const u8> baseData; // into the owning GTX buffer
std::span<const u8> mipData; // into the owning GTX buffer
};
// Parse a Gfx2 container and return one GtxSurface per surface block, in file
// order. Returns an empty vector when `gtxBytes` does not start with a valid
// Gfx2 header. The returned spans reference `gtxBytes`, which must stay alive
// and unmoved for as long as the surfaces are used.
std::vector<GtxSurface> parseGtx(std::span<const u8> gtxBytes);
}
#endif
+617
View File
@@ -0,0 +1,617 @@
#include "HdAssetLayer.hpp"
#include <algorithm>
#include <cstdio>
#include <cstring>
#include <list>
#include <mutex>
#include <optional>
#include <span>
#include <unordered_map>
#include <vector>
#include <aurora/hd_texture.hpp>
#include <dolphin/dvd.h>
#include "JSystem/J3DGraphLoader/J3DModelLoader.h"
#include "JSystem/JKernel/JKRArchive.h"
#include "JSystem/JKernel/JKRDecomp.h"
#include "JSystem/JUtility/JUTTexture.h"
#include "dusk/endian.h"
#include "dusk/logging.h"
#include "AddrLib.hpp"
#include "GtxParser.hpp"
#include "TphdPack.hpp"
static aurora::Module HdLog("dusk::tphd::hd");
namespace dusk::tphd {
namespace {
std::filesystem::path g_contentPath;
std::mutex g_cacheMutex;
// Heap-allocated, never freed — these must outlive g_dComIfG_gameInfo's
// static destructor which holds JKRArchives referencing these bytes.
std::list<std::vector<u8>>& g_mountBuffers() {
static auto* p = new std::list<std::vector<u8>>{};
return *p;
}
std::unordered_map<s32, const std::vector<u8>*>& g_entryNumToBytes() {
static auto* p = new std::unordered_map<s32, const std::vector<u8>*>{};
return *p;
}
// Case-sensitive test for whether `s` ends with `suffix`. An empty suffix
// always matches.
bool endsWithSuffix(std::string_view s, std::string_view suffix) {
    if (suffix.size() > s.size()) {
        return false;
    }
    const std::string_view tail = s.substr(s.size() - suffix.size());
    return tail == suffix;
}
// On-disk Yaz0 file header (16 bytes); the compressed stream follows it.
struct Yaz0Header {
    /* 0x00 */ char magic[4];  // "Yaz0"
    /* 0x04 */ BE(u32) decompressedSize;
    /* 0x08 */ u8 pad[8];
};
static_assert(sizeof(Yaz0Header) == 0x10);

// Returns the decompressed payload when `bytes` begins with a Yaz0 header, and
// std::nullopt for anything else. The output buffer is sized from the header's
// decompressedSize field and filled by JKRDecomp::decodeSZS.
// NOTE(review): the input length is not passed to the decoder, so a truncated
// stream is not detected here — confirm decodeSZS's behaviour on short input.
std::optional<std::vector<u8>> tryDecodeYaz0(std::span<const u8> bytes) {
    const bool looksLikeYaz0 = bytes.size() >= sizeof(Yaz0Header) &&
                               std::memcmp(bytes.data(), "Yaz0", 4) == 0;
    if (!looksLikeYaz0) {
        return std::nullopt;
    }

    const u32 expandedSize =
        reinterpret_cast<const Yaz0Header*>(bytes.data())->decompressedSize;
    std::vector<u8> decoded(expandedSize);
    // decodeSZS takes a non-const source pointer, hence the const_cast.
    u8* const src = const_cast<u8*>(bytes.data());
    JKRDecomp::decodeSZS(src, decoded.data(), expandedSize, 0);
    return decoded;
}
std::optional<std::vector<u8>> readWholeFile(const std::filesystem::path& path) {
std::FILE* f = std::fopen(path.string().c_str(), "rb");
if (!f) return std::nullopt;
std::fseek(f, 0, SEEK_END);
long len = std::ftell(f);
std::fseek(f, 0, SEEK_SET);
if (len < 0) { std::fclose(f); return std::nullopt; }
std::vector<u8> buf(static_cast<size_t>(len));
size_t got = std::fread(buf.data(), 1, buf.size(), f);
std::fclose(f);
if (got != buf.size()) return std::nullopt;
return buf;
}
// Return the part of `gcPath` starting at the first "res/" occurrence, or an
// empty view when the path contains no "res/".
// Example: "/arcName/res/Stage/D_SB10/R00_00.arc" -> "res/Stage/D_SB10/R00_00.arc"
std::string_view extractResPath(std::string_view gcPath) {
    const auto marker = gcPath.find("res/");
    return marker == std::string_view::npos ? std::string_view{}
                                            : gcPath.substr(marker);
}
// ASCII case-insensitive variant of endsWithSuffix. Needed because RARC
// archives lowercase filenames at build time while the HD pack.gz keeps the
// original Wii-U camelCase, e.g. RARC "coverbg.bti" vs pack "coverBG.bti.gtx".
// Only 'A'..'Z' are folded; all other bytes must match exactly.
bool endsWithSuffixCI(std::string_view s, std::string_view suffix) {
    if (suffix.size() > s.size()) {
        return false;
    }
    const std::string_view tail = s.substr(s.size() - suffix.size());
    const auto sameIgnoringCase = [](unsigned char lhs, unsigned char rhs) {
        const auto fold = [](unsigned char c) -> unsigned char {
            return (c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c;
        };
        return fold(lhs) == fold(rhs);
    };
    return std::equal(tail.begin(), tail.end(), suffix.begin(), sameIgnoringCase);
}
// Look up the HD texture container for an arc-relative path such as
// "bmdr/model.bmd". Pack entries are named "tex/.../<arc-rel>.gtx"; the first
// entry whose payload starts with the "Gfx2" magic and whose name ends with
// "/<arc-rel>.gtx" (ASCII case-insensitive) is returned, or nullptr if none.
const TmpkEntry* findGtxBySuffix(const TphdPack& pack, std::string_view arcRelPath) {
    std::string wanted;
    wanted.reserve(arcRelPath.size() + 5);
    wanted += '/';
    wanted.append(arcRelPath);
    wanted += ".gtx";

    for (const TmpkEntry& entry : pack.entries()) {
        const bool isGfx2 = entry.data.size() >= 4 &&
                            std::memcmp(entry.data.data(), "Gfx2", 4) == 0;
        if (isGfx2 && endsWithSuffixCI(entry.name, wanted)) {
            return &entry;
        }
    }
    return nullptr;
}
// Post-deswizzle CPU expansions to RGBA8. Used for formats whose HD layout
// can't be directly sampled with a GPU view swizzle (IA4 nibble unpack,
// RGB565 16-bit), and as a fallback if R8_PC/RG8_PC view swizzle isn't
// available. GC sampling semantics: I8 -> (I,I,I,I); IA4/IA8 -> (I,I,I,A).

// Expand 16-bit RGB565 pixels to RGBA8 with opaque alpha. The output always
// holds width*height pixels; pixels for which `in` has no complete 16-bit
// value are left zero-initialised.
std::vector<u8> expandR5G6B5toRgba8(std::span<const u8> in, u32 width, u32 height) {
    const size_t pixelCount = static_cast<size_t>(width) * height;
    std::vector<u8> out(pixelCount * 4);

    // Only convert pixels that have both source bytes available.
    const size_t convertible = std::min(pixelCount, in.size() / 2);
    u8* dst = out.data();
    for (size_t px = 0; px < convertible; ++px, dst += 4) {
        // GX2 stores RGB565 pixel data in GPU-native LE
        u16 packed;
        std::memcpy(&packed, in.data() + px * 2, sizeof(packed));
        const u8 red = static_cast<u8>((packed >> 11) & 0x1F);
        const u8 green = static_cast<u8>((packed >> 5) & 0x3F);
        const u8 blue = static_cast<u8>(packed & 0x1F);
        // Replicate the high bits into the low bits to fill the 8-bit range.
        dst[0] = static_cast<u8>((red << 3) | (red >> 2));
        dst[1] = static_cast<u8>((green << 2) | (green >> 4));
        dst[2] = static_cast<u8>((blue << 3) | (blue >> 2));
        dst[3] = 0xFF;
    }
    return out;
}
// Expand IA4 to RGBA8 as (I, I, I, A).
// IA4: high nibble = A, low nibble = I (matches aurora's GC IA4 decoder).
// Each nibble is duplicated to fill 8 bits. The output always holds
// width*height pixels; pixels beyond the end of `in` stay zero-initialised.
std::vector<u8> expandIA4toRgba8(std::span<const u8> in, u32 width, u32 height) {
    const size_t pixelCount = static_cast<size_t>(width) * height;
    std::vector<u8> out(pixelCount * 4);

    const size_t convertible = std::min(pixelCount, in.size());
    for (size_t px = 0; px < convertible; ++px) {
        const u8 packed = in[px];
        const u8 alpha = static_cast<u8>((packed & 0xF0) | (packed >> 4));
        const u8 intensity = static_cast<u8>(((packed & 0x0F) << 4) | (packed & 0x0F));
        u8* const dst = out.data() + px * 4;
        dst[0] = intensity;
        dst[1] = intensity;
        dst[2] = intensity;
        dst[3] = alpha;
    }
    return out;
}
// CPU post-processing step applied to a level after deswizzling
// (see applyExpansion).
enum class Expansion {
None,
R5G6B5_to_RGBA8,
IA4_to_RGBA8,
};
// One row of the GX2 -> PC format table: how to deswizzle a GX2 surface format
// and which aurora format the decoded bytes are registered as.
struct Gx2FormatMapping {
u32 gx2Format; // GX2 surface format
u8 newGxFormat; // Aurora PC-target format
u32 bpp; // Deswizzle bits-per-pixel (per pixel, or per 4x4 block for BCn)
bool isBcn;
Expansion expansion; // Optional post-deswizzle CPU expansion
};
// I8/IA8 pass through as R8_PC/RG8_PC (aurora applies .rrrr/.rrrg view
// swizzle on the GPU side — half / quarter the VRAM of CPU-expanded RGBA8).
// IA4 + RGB565 need CPU expansion (nibble / 16-bit unpack). CMPR stays
// BC1_PC (compressed on the GPU).
// Surfaces whose format is not listed here are skipped by the callers of
// findFormatMapping().
constexpr Gx2FormatMapping kFormatMap[] = {
// gx2 fmt PC target bpp isBcn expansion
{ 0x01 /* I8 */, 0x41 /* R8_PC */, 8, false, Expansion::None },
{ 0x02 /* IA4 */, 0x46 /* RGBA8_PC */, 8, false, Expansion::IA4_to_RGBA8 },
{ 0x07 /* IA8 */, 0x43 /* RG8_PC */, 16, false, Expansion::None },
{ 0x08 /* RGB565 */, 0x46 /* RGBA8_PC */, 16, false, Expansion::R5G6B5_to_RGBA8 },
{ 0x1A /* RGBA8 */, 0x46 /* RGBA8_PC */, 32, false, Expansion::None },
{ 0x31 /* CMPR */, 0x4E /* BC1_PC */, 64, true, Expansion::None },
};
// Return the kFormatMap row for `gx2Format`, or nullptr when the format is not
// supported.
const Gx2FormatMapping* findFormatMapping(u32 gx2Format) {
    const Gx2FormatMapping* const last = std::end(kFormatMap);
    const Gx2FormatMapping* const hit = std::find_if(
        std::begin(kFormatMap), last,
        [gx2Format](const Gx2FormatMapping& row) { return row.gx2Format == gx2Format; });
    return hit != last ? hit : nullptr;
}
// Apply the requested CPU expansion to a deswizzled level of w x h pixels.
// Expansion::None returns `linear` unchanged.
std::vector<u8> applyExpansion(Expansion exp, std::vector<u8> linear, u32 w, u32 h) {
    if (exp == Expansion::R5G6B5_to_RGBA8) {
        return expandR5G6B5toRgba8(linear, w, h);
    }
    if (exp == Expansion::IA4_to_RGBA8) {
        return expandIA4toRgba8(linear, w, h);
    }
    return linear;
}
// Per-mip tile-mode + pitch. Demote rule mirrored from decaf-emu's
// R600AddrLib::ComputeSurfaceMipLevelTileMode (MIT, AMD-derived) — see
// AddrLib.cpp header for the full copyright notice.
//
// R700 macro-tile size: 32 × 16 elements (BCN element = 4×4 block).
// Mips below that are demoted to Tiled1DThin1 (microtile-only, 8-element
// align).
struct MipLevelDesc {
u32 width;
u32 height;
u32 pitch;
addrlib::TileMode tileMode;
};
MipLevelDesc mipLevelDesc(const GtxSurface& s, u32 level, bool isBcn, u32 bpp) {
MipLevelDesc d{};
d.width = std::max(1u, s.width >> level);
d.height = std::max(1u, s.height >> level);
d.tileMode = static_cast<addrlib::TileMode>(s.tileMode);
if (level == 0) {
d.pitch = s.pitch;
return d;
}
if (d.tileMode == addrlib::TileMode::Tiled2DThin1 ||
d.tileMode == addrlib::TileMode::Tiled2BThin1) {
// Mirror decaf's widthAlignFactor: when one microtile is smaller than
// the pipe interleave (256 B), the demote threshold scales up.
const u32 microTileBytes = (bpp * 64) / 8;
const u32 widthAlignFactor = (microTileBytes <= 256) ? 256 / microTileBytes : 1;
const u32 demoteWidth = widthAlignFactor * 32;
const u32 wElem = isBcn ? (d.width + 3) / 4 : d.width;
const u32 hElem = isBcn ? (d.height + 3) / 4 : d.height;
if (wElem < demoteWidth || hElem < 16) {
d.tileMode = addrlib::TileMode::Tiled1DThin1;
}
}
const bool is1D = (d.tileMode == addrlib::TileMode::Tiled1DThin1 ||
d.tileMode == addrlib::TileMode::Tiled1DThick);
const u32 alignment = is1D ? 8u : 32u;
u32 levelPitch = std::max(1u, s.pitch >> level);
levelPitch = ((levelPitch + alignment - 1) / alignment) * alignment;
d.pitch = levelPitch;
return d;
}
// Slice the bytes for a single mip level. Level 0 is the base image; levels
// >= 1 come from the mip chain. Wii-U quirk: mipOffsets[0] is often
// image_size, not a mipData offset, so level 1 always starts at 0 in mipData
// and level >= 2 uses mipOffsets[level - 1]. A level ends at the next level's
// offset when that offset is usable, otherwise at the end of mipData.
// Returns an empty span for levels outside the surface or outside mipData.
std::span<const u8> mipLevelData(const GtxSurface& s, u32 level) {
    if (level == 0) {
        return s.baseData;
    }
    if (level >= s.mipCount) {
        return {};
    }

    const size_t chainSize = s.mipData.size();
    const bool hasOwnOffset = level >= 2 && level - 1 < s.mipOffsets.size();
    const u32 start = hasOwnOffset ? s.mipOffsets[level - 1] : 0u;
    if (start >= chainSize) {
        return {};
    }

    u32 end = static_cast<u32>(chainSize);
    const bool hasFollowingLevel = level + 1 < s.mipCount && level < s.mipOffsets.size();
    if (hasFollowingLevel) {
        const u32 next = s.mipOffsets[level];
        if (start < next && next <= chainSize) {
            end = next;
        }
    }
    return s.mipData.subspan(start, end - start);
}
struct DeswizzleResult {
std::vector<u8> bytes;
u32 mipCount;
};
DeswizzleResult deswizzleAllMips(const Gx2FormatMapping& m, const GtxSurface& s) {
DeswizzleResult out{};
const u32 maxLevels = std::min(s.mipCount, 13u);
for (u32 level = 0; level < maxLevels; ++level) {
const std::span<const u8> slice = mipLevelData(s, level);
if (slice.empty()) break;
const MipLevelDesc lvl = mipLevelDesc(s, level, m.isBcn, m.bpp);
const addrlib::SurfaceDesc desc{
.width = lvl.width,
.height = lvl.height,
.pitch = lvl.pitch,
.bpp = m.bpp,
.tileMode = lvl.tileMode,
.swizzle = s.swizzle,
.isBcn = m.isBcn,
.isDepth = false,
};
auto linear = applyExpansion(m.expansion,
addrlib::deswizzle(desc, slice),
lvl.width, lvl.height);
out.bytes.insert(out.bytes.end(), linear.begin(), linear.end());
out.mipCount = level + 1;
}
return out;
}
// Decode every mip level of `s` and register the result with aurora as the
// replacement for the texture whose pixel pointer is `pixelPtr`.
//   m          - format-table row selected for s.format
//   s          - parsed GTX surface to decode
//   pixelPtr   - lookup key handed to hd_register_replacement()
//   gtxName    - pack entry name, used for logging only
//   surfaceIdx - index of `s` within that GTX, used for logging only
// The replacement carries the base-level width/height, the PC target format
// from `m`, and the number of levels that were decoded.
// NOTE(review): if no level decodes, the replacement is still registered with
// empty bytes and mipCount clamped to 1 — confirm aurora tolerates that.
void registerHdSurface(const Gx2FormatMapping& m, const GtxSurface& s,
const void* pixelPtr, std::string_view gtxName,
u32 surfaceIdx) {
auto decoded = deswizzleAllMips(m, s);
// Log decoded vs. declared mip counts so truncated chains are visible.
HdLog.info("HD reg: ptr={} fmt=0x{:02X} {}x{} mips={}/{} bytes={} gtx={}[{}]",
pixelPtr, m.newGxFormat, s.width, s.height,
decoded.mipCount, s.mipCount, decoded.bytes.size(),
gtxName, surfaceIdx);
aurora::gfx::HdReplacement r;
r.bytes = std::move(decoded.bytes);
r.width = s.width;
r.height = s.height;
r.gxFormat = m.newGxFormat;
// decoded.mipCount is a separate field, so it is still valid after the move above.
r.mipCount = std::max(decoded.mipCount, 1u);
aurora::gfx::hd_register_replacement(pixelPtr, std::move(r));
}
// Lightweight RARC walker that returns per-file offsets without copying
// arc bytes — we need absolute pointers into the cached HD arc bytes
// (stable address) to match what the game later passes to GXInitTexObj.
struct ArcFileInfo {
    std::string path;  // e.g. "bmdr/model.bmd"
    u32 dataOffset;    // absolute offset from arc base
    u32 dataSize;
};

// List every regular file in a RARC archive.
//
// Paths are "<dir>/<file>" for files in non-root directories and "<file>" for
// files in the root node; directory entries and "." / ".." are skipped.
// Returns an empty vector when `arc` is not a RARC archive.
//
// All table offsets, counts and data ranges come from the archive itself, so
// they are validated against arc.size() before use: an out-of-range node table
// yields an empty result, directory walking stops at the first file entry that
// lies outside the buffer, and files whose data range does not fit inside the
// archive are omitted. Every returned (dataOffset, dataSize) pair is therefore
// safe to use as a slice of `arc`.
std::vector<ArcFileInfo> parseRarcFiles(std::span<const u8> arc) {
    std::vector<ArcFileInfo> out;
    if (arc.size() < 0x40 || std::memcmp(arc.data(), "RARC", 4) != 0) return out;

    constexpr size_t kMetaBase = sizeof(SArcHeader);  // = 0x20
    if (arc.size() < kMetaBase + sizeof(SArcDataInfo)) return out;

    const auto* hdr = reinterpret_cast<const SArcHeader*>(arc.data());
    const auto* dataInfo = reinterpret_cast<const SArcDataInfo*>(arc.data() + kMetaBase);
    const u32 nodeCount = dataInfo->num_nodes;
    const size_t nodeTbl = dataInfo->node_offset + kMetaBase;
    const size_t fileTbl = dataInfo->file_entry_offset + kMetaBase;
    const size_t strTbl = dataInfo->string_table_offset + kMetaBase;
    const size_t dataBase = kMetaBase + hdr->file_data_offset;
    const size_t arcSize = arc.size();

    // True when `count` records of `stride` bytes starting at `base` lie fully
    // inside the archive. Written with division so nothing can overflow.
    auto tableFits = [arcSize](size_t base, size_t count, size_t stride) {
        return base <= arcSize && count <= (arcSize - base) / stride;
    };

    // Read a NUL-terminated name from the string table; a missing terminator
    // ends the name at the end of the buffer, an out-of-range offset yields "".
    auto readStringAt = [&](u32 offset) -> std::string {
        if (strTbl >= arcSize || offset >= arcSize - strTbl) return {};
        const u8* start = arc.data() + strTbl + offset;
        const size_t maxLen = arcSize - strTbl - offset;
        const void* nul = std::memchr(start, 0, maxLen);
        const size_t len =
            nul ? static_cast<size_t>(static_cast<const u8*>(nul) - start) : maxLen;
        return std::string(reinterpret_cast<const char*>(start), len);
    };

    if (!tableFits(nodeTbl, nodeCount, sizeof(JKRArchive::SDIDirEntry))) return out;
    const auto* nodes =
        reinterpret_cast<const JKRArchive::SDIDirEntry*>(arc.data() + nodeTbl);

    for (u32 ni = 0; ni < nodeCount; ++ni) {
        const auto& node = nodes[ni];
        const std::string dirName = readStringAt(node.name_offset);
        const u16 fc = node.num_entries;
        const u32 firstIdx = node.first_file_index;
        const bool isRoot = (ni == 0);

        for (u32 fi = 0; fi < fc; ++fi) {
            const size_t entryIdx = static_cast<size_t>(firstIdx) + fi;
            // Later entries of this node lie even further out, so stop here.
            if (!tableFits(fileTbl, entryIdx + 1, sizeof(JKRArchive::SDIFileEntry))) break;
            const auto& entry = *reinterpret_cast<const JKRArchive::SDIFileEntry*>(
                arc.data() + fileTbl + entryIdx * sizeof(JKRArchive::SDIFileEntry));

            const u32 typeFlagsAndName = entry.type_flags_and_name_offset;
            const u8 typeFlags = static_cast<u8>(typeFlagsAndName >> 24);
            // Bit 0x01 = file, 0x02 = directory. We only want files.
            if ((typeFlags & 0x03) != 0x01) continue;

            std::string fname = readStringAt(typeFlagsAndName & 0xFFFFFF);
            if (fname == "." || fname == "..") continue;

            // Drop files whose payload would extend past the end of the archive.
            const u32 dataOffset = entry.data_offset;
            const u32 dataSize = entry.data_size;
            const size_t absOffset = dataBase + dataOffset;
            if (absOffset > arcSize || dataSize > arcSize - absOffset) continue;

            out.push_back({
                (!isRoot && !dirName.empty()) ? dirName + "/" + fname : std::move(fname),
                static_cast<u32>(absOffset),
                dataSize,
            });
        }
    }
    return out;
}
// Absolute offset of slot `slotIdx`'s BTI header within a BMD's TEX1 block.
// Returns 0 on failure (the TEX1 table never sits at offset 0, so 0 is a
// safe sentinel).
//
// Walks the J3D block list until the TEX1 block is found. Failure cases: `bmd`
// is not a "J3D2" file, it has no TEX1 block, the TEX1 header is truncated,
// `slotIdx` is not below the block's texture count, or the 0x20-byte BTI header
// would extend past the end of `bmd`. Size checks are written as subtractions
// so that offsets taken from the file cannot make them wrap.
u32 bmdSlotBtiOffset(std::span<const u8> bmd, u32 slotIdx) {
    constexpr size_t kBlocksOffset = offsetof(J3DModelFileData, mBlocks);  // = 0x20
    constexpr size_t kBtiHeaderSize = 0x20;
    if (bmd.size() < kBlocksOffset ||
        std::memcmp(bmd.data(), "J3D2", 4) != 0) return 0;

    const auto* fileData = reinterpret_cast<const J3DModelFileData*>(bmd.data());
    const u32 numSections = fileData->mBlockNum;
    size_t pos = kBlocksOffset;
    for (u32 i = 0; i < numSections; ++i) {
        if (pos > bmd.size() || bmd.size() - pos < sizeof(J3DModelBlock)) break;
        const auto* blk = reinterpret_cast<const J3DModelBlock*>(bmd.data() + pos);
        const u32 blockSize = blk->mBlockSize;
        if (blk->mBlockType == 'TEX1') {
            // The TEX1 header is larger than the generic block header checked
            // above; make sure all of it is present before reading its fields.
            if (bmd.size() - pos < sizeof(J3DTextureBlock)) return 0;
            const auto* tex1 = reinterpret_cast<const J3DTextureBlock*>(bmd.data() + pos);
            const u16 numTex = tex1->mTextureNum;
            if (slotIdx >= numTex) return 0;
            const size_t btiAbs = pos + static_cast<u32>(tex1->mpTextureRes) +
                                  static_cast<size_t>(slotIdx) * kBtiHeaderSize;
            if (btiAbs > bmd.size() || bmd.size() - btiAbs < kBtiHeaderSize) return 0;
            return static_cast<u32>(btiAbs);
        }
        // A zero-sized block would never advance; give up instead of looping.
        if (blockSize == 0) break;
        pos += blockSize;
    }
    return 0;
}
// Walk the HD arc, pair BMDs with their pack.gz GTX entries, deswizzle each
// HD surface, and register the decoded bytes with aurora under the absolute
// pointer that GXInitTexObj will later receive.
//
// arcBytes must be the STABLE cache vector — its data() must not move after
// this call, or aurora's pointer lookups will miss.
//
// Parameters:
//   arcBytes - archive bytes; modified in place (ResTIMG::imageOffset of every
//              replaced texture is rewritten).
//   files    - file list of that same archive (see parseRarcFiles).
//   pack     - HD pack holding the "<arc-rel>.gtx" texture containers.
//   arcLabel - name used in the summary log line only.
//
// NOTE(review): f.dataOffset / f.dataSize are used without being re-checked
// against arcBytes.size() here; this relies on `files` describing `arcBytes`.
void registerHdTexturesForArc(std::vector<u8>& arcBytes,
const std::vector<ArcFileInfo>& files,
const TphdPack& pack,
std::string_view arcLabel) {
// Counters for the summary log line at the end.
size_t bmdReg = 0;
size_t btiReg = 0;
// Phase A: per-slot textures inside BMD/BDL models.
// GTX surface i is paired with TEX1 slot i of the model.
for (const auto& f : files) {
// Extension match is case-sensitive; the pack lookup below is not.
if (!endsWithSuffix(f.path, ".bmd") && !endsWithSuffix(f.path, ".bdl")) continue;
const TmpkEntry* gtx = findGtxBySuffix(pack, f.path);
if (!gtx) continue;
std::span<const u8> bmdBytes(arcBytes.data() + f.dataOffset, f.dataSize);
auto surfaces = parseGtx(gtx->data);
for (u32 i = 0; i < surfaces.size(); ++i) {
const auto& s = surfaces[i];
// Skip surfaces without base-level pixels or with an unsupported format.
if (s.baseData.empty()) continue;
const Gx2FormatMapping* m = findFormatMapping(s.format);
if (!m) continue;
// HD-stub BMDs collapse every BTI's imageOffset to the same
// pixel address. Rewrite each to be slot-unique so our pointer
// map doesn't overwrite.
const u32 btiAbs = bmdSlotBtiOffset(bmdBytes, i);
// 0 is bmdSlotBtiOffset's failure sentinel (no TEX1 / slot out of range).
if (btiAbs == 0) continue;
auto* timg = reinterpret_cast<ResTIMG*>(
arcBytes.data() + f.dataOffset + btiAbs);
if (timg->imageOffset == 0) {
HdLog.debug("Skip cross-arc placeholder slot {} in {}: "
"imageOffset==0",
i, gtx->name);
continue;
}
// New offset is relative to this slot's BTI header; the registered key
// below is header address + newImgOff, which differs for every slot.
const u32 newImgOff = 0x20 + i * 0x20;
timg->imageOffset = static_cast<s32>(newImgOff);
registerHdSurface(*m, s,
arcBytes.data() + f.dataOffset + btiAbs + newImgOff,
gtx->name, i);
++bmdReg;
}
}
// Phase B: standalone .bti files. Each BTI is its own arc entry; the
// game loads it via JUTTexture (or similar) which calls GXInitTexObj
// with `(u8*)resTIMG + imageOffset`. Register that exact pointer.
for (const auto& f : files) {
if (!endsWithSuffix(f.path, ".bti")) continue;
// Need at least the 0x20-byte BTI header to patch imageOffset.
if (f.dataSize < 0x20) continue;
const TmpkEntry* gtx = findGtxBySuffix(pack, f.path);
if (!gtx) continue;
auto surfaces = parseGtx(gtx->data);
if (surfaces.empty()) continue;
// Only the first surface of the GTX is used for a standalone BTI.
const auto& s = surfaces[0];
if (s.baseData.empty()) continue;
const Gx2FormatMapping* m = findFormatMapping(s.format);
if (!m) continue;
// HD-stub BTIs put garbage in imageOffset. Write 0x20 so BOTH
// consumer paths land on the same address (JUTTexture::storeTIMG and
// direct-access helpers like dKyr_set_btitex_common). Both compute
// i_img + 0x20, matching where we register below.
auto* timg = reinterpret_cast<ResTIMG*>(arcBytes.data() + f.dataOffset);
timg->imageOffset = 0x20;
registerHdSurface(*m, s, arcBytes.data() + f.dataOffset + 0x20,
gtx->name, 0);
++btiReg;
}
HdLog.info("registerHdTextures[{}]: {} BMD-slot, {} standalone-BTI replacements",
arcLabel, bmdReg, btiReg);
}
}
// Point the HD layer at a new content directory (empty disables it) and drop
// everything derived from the previous one: mounted archive buffers, the
// entry-number registry, and aurora's texture replacements and arc ranges.
void setHdContentPath(std::filesystem::path contentPath) {
    g_contentPath = std::move(contentPath);

    const std::lock_guard<std::mutex> guard(g_cacheMutex);
    g_mountBuffers().clear();
    g_entryNumToBytes().clear();
    aurora::gfx::hd_clear_replacements();
    aurora::gfx::hd_clear_arc_ranges();

    const std::string shownPath =
        g_contentPath.empty() ? std::string("(disabled)") : g_contentPath.string();
    HdLog.info("HD content path set to: {}", shownPath);
}
// HD arcs whose Wii-U layouts don't match the GC UI pipeline.
// tryLoadHdArchive() compares the extracted "res/..." path against these
// entries exactly (case-sensitive) and falls back to the GC archive on a match.
static constexpr std::string_view kHdSkipList[] = {
"res/Layout/button.arc",
"res/Layout/Title2D.arc",
"res/Layout/main2D.arc",
};
// Try to substitute the HD variant of the archive addressed by `gcPath`.
//
// Returns std::nullopt (caller keeps the vanilla GC archive) when HD content is
// disabled, the path has no "res/" component, the archive is on the skip list,
// no HD file exists or it cannot be read, or the HD archive contains no
// model/texture/font files. Otherwise the HD archive is read (and Yaz0-decoded
// if needed), stored in a new mount buffer, registered with aurora as an arc
// range, and — when a sidecar "<name>.pack.gz" loads — its HD textures are
// registered. The returned pointer refers to that mount buffer.
//
// Every successful call reads the file again and appends a fresh buffer.
// File-existence probes use the non-throwing std::filesystem overloads so that
// an OS-level error (e.g. permission denied) falls back to the GC asset instead
// of raising std::filesystem::filesystem_error.
std::optional<std::vector<u8>*> tryLoadHdArchive(std::string_view gcPath) {
    if (g_contentPath.empty()) return std::nullopt;

    const std::string_view resPath = extractResPath(gcPath);
    if (resPath.empty()) return std::nullopt;
    for (auto skip : kHdSkipList) {
        if (resPath == skip) return std::nullopt;
    }

    const std::filesystem::path hdArcPath = g_contentPath / std::string(resPath);
    std::error_code ec;
    if (!std::filesystem::exists(hdArcPath, ec)) {
        return std::nullopt;  // no HD override (or probe failed) — vanilla GC path
    }

    auto hdBytesOpt = readWholeFile(hdArcPath);
    if (!hdBytesOpt) {
        HdLog.warn("HD arc read failed: {}", hdArcPath.string());
        return std::nullopt;
    }
    if (auto inflated = tryDecodeYaz0(*hdBytesOpt)) {
        HdLog.info("HD arc Yaz0-decompressed: {} -> {} bytes",
                   hdArcPath.filename().string(), inflated->size());
        hdBytesOpt = std::move(inflated);
    }

    // Only offsets are recorded here, so the list stays valid after the bytes
    // are moved into the mount buffer below.
    auto hdFiles = parseRarcFiles(std::span<const u8>(
        hdBytesOpt->data(), hdBytesOpt->size()));
    const bool hasReplaceableContent = std::any_of(hdFiles.begin(), hdFiles.end(),
        [](const ArcFileInfo& f) {
            return endsWithSuffix(f.path, ".bmd") ||
                   endsWithSuffix(f.path, ".bdl") ||
                   endsWithSuffix(f.path, ".bti") ||
                   endsWithSuffix(f.path, ".bfn") ||
                   endsWithSuffix(f.path, ".brfnt");
        });
    if (!hasReplaceableContent) {
        HdLog.info("HD arc {} has no replaceable assets — skipping",
                   hdArcPath.filename().string());
        return std::nullopt;
    }

    // Sidecar pack.gz holds the HD textures.
    auto hdPackPath = hdArcPath;
    hdPackPath.replace_extension(".pack.gz");
    std::optional<TphdPack> hdPack;
    if (std::filesystem::exists(hdPackPath, ec)) {
        hdPack = TphdPack::loadFromFile(hdPackPath);
        if (!hdPack) HdLog.warn("HD pack failed to load: {}", hdPackPath.string());
    }

    // std::list keeps element addresses stable for aurora's pointer map.
    std::vector<u8>* mountBytes;
    std::string filename = hdArcPath.filename().string();
    {
        std::lock_guard lk(g_cacheMutex);
        g_mountBuffers().emplace_back(std::move(*hdBytesOpt));
        mountBytes = &g_mountBuffers().back();
    }
    HdLog.info("HD arc mount buffer allocated: {} at {} ({} bytes, pack.gz={})",
               filename, static_cast<const void*>(mountBytes->data()),
               mountBytes->size(), hdPack ? "yes" : "no");
    aurora::gfx::hd_register_arc_range(mountBytes->data(), mountBytes->size(),
                                       filename);
    if (hdPack) {
        registerHdTexturesForArc(*mountBytes, hdFiles, *hdPack, filename);
    }
    return mountBytes;
}
// Remember `bytes` as the HD content for DVD entry `entryNum`, replacing any
// earlier registration. Negative entry numbers and null pointers are ignored.
void registerHdBytesForEntryNum(s32 entryNum, const std::vector<u8>* bytes) {
    const bool usable = entryNum >= 0 && bytes != nullptr;
    if (!usable) {
        return;
    }
    const std::lock_guard<std::mutex> guard(g_cacheMutex);
    g_entryNumToBytes().insert_or_assign(entryNum, bytes);
}
// Look up the HD bytes registered for DVD entry `entryNum`; nullptr when the
// entry number is negative or nothing was registered for it.
const std::vector<u8>* getHdBytesForEntryNum(s32 entryNum) {
    if (entryNum < 0) {
        return nullptr;
    }
    const std::lock_guard<std::mutex> guard(g_cacheMutex);
    const auto& registry = g_entryNumToBytes();
    if (const auto hit = registry.find(entryNum); hit != registry.end()) {
        return hit->second;
    }
    return nullptr;
}
}
+30
View File
@@ -0,0 +1,30 @@
#ifndef DUSK_TPHD_HD_ASSET_LAYER_HPP
#define DUSK_TPHD_HD_ASSET_LAYER_HPP
#include <filesystem>
#include <optional>
#include <string_view>
#include <vector>
#include <dolphin/types.h>
namespace dusk::tphd {
// Configure the base directory for HD asset overrides. `contentPath` should
// point at a Wii-U `content/` directory (the parent of `res/`). Empty path
// disables HD overrides.
// Calling this also discards every previously loaded HD archive buffer, the
// entry-number registry, and the texture replacements registered with aurora.
void setHdContentPath(std::filesystem::path contentPath);
// Loads the HD variant of the archive addressed by `gcPath`, if one exists,
// and returns a pointer to its bytes; returns std::nullopt when there is no
// usable HD variant (the caller should then use the GC archive).
// The returned pointer must not be used after the next setHdContentPath()
// call, which frees the buffer it points to.
std::optional<std::vector<u8>*> tryLoadHdArchive(std::string_view gcPath);
// HD bytes lookup by DVD entry number, used by JKRMemArchive's entryNum
// constructor to substitute HD content.
// register...: ignores negative entry numbers and null `bytes`; a later
// registration for the same entry number replaces the earlier one.
// get...: returns nullptr when nothing is registered for `entryNum`.
void registerHdBytesForEntryNum(s32 entryNum, const std::vector<u8>* bytes);
const std::vector<u8>* getHdBytesForEntryNum(s32 entryNum);
}
#endif
+120
View File
@@ -0,0 +1,120 @@
#include "TphdPack.hpp"
#include <zlib.h>
#include <cstdio>
#include <cstring>
#include "dusk/endian.h"
#include "dusk/logging.h"
static aurora::Module TphdLog("dusk::tphd");
namespace dusk::tphd {
namespace {
std::optional<std::vector<u8>> readFile(const std::filesystem::path& path) {
std::FILE* f = std::fopen(path.string().c_str(), "rb");
if (!f) return std::nullopt;
std::fseek(f, 0, SEEK_END);
long len = std::ftell(f);
std::fseek(f, 0, SEEK_SET);
if (len < 0) { std::fclose(f); return std::nullopt; }
std::vector<u8> buf(static_cast<size_t>(len));
size_t got = std::fread(buf.data(), 1, buf.size(), f);
std::fclose(f);
if (got != buf.size()) return std::nullopt;
return buf;
}
}
// Inflate a single-member gzip stream in one shot.
//
// The output buffer is sized from the ISIZE trailer field (the last four bytes
// of the stream, stored little-endian). Returns std::nullopt when the input is
// too short to be gzip, lacks the gzip magic, is too large for zlib's 32-bit
// input counter, or does not inflate to a clean end of stream within ISIZE
// bytes. On success the result holds exactly the bytes zlib produced.
std::optional<std::vector<u8>> decompressGzip(std::span<const u8> in) {
    // 18 bytes = 10-byte fixed header + 8-byte trailer (CRC32 + ISIZE).
    if (in.size() < 18) return std::nullopt;
    if (in[0] != 0x1F || in[1] != 0x8B) return std::nullopt;

    // z_stream counts input in a uInt; refuse rather than silently truncate.
    const uInt availIn = static_cast<uInt>(in.size());
    if (availIn != in.size()) return std::nullopt;

    // Decode ISIZE byte by byte so the result does not depend on host endianness.
    const u8* tail = in.data() + in.size() - 4;
    const u32 isize = static_cast<u32>(tail[0]) |
                      (static_cast<u32>(tail[1]) << 8) |
                      (static_cast<u32>(tail[2]) << 16) |
                      (static_cast<u32>(tail[3]) << 24);
    std::vector<u8> out(isize);

    z_stream strm{};
    strm.next_in = const_cast<Bytef*>(in.data());
    strm.avail_in = availIn;
    strm.next_out = out.data();
    strm.avail_out = static_cast<uInt>(out.size());
    // 15 = maximum window size, +16 = expect a gzip wrapper.
    if (inflateInit2(&strm, 15 + 16) != Z_OK) return std::nullopt;
    const int rc = inflate(&strm, Z_FINISH);
    const size_t produced = static_cast<size_t>(strm.total_out);
    inflateEnd(&strm);
    if (rc != Z_STREAM_END) return std::nullopt;

    // Trim in case the stream ended before filling the ISIZE-sized buffer.
    out.resize(produced);
    return out;
}
// Parse an inflated TMPK pack into its entry table.
//
// Layout: a TmpkRawHeader followed by `count` TmpkRawEntry records; each record
// holds offsets, relative to the start of `in`, of a NUL-terminated name and of
// the entry's data. Returned names and data are views into `in`, which must
// outlive them. Returns an empty vector if the magic is wrong or the entry
// table does not fit; entries whose name or data range lies outside `in` are
// skipped.
std::vector<TmpkEntry> parseTmpk(std::span<const u8> in) {
    std::vector<TmpkEntry> out;
    if (in.size() < sizeof(TmpkRawHeader)) return out;
    const auto* hdr = reinterpret_cast<const TmpkRawHeader*>(in.data());
    if (std::memcmp(hdr->magic, "TMPK", 4) != 0) return out;

    const u32 count = hdr->count;
    // Division form: cannot overflow however large `count` is.
    if (count > (in.size() - sizeof(TmpkRawHeader)) / sizeof(TmpkRawEntry)) return out;
    const auto* entries = reinterpret_cast<const TmpkRawEntry*>(
        in.data() + sizeof(TmpkRawHeader));

    out.reserve(count);
    for (u32 i = 0; i < count; ++i) {
        const u32 nameOff = entries[i].nameOff;
        const u32 dataOff = entries[i].dataOff;
        const u32 dataSize = entries[i].dataSize;
        const u32 flags = entries[i].flags;
        // Compare by subtraction: dataOff + dataSize is 32-bit arithmetic and
        // could wrap around, letting an out-of-range entry through to subspan().
        if (nameOff >= in.size() || dataOff > in.size() ||
            dataSize > in.size() - dataOff) {
            continue;
        }

        const char* nameStart = reinterpret_cast<const char*>(in.data() + nameOff);
        const size_t maxLen = in.size() - nameOff;
        const void* nul = std::memchr(nameStart, 0, maxLen);
        // A name without a terminator runs to the end of the buffer.
        const size_t nameLen =
            nul ? static_cast<size_t>(static_cast<const char*>(nul) - nameStart) : maxLen;
        out.push_back({
            std::string_view(nameStart, nameLen),
            in.subspan(dataOff, dataSize),
            flags,
        });
    }
    return out;
}
// Build a pack from gzip-compressed TMPK bytes. Returns std::nullopt when the
// gzip stream cannot be inflated. A successfully inflated buffer that yields
// no entries still produces a (empty) pack, with a warning logged.
std::optional<TphdPack> TphdPack::loadFromMemory(std::span<const u8> gzipBytes) {
    std::optional<std::vector<u8>> inflated = decompressGzip(gzipBytes);
    if (!inflated.has_value()) {
        return std::nullopt;
    }

    TphdPack pack;
    pack.m_buffer = std::move(*inflated);
    // Entries are views into m_buffer, so parse only after the pack owns it.
    const std::span<const u8> owned(pack.m_buffer.data(), pack.m_buffer.size());
    pack.m_entries = parseTmpk(owned);

    const bool parsedNothing = pack.m_entries.empty() && !pack.m_buffer.empty();
    if (parsedNothing) {
        TphdLog.warn("TMPK parse yielded 0 entries (buffer size {})", pack.m_buffer.size());
    }
    return pack;
}
std::optional<TphdPack> TphdPack::loadFromFile(const std::filesystem::path& path) {
auto raw = readFile(path);
if (!raw) {
TphdLog.error("Failed to read {}", path.string());
return std::nullopt;
}
return loadFromMemory(*raw);
}
// Return the first entry whose name equals `name` exactly (case-sensitive), or
// nullptr if there is none.
const TmpkEntry* TphdPack::find(std::string_view name) const {
    const size_t entryCount = m_entries.size();
    for (size_t idx = 0; idx < entryCount; ++idx) {
        const TmpkEntry& candidate = m_entries[idx];
        if (candidate.name == name) {
            return &candidate;
        }
    }
    return nullptr;
}
}
+59
View File
@@ -0,0 +1,59 @@
#ifndef DUSK_TPHD_TPHD_PACK_HPP
#define DUSK_TPHD_TPHD_PACK_HPP
#include <filesystem>
#include <optional>
#include <span>
#include <string_view>
#include <vector>
#include <dolphin/types.h>
#include "dusk/endian.h"
namespace dusk::tphd {
// On-disk TMPK layout.
// The header is followed immediately by `count` TmpkRawEntry records.
struct TmpkRawHeader {
/* 0x00 */ char magic[4]; // "TMPK"
/* 0x04 */ BE(u32) count;
/* 0x08 */ u8 pad[8];
};
static_assert(sizeof(TmpkRawHeader) == 0x10);
// One entry-table record. nameOff and dataOff are byte offsets from the start
// of the inflated pack buffer; the name is NUL-terminated.
struct TmpkRawEntry {
/* 0x00 */ BE(u32) nameOff;
/* 0x04 */ BE(u32) dataOff;
/* 0x08 */ BE(u32) dataSize;
/* 0x0C */ BE(u32) flags;
};
static_assert(sizeof(TmpkRawEntry) == 0x10);
// Parsed TMPK entry: a view into the inflated pack buffer.
// Both `name` and `data` reference that buffer and are only valid while it is
// alive and unmoved.
struct TmpkEntry {
std::string_view name;
std::span<const u8> data;
u32 flags;
};
// An inflated TMPK pack together with its parsed entry table. Instances are
// created through the two static load functions only.
// NOTE(review): m_entries holds views into m_buffer, so a *copy* of a TphdPack
// would have entries referring to the source object's buffer — the type looks
// intended to be moved only; confirm no caller copies it.
class TphdPack {
public:
// Both return std::nullopt when the input cannot be read or inflated.
static std::optional<TphdPack> loadFromFile(const std::filesystem::path& path);
static std::optional<TphdPack> loadFromMemory(std::span<const u8> gzipBytes);
const std::vector<TmpkEntry>& entries() const { return m_entries; }
// Exact, case-sensitive name lookup; nullptr when no entry matches.
const TmpkEntry* find(std::string_view name) const;
private:
TphdPack() = default;
std::vector<u8> m_buffer; // inflated pack bytes; backing store for m_entries
std::vector<TmpkEntry> m_entries;
};
// Inflate a gzip stream; std::nullopt on malformed or truncated input.
std::optional<std::vector<u8>> decompressGzip(std::span<const u8> in);
// Parse an inflated TMPK buffer; the returned entries are views into `in`.
std::vector<TmpkEntry> parseTmpk(std::span<const u8> in);
}
#endif
+4 -1
View File
@@ -730,7 +730,10 @@ u8 var_r30 = fopAcM::HeapAdjustEntry;
#endif
u32 size = i_size & 0xFFFFFF;
#if TARGET_PC
#if DUSK_TPHD
// With TP-HD asset overlays, individual BMDs can be 5-10x their GC originals.
size *= 8;
#elif TARGET_PC
size *= 2;
#endif
bool result = fopAcM_entrySolidHeap_(i_actor, i_heapCallback, size);
+12
View File
@@ -16,6 +16,10 @@
#include "m_Do/m_Do_ext.h"
#include "os_report.h"
#if DUSK_TPHD
#include "dusk/tphd/HdAssetLayer.hpp"
#endif
s32 mDoDvdThd::main(void* param_0) {
JKRThread(OSGetCurrentThread(), 0);
#if TARGET_PC
@@ -107,6 +111,14 @@ static s32 my_DVDConvertPathToEntrynum(char const* path) {
JUT_WARN(437, "can\'t open:[%s]\n", path);
}
}
#endif
#if DUSK_TPHD
// TPHD arc redirect: cache HD bytes by entry number.
if (entrynum >= 0 && path != NULL) {
if (auto hdBytes = dusk::tphd::tryLoadHdArchive(path)) {
dusk::tphd::registerHdBytesForEntryNum(entrynum, *hdBytes);
}
}
#endif
return entrynum;
}
+14
View File
@@ -813,6 +813,16 @@ int mDoMch_Create() {
gameHeapSize += 0x200000;
gameHeapSize += 0x100000;
dynamicLinkHeapSize = 0x180000;
#if DUSK_TPHD
// HD assets ship much larger archives/actors (CMPR texture injection
// pushes stage BMDs past 8 MB and Link's Kmdl past 5 MB). Parent arena is
// 1 GB on PC, distribute generously across every heap that holds model
// or animation data at runtime.
archiveHeapSize += 0x08000000; // +128 MB (large RARCs)
gameHeapSize += 0x10000000; // +256 MB (parent of per-actor heaps like "Alink original")
j2dHeapSize += 0x01000000; // +16 MB (UI textures)
dynamicLinkHeapSize += 0x01000000; // +16 MB
#endif
#if !DEBUG
// Fakematch because the heap sizes differ between debug and retail.
@@ -862,7 +872,11 @@ int mDoMch_Create() {
#endif
archiveHeapSize *= 2;
j2dHeapSize *= 2;
#if DUSK_TPHD
gameHeapSize *= 2;
#else
gameHeapSize *= 20; // NOTE: increased from 2 to 20 to try to solve heap alloc crashes. maybe do a better fix later
#endif
#endif
JFWSystem::setSysHeapSize(arenaSize);
+20
View File
@@ -79,6 +79,10 @@
#include "dusk/settings.h"
#include "dusk/version.hpp"
#include "dusk/discord_presence.hpp"
#if DUSK_TPHD
#include "dusk/tphd/HdAssetLayer.hpp"
#include "dusk/tphd/TphdPack.hpp"
#endif
#include "tracy/Tracy.hpp"
#include "f_pc/f_pc_draw.h"
#include "tracy/Tracy.hpp"
@@ -569,7 +573,14 @@ int game_main(int argc, char* argv[]) {
config.desiredBackend = ResolveDesiredBackend(parsed_arg_options);
config.logCallback = &aurora_log_callback;
config.logLevel = startupLogLevel;
// 256 MB is GC-sized. HD-asset injection (HD BMDs + HD pixel buffers
// registered via aurora::gfx::hd_register_replacement) blows past
// that for stages+Link, so bump to 1 GB when TPHD is on.
#if DUSK_TPHD
config.mem1Size = 1024 * 1024 * 1024;
#elif TARGET_PC
config.mem1Size = 256 * 1024 * 1024;
#endif
config.mem2Size = 24 * 1024 * 1024;
config.allowJoystickBackgroundEvents = true;
config.imGuiInitCallback = &aurora_imgui_init_callback;
@@ -694,6 +705,15 @@ int game_main(int argc, char* argv[]) {
OSInit();
#if DUSK_TPHD
{
const std::string& hdPath = dusk::getSettings().backend.hdContentPath;
if (!hdPath.empty()) {
dusk::tphd::setHdContentPath(hdPath);
}
}
#endif
mDoMain::sPowerOnTime = OSGetTime();
// Reset Data