Restore AC6 mode-1 decoder dump hook via midasm at 0x821CCC5C

Recovers the Apr 23 hand-edit (lost in subsequent refactors) and re-wires
it as a TOML midasm hook so codegen regeneration can no longer drop it.
Hook fires immediately after the guest decompressor (bl 0x822CF510)
returns, reads the entry record via r11 (codec at +1, csize at +8,
usize at +12), source offset from *(r31+22888), entry tag from r10, and
calls Ac6DumpPacDecodedEntry with the decoded buffer at r4. With
AC6_DUMP_PAC_DECODED=1, all 800 compressed entries now drop as
FHM-magic'd entry_*_mode1_*.bin in out/ac6_pac_runtime_dump/.

Also adds streamer-worker dispatch probes (AC6_TRACE_PAC_WORK_ITEMS),
PPC stack walking on PAC reads (AC6_TRACE_PAC_STACKS), the AC6 PAC
index parser, the chunk-coalescing dump fallback, and a user-facing
walkthrough at docs/ac6_asset_extraction_walkthrough.txt.
This commit is contained in:
salh
2026-05-01 21:22:14 +03:00
parent 96e10d2b6e
commit 64f8efbb2b
11 changed files with 1411 additions and 56 deletions
+5
View File
@@ -30,6 +30,8 @@ endif()
set(AC6RECOMP_SOURCES
src/main.cpp
src/ac6_pac_decode_dump.cpp
src/ac6_pac_decoder_probe.cpp
src/ac6_pac_index.cpp
src/d3d_hooks.cpp
src/render_hooks.cpp
src/ac6_texture_overrides.cpp
@@ -48,6 +50,9 @@ endif()
rexglue_setup_target(ac6recomp)
# Link libmspack for AC6 PAC LZX decompression in the runtime dump path.
target_link_libraries(ac6recomp PRIVATE mspack)
if(AC6RECOMP_IPO_SUPPORTED)
set_property(TARGET ac6recomp PROPERTY INTERPROCEDURAL_OPTIMIZATION_RELEASE TRUE)
set_property(TARGET ac6recomp PROPERTY INTERPROCEDURAL_OPTIMIZATION_RELWITHDEBINFO TRUE)
+61
View File
@@ -10609,3 +10609,64 @@ address = 0x821EFF30
name = "ac6PresentIntervalHook"
registers = ["r10"]
jump_address_on_true = 0x821EFF74
# AC6 PAC stream-worker dispatch probe.
#
# rex_sub_82343E18 is the PAC streamer's queue pump. At 0x82343E78 it issues a
# `bctrl` to dispatch the next queued work item; r28 holds the work item and
# ctr holds the function pointer about to run. We intercept here to enumerate
# every distinct work-item virtual the worker invokes, which is what we need
# in order to identify the AC6 "mode 1" guest decoder. Gated at runtime by
# `AC6_TRACE_PAC_WORK_ITEMS=1`; otherwise the hook is a fast no-op.
[[midasm_hook]]
address = 0x82343E78
name = "ac6PacWorkerDispatchHook"
registers = ["r28", "ctr"]
# AC6 PAC stream-worker second-level dispatch probes.
#
# Each of the five non-error worker state handlers (rex_sub_82345608, _738,
# _860, _A00, _B28) calls a different vtable slot on a streaming sub-class
# (slots 16, 20, 24, 32, 36 respectively). The mode-1 decoder is reached
# through one of those slots. Hook each handler's first `bctrl` so we can
# capture the target function each slot resolves to. Cross-referencing those
# targets with `[AC6 PAC] compressed entry written` lines pins the decoder.
[[midasm_hook]]
address = 0x82345660 # in rex_sub_82345608, vtable[16]
name = "ac6PacWorkerL2DispatchHook"
registers = ["r3", "r4", "r5", "r6", "ctr"]
[[midasm_hook]]
address = 0x82345780 # in rex_sub_82345738, vtable[20]
name = "ac6PacWorkerL2DispatchHook"
registers = ["r3", "r4", "r5", "r6", "ctr"]
[[midasm_hook]]
address = 0x823458C4 # in rex_sub_82345860, vtable[24]
name = "ac6PacWorkerL2DispatchHook"
registers = ["r3", "r4", "r5", "r6", "ctr"]
[[midasm_hook]]
address = 0x82345A50 # in rex_sub_82345A00, vtable[32]
name = "ac6PacWorkerL2DispatchHook"
registers = ["r3", "r4", "r5", "r6", "ctr"]
[[midasm_hook]]
address = 0x82345B70 # in rex_sub_82345B28, vtable[36]
name = "ac6PacWorkerL2DispatchHook"
registers = ["r3", "r4", "r5", "r6", "ctr"]
# AC6 mode-1 decoder post-decode dump hook.
#
# Fires immediately after `add r11,r9,r11` resolves the compress-table entry
# record pointer in the streamer's per-entry processing function (PC of the
# next instruction `addi r3,r10,8`). At this site `bl 0x822CF510` has just
# finished decompressing the entry into the buffer at r4's guest VA, so we
# can read the entry record (codec_mode at +1, csize at +8, usize at +12),
# the source offset at *(r31+22888), and dump the decoded bytes via
# Ac6DumpPacDecodedEntry. This re-introduces the decoder hook from the Apr 23
# build whose source-tree call site was lost.
[[midasm_hook]]
address = 0x821CCC5C
name = "ac6PacDecoderDumpHook"
registers = ["r4", "r10", "r11", "r31"]
+219
View File
@@ -0,0 +1,219 @@
================================================================================
AC6 Asset Extraction Walkthrough
================================================================================
Goal: go from a fresh clone of this repository to decoded AC6 asset files
(textures, FHM containers, SWG metadata) on disk.
The recompiled binary patches the guest decompressor at runtime via a midasm
hook (see docs/ac6_extraction_roadmap.md). When the env var
AC6_DUMP_PAC_DECODED=1 is set, every PAC entry the game touches is written
to disk in already-decoded form. The asset pipeline then turns those raw
buffers into typed FHM children, NTXR textures, etc.
--------------------------------------------------------------------------------
0. Prerequisites
--------------------------------------------------------------------------------
- Windows 10/11 x64. (Linux clang-20 also works; commands below assume Windows.)
- Visual Studio 2022 with the "Desktop development with C++" workload installed,
OR a standalone clang-cl/MSVC toolchain.
- LLVM/Clang 20+ on PATH (the project pins clang for codegen).
- CMake 3.25 or newer, Ninja, and Python 3.11+ (for the asset pipeline tools).
- Your own legally obtained copy of Ace Combat 6: Fires of Liberation. The
repository ships no game data.
--------------------------------------------------------------------------------
1. First-time build
--------------------------------------------------------------------------------
Open a 64-bit shell (x64 Native Tools Command Prompt for VS 2022, or any shell
with the right toolchain on PATH). From the repo root, run:
cmake --preset win-amd64-relwithdebinfo
cmake --build --preset win-amd64-relwithdebinfo --target ac6recomp_codegen
cmake --preset win-amd64-relwithdebinfo
cmake --build --preset win-amd64-relwithdebinfo
The two-phase configure is required because the codegen target produces
sources that the second configure has to pick up. Output exe lands at:
out/build/win-amd64-relwithdebinfo/ac6recomp.exe
setup_and_build.bat wraps the same sequence if you would rather run it once.
--------------------------------------------------------------------------------
2. Place the game assets
--------------------------------------------------------------------------------
The game expects DATA.TBL, DATA00.PAC, and DATA01.PAC alongside the exe in an
"assets" subfolder:
out/build/win-amd64-relwithdebinfo/assets/DATA.TBL
out/build/win-amd64-relwithdebinfo/assets/DATA00.PAC
out/build/win-amd64-relwithdebinfo/assets/DATA01.PAC
You will also need a default.xex and any other files the game requires; consult
the project README for the full layout. Without the PAC archives the dumper
has nothing to capture.
--------------------------------------------------------------------------------
3. Run the game with PAC dumping enabled
--------------------------------------------------------------------------------
Use the helper launcher from PowerShell at the repo root:
.\tools\launch_ac6_with_pac_dump.ps1
That sets AC6_DUMP_PAC_DECODED=1 and starts ac6recomp.exe with the working
directory pointing at the build output.
Optional switches (only set these when you need them):
.\tools\launch_ac6_with_pac_dump.ps1 -TraceWorkItems
Lifts the [fs] log category to info so the dumper's
"[AC6 PAC] dumped decoded entry ..." lines appear in ac6recomp.log,
and enables the PAC stream-worker dispatch probes.
.\tools\launch_ac6_with_pac_dump.ps1 -TraceStacks
Adds PPC back-chain stack=[...] traces on each PAC NtReadFile call.
Useful for debugging the stream worker; not needed for routine runs.
Play long enough for the streamer to load the assets you care about. As a
rough guide:
- Title screen + intro: enough for the boot/menu PACs.
- One mission start: enough for that mission's PAC entries.
- Anything new the game streams in adds new dumps; replays do not duplicate
entries that have already been written.
When you are done, close the game window normally.
--------------------------------------------------------------------------------
4. Verify the decoded dumps
--------------------------------------------------------------------------------
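The dumper writes to (relative to the repo root):
out/ac6_pac_runtime_dump/
The repo root is found by walking up from the exe's directory until a folder
containing tools/run_ac6_asset_pipeline.py is reached. If no such ancestor
exists, the dumps land in an ac6_pac_runtime_dump/ folder next to the exe
instead.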
A successful run looks like:
entry_<tag>_mode0_c<csize>_u<usize>_off<hex>.bin <- raw entries
entry_<tag>_mode1_c<csize>_u<usize>_off<hex>.bin <- decoded entries
You should NOT see any .compressed.bin files. If you do, the midasm hook at
0x821CCC5C did not fire for those entries (see Troubleshooting below).
Quick sanity check on a decoded blob:
powershell -Command "(Get-Content out\ac6_pac_runtime_dump\<file>.bin -Encoding Byte -TotalCount 4) -join ','"
The first 4 bytes of any mode-1 dump should be 70,72,77,32 (ASCII "FHM ").
--------------------------------------------------------------------------------
5. Run the asset extraction pipeline
--------------------------------------------------------------------------------
From the repo root, with the dumps in place:
python tools\run_ac6_asset_pipeline.py
The driver runs four stages in order:
1. extract_ac6_pac.py
Pulls the raw 126 entries directly out of DATA00/01.PAC offline.
Outputs to out/ac6_pac_extracted_raw/.
2. extract_ac6_runtime_fhm.py
Walks every entry_*_mode*.bin in out/ac6_pac_runtime_dump/ and
descends into FHM containers, writing typed children to
out/ac6_runtime_fhm_typed/.
3. parse_ac6_swg.py
Parses the UI sprite/widget metadata (.swg children) into
out/ac6_runtime_swg_parsed/.
4. export_ac6_ntxr.py
Converts NTXR texture entries into DDS/TGA in
out/ac6_runtime_ntxr_exported/.
Override any output path with --raw-out, --typed-out, --swg-out, --ntxr-out.
Add --skip-pac-extract if you only want to re-process the runtime dumps.
--------------------------------------------------------------------------------
6. Where the output lives
--------------------------------------------------------------------------------
out/ac6_pac_runtime_dump/ Runtime dump buffers (raw mode-0 and decoded mode-1), one file per entry.
out/ac6_pac_extracted_raw/ 126 raw (mode-0) entries pulled offline.
out/ac6_runtime_fhm_typed/ FHM children classified by magic
(NTXR textures, BFX/BSN audio banks,
MDLP/NSXR models, SWG UI, etc.).
out/ac6_runtime_swg_parsed/ JSON metadata for UI sprites.
out/ac6_runtime_ntxr_exported/ DDS/TGA files (one per texture entry).
--------------------------------------------------------------------------------
7. Troubleshooting
--------------------------------------------------------------------------------
* "no entry_*_mode1_*.bin files appeared"
- The game did not stream any compressed entries during the session.
Boot further or load a mission and try again.
- AC6_DUMP_PAC_DECODED was not set. Always launch via the helper script,
or set the env var manually before starting the exe.
* ".compressed.bin files appeared"
- The midasm hook at 0x821CCC5C did not fire. Codegen may have shifted
the underlying instruction sequence. Verify the anchor instruction in
generated/ac6recomp_recomp.10.cpp:
// lwz r11,-18100(r26)
// add r11,r9,r11
// addi r3,r10,8 <- PC of this instruction is the hook address
If the surrounding ops differ, re-anchor by finding the unique
"lwz r11,-18100(r26)" sequence and updating the address in
ac6recomp_config.toml under [[midasm_hook]] name = "ac6PacDecoderDumpHook".
* "logs do not show any [AC6 PAC] lines"
- ac6_performance_mode is on by default and forces log_level=error,
which silences the [fs] category. Run with -TraceWorkItems to lift
[fs] to info. Note: dumps still land in out/ac6_pac_runtime_dump/
regardless of log level.
* "extract_ac6_runtime_fhm.py reports 0 containers"
- The dump dir is empty or the files are still .compressed.bin.
Re-run with the hook fix above.
* "log files rotate and the early dumper lines are gone"
- At trace-level logging the rotating buffer fills in seconds. Do not
raise log_level globally; the per-category lift in -TraceWorkItems
keeps volume manageable.
* "I changed ac6recomp_config.toml and the new hook does nothing"
- You skipped the codegen pass. TOML changes only take effect after:
cmake --build --preset win-amd64-relwithdebinfo --target ac6recomp_codegen
cmake --build --preset win-amd64-relwithdebinfo
--------------------------------------------------------------------------------
8. Quick reference: env vars
--------------------------------------------------------------------------------
AC6_DUMP_PAC_DECODED=1 Required. Enables the dumper sink.
AC6_TRACE_PAC_WORK_ITEMS=1 Optional. Lifts [fs] log category to info,
enables L1/L2 streamer-worker probes.
AC6_TRACE_PAC_STACKS=1 Optional. PPC back-chain on PAC NtReadFile.
The launcher script (.\tools\launch_ac6_with_pac_dump.ps1) sets the first
unconditionally and the others only when -TraceWorkItems / -TraceStacks
are passed.
+460 -32
View File
@@ -1,15 +1,42 @@
#include <rex/logging.h>
#include "ac6_pac_decode_dump.h"
#include "ac6_pac_index.h"
#include <rex/logging.h>
#include <rex/memory.h>
#include <rex/system/kernel_state.h>
#include <algorithm>
#include <array>
#include <atomic>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <map>
#include <mutex>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <unordered_set>
#include <vector>
#if defined(_WIN32)
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
extern "C" {
#include <lzx.h>
#include <mspack.h>
}
namespace {
// ---------------------------------------------------------------------------
// AC6_DUMP_PAC_DECODED env-var gate
// ---------------------------------------------------------------------------
bool DumpingEnabled() {
static const bool enabled = [] {
const char* value = std::getenv("AC6_DUMP_PAC_DECODED");
@@ -18,8 +45,45 @@ bool DumpingEnabled() {
return enabled;
}
std::filesystem::path DumpRoot() {
return std::filesystem::path("out") / "ac6_pac_runtime_dump";
// ---------------------------------------------------------------------------
// Resolve dump directory anchored to the repo root if discoverable, otherwise
// next to the executable.
// ---------------------------------------------------------------------------
// Returns the directory that holds the running executable. On Windows the
// module path is queried first; on any other platform, or when that query
// fails, the current working directory is returned instead. An empty path is
// returned if even the working directory cannot be determined.
std::filesystem::path ExecutableDir() {
#if defined(_WIN32)
  wchar_t module_path[MAX_PATH * 2] = {};
  const DWORD capacity = static_cast<DWORD>(std::size(module_path));
  const DWORD written = GetModuleFileNameW(nullptr, module_path, capacity);
  // Accept only a non-zero length that is strictly smaller than the buffer.
  if (written != 0 && written < capacity) {
    return std::filesystem::path(module_path).parent_path();
  }
#endif
  std::error_code err;
  const std::filesystem::path working_dir = std::filesystem::current_path(err);
  if (err) {
    return std::filesystem::path();
  }
  return working_dir;
}
// Walks from `start` towards the filesystem root and returns the first
// directory that contains tools/run_ac6_asset_pipeline.py. Returns
// std::nullopt when no ancestor (including `start` itself) has that marker.
std::optional<std::filesystem::path> FindRepoRoot(std::filesystem::path start) {
  std::error_code ignored;  // existence-probe errors are treated as "not found"
  while (!start.empty()) {
    const std::filesystem::path marker = start / "tools" / "run_ac6_asset_pipeline.py";
    if (std::filesystem::exists(marker, ignored)) {
      return start;
    }
    const std::filesystem::path parent = start.parent_path();
    // A root directory is its own parent; stop instead of spinning forever.
    if (start.has_parent_path() && parent == start) {
      break;
    }
    start = parent;
  }
  return std::nullopt;
}
// Returns the dump directory, computed once on first use:
//   <repo root>/out/ac6_pac_runtime_dump   when a repo root is found above the
//                                          executable's directory, otherwise
//   <exe dir>/ac6_pac_runtime_dump.
const std::filesystem::path& DumpRoot() {
  static const std::filesystem::path root = []() -> std::filesystem::path {
    const std::filesystem::path exe_dir = ExecutableDir();
    if (const auto repo_root = FindRepoRoot(exe_dir)) {
      return *repo_root / "out" / "ac6_pac_runtime_dump";
    }
    return exe_dir / "ac6_pac_runtime_dump";
  }();
  return root;
}
std::mutex& DumpMutex() {
@@ -27,45 +91,409 @@ std::mutex& DumpMutex() {
return mutex;
}
// ---------------------------------------------------------------------------
// In-memory libmspack LZX adapter (modeled on tools/pac_probe_lzx.cpp).
// ---------------------------------------------------------------------------
// Read cursor over a caller-supplied compressed buffer. The Mem* callbacks
// below read from `data` and move or report `pos`.
struct MemInput {
const uint8_t* data = nullptr;  // first byte of the input buffer (pointer only, no copy)
size_t size = 0;                // number of bytes available at `data`
size_t pos = 0;                 // current read offset in bytes
};
// Growable sink that collects everything the decoder writes.
struct MemOutput {
std::vector<uint8_t> bytes;  // decoded bytes, appended by MemWrite
};
// Handle handed to libmspack. `base` is the first member and its address is
// what the library receives; the callbacks cast that pointer back to MemFile
// to reach the in-memory input or output side.
struct MemFile {
mspack_file base{};
MemInput* in = nullptr;    // set for the handle used as decoder input
MemOutput* out = nullptr;  // set for the handle used as decoder output
};
// Read callback: copies up to `bytes` bytes from the in-memory input into
// `buf` and advances the read cursor. Returns the number of bytes copied
// (0 once the input is exhausted), or -1 for an invalid handle or a negative
// request.
int MemRead(mspack_file* f, void* buf, int bytes) {
  auto* file = reinterpret_cast<MemFile*>(f);
  if (file == nullptr || file->in == nullptr || bytes < 0) {
    return -1;
  }
  MemInput& src = *file->in;
  const size_t remaining = src.size - src.pos;
  const size_t count = std::min<size_t>(remaining, static_cast<size_t>(bytes));
  if (count != 0) {
    std::memcpy(buf, src.data + src.pos, count);
  }
  src.pos += count;
  return static_cast<int>(count);
}
// Write callback: appends `bytes` bytes from `buf` to the in-memory output.
// Returns `bytes` on success, or -1 for an invalid handle or a negative
// length.
int MemWrite(mspack_file* f, void* buf, int bytes) {
  auto* file = reinterpret_cast<MemFile*>(f);
  if (file == nullptr || file->out == nullptr || bytes < 0) {
    return -1;
  }
  std::vector<uint8_t>& sink = file->out->bytes;
  const auto* first = static_cast<const uint8_t*>(buf);
  sink.insert(sink.end(), first, first + bytes);
  return bytes;
}
// Seek callback for the in-memory input. `mode` selects the origin (start,
// current position, or end) and `offset` is applied relative to it. Returns 0
// on success; returns -1 for an invalid handle, an unknown mode, or a target
// that would fall before the start or past the end of the buffer.
int MemSeek(mspack_file* f, off_t offset, int mode) {
  auto* file = reinterpret_cast<MemFile*>(f);
  if (file == nullptr || file->in == nullptr) {
    return -1;
  }
  MemInput& src = *file->in;

  size_t origin = 0;
  if (mode == MSPACK_SYS_SEEK_START) {
    origin = 0;
  } else if (mode == MSPACK_SYS_SEEK_CUR) {
    origin = src.pos;
  } else if (mode == MSPACK_SYS_SEEK_END) {
    origin = src.size;
  } else {
    return -1;
  }

  size_t target = 0;
  if (offset >= 0) {
    target = origin + size_t(offset);
  } else {
    const size_t back = static_cast<size_t>(-offset);
    if (back > origin) {
      return -1;  // would move before the first byte
    }
    target = origin - back;
  }
  if (target > src.size) {
    return -1;  // would move past the last byte
  }
  src.pos = target;
  return 0;
}
// Tell callback: reports the current read offset of the in-memory input, or
// -1 when the handle is null or has no input side.
off_t MemTell(mspack_file* f) {
auto* h = reinterpret_cast<MemFile*>(f);
return h && h->in ? static_cast<off_t>(h->in->pos) : off_t(-1);
}
// Message callback: deliberately discards every message it is given.
void MemMessage(mspack_file*, const char*, ...) {}
// Allocation callback: plain malloc; the mspack_system argument is unused.
void* MemAlloc(mspack_system*, size_t bytes) { return std::malloc(bytes); }
// Deallocation callback paired with MemAlloc.
void MemFree(void* p) { std::free(p); }
// Copy callback. Note the (src, dst) parameter order here is the reverse of
// memcpy's (dst, src).
void MemCopy(void* src, void* dst, size_t bytes) { std::memcpy(dst, src, bytes); }
// Builds an mspack_system whose callbacks operate purely on memory. Members
// that are not assigned here stay value-initialized.
mspack_system MakeMemSystem() {
  mspack_system system{};
  // Stream I/O on MemFile handles.
  system.read = &MemRead;
  system.write = &MemWrite;
  system.seek = &MemSeek;
  system.tell = &MemTell;
  // Diagnostics (discarded).
  system.message = &MemMessage;
  // Heap management and buffer copies.
  system.alloc = &MemAlloc;
  system.free = &MemFree;
  system.copy = &MemCopy;
  return system;
}
bool TryLzxOnce(const uint8_t* compressed, uint32_t csize, uint32_t usize,
int window_bits, int reset_interval, std::vector<uint8_t>* out) {
MemInput in{compressed, csize, 0};
MemOutput dst;
MemFile in_file{};
MemFile out_file{};
in_file.in = &in;
out_file.out = &dst;
mspack_system sys = MakeMemSystem();
auto* lzx = lzxd_init(&sys, &in_file.base, &out_file.base, window_bits, reset_interval,
1 << 15, static_cast<off_t>(usize), 0);
if (!lzx) return false;
const int status = lzxd_decompress(lzx, static_cast<off_t>(usize));
lzxd_free(lzx);
if (status != MSPACK_ERR_OK || dst.bytes.size() != usize) {
return false;
}
*out = std::move(dst.bytes);
return true;
}
// One (window_bits, reset_interval) combination passed to TryLzxOnce;
// DecompressLzx caches the combination that last decoded successfully.
struct LzxParams {
int window_bits;     // window-size argument forwarded to lzxd_init
int reset_interval;  // reset-interval argument forwarded to lzxd_init
};
// Decodes an LZX-compressed buffer whose stream parameters are not known up
// front. The parameter pair that last succeeded is cached and tried first; if
// it fails (or nothing is cached yet), window_bits 15..21 are probed against a
// fixed list of reset intervals and the first working pair is cached.
//
// Args:
//   compressed - start of the compressed bytes
//   csize      - number of compressed bytes
//   usize      - exact number of bytes the decode must produce
//   out        - receives the decoded bytes on success; untouched on failure
// Returns true when some parameter pair decoded exactly `usize` bytes.
bool DecompressLzx(const uint8_t* compressed, uint32_t csize, uint32_t usize,
                   std::vector<uint8_t>* out) {
  static std::mutex cache_mutex;
  static std::optional<LzxParams> cached;

  // Snapshot the cache under the lock, then decode without holding it so that
  // concurrent callers are not serialized behind a full decompression.
  std::optional<LzxParams> known;
  {
    std::scoped_lock lock(cache_mutex);
    known = cached;
  }
  if (known &&
      TryLzxOnce(compressed, csize, usize, known->window_bits, known->reset_interval, out)) {
    return true;
  }

  // Cached params missing or failed; probe every candidate combination.
  constexpr std::array<int, 7> kResetCandidates{0, 1, 2, 4, 8, 16, 32};
  for (int wb = 15; wb <= 21; ++wb) {
    for (int ri : kResetCandidates) {
      // The snapshot pair has just failed on this exact input; do not repeat it.
      if (known && known->window_bits == wb && known->reset_interval == ri) {
        continue;
      }
      if (!TryLzxOnce(compressed, csize, usize, wb, ri, out)) {
        continue;
      }
      {
        std::scoped_lock lock(cache_mutex);
        cached = LzxParams{wb, ri};
      }
      REXFS_INFO("[AC6 PAC] LZX params discovered: window_bits={} reset_interval={}", wb,
                 ri);
      return true;
    }
  }
  return false;
}
// ---------------------------------------------------------------------------
// Filename helper.
// ---------------------------------------------------------------------------
// Composes the dump file path for one entry:
//   <DumpRoot()>/entry_<index>_mode<mode>_c<csize>_u<usize>_off<hex offset>.bin
// All fields are decimal except the source offset, which is lowercase hex
// without a prefix.
std::filesystem::path BuildDumpPath(uint32_t entry_index, uint8_t mode, uint32_t csize,
                                    uint32_t usize, uint32_t source_offset) {
  std::ostringstream file_name;
  file_name << "entry_" << entry_index;
  // Widen the mode so it is printed as a number, not as a character.
  file_name << "_mode" << uint32_t(mode);
  file_name << "_c" << csize;
  file_name << "_u" << usize;
  file_name << "_off" << std::hex << source_offset << std::dec;
  file_name << ".bin";

  std::filesystem::path full_path = DumpRoot();
  full_path /= file_name.str();
  return full_path;
}
// Writes `size` bytes from `data` to `path`, truncating any existing file.
// The dump root directory is created first if needed.
//
// Returns true only when the directory exists, the file was opened, and all
// bytes were written and flushed. Every failure is logged and returns false.
bool WriteBlob(const std::filesystem::path& path, const uint8_t* data, size_t size) {
  std::error_code ec;
  std::filesystem::create_directories(DumpRoot(), ec);
  if (ec) {
    REXFS_ERROR("[AC6 PAC] failed to create dump directory {}: {}", DumpRoot().string(),
                ec.message());
    return false;
  }
  std::ofstream file(path, std::ios::binary | std::ios::trunc);
  if (!file) {
    REXFS_ERROR("[AC6 PAC] failed to open dump {}", path.string());
    return false;
  }
  file.write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
  // Close explicitly so that errors raised while flushing buffered data (for
  // example a full disk) set the stream's fail state before it is checked,
  // rather than being lost in the destructor.
  file.close();
  if (!file) {
    REXFS_ERROR("[AC6 PAC] failed to write dump {}", path.string());
    return false;
  }
  return true;
}
// Maps a guest virtual range [guest_addr, guest_addr + length) to a host
// pointer. Returns nullptr when the address or length is zero, when no kernel
// memory object is available, when the range would wrap past the 32-bit
// address space, or when no heap is found for the first or the last byte.
const uint8_t* TranslateGuestBuffer(uint32_t guest_addr, uint32_t length) {
  if (guest_addr == 0) return nullptr;
  if (length == 0) return nullptr;

  auto* memory = REX_KERNEL_MEMORY();
  if (memory == nullptr) return nullptr;

  // Reject ranges whose end would overflow uint32_t.
  if (guest_addr > UINT32_MAX - length) return nullptr;

  const uint32_t last_byte = guest_addr + length - 1;
  if (!memory->LookupHeap(guest_addr)) return nullptr;
  if (!memory->LookupHeap(last_byte)) return nullptr;

  return memory->TranslateVirtual<const uint8_t*>(guest_addr);
}
// ---------------------------------------------------------------------------
// Per-archive read-chunk tracking. The game streams PAC content in 0x40000
// chunks; we accumulate them and only dump once a known DATA.TBL entry's
// byte range is fully covered.
// ---------------------------------------------------------------------------
// One recorded PAC read: how many bytes were read and where in guest memory
// they were placed.
// NOTE(review): only the guest address is stored, not a copy of the bytes, and
// GatherRange reads through it later. This presumably relies on the guest not
// reusing the buffer before the entry is gathered; confirm against the
// streamer's buffer lifetime.
struct ChunkRec {
uint32_t length;        // bytes read into the buffer
uint32_t guest_buffer;  // guest virtual address of the read destination
};
// Per-archive bookkeeping (one instance each for DATA00 and DATA01).
struct ArchiveState {
// file_offset -> chunk; sorted, allows range queries.
std::map<uint32_t, ChunkRec> chunks;
// Indices of DATA.TBL entries we have already written out.
std::unordered_set<uint32_t> dumped;
};
// Returns the function-local static ArchiveState for the selected archive:
// the DATA01 state when `is_data01` is true, otherwise the DATA00 state.
ArchiveState& GetArchive(bool is_data01) {
static ArchiveState data00;
static ArchiveState data01;
return is_data01 ? data01 : data00;
}
// Returns the single mutex that Ac6OnPacReadCompleted holds while it touches
// either archive's state.
std::mutex& ArchiveMutex() {
static std::mutex m;
return m;
}
// Reports whether every byte of the half-open range [start, end) lies inside
// the recorded chunks. The walk begins at the chunk with the greatest offset
// that is <= start and requires each following chunk to begin at or before
// the point covered so far and to extend beyond it. An empty or inverted
// range is never considered covered.
bool IsRangeCovered(const std::map<uint32_t, ChunkRec>& chunks, uint32_t start, uint32_t end) {
  if (end <= start) {
    return false;
  }
  auto pos = chunks.upper_bound(start);
  if (pos == chunks.begin()) {
    return false;  // no chunk begins at or before `start`
  }
  --pos;

  uint32_t covered_to = start;
  for (;;) {
    const uint32_t first = pos->first;
    const uint32_t last = first + pos->second.length;
    const bool leaves_gap = first > covered_to;
    const bool adds_nothing = last <= covered_to;
    if (leaves_gap || adds_nothing) {
      return false;
    }
    covered_to = last;
    if (covered_to >= end) {
      return true;
    }
    ++pos;
    if (pos == chunks.end()) {
      return false;  // ran out of chunks before reaching `end`
    }
  }
}
// Copies the bytes of the half-open file range [start, end) out of guest
// memory into a host vector, stitching together the recorded chunks in offset
// order. Returns an empty vector when the range is empty, when the chunks do
// not cover it contiguously, or when any guest address fails to translate.
std::vector<uint8_t> GatherRange(const std::map<uint32_t, ChunkRec>& chunks, uint32_t start,
                                 uint32_t end) {
  std::vector<uint8_t> gathered;
  if (end <= start) {
    return gathered;
  }
  gathered.reserve(end - start);

  auto pos = chunks.upper_bound(start);
  if (pos == chunks.begin()) {
    return {};
  }
  --pos;  // chunk with the greatest offset <= start

  for (uint32_t cursor = start; cursor < end; ++pos) {
    if (pos == chunks.end()) {
      return {};
    }
    const uint32_t chunk_begin = pos->first;
    const uint32_t chunk_size = pos->second.length;
    const bool starts_after_cursor = chunk_begin > cursor;
    const bool ends_before_cursor = chunk_begin + chunk_size <= cursor;
    if (starts_after_cursor || ends_before_cursor) {
      return {};
    }

    // Take whatever this chunk can contribute from the cursor onward, capped
    // at the end of the requested range.
    const uint32_t skip = cursor - chunk_begin;
    const uint32_t want = std::min<uint32_t>(chunk_size - skip, end - cursor);
    const uint8_t* bytes = TranslateGuestBuffer(pos->second.guest_buffer + skip, want);
    if (bytes == nullptr) {
      return {};
    }
    gathered.insert(gathered.end(), bytes, bytes + want);
    cursor += want;
  }
  return gathered;
}
void TryDumpEntry(bool is_data01, uint32_t entry_index, ArchiveState* state) {
if (state->dumped.count(entry_index)) return;
auto rec = ac6_pac_index::GetByIndex(entry_index);
if (!rec) return;
if (rec->is_data01 != is_data01) return;
if (rec->compressed_size == 0) return;
const uint32_t start = rec->offset;
const uint32_t end = rec->offset + rec->compressed_size;
if (!IsRangeCovered(state->chunks, start, end)) return;
std::vector<uint8_t> raw = GatherRange(state->chunks, start, end);
if (raw.size() != rec->compressed_size) return;
const uint16_t entry_index_u16 = static_cast<uint16_t>(rec->index & 0xFFFFu);
if (rec->storage_kind == ac6_pac_index::StorageKind::kRaw) {
if (rec->decompressed_size > raw.size()) {
REXFS_WARN(
"[AC6 PAC] raw entry size mismatch; refusing overread: entry={} csize=0x{:x} "
"usize=0x{:x} pac_offset=0x{:x}",
rec->index, rec->compressed_size, rec->decompressed_size, rec->offset);
state->dumped.insert(entry_index);
return;
}
Ac6DumpPacDecodedEntry(entry_index_u16, /*mode=*/0, rec->compressed_size,
rec->decompressed_size, rec->offset, raw.data());
state->dumped.insert(entry_index);
return;
}
// Compressed (AC6 "mode 1") entries use a custom codec, not vanilla LZX
// (offline pac_extract_offline.exe failed on all 800 compressed entries).
// Persist the raw compressed bytes for later analysis.
static std::atomic<bool> logged_first_compressed{false};
if (!logged_first_compressed.exchange(true)) {
std::ostringstream hex;
hex << std::hex << std::setfill('0');
const size_t n = std::min<size_t>(64, raw.size());
for (size_t i = 0; i < n; ++i) {
hex << std::setw(2) << uint32_t(raw[i]) << ' ';
}
REXFS_INFO(
"[AC6 PAC] first compressed entry (index={} csize=0x{:x} usize=0x{:x} "
"pac_offset=0x{:x}) head[64]={}",
rec->index, rec->compressed_size, rec->decompressed_size, rec->offset, hex.str());
}
std::filesystem::path out_path = BuildDumpPath(entry_index_u16, /*mode=*/1,
rec->compressed_size,
rec->decompressed_size, rec->offset);
out_path.replace_extension(".compressed.bin");
{
std::scoped_lock dump_lock(DumpMutex());
std::error_code ec;
std::filesystem::create_directories(DumpRoot(), ec);
std::ofstream f(out_path, std::ios::binary | std::ios::trunc);
if (f) {
f.write(reinterpret_cast<const char*>(raw.data()),
static_cast<std::streamsize>(raw.size()));
}
}
REXFS_WARN(
"[AC6 PAC] compressed entry written as raw blob (no host-side mode-1 decoder): "
"entry={} csize=0x{:x} usize=0x{:x} pac_offset=0x{:x} path={}",
rec->index, rec->compressed_size, rec->decompressed_size, rec->offset, out_path.string());
state->dumped.insert(entry_index);
}
} // namespace
void Ac6DumpPacDecodedEntry(uint16_t entry_index, uint8_t codec_mode, uint32_t compressed_size,
uint32_t decompressed_size, uint32_t source_offset,
const uint8_t* host_data) {
if (!DumpingEnabled() || !host_data || decompressed_size == 0) {
return;
}
if (!DumpingEnabled() || !host_data || decompressed_size == 0) return;
std::scoped_lock lock(DumpMutex());
std::error_code ec;
const auto root = DumpRoot();
std::filesystem::create_directories(root, ec);
if (ec) {
REXFS_ERROR("[AC6 PAC] failed to create dump directory {}: {}", root.string(), ec.message());
return;
}
std::ostringstream name;
name << "entry_" << entry_index << "_mode" << static_cast<uint32_t>(codec_mode) << "_c" << compressed_size
<< "_u" << decompressed_size << "_off" << std::hex << source_offset << std::dec << ".bin";
const auto path = root / name.str();
std::ofstream file(path, std::ios::binary | std::ios::trunc);
if (!file) {
REXFS_ERROR("[AC6 PAC] failed to open decoded dump {}", path.string());
return;
}
file.write(reinterpret_cast<const char*>(host_data), static_cast<std::streamsize>(decompressed_size));
if (!file) {
REXFS_ERROR("[AC6 PAC] failed to write decoded dump {}", path.string());
return;
}
const auto path =
BuildDumpPath(entry_index, codec_mode, compressed_size, decompressed_size, source_offset);
if (!WriteBlob(path, host_data, decompressed_size)) return;
REXFS_INFO(
"[AC6 PAC] dumped decoded entry index={} mode={} compressed=0x{:x} decompressed=0x{:x} "
"source_offset=0x{:x} path={}",
entry_index, static_cast<uint32_t>(codec_mode), compressed_size, decompressed_size, source_offset,
entry_index, uint32_t(codec_mode), compressed_size, decompressed_size, source_offset,
path.string());
}
void Ac6OnPacReadCompleted(std::string_view path, uint32_t guest_buffer, uint64_t file_offset,
uint32_t bytes_read) {
if (!DumpingEnabled() || guest_buffer == 0 || bytes_read == 0) return;
// DATA.TBL: parse and cache the index.
if (ac6_pac_index::IsDataTblPath(path)) {
if (file_offset != 0) return; // require a full-file read starting at 0
const uint8_t* host = TranslateGuestBuffer(guest_buffer, bytes_read);
if (!host) return;
if (ac6_pac_index::LoadFromBuffer(host, bytes_read)) {
// Successfully indexed; nothing else to do for DATA.TBL itself.
}
return;
}
// DATA00/01.PAC: record this chunk, then check if any DATA.TBL entry's
// full range is now covered by recorded reads.
bool is_data01 = false;
if (!ac6_pac_index::ClassifyPacPath(path, &is_data01)) return;
if (!ac6_pac_index::IsLoaded()) return;
if (file_offset > 0xFFFFFFFFu) return;
const uint32_t offset_u32 = static_cast<uint32_t>(file_offset);
const uint32_t end_u32 =
bytes_read > UINT32_MAX - offset_u32 ? UINT32_MAX : offset_u32 + bytes_read;
std::scoped_lock lock(ArchiveMutex());
auto& archive = GetArchive(is_data01);
archive.chunks[offset_u32] = ChunkRec{bytes_read, guest_buffer};
const auto candidates = ac6_pac_index::FindOverlapping(is_data01, offset_u32, end_u32);
// One-shot diagnostic on first overlapping read after DATA.TBL is loaded.
static std::atomic<bool> logged_first_overlap{false};
if (!candidates.empty() && !logged_first_overlap.exchange(true)) {
const auto rec = ac6_pac_index::GetByIndex(candidates.front());
REXFS_INFO(
"[AC6 PAC] first overlap: archive=DATA0{} chunk[off=0x{:x},len=0x{:x}] candidate "
"entry={} entry_range=[0x{:x},0x{:x}) csize=0x{:x}",
is_data01 ? "1" : "0", offset_u32, bytes_read, candidates.front(),
rec ? rec->offset : 0u, rec ? rec->offset + rec->compressed_size : 0u,
rec ? rec->compressed_size : 0u);
}
for (uint32_t entry_index : candidates) {
TryDumpEntry(is_data01, entry_index, &archive);
}
// Periodic progress log so we can see if entries ever fully cover.
static std::atomic<uint64_t> chunk_count{0};
const uint64_t n = chunk_count.fetch_add(1) + 1;
if (n == 1 || n == 100 || n == 1000 || (n % 5000) == 0) {
REXFS_INFO("[AC6 PAC] progress: archive=DATA0{} chunks_recorded={} dumped_entries={}",
is_data01 ? "1" : "0", archive.chunks.size(), archive.dumped.size());
}
}
+26
View File
@@ -0,0 +1,26 @@
#pragma once
#include <cstdint>
#include <string_view>
// Writes a single decoded PAC entry to the runtime dump directory.
// Filename format: entry_<index>_mode<mode>_c<csize>_u<usize>_off<hex_offset>.bin
//
// Does nothing unless AC6_DUMP_PAC_DECODED dumping is enabled, `host_data` is
// non-null, and `decompressed_size` is non-zero.
//
// Args:
//   entry_index       - entry number used in the filename
//   codec_mode        - mode number used in the filename
//   compressed_size   - compressed size, used in the filename only
//   decompressed_size - number of bytes written from `host_data`
//   source_offset     - source offset, printed in hex in the filename
//   host_data         - host pointer to the bytes to write
void Ac6DumpPacDecodedEntry(uint16_t entry_index, uint8_t codec_mode, uint32_t compressed_size,
uint32_t decompressed_size, uint32_t source_offset,
const uint8_t* host_data);
// Hook called from the kernel-side NtReadFile completion path for any read
// targeting an AC6 PAC archive (DATA00.PAC, DATA01.PAC) or DATA.TBL itself.
// - For DATA.TBL reads that start at file offset 0: parses and caches the
//   index.
// - For DATA00/01.PAC reads: records the read as a chunk. Once the recorded
//   chunks fully cover a cached DATA.TBL entry's byte range, that entry is
//   dumped. Raw entries are written as mode-0 dumps; compressed entries are
//   written undecoded as ".compressed.bin" blobs (there is no host-side
//   decoder for them).
//
// All work is gated on AC6_DUMP_PAC_DECODED=1; otherwise this is a no-op.
//
// Args:
//   path - guest path of the file just read (e.g. "game:\\DATA00.PAC")
//   guest_buffer - guest virtual address of the read destination buffer
//   file_offset - byte offset within the file where the read started
//   bytes_read - number of bytes successfully read
void Ac6OnPacReadCompleted(std::string_view path, uint32_t guest_buffer, uint64_t file_offset,
uint32_t bytes_read);
+206
View File
@@ -0,0 +1,206 @@
#include "ac6_pac_decoder_probe.h"
#include <rex/logging.h>
#include <rex/logging/api.h>
#include <rex/memory.h>
#include <rex/ppc/types.h>
#include <rex/system/kernel_state.h>
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <mutex>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
namespace {
// Interprets environment variable `name` as a boolean switch: unset, empty,
// or exactly "0" means off; any other value means on.
bool EnvFlag(const char* name) {
  const char* raw = std::getenv(name);
  if (raw == nullptr || *raw == '\0') {
    return false;
  }
  return std::string_view(raw) != "0";
}
// Reports whether the work-item dispatch probes are active
// (AC6_TRACE_PAC_WORK_ITEMS). The environment is evaluated once, on the first
// call. That first call also raises log categories as a side effect:
//   AC6_TRACE_PAC_WORK_ITEMS -> fs category to info
//   AC6_TRACE_PAC_STACKS     -> krnl category to info
// ac6_performance_mode forces log_level=error, which would otherwise silence
// the probe's REXFS_INFO output, the PAC dumper's diagnostic lines, and the
// kernel hook's stack-trace REXKRNL_INFO lines.
bool TraceEnabled() {
  static const bool enabled = []() -> bool {
    const bool trace_work_items = EnvFlag("AC6_TRACE_PAC_WORK_ITEMS");
    const bool trace_stacks = EnvFlag("AC6_TRACE_PAC_STACKS");
    if (trace_work_items) {
      rex::SetCategoryLevel(rex::log::fs(), spdlog::level::info);
    }
    if (trace_stacks) {
      rex::SetCategoryLevel(rex::log::krnl(), spdlog::level::info);
    }
    // Only the work-item flag turns the probes themselves on.
    return trace_work_items;
  }();
  return enabled;
}
// Per-target bookkeeping for the first-level work-item dispatch probe. One
// instance exists per distinct guest function address observed in ctr.
struct TargetState {
// Value of r28 (the work item) on the first dispatch of this target.
uint32_t first_work_item = 0;
// Number of dispatches of this target seen while tracing was enabled.
uint64_t hit_count = 0;
};
// Lock guarding the first-level dispatch target table. Function-local static,
// so it is constructed on first use.
std::mutex& Mutex() {
  static std::mutex dispatch_table_lock;
  return dispatch_table_lock;
}
std::unordered_map<uint32_t, TargetState>& Targets() {
static std::unordered_map<uint32_t, TargetState> m;
return m;
}
} // namespace
// Mid-asm probe for the streamer's work-item dispatch.
//
// Records, per distinct dispatch target (ctr), the first work item (r28) and a
// running hit count. Logs once when a target is first seen and again at hit
// milestones (100, 1000, then every 10000). Does nothing unless TraceEnabled().
void ac6PacWorkerDispatchHook(PPCRegister& r28, PPCRegister& ctr) {
  if (!TraceEnabled()) {
    return;
  }

  const uint32_t dispatch_target = ctr.u32;
  const uint32_t item_ptr = r28.u32;

  uint64_t hits = 0;
  size_t known_targets = 0;
  {
    // Keep the critical section to table bookkeeping; logging happens after
    // the lock is released.
    std::scoped_lock guard(Mutex());
    auto& table = Targets();
    TargetState& state = table[dispatch_target];
    if (state.hit_count == 0) {
      state.first_work_item = item_ptr;
    }
    hits = ++state.hit_count;
    known_targets = table.size();
  }

  // A count of exactly one means this call created the table entry.
  if (hits == 1) {
    REXFS_INFO(
        "[AC6 PAC WORKER] new dispatch target=0x{:08X} first_work_item=0x{:08X} "
        "(distinct_targets={})",
        dispatch_target, item_ptr, known_targets);
    return;
  }

  const bool at_milestone = hits == 100 || hits == 1000 || (hits % 10000) == 0;
  if (at_milestone) {
    REXFS_INFO("[AC6 PAC WORKER] target=0x{:08X} hits={}", dispatch_target, hits);
  }
}
namespace {
// Per-target bookkeeping for the second-level dispatch probe. One instance
// exists per distinct guest function address observed in ctr.
struct L2TargetState {
// r3..r6 exactly as seen on the first dispatch of this target.
uint32_t first_r3 = 0;
uint32_t first_r4 = 0;
uint32_t first_r5 = 0;
uint32_t first_r6 = 0;
// Number of dispatches of this target seen while tracing was enabled.
uint64_t hit_count = 0;
// Up to N distinct (r5, r6) tuples observed for this target; the decoder's
// slot will eventually be called with an r5 matching a known csize/usize.
static constexpr size_t kMaxSamples = 12;
// Distinct (r5, r6) pairs in order of first observation. The hook stops
// appending once kMaxSamples pairs have been stored.
std::vector<std::pair<uint32_t, uint32_t>> samples;
};
// Lock guarding the second-level dispatch target table. Function-local static,
// so it is constructed on first use.
std::mutex& L2Mutex() {
  static std::mutex l2_table_lock;
  return l2_table_lock;
}
std::unordered_map<uint32_t, L2TargetState>& L2Targets() {
static std::unordered_map<uint32_t, L2TargetState> m;
return m;
}
} // namespace
// Second-level dispatch probe shared by the stream-worker state handlers.
//
// For each distinct dispatch target (ctr) it records the first r3..r6, a hit
// count, and up to L2TargetState::kMaxSamples distinct (r5, r6) pairs. It logs
//   - once when a target is first seen,
//   - once per newly captured (r5, r6) pair, and
//   - at hit milestones (100, 1000, then every 10000).
// Does nothing unless TraceEnabled().
//
// The register values are read once up front and reused for both bookkeeping
// and logging. The per-call state is no longer copied out of the table: on a
// first sighting the stored first_r3..first_r6 are by construction the values
// of this very call, so copying the whole state (including its sample vector)
// under the lock on every dispatch was pure overhead.
void ac6PacWorkerL2DispatchHook(PPCRegister& r3, PPCRegister& r4, PPCRegister& r5,
                                PPCRegister& r6, PPCRegister& ctr) {
  if (!TraceEnabled()) return;

  const uint32_t target = ctr.u32;
  const uint32_t arg3 = r3.u32;
  const uint32_t arg4 = r4.u32;
  const uint32_t arg5 = r5.u32;
  const uint32_t arg6 = r6.u32;

  bool first_sighting = false;
  bool new_sample = false;
  uint64_t total = 0;
  size_t distinct = 0;
  size_t samples_count = 0;
  {
    std::scoped_lock lock(L2Mutex());
    auto& targets = L2Targets();
    auto& slot = targets[target];
    if (slot.hit_count == 0) {
      slot.first_r3 = arg3;
      slot.first_r4 = arg4;
      slot.first_r5 = arg5;
      slot.first_r6 = arg6;
      first_sighting = true;
    }
    slot.hit_count++;

    // Bounded distinct-(r5,r6) capture so the decoder's argument signature
    // becomes observable across later calls (first-sighting often catches
    // state-init zeros).
    if (slot.samples.size() < L2TargetState::kMaxSamples) {
      const std::pair<uint32_t, uint32_t> key{arg5, arg6};
      if (std::find(slot.samples.begin(), slot.samples.end(), key) == slot.samples.end()) {
        slot.samples.push_back(key);
        new_sample = true;
      }
    }

    total = slot.hit_count;
    distinct = targets.size();
    samples_count = slot.samples.size();
  }

  // Logging happens outside the lock. A first sighting always stores a new
  // sample too, so it is checked first to avoid a duplicate line.
  if (first_sighting) {
    REXFS_INFO(
        "[AC6 PAC L2] new target=0x{:08X} r3=0x{:08X} r4=0x{:08X} r5=0x{:08X} "
        "r6=0x{:08X} (distinct_l2_targets={})",
        target, arg3, arg4, arg5, arg6, distinct);
  } else if (new_sample) {
    REXFS_INFO(
        "[AC6 PAC L2 sample] target=0x{:08X} r3=0x{:08X} r4=0x{:08X} r5=0x{:08X} "
        "r6=0x{:08X} (sample {} / {}, hits={})",
        target, arg3, arg4, arg5, arg6, samples_count, L2TargetState::kMaxSamples, total);
  } else if (total == 100 || total == 1000 || (total % 10000) == 0) {
    REXFS_INFO("[AC6 PAC L2] target=0x{:08X} hits={}", target, total);
  }
}
// Mid-asm hook that forwards a freshly decoded PAC entry to the dumper.
//
// Inputs, as read by this function:
//   r4  - guest VA of the decoded buffer
//   r10 - entry tag; only the low 16 bits are forwarded
//   r11 - guest VA of the entry record (codec byte at +1, big-endian csize at
//         +8, big-endian usize at +12)
//   r31 - base whose big-endian word at +22888 is forwarded as source offset
//
// Guest reads that fall outside a known heap yield 0 instead of faulting. The
// dump is skipped when usize is 0, r4 is 0, r4 + usize would wrap, or either
// end of the decoded buffer is not inside a known heap.
void ac6PacDecoderDumpHook(PPCRegister& r4, PPCRegister& r10, PPCRegister& r11,
                           PPCRegister& r31) {
  constexpr uint32_t kRecordCodecOffset = 1;
  constexpr uint32_t kRecordCsizeOffset = 8;
  constexpr uint32_t kRecordUsizeOffset = 12;
  constexpr uint32_t kSourceOffsetField = 22888;

  auto* memory = REX_KERNEL_MEMORY();
  if (!memory) {
    return;
  }

  // Checked single-byte guest read.
  const auto read_byte = [memory](uint32_t va) -> uint8_t {
    if (!memory->LookupHeap(va)) {
      return 0;
    }
    return *static_cast<const uint8_t*>(memory->TranslateVirtual(va));
  };

  // Checked big-endian 32-bit guest read; both the first and the last byte
  // must resolve to a heap.
  const auto read_be32 = [memory](uint32_t va) -> uint32_t {
    if (va > UINT32_MAX - 3) {
      return 0;
    }
    if (!memory->LookupHeap(va) || !memory->LookupHeap(va + 3)) {
      return 0;
    }
    return rex::memory::load_and_swap<uint32_t>(memory->TranslateVirtual(va));
  };

  const uint32_t record = r11.u32;
  const uint8_t codec = read_byte(record + kRecordCodecOffset);
  const uint32_t csize = read_be32(record + kRecordCsizeOffset);
  const uint32_t usize = read_be32(record + kRecordUsizeOffset);
  const uint32_t source_offset = read_be32(r31.u32 + kSourceOffsetField);

  const uint32_t decoded_va = r4.u32;
  if (usize == 0 || decoded_va == 0) {
    return;
  }
  if (decoded_va > UINT32_MAX - usize) {
    return;  // buffer end would wrap the 32-bit guest address space
  }
  if (!memory->LookupHeap(decoded_va) || !memory->LookupHeap(decoded_va + usize - 1)) {
    return;
  }

  const auto* host = memory->TranslateVirtual<const uint8_t*>(decoded_va);
  if (!host) {
    return;
  }

  const auto entry_tag = static_cast<uint16_t>(r10.u32 & 0xFFFFu);
  Ac6DumpPacDecodedEntry(entry_tag, codec, csize, usize, source_offset, host);
}
+47
View File
@@ -0,0 +1,47 @@
#pragma once
#include <rex/ppc/types.h>
// Mid-asm hook on the AC6 PAC stream-worker's work-item dispatch site.
//
// Wired to `0x82343E78` in `rex_sub_82343E18` (the streamer's `bctrl` that
// dispatches the next queued work item). At the moment of the call,
// `ctr` holds the guest function pointer about to run (the work item's
// virtual method[1]) and `r28` holds the work item itself.
//
// Gated by env var `AC6_TRACE_PAC_WORK_ITEMS=1`. When enabled, each
// distinct dispatch target is logged once, and again at hit-count
// milestones (100, 1000, then every 10000 hits). The decoder we are hunting
// for will appear here as a target that runs after a compressed PAC entry
// has been fully streamed.
void ac6PacWorkerDispatchHook(PPCRegister& r28, PPCRegister& ctr);
// Second-level dispatch probe, installed at the first `bctrl` inside each of
// the five non-error PAC stream-worker state handlers (rex_sub_82345608,
// _738, _860, _A00, _B28). Each handler invokes a different vtable slot on
// the same streaming sub-class; this hook records the target function each
// slot resolves to. The mode-1 decoder is reached through one of these slots.
//
// All five sites share this single hook; cross-reference the captured target
// addresses against `[AC6 PAC] compressed entry written` lines to identify
// the decoder.
//
// Gated by the same env var as the first-level probe
// (`AC6_TRACE_PAC_WORK_ITEMS=1`). Per target it logs r3..r6 of the first
// dispatch, then each newly seen (r5, r6) pair up to a fixed cap, then
// hit-count milestones.
void ac6PacWorkerL2DispatchHook(PPCRegister& r3, PPCRegister& r4, PPCRegister& r5,
PPCRegister& r6, PPCRegister& ctr);
// Mid-asm hook on the AC6 mode-1 decoder's post-decode site.
//
// Wired to `0x821CCC5C` in the streamer's per-entry processing function,
// immediately after `add r11,r9,r11` resolves the compress-table entry
// record pointer. At this point the prior `bl 0x822CF510` has decompressed
// the entry, `r4` holds the destination buffer's guest VA, `r11` points at
// the entry record (codec_mode at +1, csize at +8, usize at +12), `r10`
// has the entry tag (low 16 bits), and `*(r31+22888)` holds the source
// offset. Captured originally as a hand-edit in the Apr 23 build that
// produced FHM-magic'd dumps; re-introduced here as a proper midasm hook
// so codegen regeneration doesn't lose it.
//
// The hook itself checks no env var; it forwards to Ac6DumpPacDecodedEntry
// unless usize is 0, `r4` is 0, or the decoded buffer's first/last byte does
// not resolve to a guest heap.
void ac6PacDecoderDumpHook(PPCRegister& r4, PPCRegister& r10, PPCRegister& r11,
PPCRegister& r31);
// Forward decl for the dumper sink (defined in src/ac6_pac_decode_dump.cpp).
// NOTE(review): the dumper's own header declares the same signature; the two
// declarations must be kept in sync by hand.
void Ac6DumpPacDecodedEntry(uint16_t entry_index, uint8_t codec_mode,
uint32_t compressed_size, uint32_t decompressed_size,
uint32_t source_offset, const uint8_t* host_data);
+152
View File
@@ -0,0 +1,152 @@
#include "ac6_pac_index.h"
#include <algorithm>
#include <cctype>
#include <mutex>
#include <unordered_map>
#include <vector>
#include <rex/logging.h>
namespace ac6_pac_index {
namespace {
// DATA.TBL layout as consumed by LoadFromBuffer.
// Header bytes preceding the entry rows; the first four hold the big-endian
// entry count (the remaining header bytes are not read here).
constexpr size_t kHeaderSize = 8;
// Size of one entry row: four big-endian 32-bit fields
// (group, offset, compressed size, decompressed size).
constexpr size_t kEntrySize = 16;
// Bit in an entry's group word: set -> entry lives in DATA01, clear -> DATA00.
constexpr uint32_t kGroupBitData01 = 0x01000000u;
// Bit in an entry's group word: set -> StorageKind::kRaw, clear -> kCompressed.
constexpr uint32_t kGroupBitRaw = 0x00020000u;
// Assembles a big-endian 32-bit value from the four bytes starting at `p`.
// The caller guarantees that p[0..3] are readable.
uint32_t ReadBE32(const uint8_t* p) {
  uint32_t value = 0;
  for (int i = 0; i < 4; ++i) {
    value = (value << 8) | uint32_t(p[i]);
  }
  return value;
}
// Process-wide DATA.TBL index. Every member other than `mutex` is read and
// written only while `mutex` is held.
struct State {
std::mutex mutex;
// True once a DATA.TBL buffer has been parsed successfully.
bool loaded = false;
// All table rows in file order; an entry's `index` equals its position here.
std::vector<Entry> entries;
// Key: archive (false=DATA00, true=DATA01) packed with offset and csize.
// Value: position of the entry in `entries`. Rows with a compressed size of
// zero are not inserted.
std::unordered_map<uint64_t, uint32_t> by_offset_csize;
};
// Accessor for the single process-wide index; constructed on first use.
State& Get() {
  static State index_state;
  return index_state;
}
// Builds the 64-bit lookup key for (archive, offset, compressed size).
//
// Offset occupies the high 32 bits and csize the low 32 bits. The archive
// selector can therefore not be given a bit of its own: the previous packing
// OR-ed it into bit 63, which is also bit 31 of the offset, so any DATA00
// entry at or beyond 2 GiB produced the same key as a DATA01 entry with the
// same (or 2 GiB lower) offset and equal size.
//
// DATA01 keys are now the bitwise complement of the packed value instead.
// DATA00 keys are unchanged. A DATA00/DATA01 pair can only collide when both
// their offsets and their compressed sizes are exact bitwise complements,
// which requires one compressed size to be at least 2 GiB.
uint64_t MakeKey(bool is_data01, uint32_t offset, uint32_t csize) {
  const uint64_t packed = (uint64_t(offset) << 32) | uint64_t(csize);
  return is_data01 ? ~packed : packed;
}
// Case-insensitive substring test: true when `needle` occurs somewhere in
// `haystack`, comparing characters after std::tolower. An empty needle matches
// any non-empty haystack.
bool AsciiContainsCi(std::string_view haystack, std::string_view needle) {
  const auto fold = [](char c) { return std::tolower(static_cast<unsigned char>(c)); };
  const auto match =
      std::search(haystack.begin(), haystack.end(), needle.begin(), needle.end(),
                  [&fold](char lhs, char rhs) { return fold(lhs) == fold(rhs); });
  return match != haystack.end();
}
} // namespace
// Parses a complete DATA.TBL image and publishes it as the active index.
//
// Layout: kHeaderSize header bytes whose first big-endian word is the entry
// count, followed by exactly `count` rows of kEntrySize bytes.
//
// Returns false, leaving any previously loaded index untouched, when `data`
// is null, the buffer is shorter than the header, or the buffer size does not
// equal header + count rows exactly. On success the previous index (if any) is
// replaced and true is returned.
//
// The size check divides instead of multiplying: `count * kEntrySize` could
// wrap where size_t is 32 bits wide, which would let a bogus count pass the
// check and drive the row loop far past the end of the buffer.
bool LoadFromBuffer(const uint8_t* data, size_t size) {
  if (!data || size < kHeaderSize) {
    return false;
  }

  const uint32_t count = ReadBE32(data);
  const size_t payload = size - kHeaderSize;
  if (payload % kEntrySize != 0 || payload / kEntrySize != count) {
    return false;
  }

  // Build the new tables outside the lock so readers are blocked only for the
  // final swap.
  std::vector<Entry> entries;
  entries.reserve(count);
  std::unordered_map<uint64_t, uint32_t> by_key;
  by_key.reserve(count);

  for (uint32_t i = 0; i < count; ++i) {
    const uint8_t* p = data + kHeaderSize + (size_t(i) * kEntrySize);
    Entry e;
    e.index = i;
    e.group = ReadBE32(p + 0);
    e.offset = ReadBE32(p + 4);
    e.compressed_size = ReadBE32(p + 8);
    e.decompressed_size = ReadBE32(p + 12);
    e.is_data01 = (e.group & kGroupBitData01) != 0;
    e.storage_kind = (e.group & kGroupBitRaw) ? StorageKind::kRaw : StorageKind::kCompressed;
    entries.push_back(e);

    // Zero-length rows cannot be matched against a read, so they are kept in
    // `entries` but not indexed. emplace keeps the first row when two rows
    // share a key.
    if (e.compressed_size > 0) {
      by_key.emplace(MakeKey(e.is_data01, e.offset, e.compressed_size), i);
    }
  }

  auto& s = Get();
  std::scoped_lock lock(s.mutex);
  s.entries = std::move(entries);
  s.by_offset_csize = std::move(by_key);
  s.loaded = true;
  REXFS_INFO("[AC6 PAC] DATA.TBL parsed: {} entries", s.entries.size());
  return true;
}
bool IsLoaded() {
auto& s = Get();
std::scoped_lock lock(s.mutex);
return s.loaded;
}
std::optional<Entry> Find(bool is_data01, uint32_t offset, uint32_t compressed_size) {
auto& s = Get();
std::scoped_lock lock(s.mutex);
if (!s.loaded) {
return std::nullopt;
}
auto it = s.by_offset_csize.find(MakeKey(is_data01, offset, compressed_size));
if (it == s.by_offset_csize.end()) {
return std::nullopt;
}
return s.entries[it->second];
}
// Decides whether `path` names one of the two PAC archives, by
// case-insensitive substring match. DATA01.PAC is tested before DATA00.PAC.
//
// Returns true on a match and, when `is_data01` is non-null, stores the
// archive selector there (true = DATA01, false = DATA00). On a miss the
// output is left untouched and false is returned.
bool ClassifyPacPath(std::string_view path, bool* is_data01) {
  bool archive_is_data01 = false;
  if (AsciiContainsCi(path, "DATA01.PAC")) {
    archive_is_data01 = true;
  } else if (AsciiContainsCi(path, "DATA00.PAC")) {
    archive_is_data01 = false;
  } else {
    return false;
  }

  if (is_data01 != nullptr) {
    *is_data01 = archive_is_data01;
  }
  return true;
}
// True when `path` contains "DATA.TBL", ignoring case.
bool IsDataTblPath(std::string_view path) {
  constexpr std::string_view kTableFileName = "DATA.TBL";
  return AsciiContainsCi(path, kTableFileName);
}
// Collects the indices of all entries in the chosen archive whose byte range
// [offset, offset + compressed_size) intersects the half-open file range
// [range_begin, range_end). Results are in table order.
//
// Returns an empty vector when the range is empty or inverted, or when no
// index is loaded. Entries with a compressed size of zero never match.
//
// The entry's end is computed in 64 bits: offset + compressed_size can exceed
// UINT32_MAX, and a wrapped 32-bit end would sit below the entry's own start
// and make the entry invisible to every query.
std::vector<uint32_t> FindOverlapping(bool is_data01, uint32_t range_begin, uint32_t range_end) {
  std::vector<uint32_t> hits;
  if (range_end <= range_begin) return hits;

  auto& s = Get();
  std::scoped_lock lock(s.mutex);
  if (!s.loaded) return hits;

  for (const auto& e : s.entries) {
    if (e.is_data01 != is_data01 || e.compressed_size == 0) continue;
    const uint64_t e_begin = e.offset;
    const uint64_t e_end = e_begin + e.compressed_size;
    if (e_begin < range_end && range_begin < e_end) {
      hits.push_back(e.index);
    }
  }
  return hits;
}
std::optional<Entry> GetByIndex(uint32_t entry_index) {
auto& s = Get();
std::scoped_lock lock(s.mutex);
if (!s.loaded || entry_index >= s.entries.size()) return std::nullopt;
return s.entries[entry_index];
}
} // namespace ac6_pac_index
+49
View File
@@ -0,0 +1,49 @@
#pragma once
#include <cstdint>
#include <optional>
#include <string_view>
#include <vector>
namespace ac6_pac_index {
// How an entry's bytes are stored in the archive, derived from a flag bit in
// the entry's group word when DATA.TBL is parsed.
enum class StorageKind : uint8_t {
// Raw-storage bit set in the group word.
kRaw = 0,
// Raw-storage bit clear in the group word.
kCompressed = 1,
};
// One DATA.TBL row. The four 32-bit table fields are stored big-endian in the
// file and converted to host order during parsing.
struct Entry {
// Zero-based row position within DATA.TBL.
uint32_t index;
// Raw group word; carries the archive-selector and raw-storage flag bits.
uint32_t group;
// Byte offset of the entry within its archive.
uint32_t offset;
// Stored size in bytes; rows with 0 are excluded from offset/size lookups.
uint32_t compressed_size;
uint32_t decompressed_size;
// Decoded from the group word's raw-storage bit.
StorageKind storage_kind;
// Decoded from the group word: true = DATA01 archive, false = DATA00.
bool is_data01;
};
// Parses DATA.TBL bytes and populates the in-memory index. Idempotent: subsequent
// successful parses replace prior state.
// Returns false, leaving prior state in place, when `data` is null or `size`
// is not exactly the header plus the entry count's worth of rows.
bool LoadFromBuffer(const uint8_t* data, size_t size);
// Returns true once a LoadFromBuffer call has succeeded.
bool IsLoaded();
// Find an entry by (pac archive selector, byte offset, compressed size).
// Returns nullopt when no index is loaded or nothing matches. Entries whose
// compressed size is zero are never indexed and so are never returned.
std::optional<Entry> Find(bool is_data01, uint32_t offset, uint32_t compressed_size);
// Returns true if the given resolved guest path names DATA00.PAC or DATA01.PAC,
// and writes the archive selector to *is_data01.
// Matching is a case-insensitive substring test. `is_data01` may be null, in
// which case only the return value is produced; it is left untouched when the
// function returns false.
bool ClassifyPacPath(std::string_view path, bool* is_data01);
// Returns true if the path contains "DATA.TBL" (case-insensitive).
bool IsDataTblPath(std::string_view path);
// Returns indices of entries in the given archive whose [offset, offset+csize)
// range overlaps the half-open file-byte range [range_begin, range_end).
// Returns an empty vector when the range is empty, when no index is loaded, or
// when nothing overlaps. Entries with a compressed size of zero are skipped.
std::vector<uint32_t> FindOverlapping(bool is_data01, uint32_t range_begin, uint32_t range_end);
// Returns the entry at the given DATA.TBL row index, or nullopt if unloaded
// or out of range. The returned Entry is a copy of the indexed row.
std::optional<Entry> GetByIndex(uint32_t entry_index);
} // namespace ac6_pac_index
+156 -24
View File
@@ -7,12 +7,17 @@
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <algorithm>
#include <array>
#include <cctype>
#include <cstdlib>
#include <iomanip>
#include <sstream>
#include <native/filesystem/device.h>
#include <rex/kernel/xboxkrnl/private.h>
#include <rex/logging.h>
#include <rex/memory.h>
#include <rex/memory/utils.h>
#include <rex/ppc/function.h>
#include <rex/ppc/types.h>
#include <rex/system/info/file.h>
@@ -27,6 +32,10 @@
#include <rex/system/xtypes.h>
#include <rex/thread/mutex.h>
// AC6-specific PAC dump hook. Defined in src/ac6_pac_decode_dump.cpp.
// Declared here at global scope (outside rex::kernel::xboxkrnl) so the kernel
// read path can call it without including the game-side header.
extern void Ac6OnPacReadCompleted(std::string_view path, uint32_t guest_buffer,
uint64_t file_offset, uint32_t bytes_read);
namespace rex::kernel::xboxkrnl {
using namespace rex::system;
@@ -58,6 +67,81 @@ uint32_t CurrentGuestCallerAddress() {
return lr >= 4 ? (lr - 4) : 0;
}
// Safely reads a big-endian 32-bit word from guest memory.
//
// Stores the host-order value in *out_value and returns true on success.
// Returns false, without writing, when out_value is null, the address is too
// close to the top of the 32-bit space, kernel memory is unavailable, or the
// first or last byte of the word does not resolve to a guest heap.
bool ReadGuestU32BE(uint32_t guest_address, uint32_t* out_value) {
  if (!out_value) {
    return false;
  }
  if (guest_address > UINT32_MAX - sizeof(uint32_t)) {
    return false;
  }

  auto* memory = REX_KERNEL_MEMORY();
  if (!memory) {
    return false;
  }

  const auto last_byte = static_cast<uint32_t>(guest_address + sizeof(uint32_t) - 1);
  if (!memory->LookupHeap(guest_address) || !memory->LookupHeap(last_byte)) {
    return false;
  }

  *out_value = rex::memory::load_and_swap<uint32_t>(memory->TranslateVirtual(guest_address));
  return true;
}
// Formats the current guest thread's call chain as "[AAAAAAAA, BBBBBBBB, ...]"
// (upper-case hex, 8 digits each), holding at most 12 call-site addresses.
// Returns "[]" when there is no current thread state or context.
//
// The first entry is the live LR minus 4. Further entries come from walking
// the stack: for each frame starting at r1, the word at frame+0 is taken as
// the link to the next frame and the word at frame+8 as that frame's saved LR
// (again reported minus 4). NOTE(review): the +8 LR slot is what this code
// reads; confirm it matches the guest ABI's frame layout.
//
// The walk stops when the caller list is full, a guest read fails, the chain
// ends (0), or the chain fails to move strictly upward -- the last check
// guards against cycles in a corrupt chain.
//
// The previous loop bounded itself with `depth < callers.size() - count`.
// Because `count` grows inside the loop while `depth` grows too, that bound
// closed in from both sides and the walk ended with roughly half of the
// caller slots still unused. The bound is now expressed directly as "room
// left in callers", plus a separate cap on visited frames so frames without a
// usable saved LR cannot prolong the walk indefinitely.
std::string CurrentGuestStackTrace() {
  auto* thread_state = runtime::ThreadState::Get();
  auto* context = thread_state ? thread_state->context() : nullptr;
  if (!context) {
    return "[]";
  }

  std::array<uint32_t, 12> callers{};
  size_t count = 0;

  const uint32_t current_lr = static_cast<uint32_t>(context->lr);
  if (current_lr >= 4) {
    callers[count++] = current_lr - 4;
  }

  constexpr size_t kMaxFramesVisited = 64;
  uint32_t frame = context->r1.u32;
  for (size_t visited = 0;
       visited < kMaxFramesVisited && count < callers.size() && frame != 0; ++visited) {
    uint32_t next_frame = 0;
    uint32_t saved_lr = 0;
    if (!ReadGuestU32BE(frame, &next_frame) || !ReadGuestU32BE(frame + 8, &saved_lr)) {
      break;
    }
    if (saved_lr >= 4) {
      callers[count++] = saved_lr - 4;
    }
    if (next_frame == 0 || next_frame <= frame) {
      break;
    }
    frame = next_frame;
  }

  std::ostringstream text;
  text << '[' << std::uppercase << std::hex << std::setfill('0');
  for (size_t i = 0; i < count; ++i) {
    if (i) {
      text << ", ";
    }
    text << std::setw(8) << callers[i];
  }
  text << ']';
  return text.str();
}
// Whether PAC reads should log a guest stack trace. Controlled by the
// AC6_TRACE_PAC_STACKS environment variable (set, non-empty, and not "0");
// read once on first call and cached afterwards.
bool Ac6PacStackTraceEnabled() {
  static const bool kEnabled = []() -> bool {
    const char* raw = std::getenv("AC6_TRACE_PAC_STACKS");
    if (raw == nullptr || raw[0] == '\0') {
      return false;
    }
    return std::string_view(raw) != "0";
  }();
  return kEnabled;
}
// Whether completed PAC reads should be forwarded to the dump hook. Controlled
// by the AC6_DUMP_PAC_DECODED environment variable (set, non-empty, and not
// "0"); read once on first call and cached afterwards.
bool Ac6PacDumpingEnabled() {
  static const bool kEnabled = []() -> bool {
    const char* raw = std::getenv("AC6_DUMP_PAC_DECODED");
    if (raw == nullptr || raw[0] == '\0') {
      return false;
    }
    return std::string_view(raw) != "0";
  }();
  return kEnabled;
}
} // namespace
struct CreateOptions {
@@ -250,12 +334,22 @@ ppc_u32_result_t NtReadFile_entry(ppc_u32_t file_handle, ppc_u32_t event_handle,
const bool focused_pac_read = file && IsFocusedAc6PacPath(file->path());
if (focused_pac_read) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile request caller={:08X} thid={} path={} handle={:#x} len={:#x} "
"offset={} sync={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)buffer_length, byte_offset_ptr ? (int64_t)byte_offset : -1,
file->is_synchronous());
if (Ac6PacStackTraceEnabled()) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile request caller={:08X} thid={} path={} handle={:#x} len={:#x} "
"offset={} sync={} stack={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)buffer_length,
byte_offset_ptr ? (int64_t)byte_offset : -1, file->is_synchronous(),
CurrentGuestStackTrace());
} else {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile request caller={:08X} thid={} path={} handle={:#x} len={:#x} "
"offset={} sync={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)buffer_length,
byte_offset_ptr ? (int64_t)byte_offset : -1, file->is_synchronous());
}
}
if (XSUCCEEDED(result)) {
@@ -293,12 +387,31 @@ ppc_u32_result_t NtReadFile_entry(ppc_u32_t file_handle, ppc_u32_t event_handle,
signal_event = true;
if (focused_pac_read) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile result caller={:08X} path={} status={:#x} bytes_read={:#x} "
"iosb_status={:#x} iosb_info={:#x}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu);
if (Ac6PacStackTraceEnabled()) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile result caller={:08X} path={} status={:#x} bytes_read={:#x} "
"iosb_status={:#x} iosb_info={:#x} stack={}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu,
CurrentGuestStackTrace());
} else {
REXKRNL_INFO(
"[AC6 PAC] NtReadFile result caller={:08X} path={} status={:#x} bytes_read={:#x} "
"iosb_status={:#x} iosb_info={:#x}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu);
}
const bool read_ok = io_status_block ? XSUCCEEDED(io_status_block->status)
: XSUCCEEDED(result);
if (read_ok && bytes_read > 0 && Ac6PacDumpingEnabled()) {
const uint64_t resolved_offset =
byte_offset_ptr ? static_cast<uint64_t>(byte_offset)
: (file->position() >= bytes_read ? file->position() - bytes_read : 0);
Ac6OnPacReadCompleted(file->path(), buffer.guest_address(), resolved_offset, bytes_read);
}
}
}
@@ -349,12 +462,21 @@ ppc_u32_result_t NtReadFileScatter_entry(ppc_u32_t file_handle, ppc_u32_t event_
const bool focused_pac_read = file && IsFocusedAc6PacPath(file->path());
if (focused_pac_read) {
const uint64_t byte_offset = byte_offset_ptr ? static_cast<uint64_t>(*byte_offset_ptr) : 0;
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter request caller={:08X} thid={} path={} handle={:#x} "
"len={:#x} offset={} sync={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)length, byte_offset_ptr ? (int64_t)byte_offset : -1,
file->is_synchronous());
if (Ac6PacStackTraceEnabled()) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter request caller={:08X} thid={} path={} handle={:#x} "
"len={:#x} offset={} sync={} stack={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)length, byte_offset_ptr ? (int64_t)byte_offset : -1,
file->is_synchronous(), CurrentGuestStackTrace());
} else {
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter request caller={:08X} thid={} path={} handle={:#x} "
"len={:#x} offset={} sync={}",
CurrentGuestCallerAddress(), XThread::GetCurrentThreadId(), file->path(),
(uint32_t)file_handle, (uint32_t)length, byte_offset_ptr ? (int64_t)byte_offset : -1,
file->is_synchronous());
}
}
if (XSUCCEEDED(result)) {
@@ -379,12 +501,22 @@ ppc_u32_result_t NtReadFileScatter_entry(ppc_u32_t file_handle, ppc_u32_t event_
signal_event = true;
if (focused_pac_read) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter result caller={:08X} path={} status={:#x} bytes_read={:#x} "
"iosb_status={:#x} iosb_info={:#x}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu);
if (Ac6PacStackTraceEnabled()) {
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter result caller={:08X} path={} status={:#x} "
"bytes_read={:#x} iosb_status={:#x} iosb_info={:#x} stack={}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu,
CurrentGuestStackTrace());
} else {
REXKRNL_INFO(
"[AC6 PAC] NtReadFileScatter result caller={:08X} path={} status={:#x} "
"bytes_read={:#x} iosb_status={:#x} iosb_info={:#x}",
CurrentGuestCallerAddress(), file->path(), result, bytes_read,
io_status_block ? (uint32_t)io_status_block->status : 0xFFFFFFFFu,
io_status_block ? (uint32_t)io_status_block->information : 0xFFFFFFFFu);
}
}
}
+30
View File
@@ -1,3 +1,18 @@
[CmdletBinding()]
param(
# Enable the PAC stream-worker dispatch probe. When set, every distinct
# work-item virtual that rex_sub_82343E18 dispatches gets logged once
# via `[AC6 PAC WORKER] new dispatch target=...`. Cross-reference these
# against compressed-entry writes to identify the mode-1 decoder.
# Maps to AC6_TRACE_PAC_WORK_ITEMS=1; when the switch is omitted the variable
# is removed from this session so a stale value cannot leak into the launch.
[switch]$TraceWorkItems,
# Enable per-NtReadFile guest stack traces on PAC reads. Each call into
# NtReadFile / NtReadFileScatter for a PAC path logs `stack=[...]` with
# the full guest back-chain. Used to pin the decoder when it sits above
# the read-issuing function on the reader thread's call chain.
# Maps to AC6_TRACE_PAC_STACKS=1; removed from the session when omitted.
[switch]$TraceStacks
)
$ErrorActionPreference = 'Stop'
$repoRoot = Split-Path -Parent $PSScriptRoot
@@ -9,6 +24,21 @@ if (-not (Test-Path -LiteralPath $exePath)) {
$env:AC6_DUMP_PAC_DECODED = '1'
Write-Host "AC6_DUMP_PAC_DECODED=1"
# Mirror each opt-in switch into its environment variable for the process
# launched below: set it to '1' and announce it when the switch is present,
# otherwise remove any value inherited from the calling session.
$traceToggles = [ordered]@{
    'AC6_TRACE_PAC_WORK_ITEMS' = [bool]$TraceWorkItems
    'AC6_TRACE_PAC_STACKS'     = [bool]$TraceStacks
}
foreach ($toggle in $traceToggles.GetEnumerator()) {
    $envPath = "Env:$($toggle.Key)"
    if ($toggle.Value) {
        Set-Item -Path $envPath -Value '1'
        Write-Host "$($toggle.Key)=1"
    } else {
        Remove-Item $envPath -ErrorAction SilentlyContinue
    }
}
Write-Host "Launching $exePath"
Start-Process -FilePath $exePath -WorkingDirectory (Split-Path -Parent $exePath)