#!/usr/bin/env python3
"""Extract child payloads from offline-decoded or runtime-dumped AC6 FHM containers.

The script reads the manifest written by ``extract_ac6_pac.py``, parses every
extracted PAC entry as an FHM container, writes each child payload to the
output directory, and records the result in ``<output>/manifest.json``.
Runtime decode dumps (``entry_*.bin``) can optionally be merged in.
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from collections import defaultdict
from pathlib import Path

# Make the sibling ``ac6_fhm`` module importable when run as a script.
sys.path.insert(0, str(Path(__file__).resolve().parent))

from ac6_fhm import ext_for_magic, parse_fhm as parse_fhm_container, safe_tag

# Runtime dump file names:
#   entry_<record_id>_mode<mode>_c<compressed_size>_u<decompressed_size>[_off<hex offset>].bin
# The group names are the keys read back through ``groupdict()`` below.
DUMP_RE = re.compile(
    r"^entry_(?P<record_id>\d+)_mode(?P<mode>\d+)"
    r"_c(?P<compressed_size>\d+)_u(?P<decompressed_size>\d+)"
    r"(?:_off(?P<source_offset>[0-9a-fA-F]+))?\.bin$"
)


def load_manifest(path: Path) -> dict:
    """Return the parsed JSON manifest at *path*, or an empty manifest if it is missing."""
    if not path.exists():
        return {"entries": []}
    return json.loads(path.read_text(encoding="utf-8"))


def load_manifest_entries(path: Path) -> dict[tuple[int, int], list[dict]]:
    """Group the manifest's compressed entries by size pair.

    Returns a mapping from ``(compressed_size, decompressed_size)`` to the
    list of manifest entries whose ``storage_kind`` is ``"compressed"``.
    """
    manifest = load_manifest(path)
    by_pair: dict[tuple[int, int], list[dict]] = defaultdict(list)
    # ``.get`` keeps this consistent with iter_offline_manifest_sources, which
    # also tolerates a manifest without an "entries" key.
    for entry in manifest.get("entries", []):
        if entry["storage_kind"] != "compressed":
            continue
        by_pair[(entry["compressed_size"], entry["decompressed_size"])].append(entry)
    return by_pair


def safe_name(name: str) -> str:
    """Return *name* sanitised by ``safe_tag`` for use in file and directory names."""
    return safe_tag(name)


def _posix_str(path: Path) -> str:
    """Return *path* as a string with forward slashes only."""
    return str(path).replace("\\", "/")


def _relative_or_absolute(path: Path, root: Path) -> Path:
    """Return *path* relative to *root*, or *path* unchanged when it lies outside *root*."""
    try:
        return path.relative_to(root)
    except ValueError:
        return path


def _extract_children(children, container_dir: Path, output_root: Path, depth: int, max_depth: int) -> list[dict]:
    """Write already-parsed FHM *children* under *container_dir* and describe them.

    Children whose data starts with ``b"FHM "`` are recursed into while
    *depth* is below *max_depth*.
    """
    child_entries = []
    for child in children:
        safe_magic = safe_name(child.magic)
        child_name = f"{child.index:03d}_{safe_magic}{ext_for_magic(child.magic)}"
        child_path = container_dir / child_name
        child_path.write_bytes(child.data)
        child_entry = {
            "index": child.index,
            "offset": child.offset,
            "declared_size": child.declared_size,
            "size": child.size,
            "magic": child.magic,
            "path": _posix_str(child_path.relative_to(output_root)),
        }
        if child.notes:
            child_entry["parser_notes"] = child.notes
        if depth < max_depth and child.data[:4] == b"FHM ":
            nested_dir = container_dir / f"{child.index:03d}_{safe_magic}"
            nested_dir.mkdir(parents=True, exist_ok=True)
            nested_children = extract_container(child.data, nested_dir, output_root, depth + 1, max_depth)
            if nested_children:
                child_entry["nested"] = nested_children
        child_entries.append(child_entry)
    return child_entries


def extract_container(blob: bytes, container_dir: Path, output_root: Path, depth: int, max_depth: int) -> list[dict]:
    """Parse *blob* as an FHM container and write its children into *container_dir*.

    Returns one dict per child (index, offset, sizes, magic, path relative to
    *output_root*, optional parser notes and nested children), or an empty
    list when the parser yields no children.
    """
    children = parse_fhm_container(blob) or []
    return _extract_children(children, container_dir, output_root, depth, max_depth)


def extract_blob(blob: bytes, label: str, output_root: Path, max_depth: int, source_record: dict) -> dict:
    """Extract one source blob into ``output_root / safe_name(label)``.

    If the blob parses as an FHM container with children, the children are
    written out and the returned record has ``kind == "fhm"``. Otherwise the
    blob itself is written as ``<label>.bin`` and the record has
    ``kind == "raw"``. In both cases the record starts from *source_record*.
    """
    container_dir = output_root / safe_name(label)
    container_dir.mkdir(parents=True, exist_ok=True)
    children = parse_fhm_container(blob) or []
    if not children:
        raw_path = container_dir / f"{safe_name(label)}.bin"
        raw_path.write_bytes(blob)
        return {
            **source_record,
            "kind": "raw",
            "magic": blob[:4].decode("latin-1", errors="replace") if len(blob) >= 4 else "",
            "size": len(blob),
            "path": _posix_str(raw_path.relative_to(output_root)),
        }
    # Reuse the children parsed above instead of parsing the same blob again.
    child_entries = _extract_children(children, container_dir, output_root, 0, max_depth)
    return {
        **source_record,
        "kind": "fhm",
        "child_count": len(child_entries),
        "children": child_entries,
    }


def iter_offline_manifest_sources(manifest_path: Path, files_dir: Path) -> list[dict]:
    """List the extracted manifest entries whose decoded file exists on disk.

    Each result is ``{"entry": <manifest entry>, "path": <Path to the file>}``.
    Entries without a truthy ``extracted`` flag or ``path`` are skipped, as
    are entries whose file cannot be found.
    """
    manifest = load_manifest(manifest_path)
    sources = []
    manifest_root = manifest_path.parent
    for entry in manifest.get("entries", []):
        if not entry.get("extracted"):
            continue
        rel_path = entry.get("path")
        if not rel_path:
            continue
        path = manifest_root / rel_path
        if files_dir and not path.is_relative_to(files_dir):
            # Keep support for custom --pac-files while still trusting the
            # manifest's relative paths when they point elsewhere.
            alt = files_dir / Path(rel_path).name
            if alt.exists():
                path = alt
        if not path.exists():
            continue
        sources.append({"entry": entry, "path": path})
    return sources


def _build_arg_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Extract child payloads from offline-decoded or runtime-dumped AC6 FHM containers."
    )
    parser.add_argument(
        "--dump-dir",
        type=Path,
        default=Path("out") / "ac6_pac_runtime_dump",
        help="Directory containing runtime PAC decode dumps",
    )
    parser.add_argument(
        "--manifest",
        type=Path,
        default=Path("out") / "ac6_pac_extracted_raw" / "manifest.json",
        help="Manifest produced by extract_ac6_pac.py",
    )
    parser.add_argument(
        "--pac-files",
        type=Path,
        default=None,
        # The effective default is computed in main() as manifest_path.parent / "files".
        help="Decoded PAC files directory produced by extract_ac6_pac.py (default: <manifest dir>/files)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=Path("out") / "ac6_runtime_fhm_typed",
        help="Output directory for parsed FHM containers and child payloads",
    )
    parser.add_argument(
        "--include-runtime-dumps",
        action="store_true",
        help="Also merge entry_* runtime dumps from --dump-dir when present",
    )
    parser.add_argument(
        "--max-depth",
        type=int,
        default=4,
        help="Maximum nested FHM recursion depth",
    )
    return parser


def _extract_offline_sources(manifest_path: Path, pac_files: Path, output_root: Path, max_depth: int) -> list[dict]:
    """Extract every offline-decoded PAC entry listed in the manifest."""
    manifest_root = manifest_path.parent
    records = []
    for source in iter_offline_manifest_sources(manifest_path, pac_files):
        entry = source["entry"]
        path = source["path"]
        blob = path.read_bytes()
        label = f"idx_{entry['index']:04d}"
        source_record = {
            "source": "offline_pac",
            "entry_index": entry["index"],
            "pac_name": entry["pac_name"],
            "storage_kind": entry["storage_kind"],
            "compressed_size": entry["compressed_size"],
            "decompressed_size": entry["decompressed_size"],
            "source_offset": entry["offset"],
            # A file taken from a custom --pac-files directory may live
            # outside the manifest directory; record its full path then
            # instead of failing with ValueError.
            "input_path": _posix_str(_relative_or_absolute(path, manifest_root)),
        }
        records.append(extract_blob(blob, label, output_root, max_depth, source_record))
    return records


def _select_runtime_dumps(dump_dir: Path) -> list[tuple[Path, dict]]:
    """Pick one dump per (record id, mode, compressed size, decompressed size).

    When several files share a key, the first in sorted order wins unless a
    later one carries an ``_off<hex>`` suffix and the current pick does not.
    Returns ``(path, regex groupdict)`` pairs sorted by path.
    """
    selected: dict[tuple[int, int, int, int], tuple[Path, dict]] = {}
    for dump_path in sorted(dump_dir.glob("*.bin")):
        match = DUMP_RE.match(dump_path.name)
        if not match:
            continue
        meta = match.groupdict()
        key = (
            int(meta["record_id"]),
            int(meta["mode"]),
            int(meta["compressed_size"]),
            int(meta["decompressed_size"]),
        )
        current = selected.get(key)
        if current is None:
            selected[key] = (dump_path, meta)
            continue
        current_has_offset = current[1]["source_offset"] is not None
        new_has_offset = meta["source_offset"] is not None
        if new_has_offset and not current_has_offset:
            selected[key] = (dump_path, meta)
    return sorted(selected.values(), key=lambda item: item[0])


def _extract_runtime_dumps(
    dump_dir: Path,
    by_pair: dict[tuple[int, int], list[dict]],
    output_root: Path,
    max_depth: int,
) -> list[dict]:
    """Extract the selected runtime dumps, labelling them via the manifest when unambiguous."""
    records = []
    for dump_path, meta in _select_runtime_dumps(dump_dir):
        compressed_size = int(meta["compressed_size"])
        decompressed_size = int(meta["decompressed_size"])
        codec_mode = int(meta["mode"])
        record_id = int(meta["record_id"])
        source_offset = int(meta["source_offset"], 16) if meta["source_offset"] else None
        candidates = by_pair.get((compressed_size, decompressed_size), [])
        # Use the manifest index only when exactly one compressed entry has
        # this size pair; otherwise fall back to a label built from the sizes.
        # NOTE(review): dumps that share a size pair but differ in record id
        # or mode get the same label, hence the same output directory, so a
        # later dump can overwrite files written for an earlier one. Confirm
        # whether that is intended.
        base_label = (
            f"idx_{candidates[0]['index']:04d}"
            if len(candidates) == 1
            else f"pair_c{compressed_size}_u{decompressed_size}"
        )
        blob = dump_path.read_bytes()
        source_record = {
            "source": "runtime_dump",
            "dump": dump_path.name,
            "record_id": record_id,
            "codec_mode": codec_mode,
            "compressed_size": compressed_size,
            "decompressed_size": decompressed_size,
            "source_offset": source_offset,
            "candidate_indexes": [entry["index"] for entry in candidates],
        }
        records.append(extract_blob(blob, f"runtime_{base_label}", output_root, max_depth, source_record))
    return records


def main(argv: list[str] | None = None) -> int:
    """Run the extraction and return the process exit code.

    *argv* is the argument list to parse; ``None`` (the default) makes
    argparse read ``sys.argv``. Writes ``manifest.json`` into the output
    directory and prints a JSON summary to stdout.
    """
    args = _build_arg_parser().parse_args(argv)

    dump_dir = args.dump_dir.resolve()
    manifest_path = args.manifest.resolve()
    pac_files = args.pac_files.resolve() if args.pac_files else manifest_path.parent / "files"
    output_root = args.output.resolve()
    output_root.mkdir(parents=True, exist_ok=True)

    by_pair = load_manifest_entries(manifest_path)
    extracted = _extract_offline_sources(manifest_path, pac_files, output_root, args.max_depth)

    runtime_records: list[dict] = []
    if args.include_runtime_dumps and dump_dir.exists():
        runtime_records = _extract_runtime_dumps(dump_dir, by_pair, output_root, args.max_depth)
    extracted.extend(runtime_records)

    manifest = {
        "pac_files": str(pac_files),
        "dump_dir": str(dump_dir) if args.include_runtime_dumps else None,
        "manifest": str(manifest_path),
        "output": str(output_root),
        "containers": extracted,
    }
    (output_root / "manifest.json").write_text(json.dumps(manifest, indent=2), encoding="utf-8")
    print(
        json.dumps(
            {
                "containers": len(extracted),
                "offline_sources": sum(1 for item in extracted if item.get("source") == "offline_pac"),
                "runtime_dumps": len(runtime_records),
                "output": str(output_root),
            },
            indent=2,
        )
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())