diff --git a/docs/ac6_audio_extraction_walkthrough.txt b/docs/ac6_audio_extraction_walkthrough.txt new file mode 100644 index 00000000..aed111bc --- /dev/null +++ b/docs/ac6_audio_extraction_walkthrough.txt @@ -0,0 +1,299 @@ +================================================================================ +AC6 AUDIO EXTRACTION WALKTHROUGH +================================================================================ + +This walkthrough takes you from a fresh AC6_recomp checkout to playable .wav +files for every track of music, every voiceline, and every cutscene mix in +Ace Combat 6. It also explains why in-game sound effects (engines, missile +lock, explosions, weapons) are NOT covered yet, and what would be required to +extract them. + +If you've never extracted assets from this repo before, do the texture +walkthrough first (docs/ac6_asset_extraction_walkthrough.txt). It gets the +runtime PAC dumps populated, which this walkthrough also benefits from. + + +================================================================================ +0. WHAT YOU GET +================================================================================ + +After running this walkthrough you will have, in +out/ac6_runtime_audio/packs/ : + + bgmpack__00000_off00000000.xma (47 BGM tracks; one .xma each) + bgmpack__00001_off01450000.xma + ... + voicepack_eng__00000_off00000000.xma (7523 voicelines, English) + ... + voicepack_jpn__00000_off00000000.xma (7523 voicelines, Japanese) + ... + demopack_eng__00000_off00000000.xma (31 cutscene mixes) + ... + +If vgmstream is installed, each .xma also has a sibling .wav next to it. + +You will ALSO have, in out/ac6_runtime_audio/, the runtime-captured FHM +RIFFs (currently 42 voice/event clips from the dumps you've already +collected). + +You will NOT yet have: + + - In-game SFX (engines, weapons, lock-on, explosions). See section 6. + - Cutscene video. moviepack.bin is standard ASF/WMV — see section 5. 
+ + +================================================================================ +1. PREREQUISITES +================================================================================ + + - The repo is built and ac6recomp.exe runs (texture walkthrough covers this). + - Python 3.10+ on PATH. + - Asset directory exists at: + out/build/win-amd64-relwithdebinfo/assets/ + containing bgmpack.bin, voicepack_eng.bin, voicepack_jpn.bin, + demopack_eng.bin, demopack_jpn.bin, moviepack.bin. + - (Optional, for .wav decoding) vgmstream-cli.exe. + Download "vgmstream-win64-cli.zip" from https://vgmstream.org/downloads + and extract anywhere — e.g. C:\tools\vgmstream\. + + +================================================================================ +2. THE QUICK PATH +================================================================================ + +From the repo root: + + python tools/export_ac6_audio.py --packs ^ + --vgmstream "C:/tools/vgmstream/vgmstream-cli.exe" + +That's it. Output: + + out/ac6_runtime_audio/ (FHM-channel RIFFs as .xma + .wav) + out/ac6_runtime_audio/packs/ (pack-channel streams as .xma + .wav) + +Without vgmstream installed, omit --vgmstream and you'll get .xma files +only — no .wav. The .xma files are still the actual audio, just in the +Xbox 360 codec; any vgmstream-aware player (foobar2000 + vgmstream +plugin, vgmstream-cli later, etc.) will play them. + + +================================================================================ +3. WHAT THE TOOL DOES, IN DETAIL +================================================================================ + +AC6 ships audio in two structurally different shapes. The exporter handles +each one differently. 
+ +---------------------------------------------------------------------------- +3a) Standalone "pack" files in the asset directory +---------------------------------------------------------------------------- + +bgmpack.bin / voicepack_*.bin / demopack_*.bin are flat concatenations of +independent RIFF/WAVE streams, each aligned to a 0x800-byte sector. + +Layout per pack: + + +-------------------+ offset 0 + | RIFF <size> WAVE | ----+ + | fmt / x2st / data | | sub-stream 0 = 8 + declared_size bytes + +-------------------+ ----+ + | (zero padding) | | aligned up to 0x800 + +-------------------+ + | RIFF <size> WAVE | + | ... | sub-stream 1 + +-------------------+ + ... + +The splitter reads the declared RIFF size at offset 4, copies +8 + declared_size bytes to its own .xma file, then advances to the next +sector boundary and looks for another RIFF. The codec inside is XMA2 for +bgmpack and XMA1 for voicepack/demopack — vgmstream decodes both. + +---------------------------------------------------------------------------- +3b) FHM-embedded RIFF entries +---------------------------------------------------------------------------- + +When AC6 runtime-decompresses a PAC entry containing audio, the FHM +extractor (tools/extract_ac6_runtime_fhm.py) writes each audio child +as <NNN>_RIFF.bin (e.g. 001_RIFF.bin). Those have the Project Aces RIFF/WAVE quirks: + + - The RIFF size field is set to file_size, NOT file_size - 8. The tool + patches this on export. + - The fmt chunk uses wFormatTag = 0x0165 (XMA1) but actually carries + XMA2 streams (an "x2st" chunk follows). Decoders that key off + wFormatTag alone will misread channels/sample rate; vgmstream knows + to look at x2st. + +The current corpus only has 42 RIFF entries, all from idx_0214 and +idx_0227 — these are voice/event clips. Adding more requires either +playing more of the game with AC6_DUMP_PAC_DECODED=1 or implementing the +offline mode-1 decompressor (see section 6). 
+ + +================================================================================ +4. COMMAND REFERENCE +================================================================================ + +Default (FHM corpus only, decode if vgmstream is on PATH): + + python tools/export_ac6_audio.py + +FHM corpus + asset packs: + + python tools/export_ac6_audio.py --packs + +Asset packs only, skip FHM corpus: + + python tools/export_ac6_audio.py --packs-only + +Dry-run (list what would be exported, write nothing): + + python tools/export_ac6_audio.py --packs --dry-run + +Skip the vgmstream decode step (write .xma only): + + python tools/export_ac6_audio.py --packs --no-decode + +Point at a specific vgmstream binary: + + python tools/export_ac6_audio.py --packs ^ + --vgmstream "C:/tools/vgmstream/vgmstream-cli.exe" + +Custom asset directory (if your build output is elsewhere): + + python tools/export_ac6_audio.py --packs ^ + --assets "D:/games/AC6/assets" + + +================================================================================ +5. CUTSCENE VIDEO (moviepack.bin) +================================================================================ + +moviepack.bin starts with the ASF GUID 30 26 B2 75 8E 66 CF 11 — it's a +standard Microsoft ASF container with WMV9 video and WMA audio inside. +Use ffmpeg directly (not vgmstream, not this tool): + + ffmpeg -i moviepack.bin -map 0 -c copy out/movies/all.wmv + +Or to remux into mp4 (re-encodes audio): + + ffmpeg -i moviepack.bin -c:v copy -c:a aac out/movies/all.mp4 + +The ASF object index identifies individual cutscenes; ffmpeg's segment +muxer can split them, but moviepack also tends to be a single concatenated +ASF, in which case ffmpeg will emit one file. Use ffprobe to inspect. + + +================================================================================ +6. 
THE MISSING SFX, AND WHAT IT TAKES TO GET THEM +================================================================================ + +If you extract everything above and play the .wavs, you'll hear: music, +dialogue, cutscene mixes. You will NOT hear: jet engines, missile lock +warnings, gun fire, explosions, ECM bursts, ground impacts, cockpit +warnings. Those live in a third channel that the current pipeline cannot +yet open. + +Why: + + - There is no sfxpack.bin. Look at the asset directory and you'll see + bgmpack / voicepack / demopack / moviepack — and that's it for audio. + - SFX waveforms are inside DATA00.PAC / DATA01.PAC, packed as RIFF/XMA + entries inside FHM containers like the voice clips above. + - Those FHM containers are mode-1 compressed. The runtime mode-1 hook + (see ac6_extraction_roadmap.md) decompresses entries as the game + requests them. Anything the game hasn't requested in your sessions + so far is still sitting in DATA*.PAC compressed. + - The cue tables that name the SFX are present already — idx_0000/ + 001_BSN_.bsn (sound bank), idx_0000/00{2,3,4}_BFX_.bfx (effects), + idx_0001/000_nusc.bin (Namco cue table) — but they only contain + pointers. The waveform data they point to is in the un-decompressed + SFX banks. + +Two ways forward: + +(a) Capture more at runtime. + Set AC6_DUMP_PAC_DECODED=1 and play missions with combat. The + SFX-bearing PAC entries will get decompressed by the game and + written to out/ac6_pac_runtime_dump/ as new entry_*_mode1_*.bin + files. Then re-run: + + python tools/run_ac6_asset_pipeline.py --skip-pac-extract + python tools/export_ac6_audio.py --packs + + New idx_NNNN directories under out/ac6_runtime_fhm_typed/ should + contain new RIFF entries. + +(b) Finish the offline mode-1 decompressor. + docs/ac6_extraction_roadmap.md "Path B" — port the guest mode-1 + decoder to Python so every compressed PAC entry can be materialized + without launching the game. 
This is the only way to guarantee all + SFX are extracted, regardless of what mission state you've reached. + +The recommended near-term option is (a). For a complete archive (b) is +the right answer. + + +================================================================================ +7. TROUBLESHOOTING +================================================================================ + +"found 0 RIFF entries" + Your FHM corpus is empty. Run the asset pipeline first: + python tools/run_ac6_asset_pipeline.py + If that produces no output either, you don't yet have any runtime + dumps — see ac6_asset_extraction_walkthrough.txt. + +"assets dir not found" + Pass --assets pointing at the directory that contains bgmpack.bin + etc. The default assumes a standard build at + out/build/win-amd64-relwithdebinfo/assets/. + +"vgmstream-cli not on PATH; writing .xma only" + Not an error — the .xma files are valid. Install vgmstream + (https://vgmstream.org/downloads) or pass --vgmstream <path> and + re-run. vgmstream is also available as a foobar2000 plugin if you + just want to listen, not convert. + +"truncated stream at 0x... ; stopping" + The pack walker found a RIFF whose declared size runs past the end + of the file. Usually means the pack file is partial / damaged. + Verify the file size against your install media. + +vgmstream decode fails with "unknown format" + Confirm the .xma file starts with 'RIFF' and that bytes 8..11 are + 'WAVE'. If they aren't, the splitter mis-stepped and the file is + junk. Try running with --no-decode and inspect the output bytes. + +The .wav plays as silence / noise + For the 42 FHM-channel clips: if vgmstream warns about the format + tag, that's the 0x0165-but-actually-XMA2 quirk. vgmstream handles + it correctly when an x2st chunk is present. If you patched the + file by hand and removed x2st, restore from out/ac6_runtime_fhm_typed + and re-run the exporter. + + +================================================================================ +8. 
WHERE THINGS LIVE +================================================================================ + +Inputs: + out/build/win-amd64-relwithdebinfo/assets/bgmpack.bin + out/build/win-amd64-relwithdebinfo/assets/voicepack_*.bin + out/build/win-amd64-relwithdebinfo/assets/demopack_*.bin + out/build/win-amd64-relwithdebinfo/assets/moviepack.bin + out/ac6_runtime_fhm_typed/idx_*/ (FHM-channel RIFFs) + +Outputs: + out/ac6_runtime_audio/ (FHM-channel .xma/.wav) + out/ac6_runtime_audio/packs/ (pack-channel .xma/.wav) + +Tools: + tools/export_ac6_audio.py (this walkthrough's tool) + tools/extract_ac6_runtime_fhm.py (FHM extractor) + tools/run_ac6_asset_pipeline.py (orchestrator) + +Reference: + docs/ac6_extraction_roadmap.md ("Audio extraction" + section, channel 1/2/3) + docs/ac6_asset_pipeline.md (overall pipeline) diff --git a/tools/export_ac6_audio.py b/tools/export_ac6_audio.py new file mode 100644 index 00000000..e553debc --- /dev/null +++ b/tools/export_ac6_audio.py @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 +"""Export AC6 audio to .xma (and optionally .wav via vgmstream-cli). + +Two sources are handled: + +1. Runtime FHM corpus (default). + The FHM extractor drops audio entries as `*_RIFF.bin`. Each is a Project + Aces RIFF/WAVE container with fmt (tag 0x0165 / XMA1), ALIG, x2st (XMA2 + stream descriptor), data. The RIFF size field is non-standard (= file_size + instead of file_size - 8) and gets patched on export. + +2. Monolithic asset packs (--packs). + bgmpack.bin / voicepack_*.bin / demopack_*.bin are flat concatenations of + independent RIFF/WAVE streams (XMA2 for BGM, XMA1 for voice/demo), aligned + to 0x800 sectors. We walk the declared RIFF size to split them. + +Note: in-game SFX (engines, weapons, explosions) are NOT in these packs -- +they live in BSN/BFX/nusc banks pointing into DATA*.PAC entries that aren't +runtime-decoded yet. Those need either the offline mode-1 decompressor or +gameplay capture with AC6_DUMP_PAC_DECODED=1. 
+""" +from __future__ import annotations + +import argparse +import shutil +import struct +import subprocess +import sys +from pathlib import Path + + +REPO_ROOT = Path(__file__).resolve().parent.parent +DEFAULT_INPUT = REPO_ROOT / "out" / "ac6_runtime_fhm_typed" +DEFAULT_OUTPUT = REPO_ROOT / "out" / "ac6_runtime_audio" +DEFAULT_ASSETS = REPO_ROOT / "out" / "build" / "win-amd64-relwithdebinfo" / "assets" + +PACK_FILES = [ + "bgmpack.bin", + "voicepack_eng.bin", + "voicepack_jpn.bin", + "demopack_eng.bin", + "demopack_jpn.bin", +] + + +def split_pack(pack_path: Path, out_dir: Path) -> int: + """Walk a concatenated multi-RIFF pack and write each stream as its own .xma. + + Each stream begins with `RIFF``WAVE`. The next stream starts at + `pos + 8 + declared_size`, padded up to a 0x800-byte sector boundary. + """ + SECTOR = 0x800 + written = 0 + blob = pack_path.read_bytes() + end = len(blob) + pos = 0 + stem = pack_path.stem + while pos + 12 <= end: + if blob[pos:pos + 4] != b"RIFF" or blob[pos + 8:pos + 12] != b"WAVE": + # Skip to next sector and re-check; some packs have padding regions. + pos = (pos + SECTOR) & ~(SECTOR - 1) + continue + declared = struct.unpack_from(" end: + print(f" [{stem}] truncated stream at 0x{pos:x} (declared {declared}, " + f"only {end - pos - 8} available); stopping") + break + out_name = f"{stem}__{written:05d}_off{pos:08x}.xma" + (out_dir / out_name).write_bytes(blob[pos:pos + total]) + written += 1 + # Advance to next sector boundary. + next_pos = (pos + total + SECTOR - 1) & ~(SECTOR - 1) + if next_pos == pos: + break + pos = next_pos + print(f" [{stem}] {written} streams written") + return written + + +def find_riffs(root: Path) -> list[Path]: + return sorted(p for p in root.rglob("*RIFF.bin") if p.is_file()) + + +def derive_output_name(riff: Path, root: Path) -> str: + rel = riff.relative_to(root) + parts = list(rel.parts) + # parts[-1] is e.g. "001_RIFF.bin"; drop suffix. 
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the exporter."""
    # __doc__ is None under ``python -OO``; fall back to a fixed summary so
    # the tool still starts.
    summary = (__doc__ or "Export AC6 audio to .xma (and optionally .wav).").splitlines()[0]
    ap = argparse.ArgumentParser(description=summary)
    ap.add_argument("--input", type=Path, default=DEFAULT_INPUT,
                    help=f"FHM-typed root (default: {DEFAULT_INPUT})")
    ap.add_argument("--output", type=Path, default=DEFAULT_OUTPUT,
                    help=f"Output directory (default: {DEFAULT_OUTPUT})")
    ap.add_argument("--packs", action="store_true",
                    help="Also split bgmpack/voicepack/demopack from --assets.")
    ap.add_argument("--packs-only", action="store_true",
                    help="Skip the FHM corpus and only split the asset packs.")
    ap.add_argument("--assets", type=Path, default=DEFAULT_ASSETS,
                    help=f"Asset directory containing *.bin packs (default: {DEFAULT_ASSETS})")
    ap.add_argument("--vgmstream", default=shutil.which("vgmstream-cli"),
                    help="Path to vgmstream-cli. If found on PATH it's used by default.")
    ap.add_argument("--no-decode", action="store_true",
                    help="Just copy/patch .xma files; skip vgmstream decode.")
    ap.add_argument("--dry-run", action="store_true",
                    help="List what would be exported and exit.")
    return ap


def main() -> int:
    """Run the exporter CLI.

    Exports the runtime FHM RIFF corpus and/or splits the asset packs into
    ``.xma`` files, then decodes them to ``.wav`` when vgmstream is usable.

    Returns:
        0 on success, 1 if any entry failed to patch or decode, 2 if a
        required input (FHM root, asset directory, vgmstream binary) is
        missing.
    """
    args = _build_arg_parser().parse_args()

    do_fhm = not args.packs_only
    do_packs = args.packs or args.packs_only

    if do_fhm and not args.input.is_dir():
        if not do_packs:
            print(f"input not found: {args.input}", file=sys.stderr)
            return 2
        # The packs do not depend on the FHM corpus, so a missing corpus
        # should not block splitting them.
        print(f"note: input not found: {args.input}; skipping FHM corpus",
              file=sys.stderr)
        do_fhm = False

    riffs = find_riffs(args.input) if do_fhm else []
    if do_fhm:
        print(f"found {len(riffs)} RIFF entries under {args.input}")

    pack_paths: list[Path] = []
    if do_packs:
        if not args.assets.is_dir():
            print(f"assets dir not found: {args.assets}", file=sys.stderr)
            return 2
        pack_paths = [args.assets / n for n in PACK_FILES if (args.assets / n).is_file()]
        print(f"found {len(pack_paths)} asset packs under {args.assets}")

    if args.dry_run:
        for r in riffs:
            print(" ", derive_output_name(r, args.input), f"({r.stat().st_size} B)")
        for p in pack_paths:
            print(" pack:", p.name, f"({p.stat().st_size} B)")
        return 0

    decode = not args.no_decode and args.vgmstream is not None
    if not args.no_decode and args.vgmstream is None:
        print("note: vgmstream-cli not on PATH; writing .xma only "
              "(re-run with --vgmstream to decode to .wav)")

    vgmstream = None
    if decode:
        # Resolve the decoder once, before writing anything. Otherwise a bad
        # path is rediscovered for every stream, producing thousands of
        # identical failures.
        vgmstream = shutil.which(str(args.vgmstream))
        if vgmstream is None:
            print(f"vgmstream not found: {args.vgmstream}", file=sys.stderr)
            return 2

    args.output.mkdir(parents=True, exist_ok=True)

    decoded = 0
    failed = 0

    # --- Asset packs: split first, then decode whatever is in packs/. ---
    pack_written = 0
    pack_xma_files: list[Path] = []
    if do_packs:
        pack_dir = args.output / "packs"
        pack_dir.mkdir(parents=True, exist_ok=True)
        for p in pack_paths:
            print(f"splitting {p.name}...")
            pack_written += split_pack(p, pack_dir)
        pack_xma_files = sorted(pack_dir.glob("*.xma"))

    if decode and pack_xma_files:
        print(f"decoding {len(pack_xma_files)} pack streams via vgmstream...")
        for xma_path in pack_xma_files:
            wav_path = xma_path.with_suffix(".wav")
            # A non-empty .wav from an earlier run counts as decoded, so an
            # interrupted run can resume without redoing finished streams.
            if wav_path.is_file() and wav_path.stat().st_size > 0:
                decoded += 1
                continue
            ok, err = run_vgmstream(vgmstream, xma_path, wav_path)
            if ok:
                decoded += 1
            else:
                failed += 1
                print(f" {xma_path.name}: {err}")

    # --- FHM corpus: patch the RIFF size, write .xma, optionally decode. ---
    for riff in riffs:
        out_name = derive_output_name(riff, args.input)
        xma_path = args.output / out_name
        blob = bytearray(riff.read_bytes())
        try:
            patched, info = patch_riff(blob)
        except ValueError as exc:
            print(f" skip {riff}: {exc}")
            failed += 1
            continue
        xma_path.write_bytes(patched)

        tag = info["fmt_tag"]
        tag_str = f"0x{tag:04X}" if tag is not None else "?"
        marker = "x2st" if info["has_x2st"] else "----"
        print(f" {out_name} fmt={tag_str} {marker} data={info['data_size']}")

        if decode:
            wav_path = xma_path.with_suffix(".wav")
            ok, err = run_vgmstream(vgmstream, xma_path, wav_path)
            if ok:
                decoded += 1
            else:
                failed += 1
                print(f" decode failed: {err}")

    print(f"done. fhm_riffs={len(riffs)} pack_streams={pack_written} "
          f"wav_decoded={decoded} failures={failed}")
    return 0 if failed == 0 else 1


if __name__ == "__main__":
    raise SystemExit(main())