diff --git a/tools/converters/res_arc.py b/tools/converters/res_arc.py new file mode 100644 index 000000000..ec0723ea2 --- /dev/null +++ b/tools/converters/res_arc.py @@ -0,0 +1,352 @@ +#!/usr/bin/env python3 + +# this command optionally takes -i and -o for input arc files and output enum files, and --debug which prints out the file it's currently working on. +# Iterates over all files in the orig directory, outputs the enum containing IDs and indices for each files contained in the arc. +# Outputs files to the ./assets/VERSION/ folder. Its path will match the path in the ./orig/files directory. +# Currently does not iterate over files on a disk image, all arc files must be present in a directory such that os.walk can see them + +from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes +import subprocess +from os import walk, makedirs +from pathlib import Path +from typing import NamedTuple, DefaultDict +import re +from sys import argv +import argparse + +debug_print = lambda *args, **kwargs : None + +DTK_PATH = str(Path("./build/tools/dtk.exe")) +OUT_PATH = "assets" +INDENT = " " * 4 +ADD_EXT_TO_ENUM = False +SKIP_STAGE_ARCS = True +SKIP_DEMO_ARCS = True +SKIP_FILES_WITH_AT_SIGN = True +# get when this file was modified last, used to detect if we should re-extract enums +THIS_MTIME = Path(__file__).stat().st_mtime + +class ArcFile(NamedTuple): + file_name:str + index:int + id:int + +class ArcNode(NamedTuple): + node_type:bytes + name:str + inds:range + +class ArcEnumValue(NamedTuple): + enum_value_name:str + value:int + +class ArcEnum(NamedTuple): + enum_name:str + values:list[ArcEnumValue] + +class JointParsedEnums(NamedTuple): + enums:list[ArcEnum] + +def ensure_dir(path:str)->None: + makedirs(path, exist_ok=True) + +def bin_make_str(s:bytes)->str: + s = s.replace(b"\x82\x98", b"x") + try: + s = s.decode("ascii") + except Exception as e: + raise AssertionError(f"{e}: {s}") + return s + +def sanitize_string(s:str)->str: + return re.sub(r"[\s@:\.,\-<>*%\"!&()|\+$]", "_", s) + +def read_str(binf)->str: + return bin_make_str(read_bytes_until_null(binf)) + +def make_enum(e:ArcEnum): + out = [] + out.append(f"enum {e.enum_name} {{") + for v in e.values: + out.append(f"{INDENT}{v.enum_value_name}=0x{v.value:X},") + out.append("};") + + out_enum = "\n".join(out) + + return out_enum + +def parse_bmd(src_path:Path): + global ADD_EXT_TO_ENUM + with src_path.open("rb") as binf: + header_magic = binf.read(4) + known_J3D_magic = b"J3D2" + assert(header_magic == b"J3D2"), f"Attempted to parse {src_path} as bmd/bdl, but J3D header type doesn't match {known_J3D_magic} : {header_magic}" + bmd_magic = binf.read(4) + known_bmd_magics = (b"bmd3", b"bmd2", b"bdl4") + assert(bmd_magic in known_bmd_magics), f"Attempted to parse {src_path} as bmd/bdl, but bmd/bdl header type doesn't match any of {known_bmd_magics} : {bmd_magic}" + skip_bytes(binf, 4) + chunk_count = read_u32(binf) + binf.seek(0x20) + for _ in range(chunk_count): + chunk_begin = binf.tell() + name = bin_make_str(binf.read(4)) + size = read_u32(binf) + next_chunk = chunk_begin + size + if name == "JNT1": + skip_bytes(binf, 12) + name_table = read_u32(binf) + chunk_begin + binf.seek(name_table) + num_strings = read_u16(binf) + found_enums = [] + if ADD_EXT_TO_ENUM: + out_enum_name = sanitize_string(src_path.name.replace(".", "_")).upper() + "_JNT" + else: + out_enum_name = sanitize_string(src_path.name.split(".")[0]).upper() + "_JNT" + + for i in range(num_strings): + binf.seek(name_table + 6 + i * 4) + string_offset = read_u16(binf) + binf.seek(name_table + string_offset) + + joint_name = f"{out_enum_name}_{read_str(binf).upper()}_e" + found_enums.append(ArcEnumValue(joint_name, i)) + # print(joint_name) + return ArcEnum(out_enum_name, found_enums) + binf.seek(next_chunk) + return None + +def extract_joint_enums(src_path:Path): + if ((SKIP_FILES_WITH_AT_SIGN and "@" in str(src_path)) or + (SKIP_STAGE_ARCS and "Stage" in src_path.parts) or + (SKIP_DEMO_ARCS and any(x.startswith("Demo") for x in src_path.parts))): + return JointParsedEnums([]) + + out_jnt_enums:list[ArcEnum] = [] + internal_files = subprocess.run([DTK_PATH, "vfs", "ls", "-r", f"{src_path}:"], stdout=subprocess.PIPE, text=True).stdout + output_folder = Path(str(src_path).replace(".", "__")) + for line in internal_files.split("\n"): + parts = line.split(" | ") + if len(parts) != 3: continue + + internal_file = parts[1].strip(" ") + internal_file_parts = internal_file.split(".") + if len(internal_file_parts) < 2: continue + + extension = internal_file_parts[1] + if (extension not in ("bmd", "bdl")): continue + + internal_file_path = output_folder / internal_file + + # extract file from archive if either + # 1. output file doesn't exist + # 2. the archive file is newer than the output file (modded src) + if (not internal_file_path.exists() or + src_path.stat().st_mtime > internal_file_path.stat().st_mode): + ensure_dir(internal_file_path.parent) + subprocess.run([DTK_PATH, "vfs", "cp", f"{src_path}:{internal_file}", internal_file_path], stdout=subprocess.PIPE) + + out_enums = parse_bmd(internal_file_path) + + out_jnt_enums.append(out_enums) + + return JointParsedEnums(out_jnt_enums) + +def convert_binary_to_resource_enum(src_path: Path, dest_path: Path) -> None: + joint_enums = extract_joint_enums(src_path) + + with src_path.open("rb") as binf: + opening_bytes = binf.read(4) + assert(opening_bytes == b"RARC"), f"Not a rarc file: starts with bytes {opening_bytes}" + skip_bytes(binf, 4) + data_header_offset = read_u32(binf) + binf.seek(data_header_offset) + node_count = read_u32(binf) + node_offset = read_u32(binf) + data_header_offset + total_num_file_entries = read_u32(binf) + file_entries_list_offset = read_u32(binf) + data_header_offset + skip_bytes(binf, 4) + string_list_offset = read_u32(binf) + data_header_offset + skip_bytes(binf, 2) + sync_flag = read_u8(binf) + + found_files:list[ArcFile] = [] + all_file_names = [] + for entry_index in range(total_num_file_entries): + binf.seek(file_entries_list_offset + entry_index * 0x14) + file_id = read_u16(binf) + skip_bytes(binf, 2) + type_and_name_offset = read_u32(binf) + entry_type = type_and_name_offset >> 24 + name_offset = type_and_name_offset & 0x00FFFFFF + binf.seek(string_list_offset + name_offset) + file_name = read_str(binf) + if entry_type & 1: # check to make sure its a file + assert(not sync_flag or file_id == entry_index), \ + f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}" + found_files.append(ArcFile(file_name, entry_index, file_id)) + all_file_names.append(file_name) + + index_file_lookup = {x.index : x for x in found_files} + + found_nodes:list[tuple[ArcNode, list[ArcFile]]] = [] + + for node_index in range(node_count): + binf.seek(node_offset + node_index * 0x10) + this_node_type = bin_make_str(binf.read(4)) + this_node_name_offs = read_u32(binf) + skip_bytes(binf, 2) + this_node_index_count = read_u16(binf) + this_node_first_index = read_u32(binf) + this_node_inds = range(this_node_first_index, this_node_first_index + this_node_index_count) + binf.seek(string_list_offset + this_node_name_offs) + this_node_name = read_str(binf) + this_node = ArcNode(this_node_type, this_node_name, this_node_inds) + found_nodes.append((this_node, [index_file_lookup.get(x) for x in this_node.inds if x in index_file_lookup])) + + out_lines:list[str] = [] + file_stem = src_path.name.split(".")[0] + file_stem_upper = sanitize_string(file_stem.upper()) + + out_lines.append(f"#ifndef RES_{file_stem_upper}_H") + out_lines.append(f"#define RES_{file_stem_upper}_H\n") + + out_ids:list[str] = [] + out_idxs:list[str] = [] + + appearance_count = DefaultDict(int) + + for node, files in found_nodes: + if len(files) == 0: continue + file_type_break = f"{INDENT}/* {node.node_type} */" + out_ids.append(file_type_break) + out_idxs.append(file_type_break) + for file in files: + parts = file.file_name.split(".") + santitized_file_name = sanitize_string(parts[0].upper()) # Sanitize identifier + + ext = "" + if len(parts) > 1: + ext = sanitize_string(parts[1].upper()) + + file_str = f"{file_stem_upper}_{ext}_{santitized_file_name}" + + idx = f"dRes_INDEX_{file_str}" + seen_count = appearance_count[idx] + appearance_count[idx] = seen_count + 1 + if seen_count > 0: + idx += f"_{seen_count}" + + id = f"dRes_ID_{file_str}" + seen_count = appearance_count[id] + appearance_count[id] = seen_count + 1 + if seen_count > 0: + id += f"_{seen_count}" + + # tiny optimization to do less string formatting + out_idxs.append(f"{INDENT}{idx}_e=0x{file.index:X},") + out_ids.append(f"{INDENT}{id}_e=0x{file.id:X},") + + out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{") + out_lines.extend(out_idxs) + out_lines.append("};\n") + + out_lines.append(f"enum dRes_ID_{file_stem_upper} {{") + out_lines.extend(out_ids) + out_lines.append("};\n") + + for joint_enum in joint_enums.enums: + out_lines.append(make_enum(joint_enum) + "\n") + + out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */") + + out = "\n".join(out_lines) + ensure_dir(dest_path.parent) + with dest_path.open("w") as f: + f.write(out) + +def decompress_file(input_file:Path, output_file:Path) -> None: + # use pathlib to allow for unix+windows paths + subprocess.run([DTK_PATH, "yaz0", "decompress", input_file, "-o", output_file]) + +def extract_enum_from_file(src_path:Path, dst_path:Path) -> None: + assert(src_path.exists()) + + # we can skip extracting this file if all of the following are true + # 1. The output file exists + # 2. The src file is older than the output file (not modded) + # 3. This python file is older than the output file (no updates to how we extract enums) + if (dst_path.exists() and + src_path.stat().st_mtime < dst_path.stat().st_mtime and + THIS_MTIME < dst_path.stat().st_mtime): + return + + # check the first bytes of the file + with src_path.open("rb") as f: + starting_bytes = f.read(4) + + if starting_bytes == b"Yaz0": is_compressed = True + elif starting_bytes == b"RARC": is_compressed = False + # not an arc file although it has the .arc extensions + else: return + + if is_compressed: + # if our file is compressed, then we should decompress it + # we only need to decompress if any of these are true + # 1. We've never decompressed this file before + # 2. The src file is newer than the output file (modded src) + new_src_path = src_path.with_suffix(src_path.suffix + ".decompressed") + if (not new_src_path.exists() or + new_src_path.stat().st_mtime < src_path.stat().st_mtime): + decompress_file(src_path, new_src_path) + src_path = new_src_path + + convert_binary_to_resource_enum(src_path, dst_path) + +def main() -> None: + args = argparse.ArgumentParser() + args.add_argument("--debug", action="store_true", help="Prints each file as it is converted") + group = args.add_argument_group("IO", description="optional") + group.add_argument("-i", "--input", help="input arc file to convert") + group.add_argument("-o", "--output", help="output enum file path") + + args = args.parse_args() + global debug_print + if args.debug: + debug_print = print + + if bool(args.input) ^ bool(args.output): + print("If input or output is provided, both must be present") + exit(1) + + if bool(args.input) and bool(args.output): + extract_enum_from_file(Path(args.input), Path(args.output)) + exit() + + for dir, dirnames, filenames in walk("./orig/"): + dirpath = Path(dir) + if "res" not in dirpath.parts: continue + + for file in filenames: + file_path = dirpath / file + if file_path.suffix == ".arc": + # the version should be the second part of the path + # ./orig/ShieldD/... + version = file_path.parts[1] + # find the res folder, truncate the path to be the part after the res folder + out_path = Path("/".join(file_path.parts[file_path.parts.index("res") + 1:])) + # set the output path to be the designated output + the version + the file's heirarchy + out_path = OUT_PATH / (version / ("res" / out_path)) + # we're going to output to a header file + out_path = out_path.with_name(out_path.name).with_suffix(".h") + debug_print(out_path) + + if (SKIP_STAGE_ARCS and "Stage" in out_path.parts): + continue + + try: + extract_enum_from_file(file_path, out_path) + except AssertionError as e: + print(f"ERROR: {file_path} -> {out_path}\n{e}\n") + +if __name__ == "__main__": + main()