#!/usr/bin/env python3
"""Generate C enum headers describing the contents of RARC archives.

The script walks ``./orig/``, finds every ``.arc`` file that lives below a
``res`` directory, decompresses it if it is Yaz0-compressed, and writes a
``res_<name>.h`` header under ``OUT_PATH`` containing:

* an enum of file *indices* and an enum of file *IDs* for the archive, and
* one enum of joint names for every bmd/bdl model in the archive that has a
  JNT1 chunk.

The external ``dtk`` tool (``DTK_PATH``) is used for Yaz0 decompression and
for listing/extracting the files stored inside an archive.
"""

from binary_funcs import read_bytes_until_null, read_u32, read_u16, read_u8, skip_bytes
import subprocess
from collections import defaultdict
from os import walk, makedirs
from pathlib import Path
from typing import NamedTuple, DefaultDict, Optional
import re

DTK_PATH = str(Path("./build/tools/dtk.exe"))
OUT_PATH = "build/res"
INDENT = " " * 4
ADD_EXT_TO_ENUM = False
SKIP_STAGE_ARCS = True
SKIP_DEMO_ARCS = True
SKIP_FILES_WITH_AT_SIGN = True

# get when this file was modified last, used to detect if we should re-extract enums
THIS_MTIME = Path(__file__).stat().st_mtime


class ArcFile(NamedTuple):
    # A file entry of an archive: its name, its position in the entry list
    # and the ID stored in the entry.
    file_name: str
    index: int
    id: int


class ArcNode(NamedTuple):
    # A node of an archive: its 4-character type tag (already decoded to
    # str by bin_make_str), its name and the range of entry indices it owns.
    node_type: str
    name: str
    inds: range


class ArcEnumValue(NamedTuple):
    # One enumerator: identifier and numeric value.
    enum_value_name: str
    value: int


class ArcEnum(NamedTuple):
    # One C enum: its name and its enumerators, in emission order.
    enum_name: str
    values: list[ArcEnumValue]


class JointParsedEnums(NamedTuple):
    # All joint enums found in the models of a single archive.
    enums: list[ArcEnum]


def ensure_dir(path: str) -> None:
    """Create directory *path* (and any parents) if it does not exist yet."""
    makedirs(path, exist_ok=True)


def bin_make_str(s: bytes) -> str:
    """Decode raw archive bytes to an ASCII string.

    Raises:
        AssertionError: if the bytes are not ASCII after the substitution
            below (AssertionError is what ``main`` catches and reports).
    """
    # The two-byte sequence 0x82 0x98 is replaced by a plain "x" before
    # decoding (presumably the Shift-JIS full-width "x" -- TODO confirm).
    s = s.replace(b"\x82\x98", b"x")
    try:
        return s.decode("ascii")
    except UnicodeDecodeError as e:
        raise AssertionError(f"{e}: {s}") from e


def sanitize_string(s: str) -> str:
    """Replace whitespace and the listed punctuation characters with ``_``."""
    return re.sub(r"[\s@:\.,\-<>*%\"!&()|\+$]", "_", s)


def read_str(binf) -> str:
    """Read a NUL-terminated string from *binf* at its current position."""
    return bin_make_str(read_bytes_until_null(binf))


def make_enum(e: ArcEnum) -> str:
    """Render *e* as C source: ``enum NAME { A=0x0, ... };``."""
    out = [f"enum {e.enum_name} {{"]
    out.extend(f"{INDENT}{v.enum_value_name}=0x{v.value:X}," for v in e.values)
    out.append("};")
    return "\n".join(out)


def parse_bmd(src_path: Path) -> Optional[ArcEnum]:
    """Build the joint-name enum for the bmd/bdl model at *src_path*.

    Returns:
        An ``ArcEnum`` whose values are the joint names of the first JNT1
        chunk, numbered by their position in the name table, or ``None``
        when the file has no JNT1 chunk.

    Raises:
        AssertionError: if the file does not start with a known J3D/bmd/bdl
            header, or a name cannot be decoded as ASCII.
    """
    with src_path.open("rb") as binf:
        header_magic = binf.read(4)
        known_J3D_magic = b"J3D2"
        assert(header_magic == known_J3D_magic), f"Attempted to parse {src_path} as bmd/bdl, but J3D header type doesn't match {known_J3D_magic} : {header_magic}"
        bmd_magic = binf.read(4)
        known_bmd_magics = (b"bmd3", b"bmd2", b"bdl4")
        assert(bmd_magic in known_bmd_magics), f"Attempted to parse {src_path} as bmd/bdl, but bmd/bdl header type doesn't match any of {known_bmd_magics} : {bmd_magic}"

        skip_bytes(binf, 4)
        chunk_count = read_u32(binf)

        # chunks start at 0x20; each begins with a 4-byte tag and a u32 size
        binf.seek(0x20)
        for _ in range(chunk_count):
            chunk_begin = binf.tell()
            name = bin_make_str(binf.read(4))
            size = read_u32(binf)
            next_chunk = chunk_begin + size

            if name == "JNT1":
                # the name-table offset (relative to the chunk start) sits
                # 12 bytes after the tag+size header
                skip_bytes(binf, 12)
                name_table = read_u32(binf) + chunk_begin
                binf.seek(name_table)
                num_strings = read_u16(binf)

                if ADD_EXT_TO_ENUM:
                    out_enum_name = sanitize_string(src_path.name.replace(".", "_")).upper() + "_JNT"
                else:
                    out_enum_name = sanitize_string(src_path.name.split(".")[0]).upper() + "_JNT"

                found_enums = []
                for i in range(num_strings):
                    # each table entry is 4 bytes; the u16 string offset
                    # (relative to the table start) is read from +6 + i*4
                    binf.seek(name_table + 6 + i * 4)
                    string_offset = read_u16(binf)
                    binf.seek(name_table + string_offset)
                    joint_name = f"{out_enum_name}_{read_str(binf).upper()}_e"
                    found_enums.append(ArcEnumValue(joint_name, i))

                return ArcEnum(out_enum_name, found_enums)

            binf.seek(next_chunk)

    return None


def extract_joint_enums(src_path: Path) -> JointParsedEnums:
    """Extract the bmd/bdl models of archive *src_path* and parse their joints.

    Archives excluded by the ``SKIP_*`` settings yield an empty result.
    Models are extracted with ``dtk vfs cp`` into a folder next to the
    archive (the archive path with every ``.`` replaced by ``__``) and are
    only re-extracted when missing or older than the archive.
    """
    if ((SKIP_FILES_WITH_AT_SIGN and "@" in str(src_path)) or
            (SKIP_STAGE_ARCS and "Stage" in src_path.parts) or
            (SKIP_DEMO_ARCS and any(x.startswith("Demo") for x in src_path.parts))):
        return JointParsedEnums([])

    out_jnt_enums: list[ArcEnum] = []
    internal_files = subprocess.run(
        [DTK_PATH, "vfs", "ls", "-r", f"{src_path}:"],
        stdout=subprocess.PIPE, text=True).stdout
    output_folder = Path(str(src_path).replace(".", "__"))

    for line in internal_files.split("\n"):
        # only lines made of exactly three " | "-separated columns are
        # listing rows; the middle column is the path inside the archive
        parts = line.split(" | ")
        if len(parts) != 3:
            continue

        internal_file = parts[1].strip(" ")
        internal_file_parts = internal_file.split(".")
        if len(internal_file_parts) < 2:
            continue

        extension = internal_file_parts[1]
        if extension not in ("bmd", "bdl"):
            continue

        internal_file_path = output_folder / internal_file

        # extract file from archive if either
        # 1. output file doesn't exist
        # 2. the archive file is newer than the output file (modded src)
        # (compare modification times; st_mode is the permission bits and
        # must not be used here)
        if (not internal_file_path.exists() or
                src_path.stat().st_mtime > internal_file_path.stat().st_mtime):
            ensure_dir(internal_file_path.parent)
            subprocess.run(
                [DTK_PATH, "vfs", "cp", f"{src_path}:{internal_file}", internal_file_path],
                stdout=subprocess.PIPE)

        out_enums = parse_bmd(internal_file_path)
        # parse_bmd returns None for models without a JNT1 chunk; there is
        # nothing to emit for those, and make_enum cannot render None
        if out_enums is not None:
            out_jnt_enums.append(out_enums)

    return JointParsedEnums(out_jnt_enums)


def convert_binary_to_resource_enum(src_path: Path, dest_path: Path) -> None:
    """Write the resource-enum header for the RARC archive *src_path*.

    The header written to *dest_path* contains an include guard, the
    ``dRes_INDEX_*`` and ``dRes_ID_*`` enums (grouped by archive node) and
    the joint enums of the archive's models.

    Raises:
        AssertionError: if *src_path* is not a RARC file, or the sync flag
            is set but a file's ID differs from its entry index.
    """
    joint_enums = extract_joint_enums(src_path)

    found_files: list[ArcFile] = []
    found_nodes: list[tuple[ArcNode, list[ArcFile]]] = []

    with src_path.open("rb") as binf:
        opening_bytes = binf.read(4)
        assert(opening_bytes == b"RARC"), f"Not a rarc file: starts with bytes {opening_bytes}"

        skip_bytes(binf, 4)
        data_header_offset = read_u32(binf)

        # all offsets in the data header are relative to the header itself
        binf.seek(data_header_offset)
        node_count = read_u32(binf)
        node_offset = read_u32(binf) + data_header_offset
        total_num_file_entries = read_u32(binf)
        file_entries_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 4)
        string_list_offset = read_u32(binf) + data_header_offset
        skip_bytes(binf, 2)
        sync_flag = read_u8(binf)

        # file entries are 0x14 bytes each
        for entry_index in range(total_num_file_entries):
            binf.seek(file_entries_list_offset + entry_index * 0x14)
            file_id = read_u16(binf)
            skip_bytes(binf, 2)
            # top byte: entry type flags, low 24 bits: name offset
            type_and_name_offset = read_u32(binf)
            entry_type = type_and_name_offset >> 24
            name_offset = type_and_name_offset & 0x00FFFFFF
            binf.seek(string_list_offset + name_offset)
            file_name = read_str(binf)

            if entry_type & 1:  # check to make sure its a file
                assert(not sync_flag or file_id == entry_index), \
                    f"Sync flag was set, but ID {file_id} does not match Index {entry_index} for file {file_name}"
                found_files.append(ArcFile(file_name, entry_index, file_id))

        index_file_lookup = {x.index: x for x in found_files}

        # node entries are 0x10 bytes each
        for node_index in range(node_count):
            binf.seek(node_offset + node_index * 0x10)
            this_node_type = bin_make_str(binf.read(4))
            this_node_name_offs = read_u32(binf)
            skip_bytes(binf, 2)
            this_node_index_count = read_u16(binf)
            this_node_first_index = read_u32(binf)
            this_node_inds = range(this_node_first_index,
                                   this_node_first_index + this_node_index_count)
            binf.seek(string_list_offset + this_node_name_offs)
            this_node_name = read_str(binf)

            this_node = ArcNode(this_node_type, this_node_name, this_node_inds)
            # keep only the entries of this node that are actual files
            node_files = [index_file_lookup[x] for x in this_node.inds
                          if x in index_file_lookup]
            found_nodes.append((this_node, node_files))

    out_lines: list[str] = []
    file_stem = src_path.name.split(".")[0]
    file_stem_upper = sanitize_string(file_stem.upper())
    out_lines.append(f"#ifndef RES_{file_stem_upper}_H")
    out_lines.append(f"#define RES_{file_stem_upper}_H\n")

    out_ids: list[str] = []
    out_idxs: list[str] = []
    # how many times each sanitized base name has been emitted so far;
    # repeats get a numeric tag so the enumerators stay unique
    appearance_count: DefaultDict[str, int] = defaultdict(int)

    for node, files in found_nodes:
        if not files:
            continue

        file_type_break = f"{INDENT}/* {node.node_type} */"
        out_ids.append(file_type_break)
        out_idxs.append(file_type_break)

        for file in files:
            parts = file.file_name.split(".")
            santitized_file_name = sanitize_string(parts[0].upper())  # Sanitize identifier
            seen_count = appearance_count[santitized_file_name]
            appearance_count[santitized_file_name] += 1

            ext = ""
            if len(parts) > 1:
                ext = sanitize_string(parts[1].upper())

            duplicate_tag = "_"
            if seen_count > 0:
                duplicate_tag = f"_{seen_count}_"

            # tiny optimization to do less string formatting
            begin_part = f"{INDENT}dRes_"
            mid_part = f"_{file_stem_upper}_{ext}_{santitized_file_name}{duplicate_tag}e=0x"
            out_idxs.append(f"{begin_part}INDEX{mid_part}{file.index:X},")
            out_ids.append(f"{begin_part}ID{mid_part}{file.id:X},")

    out_lines.append(f"enum dRes_INDEX_{file_stem_upper} {{")
    out_lines.extend(out_idxs)
    out_lines.append("};\n")

    out_lines.append(f"enum dRes_ID_{file_stem_upper} {{")
    out_lines.extend(out_ids)
    out_lines.append("};\n")

    for joint_enum in joint_enums.enums:
        out_lines.append(make_enum(joint_enum) + "\n")

    out_lines.append(f"#endif /* !RES_{file_stem_upper}_H */")

    out = "\n".join(out_lines)
    ensure_dir(dest_path.parent)
    with dest_path.open("w") as f:
        f.write(out)


def decompress_file(input_file: Path, output_file: Path) -> None:
    """Yaz0-decompress *input_file* into *output_file* using dtk."""
    # use pathlib to allow for unix+windows paths
    subprocess.run([DTK_PATH, "yaz0", "decompress", input_file, "-o", output_file])


def extract_enum_from_file(src_path: Path, dst_path: Path) -> None:
    """Generate the header *dst_path* for archive *src_path* if it is stale.

    Files that start with neither ``Yaz0`` nor ``RARC`` are ignored.
    Compressed archives are decompressed next to the source, into a file
    with ``.decompressed`` appended to the name, before being converted.
    """
    assert(src_path.exists())

    # we can skip extracting this file if all of the following are true
    # 1. The output file exists
    # 2. The src file is older than the output file (not modded)
    # 3. This python file is older than the output file (no updates to how we extract enums)
    if (dst_path.exists() and
            src_path.stat().st_mtime < dst_path.stat().st_mtime and
            THIS_MTIME < dst_path.stat().st_mtime):
        return

    # check the first bytes of the file
    with src_path.open("rb") as f:
        starting_bytes = f.read(4)

    if starting_bytes == b"Yaz0":
        is_compressed = True
    elif starting_bytes == b"RARC":
        is_compressed = False
    else:
        # not an arc file although it has the .arc extensions
        return

    if is_compressed:
        # if our file is compressed, then we should decompress it
        # we only need to decompress if any of these are true
        # 1. We've never decompressed this file before
        # 2. The src file is newer than the output file (modded src)
        new_src_path = src_path.with_suffix(src_path.suffix + ".decompressed")
        if (not new_src_path.exists() or
                new_src_path.stat().st_mtime < src_path.stat().st_mtime):
            decompress_file(src_path, new_src_path)
        src_path = new_src_path

    convert_binary_to_resource_enum(src_path, dst_path)


def main() -> None:
    """Walk ``./orig/`` and generate a header for every ``.arc`` under ``res``."""
    for dir_name, _dirnames, filenames in walk("./orig/"):
        dirpath = Path(dir_name)
        if "res" not in dirpath.parts:
            continue

        for file in filenames:
            file_path = dirpath / file
            if file_path.suffix != ".arc":
                continue

            # the version should be the second part of the path
            # ./orig/ShieldD/...
            version = file_path.parts[1]

            # find the res folder, truncate the path to be the part after the res folder
            res_index = file_path.parts.index("res")
            out_path = Path(*file_path.parts[res_index + 1:])

            # set the output path to be the designated output + the version + the file's hierarchy
            out_path = Path(OUT_PATH) / version / out_path

            # we're going to output to a header file, prefix it with "res_"
            out_path = out_path.with_name("res_" + out_path.name).with_suffix(".h")

            try:
                extract_enum_from_file(file_path, out_path)
            except AssertionError as e:
                print(f"ERROR: {file_path} -> {out_path}\n{e}\n")


if __name__ == "__main__":
    main()