Update progress script with new assets categories and update csv output format (#510)

* Reorganize csvs and progress.py
* Put stuff in the correct folders
* Reduce lots of repeated code
* Change csv output format
* Filter out automatically named variables in "Matching" progress calculation for assets
* Address Elliptic's review
* Don't count handwritten files in progress and add a way to fix files detected in the wrong section
* Add missing "total"
* More fixing
* Add two missing columns
* Update paths in Jenkinsfile
* Update progress shield in readme
* Update progress link
2026-07-08 13:26:14 -04:00 · 2021-12-18 13:37:37 -03:00
parent d5b71bd0f5
commit d4dc34ee71
23 changed files with 259 additions and 132 deletions
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-import argparse, csv, git, json, os, re
+import argparse, csv, git, json, os, re, sys

 parser = argparse.ArgumentParser()

@@ -12,9 +12,16 @@ args = parser.parse_args()
 NON_MATCHING_PATTERN = r'#ifdef\s+NON_MATCHING.*?#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\).*?#endif'
 NOT_ATTEMPTED_PATTERN = r'#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\)'

+# This is the format ZAPD uses to autogenerate variable names
+# It should not be used for properly documented variables
+AUTOGENERATED_ASSET_NAME = re.compile(r".+[0-9A-Fa-f]{6}$")
+
 # TODO: consider making this a parameter of this script
 GAME_VERSION = "mm.us.rev1"

+def eprint(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
 def GetFunctionsByPattern(pattern, files):
    functions = []

@@ -71,6 +78,52 @@ def GetRemovableSize(functions_to_count):

    return size

+def CalculateMapSizes(mapFileList):
+    for mapFile in mapFileList:
+        accumulatedSize = 0
+
+        if mapFile["section"] != ".data":
+            continue
+        if not mapFile["name"].startswith("build/assets/"):
+            continue
+
+        symbolCount = len(mapFile["symbols"])
+        if symbolCount == 0:
+            continue
+
+        # Calculate size of each symbol
+        for index in range(symbolCount - 1):
+            symbol = mapFile["symbols"][index]
+            nextSymbol = mapFile["symbols"][index+1]
+
+            size = nextSymbol["vram"] - symbol["vram"]
+            accumulatedSize += size
+
+            mapFile["symbols"][index]["size"] = size
+
+        # Calculate size of last symbol of the file
+        symbol = mapFile["symbols"][-1]
+        size = mapFile["size"] - accumulatedSize
+        mapFile["symbols"][-1]["size"] = size
+    return mapFileList
+
+def CalculateNonNamedAssets(mapFileList, assetsTracker):
+    for mapFile in mapFileList:
+        if mapFile["section"] != ".data":
+            continue
+        if not mapFile["name"].startswith("build/assets/"):
+            continue
+
+        assetCat = mapFile["name"].split("/")[2]
+
+        for symbol in mapFile["symbols"]:
+            symbolName = symbol["name"]
+            if AUTOGENERATED_ASSET_NAME.search(symbolName) is not None:
+                if assetCat in assetsTracker:
+                    assetsTracker[assetCat]["removableSize"] += symbol["size"]
+    return assetsTracker
+
+
 map_file = ReadAllLines('build/mm.map')

 # Get list of Non-Matchings
@@ -86,12 +139,36 @@ not_attempted_functions = list(set(not_attempted_functions).difference(non_match
 if not args.matching:
    non_matching_functions = []

-# Get asset files
-audio_files = GetCsvFilelist(GAME_VERSION, "audio.csv")
-misc_files = GetCsvFilelist(GAME_VERSION, "misc.csv")
-object_files = GetCsvFilelist(GAME_VERSION, "object.csv")
-scene_files = GetCsvFilelist(GAME_VERSION, "scene.csv")
-texture_files = GetCsvFilelist(GAME_VERSION, "texture.csv")
+# Do not reorder this list: changing the order would break the graph on the website.
+# New categories must be appended at the end of the list.
+assetsCategories = [
+    "archives",
+    "audio",
+    "interface",
+    "misc",
+    "objects",
+    "scenes",
+    "text",
+    # "deleted",
+    # "segments",
+]
+assetsTracker = dict()
+
+# Manual fixer for files that would be counted in wrong categories
+# "filename": "correctSection"
+fileSectionFixer = {
+    "osFlash": "code" # Currently in `src/libultra` (would be counted as boot)
+}
+
+for assetCat in assetsCategories:
+    assetsTracker[assetCat] = dict()
+    # Get asset files
+    assetsTracker[assetCat]["files"] = GetCsvFilelist(GAME_VERSION, f"{assetCat}.csv")
+    assetsTracker[assetCat]["currentSize"] = 0
+    assetsTracker[assetCat]["removableSize"] = 0
+    assetsTracker[assetCat]["totalSize"] = 0
+    assetsTracker[assetCat]["percent"] = 0
+

 # Initialize all the code values
 src = 0
@@ -104,22 +181,36 @@ asm_code = 0
 asm_boot = 0
 asm_ovl = 0
 asm_libultra = 0
-audio = 0
-misc = 0
-object_ = 0
-scene = 0
-texture = 0
+
+mapFileList = []

 for line in map_file:
    line_split =  list(filter(None, line.split(" ")))

    if (len(line_split) == 4 and line_split[0].startswith(".")):
        section = line_split[0]
+        obj_vram = int(line_split[1], 16)
        file_size = int(line_split[2], 16)
-        obj_file = line_split[3]
+        obj_file = line_split[3].strip()
+        objFileSplit = obj_file.split("/")
+
+        fileData = {"name": obj_file, "vram": obj_vram, "size": file_size, "section": section, "symbols": []}
+        mapFileList.append(fileData)

        if (section == ".text"):
-            if (obj_file.startswith("build/src")):
+            objFileName = objFileSplit[-1].split(".o")[0]
+
+            if objFileName in fileSectionFixer:
+                correctSection = fileSectionFixer[objFileName]
+                if correctSection == "code":
+                    src_code += file_size
+                elif correctSection == "libultra":
+                    src_libultra += file_size
+                elif correctSection == "boot":
+                    src_boot += file_size
+                elif correctSection == "overlays":
+                    src_ovl += file_size
+            elif (obj_file.startswith("build/src")):
                if (obj_file.startswith("build/src/code")):
                    src_code += file_size
                elif (obj_file.startswith("build/src/libultra")):
@@ -138,17 +229,29 @@ for line in map_file:
                elif (obj_file.startswith("build/asm/overlays")):
                    asm_ovl += file_size

-        if (section == ".data"):
-            if (obj_file.startswith("build/assets/audio")):
-                audio += file_size
-            elif (obj_file.startswith("build/assets/misc")):
-                misc += file_size
-            elif (obj_file.startswith("build/assets/objects")):
-                object_ += file_size
-            elif (obj_file.startswith("build/assets/scenes")):
-                scene += file_size
-            elif (obj_file.startswith("build/assets/textures")):
-                texture += file_size
+        if section == ".data":
+            if obj_file.startswith("build/assets/"):
+                assetCat = obj_file.split("/")[2]
+                if assetCat in assetsTracker:
+                    assetsTracker[assetCat]["currentSize"] += file_size
+                else:
+                    eprint(f"Found file '{obj_file}' in unknown asset category '{assetCat}'")
+                    eprint("I'll ignore this for now, but please fix it!")
+
+    elif len(line_split) == 2 and line_split[0].startswith("0x00000000"):
+        varVramStr, varName = line_split
+        varVram = int(varVramStr, 16)
+        varName = varName.strip()
+        if varName == "0x0":
+            continue
+        #print(varVram, varName)
+        symbolData = {"name": varName, "vram": varVram, "size": 0}
+        mapFileList[-1]["symbols"].append(symbolData)
+
+mapFileList = CalculateMapSizes(mapFileList)
+
+assetsTracker = CalculateNonNamedAssets(mapFileList, assetsTracker)
+

 # Add libultra to boot.
 src_boot += src_libultra
@@ -184,27 +287,22 @@ boot = src_boot - (non_matching_asm_boot + not_attempted_asm_boot)
 ovl = src_ovl - (non_matching_asm_ovl + not_attempted_asm_ovl)

 # Total code bucket sizes
-code_size = src_code + asm_code
-boot_size = src_boot + asm_boot
-ovl_size = src_ovl + asm_ovl
-handwritten = 0 # Currently unsure of any handwritten asm in MM
+code_size = src_code # + asm_code
+boot_size = src_boot # + asm_boot
+ovl_size  = src_ovl  # + asm_ovl
+handwritten = asm_code + asm_boot + asm_ovl
+
+# Calculate the total amount of decompilable code
+total = code_size + boot_size + ovl_size

 # Calculate size of all assets
-audio_size = 0
-misc_size = 0
-object_size = 0
-scene_size = 0
-texture_size = 0
-for index, f in audio_files:
-    audio_size += os.stat(os.path.join("baserom", f)).st_size
-for index, f in misc_files:
-    misc_size += os.stat(os.path.join("baserom", f)).st_size
-for index, f in object_files:
-    object_size += os.stat(os.path.join("baserom", f)).st_size
-for index, f in scene_files:
-    scene_size += os.stat(os.path.join("baserom", f)).st_size
-for index, f in texture_files:
-    texture_size += os.stat(os.path.join("baserom", f)).st_size
+for assetCat in assetsTracker:
+    for index, f in assetsTracker[assetCat]["files"]:
+        assetsTracker[assetCat]["totalSize"] += os.stat(os.path.join("baserom", f)).st_size
+
+if args.matching:
+    for assetCat in assetsTracker:
+        assetsTracker[assetCat]["currentSize"] -= assetsTracker[assetCat]["removableSize"]

 # Calculate asm and src totals
 src = src_code + src_boot + src_ovl
@@ -214,12 +312,9 @@ asm = asm_code + asm_boot + asm_ovl
 src -= non_matching_asm + not_attempted_asm
 asm += non_matching_asm + not_attempted_asm

-# Calculate the total amount of decompilable code
-total = src + asm
-
 # Calculate assets totals
-assets = audio + misc + object_ + scene + texture
-assets_total = audio_size + misc_size + object_size + scene_size + texture_size
+assets = sum(x["currentSize"] for x in assetsTracker.values())
+assets_total = sum(x["totalSize"] for x in assetsTracker.values())

 # Convert values to percentages
 src_percent = 100 * src / total
@@ -227,12 +322,11 @@ asm_percent = 100 * asm / total
 code_percent = 100 * code / code_size
 boot_percent = 100 * boot / boot_size
 ovl_percent = 100 * ovl / ovl_size
+
 assets_percent = 100 * assets / assets_total
-audio_percent = 100 * audio / audio_size
-misc_percent = 100 * misc / misc_size
-object_percent = 100 * object_ / object_size
-scene_percent = 100 * scene / scene_size
-texture_percent = 100 * texture / texture_size
+
+for assetCat in assetsTracker:
+    assetsTracker[assetCat]["percent"] = 100 * assetsTracker[assetCat]["currentSize"] / assetsTracker[assetCat]["totalSize"]

 # convert bytes to masks and rupees
 num_masks = 24
@@ -273,16 +367,22 @@ rupees = int((src % bytes_per_mask) / bytes_per_rupee)
 #print("")

 if args.format == 'csv':
-    version = 1
+    version = 2
    git_object = git.Repo().head.object
    timestamp = str(git_object.committed_date)
    git_hash = git_object.hexsha
-    csv_list = [str(version), timestamp, git_hash, str(code), str(code_size), str(boot), str(boot_size),
-                str(ovl), str(ovl_size), str(src), str(asm), str(len(non_matching_functions)),
-                str(audio), str(audio_size), str(misc), str(misc_size), str(object_), str(object_size),
-                str(scene), str(scene_size), str(texture), str(texture_size)]
+    csv_list = [
+        version, timestamp, git_hash, src, total,
+        boot, boot_size, code, code_size, ovl, ovl_size,
+        asm, len(non_matching_functions),
+    ]
+    csv_list += [
+        assets, assets_total,
+    ]
+    for assetCat in assetsCategories:
+        csv_list += [assetsTracker[assetCat]["currentSize"], assetsTracker[assetCat]["totalSize"]]

-    print(",".join(csv_list))
+    print(",".join(map(str, csv_list)))
 elif args.format == 'shield-json':
    # https://shields.io/endpoint
    print(json.dumps({
@@ -293,18 +393,19 @@ elif args.format == 'shield-json':
    }))
 elif args.format == 'text':
    adjective = "decompiled" if not args.matching else "matched"
+    assetsAdjective = "debinarized" if not args.matching else "identified"

-    print("src:  {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
-    print("    boot:     {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4)))
-    print("    code:     {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4)))
-    print("    overlays: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4)))
+    print("src:    {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
+    print("    boot:       {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4)))
+    print("    code:       {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4)))
+    print("    overlays:   {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4)))
    print()
-    print("assets:     {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(assets, assets_total, round(assets_percent, 4)))
-    print("    audio:    {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(audio, audio_size, round(audio_percent, 4)))
-    print("    misc:     {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(misc, misc_size, round(misc_percent, 4)))
-    print("    objects:  {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(object_, object_size, round(object_percent, 4)))
-    print("    scenes:   {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(scene, scene_size, round(scene_percent, 4)))
-    print("    textures: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(texture, texture_size, round(texture_percent, 4)))
+
+    print("assets: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(assets, assets_total, assetsAdjective, round(assets_percent, 4)))
+    for assetCat in assetsTracker:
+        data = assetsTracker[assetCat]
+        print("    {:<10}  {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{assetCat}:", data["currentSize"], data["totalSize"], assetsAdjective, round(data["percent"], 4)))
+
    print()
    print("------------------------------------\n")