Update progress script with new assets categories and update csv output format (#510)

* Reorganize csvs and progress.py

* Put stuff in the correct folders

* Reduce lots of repeated code

* Change csv output format

* Filter out automatically named variables in "Matching" progress calculation for assets

* Address Elliptic's review

* Don't count handwritten files in progress and add a way to fix files detected in the wrong section

* Add missing "total"

* More fixing

* Add two missing columns

* Update paths in Jenkinsfile

* Update progress shield in readme

* Update progress link
This commit is contained in:
Anghelo Carvajal
2021-12-18 13:37:37 -03:00
committed by GitHub
parent d5b71bd0f5
commit d4dc34ee71
23 changed files with 259 additions and 132 deletions
+171 -70
View File
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
import argparse, csv, git, json, os, re
import argparse, csv, git, json, os, re, sys
parser = argparse.ArgumentParser()
@@ -12,9 +12,16 @@ args = parser.parse_args()
NON_MATCHING_PATTERN = r'#ifdef\s+NON_MATCHING.*?#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\).*?#endif'
NOT_ATTEMPTED_PATTERN = r'#pragma\s+GLOBAL_ASM\s*\(\s*"(.*?)"\s*\)'
# This is the format ZAPD uses to autogenerate variable names
# It should not be used for properly documented variables
AUTOGENERATED_ASSET_NAME = re.compile(r".+[0-9A-Fa-f]{6}$")
# TODO: consider making this a parameter of this script
GAME_VERSION = "mm.us.rev1"
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    Takes the same positional and keyword arguments as the builtin
    ``print``; ``file`` is always set to ``sys.stderr`` by this wrapper.
    """
    print(*args, file=sys.stderr, **kwargs)
def GetFunctionsByPattern(pattern, files):
functions = []
@@ -71,6 +78,52 @@ def GetRemovableSize(functions_to_count):
return size
def CalculateMapSizes(mapFileList):
    """Fill in the "size" of every symbol of the asset `.data` map entries.

    Only entries whose "section" is ".data" and whose "name" starts with
    "build/assets/" are processed; every other entry is left untouched.
    The symbol dictionaries are updated in place.

    Args:
        mapFileList: list of dictionaries with (at least) the keys "name",
            "size", "section" and "symbols". Each symbol is a dictionary
            with a "vram" key; its "size" key is written by this function.

    Returns:
        The same `mapFileList` object that was passed in.
    """
    for mapFile in mapFileList:
        if mapFile["section"] != ".data":
            continue
        if not mapFile["name"].startswith("build/assets/"):
            continue

        symbols = mapFile["symbols"]
        if not symbols:
            continue

        accumulatedSize = 0

        # Every symbol but the last one spans up to the address of the next symbol
        for symbol, nextSymbol in zip(symbols, symbols[1:]):
            size = nextSymbol["vram"] - symbol["vram"]
            accumulatedSize += size
            symbol["size"] = size

        # The last symbol gets whatever remains of the file's size.
        # NOTE(review): this presumes the first symbol starts at the file's
        # start address (no leading padding) -- confirm against the map file.
        symbols[-1]["size"] = mapFile["size"] - accumulatedSize
    return mapFileList
def CalculateNonNamedAssets(mapFileList, assetsTracker):
    """Accumulate the size of asset symbols that still have autogenerated names.

    Only entries whose "section" is ".data" and whose "name" starts with
    "build/assets/" are considered. The asset category is the third
    component of the entry's path (`build/assets/<category>/...`); entries
    whose category is not a key of `assetsTracker` are ignored.

    Args:
        mapFileList: list of map entries (dictionaries with the keys "name",
            "section" and "symbols"); each symbol has "name" and "size".
        assetsTracker: dictionary indexed by asset category. The
            "removableSize" value of each category is increased in place.

    Returns:
        The same `assetsTracker` object that was passed in.
    """
    for mapFile in mapFileList:
        if mapFile["section"] != ".data":
            continue
        if not mapFile["name"].startswith("build/assets/"):
            continue

        assetCat = mapFile["name"].split("/")[2]
        # The category does not change per symbol, so check it only once
        if assetCat not in assetsTracker:
            continue

        for symbol in mapFile["symbols"]:
            if AUTOGENERATED_ASSET_NAME.search(symbol["name"]) is not None:
                assetsTracker[assetCat]["removableSize"] += symbol["size"]
    return assetsTracker
map_file = ReadAllLines('build/mm.map')
# Get list of Non-Matchings
@@ -86,12 +139,36 @@ not_attempted_functions = list(set(not_attempted_functions).difference(non_match
if not args.matching:
non_matching_functions = []
# Get asset files
audio_files = GetCsvFilelist(GAME_VERSION, "audio.csv")
misc_files = GetCsvFilelist(GAME_VERSION, "misc.csv")
object_files = GetCsvFilelist(GAME_VERSION, "object.csv")
scene_files = GetCsvFilelist(GAME_VERSION, "scene.csv")
texture_files = GetCsvFilelist(GAME_VERSION, "texture.csv")
# The order of this list should not change to prevent breaking the graph of the website
# New stuff shall be appended at the end of the list
assetsCategories = [
    "archives",
    "audio",
    "interface",
    "misc",
    "objects",
    "scenes",
    "text",
    # "deleted",
    # "segments",
]
assetsTracker = {}

# Manual fixer for files that would be counted in wrong categories
# "filename": "correctSection"
fileSectionFixer = {
    "osFlash": "code",  # Currently in `src/libultra` (would be counted as boot)
}

# One tracker entry per category; every counter starts at zero and is
# filled in later by the script
for assetCat in assetsCategories:
    assetsTracker[assetCat] = {
        # Get asset files
        "files": GetCsvFilelist(GAME_VERSION, f"{assetCat}.csv"),
        "currentSize": 0,
        "removableSize": 0,
        "totalSize": 0,
        "percent": 0,
    }
# Initialize all the code values
src = 0
@@ -104,22 +181,36 @@ asm_code = 0
asm_boot = 0
asm_ovl = 0
asm_libultra = 0
audio = 0
misc = 0
object_ = 0
scene = 0
texture = 0
mapFileList = []
for line in map_file:
line_split = list(filter(None, line.split(" ")))
if (len(line_split) == 4 and line_split[0].startswith(".")):
section = line_split[0]
obj_vram = int(line_split[1], 16)
file_size = int(line_split[2], 16)
obj_file = line_split[3]
obj_file = line_split[3].strip()
objFileSplit = obj_file.split("/")
fileData = {"name": obj_file, "vram": obj_vram, "size": file_size, "section": section, "symbols": []}
mapFileList.append(fileData)
if (section == ".text"):
if (obj_file.startswith("build/src")):
objFileName = objFileSplit[-1].split(".o")[0]
if objFileName in fileSectionFixer:
correctSection = fileSectionFixer[objFileName]
if correctSection == "code":
src_code += file_size
elif correctSection == "libultra":
src_libultra += file_size
elif correctSection == "boot":
src_boot += file_size
elif correctSection == "overlays":
src_ovl += file_size
elif (obj_file.startswith("build/src")):
if (obj_file.startswith("build/src/code")):
src_code += file_size
elif (obj_file.startswith("build/src/libultra")):
@@ -138,17 +229,29 @@ for line in map_file:
elif (obj_file.startswith("build/asm/overlays")):
asm_ovl += file_size
if (section == ".data"):
if (obj_file.startswith("build/assets/audio")):
audio += file_size
elif (obj_file.startswith("build/assets/misc")):
misc += file_size
elif (obj_file.startswith("build/assets/objects")):
object_ += file_size
elif (obj_file.startswith("build/assets/scenes")):
scene += file_size
elif (obj_file.startswith("build/assets/textures")):
texture += file_size
if section == ".data":
if obj_file.startswith("build/assets/"):
assetCat = obj_file.split("/")[2]
if assetCat in assetsTracker:
assetsTracker[assetCat]["currentSize"] += file_size
else:
eprint(f"Found file '{obj_file}' in unknown asset category '{assetCat}'")
eprint("I'll ignore this for now, but please fix it!")
elif len(line_split) == 2 and line_split[0].startswith("0x00000000"):
varVramStr, varName = line_split
varVram = int(varVramStr, 16)
varName = varName.strip()
if varName == "0x0":
continue
#print(varVram, varName)
symbolData = {"name": varName, "vram": varVram, "size": 0}
mapFileList[-1]["symbols"].append(symbolData)
mapFileList = CalculateMapSizes(mapFileList)
assetsTracker = CalculateNonNamedAssets(mapFileList, assetsTracker)
# Add libultra to boot.
src_boot += src_libultra
@@ -184,27 +287,22 @@ boot = src_boot - (non_matching_asm_boot + not_attempted_asm_boot)
ovl = src_ovl - (non_matching_asm_ovl + not_attempted_asm_ovl)
# Total code bucket sizes
code_size = src_code + asm_code
boot_size = src_boot + asm_boot
ovl_size = src_ovl + asm_ovl
handwritten = 0 # Currently unsure of any handwritten asm in MM
code_size = src_code # + asm_code
boot_size = src_boot # + asm_boot
ovl_size = src_ovl # + asm_ovl
handwritten = asm_code + asm_boot + asm_ovl
# Calculate the total amount of decompilable code
total = code_size + boot_size + ovl_size
# Calculate size of all assets
audio_size = 0
misc_size = 0
object_size = 0
scene_size = 0
texture_size = 0
for index, f in audio_files:
audio_size += os.stat(os.path.join("baserom", f)).st_size
for index, f in misc_files:
misc_size += os.stat(os.path.join("baserom", f)).st_size
for index, f in object_files:
object_size += os.stat(os.path.join("baserom", f)).st_size
for index, f in scene_files:
scene_size += os.stat(os.path.join("baserom", f)).st_size
for index, f in texture_files:
texture_size += os.stat(os.path.join("baserom", f)).st_size
# The total size of a category is the sum of the on-disk sizes of its
# files, looked up under the "baserom" directory
for assetCat in assetsTracker:
    for index, f in assetsTracker[assetCat]["files"]:
        assetsTracker[assetCat]["totalSize"] += os.stat(os.path.join("baserom", f)).st_size

# In "matching" mode the removable size of each category is not counted
# as progress
if args.matching:
    for assetCat in assetsTracker:
        assetsTracker[assetCat]["currentSize"] -= assetsTracker[assetCat]["removableSize"]
# Calculate asm and src totals
src = src_code + src_boot + src_ovl
@@ -214,12 +312,9 @@ asm = asm_code + asm_boot + asm_ovl
src -= non_matching_asm + not_attempted_asm
asm += non_matching_asm + not_attempted_asm
# Calculate the total amount of decompilable code
total = src + asm
# Calculate assets totals
assets = audio + misc + object_ + scene + texture
assets_total = audio_size + misc_size + object_size + scene_size + texture_size
assets = sum(x["currentSize"] for x in assetsTracker.values())
assets_total = sum(x["totalSize"] for x in assetsTracker.values())
# Convert values to percentages
src_percent = 100 * src / total
@@ -227,12 +322,11 @@ asm_percent = 100 * asm / total
code_percent = 100 * code / code_size
boot_percent = 100 * boot / boot_size
ovl_percent = 100 * ovl / ovl_size
assets_percent = 100 * assets / assets_total
audio_percent = 100 * audio / audio_size
misc_percent = 100 * misc / misc_size
object_percent = 100 * object_ / object_size
scene_percent = 100 * scene / scene_size
texture_percent = 100 * texture / texture_size
for assetCat in assetsTracker:
    assetData = assetsTracker[assetCat]
    # "totalSize" stays at 0 for a category without files; report 0% for it
    # instead of failing with a ZeroDivisionError
    if assetData["totalSize"]:
        assetData["percent"] = 100 * assetData["currentSize"] / assetData["totalSize"]
    else:
        assetData["percent"] = 0
# convert bytes to masks and rupees
num_masks = 24
@@ -273,16 +367,22 @@ rupees = int((src % bytes_per_mask) / bytes_per_rupee)
#print("")
if args.format == 'csv':
version = 1
version = 2
git_object = git.Repo().head.object
timestamp = str(git_object.committed_date)
git_hash = git_object.hexsha
csv_list = [str(version), timestamp, git_hash, str(code), str(code_size), str(boot), str(boot_size),
str(ovl), str(ovl_size), str(src), str(asm), str(len(non_matching_functions)),
str(audio), str(audio_size), str(misc), str(misc_size), str(object_), str(object_size),
str(scene), str(scene_size), str(texture), str(texture_size)]
csv_list = [
version, timestamp, git_hash, src, total,
boot, boot_size, code, code_size, ovl, ovl_size,
asm, len(non_matching_functions),
]
csv_list += [
assets, assets_total,
]
for assetCat in assetsCategories:
csv_list += [assetsTracker[assetCat]["currentSize"], assetsTracker[assetCat]["totalSize"]]
print(",".join(csv_list))
print(",".join(map(str, csv_list)))
elif args.format == 'shield-json':
# https://shields.io/endpoint
print(json.dumps({
@@ -293,18 +393,19 @@ elif args.format == 'shield-json':
}))
elif args.format == 'text':
adjective = "decompiled" if not args.matching else "matched"
assetsAdjective = "debinarized" if not args.matching else "identified"
print("src: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
print(" boot: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4)))
print(" code: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4)))
print(" overlays: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4)))
print("src: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(src, total, adjective, round(src_percent, 4)))
print(" boot: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(boot, boot_size, adjective, round(boot_percent, 4)))
print(" code: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(code, code_size, adjective, round(code_percent, 4)))
print(" overlays: {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(ovl, ovl_size, adjective, round(ovl_percent, 4)))
print()
print("assets: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(assets, assets_total, round(assets_percent, 4)))
print(" audio: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(audio, audio_size, round(audio_percent, 4)))
print(" misc: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(misc, misc_size, round(misc_percent, 4)))
print(" objects: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(object_, object_size, round(object_percent, 4)))
print(" scenes: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(scene, scene_size, round(scene_percent, 4)))
print(" textures: {:>9} / {:>8} bytes reconstructed {:>9.4f}%".format(texture, texture_size, round(texture_percent, 4)))
print("assets: {:>9} / {:>8} total bytes {:<13} {:>9.4f}%".format(assets, assets_total, assetsAdjective, round(assets_percent, 4)))
for assetCat in assetsTracker:
data = assetsTracker[assetCat]
print(" {:<10} {:>9} / {:>8} bytes {:<13} {:>9.4f}%".format(f"{assetCat}:", data["currentSize"], data["totalSize"], assetsAdjective, round(data["percent"], 4)))
print()
print("------------------------------------\n")