Merge branch 'deps/diff-script' into 'master'

tools: improve first_diff.py to handle versions other than us1.0

See merge request banjo.decomp/banjo-kazooie!135
This commit is contained in:
Moses Troyer 2025-11-02 00:37:53 +00:00
commit 1f093a8036
4 changed files with 57 additions and 246 deletions

View File

@ -613,12 +613,12 @@ segments:
- [0xFA8F90, c, bs/walk]
- [0xFAA090, c, bs/walrus]
- [0xFABC10, c, bs/washy]
- [0xFABFD0, textbin, core2_untouched]
# - [0xF88710, c, nc/cameranodelist] #DONE
# - [0xF88BB0, c, code_33250] #DONE
# - [0xF88BB0, c, code_33310] #DONE
# - [0xF89050, c, code_336F0] #DONE
# - [0xF89410, c, code_33AB0] #DONE
- [0xFABFD0, c, nc/cameranodelist]
- [0xFAC720, c, code_33250] # address / size incorrect
- [0xFAC720, c, code_33310]
- [0xFACBC0, c, code_336F0]
- [0xFACD40, c, code_33AB0]
- [0xFACE50, textbin, core2_untouched]
# - [0xF89590, c, code_33C30] #DONE
# - [0xF896A0, c, nc/staticCamera] #DONE
# - [0xF898F0, c, code_33F90] #DONE
@ -1202,8 +1202,8 @@ segments:
- [0x1068510, .rodata, bs/walk]
- [0x10685A0, .rodata, bs/walrus]
- [0x1068600, .rodata, bs/washy]
- [0x1068660, rodatabin, core2_untouched]
# - [0x1044690, .rodata, nc/cameranodelist]
- [0x1068660, .rodata, nc/cameranodelist]
- [0x10686B0, rodatabin, core2_untouched]
# - [0x10446E0, .rodata, code_33F90]
# - [0x1044700, .rodata, code_34310]
# - [0x1044710, .rodata, code_34790]
@ -1472,7 +1472,7 @@ segments:
- [0x0106C980, .bss, bs/walk]
- [0x0106C980, .bss, bs/walrus]
- [0x0106C980, .bss, bs/washy]
# - [0x0106C980, .bss, nc/cameranodelist]
- [0x0106C980, .bss, nc/cameranodelist]
# - [0x0106C980, .bss, code_33C30]
# - [0x0106C980, .bss, nc/staticCamera]
# - [0x0106C980, .bss, code_34310]

View File

@ -1,255 +1,57 @@
#!/usr/bin/env python3
import os.path
# SPDX-FileCopyrightText: © 2022 AngheloAlf
# SPDX-License-Identifier: MIT
from __future__ import annotations
import argparse
from subprocess import check_call
# Command-line interface: how many instruction differences to report, whether
# to hand the first one to diff.py, and whether to rebuild first.
parser = argparse.ArgumentParser(
    description="Find the first difference(s) between the built ROM and the base ROM."
)
parser.add_argument(
    "-c",
    "--count",
    type=int,
    default=5,
    help="find up to this many instruction difference(s)",
)
parser.add_argument(
    "-d",
    "--diff",
    dest="diff_args",
    nargs="?",
    action="store",
    # False when the flag is absent, "prompt" when given without a value.
    default=False,
    const="prompt",
    help="run diff.py on the result with the provided arguments",
)
parser.add_argument(
    # The rebuild below invokes `make`, so the help text says so.
    "-m", "--make", help="run make before finding difference(s)", action="store_true"
)
args = parser.parse_args()
diff_count = args.count

if args.make:
    check_call(["make"])

# Reference ("base") image and map, and the freshly built ones to compare.
baseimg = "decompressed.us.v10.z64"
basemap = "decompressed.map"
myimg = "build/us.v10/banjo.us.v10.uncompressed.z64"
mymap = "build/us.v10/banjo.us.v10.map"

if not os.path.isfile(baseimg):
    print(f"{baseimg} must exist.")
    exit(1)
if not os.path.isfile(myimg) or not os.path.isfile(mymap):
    print(f"{myimg} and {mymap} must exist.")
    exit(1)

# Read both images fully into memory; `with` closes the handles promptly.
with open(myimg, "rb") as built_file:
    mybin = built_file.read()
with open(baseimg, "rb") as base_file:
    basebin = base_file.read()

# NOTE: ROM sizes are not compared; the check below is disabled.
# if len(mybin) != len(basebin):
#     print("Modified ROM has different size...")
#     exit(1)

if mybin == basebin:
    print("No differences!")
    exit(0)
import mapfile_parser
from pathlib import Path
import rabbitizer
def search_rom_address(target_addr):
ram_offset = None
prev_ram = 0
prev_rom = 0
prev_sym = "<start of rom>"
cur_file = "<no file>"
prev_file = cur_file
prev_line = ""
with open(mymap) as f:
for line in f:
if "load address" in line:
# Ignore .bss sections since we're looking for a ROM address
if ".bss" in line or ".bss" in prev_line:
ram_offset = None
continue
ram = int(line[16 : 16 + 18], 0)
rom = int(line[59 : 59 + 18], 0)
ram_offset = ram - rom
continue
def decodeInstruction(bytesDiff: bytes, mapFile: mapfile_parser.MapFile) -> str:
word = (bytesDiff[0] << 24) | (bytesDiff[1] << 16) | (bytesDiff[2] << 8) | (bytesDiff[3] << 0)
instr = rabbitizer.Instruction(word)
immOverride = None
prev_line = line
if instr.isJumpWithAddress():
# Instruction is a function call (jal)
if (
ram_offset is None
or "=" in line
or "*fill*" in line
or " 0x" not in line
):
continue
# Get the embedded address of the function call
symAddress = instr.getInstrIndexAsVram()
ram = int(line[16 : 16 + 18], 0)
rom = ram - ram_offset
sym = line.split()[-1]
# Search for the address in the mapfile
symInfo = mapFile.findSymbolByVramOrVrom(symAddress)
if symInfo is not None:
# Use the symbol from the mapfile instead of a raw value
immOverride = symInfo.symbol.name
if "0x" in sym:
ram_offset = None
continue
if "/" in sym:
cur_file = sym
continue
return instr.disassemble(immOverride=immOverride, extraLJust=-20)
if rom > target_addr:
return f"{prev_sym} (RAM 0x{prev_ram:X}, ROM 0x{prev_rom:X}, {prev_file})"
def firstDiffMain():
parser = argparse.ArgumentParser(description="Find the first difference(s) between the built ROM and the base ROM.")
prev_ram = ram
prev_rom = rom
prev_sym = sym
prev_file = cur_file
parser.add_argument("-c", "--count", type=int, default=5, help="find up to this many instruction difference(s)")
parser.add_argument("-v", "--version", help="Which version should be processed", default="us.v10")
parser.add_argument("-a", "--add-colons", action='store_true', help="Add colon between bytes" )
return "at end of rom?"
args = parser.parse_args()
buildFolder = Path("build")
def parse_map(map_fname):
ram_offset = None
cur_file = "<no file>"
syms = {}
prev_sym = None
prev_line = ""
with open(map_fname) as f:
for line in f:
if "load address" in line:
ram = int(line[16 : 16 + 18], 0)
rom = int(line[59 : 59 + 18], 0)
ram_offset = ram - rom
continue
BUILTROM = buildFolder / args.version / f"banjo.{args.version}.z64"
BUILTMAP = buildFolder / args.version / f"banjo.{args.version}.map"
prev_line = line
if args.version != "us.v10":
    # Version ids are spelled like the --version default ("us.v10"); the old
    # literal "us1.0" could never match, so this branch was always taken.
    # Per the original note, ROMs other than US 1.0 are not compressed, so
    # those versions are compared using the ".prelim" ROM instead.
    BUILTROM = buildFolder / args.version / f"banjo.{args.version}.prelim.z64"
if (
ram_offset is None
or "=" in line
or "*fill*" in line
or " 0x" not in line
):
continue
EXPECTEDROM = "expected" / BUILTROM
EXPECTEDMAP = "expected" / BUILTMAP
ram = int(line[16 : 16 + 18], 0)
rom = ram - ram_offset
sym = line.split()[-1]
mapfile_parser.frontends.first_diff.doFirstDiff(BUILTMAP, EXPECTEDMAP, BUILTROM, EXPECTEDROM, args.count, mismatchSize=True, addColons=args.add_colons, bytesConverterCallback=decodeInstruction)
if "0x" in sym:
ram_offset = None
continue
elif "/" in sym:
cur_file = sym
continue
syms[sym] = (rom, cur_file, prev_sym, ram)
prev_sym = sym
return syms
def map_diff():
    """Report the lowest-addressed symbol that moved between the two maps.

    Parses the built map (``mymap``) and the base map (``basemap``) with
    ``parse_map``. Among symbols present in both, it finds the one with the
    smallest ROM address (first field of the ``parse_map`` tuple) whose
    address differs from the base map.

    Returns:
        ``False`` if no shared symbol has a different address. Otherwise
        prints where the shift appears to start and returns ``True``.
    """
    built_syms = parse_map(mymap)
    base_syms = parse_map(basemap)

    # (rom, symbol, file, previous symbol) of the earliest moved symbol so far.
    first_moved = None
    for sym, info in built_syms.items():
        # Symbols missing from the base map cannot be compared.
        if sym not in base_syms or info[0] == base_syms[sym][0]:
            continue
        if first_moved is None or info[0] < first_moved[0]:
            first_moved = (info[0], sym, info[1], info[2])

    if first_moved is None:
        return False

    _, moved_sym, moved_file, prev_sym = first_moved
    print(
        f"Map appears to have shifted just before {moved_sym} ({moved_file}) -- in {prev_sym}?"
    )
    # If the preceding symbol is unknown to the base map, the base map is stale.
    if prev_sym is not None and prev_sym not in base_syms:
        print(
            f"(Base map file {basemap} out of date due to new or renamed symbols, so result may be imprecise.)"
        )
    return True
def hexbytes(bs):
    """Format an iterable of byte values as colon-separated uppercase hex.

    Args:
        bs: ``bytes``/``bytearray`` or any iterable of integers.

    Returns:
        A string such as ``"00:FF:10"``; the empty string for empty input.
    """
    return ":".join(f"{c:02X}" for c in bs)
# Walk both ROM images one 32-bit word at a time, counting differing words,
# reporting the first one, and collecting up to `diff_count` instruction
# differences (at most one per map location).
found_instr_diff = []
map_search_diff = []
diffs = 0
shift_cap = 100000

# Compare only whole words present in BOTH images. The size-equality check is
# disabled, so iterating over len(mybin) alone raises IndexError whenever the
# built ROM is longer than the base ROM or is not word-aligned.
compare_end = min(len(mybin), len(basebin))
compare_end -= compare_end % 4

for i in range(24, compare_end, 4):
    # (mybin[i:i+4] != basebin[i:i+4], but that's slightly slower in CPython...)
    # Stop counting once the cap is exceeded; by then it is treated as a shift.
    if diffs <= shift_cap and (
        mybin[i] != basebin[i]
        or mybin[i + 1] != basebin[i + 1]
        or mybin[i + 2] != basebin[i + 2]
        or mybin[i + 3] != basebin[i + 3]
    ):
        if diffs == 0:
            print(f"First difference at ROM addr 0x{i:X}, {search_rom_address(i)}")
            print(
                f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
            )
        diffs += 1
    # Compare the top six bits of the word's first byte (presumably the
    # instruction opcode field -- confirm).
    if len(found_instr_diff) < diff_count and mybin[i] >> 2 != basebin[i] >> 2:
        # search_rom_address() re-reads the map file on each call: look up once.
        location = search_rom_address(i)
        if location not in map_search_diff:
            found_instr_diff.append(i)
            map_search_diff.append(location)

if diffs == 0:
    print("No differences but ROMs differ?")
    exit()

# found_instr_diff and map_search_diff are appended to in lockstep above.
for i, location in zip(found_instr_diff, map_search_diff):
    print(f"Instruction difference at ROM addr 0x{i:X}, {location}")
    print(
        f"Bytes: {hexbytes(mybin[i : i + 4])} vs {hexbytes(basebin[i : i + 4])}"
    )
print()

definite_shift = diffs > shift_cap
if definite_shift:
    print(f"Over {shift_cap} differing words, must be a shifted ROM.")
else:
    print(f"{diffs} differing word(s).")

# Many differing words suggest shifted data; use the maps to locate the shift.
if diffs > 100:
    if not os.path.isfile(basemap):
        print(
            f"To find ROM shifts, copy a clean .map file to {basemap} and rerun this script."
        )
    elif not map_diff():
        print(f"No ROM shift{' (!?)' if definite_shift else ''}")

if args.diff_args:
    if len(found_instr_diff) < 1:
        print("No instruction difference to run diff.py on")
        exit()
    # The first whitespace-separated token of the location is the symbol name.
    diff_sym = map_search_diff[0].split()[0]
    if args.diff_args == "prompt":
        diff_args = input("Call diff.py with which arguments? ") or "--"
    else:
        diff_args = args.diff_args
    # Allow the flags to be given without their leading dash.
    if diff_args[0] != "-":
        diff_args = "-" + diff_args
    check_call(
        [
            "python3",
            "diff.py",
            diff_args,
            diff_sym,
        ]
    )
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    firstDiffMain()

View File

@ -431,6 +431,13 @@ progressDialog_showDialogMaskZero = 0x803568ac;
progressDialog_showDialogMaskFour = 0x803568cc;
progressDialog_setAndTriggerDialog_4 = 0x803569a0;
/* core2/file.c */
file_getByte_ifExpected = 0x8034B490;
file_isNextByteExpected = 0x8034B3E8;
file_getShort_ifExpected = 0x8034B670;
file_getFloat = 0x8034B170;
file_getWord_ifExpected = 0x8034B5A0;
file_getNFloats_ifExpected = 0x8034B558;
core2_TEXT_END = 0x80363a00;

View File

@ -11,4 +11,6 @@ python-Levenshtein
python-ranges
pyyaml
watchdog
rabbitizer
mapfile_parser
spimdisasm==1.35.0