#!/usr/bin/env python3

# Uninitializer has two modes of operation:
#
# INJECT
# ======
# In this mode, uninitializer is given 3 inputs:
# - LHS overlay: overlay without uninitialized memory
#   (BSS filled with 0s, gaps in data/rdata filled with 0s)
# - RHS overlay: overlay with uninitialized memory
#   (BSS filled with 0x9e, gaps in data/rdata filled with 0x9e)
# - Uninitialized memory template: memory to be injected into
#   the resulting file. This memory was extracted from the original
#   overlays.
#
# The program iterates over those three files simultaneously.
# If it detects an uninitialized byte (byte in LHS = 0, byte in RHS = 0x9e)
# it injects a byte from the uninitialized memory template.
#
# EXTRACT
# =======
# This mode can be used to generate the uninitialized memory
# template file. Uninitializer is given 3 inputs:
# - LHS overlay
# - RHS overlay
# - Original overlay file (from MGS)
#
# The program iterates over those three files, detects uninitialized
# bytes and creates the uninitialized memory template file based on
# the given original overlay file.
#
# EXTRACT - example usage
# =======================
#
# BSS
# ---
#
# At the end of the overlay.c file you might have data like this:
#   const char s01a_dword_800E4CC8[] = {0x6, 0x11, 'd', 'r'};
#   const char s01a_dword_800E4CCC[] = {'a', 'w', '_', 'l'};
#   const char s01a_dword_800E4CD0[] = {'o', 'n', 'g', '_'};
#
# Once you have determined that this is indeed BSS and not rdata,
# you should replace it with:
#   int SECTION("overlay.bss") s01a_dword_800E4CC8;
#   int SECTION("overlay.bss") s01a_dword_800E4CCC;
#   int SECTION("overlay.bss") s01a_dword_800E4CD0;
#
# Next, see the "Extracting" section below.
#
# rdata
# -----
#
# Once you have identified all rdata in overlay.c, you might
# want to inline some strings to source files. Please remember
# that memleaks only happen at file boundaries, so there are
# no memleaks between strings in a single file. Apart from that
# perform the inlining as usual.
#
# Extracting
# ----------
#
# Rebuild the project. If there was any uninitialized memory
# in variables you modified (as described in sections above),
# the build will fail - the uninitialized memory files don't
# have the new uninitialized memory. You have to generate it again!
# You can do it by executing the following command (replacing s03er):
#
# python3 uninitializer.py extract ../obj/s03er_lhs.bin ../obj/s03er_rhs.bin ~/stage/s03er/00e05a.bin ../um/s03er.bin
#
# If the command succeeds, after rebuilding the project the overlay
# should match. If the command fails at any of the assertions,
# this means that the built overlay differs in more than memleaks
# compared to the original overlay - you have to find the difference
# manually (comparing lhs file with target file).

import sys
from create_dummy_file import DUMMY_FILE_SIZE
import os
import string
from collections import defaultdict

# In a couple of overlays we observed accesses to BSS variables outside
# the overlay bounds. It looks as if the overlay was truncated.
# We don't have any explanation for this, so let's just artificially
# trim the last few bytes in overlays that exhibit this behavior for now.
# Number of trailing bytes to trim from an LHS overlay, keyed by the
# basename of the LHS file. Overlays that are not listed are not trimmed.
BSS_HACK = defaultdict(int, {
    # in s11c function at 0x800ce014 accesses 0x800d32dc which is just
    # outside the overlay
    's11c_lhs.bin': 4,

    # issues with gasdamge.c BSS
    's02c_lhs.bin': 4,
    's02d_lhs.bin': 4,
    's02e_lhs.bin': 4,
})


def get_bss_adjustment(lhs):
    """Return how many trailing bytes to trim from the LHS overlay.

    Args:
        lhs: Path to the LHS overlay file; only its basename is used.

    Returns:
        The trim size registered in BSS_HACK, or 0 for unlisted overlays.
    """
    # Use .get() instead of indexing: indexing a defaultdict inserts the
    # missing key, which would grow BSS_HACK on every lookup of an
    # unlisted overlay.
    return BSS_HACK.get(os.path.basename(lhs), 0)


def _read_bytes(path):
    """Return the whole content of the file at *path* as bytes."""
    with open(path, 'rb') as f:
        return f.read()


def _check(condition, message):
    """Raise AssertionError(message) unless *condition* holds.

    Unlike the `assert` statement, this check is not removed when Python
    runs with -O, so a mismatching overlay can never slip through.
    """
    if not condition:
        raise AssertionError(message)


def _load_overlays(lhs, rhs, dummy_file_size):
    """Read the LHS and RHS overlays, trim the LHS and validate the sizes.

    Returns:
        A (lhs_bytes, rhs_bytes, dummy_file_size) tuple, where
        dummy_file_size has been resolved to DUMMY_FILE_SIZE if it was None.
    """
    if dummy_file_size is None:
        dummy_file_size = DUMMY_FILE_SIZE

    bss_adjustment = get_bss_adjustment(lhs)
    lhs_data = _read_bytes(lhs)
    rhs_data = _read_bytes(rhs)
    if bss_adjustment > 0:
        lhs_data = lhs_data[:-bss_adjustment]

    # These two checks together also guarantee len(lhs_data) < len(rhs_data).
    _check(len(rhs_data) == dummy_file_size,
           f"RHS overlay should be {dummy_file_size} bytes large, but it's {len(rhs_data)} bytes large")
    _check(len(lhs_data) < dummy_file_size,
           f"LHS overlay is too large ({len(lhs_data)} bytes large)")
    return lhs_data, rhs_data, dummy_file_size


def inject(lhs, rhs, uninitialized, out, *, dummy_file_size=None):
    """Write the LHS overlay with uninitialized memory injected into it.

    A byte counts as uninitialized when it is 0 in the LHS overlay and 0x9e
    in the RHS overlay. Each such byte is replaced, in order, with the next
    byte of the uninitialized memory template.

    Args:
        lhs: Path to the LHS overlay.
        rhs: Path to the RHS overlay.
        uninitialized: Path to the uninitialized memory template. If it is
            not an existing file, an empty template is used.
        out: Path of the output file.
        dummy_file_size: Expected size of the RHS overlay in bytes.
            Defaults to DUMMY_FILE_SIZE.

    Raises:
        AssertionError: If the overlay sizes are unexpected, or the overlays
            differ in anything other than uninitialized memory.
    """
    lhs_data, rhs_data, dummy_file_size = _load_overlays(lhs, rhs, dummy_file_size)

    template = _read_bytes(uninitialized) if os.path.isfile(uninitialized) else b""
    # The template is stored without trailing zeros; pad it so that every
    # uninitialized byte has a value to take.
    template += b"\x00" * dummy_file_size

    out_arr = bytearray(lhs_data)
    template_idx = 0
    for i, (lb, rb) in enumerate(zip(lhs_data, rhs_data)):
        if lb == rb:
            continue
        if lb != 0 or rb != 0x9e:
            raise AssertionError(
                f"LHS and RHS differ in more than uninitialized memory at offset {i}, {lb:X} != {rb:X}")

        # Uninitialized memory!
        out_arr[i] = template[template_idx]
        template_idx += 1

    with open(out, 'wb') as f:
        f.write(out_arr)


def extract(lhs, rhs, target, uninitialized_out, *, dummy_file_size=None):
    """Create the uninitialized memory template from an original overlay.

    For every uninitialized byte (0 in the LHS overlay, 0x9e in the RHS
    overlay) the corresponding byte of the target is collected. Trailing
    zeros are dropped from the result. A summary and the printable ASCII
    characters of the result are printed to stdout.

    Args:
        lhs: Path to the LHS overlay.
        rhs: Path to the RHS overlay.
        target: Path to the original overlay file.
        uninitialized_out: Path of the template file to write, or None to
            only print the summary.
        dummy_file_size: Expected size of the RHS overlay in bytes.
            Defaults to DUMMY_FILE_SIZE.

    Raises:
        AssertionError: If the file sizes are unexpected, or the files
            differ in anything other than uninitialized memory.
    """
    lhs_data, rhs_data, _ = _load_overlays(lhs, rhs, dummy_file_size)
    target_data = _read_bytes(target)
    _check(len(lhs_data) == len(target_data),
           f"LHS overlay should be the same size as target. len(lhs) = {len(lhs_data)}, len(target) = {len(target_data)}")

    extracted = bytearray()
    nonzero_uninitialized = 0
    for i, (lb, rb, tb) in enumerate(zip(lhs_data, rhs_data, target_data)):
        if lb == rb == tb:
            continue
        if lb != 0 or rb != 0x9e:
            raise AssertionError(
                f"LHS, RHS, target differ in more than uninitialized memory at offset {i}, {lb:X} != {rb:X} != {tb:X}")

        # Uninitialized memory!
        extracted.append(tb)
        if tb != 0:
            nonzero_uninitialized += 1

    # Trim trailing zeros
    extracted = extracted.rstrip(b"\x00")

    if uninitialized_out is not None:
        with open(uninitialized_out, 'wb') as f:
            f.write(extracted)

    print("Extracted", len(extracted), "bytes of uninitialized memory")
    print(f"({nonzero_uninitialized} non-zero bytes)")
    print()

    # Build the lookup table once instead of once per extracted byte.
    printable = frozenset(string.printable.encode('ascii'))
    print(bytes(b for b in extracted if b in printable).decode('ascii'))


def main(argv=None):
    """Run the command-line interface and return the process exit code.

    Args:
        argv: Argument list including the program name. Defaults to sys.argv.

    Returns:
        0 on success, 1 for an unknown mode, 2 for missing arguments.
    """
    argv = sys.argv if argv is None else argv
    usage = (
        "usage: uninitializer.py inject LHS RHS UNINITIALIZED OUT\n"
        "       uninitializer.py extract LHS RHS TARGET UNINITIALIZED_OUT"
    )
    handlers = {'inject': inject, 'extract': extract}

    if len(argv) < 2:
        print(usage, file=sys.stderr)
        return 2

    mode = argv[1]
    if mode not in handlers:
        print("Unknown mode:", mode)
        return 1

    # Both modes take exactly four file paths after the mode name.
    if len(argv) < 6:
        print(usage, file=sys.stderr)
        return 2

    handlers[mode](*argv[2:6])
    return 0


if __name__ == '__main__':
    sys.exit(main())