From eb2943d07e2ec2b4104227ee971719ba1dc50ad1 Mon Sep 17 00:00:00 2001 From: robojumper Date: Sun, 16 Mar 2025 17:15:52 +0100 Subject: [PATCH] Fix Ghidra symbol postprocessing (#125) * Fix Ghidra symbol postprocessing * Actually fix namespacing --- tools/ghidra_scripts/DecompMapToGhidra.py | 61 ++----------- tools/ghidra_scripts/postprocess_symbol.py | 99 ++++++++++++++++++++++ 2 files changed, 107 insertions(+), 53 deletions(-) create mode 100644 tools/ghidra_scripts/postprocess_symbol.py diff --git a/tools/ghidra_scripts/DecompMapToGhidra.py b/tools/ghidra_scripts/DecompMapToGhidra.py index d17d97a7..95fc76ee 100644 --- a/tools/ghidra_scripts/DecompMapToGhidra.py +++ b/tools/ghidra_scripts/DecompMapToGhidra.py @@ -11,6 +11,7 @@ import re import demangle demangle.mode = 'demangle' +import postprocess_symbol from ghidra.app.util import NamespaceUtils from ghidra.program.model.symbol import SymbolUtilities @@ -36,54 +37,6 @@ allowed_sections = [ commit = None -def postprocess_demangled_name(demangled): - if demangled.startswith("vtable for "): - demangled = demangled[len("vtable for "):] + "::__vtable" - - if demangled.endswith(" const"): - demangled = demangled[:-len("const ")] - - thunk = False - guard = False - if demangled.startswith("non-virtual thunk to "): - thunk = True - demangled = demangled[len("non-virtual thunk to "):] - - if demangled.startswith("virtual thunk to "): - thunk = True - demangled = demangled[len("virtual thunk to "):] - - if demangled.startswith("guard variable for "): - guard = True - demangled = demangled[len("guard variable for "):] - - template_open = demangled.index("<") if "<" in demangled else None - first_space = demangled.index(" ") if " " in demangled else None - open_paren = demangled.index("(") if "(" in demangled else None - if template_open and first_space and open_paren and first_space < template_open and template_open < open_paren: - # this looks like a templated return type, so drop the return type - demangled = demangled[(first_space+1):] - - demangled = demangled.replace("(anonymous namespace)", "anonymous") - demangled = demangled.replace("operator ", "operator_") - demangled = demangled.replace(" ", "") - if ")::" in demangled: - # dFontMng_c::getFontPath(unsigned char)::TEMP_FONT_NAME - left = demangled.split("(")[0] - right = demangled.rsplit(")")[-1] - # dFontMng_c::getFontPath::TEMP_FONT_NAME - demangled = left + right - else: - demangled = demangled.split("(")[0] - - if thunk: - demangled += "_thunk" - if guard: - demangled += "_guard" - - return demangled - - def do_demangle(name): # try demangling if "__" in name: @@ -104,6 +57,7 @@ def do_demangle(name): default_sym_re = re.compile(".*_[0-9A-Fa-f]{8}$") + def parse_symbol(line): if "entry of" in line: return None @@ -198,15 +152,16 @@ def update_addr(addr, mangled_name, create_function=False): existing_symbol = getSymbolAt(addr) existing_name = existing_symbol.getName(True) if existing_symbol else None + allow_updating_comment = True + if comment["mangled"] and comment["mangled"] == mangled_name: - # skip updating - return + allow_updating_comment = False if not comment["mangled"] and not comment["original"] and existing_name and not default_sym_re.match(existing_name): comment["original"] = existing_name demangled_name = do_demangle(mangled_name) - postprocessed = postprocess_demangled_name(demangled_name) + name_list = postprocess_symbol.postprocess_demangled_name(demangled_name) comment["mangled"] = mangled_name comment["history"].append(commit + " " + demangled_name) @@ -219,7 +174,7 @@ def update_addr(addr, mangled_name, create_function=False): complete_plate_comment = "\n".join(complete_plate_comment) - name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in postprocessed.split("::")] + name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in name_list] symbol_str = name_list[-1] namespace = None if len(name_list) > 1: @@ -237,7 +192,7 @@ def update_addr(addr, mangled_name, create_function=False): if create_function: createFunction(addr, None) - if symbol_needs_history(mangled_name): + if symbol_needs_history(mangled_name) and allow_updating_comment: unit.setComment(PLATE_COMMENT, complete_plate_comment) diff --git a/tools/ghidra_scripts/postprocess_symbol.py b/tools/ghidra_scripts/postprocess_symbol.py new file mode 100644 index 00000000..c0b15541 --- /dev/null +++ b/tools/ghidra_scripts/postprocess_symbol.py @@ -0,0 +1,99 @@ +def postprocess_demangled_name(demangled): + """Turn a demangled name into a name that's usable as a Ghidra symbol name, + by stripping illegal characters and making sure namespaces are created as expected""" + if demangled.startswith("vtable for "): + demangled = demangled[len("vtable for "):] + "::__vtable" + + if demangled.endswith(" const"): + demangled = demangled[:-len("const ")] + + thunk = False + guard = False + if demangled.startswith("non-virtual thunk to "): + thunk = True + demangled = demangled[len("non-virtual thunk to "):] + + if demangled.startswith("virtual thunk to "): + thunk = True + demangled = demangled[len("virtual thunk to "):] + + if demangled.startswith("guard variable for "): + guard = True + demangled = demangled[len("guard variable for "):] + + demangled = demangled.replace("(anonymous namespace)", "anonymous") + demangled = demangled.replace("operator ", "operator_") + + template_open = demangled.index("<") if "<" in demangled else None + if template_open is not None: + first_space = demangled.rindex(" ", 0, template_open) if " " in demangled[:template_open] else None + open_paren = demangled.index("(") if "(" in demangled else None + if first_space and open_paren and first_space < template_open and template_open < open_paren: + # this looks like a templated return type, so drop the return type + demangled = demangled[(first_space+1):] + + demangled = demangled.replace(" ", "") + if ")::" in demangled: + # dFontMng_c::getFontPath(unsigned char)::TEMP_FONT_NAME + left = demangled.split("(")[0] + right = demangled.rsplit(")")[-1] + # dFontMng_c::getFontPath::TEMP_FONT_NAME + demangled = left + right + else: + demangled = demangled.split("(")[0] + + if thunk: + demangled += "_thunk" + if guard: + demangled += "_guard" + + name_list = [] + template_depth = 0 + # do not split namespace within template arguments + for part in demangled.split("::"): + if template_depth == 0: + name_list.append(part) + else: + name_list[-1] += "::" + part + + for char in part: + if char == "<": + template_depth += 1 + elif char == ">": + template_depth -= 1 + + return name_list + + +if __name__ == "__main__": + import demangle + demangle.mode = 'demangle' + + testcases = [ + [ + "@GUARD@getFontPath__10dFontMng_cFUc@TEMP_FONT_NAME", + ["dFontMng_c", "getFontPath", "TEMP_FONT_NAME_guard"], + ], + [ + "__vt__Q24mDvd42TUncompressInfo_c", + ["mDvd", "TUncompressInfo_c", "__vtable"], + ], + ["chaseT__4sLibFPiii_i", ["sLib", "chaseT"]], + [ + "CalcAnimationFVS>__Q34nw4r3g3d27@unnamed@g3d_resanmchr_cpp@FPCQ34nw4r3g3d16ResAnmChrFVSDataf_f", + [ + "nw4r", + "g3d", + "anonymous", + "CalcAnimationFVS>", + ], + ], + ["baseID_Turn<10sStateID_c>__Fv_RC10sStateID_c", ["baseID_Turn"]], + ] + + for mangled, postprocessed in testcases: + demangled = demangle.demangle(mangled) + actual = postprocess_demangled_name(demangled) + assert actual == postprocessed, str(demangled) + " -> " + str(actual) + + print("OK")