Fix Ghidra symbol postprocessing (#125)

* Fix Ghidra symbol postprocessing

* Actually fix namespacing
This commit is contained in:
robojumper
2025-03-16 17:15:52 +01:00
committed by GitHub
parent 7400f6c66a
commit eb2943d07e
2 changed files with 107 additions and 53 deletions
+8 -53
View File
@@ -11,6 +11,7 @@ import re
import demangle
demangle.mode = 'demangle'
import postprocess_symbol
from ghidra.app.util import NamespaceUtils
from ghidra.program.model.symbol import SymbolUtilities
@@ -36,54 +37,6 @@ allowed_sections = [
commit = None
def postprocess_demangled_name(demangled):
    """Turn a demangled name into a "::"-separated Ghidra symbol path.

    The following transformations are applied, in order:

    * "vtable for X" becomes "X::__vtable".
    * A trailing " const" qualifier is removed.
    * "non-virtual thunk to ", "virtual thunk to " and
      "guard variable for " prefixes are removed and remembered, so that
      "_thunk" / "_guard" can be appended to the final name.
    * "(anonymous namespace)" becomes "anonymous" and "operator " becomes
      "operator_".
    * If the name contains a "<" before the first "(" and there is a space
      in front of that "<", everything up to and including the last such
      space is treated as a return type and dropped.
    * All remaining spaces and the parameter list are removed; a name nested
      in a function ("f(args)::NAME") keeps the nested part.

    :param demangled: the demangled symbol name (a string)
    :return: the processed name as a single string; components are still
        separated by "::"
    """
    if demangled.startswith("vtable for "):
        demangled = demangled[len("vtable for "):] + "::__vtable"
    if demangled.endswith(" const"):
        demangled = demangled[:-len(" const")]

    thunk = False
    guard = False
    if demangled.startswith("non-virtual thunk to "):
        thunk = True
        demangled = demangled[len("non-virtual thunk to "):]
    if demangled.startswith("virtual thunk to "):
        thunk = True
        demangled = demangled[len("virtual thunk to "):]
    if demangled.startswith("guard variable for "):
        guard = True
        demangled = demangled[len("guard variable for "):]

    # Normalise these before looking for a return type: the text
    # "(anonymous namespace)" contains both a space and a parenthesis and
    # would otherwise be mistaken for a separator / parameter list.
    demangled = demangled.replace("(anonymous namespace)", "anonymous")
    demangled = demangled.replace("operator ", "operator_")

    template_open = demangled.find("<")
    if template_open != -1:
        # Use the LAST space in front of the template so that a return type
        # which itself contains spaces ("Foo const& f<T>()") is dropped
        # completely instead of only up to its first space.
        last_space = demangled.rfind(" ", 0, template_open)
        open_paren = demangled.find("(")
        if last_space != -1 and template_open < open_paren:
            # this looks like a templated return type, so drop the return type
            demangled = demangled[last_space + 1:]

    demangled = demangled.replace(" ", "")

    if ")::" in demangled:
        # dFontMng_c::getFontPath(unsigned char)::TEMP_FONT_NAME
        left = demangled.split("(")[0]
        right = demangled.rsplit(")")[-1]
        # dFontMng_c::getFontPath::TEMP_FONT_NAME
        demangled = left + right
    else:
        demangled = demangled.split("(")[0]

    if thunk:
        demangled += "_thunk"
    if guard:
        demangled += "_guard"
    return demangled
def do_demangle(name):
# try demangling
if "__" in name:
@@ -104,6 +57,7 @@ def do_demangle(name):
default_sym_re = re.compile(".*_[0-9A-Fa-f]{8}$")
def parse_symbol(line):
if "entry of" in line:
return None
@@ -198,15 +152,16 @@ def update_addr(addr, mangled_name, create_function=False):
existing_symbol = getSymbolAt(addr)
existing_name = existing_symbol.getName(True) if existing_symbol else None
allow_updating_comment = True
if comment["mangled"] and comment["mangled"] == mangled_name:
# skip updating
return
allow_updating_comment = False
if not comment["mangled"] and not comment["original"] and existing_name and not default_sym_re.match(existing_name):
comment["original"] = existing_name
demangled_name = do_demangle(mangled_name)
postprocessed = postprocess_demangled_name(demangled_name)
name_list = postprocess_symbol.postprocess_demangled_name(demangled_name)
comment["mangled"] = mangled_name
comment["history"].append(commit + " " + demangled_name)
@@ -219,7 +174,7 @@ def update_addr(addr, mangled_name, create_function=False):
complete_plate_comment = "\n".join(complete_plate_comment)
name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in postprocessed.split("::")]
name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in name_list]
symbol_str = name_list[-1]
namespace = None
if len(name_list) > 1:
@@ -237,7 +192,7 @@ def update_addr(addr, mangled_name, create_function=False):
if create_function:
createFunction(addr, None)
if symbol_needs_history(mangled_name):
if symbol_needs_history(mangled_name) and allow_updating_comment:
unit.setComment(PLATE_COMMENT, complete_plate_comment)
@@ -0,0 +1,99 @@
def postprocess_demangled_name(demangled):
    """Turn a demangled name into a list of Ghidra symbol path components.

    The following transformations are applied, in order:

    * "vtable for X" becomes "X::__vtable".
    * A trailing " const" qualifier is removed.
    * "non-virtual thunk to ", "virtual thunk to " and
      "guard variable for " prefixes are removed and remembered, so that
      "_thunk" / "_guard" can be appended to the final name.
    * "(anonymous namespace)" becomes "anonymous" and "operator " becomes
      "operator_".
    * If the name contains a "<" before the first "(" and there is a space
      in front of that "<", everything up to and including the last such
      space is treated as a return type and dropped.
    * All remaining spaces and the parameter list are removed; a name nested
      in a function ("f(args)::NAME") keeps the nested part.
    * The result is split on "::", except inside template argument lists.

    :param demangled: the demangled symbol name (a string)
    :return: list of name components, outermost namespace first; the last
        entry is the symbol's own name
    """
    if demangled.startswith("vtable for "):
        demangled = demangled[len("vtable for "):] + "::__vtable"
    if demangled.endswith(" const"):
        demangled = demangled[:-len(" const")]

    thunk = False
    guard = False
    if demangled.startswith("non-virtual thunk to "):
        thunk = True
        demangled = demangled[len("non-virtual thunk to "):]
    if demangled.startswith("virtual thunk to "):
        thunk = True
        demangled = demangled[len("virtual thunk to "):]
    if demangled.startswith("guard variable for "):
        guard = True
        demangled = demangled[len("guard variable for "):]

    demangled = demangled.replace("(anonymous namespace)", "anonymous")
    demangled = demangled.replace("operator ", "operator_")

    # NOTE(review): only the first "<" is inspected, so a return type that
    # itself contains template brackets is not recognised as a return type.
    template_open = demangled.find("<")
    if template_open != -1:
        last_space = demangled.rfind(" ", 0, template_open)
        open_paren = demangled.find("(")
        if last_space != -1 and template_open < open_paren:
            # this looks like a templated return type, so drop the return type
            demangled = demangled[last_space + 1:]

    demangled = demangled.replace(" ", "")

    if ")::" in demangled:
        # dFontMng_c::getFontPath(unsigned char)::TEMP_FONT_NAME
        left = demangled.split("(")[0]
        right = demangled.rsplit(")")[-1]
        # dFontMng_c::getFontPath::TEMP_FONT_NAME
        demangled = left + right
    else:
        demangled = demangled.split("(")[0]

    if thunk:
        demangled += "_thunk"
    if guard:
        demangled += "_guard"

    name_list = []
    template_depth = 0
    # do not split namespace within template arguments
    for part in demangled.split("::"):
        if template_depth == 0:
            name_list.append(part)
        else:
            name_list[-1] += "::" + part
        for char in part:
            if char == "<":
                template_depth += 1
            elif char == ">" and template_depth > 0:
                # Never go below zero: an unmatched ">" (as in "operator->"
                # or "operator>") is not a closing template bracket, and a
                # negative depth would glue every following component onto
                # the previous one.
                template_depth -= 1
    return name_list
if __name__ == "__main__":
    # Self-test: demangle a handful of known mangled symbols, run the result
    # through postprocess_demangled_name and compare with the expected list
    # of name components. Prints "OK" once every case has passed.
    import demangle

    demangle.mode = 'demangle'

    guard_case = [
        "@GUARD@getFontPath__10dFontMng_cFUc@TEMP_FONT_NAME",
        ["dFontMng_c", "getFontPath", "TEMP_FONT_NAME_guard"],
    ]
    vtable_case = [
        "__vt__Q24mDvd42TUncompressInfo_c<Q23EGG15StreamDecompSZS>",
        ["mDvd", "TUncompressInfo_c<EGG::StreamDecompSZS>", "__vtable"],
    ]
    template_function_case = [
        "chaseT<i>__4sLibFPiii_i",
        ["sLib", "chaseT<int>"],
    ]
    anonymous_namespace_case = [
        "CalcAnimationFVS<Q44nw4r3g3d27@unnamed@g3d_resanmchr_cpp@46CAnmFmtTraits<Q34nw4r3g3d18ResAnmChrFVS96Data>>__Q34nw4r3g3d27@unnamed@g3d_resanmchr_cpp@FPCQ34nw4r3g3d16ResAnmChrFVSDataf_f",
        [
            "nw4r",
            "g3d",
            "anonymous",
            "CalcAnimationFVS<nw4r::g3d::anonymous::CAnmFmtTraits<nw4r::g3d::ResAnmChrFVS96Data>>",
        ],
    ]
    reference_return_case = [
        "baseID_Turn<10sStateID_c>__Fv_RC10sStateID_c",
        ["baseID_Turn<sStateID_c>"],
    ]

    testcases = [
        guard_case,
        vtable_case,
        template_function_case,
        anonymous_namespace_case,
        reference_return_case,
    ]

    for case in testcases:
        mangled, expected = case
        demangled = demangle.demangle(mangled)
        actual = postprocess_demangled_name(demangled)
        assert actual == expected, " -> ".join([str(demangled), str(actual)])

    print("OK")