mirror of
https://github.com/zeldaret/ss
synced 2026-05-28 16:31:21 -04:00
Rework script
This commit is contained in:
@@ -6,6 +6,7 @@
|
||||
__pycache__
|
||||
.mypy_cache
|
||||
.cache/
|
||||
*$py.class
|
||||
|
||||
# Original files
|
||||
orig/*/*
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
#Take symbols from the decomp's map files, treat them as authoritative, and import them in Ghidra.
|
||||
#Requires cwdemangle in the PATH
|
||||
#Must run `python .\configure.py --map && ninja` beforehand!
|
||||
#@author robojumper
|
||||
#@category GameCube/Wii
|
||||
@@ -9,7 +8,9 @@
|
||||
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
import demangle
|
||||
demangle.mode = 'demangle'
|
||||
|
||||
from ghidra.app.util import NamespaceUtils
|
||||
from ghidra.program.model.symbol import SymbolUtilities
|
||||
@@ -32,22 +33,75 @@ allowed_sections = [
|
||||
'.rodata',
|
||||
]
|
||||
|
||||
def demangle(name):
|
||||
commit = None
|
||||
|
||||
|
||||
def postprocess_demangled_name(demangled):
    """
    Reduce a demangled C++ signature to a '::'-separated symbol path.

    The argument list, return type and qualifiers are dropped; special
    symbols are tagged with a suffix instead of their prose prefix:

        vtable for X             -> X::__vtable
        [non-]virtual thunk to X -> X_thunk
        guard variable for X     -> X_guard

    Args:
        demangled (str): Demangler output, e.g. 'Foo::bar(int) const'.

    Returns:
        str: The reduced name, e.g. 'Foo::bar'.
    """
    def strip_prefix(text, prefix):
        # (text without prefix, True) when the prefix is present, else (text, False)
        if text.startswith(prefix):
            return text[len(prefix):], True
        return text, False

    demangled, is_vtable = strip_prefix(demangled, "vtable for ")
    if is_vtable:
        demangled += "::__vtable"

    # trailing method constness is not part of the name
    if demangled.endswith(" const"):
        demangled = demangled[:-len(" const")]

    demangled, nonvirtual_thunk = strip_prefix(demangled, "non-virtual thunk to ")
    demangled, virtual_thunk = strip_prefix(demangled, "virtual thunk to ")
    demangled, guard = strip_prefix(demangled, "guard variable for ")
    thunk = nonvirtual_thunk or virtual_thunk

    space_at = demangled.find(" ")
    angle_at = demangled.find("<")
    paren_at = demangled.find("(")
    if 0 < space_at < angle_at < paren_at:
        # this looks like a templated return type, so drop the return type
        demangled = demangled[space_at + 1:]

    substitutions = (
        ("(anonymous namespace)", "anonymous"),
        ("operator ", "operator_"),
        (" ", ""),
    )
    for old, new in substitutions:
        demangled = demangled.replace(old, new)

    before_args = demangled.split("(")[0]
    if ")::" in demangled:
        # dFontMng_c::getFontPath(unsigned char)::TEMP_FONT_NAME
        #   -> dFontMng_c::getFontPath::TEMP_FONT_NAME
        demangled = before_args + demangled.rsplit(")")[-1]
    else:
        demangled = before_args

    if thunk:
        demangled += "_thunk"
    if guard:
        demangled += "_guard"

    return demangled
|
||||
|
||||
|
||||
def do_demangle(name):
|
||||
# try demangling
|
||||
if "__" in name:
|
||||
try:
|
||||
output = subprocess.check_output(["cwdemangle", name], stderr=subprocess.STDOUT)
|
||||
return output.strip().split("(")[0]
|
||||
except subprocess.CalledProcessError as e:
|
||||
if "Failed to demangle symbol" not in e.output:
|
||||
raise
|
||||
output = demangle.demangle_try(name)
|
||||
return output.strip()
|
||||
except Exception:
|
||||
pass
|
||||
# otherwise we try to undo the effects of the original
|
||||
# ghidra -> symbols.txt export here
|
||||
if not "$" in name and not "arraydtor" in name and not name.startswith("__"):
|
||||
if "$" not in name and "arraydtor" not in name and not name.startswith("__"):
|
||||
name = name.replace("__", "::")
|
||||
name = name.replace("::::", "::__")
|
||||
return name
|
||||
|
||||
|
||||
default_sym_re = re.compile(".*_[0-9A-Fa-f]{8}$")
|
||||
|
||||
def parse_symbol(line):
|
||||
if "entry of" in line:
|
||||
return None
|
||||
@@ -65,7 +119,6 @@ def parse_symbol(line):
|
||||
'vAddr': int(vAddr, 16),
|
||||
}
|
||||
|
||||
default_sym_re = re.compile(".*_[0-9A-Za-z]{8}$")
|
||||
|
||||
def parse_map_file(file):
|
||||
lines = [line for line in file]
|
||||
@@ -77,7 +130,7 @@ def parse_map_file(file):
|
||||
section_name = line.split(' ')[0]
|
||||
if section_name in allowed_sections:
|
||||
sections[section_name] = []
|
||||
i += 4 # go to symbols
|
||||
i += 4 # go to symbols
|
||||
while lines[i].strip() != "":
|
||||
sym = parse_symbol(lines[i])
|
||||
if sym is not None:
|
||||
@@ -87,93 +140,151 @@ def parse_map_file(file):
|
||||
|
||||
return sections
|
||||
|
||||
|
||||
# Matches names consisting only of '@' followed by digits (e.g. "@1234").
anon_static_re = re.compile("^@[0-9]+$")


def symbol_needs_history(name):
    """
    Return False for names matching anon_static_re or containing
    "arraydtor", True for everything else.
    """
    return not (anon_static_re.match(name) or "arraydtor" in name)
|
||||
|
||||
|
||||
# This script works incrementally by recording
|
||||
# the mangled name in a special plate comment.
|
||||
# If the mangled name is the same, we don't even bother
|
||||
# shelling out to cwdemangle, speeding up the whole process
|
||||
# quite a bit.
|
||||
mangled_prefix = "mangled-decomp-name-v1: "
|
||||
def update_addr(addr, mangled_name):
|
||||
mangled_prefix = "mangled-name"

# We also keep a history of the original Ghidra name and previous
# decomp names in this plate comment, for future reference
original_prefix = "original-name"
previous_prefix = "full-name"


def parse_comment(plate_comment):
    """
    Split a plate comment into the fields this script manages.

    Each line is classified by its leading keyword; the value is whatever
    follows the first space on that line, stripped.

    Args:
        plate_comment (str or None): Existing plate comment text.

    Returns:
        dict with keys:
            "mangled":  value of the last `mangled_prefix` line, or None
            "original": value of the last `original_prefix` line, or None
            "history":  values of all `previous_prefix` lines, in order
            "other":    every other line, stripped, in order
    """
    ret = {
        "other": [],
        "original": None,
        "history": [],
        "mangled": None,
    }

    if not plate_comment:
        return ret

    for line in plate_comment.splitlines():
        # partition() never raises, unlike split(" ", 1)[1], so a hand-edited
        # line carrying only the keyword no longer aborts the whole import.
        value = line.partition(" ")[2].strip()
        if line.startswith(mangled_prefix):
            ret["mangled"] = value or None
        elif line.startswith(original_prefix):
            ret["original"] = value or None
        elif line.startswith(previous_prefix):
            if value:
                ret["history"].append(value)
        else:
            ret["other"].append(line.strip())
    return ret
|
||||
|
||||
|
||||
def update_addr(addr, mangled_name, create_function=False):
|
||||
unit = listing.getCodeUnitAt(addr)
|
||||
if not unit:
|
||||
return
|
||||
|
||||
new_comment_line = mangled_prefix + mangled_name
|
||||
|
||||
comment = unit.getComment(PLATE_COMMENT)
|
||||
update_symbol = False
|
||||
new_comment = None
|
||||
if not comment:
|
||||
# no plate comment here, add one and set symbol
|
||||
update_symbol = True
|
||||
new_comment = [new_comment_line]
|
||||
comment_str = unit.getComment(PLATE_COMMENT)
|
||||
comment = parse_comment(comment_str)
|
||||
|
||||
existing_symbol = getSymbolAt(addr)
|
||||
existing_name = existing_symbol.getName(True) if existing_symbol else None
|
||||
|
||||
if comment["mangled"] and comment["mangled"] == mangled_name:
|
||||
# skip updating
|
||||
return
|
||||
|
||||
if not comment["mangled"] and not comment["original"] and existing_name and not default_sym_re.match(existing_name):
|
||||
comment["original"] = existing_name
|
||||
|
||||
demangled_name = do_demangle(mangled_name)
|
||||
postprocessed = postprocess_demangled_name(demangled_name)
|
||||
comment["mangled"] = mangled_name
|
||||
comment["history"].append(commit + " " + demangled_name)
|
||||
|
||||
complete_plate_comment = comment["other"]
|
||||
if comment["original"]:
|
||||
complete_plate_comment.append(original_prefix + " " + comment["original"])
|
||||
for h in comment["history"]:
|
||||
complete_plate_comment.append(previous_prefix + " " + h)
|
||||
complete_plate_comment.append(mangled_prefix + " " + mangled_name)
|
||||
|
||||
complete_plate_comment = "\n".join(complete_plate_comment)
|
||||
|
||||
name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in postprocessed.split("::")]
|
||||
symbol_str = name_list[-1]
|
||||
namespace = None
|
||||
if len(name_list) > 1:
|
||||
namespace_str = "::".join(name_list[:-1])
|
||||
namespace = NamespaceUtils.createNamespaceHierarchy(namespace_str, None, currentProgram, IMPORTED)
|
||||
|
||||
sym = getSymbolAt(addr)
|
||||
if namespace is None:
|
||||
namespace = currentProgram.getGlobalNamespace()
|
||||
if sym:
|
||||
sym.setNameAndNamespace(symbol_str, namespace, IMPORTED)
|
||||
else:
|
||||
comment_lines = comment.splitlines()
|
||||
if any (line.startswith(mangled_prefix) for line in comment_lines):
|
||||
# replace with new mangled name
|
||||
new_comment = []
|
||||
for line in comment_lines:
|
||||
if line.startswith(mangled_prefix) and new_comment_line not in line:
|
||||
update_symbol = True
|
||||
new_comment.append(new_comment_line)
|
||||
else:
|
||||
new_comment.append(line)
|
||||
createLabel(addr, symbol_str, namespace, True, IMPORTED)
|
||||
|
||||
else:
|
||||
# existing plate comment without symbol, append
|
||||
update_symbol = True
|
||||
new_comment = [comment, new_comment_line]
|
||||
if create_function:
|
||||
createFunction(addr, postprocessed)
|
||||
|
||||
if update_symbol:
|
||||
new_comment = '\n'.join(new_comment)
|
||||
demangled_name = demangle(mangled_name)
|
||||
if symbol_needs_history(mangled_name):
|
||||
unit.setComment(PLATE_COMMENT, complete_plate_comment)
|
||||
|
||||
# print(new_comment, demangled_name)
|
||||
|
||||
name_list = [SymbolUtilities.replaceInvalidChars(part, True) for part in demangled_name.split("::")]
|
||||
symbol_str = name_list[-1]
|
||||
namespace = None
|
||||
if len(name_list) > 1:
|
||||
namespace_str = "::".join(name_list[:-1])
|
||||
print(mangled_name)
|
||||
namespace = NamespaceUtils.createNamespaceHierarchy(namespace_str, None, currentProgram, IMPORTED)
|
||||
def get_section_names(file_name, build_dir):
    """
    Read the section names of a REL from its splits.txt.

    The config directory is derived from the build directory by swapping
    the last path component named exactly "build" for "config"
    (build/SOUE01 -> config/SOUE01). Only that component is touched, so a
    checkout under e.g. /home/builder/ is no longer rewritten. If no
    component is named "build", the previous substring replacement is
    used as a fallback.

    Args:
        file_name (str): REL name, used as the directory under "rels".
        build_dir (str): Program build directory (e.g. build/SOUE01).

    Returns:
        list[str]: First token of each line of the leading block of
        splits.txt, skipping the "Sections:" header and stopping at the
        first blank line.

    Raises:
        IOError/OSError: if splits.txt cannot be opened.
    """
    parts = os.path.normpath(build_dir).split(os.sep)
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == "build":
            parts[idx] = "config"
            config_dir = os.sep.join(parts)
            break
    else:
        # legacy behaviour for layouts without a plain "build" component
        config_dir = build_dir.replace("build", "config")

    splits_path = os.path.join(config_dir, "rels", file_name, "splits.txt")
    section_names = []
    with open(splits_path, "rt") as splits_file:
        for line in splits_file:
            line = line.strip()
            if line == "":
                # the section list ends at the first blank line
                break
            if line == "Sections:":
                continue
            section_names.append(line.split()[0])
    return section_names
|
||||
|
||||
sym = getSymbolAt(addr)
|
||||
if namespace is None:
|
||||
namespace = currentProgram.getGlobalNamespace()
|
||||
if sym:
|
||||
sym.setNameAndNamespace(symbol_str, namespace, IMPORTED)
|
||||
else:
|
||||
createLabel(addr, symbol_str, namespace, True, IMPORTED)
|
||||
unit.setComment(PLATE_COMMENT, new_comment)
|
||||
|
||||
def apply_symbols_map(symbols_map, file_name):
|
||||
def apply_symbols_map(symbols_map, file_name, build_dir):
|
||||
if file_name != "MAIN":
|
||||
section_names = get_section_names(file_name, build_dir)
|
||||
blocks = mem.getBlocks()
|
||||
blocks = [b for b in blocks if b.getName().startswith(file_name)]
|
||||
|
||||
for section, syms in symbols_map.items():
|
||||
for sym in syms:
|
||||
if file_name == "MAIN":
|
||||
# in the main dol, each symbol is loaded at a fixed address
|
||||
addr_obj = AF.getAddress("0x%08X" % sym["vAddr"])
|
||||
else:
|
||||
# REL sections can't be reliably identified
|
||||
if section != ".text":
|
||||
continue
|
||||
# in rels, every section is relocated indivdually, so treat
|
||||
index = section_names.index(section)
|
||||
block = blocks[index]
|
||||
# in rels, every section is relocated individually, so treat
|
||||
# this as an offset
|
||||
block_name = file_name + "_" + section + "0"
|
||||
block = mem.getBlock(block_name)
|
||||
addr_obj = block.getStart().add(sym["vAddr"])
|
||||
update_addr(addr_obj, sym["name"])
|
||||
is_text = section == ".text"
|
||||
update_addr(addr_obj, sym["name"], create_function=is_text)
|
||||
|
||||
|
||||
path = str(askDirectory("Program build directory (e.g. build/SOUE01)", "Import"))
|
||||
|
||||
commit = askString("Commit hash for symbol history", "Confirm")
|
||||
new_contents = None
|
||||
main_symbols = os.path.join(path, "main.elf.MAP")
|
||||
symbols_map = None
|
||||
with open(main_symbols, "rt") as file:
|
||||
symbols_map = parse_map_file(file)
|
||||
|
||||
apply_symbols_map(symbols_map, "MAIN")
|
||||
apply_symbols_map(symbols_map, "MAIN", path)
|
||||
|
||||
for rel_name in os.listdir(path):
|
||||
if rel_name.endswith("NP"):
|
||||
@@ -182,4 +293,4 @@ for rel_name in os.listdir(path):
|
||||
with open(rel_symbols, "rt") as file:
|
||||
symbols_map = parse_map_file(file)
|
||||
|
||||
apply_symbols_map(symbols_map, rel_name)
|
||||
apply_symbols_map(symbols_map, rel_name, path)
|
||||
|
||||
@@ -12,7 +12,7 @@ AF = currentProgram.getAddressFactory()
|
||||
mem = currentProgram.getMemory()
|
||||
listing = currentProgram.getListing()
|
||||
|
||||
sym_re = re.compile("(?:lbl|fn|FUN|DAT)_[0-9A-Fa-f_]+ = \.([a-z0-9]+):0x([0-9A-Fa-f]{8})(.*)\n")
|
||||
sym_re = re.compile("(?:lbl|fn|FUN|DAT)_[0-9A-Fa-f_]+ = \\.([a-z0-9]+):0x([0-9A-Fa-f]{8})(.*)\n")
|
||||
default_sym_re = re.compile(".*_[0-9A-Za-z]+$")
|
||||
|
||||
used_symbols = set()
|
||||
|
||||
@@ -0,0 +1,543 @@
|
||||
# https://gist.github.com/RootCubed/8f8102fe6cf4ed79a45f1dfe23020a06
|
||||
|
||||
# Demangler / Itanium remangler for the CodeWarrior ABI
|
||||
|
||||
# Adapted from the NVIDIA demangler script by Ninji
|
||||
# See https://gist.github.com/RootCubed/9ebecf21eec344f10164cdfabbf0bb41 (Python)
|
||||
# and https://gist.github.com/RootCubed/d7e2629f4576059853505b7931ffd105 (C++)
|
||||
# for those scripts
|
||||
|
||||
# Ported to Ghidra's Jython / Python 2.7 by robojumper
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Translation direction. main() assigns it from --mode
# ('demangle' or 'remangle_itanium'); importers set it directly.
mode = None
# Assigned from --verbose in main().
verbose = False


def is_demangle():
    """Return True when the module-level `mode` equals 'demangle'."""
    return 'demangle' == mode
|
||||
|
||||
"""
|
||||
The names of the types in the output
|
||||
First demangled, then remangled for Itanium
|
||||
"""
|
||||
# Builtin type codes -> (demangled spelling, Itanium code).
# parse_type() builds the key from an optional 'U'/'S' sign prefix plus one
# lowercase letter, then picks element 0 when demangling and 1 when remangling.
names_mapping = {
    'v': ('void', 'v'),
    'b': ('bool', 'b'),
    'c': ('char', 'c'),
    's': ('short', 's'),
    'i': ('int', 'i'),
    'l': ('long', 'l'),
    'x': ('long long', 'x'),
    'Sc': ('signed char', 'a'),
    'Uc': ('unsigned char', 'h'),
    'Us': ('unsigned short', 't'),
    'Ui': ('unsigned int', 'j'),
    'Ul': ('unsigned long', 'm'),
    'Ux': ('unsigned long long', 'y'),
    'f': ('float', 'f'),
    'd': ('double', 'd'),
    'r': ('long double', 'e'),
    'w': ('wchar_t', 'w'),
    'e': ('...', 'z')
}
|
||||
|
||||
"""
|
||||
The names of the methods in the output
|
||||
First demangled, then remangled for Itanium
|
||||
"""
|
||||
# Special method names -> (demangled spelling, Itanium code).
# demangle() looks up the method name with any template arguments removed.
# '$CLS$' is a placeholder that demangle() replaces with the last component
# of the class name (constructors and destructors).
method_mapping = {
    '__dt': ('~$CLS$', 'D0'),
    '__ct': ('$CLS$', 'C1'),
    '__nw': ('operator new', 'nw'),
    '__nwa': ('operator new[]', 'na'),
    '__dl': ('operator delete', 'dl'),
    '__dla': ('operator delete[]', 'da'),
    '__pl': ('operator+', 'pl'),
    '__mi': ('operator-', 'mi'),
    '__ml': ('operator*', 'ml'),
    '__dv': ('operator/', 'dv'),
    '__md': ('operator%', 'rm'),
    '__er': ('operator^', 'eo'),
    '__ad': ('operator&', 'an'),
    '__or': ('operator|', 'or'),
    '__co': ('operator~', 'co'),
    '__nt': ('operator!', 'nt'),
    '__as': ('operator=', 'aS'),
    '__lt': ('operator<', 'lt'),
    '__gt': ('operator>', 'gt'),
    '__apl': ('operator+=', 'pL'),
    '__ami': ('operator-=', 'mI'),
    '__amu': ('operator*=', 'mL'),
    '__adv': ('operator/=', 'dV'),
    '__amd': ('operator%=', 'rM'),
    '__aer': ('operator^=', 'eO'),
    '__aad': ('operator&=', 'aN'),
    '__aor': ('operator|=', 'oR'),
    '__ls': ('operator<<', 'ls'),
    '__rs': ('operator>>', 'rs'),
    '__ars': ('operator>>=', 'rS'),
    '__als': ('operator<<=', 'lS'),
    '__eq': ('operator==', 'eq'),
    '__ne': ('operator!=', 'ne'),
    '__le': ('operator<=', 'le'),
    '__ge': ('operator>=', 'ge'),
    '__aa': ('operator&&', 'aa'),
    '__oo': ('operator||', 'oo'),
    '__pp': ('operator++', 'pp'),
    '__mm': ('operator--', 'mm'),
    '__cm': ('operator,', 'cm'),
    '__rm': ('operator->*', 'pm'),
    '__rf': ('operator->', 'pt'),
    '__cl': ('operator()', 'cl'),
    '__vc': ('operator[]', 'ix'),
}
|
||||
|
||||
def parse_number(s, i):
    """
    Read a run of decimal digits from s beginning at index i.

    Examples:
        parse_number('123ABC', 0) -> (123, 3)
        parse_number('ABC', 0)    -> (0, 0)

    Args:
        s (str): The input string to parse.
        i (int): The starting position in the input string.

    Returns:
        Tuple[int, int]: The parsed number (0 if there are no digits) and
        the index of the first non-digit character.

    Raises:
        IndexError: if the digits run to the end of the string (or i is
        already past it); a terminating non-digit character is required.
    """
    value = 0
    ch = s[i]
    while ch.isdigit():
        value = value * 10 + int(ch)
        i += 1
        ch = s[i]
    return value, i
|
||||
|
||||
def parse_typename(s, i):
    """
    Process a mangled (possibly qualified) type name starting at index i.

    A plain name is '<length><name>'. A qualified name is
    'Q<count>' followed by <count> such components, where <count> is a
    single digit. Every component goes through resolve_templates().

    Examples:
        (demangle) parse_typename('Q23ABC3DEF', 0) -> ('ABC::DEF', 10)
        (demangle) parse_typename('Q23ABC6DEF<c>', 0) -> ('ABC::DEF<char>', 13)
        (remangle) parse_typename('Q23ABC3DEF', 0) -> ('3ABC3DEF', 10)

    Args:
        s (str): The input string.
        i (int): The starting index.

    Returns:
        Tuple[str, int]: The parsed typename and the new position in the string.
    """
    if s[i] != 'Q':
        length, start = parse_number(s, i)
        end = start + length
        return resolve_templates(s[start:end], True), end

    component_count = int(s[i + 1])
    i += 2
    components = []
    for _ in range(component_count):
        length, i = parse_number(s, i)
        components.append(resolve_templates(s[i:i + length], True))
        i += length
    # '::' between components when demangling, plain concatenation otherwise
    separator = '::' if is_demangle() else ''
    return separator.join(components), i
|
||||
|
||||
def join_modifiers(modifiers):
    """
    Concatenate type modifiers (const, pointer, reference, ...) into one string.

    Modifiers are collected in mangled, left-to-right order (PKi: pointer
    to const int). Demangled text reads right-to-left (int const *), so
    the list is reversed when demangling and kept as-is when remangling.

    Args:
        modifiers (list[str]): The modifiers to join; not modified.

    Returns:
        str: The joined string of modifiers.
    """
    ordered = reversed(modifiers) if is_demangle() else modifiers
    return ''.join(ordered)
|
||||
|
||||
def parse_function(s, i, modifiers, name='', rettype_mode = 'show'):
    """
    Parses a function signature from a mangled string.

    i points just past the 'F' marker. Argument types are read until '_'
    (which introduces the return type), '@', or the end of the string.

    Examples:
        (demangle) parse_function('v_v', 0, ['*']) -> ('void (*)()', 3)
        (demangle) parse_function('s_b', 0, ['&']) -> ('bool (&)(short)', 3)
        (remangle) parse_function('i_v', 0, ['P']) -> ('PFviE', 3)

    Args:
        s (str): The mangled string.
        i (int): The current index in the string.
        modifiers (list[str]): The list of modifiers. A trailing const
            modifier is popped from this list in place.
        name (str): An identifier, if available. This is the "main" symbol name.
        rettype_mode (Literal['show', 'hide_in_demangle', 'remove']): How to handle the return type.
            'show' keeps it; 'hide_in_demangle' parses but omits it from
            demangled output only; 'remove' omits it in both modes.

    Returns:
        Tuple[str, int]: The transformed function signature and the new position in the string.
    """
    # Parse the function args, return type handled later
    args = []
    while i < len(s) and s[i] != '_' and s[i] != '@':
        argtype, i = parse_type(s, i)
        args.append(argtype)

    # Special case: const
    # Note that if the function is const, it will be the last modifier
    # because e.g. CPFv is a (const pointer) to a function
    const_str = ''
    if len(modifiers) > 0 and (modifiers[-1] == ' const' or modifiers[-1] == 'K'):
        const_str = ' const' if is_demangle() else 'K'
        modifiers.pop()

    mod_str = join_modifiers(modifiers)

    if is_demangle():
        if mod_str != '':
            mod_str = '(%s)' % mod_str.strip()
        # A lone leading 'void' means "no parameters". An empty list (malformed
        # input) is rendered the same way instead of raising IndexError,
        # matching what the remangle branch already tolerates.
        arg_str = '' if args[:1] == ['void'] else ', '.join(args)
        func_str = '%s%s(%s)%s' % (name, mod_str, arg_str, const_str)
        if i >= len(s) or s[i] == '@':
            # no return type encoded
            return func_str, i
        if rettype_mode == 'hide_in_demangle' or rettype_mode == 'remove':
            # still consume the return type so the index ends up past it
            _, i = parse_type(s, i + 1)
            return func_str, i
        else:
            # the signature becomes the sole modifier of the return type
            return parse_type(s, i + 1, [' ' + func_str])
    else:
        if i < len(s) and s[i] != '@':
            rettype, i = parse_type(s, i + 1)
        else:
            rettype, i = ('', i)
        if rettype_mode == 'remove':
            rettype = ''
        func_encoding = '%s%s' % (rettype, ''.join(args)) if name != '' else 'F%s%sE' % (rettype, ''.join(args))
        if name != '':
            func_encoding = 'N%s%sE%s' % (const_str, name, func_encoding)
        else:
            func_encoding = '%s%s' % (const_str, func_encoding)
        return mod_str + func_encoding, i
|
||||
|
||||
def parse_type(s, i, modifiers = None, name='', rettype_mode = 'show'):
    """
    Parses a type from a string - main transformation function.

    Leading uppercase letters (other than 'Q') are consumed as modifiers;
    what follows is either a (qualified) class name or a one-letter
    builtin type looked up in names_mapping.

    Args:
        s (str): The string to parse.
        i (int): The starting index.
        modifiers (list[str] or None): Modifiers already collected by the
            caller. New modifiers are appended to this list in place.
        name (str): Forwarded to parse_function() when an 'F' is found.
        rettype_mode (str): Forwarded to parse_function() when an 'F' is found.

    Returns:
        Tuple[str, int]: The transformed type name and the new position in the string.

    Raises:
        Exception: on an unknown modifier letter or unknown builtin type.
        AssertionError: if no type follows the modifiers.
    """

    if modifiers == None:
        modifiers = []

    # Type modifier is for unsigned/signed
    type_modifier = ''

    while i < len(s) and s[i].isupper() and s[i] != 'Q':
        c = s[i]
        if c == 'C': # Const
            modifiers.append(' const' if is_demangle() else 'K')
        elif c == 'P': # Pointer
            modifiers.append('*' if is_demangle() else 'P')
        elif c == 'R': # Reference
            modifiers.append('&' if is_demangle() else 'R')
        elif c == 'V': # Volatile
            modifiers.append(' volatile' if is_demangle() else 'V')
        elif c == 'U' or c == 'S': # Unsigned/Signed
            type_modifier = c
        elif c == 'F': # Function, will return early
            return parse_function(s, i + 1, modifiers, name, rettype_mode)
        elif c == 'M': # Pointer-to-member
            # the recursive call leaves i just past the class type
            class_name, i = parse_type(s, i + 1)

            modifiers.append(' %s::*' % class_name if is_demangle() else 'M%s' % class_name)
            if s[i] == 'F':
                # CW includes the hidden pointer arguments in the PTMF signature
                # and also uses this to communicate constness of the PTMF
                if s[i:].startswith('FPCvPCv'):
                    modifiers.append(' const' if is_demangle() else 'K')
                    i += 7
                elif s[i:].startswith('FPCvPv'):
                    i += 6
                if s[i] == '_':
                    # small hack: simulate Fv_... by reusing the v from FPCvPCv/FPCvPv
                    i -= 1
                return parse_function(s, i, modifiers)
            else:
                # pointer-to-member-nonfunction, continue parsing as normal
                # (i already points at the next character, so skip the i += 1 below)
                continue
        elif c == 'A': # Array
            count, i = parse_number(s, i + 1)
            # Automatically skips past the '_' after the number before the next iteration
            if is_demangle():
                modstr = join_modifiers(modifiers)
                if re.search(r'\[.*\]$', modstr) != None:
                    # already ends in an array dimension: add another one
                    modifiers.insert(0, '[%d]' % count)
                elif modstr == '':
                    modifiers.insert(0, ' [%d]' % count)
                else:
                    # modifiers.insert(0, f' ({modstr}) [{count}]')
                    # Rebinds the local name only; a list passed in by the
                    # caller is not updated from here on.
                    modifiers = [' (%s) [%d]' % (modstr, count)]
            else:
                modifiers.append('A%d_' % count)
        else:
            raise Exception('Invalid type modifier "' + c + '"')
        i += 1

    assert i < len(s)
    assert s[i].isalpha() or s[i].isdigit()

    # Now we have either an identifier or a basic type

    if s[i] == 'Q' or s[i].isdigit():
        type_name, i = parse_typename(s, i)
        if not is_demangle():
            type_name = 'N%sE' % type_name
    else:
        # Basic type - combine with type modifier and look up in mapping
        actual_type = type_modifier + s[i]
        if actual_type not in names_mapping:
            raise Exception('Invalid type "' + actual_type + '"')
        type_name = names_mapping[actual_type][0 if is_demangle() else 1]
        i += 1

    mod_str = join_modifiers(modifiers)

    # modifiers follow the type when demangled and precede it when remangled
    if is_demangle():
        return '%s%s' % (type_name, mod_str), i
    else:
        return '%s%s' % (mod_str, type_name), i
|
||||
|
||||
def resolve_templates(s, remangle_add_length):
    """
    Resolves template arguments (and unnamed namespaces/types) in a name.

    Examples:
        (demangle) resolve_templates('std<c>', False) -> 'std<char>'
        (remangle) resolve_templates('std<c>', False) -> 'stdIcE'
        (remangle) resolve_templates('std<c>', True) -> '3stdIcE'

    Args:
        s (str): The string to resolve.
        remangle_add_length (bool): Whether to add the length prefix in remangling.

    Returns:
        str: The resolved string.
    """
    begin_pos = s.find('<')
    if begin_pos == -1:
        if re.match(r'^@unnamed@.+@$', s) != None:
            if is_demangle():
                # name.split('@')[2] contains the name of the file the anonymous namespace is in,
                # but we lose that information here since we follow c++filt's behavior.
                return '(anonymous namespace)'
            else:
                return '12_GLOBAL__N_1'
        unnamed_type_m = re.match(r'^@class\$(\d*).+$', s)
        if unnamed_type_m != None:
            typenum = int(unnamed_type_m.group(1)) if unnamed_type_m.group(1) != '' else -1
            if is_demangle():
                return '{unnamed type#%d}' % (typenum + 2)
            else:
                return 'Ut%s_' % (str(typenum) if typenum > -1 else '')
        if not is_demangle() and remangle_add_length:
            return '%d%s' % (len(s), s)
        return s

    template_str = ''
    i = begin_pos + 1
    while i < len(s):
        if s[i] == ',':
            if is_demangle():
                template_str += ', '
            i += 1
            continue
        if s[i] == '>':
            break
        # Integer literal: a run of digits/'-' directly followed by '>' or ','.
        # Uses .group() because indexing a match object needs Python >= 3.6
        # and fails under Jython / Python 2.7.
        literal_m = re.match(r'([-\d]+)[>,]', s[i:])
        if literal_m != None:
            literal = literal_m.group(1)
            template_str += literal if is_demangle() else 'XLi%sEE' % literal.replace('-', 'n')
            i += len(literal)
        else:
            arg_type, i = parse_type(s, i)
            template_str += arg_type

    if is_demangle():
        template_str = '<%s>' % template_str
        # replicate c++filt behavior
        if template_str[-2:] == '>>':
            template_str = template_str[:-1] + ' >'
        return s[0:begin_pos] + template_str
    else:
        if remangle_add_length:
            return str(begin_pos) + s[0:begin_pos] + ('I%sE' % template_str)
        return s[0:begin_pos] + ('I%sE' % template_str)
|
||||
|
||||
def demangle(s):
    """
    Demangles a mangled symbol name.

    Depending on the module-level mode this yields either readable C++
    (is_demangle() true) or an Itanium-style '_Z...' name. A string with
    no '__' separator followed by one of 'C', 'F', 'Q' or a digit is
    returned unchanged (minus any '@...@' prefix already stripped).

    Args:
        s (str): The mangled symbol.

    Returns:
        str: The demangled or remangled name.

    Raises:
        Exception: for an '@X@' prefix other than a thunk offset, LOCAL,
        GUARD or STRING; errors from the parse_* helpers also propagate.
    """

    at_sym = ''
    thunk_offsets = []
    # Optional '@...@' prefix: '@<n>@' / '@<n>@<m>@' thunks, or a keyword
    m = re.match(r'^@([^@]+)@(.+)$', s)
    if m != None:
        m_thunk = re.match(r'^@(\d+)(?:@(\d+))?@(.+)$', s)
        if m_thunk != None:
            thunk_offsets = [int(m_thunk.group(1))]
            if m_thunk.group(2) != None:
                thunk_offsets.append(int(m_thunk.group(2)))
            s = m_thunk.group(3)
        else:
            at_sym = m.group(1)
            if at_sym not in ['LOCAL', 'GUARD', 'STRING']:
                raise Exception('Invalid symbol name "' + s + '"')
            s = m.group(2)

    # Find the first '__' outside template brackets that is followed by
    # 'C', 'F', 'Q' or a digit; it separates the method name from the rest.
    # The scan starts at 1, so a '__' at the very start is never the separator.
    template_depth = 0
    last_possible_end = -1
    for i in range(1, len(s)):
        if s[i] == '<':
            template_depth += 1
        elif s[i] == '>':
            template_depth -= 1
        if template_depth == 0 and i + 2 < len(s) and s[i:i + 2] == '__' and s[i + 2] in 'CFQ0123456789':
            last_possible_end = i
            break
    if last_possible_end == -1:
        return s

    i = last_possible_end

    method, remainder = s[:i], s[i + 2:]
    if remainder[0] == 'F':
        # Global function without class
        class_name = ''
        i = 0
    else:
        class_name, i = parse_typename(remainder, 0)

    # From here on i indexes into remainder, not s.
    if '<' in method:
        template_start = method.find('<')
        pre_template, template = method[:template_start], method[template_start:]
        resolved_templates = resolve_templates(template, False)
    else:
        pre_template, resolved_templates = method, ''

    if pre_template in ['__ct', '__dt']:
        rettype_mode = 'remove'
    elif at_sym != '':
        rettype_mode = 'hide_in_demangle'
    else:
        rettype_mode = 'show'

    if method == '__vt':
        return 'vtable for %s' % class_name if is_demangle() else '_ZTVN%sE' % class_name
    elif method.startswith('__op'):
        # Use method because the type might contain templates
        cv_type_name, _ = parse_type(method[4:], 0)
        pre_template = 'operator %s' % cv_type_name if is_demangle() else 'cv%s' % cv_type_name
        # __op cannot be templated
        resolved_templates = ''
    elif pre_template in method_mapping:
        pre_template = method_mapping[pre_template][0 if is_demangle() else 1]
        if is_demangle():
            # __ct should use the template of the function, not of the parent class
            last_class_name = re.sub(r'<.+>', '', class_name).split('::')[-1]
            pre_template = pre_template.replace('$CLS$', last_class_name)
    else:
        if not is_demangle():
            # ordinary identifier: length-prefixed when remangling
            pre_template = '%d%s' % (len(pre_template), pre_template)

    method = '%s%s' % (pre_template, resolved_templates)

    if is_demangle():
        # filter(None, ...) drops the empty class name of a global function
        demangled = '::'.join(filter(None, [class_name, method]))
    else:
        demangled = class_name + method

    if i < len(remainder):
        # anything left after the class name is the type/signature
        demangled, i = parse_type(remainder, i, name=demangled, rettype_mode=rettype_mode)
    elif not is_demangle():
        demangled = 'N%sE' % demangled

    # Name of the local entity for '@LOCAL@' / '@GUARD@' / '@STRING@' symbols
    if i < len(remainder) and remainder[i] == '@' and at_sym in ['LOCAL', 'GUARD']:
        subs = remainder[i + 1:].split('@')
        local_sym_name = subs[0]
        local_sym_extra = ('_' + subs[1]) if len(subs) > 1 else ''
        if not is_demangle():
            local_sym_name = str(len(local_sym_name)) + local_sym_name + local_sym_extra
    elif at_sym == 'GUARD' and i >= len(remainder):
        local_sym_name = method
    elif at_sym == 'STRING' and i < len(remainder) and remainder[i] == '@' and not is_demangle():
        local_sym_name = '_' + remainder[i + 1]
    else:
        local_sym_name = ''

    if is_demangle():
        if local_sym_name != '':
            demangled += '::%s' % local_sym_name

        # c++filt removes spaces in (* <symbol name>) -> (*<symbol name>), try to replicate this
        while True:
            m = re.search(r'\((?:[*&]|const| )+ (\w+.+)$', demangled)
            if m == None or m.group(1).startswith('const'):
                break
            demangled = demangled[:m.start(1) - 1] + m.group(1)

        if at_sym == 'GUARD':
            return 'guard variable for %s' % demangled
        elif at_sym == 'STRING':
            return '%s::string literal' % demangled
        elif len(thunk_offsets) > 0:
            # two offsets -> virtual thunk, one -> non-virtual thunk
            thunk_type = 'virtual' if len(thunk_offsets) == 2 else 'non-virtual'
            return '%s thunk to %s' % (thunk_type, demangled)
        else:
            return demangled
    else:
        if len(thunk_offsets) == 1:
            demangled = 'Th%d_%s' % (thunk_offsets[0], demangled)
        elif len(thunk_offsets) == 2:
            demangled = 'Tv%d_n%d_%s' % (thunk_offsets[0], thunk_offsets[1], demangled)
        if at_sym == 'LOCAL':
            demangled = 'Z%sE%s' % (demangled, local_sym_name)
        if at_sym == 'GUARD':
            demangled = 'GVZ%sE%s' % (demangled, local_sym_name)
        if at_sym == 'STRING':
            demangled = 'Z%sEs%s' % (demangled, local_sym_name)
        return '_Z%s' % demangled
|
||||
|
||||
def demangle_try(s):
    """
    Call demangle(s), reporting any failure on stderr before re-raising it.

    Args:
        s (str): The mangled symbol.

    Returns:
        str: Whatever demangle(s) returns.

    Raises:
        Exception: the exception raised by demangle(), unchanged.
    """
    try:
        return demangle(s)
    except Exception as e:
        sys.stderr.write('Demangler error: ' + str(e) + '\n')
        # Bare raise keeps the original traceback; on Python 2 `raise e`
        # would restart it from this line.
        raise
|
||||
|
||||
def main():
    """
    Command-line entry point.

    Sets the module-level `mode` and `verbose` from the arguments, then
    converts either the symbol given on the command line or, when none is
    given, every line read from standard input until end of input.
    """
    global mode
    global verbose
    parser = argparse.ArgumentParser()
    parser.add_argument('symbol', type=str, nargs='?')
    parser.add_argument('-m', '--mode', choices=['demangle', 'remangle_itanium'], required=True)
    parser.add_argument('-v', '--verbose', action='store_true', default=False)
    args = parser.parse_args()
    mode = args.mode
    verbose = args.verbose

    if args.symbol is not None:
        print(demangle_try(args.symbol))
        return

    # Read lines directly instead of calling input(): on Python 2 input()
    # evaluates what was typed, and on both versions it raises EOFError at
    # end of input. readline() returns '' only at EOF, which ends the loop.
    for line in iter(sys.stdin.readline, ''):
        print(demangle_try(line.rstrip('\r\n')))


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user