mirror of
https://github.com/zeldaret/ss
synced 2026-05-23 15:01:38 -04:00
546 lines
19 KiB
Python
546 lines
19 KiB
Python
# https://gist.github.com/RootCubed/8f8102fe6cf4ed79a45f1dfe23020a06
|
|
|
|
# Demangler / Itanium remangler for the CodeWarrior ABI
|
|
|
|
# Adapted from the NVIDIA demangler script by Ninji
|
|
# See https://gist.github.com/RootCubed/9ebecf21eec344f10164cdfabbf0bb41 (Python)
|
|
# and https://gist.github.com/RootCubed/d7e2629f4576059853505b7931ffd105 (C++)
|
|
# for those scripts
|
|
|
|
# Ported to Ghidra's Jython / Python 2.7 by robojumper
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
|
|
mode = None
|
|
verbose = False
|
|
|
|
def is_demangle():
|
|
return mode == 'demangle'
|
|
|
|
"""
|
|
The names of the types in the output
|
|
First demangled, then remangled for Itanium
|
|
"""
|
|
names_mapping = {
|
|
'v': ('void', 'v'),
|
|
'b': ('bool', 'b'),
|
|
'c': ('char', 'c'),
|
|
's': ('short', 's'),
|
|
'i': ('int', 'i'),
|
|
'l': ('long', 'l'),
|
|
'x': ('long long', 'x'),
|
|
'Sc': ('signed char', 'a'),
|
|
'Uc': ('unsigned char', 'h'),
|
|
'Us': ('unsigned short', 't'),
|
|
'Ui': ('unsigned int', 'j'),
|
|
'Ul': ('unsigned long', 'm'),
|
|
'Ux': ('unsigned long long', 'y'),
|
|
'f': ('float', 'f'),
|
|
'd': ('double', 'd'),
|
|
'r': ('long double', 'e'),
|
|
'w': ('wchar_t', 'w'),
|
|
'e': ('...', 'z')
|
|
}
|
|
|
|
"""
|
|
The names of the methods in the output
|
|
First demangled, then remangled for Itanium
|
|
"""
|
|
method_mapping = {
|
|
'__dt': ('~$CLS$', 'D0'),
|
|
'__ct': ('$CLS$', 'C1'),
|
|
'__nw': ('operator new', 'nw'),
|
|
'__nwa': ('operator new[]', 'na'),
|
|
'__dl': ('operator delete', 'dl'),
|
|
'__dla': ('operator delete[]', 'da'),
|
|
'__pl': ('operator+', 'pl'),
|
|
'__mi': ('operator-', 'mi'),
|
|
'__ml': ('operator*', 'ml'),
|
|
'__dv': ('operator/', 'dv'),
|
|
'__md': ('operator%', 'rm'),
|
|
'__er': ('operator^', 'eo'),
|
|
'__ad': ('operator&', 'an'),
|
|
'__or': ('operator|', 'or'),
|
|
'__co': ('operator~', 'co'),
|
|
'__nt': ('operator!', 'nt'),
|
|
'__as': ('operator=', 'aS'),
|
|
'__lt': ('operator<', 'lt'),
|
|
'__gt': ('operator>', 'gt'),
|
|
'__apl': ('operator+=', 'pL'),
|
|
'__ami': ('operator-=', 'mI'),
|
|
'__amu': ('operator*=', 'mL'),
|
|
'__adv': ('operator/=', 'dV'),
|
|
'__amd': ('operator%=', 'rM'),
|
|
'__aer': ('operator^=', 'eO'),
|
|
'__aad': ('operator&=', 'aN'),
|
|
'__aor': ('operator|=', 'oR'),
|
|
'__ls': ('operator<<', 'ls'),
|
|
'__rs': ('operator>>', 'rs'),
|
|
'__ars': ('operator>>=', 'rS'),
|
|
'__als': ('operator<<=', 'lS'),
|
|
'__eq': ('operator==', 'eq'),
|
|
'__ne': ('operator!=', 'ne'),
|
|
'__le': ('operator<=', 'le'),
|
|
'__ge': ('operator>=', 'ge'),
|
|
'__aa': ('operator&&', 'aa'),
|
|
'__oo': ('operator||', 'oo'),
|
|
'__pp': ('operator++', 'pp'),
|
|
'__mm': ('operator--', 'mm'),
|
|
'__cm': ('operator,', 'cm'),
|
|
'__rm': ('operator->*', 'pm'),
|
|
'__rf': ('operator->', 'pt'),
|
|
'__cl': ('operator()', 'cl'),
|
|
'__vc': ('operator[]', 'ix'),
|
|
}
|
|
|
|
def parse_number(s, i):
|
|
"""
|
|
Parses a number starting at position i.
|
|
Examples:
|
|
parse_number('123ABC', 0) -> (123, 3)
|
|
|
|
Args:
|
|
s (str): The input string to parse.
|
|
i (int): The starting position in the input string.
|
|
|
|
Returns:
|
|
Tuple[int, int]: The parsed number and the new position in the string.
|
|
"""
|
|
num = 0
|
|
while s[i].isdigit():
|
|
num = num * 10 + int(s[i])
|
|
i += 1
|
|
return num, i
|
|
|
|
def parse_typename(s, i):
|
|
"""
|
|
Fully processes a mangled typename starting at index i.
|
|
Examples:
|
|
(demangle) parse_typename('Q23ABC3DEF', 0) -> ('ABC::DEF', 10)
|
|
(demangle) parse_typename('Q23ABC6DEF<c>', 0) -> ('ABC::DEF<char>', 13)
|
|
(remangle) parse_typename('Q23ABC3DEF', 0) -> ('3ABC3DEF', 10)
|
|
|
|
Args:
|
|
s (str): The input string.
|
|
i (int): The starting index.
|
|
is_toplevel (bool): Whether the type is a global-level type. Used for remangling.
|
|
|
|
Returns:
|
|
Tuple[str, int]:The parsed typename and the new position in the string.
|
|
"""
|
|
if s[i] == 'Q':
|
|
count = int(s[i + 1])
|
|
i += 2
|
|
bits = []
|
|
for _ in range(count):
|
|
size, i = parse_number(s, i)
|
|
bits.append(resolve_templates(s[i:(i + size)], True))
|
|
i += size
|
|
if is_demangle():
|
|
return '::'.join(bits), i
|
|
else:
|
|
return ''.join(bits), i
|
|
else:
|
|
size, i = parse_number(s, i)
|
|
return resolve_templates(s[i:(i + size)], True), i + size
|
|
|
|
def join_modifiers(modifiers):
|
|
"""
|
|
Joins the list of modifiers into a single string.
|
|
Modifiers are e.g. const, pointer, reference, etc.
|
|
In a demangled string these are right-to-left (e.g. int const * - pointer to const int)
|
|
whereas in a mangled string they are left-to-right (e.g. PKi - pointer to const int)
|
|
|
|
Args:
|
|
modifiers (list[str]): The list of modifiers to join.
|
|
|
|
Returns:
|
|
str: The joined string of modifiers.
|
|
"""
|
|
if is_demangle():
|
|
return ''.join(modifiers[::-1])
|
|
else:
|
|
return ''.join(modifiers)
|
|
|
|
def parse_function(s, i, modifiers, name='', rettype_mode = 'show'):
|
|
"""
|
|
Parses a function from a demangled string.
|
|
Examples:
|
|
(demangle) parse_function('v_v', 0, ['*']) -> ('void (*) ()', 4)
|
|
(demangle) parse_function('s_b', 0, ['&']) -> ('bool (&) (short)', 4)
|
|
(remangle) parse_function('i_v', 0, ['*']) -> ('FviE', 4)
|
|
|
|
Args:
|
|
s (str): The demangled string.
|
|
i (int): The current index in the string.
|
|
modifiers (list[str]): The list of modifiers.
|
|
name (str): An identifier, if available. This is the "main" symbol name.
|
|
rettype_mode (Literal['show', 'hide_in_demangle', 'remove']): How to handle the return type.
|
|
|
|
Returns:
|
|
Tuple[str, int]: The transformed function signature and the new position in the string.
|
|
"""
|
|
# Parse the function args, return type handled later
|
|
args = []
|
|
while i < len(s) and s[i] != '_' and s[i] != '@':
|
|
argtype, i = parse_type(s, i)
|
|
args.append(argtype)
|
|
|
|
# Special case: const
|
|
# Note that if the function is const, it will be the last modifier
|
|
# because e.g. CPFv is a (const pointer) to a function
|
|
const_str = ''
|
|
if len(modifiers) > 0 and (modifiers[-1] == ' const' or modifiers[-1] == 'K'):
|
|
const_str = ' const' if is_demangle() else 'K'
|
|
modifiers.pop()
|
|
|
|
mod_str = join_modifiers(modifiers)
|
|
|
|
if is_demangle():
|
|
if mod_str != '':
|
|
mod_str = '(%s)' % mod_str.strip()
|
|
arg_str = ', '.join(args) if args[0] != 'void' else ''
|
|
func_str = '%s%s(%s)%s' % (name, mod_str, arg_str, const_str)
|
|
if i >= len(s) or s[i] == '@':
|
|
return func_str, i
|
|
if rettype_mode == 'hide_in_demangle' or rettype_mode == 'remove':
|
|
_, i = parse_type(s, i + 1)
|
|
return func_str, i
|
|
else:
|
|
return parse_type(s, i + 1, [' ' + func_str])
|
|
else:
|
|
if i < len(s) and s[i] != '@':
|
|
rettype, i = parse_type(s, i + 1)
|
|
else:
|
|
rettype, i = ('', i)
|
|
if rettype_mode == 'remove':
|
|
rettype = ''
|
|
func_encoding = '%s%s' % (rettype, ''.join(args)) if name != '' else 'F%s%sE' % (rettype, ''.join(args))
|
|
if name != '':
|
|
func_encoding = 'N%s%sE%s' % (const_str, name, func_encoding)
|
|
else:
|
|
func_encoding = '%s%s' % (const_str, func_encoding)
|
|
return mod_str + func_encoding, i
|
|
|
|
def parse_type(s, i, modifiers = None, name='', rettype_mode = 'show'):
|
|
"""
|
|
Parses a type from a string - main transformation function.
|
|
|
|
Args:
|
|
s (str): The string to parse.
|
|
i (int): The starting index.
|
|
|
|
Returns:
|
|
Tuple[str, int]: The transformed type name and the new position in the string.
|
|
"""
|
|
|
|
if modifiers == None:
|
|
modifiers = []
|
|
|
|
# Type modifier is for unsigned/signed
|
|
type_modifier = ''
|
|
|
|
while i < len(s) and s[i].isupper() and s[i] != 'Q':
|
|
c = s[i]
|
|
if c == 'C': # Const
|
|
modifiers.append(' const' if is_demangle() else 'K')
|
|
elif c == 'P': # Pointer
|
|
modifiers.append('*' if is_demangle() else 'P')
|
|
elif c == 'R': # Reference
|
|
modifiers.append('&' if is_demangle() else 'R')
|
|
elif c == 'V': # Volatile
|
|
modifiers.append(' volatile' if is_demangle() else 'V')
|
|
elif c == 'U' or c == 'S': # Unsigned/Signed
|
|
type_modifier = c
|
|
elif c == 'F': # Function, will return early
|
|
return parse_function(s, i + 1, modifiers, name, rettype_mode)
|
|
elif c == 'M': # Pointer-to-member
|
|
class_name, i = parse_type(s, i + 1)
|
|
|
|
modifiers.append(' %s::*' % class_name if is_demangle() else 'M%s' % class_name)
|
|
if s[i] == 'F':
|
|
# CW includes the hidden pointer arguments in the PTMF signature
|
|
# and also uses this to communicate constness of the PTMF
|
|
if s[i:].startswith('FPCvPCv'):
|
|
modifiers.append(' const' if is_demangle() else 'K')
|
|
i += 7
|
|
elif s[i:].startswith('FPCvPv'):
|
|
i += 6
|
|
if s[i] == '_':
|
|
# small hack: simulate Fv_... by reusing the v from FPCvPCv/FPCvPv
|
|
i -= 1
|
|
return parse_function(s, i, modifiers)
|
|
else:
|
|
# pointer-to-member-nonfunction, continue parsing as normal
|
|
continue
|
|
elif c == 'A': # Array
|
|
count, i = parse_number(s, i + 1)
|
|
# Automatically skips past the '_' after the number before the next iteration
|
|
if is_demangle():
|
|
modstr = join_modifiers(modifiers)
|
|
if re.search(r'\[.*\]$', modstr) != None:
|
|
modifiers.insert(0, '[%d]' % count)
|
|
elif modstr == '':
|
|
modifiers.insert(0, ' [%d]' % count)
|
|
else:
|
|
# modifiers.insert(0, f' ({modstr}) [{count}]')
|
|
modifiers = [' (%s) [%d]' % (modstr, count)]
|
|
else:
|
|
modifiers.append('A%d_' % count)
|
|
else:
|
|
raise Exception('Invalid type modifier "' + c + '"')
|
|
i += 1
|
|
|
|
assert i < len(s)
|
|
assert s[i].isalpha() or s[i].isdigit()
|
|
|
|
# Now we have either an identifier or a basic type
|
|
|
|
if s[i] == 'Q' or s[i].isdigit():
|
|
type_name, i = parse_typename(s, i)
|
|
if not is_demangle():
|
|
type_name = 'N%sE' % type_name
|
|
else:
|
|
# Basic type - combine with type modifier and look up in mapping
|
|
actual_type = type_modifier + s[i]
|
|
if actual_type not in names_mapping:
|
|
raise Exception('Invalid type "' + actual_type + '"')
|
|
type_name = names_mapping[actual_type][0 if is_demangle() else 1]
|
|
i += 1
|
|
|
|
mod_str = join_modifiers(modifiers)
|
|
|
|
if is_demangle():
|
|
return '%s%s' % (type_name, mod_str), i
|
|
else:
|
|
return '%s%s' % (mod_str, type_name), i
|
|
|
|
def resolve_templates(s, remangle_add_length):
|
|
"""
|
|
Resolves template types in a type string.
|
|
Examples:
|
|
(demangle) resolve_templates('std<c>', false) -> 'std<char>'
|
|
(remangle) resolve_templates('std<c>', false) -> 'stdIcE'
|
|
(remangle) resolve_templates('std<c>', true) -> '3stdIcE'
|
|
|
|
Args:
|
|
s (str): The string to resolve.
|
|
remangle_add_length (bool): Whether to add the length prefix in remangling.
|
|
|
|
Returns:
|
|
str: The resolved string.
|
|
"""
|
|
begin_pos = s.find('<')
|
|
if begin_pos == -1:
|
|
if re.match(r'^@unnamed@.+@$', s) != None:
|
|
if is_demangle():
|
|
# name.split('@')[2] contains the name of the file the anonymous namespace is in,
|
|
# but we lose that information here since we follow c++filt's behavior.
|
|
return '(anonymous namespace)'
|
|
else:
|
|
return '12_GLOBAL__N_1'
|
|
unnamed_type_m = re.match(r'^@class\$(\d*).+$', s)
|
|
if unnamed_type_m != None:
|
|
typenum = int(unnamed_type_m.group(1)) if unnamed_type_m.group(1) != '' else -1
|
|
if is_demangle():
|
|
return '{unnamed type#%d}' % (typenum + 2)
|
|
else:
|
|
return 'Ut%s_' % (str(typenum) if typenum > -1 else '')
|
|
if not is_demangle() and remangle_add_length:
|
|
return '%d%s' % (len(s), s)
|
|
return s
|
|
template_str = ''
|
|
i = begin_pos + 1
|
|
while i < len(s):
|
|
if s[i] == ',':
|
|
if is_demangle():
|
|
template_str += ', '
|
|
i += 1
|
|
continue
|
|
if s[i] == '>':
|
|
break
|
|
elif re.match(r'[-\d]+[>,]', s[i:]) != None:
|
|
# Integer literal
|
|
# ss/robojumper: fix [0] -> .group(0)
|
|
literal = re.match(r'[-\d]+', s[i:]).group(0)
|
|
template_str += literal if is_demangle() else 'XLi%sEE' % literal.replace('-', 'n')
|
|
i += len(literal)
|
|
else:
|
|
type, i = parse_type(s, i)
|
|
template_str += type
|
|
if is_demangle():
|
|
template_str = '<%s>' % template_str
|
|
# replicate c++filt behavior
|
|
if template_str[-2:] == '>>':
|
|
template_str = template_str[:-1] + ' >'
|
|
return s[0:begin_pos] + template_str
|
|
else:
|
|
if remangle_add_length:
|
|
return str(begin_pos) + s[0:begin_pos] + ('I%sE' % template_str)
|
|
return s[0:begin_pos] + ('I%sE' % template_str)
|
|
|
|
def demangle(s):
|
|
"""
|
|
Demangles a mangled symbol name.
|
|
"""
|
|
|
|
at_sym = ''
|
|
thunk_offsets = []
|
|
m = re.match(r'^@([^@]+)@(.+)$', s)
|
|
if m != None:
|
|
m_thunk = re.match(r'^@(\d+)(?:@(\d+))?@(.+)$', s)
|
|
if m_thunk != None:
|
|
thunk_offsets = [int(m_thunk.group(1))]
|
|
if m_thunk.group(2) != None:
|
|
thunk_offsets.append(int(m_thunk.group(2)))
|
|
s = m_thunk.group(3)
|
|
else:
|
|
at_sym = m.group(1)
|
|
if at_sym not in ['LOCAL', 'GUARD', 'STRING']:
|
|
raise Exception('Invalid symbol name "' + s + '"')
|
|
s = m.group(2)
|
|
|
|
template_depth = 0
|
|
last_possible_end = -1
|
|
for i in range(1, len(s)):
|
|
if s[i] == '<':
|
|
template_depth += 1
|
|
elif s[i] == '>':
|
|
template_depth -= 1
|
|
if template_depth == 0 and i + 2 < len(s) and s[i:i + 2] == '__' and s[i + 2] in 'CFQ0123456789':
|
|
last_possible_end = i
|
|
break
|
|
if last_possible_end == -1:
|
|
return s
|
|
|
|
i = last_possible_end
|
|
|
|
method, remainder = s[:i], s[i + 2:]
|
|
if remainder[0] == 'F':
|
|
# Global function without class
|
|
class_name = ''
|
|
i = 0
|
|
else:
|
|
class_name, i = parse_typename(remainder, 0)
|
|
|
|
if '<' in method:
|
|
template_start = method.find('<')
|
|
pre_template, template = method[:template_start], method[template_start:]
|
|
resolved_templates = resolve_templates(template, False)
|
|
else:
|
|
pre_template, resolved_templates = method, ''
|
|
|
|
if pre_template in ['__ct', '__dt']:
|
|
rettype_mode = 'remove'
|
|
elif at_sym != '':
|
|
rettype_mode = 'hide_in_demangle'
|
|
else:
|
|
rettype_mode = 'show'
|
|
|
|
if method == '__vt':
|
|
return 'vtable for %s' % class_name if is_demangle() else '_ZTVN%sE' % class_name
|
|
elif method.startswith('__op'):
|
|
# Use method because the type might contain templates
|
|
cv_type_name, _ = parse_type(method[4:], 0)
|
|
pre_template = 'operator %s' % cv_type_name if is_demangle() else 'cv%s' % cv_type_name
|
|
# __op cannot be templated
|
|
resolved_templates = ''
|
|
elif pre_template in method_mapping:
|
|
pre_template = method_mapping[pre_template][0 if is_demangle() else 1]
|
|
if is_demangle():
|
|
# __ct should use the template of the function, not of the parent class
|
|
last_class_name = re.sub(r'<.+>', '', class_name).split('::')[-1]
|
|
pre_template = pre_template.replace('$CLS$', last_class_name)
|
|
else:
|
|
if not is_demangle():
|
|
pre_template = '%d%s' % (len(pre_template), pre_template)
|
|
|
|
method = '%s%s' % (pre_template, resolved_templates)
|
|
|
|
if is_demangle():
|
|
demangled = '::'.join(filter(None, [class_name, method]))
|
|
else:
|
|
demangled = class_name + method
|
|
|
|
if i < len(remainder):
|
|
demangled, i = parse_type(remainder, i, name=demangled, rettype_mode=rettype_mode)
|
|
elif not is_demangle():
|
|
demangled = 'N%sE' % demangled
|
|
|
|
if i < len(remainder) and remainder[i] == '@' and at_sym in ['LOCAL', 'GUARD']:
|
|
subs = remainder[i + 1:].split('@')
|
|
local_sym_name = subs[0]
|
|
local_sym_extra = ('_' + subs[1]) if len(subs) > 1 else ''
|
|
if not is_demangle():
|
|
local_sym_name = str(len(local_sym_name)) + local_sym_name + local_sym_extra
|
|
elif at_sym == 'GUARD' and i >= len(remainder):
|
|
local_sym_name = method
|
|
elif at_sym == 'STRING' and i < len(remainder) and remainder[i] == '@' and not is_demangle():
|
|
local_sym_name = '_' + remainder[i + 1]
|
|
else:
|
|
local_sym_name = ''
|
|
|
|
if is_demangle():
|
|
if local_sym_name != '':
|
|
demangled += '::%s' % local_sym_name
|
|
|
|
# c++filt removes spaces in (* <symbol name>) -> (*<symbol name>), try to replicate this
|
|
while True:
|
|
m = re.search(r'\((?:[*&]|const| )+ (\w+.+)$', demangled)
|
|
if m == None or m.group(1).startswith('const'):
|
|
break
|
|
demangled = demangled[:m.start(1) - 1] + m.group(1)
|
|
|
|
if at_sym == 'GUARD':
|
|
return 'guard variable for %s' % demangled
|
|
elif at_sym == 'STRING':
|
|
return '%s::string literal' % demangled
|
|
elif len(thunk_offsets) > 0:
|
|
thunk_type = 'virtual' if len(thunk_offsets) == 2 else 'non-virtual'
|
|
return '%s thunk to %s' % (thunk_type, demangled)
|
|
else:
|
|
return demangled
|
|
else:
|
|
if len(thunk_offsets) == 1:
|
|
demangled = 'Th%d_%s' % (thunk_offsets[0], demangled)
|
|
elif len(thunk_offsets) == 2:
|
|
demangled = 'Tv%d_n%d_%s' % (thunk_offsets[0], thunk_offsets[1], demangled)
|
|
if at_sym == 'LOCAL':
|
|
demangled = 'Z%sE%s' % (demangled, local_sym_name)
|
|
if at_sym == 'GUARD':
|
|
demangled = 'GVZ%sE%s' % (demangled, local_sym_name)
|
|
if at_sym == 'STRING':
|
|
demangled = 'Z%sEs%s' % (demangled, local_sym_name)
|
|
return '_Z%s' % demangled
|
|
|
|
def demangle_try(s):
|
|
try:
|
|
return demangle(s)
|
|
except Exception as e:
|
|
# ss/robojumper: more context
|
|
sys.stderr.write('Demangler error: ' + str(e) + ' trying to demangle ' + s + '\n')
|
|
raise e
|
|
|
|
def main():
|
|
global mode
|
|
global verbose
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument('symbol', type=str, nargs='?')
|
|
parser.add_argument('-m', '--mode', choices=['demangle', 'remangle_itanium'], required=True)
|
|
parser.add_argument('-v', '--verbose', action='store_true', default=False)
|
|
args = parser.parse_args()
|
|
mode = args.mode
|
|
verbose = args.verbose
|
|
if args.symbol is None:
|
|
while True:
|
|
sym = input()
|
|
print(demangle_try(sym))
|
|
else:
|
|
print(demangle_try(args.symbol))
|
|
return
|
|
|
|
if __name__ == '__main__':
|
|
main()
|