#@category _MGS

"""
Instructions:

1. The Ghidra parser can't deal with __LINE__, so in linker.h, #define out
   STATIC_ASSERT_SIZE() to nothing so that you have

       #define STATIC_ASSERT_SIZE(struct, size)

2. In Ghidra, click on File > Parse C Source... then click on Save profile to
   new name to create a new profile that you can call psx.prf. Remove all the
   source files and options, then individually add each header file in source
   and its subfolders to the list of source files to parse and use the
   following parse options, replacing Path/to/ as appropriate and adding any
   folders that might be missing from the list. Note that on Linux, you might
   be required to make all the filenames in the PsyQ INCLUDE and INCLUDE/SYS
   folders lowercase, as well as the INCLUDE/SYS folder name itself.

       -IPath/to/mgs_reversing/source
       -IPath/to/mgs_reversing/source/equip
       -IPath/to/mgs_reversing/source/font
       -IPath/to/mgs_reversing/source/game
       -IPath/to/mgs_reversing/source/libdg
       -IPath/to/mgs_reversing/source/libgcl
       -IPath/to/mgs_reversing/source/libgv
       -IPath/to/mgs_reversing/source/libhzd
       -IPath/to/mgs_reversing/source/memcard
       -IPath/to/mgs_reversing/source/menu
       -IPath/to/mgs_reversing/source/mts
       -IPath/to/mgs_reversing/source/okajima
       -IPath/to/mgs_reversing/source/takabe
       -IPath/to/mgs_reversing/source/thing
       -IPath/to/mgs_reversing/source/weapon
       -IPath/to/psyq_sdk/psyq_4.3/include
       -Dmips1
       -D__GNUC__
       -D_GNU_SOURCE
       -D__WORDSIZE=32
       -D__builtin_va_list=void *
       -D__DO_NOT_DEFINE_COMPILE
       -D_Complex
       -D_WCHAR_T

3. Now restore the static assert definition to linker.h.

4. Change the root_dir and run it from Ghidra's Script Manager.
"""

import os
import sys
import string
import re

from ghidra.program.model.data import ArrayDataType, DataUtilities

dtm = currentProgram.getDataTypeManager()
root_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../../'))

# Symbol name -> address string, read from the two-column lines of obj/asm.map
# (address first, symbol second). Symbols starting with '_' are ignored.
symbol_address_map = { }
with open(root_dir + '/obj/asm.map') as map_file:
    for line in map_file:
        pieces = line.split()
        if len(pieces) != 2:
            continue
        symbol = pieces[1]
        if symbol.startswith('_'):
            continue
        symbol_address_map[symbol] = pieces[0]

ishex = lambda s: all(c in string.hexdigits for c in s)

# #define name -> replacement text, filled in as the source files are processed
# (so it only holds the definitions seen so far, in os.walk order).
defines = { }

# #define name -> replacement text for the whole source tree, collected up
# front. Only consulted when a name is missing from `defines`, which happens
# when the header holding a #define is visited after the file that uses it.
all_defines = { }

# Inclusive (start, end) integer address ranges of the data created so far.
defined_data_ranges = []

# Symbol names that have already been given a data type.
updated_keys = set()

# Captures the text between the brackets of an array declarator, e.g. the
# "16" of "[16]" or the "MAX_ITEMS" of "[MAX_ITEMS]".
array_size_regex = re.compile(r'\[([A-Za-z0-9_]+)\]')


# Removes text from in between braces, even if they are nested.
# https://stackoverflow.com/a/14598135
def strip_braces(string):
    """Return `string` without any brace-enclosed spans.

    A '}' that has no matching '{' is kept; everything after a '{' that is
    never closed is dropped.
    """
    kept = []
    depth = 0
    for char in string:
        if char == '{':
            depth += 1
        elif char == '}' and depth > 0:
            depth -= 1
        elif depth == 0:
            kept.append(char)
    return ''.join(kept)


def debug_print(enabled, message):
    """Print `message` only when `enabled` is true."""
    if enabled:
        print(message)


def is_valid_address(address):
    """Return True for an 8-digit hex address string that starts with '80'."""
    return ishex(address) and len(address) == 8 and address.startswith('80')


def symbol_name_at(address_as_int):
    """Return the name of the symbol at the address, or a placeholder if none."""
    symbol = getSymbolAt(toAddr(address_as_int))
    if symbol is None:
        return '<no symbol>'
    return symbol.getName()


def count_initializer_elements(lines, decl_index):
    """Count the elements of an initializer list by counting commas.

    Counting starts on the declaration line `lines[decl_index]` and stops at
    the first line containing ';'. On the lines after the declaration, nested
    braces and '//' comments are removed first so that only top-level commas
    are counted. The list is assumed not to end with a trailing comma.

    Returns the element count, or None if the file ends before a ';' is found.
    """
    num_elements = 1 # For the missing comma, assuming we're not going to end with a comma.
    for line_index in range(decl_index, len(lines)):
        text = lines[line_index]
        if line_index != decl_index:
            text = strip_braces(text).partition('//')[0]
        num_elements += text.count(',')
        if ';' in text:
            return num_elements
    return None


def resolve_array_length(var_name, lines, index, print_debug_info):
    """Work out how many elements the array declared as `var_name` has.

    `var_name` is the declarator token including its brackets. An empty "[]"
    means the elements of the initializer are counted; otherwise the size is
    a decimal literal or the name of a #define.

    Returns the element count, or None when the declaration should be skipped.
    """
    # NOTE(review): only the first bracket pair is looked at, so a
    # multi-dimensional declarator is sized by its first dimension only.
    if len(var_name) >= 2 and var_name[-2] == '[':
        debug_print(print_debug_info, 'number of elements not specified, counting')
        num_elements = count_initializer_elements(lines, index)
        if num_elements is None:
            print('WARNING: no terminating ; found for ' + var_name + ', skipping')
        return num_elements

    debug_print(print_debug_info, 'number of elements specified, retrieving')
    number_regex = array_size_regex.findall(var_name)
    if not number_regex:
        return None

    debug_print(print_debug_info, 'regex found a possible number')
    possible_number = number_regex[0]
    if possible_number.isdigit():
        num_elements = int(possible_number, 10)
        debug_print(print_debug_info, 'found literal with value == ' + str(num_elements))
        return num_elements

    # Definitions already seen take precedence; the pre-collected table only
    # fills in names whose header has not been visited yet.
    if possible_number in defines:
        define_value = defines[possible_number]
    elif possible_number in all_defines:
        define_value = all_defines[possible_number]
    else:
        debug_print(print_debug_info, 'found use of #define not yet put in dictionary')
        return None

    if define_value.isdigit():
        num_elements = int(define_value, 10)
        debug_print(print_debug_info, 'found use of #define with value == ' + str(num_elements))
        return num_elements

    # NOTE(review): a #define whose value is not a plain decimal number (hex,
    # an expression, ...) falls back to a single element, as it always has.
    return 1


def create_data_at(var_name, address, data_type):
    """Apply `data_type` at `address`, warning if it overlaps earlier data.

    `address` is the hex string from the symbol map. The covered range is
    recorded in `defined_data_ranges`, the listing is cleared over that range
    and the data is then created there.
    """
    address_as_int = int(address, 16)
    end_address_as_int = address_as_int + data_type.getLength() - 1

    for defined_data_start, defined_data_end in defined_data_ranges:
        if address_as_int <= defined_data_end and end_address_as_int >= defined_data_start:
            print('WARNING: OVERLAPPING DATA')
            print(var_name + ' in range ' + address + ' - ' + hex(end_address_as_int))
            print('overlaps ' + symbol_name_at(defined_data_start) + ' in range ' + hex(defined_data_start) + ' - ' + hex(defined_data_end))
            print('*****************************')
    defined_data_ranges.append((address_as_int, end_address_as_int))

    start_address = toAddr(address_as_int)
    end_address = toAddr(end_address_as_int)
    clearListing(start_address, end_address)
    DataUtilities.createData(currentProgram, start_address, data_type, data_type.getLength(), True, DataUtilities.ClearDataMode.CLEAR_SINGLE_DATA)


def process_line(lines, index, line):
    """Handle one source line: record a #define or apply a SECTION variable.

    A variable line is expected to look like "[struct] TYPE SECTION(...) NAME"
    with whitespace between the parts. Lines that do not match, whose type is
    unknown to the data type manager, or whose name has no usable address in
    the symbol map are ignored.
    """
    pieces = line.split()
    if len(pieces) < 3:
        return

    if pieces[0] == '#define':
        defines[pieces[1]] = ' '.join(pieces[2:])
        return

    pieces_base_index = 1 if pieces[0] == 'struct' else 0
    if len(pieces) < pieces_base_index + 3:
        # A "struct" line that is too short to hold type, section and name.
        return

    var_type = pieces[pieces_base_index]
    if '/' in var_type or var_type == 'char':
        return

    # Cheap textual check first, so the data type lookup only runs for lines
    # that can actually be variable declarations.
    var_section = pieces[pieces_base_index + 1]
    if not var_section.startswith('SECTION'):
        return

    data_types = []
    dtm.findDataTypes(var_type, data_types)
    if not data_types:
        return
    data_type = data_types[0]

    var_name = pieces[pieces_base_index + 2]
    var_name = var_name.replace(';', '')

    # To trace a specific variable, replace False with a condition such as
    # '800B05A8' in var_name.
    print_debug_info = False
    debug_print(print_debug_info, 'line == ' + line)

    # Do we have an array?
    is_array = ']' in var_name
    if is_array:
        debug_print(print_debug_info, 'detected array')
        key = var_name.partition('[')[0]
    else:
        debug_print(print_debug_info, 'detected single element')
        key = var_name

    if key in updated_keys or key not in symbol_address_map:
        return
    address = symbol_address_map[key]
    debug_print(print_debug_info, 'key found in symbol-address map with address == ' + address)
    if not is_valid_address(address):
        return

    if is_array:
        num_elements = resolve_array_length(var_name, lines, index, print_debug_info)
        if num_elements is None:
            return
        debug_print(print_debug_info, 'valid address, adding array')
        data_type = ArrayDataType(data_type, num_elements, data_type.getLength())
    else:
        debug_print(print_debug_info, 'valid address, adding element')

    create_data_at(var_name, address, data_type)
    updated_keys.add(key)
    debug_print(print_debug_info, '*****************************')


# Read every source file once, in os.walk order, skipping editor swap files.
source_files = []
for root, subdirs, files in os.walk(root_dir + '/source'):
    for filename in files:
        file_path = os.path.join(root, filename)
        if file_path.endswith('.swp'):
            continue
        with open(file_path, 'r') as f:
            source_files.append(f.readlines())

# First pass: collect every #define so that array sizes can be resolved no
# matter which file is visited first. Later definitions overwrite earlier ones.
for lines in source_files:
    for line in lines:
        if '#define' not in line:
            continue
        pieces = line.split()
        if len(pieces) >= 3 and pieces[0] == '#define':
            all_defines[pieces[1]] = ' '.join(pieces[2:])

# Second pass: apply data types to the SECTION variables.
for lines in source_files:
    for index, line in enumerate(lines):
        process_line(lines, index, line)