"""Adds the corresponding addresses of a translation unit to slice config files.

The script scans a linker symbol map for the entries belonging to one
translation unit (TU), converts their map-relative addresses using a fixed
per-section offset, and writes the resulting address ranges into the binary
slice YAML file and (optionally, after prompting) the asset slice YAML file.
"""
import os
import re
import argparse
from re import Match
from io import TextIOWrapper
import typing

from ruamel.yaml import YAML
from ruamel.yaml import CommentedMap
from ruamel.yaml import CommentedSeq
from ruamel.yaml import scalarint


#region Types
class SymbolInfo:
    """A named symbol together with its start and end address."""

    symbol_name: str = None
    start_address: int = 0
    end_address: int = 0

    def __init__(self, name: str, start: int, size: int) -> None:
        self.symbol_name = name
        self.start_address = start
        # The end address is derived from the size; callers may overwrite it
        # later with a more accurate value.
        self.end_address = start + size

    def get_address_range(self) -> typing.Tuple[int, int]:
        """Return ``(start_address, end_address)``."""
        return self.start_address, self.end_address


class SliceSection:
    """A section header symbol plus the symbols gathered underneath it."""

    section_symbol: SymbolInfo = None
    symbols: typing.List[SymbolInfo] = None

    def __init__(self, symbol: SymbolInfo) -> None:
        self.section_symbol = symbol
        self.symbols = []


class SliceInfo:
    """All sections gathered for one slice name (e.g. ``.text``) of a TU."""

    sections: typing.List[SliceSection] = None

    def __init__(self) -> None:
        self.sections = []

    def get_address_range(self) -> typing.Tuple[int, int]:
        """Return the range spanned from the first section to the last one.

        The end address is taken from the last symbol of the last section when
        that section has symbols, otherwise from the last section header.

        Raises:
            IndexError: if no sections have been gathered.
        """
        first_section = self.sections[0]
        last_section = self.sections[-1]
        end_address = last_section.section_symbol.end_address
        if len(last_section.symbols) > 0:
            end_address = last_section.symbols[-1].end_address
        return first_section.section_symbol.start_address, end_address


class Address_Sort_Entry:
    """Key/value pair of a YAML map paired with the address it is sorted by."""

    key: str = None
    value: CommentedMap = None
    starting_address: int = None

    def __init__(self, entry_key: str, entry_value: CommentedMap, entry_starting_address: int) -> None:
        self.key = entry_key
        self.value = entry_value
        self.starting_address = entry_starting_address
#endregion


#region Constants
# Dictionary for the offsets we need to apply to the addresses from the map
address_offset_map: typing.Dict[str, int] = {
    ".text": 0x803702A8,
    ".rodata": 0x80641260,
    ".data": 0x8064D500,
    ".bss": 0x8125A7C0,
}

# Order in which section keys are consulted when sorting binary slice entries
prioritized_addresses: typing.List[str] = [".text", ".rodata", ".data", ".bss"]

script_dir: str = os.path.dirname(os.path.realpath(__file__))
root_dir: str = os.path.abspath(os.path.join(script_dir, ".."))
default_map_path: str = os.path.join(root_dir, "dump/foresta.map")
default_binary_slice_file_path: str = os.path.join(root_dir, "config/rel_slices.yml")
default_asset_slice_file_path: str = os.path.join(root_dir, "config/assets.yml")

# Groups used by this script: 1 = starting address, 2 = size, 5 = symbol name
# (None for "entry of .data" lines), 6 = translation unit name.
# NOTE(review): groups 3 and 4 are captured but never read here.
specific_tu_pattern_format = r"\s*([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+(?:([0-9a-fA-F]+)\s+(.+?)|\.\.\.data\.\d \(entry of \.data\))\s+({tu_name})\s*"
# Raw string: "\." is not a valid escape sequence in a normal string literal.
general_symbol_pattern = re.compile(specific_tu_pattern_format.format(tu_name=r".+\.o"))
slice_boundary_format = "[{start_address}, {end_address}]"
#endregion


#region Helpers
def _confirm(prompt: str) -> bool:
    """Print *prompt* and return True when the user answers "y" or "yes"."""
    print(prompt)
    return input().lower() in ("y", "yes")


def _set_address_range(parent_map: CommentedMap, key: str, start_address: int, end_address: int) -> None:
    """Store ``[start_address, end_address]`` as a flow-style hex list under *key*.

    An existing sequence under *key* is cleared and re-used; otherwise a new
    sequence is created and assigned.
    """
    if key in parent_map:
        # Re-use the same commented section
        address_seq = parent_map[key]
        address_seq.clear()
    else:
        address_seq = CommentedSeq()
        parent_map[key] = address_seq

    address_seq.fa.set_flow_style()
    address_seq.append(scalarint.HexCapsInt(start_address))
    address_seq.append(scalarint.HexCapsInt(end_address))


def _source_file_name(tu_name: str) -> str:
    """Return the ``.c`` file name matching the object file name *tu_name*."""
    if tu_name.endswith(".o"):
        # Only swap the extension; ".o" elsewhere in the name is left alone.
        return tu_name[:-2] + ".c"
    # Names without the ".o" suffix keep the previous replace-based behaviour.
    return tu_name.replace(".o", ".c")
#endregion


#region Sorting
def sort_by_starting_address(data: CommentedMap, address_sort_keys: typing.List[str]) -> CommentedMap:
    """Return *data* with its entries ordered by starting address.

    For every entry, the first key of *address_sort_keys* that is present
    decides the starting address: an int is used directly, a ``CommentedSeq``
    contributes its first element. Entries with none of the keys (or with an
    unsupported value type) sort as address 0. Comments attached to the keys
    are carried over to the new map.

    Args:
        data: map whose values are themselves maps.
        address_sort_keys: candidate address keys, in priority order.

    Returns:
        *data* itself when it holds at most one entry, otherwise a new
        ``CommentedMap`` in sorted order.
    """
    if len(data) <= 1:
        return data

    ordered_entries: typing.List[Address_Sort_Entry] = []
    for key, entry in data.items():
        starting_address = 0
        for address_key in address_sort_keys:
            if address_key not in entry:
                continue

            # Ensure starting_address is an integer
            address_value = entry[address_key]
            if isinstance(address_value, int):
                starting_address = address_value
            elif isinstance(address_value, CommentedSeq):
                starting_address = address_value[0]
            else:
                print('Address key %s is not an int or CommentedSeq! type: %s value: %s' % (address_key, type(address_value), address_value))
                starting_address = 0
            # Only the highest-priority key that is present is considered
            break
        ordered_entries.append(Address_Sort_Entry(key, entry, starting_address))

    # list.sort is stable, so entries with equal addresses keep their order
    ordered_entries.sort(key=lambda sort_entry: sort_entry.starting_address)

    ordered_map = CommentedMap()
    for ordered_entry in ordered_entries:
        ordered_map[ordered_entry.key] = ordered_entry.value
        if ordered_entry.key in data.ca.items:
            ordered_map.ca.items[ordered_entry.key] = data.ca.items[ordered_entry.key]
    return ordered_map
#endregion


#region Symbol Gathering
def get_symbol_from_map_match(symbol_match: Match, address_offset: int) -> SymbolInfo:
    """Build a ``SymbolInfo`` from a map line match.

    Group 5 is the name, group 1 the hexadecimal starting address (shifted by
    *address_offset*) and group 2 the hexadecimal size.
    """
    name = symbol_match.group(5)
    start_address = int(symbol_match.group(1), 16) + address_offset
    size = int(symbol_match.group(2), 16)
    return SymbolInfo(name, start_address, size)


def gather_symbols_for_section(address_offset: int, file_reader: TextIOWrapper, slice_info: SliceInfo, starting_match: Match):
    """Read the symbols following a section header line into *slice_info*.

    A new ``SliceSection`` is created from *starting_match* and appended to
    ``slice_info.sections``. Lines are then consumed from *file_reader* until a
    line does not match, belongs to another translation unit, or the file
    ends. When another section header of the same translation unit is met, it
    is gathered recursively as a further section.

    Args:
        address_offset: value added to every address read from the map.
        file_reader: open map file positioned right after the header line.
        slice_info: receives the gathered sections.
        starting_match: match of the section header line.
    """
    section_tu_name = starting_match.group(6)
    section = SliceSection(get_symbol_from_map_match(starting_match, address_offset))
    slice_info.sections.append(section)

    # Skip the "entry of .data" lines that directly follow the header
    while True:
        line = file_reader.readline()
        if not line:
            return
        if "entry of .data" not in line:
            break

    next_match: Match = general_symbol_pattern.match(line)
    while True:
        # Check if the next match belongs to this group or not
        curr_match = next_match
        if not curr_match:
            break
        if curr_match.group(6) != section_tu_name:
            break

        if curr_match.group(5) in address_offset_map:
            # Another section header for the same TU: start a new section from
            # THIS line (not from the original header, which would duplicate
            # the first section's symbol).
            gather_symbols_for_section(address_offset, file_reader, slice_info, curr_match)
            break

        # Make symbol for current match
        symbol = get_symbol_from_map_match(curr_match, address_offset)

        # Check the next match to get a more accurate ending address
        next_line = file_reader.readline()
        next_match = general_symbol_pattern.match(next_line) if next_line else None
        if next_match:
            # Use start address as the end boundary for the slice
            symbol.end_address = int(next_match.group(1), 16) + address_offset
        # On EOF or a non-matching line the symbol is added as is, exactly
        # once; the loop then stops at the top because next_match is None.
        section.symbols.append(symbol)


def gather_tu_symbols(tu_name: str, map_path: str) -> typing.Dict[str, SliceInfo]:
    """Collect the symbols of translation unit *tu_name* from the map file.

    Args:
        tu_name: object file name of the translation unit (e.g. ``foo.o``).
        map_path: path of the symbol map to read.

    Returns:
        A dictionary keyed by slice name (a key of ``address_offset_map``)
        holding the sections gathered for that slice.
    """
    gathered_symbols: typing.Dict[str, SliceInfo] = {}
    # Escape the name so that e.g. the "." in "foo.o" only matches a literal dot
    tu_regex = re.compile(specific_tu_pattern_format.format(tu_name=re.escape(tu_name)))

    with open(map_path, "r", encoding="utf-8", newline="\n") as file_reader:
        # readline (not file iteration) because gather_symbols_for_section
        # consumes further lines from the same reader
        for line in iter(file_reader.readline, ""):
            # Check if the line matches the TU name
            match = tu_regex.match(line)
            if not match:
                continue

            # Only section header lines start a slice
            slice_name = match.group(5)
            if slice_name not in address_offset_map:
                continue

            # Add to dictionary
            slice_info = SliceInfo()
            gathered_symbols[slice_name] = slice_info
            gather_symbols_for_section(address_offset_map[slice_name], file_reader, slice_info, match)

    return gathered_symbols
#endregion


#region Asset Slices Config File
def update_asset_slice_config(tu_name: str, binary_slice_file_path: str, asset_slice_file_path: str, symbols_for_tu: typing.Dict[str, SliceInfo]):
    """Interactively add the ``.data`` symbols of a TU to the asset slice file.

    Nothing happens when no ``.data`` symbols were gathered or when the user
    declines the first prompt. Otherwise the user is asked per symbol whether
    to add it and which (optional) asset type to record; the entries are then
    sorted by address and the file is rewritten.

    Args:
        tu_name: translation unit name, used as a comment before the first
            newly inserted entry.
        binary_slice_file_path: used to choose the "config/rel.yml" or
            "config/dol.yml" top-level key of the asset file.
        asset_slice_file_path: YAML file to update.
        symbols_for_tu: result of ``gather_tu_symbols``.
    """
    if ".data" not in symbols_for_tu:
        return

    if not _confirm("Add data entries to: " + asset_slice_file_path + "? (y/n)"):
        return

    yaml = YAML(typ="rt")
    with open(asset_slice_file_path, "r", encoding="utf-8", newline="\n") as file_reader:
        data: CommentedMap = yaml.load(file_reader)

    # NOTE(review): substring test on the whole path; a directory name
    # containing "rel" also selects the rel key - confirm this is intended.
    if "rel" in binary_slice_file_path:
        binary_commented_map_key = "config/rel.yml"
    else:
        binary_commented_map_key = "config/dol.yml"
    binary_commented_map: CommentedMap = data[binary_commented_map_key]

    insert_tu_name_comment = True
    for section in symbols_for_tu[".data"].sections:
        for asset_symbol in section.symbols:
            symbol_name = asset_symbol.symbol_name
            if not _confirm("Add entry for: " + symbol_name + "? (y/n)"):
                continue

            print("What is the asset type? (optional)")
            asset_type = input()

            if symbol_name in binary_commented_map:
                asset_commented_map = binary_commented_map[symbol_name]
                # The TU already has entries, so no heading comment is added
                insert_tu_name_comment = False
            else:
                asset_commented_map = CommentedMap()
                binary_commented_map.insert(len(binary_commented_map), symbol_name, asset_commented_map)
                if insert_tu_name_comment:
                    insert_tu_name_comment = False
                    binary_commented_map.yaml_set_comment_before_after_key(key=symbol_name, indent=2, before=tu_name)

            # Add in the address range
            start_address, end_address = asset_symbol.get_address_range()
            _set_address_range(asset_commented_map, "addrs", start_address, end_address)

            # Add in the asset type
            if not asset_type:
                # Type not specified
                if "type" in asset_commented_map:
                    # Using a previous entry where the type was used, so delete it
                    del asset_commented_map["type"]
                continue
            asset_commented_map["type"] = asset_type

    # Sort by starting address and replace
    data[binary_commented_map_key] = sort_by_starting_address(binary_commented_map, ["addrs"])

    # Write out to file
    with open(asset_slice_file_path, "w", encoding="utf-8", newline="\n") as file_writer:
        yaml.dump(data, file_writer)
#endregion


#region Slice Config File
def update_binary_slice_config(tu_name: str, slice_file_path: str, symbols_for_tu: typing.Dict[str, SliceInfo]):
    """Write the address range of every gathered slice into the slice file.

    The entry is keyed by the TU's ``.c`` file name. When it already exists
    the user is asked whether to overwrite it; declining leaves the file
    untouched. Entries are sorted by starting address before writing.

    Args:
        tu_name: object file name of the translation unit.
        slice_file_path: YAML file to update.
        symbols_for_tu: result of ``gather_tu_symbols``.
    """
    yaml = YAML(typ="rt")
    yaml.indent(mapping=4, sequence=4, offset=4)

    with open(slice_file_path, "r", encoding="utf-8", newline="\n") as file_reader:
        data: CommentedMap = yaml.load(file_reader)

    tu_c_file_name = _source_file_name(tu_name)
    if tu_c_file_name in data:
        if not _confirm("TU already exists in file. Overwrite values? (y/n)"):
            return
        # Re-use the existing commented map
        slice_commented_map: CommentedMap = data[tu_c_file_name]
    else:
        # Create a new commented map and add it to the end of the file
        slice_commented_map = CommentedMap()
        data.insert(len(data), tu_c_file_name, slice_commented_map)

    for slice_name, slice_info in symbols_for_tu.items():
        if len(slice_info.sections) == 0:
            # No symbols for this TU
            continue

        # Add in the start and end address
        start_address, end_address = slice_info.get_address_range()
        _set_address_range(slice_commented_map, slice_name, start_address, end_address)

    # Sort by address
    data = sort_by_starting_address(data, prioritized_addresses)

    # Write out to file
    with open(slice_file_path, "w", encoding="utf-8", newline="\n") as file_writer:
        yaml.dump(data, file_writer)
#endregion


#region Main
def main():
    """Parse the command line and update both slice config files."""
    parser = argparse.ArgumentParser(prog="Translation Unit Config Updater", description="Adds the corresponding addresses to slice config files")
    parser.add_argument("tu_name", nargs="?", help="Name of the translation unit to get addresses for")
    parser.add_argument("-map", "--symbol-map", dest="symbol_map", help="Path to the symbol map file used for reference", action="store")
    parser.add_argument("-binary", "--binary-slices-file", dest="binary_slices_file", help="Path to the binary slices file to write to", action="store")
    parser.add_argument("-asset", "--asset-slices-file", dest="asset_slices_file", help="Path to the asset slices file to write to", action="store")
    args = parser.parse_args()

    # tu_name is declared optional (nargs="?"), so it can be None; report a
    # usage error instead of failing on the slice below.
    if not args.tu_name:
        parser.error("a translation unit name is required")

    # Make sure the translation unit name ends with .o
    tu_name = args.tu_name
    if not tu_name.endswith(".o"):
        tu_name = tu_name + ".o"

    # Fall back to the default paths for anything not given
    symbol_map_path = args.symbol_map or default_map_path
    binary_slices_file = args.binary_slices_file or default_binary_slice_file_path
    asset_slices_file = args.asset_slices_file or default_asset_slice_file_path

    # Get the symbols for the TU
    symbols_for_tu = gather_tu_symbols(tu_name, symbol_map_path)

    # Make a call to update the binary file
    update_binary_slice_config(tu_name, binary_slices_file, symbols_for_tu)
    update_asset_slice_config(tu_name, binary_slices_file, asset_slices_file, symbols_for_tu)


if __name__ == "__main__":
    main()
#endregion