From 2d0d9a6bd482d79a895527aba305ca91cb255eb0 Mon Sep 17 00:00:00 2001
From: Hexalotl <15166449+Hexalotl@users.noreply.github.com>
Date: Sun, 25 Feb 2024 10:44:58 -0800
Subject: [PATCH] Adding in support to generate more complete context

* Add a --strip-declspec option (also implied by the ghidra and m2c
  options) backed by strip_declspec(), which removes each __declspec
  token together with its balanced parenthesised argument list.
* Allow '[' and ']' in the declared name matched by at_address_pattern.
* When no C file is given, stop treating that as a request for help;
  instead pass every *.h file found under the default include
  directories to pcpp, in sorted order, so a single aggregate context
  is produced.

---
 tools/decompctx.py | 77 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 7 deletions(-)

diff --git a/tools/decompctx.py b/tools/decompctx.py
index 1fe9c903..a7a9be55 100644
--- a/tools/decompctx.py
+++ b/tools/decompctx.py
@@ -10,6 +10,8 @@ import os
 import re
 import argparse
 import pyperclip
+from glob import glob
+from re import Pattern
 from io import StringIO
 from pcpp import Preprocessor
 from pcpp import CmdPreprocessor
@@ -17,6 +19,7 @@ from contextlib import redirect_stdout
 
 #region Context Options
 class ContextGenerationOptions:
+    should_strip_declspec = False
     should_strip_attributes = False
     should_strip_at_address = False
     should_convert_binary_literals = False
@@ -25,8 +28,9 @@ class ContextGenerationOptions:
 #endregion
 
 #region Regex Patterns
-at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
+at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$\[\]]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
 attribute_pattern = re.compile(r"(__attribute__)")
+declspec_pattern = re.compile(r"(__declspec)")
 binary_literal_pattern = re.compile(r"\b(0b[01]+)\b")
 trailing_initializer_pattern = re.compile(r"^.*?=\s*\{(?:.|\s)+?(,)?\s*(?:\/\/.*?|\/\*.*?\*\/)*\s*?\}\s*;", re.MULTILINE)
 enum_array_size_initializer_pattern = re.compile(r"\[\s*([a-zA-Z_$][\w$]*)\s*\]\s*;")
@@ -111,6 +115,44 @@ def strip_attributes(text_to_strip: str)->str:
     return text_to_strip
 #endregion
 
+#region declspec Stripping
+def strip_declspec(text_to_strip: str)->str:
+    if not text_to_strip:
+        return text_to_strip
+
+    declspec_matches = reversed(list(re.finditer(declspec_pattern, text_to_strip)))
+    for declspec_match in declspec_matches:
+        # Find the index just past the parenthesis that closes __declspec(...)
+        paren_count = 0
+
+        match_span = declspec_match.span(0)
+        end_index = match_span[1]
+        declspec_opened = False
+        while end_index < len(text_to_strip):
+            if text_to_strip[end_index] == "(":
+                paren_count += 1
+
+                if paren_count == 1:
+                    declspec_opened = True
+
+            if text_to_strip[end_index] == ")":
+                paren_count -= 1
+
+            if declspec_opened and paren_count == 0:
+                end_index += 1
+                break
+
+            end_index += 1
+
+        # Create the substring
+        start_index = match_span[0]
+        prefix = text_to_strip[0:start_index]
+        postfix = text_to_strip[end_index:len(text_to_strip)]
+        text_to_strip = prefix + postfix
+
+    return text_to_strip
+#endregion
+
 #region At Address Stripping
 def strip_at_address(text_to_strip: str) -> str:
     if not text_to_strip:
@@ -276,7 +318,7 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
         return None
 
     # Do we need to sanitize this further?
-    if not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
+    if not context_options.should_strip_declspec and not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
         # No sanitation needed, so write the entire file out
         return preprocessor_string_writer.getvalue()
 
@@ -289,6 +331,9 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
             if not line_to_write:
                 break
 
+            if context_options.should_strip_declspec:
+                line_to_write = strip_declspec(line_to_write)
+
             if context_options.should_strip_attributes:
                 line_to_write = strip_attributes(line_to_write)
 
@@ -322,6 +367,7 @@ def main():
     parser.add_argument("-h", "-help", "--help", dest="help", action="store_true")
     parser.add_argument("-n64", "--n64-sdk", dest="n64_sdk", help="Path to the N64 SDK top level directory", action="store")
     parser.add_argument('-D', dest = 'defines', metavar = 'macro[=val]', nargs = 1, action = 'append', help = 'Predefine name as a macro [with value]')
+    parser.add_argument("--strip-declspec", dest="strip_declspec", help="If __declspec() string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-attributes", dest="strip_attributes", help="If __attribute__(()) string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-at-address", dest="strip_at_address", help="If AT_ADDRESS or : formatted string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-initializer_trailing_commas", dest="strip_initializer_trailing_commas", help="If trailing commas in initializers should be stripped", action="store_true", default=False)
@@ -344,7 +390,7 @@ def main():
     known_args = parsed_args[0]
     preprocessor_arguments = ['pcpp']
 
-    if known_args.help or not known_args.c_file:
+    if known_args.help:
         # Since this script acts as a wrapper for the main pcpp script
         # we want to manually display the help and pass it through to the
         # pcpp preprocessor to show its full list of arguments
@@ -371,6 +417,26 @@ def main():
         for define in argument_defines:
             include_defines.append(define)
             known_defines.append(define.split("=")[0])
+
+    if not known_args.c_file:
+        # If no file is specified it is assumed we want to create a mega context
+        # file that is the aggregate of all include files
+        include_files: set[str] = set()
+        for include_directory in default_include_directories:
+            files = [y for x in os.walk(include_directory) for y in glob(os.path.join(x[0], '*.h'))]
+            for include_file in files:
+                include_files.add(include_file)
+
+        # Add each file as an input so that pcpp can parse them into a single output file
+        # Sort the files for some consistency
+        sorted_files = list(include_files)
+        sorted_files.sort()
+        for include_file in sorted_files:
+            preprocessor_arguments.append(include_file)
+    else:
+        # Add the file we want to read
+        c_file = known_args.c_file
+        preprocessor_arguments.append(known_args.c_file)
 
     # Add in the default defines unless explicitly passed in as arguments
     for default_define, default_define_value in default_defines.items():
@@ -400,12 +466,9 @@ def main():
     pass_through_args = parsed_args[1]
     preprocessor_arguments.extend(pass_through_args)
 
-    # Add the file we want to read
-    c_file = known_args.c_file
-    preprocessor_arguments.append(known_args.c_file)
-
     # Check if we need to do further conversions after the file is preprocessed
     context_options = ContextGenerationOptions()
+    context_options.should_strip_declspec = known_args.strip_declspec or known_args.ghidra or known_args.m2c
     context_options.should_strip_at_address = known_args.strip_at_address or known_args.ghidra or known_args.m2c
     context_options.should_strip_attributes = known_args.strip_attributes or known_args.m2c
     context_options.should_convert_binary_literals = known_args.convert_binary_literals or known_args.ghidra