Adding in support to generate more complete context

2026-07-07 12:03:27 -04:00 · 2024-02-25 10:44:58 -08:00
parent a74899a7c5
commit 2d0d9a6bd4
1 changed files with 70 additions and 7 deletions
@@ -10,6 +10,8 @@ import os
 import re
 import argparse
 import pyperclip
+from glob import glob
+from re import Pattern
 from io import StringIO
 from pcpp import Preprocessor
 from pcpp import CmdPreprocessor
@@ -17,6 +19,7 @@ from contextlib import redirect_stdout

 #region Context Options
 class ContextGenerationOptions:
+    should_strip_declspec = False
    should_strip_attributes = False
    should_strip_at_address = False
    should_convert_binary_literals = False
@@ -25,8 +28,9 @@ class ContextGenerationOptions:
 #endregion

 #region Regex Patterns
-at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
+at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$\[\]]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
 attribute_pattern = re.compile(r"(__attribute__)")
+declspec_pattern = re.compile(r"(__declspec)")
 binary_literal_pattern = re.compile(r"\b(0b[01]+)\b")
 trailing_initializer_pattern = re.compile(r"^.*?=\s*\{(?:.|\s)+?(,)?\s*(?:\/\/.*?|\/\*.*?\*\/)*\s*?\}\s*;", re.MULTILINE)
 enum_array_size_initializer_pattern = re.compile(r"\[\s*([a-zA-Z_$][\w$]*)\s*\]\s*;")
@@ -111,6 +115,44 @@ def strip_attributes(text_to_strip: str)->str:
    return text_to_strip
 #endregion

+#region declspec Stripping
+def strip_declspec(text_to_strip: str)->str:  # Returns the text with every __declspec occurrence and its "(...)" group removed
+    if not text_to_strip:
+        return text_to_strip
+    
+    declspec_matches = reversed(list(re.finditer(declspec_pattern, text_to_strip)))  # Last-to-first so a removal never shifts the offsets of earlier matches
+    for declspec_match in declspec_matches:
+        # Scan forward from the end of the keyword to just past the ")" that closes its "(...)" group
+        paren_count = 0
+
+        match_span = declspec_match.span(0)
+        end_index = match_span[1]
+        declspec_opened = False
+        while end_index < len(text_to_strip):  # NOTE: if the group never closes, end_index ends at len(text_to_strip) and the rest of the text is removed as well
+            if text_to_strip[end_index] == "(":
+                paren_count += 1
+
+                if paren_count == 1:
+                    declspec_opened = True
+            
+            if text_to_strip[end_index] == ")":
+                paren_count -= 1
+
+            if declspec_opened and paren_count == 0:
+                end_index += 1
+                break
+
+            end_index += 1
+
+        # Rebuild the text without the [start_index, end_index) span (the keyword plus its parenthesized group)
+        start_index = match_span[0]
+        prefix = text_to_strip[0:start_index]
+        postfix = text_to_strip[end_index:len(text_to_strip)]
+        text_to_strip = prefix + postfix
+
+    return text_to_strip
+#endregion
+
 #region At Address Stripping
 def strip_at_address(text_to_strip: str) -> str:
    if not text_to_strip:
@@ -276,7 +318,7 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
                return None
            
            # Do we need to sanitize this further?
-            if not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
+            if not context_options.should_strip_declspec and not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
                # No sanitation needed, so write the entire file out
                return preprocessor_string_writer.getvalue()
            
@@ -289,6 +331,9 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
                    if not line_to_write:
                        break

+                    if context_options.should_strip_declspec:
+                        line_to_write = strip_declspec(line_to_write)
+
                    if context_options.should_strip_attributes:
                        line_to_write = strip_attributes(line_to_write)

@@ -322,6 +367,7 @@ def main():
    parser.add_argument("-h", "-help", "--help", dest="help", action="store_true")
    parser.add_argument("-n64", "--n64-sdk", dest="n64_sdk", help="Path to the N64 SDK top level directory", action="store")
    parser.add_argument('-D', dest = 'defines', metavar = 'macro[=val]', nargs = 1, action = 'append', help = 'Predefine name as a macro [with value]')
+    parser.add_argument("--strip-declspec", dest="strip_declspec", help="If __declspec() string should be stripped", action="store_true", default=False)
    parser.add_argument("--strip-attributes", dest="strip_attributes", help="If __attribute__(()) string should be stripped", action="store_true", default=False)
    parser.add_argument("--strip-at-address", dest="strip_at_address", help="If AT_ADDRESS or : formatted string should be stripped", action="store_true", default=False)
    parser.add_argument("--strip-initializer_trailing_commas", dest="strip_initializer_trailing_commas", help="If trailing commas in initializers should be stripped", action="store_true", default=False)
@@ -344,7 +390,7 @@ def main():
    known_args = parsed_args[0]
    
    preprocessor_arguments = ['pcpp']
-    if known_args.help or not known_args.c_file:
+    if known_args.help:
        # Since this script acts as a wrapper for the main pcpp script
        # we want to manually display the help and pass it through to the
        # pcpp preprocessor to show its full list of arguments
@@ -371,6 +417,26 @@ def main():
        for define in argument_defines:
            include_defines.append(define)
            known_defines.append(define.split("=")[0])
+
+    if not known_args.c_file:
+        # If no file is specified it is assumed we want to create a mega context
+        # file that is the aggregate of all include files
+        include_files : set[str, str] = set()
+        for include_directory in default_include_directories:
+            files = [y for x in os.walk(include_directory) for y in glob(os.path.join(x[0], '*.h'))]
+            for include_file in files:
+                include_files.add(include_file)
+
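+        # Add each file as an input so that pcpp can parse them into a single output file
+        # Sort the files for some consistency (NOTE(review): the loop below iterates include_files, not sorted_files, so the sorted order is not what gets appended)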
+        sorted_files = list(include_files)
+        sorted_files.sort()
+        for include_file in include_files:
+            preprocessor_arguments.append(include_file)    
+    else:
+        # Add the file we want to read
+        c_file = known_args.c_file
+        preprocessor_arguments.append(known_args.c_file)
    
    # Add in the default defines unless explicitly passed in as arguments
    for default_define, default_define_value in default_defines.items():
@@ -400,12 +466,9 @@ def main():
    pass_through_args = parsed_args[1]
    preprocessor_arguments.extend(pass_through_args)

-    # Add the file we want to read
-    c_file = known_args.c_file
-    preprocessor_arguments.append(known_args.c_file)
-
    # Check if we need to do further conversions after the file is preprocessed
    context_options = ContextGenerationOptions()
+    context_options.should_strip_declspec = known_args.strip_declspec or known_args.ghidra or known_args.m2c
    context_options.should_strip_at_address = known_args.strip_at_address or known_args.ghidra or known_args.m2c
    context_options.should_strip_attributes = known_args.strip_attributes or known_args.m2c
    context_options.should_convert_binary_literals = known_args.convert_binary_literals or known_args.ghidra