From 2d0d9a6bd482d79a895527aba305ca91cb255eb0 Mon Sep 17 00:00:00 2001
From: Hexalotl <15166449+Hexalotl@users.noreply.github.com>
Date: Sun, 25 Feb 2024 10:44:58 -0800
Subject: [PATCH] Adding in support to generate more complete context

---
 tools/decompctx.py | 77 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 70 insertions(+), 7 deletions(-)

diff --git a/tools/decompctx.py b/tools/decompctx.py
index 1fe9c903..a7a9be55 100644
--- a/tools/decompctx.py
+++ b/tools/decompctx.py
@@ -10,6 +10,8 @@ import os
 import re
 import argparse
 import pyperclip
+from glob import glob
+from re import Pattern
 from io import StringIO
 from pcpp import Preprocessor
 from pcpp import CmdPreprocessor
@@ -17,6 +19,7 @@ from contextlib import redirect_stdout
 
 #region Context Options
 class ContextGenerationOptions:
+    should_strip_declspec = False
     should_strip_attributes = False
     should_strip_at_address = False
     should_convert_binary_literals = False
@@ -25,8 +28,9 @@ class ContextGenerationOptions:
 #endregion
 
 #region Regex Patterns
-at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
+at_address_pattern = re.compile(r"(?:.*?)(?:[a-zA-Z_$][\w$]*\s*\*?\s[a-zA-Z_$][\w$\[\]]*)\s*((?:AT_ADDRESS|:)(?:\s*\(?\s*)(0x[0-9a-fA-F]+|[a-zA-Z_$][\w$]*)\)?);")
 attribute_pattern = re.compile(r"(__attribute__)")
+declspec_pattern = re.compile(r"(__declspec)")
 binary_literal_pattern = re.compile(r"\b(0b[01]+)\b")
 trailing_initializer_pattern = re.compile(r"^.*?=\s*\{(?:.|\s)+?(,)?\s*(?:\/\/.*?|\/\*.*?\*\/)*\s*?\}\s*;", re.MULTILINE)
 enum_array_size_initializer_pattern = re.compile(r"\[\s*([a-zA-Z_$][\w$]*)\s*\]\s*;")
@@ -111,6 +115,44 @@ def strip_attributes(text_to_strip: str)->str:
     return text_to_strip
 #endregion
 
+#region declspec Stripping
+def strip_declspec(text_to_strip: str)->str:
+    """Remove every ``__declspec(...)`` specifier from ``text_to_strip``.
+
+    The argument list is located by counting parentheses, so nested
+    parentheses inside the specifier are removed with it.  A ``__declspec``
+    that has no balanced argument list in the text is left untouched
+    instead of swallowing the remainder of the text.
+    """
+    if not text_to_strip:
+        return text_to_strip
+
+    # Walk the matches back to front so that removing one specifier does not
+    # shift the spans of the matches that are still to be processed.
+    for declspec_match in reversed(list(declspec_pattern.finditer(text_to_strip))):
+        start_index, end_index = declspec_match.span(0)
+        paren_count = 0
+        declspec_closed = False
+        while end_index < len(text_to_strip):
+            character = text_to_strip[end_index]
+            end_index += 1
+            if character == "(":
+                paren_count += 1
+            elif character == ")":
+                paren_count -= 1
+                if paren_count == 0:
+                    # end_index is now just past the closing parenthesis
+                    declspec_closed = True
+                    break
+
+        if not declspec_closed:
+            # Nothing balanced to cut out; keep the text as it is
+            continue
+
+        text_to_strip = text_to_strip[:start_index] + text_to_strip[end_index:]
+    return text_to_strip
+#endregion
+
 #region At Address Stripping
 def strip_at_address(text_to_strip: str) -> str:
     if not text_to_strip:
@@ -276,7 +318,7 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
                 return None
             
             # Do we need to sanitize this further?
-            if not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
+            if not context_options.should_strip_declspec and not context_options.should_strip_attributes and not context_options.should_strip_at_address and not context_options.should_strip_initializer_trailing_commas and not context_options.should_convert_binary_literals:
                 # No sanitation needed, so write the entire file out
                 return preprocessor_string_writer.getvalue()
             
@@ -289,6 +331,9 @@ def generate_context(preprocessor_arguments: list[str], context_options: Context
                     if not line_to_write:
                         break
 
+                    if context_options.should_strip_declspec:
+                        line_to_write = strip_declspec(line_to_write)
+
                     if context_options.should_strip_attributes:
                         line_to_write = strip_attributes(line_to_write)
 
@@ -322,6 +367,7 @@ def main():
     parser.add_argument("-h", "-help", "--help", dest="help", action="store_true")
     parser.add_argument("-n64", "--n64-sdk", dest="n64_sdk", help="Path to the N64 SDK top level directory", action="store")
     parser.add_argument('-D', dest = 'defines', metavar = 'macro[=val]', nargs = 1, action = 'append', help = 'Predefine name as a macro [with value]')
+    parser.add_argument("--strip-declspec", dest="strip_declspec", help="If __declspec() string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-attributes", dest="strip_attributes", help="If __attribute__(()) string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-at-address", dest="strip_at_address", help="If AT_ADDRESS or : formatted string should be stripped", action="store_true", default=False)
     parser.add_argument("--strip-initializer_trailing_commas", dest="strip_initializer_trailing_commas", help="If trailing commas in initializers should be stripped", action="store_true", default=False)
@@ -344,7 +390,7 @@ def main():
     known_args = parsed_args[0]
     
     preprocessor_arguments = ['pcpp']
-    if known_args.help or not known_args.c_file:
+    if known_args.help:
         # Since this script acts as a wrapper for the main pcpp script
         # we want to manually display the help and pass it through to the
         # pcpp preprocessor to show its full list of arguments
@@ -371,6 +417,26 @@ def main():
         for define in argument_defines:
             include_defines.append(define)
             known_defines.append(define.split("=")[0])
+
+    if not known_args.c_file:
+        # If no file is specified it is assumed we want to create a mega context
+        # file that is the aggregate of all include files
+        include_files: set[str] = set()
+        for include_directory in default_include_directories:
+            # Collect the *.h files of the directory and all of its subdirectories
+            for directory_path, _, _ in os.walk(include_directory):
+                include_files.update(glob(os.path.join(directory_path, '*.h')))
+
+        # Add each file as an input so that pcpp can parse them into a single output file.
+        # The set has no stable iteration order, so the sorted list (not the set) must be
+        # iterated for the arguments to come out in the same order on every run.
+        sorted_files = sorted(include_files)
+        for include_file in sorted_files:
+            preprocessor_arguments.append(include_file)
+    else:
+        # Add the file we want to read
+        c_file = known_args.c_file
+        preprocessor_arguments.append(known_args.c_file)
     
     # Add in the default defines unless explicitly passed in as arguments
     for default_define, default_define_value in default_defines.items():
@@ -400,12 +466,9 @@ def main():
     pass_through_args = parsed_args[1]
     preprocessor_arguments.extend(pass_through_args)
 
-    # Add the file we want to read
-    c_file = known_args.c_file
-    preprocessor_arguments.append(known_args.c_file)
-
     # Check if we need to do further conversions after the file is preprocessed
     context_options = ContextGenerationOptions()
+    context_options.should_strip_declspec = known_args.strip_declspec or known_args.ghidra or known_args.m2c
     context_options.should_strip_at_address = known_args.strip_at_address or known_args.ghidra or known_args.m2c
     context_options.should_strip_attributes = known_args.strip_attributes or known_args.m2c
     context_options.should_convert_binary_literals = known_args.convert_binary_literals or known_args.ghidra