Merge remote-tracking branch 'open-goal/master' into v/translations

2026-06-24 01:41:29 -04:00 · 2023-02-23 17:13:27 -05:00
parent f257cc42bb 2d8ad39fea
commit 75d48a41dc
2735 changed files with 2068207 additions and 46519 deletions
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+out\build\Release\bin\goalc --auto-lt --user-auto
+pause
@@ -1,4 +0,0 @@
-@echo off
-cd ..\..
-out\build\Release\bin\goalc --user-auto
-pause
@@ -1,4 +1,4 @@
@echo off
 cd ..\..
-out\build\Release\bin\goalc --auto-lt --user-auto
+out\build\Release\bin\goalc --user-auto
 pause
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+out\build\Release\bin\goalc --user-auto --game jak2
+pause
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+out\build\Release\bin\gk -boot -fakeiso -debug -v -jak2
+pause
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+out\build\Release\bin\gk -fakeiso -debug -v -jak2
+pause
@@ -1,2 +1,2 @@
 cd ..\..
-git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
+git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
@@ -1,2 +1,2 @@
 cd ..\..
-git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
+git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
@@ -1,6 +1,6 @@
@echo off
 cd ..\..
 out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak1\ --game jak1
-scripts\update_decomp_reference.py failures\ test\decompiler\reference\
+python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak1
 RMDIR /Q/S failures
 pause
@@ -0,0 +1,6 @@
+@echo off
+cd ..\..
+out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak2\ --game jak2
+python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak2
+RMDIR /Q/S failures
+pause
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+out\build\Release\bin\goalc-test --gtest_filter="Jak2TypeConsistency.TypeConsistency"
+pause
@@ -0,0 +1,4 @@
+@echo off
+cd ..\..
+python3 scripts\gsrc\update-gsrc-via-refs.py --game jak2 --decompiler out\build\Release\bin\decompiler.exe --decompiler_config .\decompiler\config\jak2_ntsc_v1.jsonc
+pause
@@ -0,0 +1,15 @@
+import os
+import glob
+
+def get_goal_files(root_dir, ext="*.gc"):
+    """Return every path matching the glob `ext` in root_dir and each directory below it."""
+    matches = []
+    for dirpath, _subdirs, _filenames in os.walk(root_dir):
+        # glob once per visited directory; results keep os.walk's directory order
+        matches.extend(glob.glob(os.path.join(dirpath, ext)))
+    return matches
+
+# Print every line that starts (at column 0) with "(def" from the files found
+# under ./decompiler_out/jak2/import
+all_files = get_goal_files("./decompiler_out/jak2/import")
+def_lines = []
+for file in all_files:
+    with open(file) as f:
+        def_lines.extend(line for line in f if line.startswith("(def"))
+# join once at the end instead of growing a string line by line
+result = "".join(def_lines)
+print(result)
@@ -4,7 +4,7 @@ import argparse


 ### Script to track decompilation progress.
-### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src
+### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src/jak2

 def get_goal_files(root_dir, ext = "*.gc"):
    """Get all GOAL source files under root_dir."""
@@ -29,7 +29,7 @@ def print_table(stats, total_gc_files):
    print("-------------------------------------")
    print("| {: <24} | {: >6} |".format("TOTAL", total_lines))
    print("-------------------------------------")
-    estimated_lines = 500000
+    estimated_lines = 1000000
    print("Progress: {}/{} lines ({:.2f}%)".format(total_lines, estimated_lines, 100. * total_lines / estimated_lines))
    print("{}/{} files modified from template ({:.2f}%)".format(len(stats), total_gc_files,
                                                                100. * len(stats) / total_gc_files))
@@ -41,8 +41,7 @@ def main():
    args = parser.parse_args()
    all_files = get_goal_files(args.goal_src)

-    ref_files = get_goal_files(args.goal_src + "/../test/", "*_REF.gc")
-
+    ref_files = get_goal_files(args.goal_src + "/../../test/decompiler/reference/jak2", "*_REF.gc")
    ref_files_no_ext = [os.path.basename(fn)[:-7] for fn in ref_files]


@@ -62,7 +61,7 @@ def main():

        total_gc_files += 1

-        if line_count == 7 or short_name in excluded_files:
+        if line_count < 10 or short_name in excluded_files:
            # untouched template files are short (fewer than 10 lines), just skip them.
            continue

@@ -1,57 +0,0 @@
-import argparse
-parser = argparse.ArgumentParser()
-parser.add_argument("--file")
-args = parser.parse_args()
-
-import re
-labels_with_no_type = []
-
-file_path = "decompiler_out/jak1/{}_disasm.gc".format(args.file)
-with open(file_path) as f:
-  # Find all
-  content = f.readlines()
-  for line in content:
-    labels_with_no_type = labels_with_no_type + re.findall(r'L\d+', line)
-
-# dedup list
-labels_with_no_type = list(dict.fromkeys(labels_with_no_type))
-
-# let's go try to identify the types from the IR2 file if we can
-label_lines = []
-file_path = "decompiler_out/jak1/{}_ir2.asm".format(args.file)
-with open(file_path) as f:
-  # Find all
-  content = f.readlines()
-  prev_line = ""
-  next_label_will_be_lambda = False
-  for i, line in enumerate(content):
-    if ".function (anon-function" in line:
-      next_label_will_be_lambda = True
-    if line.startswith("L"):
-      for label in labels_with_no_type:
-        if line.startswith("{}:".format(label)):
-          # If we were expecting a lambda
-          if next_label_will_be_lambda:
-            label_lines.append("[\"{}\", \"_lambda_\", true]".format(label))
-            labels_with_no_type.remove(label)
-            next_label_will_be_lambda = False
-            break
-          # special case for pairs
-          if "(offset 2)" in line:
-            label_lines.append("[\"{}\", \"pair\", true]".format(label))
-            labels_with_no_type.remove(label)
-            break
-          # Check if the previous line has a `.type`
-          prev_line = content[i-1]
-          if ".type" in prev_line:
-            the_type = prev_line.split(".type ")[1].strip()
-            label_lines.append("[\"{}\", \"{}\", true]".format(label, the_type))
-            labels_with_no_type.remove(label)
-            break
-
-# Print out the labels
-print("Here are the labels I couldn't find a type for:")
-for label in labels_with_no_type:
-  print("- {}".format(label))
-print("And here are the ones I could:")
-print(",\n".join(label_lines))
@@ -0,0 +1,25 @@
+# Merge tools use specific algorithms or assumptions to detect conflicts
+# and not all of them will obviously flag them, even if they use the standard format
+#
+# So this is to ensure no conflict markers get ignored in goal_src at least
+#
+# Exit status: 0 when no file contains "<<<<<<<", 1 otherwise (offending paths are printed)
+import os
+import sys
+
+files_with_unresolved_conflicts = []
+
+for dirpath, subdirs, files in os.walk("./goal_src"):
+  for filename in files:
+    file_path = os.path.join(dirpath, filename)
+    # errors="replace" keeps a file that is not valid text in the default encoding
+    # from aborting the whole scan with a UnicodeDecodeError
+    with open(file_path, "r", errors="replace") as f:
+      # stream the file and stop reading at the first marker
+      if any("<<<<<<<" in line for line in f):
+        files_with_unresolved_conflicts.append(file_path)
+
+if not files_with_unresolved_conflicts:
+  sys.exit(0)
+
+print("There are unresolved conflicts in ./goal_src/")
+for file in files_with_unresolved_conflicts:
+  print(file)
+sys.exit(1)
@@ -1,67 +0,0 @@
-import re
-from jak1_file_list import file_list
-import argparse
-import os
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--files")
-args = parser.parse_args()
-
-files = args.files.split(",")
-
-throw_error = False
-
-method_split_pattern = re.compile('t9-\d+\s\(method-of-object')
-function_split_pattern = re.compile('\(t9-\d+\)')
-missing_res_tag_pattern = re.compile('(sv-\d{2,} int)')
-decompiler_error_pattern = re.compile(';; ERROR')
-missing_arg = re.compile('local-vars.*none\)')
-
-for file in files:
-  src_path = ""
-  for f in file_list:
-    if f[2] != 3:
-      continue
-    if f[0] == file:
-      src_path = f[4]
-      break
-
-  if not os.path.exists("./goal_src/{}".format(src_path)):
-    print("{} couldn't find in /goal_src!".format(file))
-    throw_error = True
-    continue
-
-  file_path = "./goal_src/{}/{}.gc".format(src_path, file)
-  with open(file_path) as f:
-    for lineno, line in enumerate(f):
-      method_split_match = method_split_pattern.search(line)
-      if method_split_match:
-        print("method_split - {}:{}".format(file_path, lineno + 1))
-        throw_error = True
-        continue
-      function_split_match = function_split_pattern.search(line)
-      if function_split_match:
-        print("function_split - {}:{}".format(file_path, lineno + 1))
-        throw_error = True
-        continue
-      missing_res_tag_match = missing_res_tag_pattern.search(line)
-      if missing_res_tag_match:
-        print("missing_res_tag - {}:{}".format(file_path, lineno + 1))
-        throw_error = True
-        continue
-      decompiler_error_match = decompiler_error_pattern.search(line)
-      if decompiler_error_match:
-        print("decompiler_error - {}:{}".format(file_path, lineno + 1))
-        throw_error = True
-        continue
-      missing_arg_match = missing_arg.search(line)
-      if missing_arg_match:
-        print("missing_arg - {}:{}".format(file_path, lineno + 1))
-        throw_error = True
-        continue
-
-if throw_error:
-  print("found potential problems!")
-  exit(1)
-else:
-  print("looks good!")
@@ -0,0 +1,96 @@
+from utils import get_alltypes_path_from_game, get_gsrc_path_from_filename
+
+
+class AllTypesUpdateBlock:
+    """A run of collected lines tagged with a file name and a block id."""
+
+    def __init__(self):
+        # every field starts out empty and is filled in after construction
+        self.file_name = ""
+        self.block_id = ""
+        self.data = []
+
+    def __str__(self):
+        # only the first 20 entries of data are shown
+        preview = self.data[:20]
+        return "{}:{}:{}...".format(self.file_name, self.block_id, preview)
+
+
+def update_alltypes_named_blocks(game_name):
+    """Gather the named blocks for `game_name`, then write them out with update_all_blocks."""
+    # blocks are grouped by file name (to minimize file IO operations)
+    blocks_by_file = dict()
+    get_all_blocks(game_name, blocks_by_file)
+    update_all_blocks(game_name, blocks_by_file)
+
+
+def get_all_blocks(game_name, block_dict):
+    """Collect every named block from the game's all-types file into block_dict.
+
+    A block starts on a line that begins with ";; +++" and contains a ":"; the
+    rest of that line is read as "<file_name>:<block_id>".  The block's data is
+    every following line up to (not including) the next line that begins with
+    ";; ---".  Each block is appended to block_dict[file_name] as an
+    AllTypesUpdateBlock.
+
+    NOTE: block_id keeps whatever trails it on the header line (line ending
+    included); update_all_blocks compares against an id sliced the same way.
+    """
+    with open(get_alltypes_path_from_game(game_name)) as f:
+        lines = f.readlines()
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        if not (line.startswith(";; +++") and ":" in line):
+            i = i + 1
+            continue
+        info = line.replace(";; +++", "")
+        file_name, block_id = info.split(":")
+        new_block = AllTypesUpdateBlock()
+        new_block.file_name = file_name
+        new_block.block_id = block_id
+        # Collect lines until the end marker.  The bounds check comes first so a
+        # block that is never closed runs to end-of-file instead of raising IndexError.
+        i = i + 1
+        while i < len(lines) and not lines[i].startswith(";; ---"):
+            new_block.data.append(lines[i])
+            i = i + 1
+        # i now sits on the ";; ---" line (or past the end); the outer loop steps over it
+        block_dict.setdefault(file_name, []).append(new_block)
+
+
+def update_all_blocks(game_name, block_dict):
+    """Rewrite the named blocks of every file listed in block_dict.
+
+    For each file name, the path from get_gsrc_path_from_filename is read and
+    scanned up to the first line starting with ";; decomp begins" (case
+    insensitive); that line and everything after it is copied through
+    untouched.  Before it, a line starting with ";; +++" opens a block: when
+    one of the supplied blocks has the same id, the lines up to the closing
+    ";; ---" are replaced by that block's data.  Blocks with no match are kept
+    as they are.  The file is then overwritten in place.
+    """
+    for file_name, blocks in block_dict.items():
+        path = get_gsrc_path_from_filename(game_name, file_name)
+        with open(path) as f:
+            lines = f.readlines()
+        final_lines = []
+        i = 0
+        while i < len(lines):
+            line = lines[i]
+            if line.lower().startswith(";; decomp begins"):
+                # nothing from here on is touched: copy the remainder verbatim
+                final_lines.extend(lines[i:])
+                break
+            final_lines.append(line)
+            i = i + 1
+            if not line.startswith(";; +++"):
+                continue
+            # the id is everything after the marker, line ending included
+            block_id = line.split(";; +++")[1]
+            matching_block = next((b for b in blocks if b.block_id == block_id), None)
+            if matching_block is None:
+                # unknown block: its existing contents are copied by the outer loop
+                continue
+            final_lines.extend(matching_block.data)
+            # Drop the stale contents but keep the end marker.  i always advances,
+            # so a header on the last line or a block that is never closed can no
+            # longer stall the loop or re-emit a stale line.
+            while i < len(lines):
+                stale_line = lines[i]
+                i = i + 1
+                if stale_line.startswith(";; ---"):
+                    final_lines.append(stale_line)
+                    break
+        # Update the file contents
+        with open(path, "w") as f:
+            f.writelines(final_lines)
@@ -0,0 +1,808 @@
+import re
+from rapidfuzz import fuzz
+
+# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
+# RetainedCode or something
+class CommentMeta:
+    """Text retained from a source file, plus the context used to place it again."""
+
+    def __init__(self):
+        # the retained text itself
+        self.data = ""
+        self.inline = False
+        self.code_in_line = None  # only for inline comments
+        self.line_in_file = None  # a worst-case scenario fallback
+        # nearest symbols around the text, and the padding counted on the way to them
+        self.symbol_before = None
+        self.symbol_inline = None
+        self.symbol_after = None
+        self.symbol_padding_before = None
+        self.symbol_padding_after = None
+        # nearest code lines around the text
+        # NOTE - maybe holding more than just 1 line before/after might help?
+        self.code_before = None
+        self.code_after = None
+        self.code_padding_before = None
+        self.code_padding_after = None
+        # the form the text sits in
+        self.line_num_in_form = None  # None == top level
+        self.containing_form = None  # none - top level
+        self.containing_form_kind = None  # function|method|behaviour
+        self.containing_form_func_name = None  # or the method/behaviour
+        self.containing_form_type = None
+
+    def __str__(self):
+        fields = (self.data, self.symbol_before, self.symbol_after)
+        return "{}:{}:{}".format(*fields)
+
+
+def debug_nice_formatted_code(val):
+    """Return the first 20 characters of `val` after stripping it; None stays None."""
+    return None if val is None else val.strip()[:20]
+
+
+# returns (symbol | None, padding)
+def backtrack_for_symbol(lines, index):
+    """Walk upwards from lines[index] looking for the closest symbol definition.
+
+    Returns `(symbol, padding)`.  `symbol` is the name captured from the first
+    define/define-extern/defun/defstate/deftype line met, or None when the walk
+    hits another non-blank, non-comment line, a line containing "decomp begins",
+    or the top of the file first.  `padding` is the number of blank lines
+    passed on the way.
+    """
+    padding = 0
+    # the stop value is -1 so that lines[0] is inspected as well
+    for i in range(index - 1, -1, -1):
+        tline = lines[i].strip()
+        matches = re.search(
+            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
+        )
+        if matches is not None:
+            return matches.group(1), padding
+        if tline == "":
+            padding = padding + 1
+        elif not tline.startswith(";") or "decomp begins" in tline.lower():
+            # we hit a non empty line (but it wasn't a symbol!)
+            return None, padding
+    return None, padding
+
+
+def symbol_on_line(line):
+    """Return the symbol named by a define/defun/defstate/deftype on `line`, else None."""
+    found = re.search(
+        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", line.lstrip()
+    )
+    return None if found is None else found.group(1)
+
+
+def lookahead_for_symbol(lines, index):
+    """Walk downwards from lines[index] looking for the closest symbol definition.
+
+    Returns `(symbol, padding)`: the captured symbol name (None when another
+    non-blank, non-comment line, a "decomp begins" line, or the end of the file
+    comes first) and the number of blank lines passed on the way.
+    """
+    blank_count = 0
+    for pos in range(index + 1, len(lines)):
+        candidate = lines[pos].lstrip()
+        found = re.search(
+            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", candidate
+        )
+        if found is not None:
+            return found.group(1), blank_count
+        stripped = candidate.strip()
+        if stripped == "":
+            blank_count = blank_count + 1
+        elif not stripped.startswith(";") or "decomp begins" in candidate.lower():
+            # a non-empty line that is not a symbol definition ends the search
+            return None, blank_count
+    return None, blank_count
+
+
+def backtrack_for_code(lines, index):
+    """Walk upwards from lines[index] to the closest line of code.
+
+    Blank lines are counted as padding and comment lines (starting with ";")
+    are skipped.  Returns `(line, padding)`; `line` is None when a line
+    containing "decomp begins" or the top of the file is reached first.
+    """
+    padding = 0
+    # the stop value is -1 so that lines[0] is inspected as well
+    for i in range(index - 1, -1, -1):
+        line = lines[i]
+        if line.strip() == "":
+            padding = padding + 1
+        elif "decomp begins" in line.lower():
+            return None, padding
+        elif not line.lstrip().startswith(";"):
+            return line, padding
+    # ran out of lines: still return a pair, because callers unpack the result
+    return None, padding
+
+
+def lookahead_for_code(lines, index):
+    """Walk downwards from lines[index] to the closest line of code.
+
+    Blank lines are counted as padding and comment lines (starting with ";")
+    are skipped.  Returns `(line, padding)`; `line` is None when a line
+    containing "decomp begins" or the end of the file is reached first.
+    """
+    padding = 0
+    for i in range(index + 1, len(lines)):
+        line = lines[i]
+        if line.strip() == "":
+            padding = padding + 1
+        elif "decomp begins" in line.lower():
+            return None, padding
+        elif not line.lstrip().startswith(";"):
+            return line, padding
+    # nothing but blanks/comments until end-of-file: still return a pair,
+    # because callers unpack the result
+    return None, padding
+
+
+# returns form, or none
+def is_line_start_of_form(line):
+    """Return `line` itself when it opens a form, otherwise None.
+
+    Comment lines and lines containing "(when *debug-segment*" never count.
+    """
+    if line.lstrip().startswith(";"):
+        return None
+    if "(when *debug-segment*" in line:
+        return None
+    # an opening paren, an optional head token, then at least one whitespace character
+    if re.search(r"\(\s*([^\s.]*)\s+", line) is None:
+        return None
+    return line
+
+
+def has_form_ended(stack, line):
+    """Feed `line` into the paren `stack`; return True once the stack is empty.
+
+    Only the text before the first ";" on the line is considered.  `stack` is
+    mutated in place, so it carries the open-paren depth from one call to the
+    next.  A line with no characters before the ";" returns False.
+    """
+    code_part, _, _ = line.partition(";")
+    for ch in code_part:
+        if ch == "(":
+            stack.append(ch)
+            continue
+        # a ")" closes the innermost paren; with nothing open it is unbalanced
+        if ch == ")" and len(stack) > 0:
+            stack.pop()
+        if len(stack) == 0:
+            return True
+    return False
+
+
+def append_form_metadata(comment, form_start_line):
+    """Record on `comment` what kind of form `form_start_line` opens.
+
+    Sets containing_form_kind ("function", "behavior", "method" or "unknown"),
+    containing_form_func_name and containing_form_type.  The parsing is
+    delegated to get_form_metadata, which applies the same three patterns
+    (defun/defun-debug, defbehavior, defmethod) this function used to repeat.
+    """
+    (
+        comment.containing_form_kind,
+        comment.containing_form_func_name,
+        comment.containing_form_type,
+    ) = get_form_metadata(form_start_line)
+
+
+def process_original_lines(lines):
+    """Collect the comments (and decomp-deviation blocks) found in `lines`.
+
+    Nothing is collected until a line containing "decomp begins" has been seen.
+    After that, every run of full-line comments (lines starting with ";" or
+    "#|") and every inline ";" comment becomes a CommentMeta recording the
+    nearest symbols and code lines around it and the form it sits in, if any.
+
+    Returns `(comments, debug_lines)`: the CommentMeta objects in file order,
+    and the visited input lines with ";; [DEBUG]" annotation lines added after
+    each collected comment.
+    """
+    comments = []
+    debug_lines = []
+    # track if we are inside a define*/defun/defmethod/deftype/defstate
+    within_form = None
+    line_num_in_form = None
+    form_paren_stack = []
+    found_output = False
+    i = 0
+    while i < len(lines):
+        debug_lines.append(lines[i])
+        tline = lines[i].lstrip()
+        if "decomp begins" in tline.lower():
+            found_output = True
+            i = i + 1
+            continue
+        if not found_output:
+            i = i + 1
+            continue
+        # actually process code
+        if within_form is None:
+            # lets see if we are now in one
+            within_form = is_line_start_of_form(lines[i])
+            if within_form is not None:
+                line_num_in_form = 0
+                if has_form_ended(form_paren_stack, lines[i]):
+                    # the form opened and closed on the same line
+                    within_form = None
+                    form_paren_stack = []
+        elif within_form is not None:
+            # check if the form has ended by counting parens
+            if has_form_ended(form_paren_stack, lines[i]):
+                within_form = None
+                form_paren_stack = []
+                line_num_in_form = 0
+            else:
+                line_num_in_form = line_num_in_form + 1
+
+        if tline.startswith(";") or tline.startswith("#|"):
+            # treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
+            # this is done because there is nothing to match them against (if a comment is inside them for example)
+            # so we have to copy them in full
+            in_deviation_block = False
+            if "decomp deviation" in tline.lower() or tline.startswith("#|"):
+                in_deviation_block = True
+            current_comment = CommentMeta()
+            current_comment.line_in_file = i
+            current_comment.data = lines[i]
+            (
+                current_comment.symbol_before,
+                current_comment.symbol_padding_before,
+            ) = backtrack_for_symbol(lines, i)
+            (
+                current_comment.code_before,
+                current_comment.code_padding_before,
+            ) = backtrack_for_code(lines, i)
+            current_comment.containing_form = within_form
+            if within_form is not None:
+                append_form_metadata(current_comment, within_form)
+            current_comment.line_num_in_form = line_num_in_form
+            current_comment.inline = False
+            # look ahead to handle block comments
+            # "" stands in for the next line at end-of-file, so next_line is always
+            # bound (it used to be unbound when the first comment was the last line)
+            next_line = lines[i + 1] if i + 1 < len(lines) else ""
+            if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
+                in_deviation_block = False
+            while i + 1 < len(lines) and (
+                in_deviation_block
+                or next_line.lstrip().startswith(";")
+                or next_line.lstrip().startswith("|#")
+            ):
+                debug_lines.append(lines[i + 1])
+                i = i + 1
+                current_comment.data = current_comment.data + next_line
+                next_line = lines[i + 1] if i + 1 < len(lines) else ""
+                if "decomp deviation" in next_line.lower() or next_line.startswith(
+                    "|#"
+                ):
+                    in_deviation_block = False
+            # i now points at the last line of the comment run
+            (
+                current_comment.symbol_after,
+                current_comment.symbol_padding_after,
+            ) = lookahead_for_symbol(lines, i)
+            (
+                current_comment.code_after,
+                current_comment.code_padding_after,
+            ) = lookahead_for_code(lines, i)
+            comments.append(current_comment)
+            debug_lines.append(
+                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
+                    current_comment.symbol_before,
+                    current_comment.symbol_padding_before,
+                    current_comment.symbol_after,
+                    current_comment.symbol_padding_after,
+                    debug_nice_formatted_code(current_comment.code_before),
+                    current_comment.code_padding_before,
+                    debug_nice_formatted_code(current_comment.code_after),
+                    current_comment.code_padding_after,
+                )
+            )
+            debug_lines.append(
+                ";; [DEBUG]: in_form - {}...:{}\n".format(
+                    debug_nice_formatted_code(current_comment.containing_form),
+                    current_comment.line_num_in_form,
+                )
+            )
+        # inline comments
+        # TODO - cleanup duplication
+        elif ";" in tline:
+            current_comment = CommentMeta()
+            current_comment.line_in_file = i
+            # keep only the text from the first ";" onwards
+            current_comment.data = ";" + tline.partition(";")[2]
+            (
+                current_comment.symbol_before,
+                current_comment.symbol_padding_before,
+            ) = backtrack_for_symbol(lines, i)
+            (
+                current_comment.symbol_after,
+                current_comment.symbol_padding_after,
+            ) = lookahead_for_symbol(lines, i)
+            (
+                current_comment.code_before,
+                current_comment.code_padding_before,
+            ) = backtrack_for_code(lines, i)
+            (
+                current_comment.code_after,
+                current_comment.code_padding_after,
+            ) = lookahead_for_code(lines, i)
+            current_comment.containing_form = within_form
+            if within_form is not None:
+                append_form_metadata(current_comment, within_form)
+            current_comment.line_num_in_form = line_num_in_form
+            current_comment.symbol_inline = symbol_on_line(tline)
+            current_comment.inline = True
+            current_comment.code_in_line = tline.partition(";")[0]
+            comments.append(current_comment)
+            debug_lines.append(
+                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
+                    current_comment.symbol_before,
+                    current_comment.symbol_padding_before,
+                    current_comment.symbol_after,
+                    current_comment.symbol_padding_after,
+                    debug_nice_formatted_code(current_comment.code_before),
+                    current_comment.code_padding_before,
+                    debug_nice_formatted_code(current_comment.code_after),
+                    current_comment.code_padding_after,
+                )
+            )
+            debug_lines.append(
+                ";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
+                    debug_nice_formatted_code(current_comment.containing_form),
+                    current_comment.line_num_in_form,
+                    debug_nice_formatted_code(current_comment.code_in_line),
+                )
+            )
+        i = i + 1
+    return comments, debug_lines
+
+
+def get_symbol_at_line(line):
+    """Return the symbol named by a define/defun/defstate/deftype on the stripped `line`, else None."""
+    found = re.search(
+        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", line.strip()
+    )
+    return None if found is None else found.group(1)
+
+
+def relevant_symbol_comments_for_line_before(line):
+    """Take out of the module-level `comments` list every comment whose symbol_after is the symbol on `line`.
+
+    Returns the removed comments in their original order; an empty list when
+    `line` names no symbol.
+    """
+    symbol = get_symbol_at_line(line)
+    if symbol is None:
+        return []
+    # they WILL be placed, so they are dropped from the pending list right away
+    taken = []
+    remaining = []
+    for pending in comments:
+        if pending.symbol_after == symbol:
+            taken.append(pending)
+        else:
+            remaining.append(pending)
+    # slice-assign so the shared list object itself is updated
+    comments[:] = remaining
+    return taken
+
+
+def padding_before_comment(comment):
+    """Return symbol_padding_after newlines for a top-level comment that has a symbol after it, else ""."""
+    is_top_level = comment.containing_form is None
+    if is_top_level and comment.symbol_after is not None:
+        return "\n" * comment.symbol_padding_after
+    return ""
+
+
+def relevant_symbol_comments_for_inline(line):
+    """Take out of the module-level `comments` list every comment whose symbol_inline is the symbol on `line`.
+
+    Returns the removed comments in their original order; an empty list when
+    `line` names no symbol.
+    """
+    symbol = get_symbol_at_line(line)
+    if symbol is None:
+        return []
+    # they WILL be placed, so they are dropped from the pending list right away
+    taken = []
+    remaining = []
+    for pending in comments:
+        if pending.symbol_inline == symbol:
+            taken.append(pending)
+        else:
+            remaining.append(pending)
+    # slice-assign so the shared list object itself is updated
+    comments[:] = remaining
+    return taken
+
+
+def padding_after_comment(comment):
+    """Return symbol_padding_before newlines for a top-level comment that has a symbol before it, else ""."""
+    is_top_level = comment.containing_form is None
+    if is_top_level and comment.symbol_before is not None:
+        return "\n" * comment.symbol_padding_before
+    return ""
+
+
+def relevant_symbol_comments_for_line_after(line):
+    """Take out of the module-level `comments` list the comments that can only go after the symbol on `line`.
+
+    A comment qualifies when its symbol_before is that symbol and it has no
+    symbol_after.  Returns the removed comments in their original order; an
+    empty list when `line` names no symbol.
+    """
+    symbol = get_symbol_at_line(line)
+    if symbol is None:
+        return []
+    taken = []
+    remaining = []
+    for pending in comments:
+        # if we can, we prefer to put comments before not after (more accurate re-creation)
+        if pending.symbol_after is None and pending.symbol_before == symbol:
+            taken.append(pending)
+        else:
+            remaining.append(pending)
+    # slice-assign so the shared list object itself is updated
+    comments[:] = remaining
+    return taken
+
+
+# the first half of the defmethod/etc lines (before arg list) is less likely to change
+# so we want to split it to weight it more heavily
+def split_def_line(line):
+    """Split `line` in two at the first "(" that is not its very first character.
+
+    Returns `(first_part, second_part)`; second_part is "" when there is no
+    such paren.
+    """
+    # start the search at index 1: a "(" at index 0 belongs to the first part
+    cut = line.find("(", 1)
+    if cut == -1:
+        return line, ""
+    return line[:cut], line[cut:]
+
+
+def get_form_metadata(form_def_line):
+    """Classify a form definition line as `(kind, func_name, type_name)`.
+
+    kind is "function" (defun / defun-debug, type_name None), "behavior"
+    (defbehavior), "method" (defmethod) or "unknown" (both names None).
+    """
+    found = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_def_line)
+    if found is not None:
+        return "function", found.group(1), None
+    # the remaining kinds capture two tokens after the keyword, tried in this order
+    two_token_forms = (
+        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
+        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
+    )
+    for kind, pattern in two_token_forms:
+        found = re.search(pattern, form_def_line)
+        if found is not None:
+            return kind, found.group(1), found.group(2)
+    return "unknown", None, None
+
+
+built_in_method_names = [
+    "new",
+    "delete",
+    "print",
+    "inspect",
+    "length",
+    "asize-of",
+    "copy",
+    "relocate",
+    "memusage",
+]
+
+
+def different_method_names(form_func_name, comment_form_func_name):
+    """Return True only when the two names differ and at least one is in built_in_method_names."""
+    involves_built_in = (
+        comment_form_func_name in built_in_method_names
+        or form_func_name in built_in_method_names
+    )
+    return involves_built_in and form_func_name != comment_form_func_name
+
+
+def get_relevant_form_comments(form_def_line):
+    """Remove and return the pending comments that belong to the form opened by `form_def_line`.
+
+    Works on the module-level `comments` list (mutated in place) and reads the
+    module-level `decomp_form_def_lines`.  Comments with no containing form are
+    never taken.  A comment is taken when its containing form's definition line
+    matches `form_def_line` well enough and no line in `decomp_form_def_lines`
+    matches better; the returned list keeps the original order.
+    """
+    form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
+    code_def_part, code_rest = split_def_line(form_def_line)
+    relevant_comments = []
+    i = 0
+    # i only advances when comments[i] is skipped; a pop shifts the next one into slot i
+    while i < len(comments):
+        comment = comments[i]
+        if comment.containing_form is None:
+            i = i + 1
+            continue
+        (
+            comment_form_kind,
+            comment_form_func_name,
+            comment_form_type,
+        ) = get_form_metadata(comment.containing_form)
+        # First disqualify the form if it's obviously unrelated
+        if comment_form_kind != "unknown":
+            if form_kind != comment_form_kind:
+                i = i + 1
+                continue
+            elif form_kind == "function" and comment_form_func_name != form_func_name:
+                i = i + 1
+                continue
+            elif form_kind == "behavior" and comment_form_func_name != form_func_name:
+                i = i + 1
+                continue
+            elif form_kind == "method" and (
+                comment_form_type != form_type
+                or different_method_names(form_func_name, comment_form_func_name)
+            ):
+                i = i + 1
+                continue
+        # Evaluate it's score (comments and current def line)
+        # the part before the arg list is weighted 0.65, the remainder 0.35
+        def_part, rest = split_def_line(comment.containing_form)
+        def_score = fuzz.ratio(code_def_part, def_part) * 0.65
+        # 65.0 is the maximum the weighted def part can reach (a ratio of 100): take it straight away
+        if def_score == 65.0 and form_kind != "unknown":
+            relevant_comments.append(comment)
+            comments.pop(i)
+            continue
+        rest_score = fuzz.ratio(code_rest, rest) * 0.35
+        combined_score = def_score + rest_score
+        threshold = 50.0
+        if combined_score < threshold:
+            i = i + 1
+            continue
+        # Now, let's look at ALL other def lines yet to come from the decomp output
+        # if any are a better match, don't add the comment yet -- we'll add it when we get there!
+        # TODO - remove lines from the list as we find them so speed this up
+        found_better_form = False
+        for decomp_def_line in decomp_form_def_lines:
+            line_form_kind, line_form_func_name, line_form_type = get_form_metadata(
+                decomp_def_line
+            )
+            # NOTE: when form_kind is "unknown" no candidate line is scored at all (see TODO below)
+            if form_kind != "unknown":
+                if form_kind != line_form_kind:
+                    continue
+                elif form_kind == "function" and line_form_func_name != form_func_name:
+                    continue
+                elif form_kind == "behavior" and line_form_func_name != form_func_name:
+                    continue
+                elif form_kind == "method" and (
+                    line_form_type != form_type
+                    or different_method_names(form_func_name, line_form_func_name)
+                ):
+                    continue
+                # def_part/rest/def_score/rest_score are reused here for the candidate line;
+                # combined_score still holds the comment's own score from above
+                def_part, rest = split_def_line(decomp_def_line)
+                def_score = fuzz.ratio(code_def_part, def_part) * 0.65
+                if def_score == 65.0 and form_kind != "unknown":
+                    found_better_form = True
+                    break
+                rest_score = fuzz.ratio(code_rest, rest) * 0.35
+                if combined_score < def_score + rest_score:
+                    found_better_form = True
+                    break
+            # TODO otherwise? still test?
+        if found_better_form:
+            i = i + 1
+            continue
+        relevant_comments.append(comment)
+        comments.pop(i)
+    return relevant_comments
+
+
+# Fuzzy similarity of two lines; blank lines and lone parens are treated as never matching
+def score_alg(line1, line2):
+    """Return `fuzz.ratio` of the two stripped lines, or -1 if either is empty or a lone paren."""
+    left = line1.strip()
+    right = line2.strip()
+    for candidate in (left, right):
+        if candidate in ("", ")", "("):
+            return -1
+    return fuzz.ratio(left, right)
+
+
+# Find the form line that best matches the code a retained comment was attached to
+def _find_comment_anchor(form_lines, comment, threshold=50.0):
+    """Return `(index, place_kind)` of the best matching line in `form_lines`.
+
+    Each non-comment line is scored (via `score_alg`) against the comment's
+    `code_in_line`, `code_before` and `code_after`, in that order. Only scores of at
+    least `threshold` count, and a later candidate must score strictly higher to
+    replace an earlier one, so ties keep the first match.
+
+    `place_kind` is "inline", "next_line" or "before_line" depending on which piece
+    of code produced the winning score. Returns `(-1, None)` when nothing matched.
+    """
+    candidates = (
+        (comment.code_in_line, "inline"),
+        (comment.code_before, "next_line"),
+        (comment.code_after, "before_line"),
+    )
+    best_index = -1
+    best_score = -1
+    place_kind = None
+    for index, form_line in enumerate(form_lines):
+        # skip any comments that were previously added
+        if form_line.lstrip().startswith(";"):
+            continue
+        for code, kind in candidates:
+            if code is None:
+                continue
+            score = score_alg(form_line, code)
+            if score >= threshold and score > best_score:
+                best_index = index
+                best_score = score
+                place_kind = kind
+    return best_index, place_kind
+
+
+# Insert one retained comment into the lines of a freshly decompiled form
+def _place_comment_in_form(form_lines, comment):
+    """Add `comment` to `form_lines` in place.
+
+    The comment goes next to its best matching line; if no line matches well enough
+    it falls back to the line offset it had in the original form
+    (`comment.line_num_in_form`). Inline comments are appended to an existing line,
+    all others are inserted as a new line.
+    """
+    index_to_insert, place_kind = _find_comment_anchor(form_lines, comment)
+    # form_lines always holds at least the form's first line
+    last_index = len(form_lines) - 1
+    if index_to_insert == -1:
+        if comment.inline:
+            # Append to the line at the recorded offset (the previous code read the
+            # text from form_lines[-1] instead). The offset is clamped because the
+            # new form may be shorter than the one the comment was taken from.
+            target = min(comment.line_num_in_form, last_index)
+            form_lines[target] = form_lines[target].rstrip() + " " + comment.data
+        else:
+            form_lines.insert(comment.line_num_in_form, comment.data)
+    elif comment.inline:
+        target = index_to_insert
+        if place_kind == "next_line":
+            # clamp so a match on the last line can't index past the end
+            target = min(index_to_insert + 1, last_index)
+        form_lines[target] = form_lines[target].rstrip() + " " + comment.data
+    elif place_kind == "next_line":
+        form_lines.insert(
+            index_to_insert + 1,
+            padding_before_comment(comment) + comment.data,
+        )
+    else:
+        form_lines.insert(
+            index_to_insert,
+            padding_after_comment(comment) + comment.data,
+        )
+
+
+# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
+def merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines):
+    """Build the merged file contents in `final_lines`.
+
+    - Everything in the file at `gsrc_path` up to and including the first
+      ";; decomp begins" line is copied as-is (lines containing "[DEBUG]" are dropped).
+    - Every line of `decomp_lines` is then appended, with retained comments woven back
+      in: multi-line forms are gathered as a whole and receive their form comments,
+      top-level lines receive the symbol comments that belong before / on / after them.
+
+    `final_lines` is appended to in place; nothing is returned.
+    """
+    with open(gsrc_path) as f:
+        lines = f.readlines()
+
+    # Keep the hand-written header of the existing file
+    for line in lines:
+        if "[DEBUG]" in line:
+            continue
+        final_lines.append(line)
+        if line.lower().lstrip().startswith(";; decomp begins"):
+            break
+
+    within_form = None
+    form_paren_stack = []
+    i = 0
+    while i < len(decomp_lines):
+        line = decomp_lines[i]
+        # For every line in the decompiled output, we scan our comment list to see if anything matches
+        # if it does, we insert it appropriately and remove the comment from the list
+        #
+        # This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
+        if within_form is None:
+            # lets see if we are now in one
+            within_form = is_line_start_of_form(line)
+            # TODO - check line for symbol matches?
+            if within_form is not None:
+                if has_form_ended(form_paren_stack, line):
+                    # single-line form, handled as a top-level line below
+                    within_form = None
+                    form_paren_stack = []
+                else:
+                    # Get all of the lines of the form at once
+                    form_start = line
+                    form_lines = [form_start]
+                    i = i + 1
+                    # Bounds are checked BEFORE indexing, so a form that is still open
+                    # at the end of the output no longer raises an IndexError
+                    while i < len(decomp_lines):
+                        line = decomp_lines[i]
+                        if has_form_ended(form_paren_stack, line):
+                            within_form = None
+                            form_paren_stack = []
+                            break
+                        form_lines.append(line)
+                        i = i + 1
+                    # Add any comments needed to the form contents
+                    # - first we get all comments that match well with the form's start line (ie. defmethod ....)
+                    # - then each one is placed next to the line it matches best,
+                    #   defaulting to its old line offset if nothing exceeds the threshold
+                    for comment in get_relevant_form_comments(form_start):
+                        _place_comment_in_form(form_lines, comment)
+                    # Add the lines to the final output
+                    final_lines.extend(form_lines)
+
+        # Otherwise, we are at the top-level!
+        # (this also handles the closing line of a form that was just gathered)
+        if within_form is None:
+            before_comments = relevant_symbol_comments_for_line_before(line)
+            for comment in before_comments:
+                final_lines.append(padding_before_comment(comment) + comment.data)
+            inline_comments = relevant_symbol_comments_for_inline(line)
+            if len(inline_comments) > 0:
+                comment_str = ""
+                for comment in inline_comments:
+                    comment_str = comment_str + comment.data.strip()
+                comment_str = comment_str.replace(";", "")
+                final_lines.append(
+                    "{} ;; {}".format(line.rstrip(), comment_str.strip())
+                )
+            else:
+                final_lines.append(line)
+            after_comments = relevant_symbol_comments_for_line_after(line)
+            for comment in after_comments:
+                final_lines.append(padding_after_comment(comment) + comment.data)
+        # next line
+        i = i + 1
+
+
+# Score one line against the code a dangling comment was attached to
+def _score_comment_against_line(comment, line, threshold, highest_score):
+    """Return `(matched, highest_score, place_after)` for one candidate line.
+
+    The line is compared (via `score_alg`) with the comment's `code_in_line`,
+    `code_before` and `code_after`, in that order. A comparison wins when its score is
+    at least `threshold` and strictly above the running `highest_score`.
+
+    - `matched` is True if any comparison won
+    - `highest_score` is the updated running best
+    - `place_after` is True / False when `code_before` / `code_after` won last, and
+      None when the placement side should be left as it was
+    """
+    matched = False
+    place_after = None
+    candidates = (
+        (comment.code_in_line, None),
+        (comment.code_before, True),
+        (comment.code_after, False),
+    )
+    for code, side in candidates:
+        if code is None:
+            continue
+        score = score_alg(line, code)
+        if score >= threshold and score > highest_score:
+            highest_score = score
+            matched = True
+            if side is not None:
+                place_after = side
+    return matched, highest_score, place_after
+
+
+def handle_dangling_blocks(comments, final_lines, debug_lines):
+    """Insert every comment of `comments` into `final_lines`, in place.
+
+    For each comment all lines of `final_lines` are scanned for the best match, except
+    "decomp begins" lines and the inner lines of a form (only a form's first and last
+    line are scored). Without a good enough match, the comment falls back to its
+    original position in the file (`comment.line_in_file`).
+
+    Every scanned line is also appended to `debug_lines`, once per comment.
+    """
+    for comment in comments:
+        within_form = None
+        form_paren_stack = []
+        index_to_insert = -1
+        highest_score = -1
+        place_comment_after = True
+        threshold = 50.0
+        for i, line in enumerate(final_lines):
+            debug_lines.append(line)
+            if "decomp begins" in line.lower():
+                continue
+            # We can try to claw back a bit of efficiency by skipping the inside of forms
+            if within_form is not None:
+                # check if the form has ended by counting parens
+                if not has_form_ended(form_paren_stack, line):
+                    continue
+                within_form = None
+                form_paren_stack = []
+            else:
+                # lets see if we are now in a form
+                within_form = is_line_start_of_form(line)
+            # form boundaries and plain top-level lines are all scored the same way
+            matched, highest_score, place_after = _score_comment_against_line(
+                comment, line, threshold, highest_score
+            )
+            if matched:
+                index_to_insert = i
+            if place_after is not None:
+                place_comment_after = place_after
+        # add the comment!
+        if index_to_insert == -1:
+            if comment.inline:
+                final_lines[comment.line_in_file] = (
+                    final_lines[comment.line_in_file].rstrip() + " " + comment.data
+                )
+            else:
+                final_lines.insert(comment.line_in_file, comment.data)
+        elif comment.inline:
+            final_lines[index_to_insert] = (
+                final_lines[index_to_insert].rstrip() + " " + comment.data
+            )
+        elif place_comment_after:
+            final_lines.insert(
+                index_to_insert + 1,
+                padding_before_comment(comment) + comment.data,
+            )
+        else:
+            final_lines.insert(
+                index_to_insert,
+                padding_after_comment(comment) + comment.data,
+            )
@@ -0,0 +1,33 @@
+from utils import get_gsrc_path_from_filename, get_ref_path_from_filename, get_file_list
+import os
+
+# TODO - hard-coded to jak 2
+GAME = "jak2"
+REFERENCE_DIR = "./test/decompiler/reference/"
+# a gsrc file needs more lines than this to be considered non-empty
+MIN_GSRC_LINES = 15
+
+# Get all the gsrc files, if they aren't empty -- log if they aren't added to the reference tests as well
+file_list = get_file_list(GAME)
+
+# TODO - function for getting just the names
+missing_files = []
+for file in file_list:
+  # only entries whose third field is 3 are checked
+  # (presumably the object version, 3 being code files -- confirm against get_file_list)
+  if file[2] != 3:
+    continue
+  file_name = file[0]
+
+  # check gsrc
+  gsrc_path = get_gsrc_path_from_filename(GAME, file_name)
+  if not gsrc_path:
+    continue
+  with open(gsrc_path, 'r') as fp:
+    gsrc_length = sum(1 for _ in fp)
+  if gsrc_length <= MIN_GSRC_LINES:
+    continue
+
+  # check if ref exists
+  ref_path = get_ref_path_from_filename(GAME, file_name, REFERENCE_DIR)
+  if not os.path.exists(ref_path):
+    missing_files.append(file_name)
+
+print(missing_files)
@@ -0,0 +1,159 @@
+import re
+import argparse
+from utils import get_gsrc_path_from_filename
+from colorama import just_fix_windows_console, Fore, Back, Style
+
+just_fix_windows_console()
+
+parser = argparse.ArgumentParser("lint-gsrc-file")
+parser.add_argument("--game", help="The name of the game", type=str)
+parser.add_argument("--file", help="The name of the file", type=str)
+args = parser.parse_args()
+
+
+class LintMatch:
+    """A single rule violation: the file and line it was found on, plus the lines shown with it."""
+
+    def __init__(self, src_path, offending_lineno, context):
+        self.context = context
+        self.offending_lineno = offending_lineno
+        self.src_path = src_path
+
+    def __str__(self):
+        """Render a highlighted `@ path:line` header followed by the tab-indented context."""
+        location = "@ {}:{}\n".format(self.src_path, self.offending_lineno)
+        pieces = [Style.BRIGHT, Fore.MAGENTA, location, Fore.RESET, Style.RESET_ALL]
+        for entry in self.context:
+            # lines that are just brackets add nothing to the report
+            if entry.strip() in (")", "("):
+                continue
+            pieces.append("\t{}\n".format(entry))
+        return "".join(pieces)
+
+
+class LinterRule:
+    """A named regex check with a severity level; collects its own matches."""
+
+    def __init__(self, level, rule_name, regex_pattern, context_size):
+        self.matches = []
+        self.context_size = context_size
+        self.regex_pattern = regex_pattern
+        self.rule_name = rule_name
+        self.level = level
+
+    def __str__(self):
+        """Render the colored `[LEVEL] - name - /pattern/g:` heading for this rule."""
+        if self.level == "ERROR":
+            level_color = Fore.RED
+        elif self.level == "WARN":
+            level_color = Fore.YELLOW
+        else:
+            level_color = Fore.LIGHTBLUE_EX
+        colored_name = level_color + self.rule_name + Fore.RESET
+        heading = "[{}]{} - {} - {}/{}/g".format(
+            self.level,
+            Fore.RESET,
+            colored_name,
+            Fore.CYAN,
+            self.regex_pattern.pattern,
+        )
+        return "".join((level_color, heading, Fore.RESET, ":"))
+
+
+# Construct all rules
+# Each rule is (level, name, pattern, number of lines of context to print).
+# Patterns are raw strings so that regex escapes such as `\(`, `\d` and `\s` are passed
+# to `re` untouched instead of being (invalid) string escape sequences.
+linter_rules = [
+    # Infos
+    # Warnings
+    LinterRule("WARN", "method_splits", re.compile(r"method-of-(?:type|object)"), 3),
+    LinterRule("WARN", "func_splits", re.compile(r"\(t9-\d+(?:\s+[^\s]+\s*)?\)"), 3),
+    LinterRule("WARN", "missing_arg", re.compile(r"local-vars.*[at].*\s+none\)"), 1),
+    # Errors
+    LinterRule("ERROR", "missing_res_tag", re.compile(r".pcpyud"), 1),
+    LinterRule("ERROR", "decomp_error", re.compile(r";; ERROR"), 1),
+    LinterRule(
+        "ERROR",
+        "casting_stack_var",
+        re.compile(r"the-as\s+[^\s]*\s+.*\(new 'stack"),
+        2,
+    ),
+]
+
+src_path = get_gsrc_path_from_filename(args.game, args.file)
+
+# Iterate through the file line by line, check against each rule
+# if the rule is violated (it matches) then we append the match with useful details
+
+print("Linting GOAL_SRC File...")
+
+
+def get_context(lines, match_span, idx, amount_inclusive):
+    """Return up to `amount_inclusive` lines starting at `lines[idx]`, re-indented for display.
+
+    Trailing whitespace is removed from every line. The first line's indentation is
+    stripped entirely and every following line keeps its indentation relative to that
+    first line (never less than zero), so nested code still reads as nested.
+
+    `match_span` is accepted for the (not yet implemented) highlighting of the match
+    and is currently unused. Fewer lines are returned when the file ends early.
+    """
+    # TODO - first line, colorize the match using match_span
+    window = [raw.rstrip() for raw in lines[idx : idx + amount_inclusive]]
+    if not window:
+        return []
+    # Strip left pad, while maintaining indent: everything is measured against the
+    # first line (previously the offset of the second line was reused for all later ones)
+    base_indent = len(window[0]) - len(window[0].lstrip())
+    lines_grabbed = []
+    for line in window:
+        stripped_line = line.lstrip()
+        relative_indent = max(0, len(line) - len(stripped_line) - base_indent)
+        lines_grabbed.append(" " * relative_indent + stripped_line)
+    return lines_grabbed
+
+
+# Read the whole file up front: a match needs access to the lines that follow it
+with open(src_path) as f:
+    src_lines = f.readlines()
+
+# Check every line against every rule; a line can violate several rules at once
+for lineno, line in enumerate(src_lines):
+    for rule in linter_rules:
+        match = rule.regex_pattern.search(line)
+        if match:
+            rule.matches.append(
+                LintMatch(
+                    src_path,
+                    lineno + 1,  # report 1-based line numbers
+                    get_context(src_lines, match.span(), lineno, rule.context_size),
+                )
+            )
+
+# Iterate through all our linter rules, printing nicely in groups with the
+# context surrounding the match
+#
+# If we find any violations at warning or above, we will ultimately exit with code 1
+throw_error = False
+for rule in linter_rules:
+    if not rule.matches:
+        continue
+    print(rule)
+    if rule.level == "ERROR" or rule.level == "WARN":
+        throw_error = True
+    for match in rule.matches:
+        print(match)
+
+if throw_error:
+    print(Fore.RED + "Found potential problems, exiting with code 1!" + Fore.RESET)
+    # `exit()` is injected by the `site` module for interactive use and is not
+    # guaranteed to exist; raising SystemExit works everywhere
+    raise SystemExit(1)
+else:
+    print(Fore.GREEN + "Looks good!" + Fore.RESET)
@@ -1 +1,3 @@
 rapidfuzz
+GitPython
+colorama
@@ -0,0 +1,38 @@
+# Creates the `*.gd` files that go in ./goal_src/<game>/dgos
+# Takes input from the `dgo.txt` file that is generated by the decompiler
+# Run with all inputs enabled to get all the info!
+
+# example - python .\scripts\gsrc\skeleton_creation\generate_dgo_files.py --game jak2 --dgotxt .\decompiler_out\jak2\dgo.txt
+
+import argparse
+
+parser = argparse.ArgumentParser("generate_dgo_files")
+parser.add_argument("--game", help="The name of the game", type=str)
+parser.add_argument("--dgotxt", help="Path to the dgo.txt file", type=str)
+args = parser.parse_args()
+
+
+def _write_dgo_file(game, dgo_name, dgo_lines):
+  """Write the collected lines of one DGO to ./goal_src/<game>/dgos/<dgo_name>."""
+  path = "./goal_src/{}/dgos/{}".format(game, dgo_name)
+  print("writing to {}".format(path))
+  with open(path, "w") as wf:
+    wf.writelines(dgo_lines)
+
+
+# Read in the dgo.txt file
+with open(args.dgotxt, "r") as f:
+  lines = f.readlines()[2:] # skip the first two lines, assumed to be a comment header and an empty line
+
+# OpenGOAL still doesn't have a data serialization/deserialization format
+# so read line by line, assuming each DGO is separated by an empty line
+current_dgo_name = None
+current_dgo_lines = []
+for line in lines:
+  if line.strip() == "":
+    # Write out contents to the .gd file
+    if current_dgo_name is not None:
+      _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
+    current_dgo_name = None
+    current_dgo_lines = []
+    continue
+  if ".CGO" in line or ".DGO" in line:
+    print("found one! - {}".format(line.strip()))
+    # figure out the name
+    current_dgo_name = line.replace("(", "").replace("\"", "").strip().lower().replace(".dgo", ".gd").replace(".cgo", ".gd")
+    print(current_dgo_name)
+  if current_dgo_name is not None:
+    current_dgo_lines.append(line)
+
+# The last DGO is only followed by an empty line if the file happens to end with one,
+# so flush whatever is still pending
+if current_dgo_name is not None:
+  _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
@@ -0,0 +1,36 @@
+# Generates the `(cgo-file...` lines for the game.gp file
+# Attempts to put DGOs in the correct order based on the file order in `all_objs`
+
+import json
+
+common_deps = '("$OUT/obj/cty-guard-turret-button.o")'
+
+ignored_dgos = ["ENGINE", "KERNEL", "ART", "COMMON", "GAME", "NO-XGO"]
+
+dgos_encountered = set()
+dgos_handled = set()
+
+jak2_files = None
+with open("./goal_src/jak2/build/all_objs.json", "r") as f:
+  jak2_files = json.load(f)
+
+# Enumerate the files, order is dictated by code files (version 3)
+# At the end we will fill in any dgos that weren't considered "required"
+lines = []
+for file in jak2_files:
+  version = file[2]
+  dgo_list = file[3]
+  dgos_encountered.update(dgo_list)
+  # a code file decides the position of the first DGO it is listed in
+  # (entries with no DGOs at all are skipped instead of raising an IndexError)
+  if version == 3 and dgo_list:
+    dgo = dgo_list[0]
+    if dgo.lower() not in dgos_handled and dgo not in ignored_dgos:
+      dgos_handled.add(dgo.lower())
+      lines.append('(cgo-file "{}.gd" {})'.format(dgo.lower(), common_deps))
+# Leftover DGOs are emitted in sorted order: iterating the set directly gives an
+# order that can change between runs, which makes the generated output noisy to diff
+for dgo in sorted(dgos_encountered):
+  if dgo.lower() not in dgos_handled and dgo not in ignored_dgos:
+    dgos_handled.add(dgo.lower())
+    lines.append('(cgo-file "{}.gd" {})'.format(dgo.lower(), common_deps))
+
+for line in lines:
+  print(line)
@@ -36,41 +36,22 @@
 # - there are likely ways to make this more efficient

 import argparse
-import re
-from rapidfuzz import fuzz
+import os
+from code_retention.all_types_retention import update_alltypes_named_blocks
 from utils import get_gsrc_path_from_filename
-
-# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
-# RetainedCode or something
-class CommentMeta:
-    def __init__(self):
-        self.data = ""
-        self.symbol_before = None
-        self.symbol_inline = None
-        self.symbol_after = None
-        self.symbol_padding_before = None
-        self.symbol_padding_after = None
-        # NOTE - maybe holding more than just 1 line before/after might help?
-        self.code_before = None
-        self.code_after = None
-        self.code_padding_before = None
-        self.code_padding_after = None
-        self.line_num_in_form = None  # None == top level
-        self.containing_form = None  # none - top level
-        self.containing_form_kind = None  # function|method|behaviour
-        self.containing_form_func_name = None  # or the method/behaviour
-        self.containing_form_type = None
-        self.inline = False
-        self.code_in_line = None  # only for inline comments
-        self.line_in_file = None  # a worst-case scenario fallback
-
-    def __str__(self):
-        return "{}:{}:{}".format(self.data, self.symbol_before, self.symbol_after)
-
+from code_retention.code_retention import *
+import shutil
+from pathlib import Path
+import subprocess

 parser = argparse.ArgumentParser("update-from-decomp")
 parser.add_argument("--game", help="The name of the game", type=str)
 parser.add_argument("--file", help="The name of the file", type=str)
+parser.add_argument(
+    "--preserve",
+    help="Attempt to preserve comments and marked blocks",
+    action="store_true",
+)
 parser.add_argument(
    "--debug", help="Output debug metadata on every block", action="store_true"
 )
@@ -81,150 +62,14 @@ args = parser.parse_args()

 gsrc_path = get_gsrc_path_from_filename(args.game, args.file)

+# Step 1 - Find and update all named blocks from all-types (useful for enums)
+update_alltypes_named_blocks(args.game)
+
 comments = []
 debug_lines = []
-
-
-def debug_nice_formatted_code(val):
-    if val is None:
-        return None
-    return val.strip()[0:20]
-
-
-# returns (symbol | None, padding)
-def backtrack_for_symbol(lines, index):
-    padding = 0
-    for i in range(index - 1, 0, -1):
-        tline = lines[i].strip()
-        matches = re.search(
-            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
-        )
-        if matches is not None:
-            return matches.group(1), padding
-        elif (
-            not tline.strip() == "" and not tline.strip().startswith(";")
-        ) or "decomp begins" in tline.lower():
-            # we hit a non empty line (but it wasn't a symbol!)
-            return None, padding
-        elif tline.strip() == "":
-            padding = padding + 1
-    return None, padding
-
-
-def symbol_on_line(line):
-    tline = line.lstrip()
-    matches = re.search(
-        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
-    )
-    if matches is not None:
-        return matches.group(1)
-    return None
-
-
-def lookahead_for_symbol(lines, index):
-    padding = 0
-    for i in range(index + 1, len(lines), 1):
-        tline = lines[i].lstrip()
-        matches = re.search(
-            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
-        )
-        if matches is not None:
-            return matches.group(1), padding
-        elif (
-            not tline.strip() == "" and not tline.strip().startswith(";")
-        ) or "decomp begins" in tline.lower():
-            # we hit a non empty line (but it wasn't a symbol!)
-            return None, padding
-        elif tline.strip() == "":
-            padding = padding + 1
-    return None, padding
-
-
-def backtrack_for_code(lines, index):
-    padding = 0
-    for i in range(index - 1, 0, -1):
-        line = lines[i]
-        if line.strip() == "":
-            padding = padding + 1
-            continue
-        elif "decomp begins" in line.lower():
-            return None, padding
-        elif line.lstrip().startswith(";"):
-            continue
-        return line, padding
-
-
-def lookahead_for_code(lines, index):
-    padding = 0
-    for i in range(index + 1, len(lines), 1):
-        line = lines[i]
-        if line.strip() == "":
-            padding = padding + 1
-            continue
-        elif "decomp begins" in line.lower():
-            return None, padding
-        elif line.lstrip().startswith(";"):
-            continue
-        return line, padding
-
-
-# returns form, or none
-def is_line_start_of_form(line):
-    if line.rstrip().startswith(";"):
-        return None
-    matches = re.search(r"\(\s*([^\s.]*)\s+", line)
-    if matches is not None:
-        return line
-    return None
-
-
-def has_form_ended(stack, line):
-    # if the stack is empty, return true
-    line_before_comment = line.partition(";")[0]
-    for char in line_before_comment:
-        if char == "(":
-            stack.append(char)
-        elif char == ")":
-            if len(stack) == 0:
-                # unbalanced parens?
-                return True
-            stack.pop()
-            if len(stack) == 0:
-                return True
-        else:
-            if len(stack) == 0:
-                return True
-    return False
-
-
-def append_form_metadata(comment, form_start_line):
-    func_matches = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_start_line)
-    if func_matches is not None:
-        comment.containing_form_kind = "function"
-        comment.containing_form_func_name = func_matches.group(1)
-        comment.containing_form_type = None
-        return
-    behavior_matches = re.search(
-        r"\((?:defbehavior) ([^\s]*) ([^\s]*)", form_start_line
-    )
-    if behavior_matches is not None:
-        comment.containing_form_kind = "behavior"
-        comment.containing_form_func_name = behavior_matches.group(1)
-        comment.containing_form_type = behavior_matches.group(2)
-        return
-    method_matches = re.search(r"\((?:defmethod) ([^\s]*) ([^\s]*)", form_start_line)
-    if method_matches is not None:
-        comment.containing_form_kind = "method"
-        comment.containing_form_func_name = method_matches.group(1)
-        comment.containing_form_type = method_matches.group(2)
-        return
-    comment.containing_form_kind = "unknown"
-    comment.containing_form_func_name = None
-    comment.containing_form_type = None
-
-
-decomp_ignore_forms = []
+decomp_ignore_forms = ["defmethod inspect"]
 decomp_ignore_errors = False
+update_with_merge = False

 with open(gsrc_path) as f:
    lines_temp = f.readlines()
@@ -238,161 +83,27 @@ with open(gsrc_path) as f:
            decomp_ignore_errors = True
        if "og:ignore-form" in line:
            decomp_ignore_forms.append(line.partition("ignore-form:")[2].strip())
+        if "og:update-with-merge" in line:
+            update_with_merge = True
        lines.append(line)
-    # track if we are inside a define*/defun/defmethod/deftype/defstate
-    within_form = None
-    line_num_in_form = None
-    form_paren_stack = []
-    found_output = False
-    i = 0
-    while i < len(lines):
-        debug_lines.append(lines[i])
-        tline = lines[i].lstrip()
-        if "decomp begins" in tline.lower():
-            found_output = True
-            i = i + 1
-            continue
-        if not found_output:
-            i = i + 1
-            continue
-        # actually process code
-        if within_form is None:
-            # lets see if we are now in one
-            within_form = is_line_start_of_form(lines[i])
-            if within_form is not None:
-                line_num_in_form = 0
-                if has_form_ended(form_paren_stack, lines[i]):
-                    within_form = None
-                    form_paren_stack = []
-        elif within_form is not None:
-            # check if the form has ended by counting parens
-            if has_form_ended(form_paren_stack, lines[i]):
-                within_form = None
-                form_paren_stack = []
-                line_num_in_form = 0
-            else:
-                line_num_in_form = line_num_in_form + 1
-
-        if tline.startswith(";") or tline.startswith("#|"):
-            # treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
-            # this is done because there is nothing to match them against (if a comment is inside them for example)
-            # so we have to copy them in full
-            in_deviation_block = False
-            if "decomp deviation" in tline.lower() or tline.startswith("#|"):
-                in_deviation_block = True
-            current_comment = CommentMeta()
-            current_comment.line_in_file = i
-            current_comment.data = lines[i]
-            (
-                current_comment.symbol_before,
-                current_comment.symbol_padding_before,
-            ) = backtrack_for_symbol(lines, i)
-            (
-                current_comment.code_before,
-                current_comment.code_padding_before,
-            ) = backtrack_for_code(lines, i)
-            current_comment.containing_form = within_form
-            if within_form is not None:
-                append_form_metadata(current_comment, within_form)
-            current_comment.line_num_in_form = line_num_in_form
-            current_comment.inline = False
-            # look ahead to handle block comments
-            if i + 1 < len(lines):
-                next_line = lines[i + 1]
-            if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
-                in_deviation_block = False
-            while i + 1 < len(lines) and (
-                in_deviation_block
-                or next_line.lstrip().startswith(";")
-                or next_line.lstrip().startswith("|#")
-            ):
-                debug_lines.append(lines[i + 1])
-                i = i + 1
-                current_comment.data = current_comment.data + next_line
-                if i + 1 < len(lines):
-                    next_line = lines[i + 1]
-                if "decomp deviation" in next_line.lower() or next_line.startswith(
-                    "|#"
-                ):
-                    in_deviation_block = False
-            (
-                current_comment.symbol_after,
-                current_comment.symbol_padding_after,
-            ) = lookahead_for_symbol(lines, i)
-            (
-                current_comment.code_after,
-                current_comment.code_padding_after,
-            ) = lookahead_for_code(lines, i)
-            comments.append(current_comment)
-            debug_lines.append(
-                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
-                    current_comment.symbol_before,
-                    current_comment.symbol_padding_before,
-                    current_comment.symbol_after,
-                    current_comment.symbol_padding_after,
-                    debug_nice_formatted_code(current_comment.code_before),
-                    current_comment.code_padding_before,
-                    debug_nice_formatted_code(current_comment.code_after),
-                    current_comment.code_padding_after,
-                )
-            )
-            debug_lines.append(
-                ";; [DEBUG]: in_form - {}...:{}\n".format(
-                    debug_nice_formatted_code(current_comment.containing_form),
-                    current_comment.line_num_in_form,
-                )
-            )
-        # inline comments
-        # TODO - cleanup duplication
-        elif ";" in tline:
-            current_comment = CommentMeta()
-            current_comment.line_in_file = i
-            current_comment.data = ";" + tline.partition(";")[2]
-            (
-                current_comment.symbol_before,
-                current_comment.symbol_padding_before,
-            ) = backtrack_for_symbol(lines, i)
-            (
-                current_comment.symbol_after,
-                current_comment.symbol_padding_after,
-            ) = lookahead_for_symbol(lines, i)
-            (
-                current_comment.code_before,
-                current_comment.code_padding_before,
-            ) = backtrack_for_code(lines, i)
-            (
-                current_comment.code_after,
-                current_comment.code_padding_after,
-            ) = lookahead_for_code(lines, i)
-            current_comment.containing_form = within_form
-            if within_form is not None:
-                append_form_metadata(current_comment, within_form)
-            current_comment.line_num_in_form = line_num_in_form
-            current_comment.symbol_inline = symbol_on_line(tline)
-            current_comment.inline = True
-            current_comment.code_in_line = tline.partition(";")[0]
-            comments.append(current_comment)
-            debug_lines.append(
-                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
-                    current_comment.symbol_before,
-                    current_comment.symbol_padding_before,
-                    current_comment.symbol_after,
-                    current_comment.symbol_padding_after,
-                    debug_nice_formatted_code(current_comment.code_before),
-                    current_comment.code_padding_before,
-                    debug_nice_formatted_code(current_comment.code_after),
-                    current_comment.code_padding_after,
-                )
-            )
-            debug_lines.append(
-                ";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
-                    debug_nice_formatted_code(current_comment.containing_form),
-                    current_comment.line_num_in_form,
-                    debug_nice_formatted_code(current_comment.code_in_line),
-                )
-            )
-        i = i + 1
+    if args.preserve:
+        comments, debug_lines = process_original_lines(lines)

+# If we are going to `update_with_merge` then make a backup of the file, and
+# an empty file to use as the common ancestor.
+#
+# This means that all changes will be flagged as a conflict and will not be able to be
+# merged into the repo without being explicitly resolved
+if update_with_merge:
+    subprocess.run(
+        [
+            "git",
+            "restore",
+            gsrc_path
+        ]
+    )
+    shutil.copyfile(gsrc_path, gsrc_path.replace(".gc", ".before.gc"))
+    Path(gsrc_path.replace(".gc", ".empty.gc")).touch()

 if args.debug:
    with open(gsrc_path, "w") as f:
@@ -409,6 +120,8 @@ lines_to_ignore = [
    ";; failed to figure",
    ";; Used lq/sq",
    ";; this part is debug only",
+    ";; WARN: Return type mismatch int vs none",
+    ";; WARN: Stack slot offset",
 ]

 if decomp_ignore_errors:
@@ -428,7 +141,8 @@ def should_ignore_line(line):
    return False


-# TODO - check for existance probably
+# TODO - ignore brackets inside strings!
+
 decomp_file_path = "./decompiler_out/{}/{}_disasm.gc".format(args.game, args.file)
 with open(decomp_file_path) as f:
    lines = f.readlines()
@@ -459,10 +173,10 @@ with open(decomp_file_path) as f:
                if not skip_form:
                    decomp_form_def_lines.append(decomp_within_form)
                    decomp_lines.append(line)
-                while i < len(lines):
+                while i + 1 < len(lines):
                    i = i + 1
                    line = lines[i]
-                    if not skip_form:
+                    if not skip_form and not should_ignore_line(line):
                        decomp_lines.append(line)
                    if has_form_ended(decomp_form_paren_stack, line):
                        decomp_within_form = None
@@ -476,492 +190,64 @@ with open(decomp_file_path) as f:

 # Step 3: Start merging the new code + comments
 final_lines = []
-decomp_started = False
-
-
-def get_symbol_at_line(line):
-    tline = line.strip()
-    matches = re.search(
-        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
-    )
-    if matches is not None:
-        return matches.group(1)
-    return None
-
-
-def relevant_symbol_comments_for_line_before(line):
-    symbol = get_symbol_at_line(line)
-    if symbol is None:
-        return []
-    # Loop through comments, finding any that match the symbol
-    # they WILL be placed, so we can remove them from our list now
-    i = 0
-    relevant_comments = []
-    while i < len(comments):
-        comment = comments[i]
-        if comment.symbol_after == symbol:
-            relevant_comments.append(comment)
-            comments.pop(i)
-        else:
-            i = i + 1
-    return relevant_comments
-
-
-def padding_before_comment(comment):
-    if comment.containing_form is None:
-        if comment.symbol_after is not None:
-            return "\n" * comment.symbol_padding_after
-    return ""
-
-
-def relevant_symbol_comments_for_inline(line):
-    symbol = get_symbol_at_line(line)
-    if symbol is None:
-        return []
-    # Loop through comments, finding any that match the symbol
-    # they WILL be placed, so we can remove them from our list now
-    i = 0
-    relevant_comments = []
-    while i < len(comments):
-        comment = comments[i]
-        if comment.symbol_inline == symbol:
-            relevant_comments.append(comment)
-            comments.pop(i)
-        else:
-            i = i + 1
-    return relevant_comments
-
-
-def padding_after_comment(comment):
-    if comment.containing_form is None:
-        if comment.symbol_before is not None:
-            return "\n" * comment.symbol_padding_before
-    return ""
-
-
-def relevant_symbol_comments_for_line_after(line):
-    symbol = get_symbol_at_line(line)
-    if symbol is None:
-        return []
-    # Loop through comments, finding any that match the symbol
-    # they WILL be placed, so we can remove them from our list now
-    i = 0
-    relevant_comments = []
-    while i < len(comments):
-        comment = comments[i]
-        # if we can, we prefer to put comments before not after (more accurate re-creation)
-        if comment.symbol_after is None and comment.symbol_before == symbol:
-            relevant_comments.append(comment)
-            comments.pop(i)
-        else:
-            i = i + 1
-    return relevant_comments
-
-
-# the first half of the defmethod/etc lines (before arg list) is less likely to change
-# so we want to split it to weight it more heavily
-def split_def_line(line):
-    first_part = ""
-    second_part = ""
-    for index, char in enumerate(line):
-        if char == "(":
-            if index == 0:
-                first_part = first_part + char
-            else:
-                second_part = second_part + char
-            continue
-        else:
-            if second_part != "":
-                second_part = second_part + char
-            else:
-                first_part = first_part + char
-    return first_part, second_part
-
-
-def get_form_metadata(form_def_line):
-    func_matches = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_def_line)
-    if func_matches is not None:
-        return "function", func_matches.group(1), None
-    behavior_matches = re.search(r"\((?:defbehavior) ([^\s]*) ([^\s]*)", form_def_line)
-    if behavior_matches is not None:
-        return "behavior", behavior_matches.group(1), behavior_matches.group(2)
-    method_matches = re.search(r"\((?:defmethod) ([^\s]*) ([^\s]*)", form_def_line)
-    if method_matches is not None:
-        return "method", method_matches.group(1), method_matches.group(2)
-    return "unknown", None, None
-
-
-built_in_method_names = [
-    "new",
-    "delete",
-    "print",
-    "inspect",
-    "length",
-    "asize-of",
-    "copy",
-    "relocate",
-    "memusage",
-]
-
-
-def different_method_names(form_func_name, comment_form_func_name):
-    if (
-        comment_form_func_name not in built_in_method_names
-        and form_func_name not in built_in_method_names
-    ):
-        return False
-    return form_func_name != comment_form_func_name
-
-
-def get_relevant_form_comments(form_def_line):
-    form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
-    code_def_part, code_rest = split_def_line(form_def_line)
-    relevant_comments = []
-    i = 0
-    while i < len(comments):
-        comment = comments[i]
-        if comment.containing_form is None:
-            i = i + 1
-            continue
-        (
-            comment_form_kind,
-            comment_form_func_name,
-            comment_form_type,
-        ) = get_form_metadata(comment.containing_form)
-        # First disqualify the form if it's obviously unrelated
-        if comment_form_kind != "unknown":
-            if form_kind != comment_form_kind:
-                i = i + 1
-                continue
-            elif form_kind == "function" and comment_form_func_name != form_func_name:
-                i = i + 1
-                continue
-            elif form_kind == "behavior" and comment_form_func_name != form_func_name:
-                i = i + 1
-                continue
-            elif form_kind == "method" and (
-                comment_form_type != form_type
-                or different_method_names(form_func_name, comment_form_func_name)
-            ):
-                i = i + 1
-                continue
-        # Evaluate it's score (comments and current def line)
-        def_part, rest = split_def_line(comment.containing_form)
-        def_score = fuzz.ratio(code_def_part, def_part) * 0.65
-        if def_score == 65.0 and form_kind != "unknown":
-            relevant_comments.append(comment)
-            comments.pop(i)
-            continue
-        rest_score = fuzz.ratio(code_rest, rest) * 0.35
-        combined_score = def_score + rest_score
-        threshold = 50.0
-        if combined_score < threshold:
-            i = i + 1
-            continue
-        # Now, let's look at ALL other def lines yet to come from the decomp output
-        # if any are a better match, don't add the comment yet -- we'll add it when we get there!
-        # TODO - remove lines from the list as we find them so speed this up
-        found_better_form = False
-        for decomp_def_line in decomp_form_def_lines:
-            line_form_kind, line_form_func_name, line_form_type = get_form_metadata(
-                decomp_def_line
-            )
-            if form_kind != "unknown":
-                if form_kind != line_form_kind:
-                    continue
-                elif form_kind == "function" and line_form_func_name != form_func_name:
-                    continue
-                elif form_kind == "behavior" and line_form_func_name != form_func_name:
-                    continue
-                elif form_kind == "method" and (
-                    line_form_type != form_type
-                    or different_method_names(form_func_name, line_form_func_name)
-                ):
-                    continue
-                def_part, rest = split_def_line(decomp_def_line)
-                def_score = fuzz.ratio(code_def_part, def_part) * 0.65
-                if def_score == 65.0 and form_kind != "unknown":
-                    found_better_form = True
-                    break
-                rest_score = fuzz.ratio(code_rest, rest) * 0.35
-                if combined_score < def_score + rest_score:
-                    found_better_form = True
-                    break
-            # TODO otherwise? still test?
-        if found_better_form:
-            i = i + 1
-            continue
-        relevant_comments.append(comment)
-        comments.pop(i)
-    return relevant_comments
-
-
-# Simple fuzz ratio, but removes obvious outliers like empty lines / lines with only a paren
-def score_alg(line1, line2):
-    tline1 = line1.strip()
-    tline2 = line2.strip()
-    if tline1 == "" or tline1 == ")" or tline1 == "(":
-        return -1
-    if tline2 == "" or tline2 == ")" or tline2 == "(":
-        return -1
-    return fuzz.ratio(tline1, tline2)
-
-
-# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
-
-with open(gsrc_path) as f:
-    lines = f.readlines()
-    within_form = None
-    line_num_in_form = None
-    form_paren_stack = []
-    for line in lines:
-        if "[DEBUG]" in line:
-            continue
-        if line.lower().lstrip().startswith(";; decomp begins"):
-            decomp_started = True
+if args.preserve:
+    merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines)
+else:
+    with open(gsrc_path) as f:
+        lines = f.readlines()
+        for line in lines:
            final_lines.append(line)
-            break
-        if not decomp_started:
+            if line.lower().startswith(";; decomp begins"):
+                break
+        for line in decomp_lines:
            final_lines.append(line)
-            continue
-
-    i = 0
-    while i < len(decomp_lines):
-        line = decomp_lines[i]
-        # Otherwise, its a part of the output we have to be more careful about
-        # For every line in the decompiled output, we scan our comment list to see if anything matches
-        # if it does, we insert it appropriately and remove the comment from the list
-        #
-        # This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
-        if within_form is None:
-            # lets see if we are now in one
-            within_form = is_line_start_of_form(line)
-            # TODO - check line for symbol matches?
-            if within_form is not None:
-                line_num_in_form = 0
-                if has_form_ended(form_paren_stack, line):
-                    within_form = None
-                    form_paren_stack = []
-                else:
-                    # Get all of the lines of the form at once
-                    form_start = decomp_lines[i]
-                    form_lines = [form_start]
-                    while i < len(decomp_lines):
-                        i = i + 1
-                        line = decomp_lines[i]
-                        if has_form_ended(form_paren_stack, line):
-                            within_form = None
-                            form_paren_stack = []
-                            break
-                        else:
-                            form_lines.append(line)
-                    # Add any comments needed to the form contents
-                    # - first we get all comments that have match well with the form's start line (ie. defmethod ....)
-                    form_comments = get_relevant_form_comments(form_start)
-                    # - for each comment, let's find which line matches it the best,
-                    # if NONE exceed the threshold (if both match the same, pick the first), we default to the line offset
-                    for comment in form_comments:
-                        highest_score = -1
-                        index_to_insert = -1
-                        threshold = 50.0
-                        place_kind = None
-                        for index, form_line in enumerate(form_lines):
-                            # skip any comments that were previously added
-                            if form_line.lstrip().startswith(";"):
-                                continue
-                            if comment.code_in_line is not None:
-                                score = score_alg(form_line, comment.code_in_line)
-                                if score >= threshold and score > highest_score:
-                                    index_to_insert = index
-                                    highest_score = score
-                                    place_kind = "inline"
-                            if comment.code_before is not None:
-                                score = score_alg(form_line, comment.code_before)
-                                if score >= threshold and score > highest_score:
-                                    index_to_insert = index
-                                    highest_score = score
-                                    place_comment_after = True
-                                    place_kind = "next_line"
-                            if comment.code_after is not None:
-                                score = score_alg(form_line, comment.code_after)
-                                if score >= threshold and score > highest_score:
-                                    index_to_insert = index
-                                    highest_score = score
-                                    place_comment_after = False
-                                    place_kind = "before_line"
-                        # add the comment!
-                        if index_to_insert == -1:
-                            if comment.inline:
-                                form_lines[comment.line_num_in_form] = (
-                                    form_lines[index_to_insert].rstrip()
-                                    + " "
-                                    + comment.data
-                                )
-                            else:
-                                form_lines.insert(
-                                    comment.line_num_in_form, comment.data
-                                )
-                        elif comment.inline:
-                            form_index = index_to_insert
-                            if place_kind == "next_line":
-                                form_index = index_to_insert + 1
-                            form_lines[form_index] = (
-                                form_lines[form_index].rstrip() + " " + comment.data
-                            )
-                        elif place_kind == "next_line":
-                            form_lines.insert(
-                                index_to_insert + 1,
-                                padding_before_comment(comment) + comment.data,
-                            )
-                        else:
-                            form_lines.insert(
-                                index_to_insert,
-                                padding_after_comment(comment) + comment.data,
-                            )
-                    # Add the lines to the final output
-                    for form_line in form_lines:
-                        final_lines.append(form_line)
-
-        # Otherwise, we are at the top-level!
-        if within_form is None:
-            before_comments = relevant_symbol_comments_for_line_before(line)
-            for comment in before_comments:
-                final_lines.append(padding_before_comment(comment) + comment.data)
-            inline_comments = relevant_symbol_comments_for_inline(line)
-            if len(inline_comments) > 0:
-                comment_str = ""
-                for comment in inline_comments:
-                    comment_str = comment_str + comment.data.strip()
-                comment_str = comment_str.replace(";", "")
-                final_lines.append(
-                    "{} ;; {}".format(line.rstrip(), comment_str.strip())
-                )
-            else:
-                final_lines.append(line)
-            after_comments = relevant_symbol_comments_for_line_after(line)
-            for comment in after_comments:
-                final_lines.append(padding_after_comment(comment) + comment.data)
-        # next line
-        i = i + 1

 # Step 3.b: Handle any remaining top level comments
 # If we can't find a code line that meets a threshold, default to their line number
 # - Why is this done after: if a comment is associated with nothing but code, we have no
 #   guarantee where it should go, so we have to wait until all code is populated
 # This is SUPER inefficient, so hopefully we've processed nearly all comments by this point
-for comment in comments:
-    within_form = None
-    line_num_in_form = None
-    form_paren_stack = []
-    found_output = True
-    i = 0
-    index_to_insert = -1
-    highest_score = -1
-    place_comment_after = True
-    threshold = 50.0
-    while i < len(final_lines):
-        debug_lines.append(final_lines[i])
-        tline = final_lines[i].lstrip()
-        if "decomp begins" in tline.lower():
-            found_output = True
-            i = i + 1
-            continue
-        if not found_output:
-            i = i + 1
-            continue
-        line = final_lines[i]
-        # We can try to claw back a bit of efficiency by skipping the inside of forms
-        if within_form is not None:
-            # check if the form has ended by counting parens
-            if has_form_ended(form_paren_stack, line):
-                if comment.code_in_line is not None:
-                    score = score_alg(line, comment.code_in_line)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                if comment.code_before is not None:
-                    score = score_alg(line, comment.code_before)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = True
-                if comment.code_after is not None:
-                    score = score_alg(line, comment.code_after)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = False
-                within_form = None
-                form_paren_stack = []
-                line_num_in_form = 0
-            else:
-                line_num_in_form = line_num_in_form + 1
-        else:
-            # lets see if we are now in a form
-            within_form = is_line_start_of_form(line)
-            if within_form is not None:
-                if comment.code_in_line is not None:
-                    score = score_alg(line, comment.code_in_line)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                if comment.code_before is not None:
-                    score = score_alg(line, comment.code_before)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = True
-                if comment.code_after is not None:
-                    score = score_alg(line, comment.code_after)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = False
-                line_num_in_form = 0
-            else:
-                # just normal code, check it
-                if comment.code_in_line is not None:
-                    score = score_alg(line, comment.code_in_line)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                if comment.code_before is not None:
-                    score = score_alg(line, comment.code_before)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = True
-                if comment.code_after is not None:
-                    score = score_alg(line, comment.code_after)
-                    if score >= threshold and score > highest_score:
-                        index_to_insert = i
-                        highest_score = score
-                        place_comment_after = False
-        i = i + 1
-    # end of while loop
-    # add the comment!
-    if index_to_insert == -1:
-        if comment.inline:
-            final_lines[comment.line_in_file] = (
-                final_lines[comment.line_in_file].rstrip() + " " + comment.data
-            )
-        else:
-            final_lines.insert(comment.line_in_file, comment.data)
-    elif comment.inline:
-        final_lines[index_to_insert] = (
-            final_lines[index_to_insert].rstrip() + " " + comment.data
-        )
-    elif place_comment_after:
-        final_lines.insert(
-            index_to_insert + 1,
-            padding_before_comment(comment) + comment.data,
-        )
-    else:
-        final_lines.insert(
-            index_to_insert,
-            padding_after_comment(comment) + comment.data,
-        )
+if args.preserve:
+    handle_dangling_blocks(comments, final_lines, debug_lines)

-# Step 4: Write it out
+# Step 4.a: Remove excessive new-lines from the end of the output, only leave a single empty new-line
+lines_to_ignore = 0
+i = len(final_lines) - 1
+while i > 0 and (final_lines[i] == "\n" or final_lines[i] == "0\n"):
+    lines_to_ignore = lines_to_ignore + 1
+    i = i - 1
+
+print("ignoring - {}".format(lines_to_ignore))
+
+# Step 4.b: Write it out
 with open(gsrc_path, "w") as f:
-    f.writelines(final_lines)
+    i = 0
+    while i + lines_to_ignore < len(final_lines):
+        f.write(final_lines[i])
+        i = i + 1
+
+# If we need to merge, now is the time!
+if update_with_merge:
+    shutil.move(gsrc_path, gsrc_path.replace(".gc", ".after.gc"))
+    shutil.move(gsrc_path.replace(".gc", ".before.gc"), gsrc_path)
+    subprocess.run(
+        [
+            "git",
+            "merge-file",
+            gsrc_path,
+            gsrc_path.replace(".gc", ".empty.gc"),
+            gsrc_path.replace(".gc", ".after.gc"),
+            "-L",
+            "Before Updating",
+            "-L",
+            "ignored",
+            "-L",
+            "After Updating",
+        ]
+    )
+    if os.path.exists(gsrc_path.replace(".gc", ".empty.gc")):
+        os.remove(gsrc_path.replace(".gc", ".empty.gc"))
+    if os.path.exists(gsrc_path.replace(".gc", ".before.gc")):
+        os.remove(gsrc_path.replace(".gc", ".before.gc"))
+    if os.path.exists(gsrc_path.replace(".gc", ".after.gc")):
+        os.remove(gsrc_path.replace(".gc", ".after.gc"))
@@ -0,0 +1,64 @@
+# Updates files in gsrc if they are modified in the reference test folder
+# Uses git
+import subprocess
+from git import Repo
+
+repo = Repo("./")
+
+import argparse
+import os
+import glob
+
+parser = argparse.ArgumentParser("update-gsrc-via-refs")
+parser.add_argument("--game", help="The name of the game", type=str)
+parser.add_argument("--decompiler", help="The path to the decompiler", type=str)
+parser.add_argument("--decompiler_config", help="The decomp config", type=str)
+parser.add_argument("--file_pattern", help="Provide a glob pattern to find files, instead of using git status. Relative to the reference test folder", type=str)
+args = parser.parse_args()
+
+def get_files_via_git():
+    """Return object names for reference files that git reports as changed or new.
+
+    Paths come from ``repo.index.diff(None)`` and ``repo.untracked_files``.
+    A path is kept when it contains both ``args.game`` and ``"_REF"``; the
+    returned set holds each kept path's base name with ``"_REF.gc"`` removed.
+    """
+    changed_paths = [
+        entry.b_rawpath.decode("utf-8") for entry in repo.index.diff(None)
+    ]
+    candidate_paths = changed_paths + list(repo.untracked_files)
+    return {
+        os.path.basename(candidate).replace("_REF.gc", "")
+        for candidate in candidate_paths
+        if args.game in candidate and "_REF" in candidate
+    }
+
+def get_files_via_glob():
+    """Return object names for reference files matching ``args.file_pattern``.
+
+    The pattern is resolved (recursively) under
+    ``./test/decompiler/reference/<args.game>/``; each match contributes its
+    base name with ``"_REF.gc"`` removed.
+    """
+    search_pattern = "./test/decompiler/reference/{}/{}".format(
+        args.game, args.file_pattern
+    )
+    return {
+        os.path.basename(match).replace("_REF.gc", "")
+        for match in glob.glob(search_pattern, recursive=True)
+    }
+
+# Get a list of changed files, as well as new files.
+# An explicit --file_pattern takes precedence over asking git.
+import sys
+
+if args.file_pattern:
+    file_names = get_files_via_glob()
+else:
+    file_names = get_files_via_git()
+
+for file_name in file_names:
+    print("Decompiling - {}".format(file_name))
+    # Decompile file: the config override restricts the run to this one object.
+    subprocess.run(
+        [
+            args.decompiler,
+            "./decompiler/config/{}".format(args.decompiler_config),
+            "./iso_data",
+            "./decompiler_out",
+            "--config-override",
+            '{{"allowed_objects": ["{}"]}}'.format(file_name),
+        ]
+    )
+    print("Updating - {}".format(file_name))
+    # Update gsrc. The command is passed as an argument list (no shell), so
+    # names containing spaces or shell metacharacters are handed over verbatim,
+    # and sys.executable makes the helper run under the same interpreter as
+    # this script instead of whatever "python" happens to be on PATH.
+    subprocess.run(
+        [
+            sys.executable,
+            "./scripts/gsrc/update-from-decomp.py",
+            "--game",
+            args.game,
+            "--file",
+            file_name,
+        ]
+    )
@@ -29,3 +29,24 @@ def get_gsrc_path_from_filename(game_name, file_name):
    print("{} couldn't find in /goal_src/{}!".format(file_name, game_name))
    exit(1)
  return path
+
+def get_alltypes_path_from_game(game_name):
+  """Return the relative path of the all-types.gc file for the given game.
+
+  Only the exact name "jak1" selects the top-level config file; every other
+  value yields the jak2 path.
+  """
+  return (
+    "./decompiler/config/all-types.gc"
+    if game_name == "jak1"
+    else "./decompiler/config/jak2/all-types.gc"
+  )
+
+def get_ref_path_from_filename(game_name, file_name, ref_folder):
+  """Build the path of the <file_name>_REF.gc reference file for a game.
+
+  Looks through get_file_list(game_name) for the first entry whose field 2
+  equals 3 and whose field 0 equals file_name, and uses that entry's field 4
+  as the sub-directory. Prints a message and exits with status 1 when no
+  such entry exists (or its field 4 is empty).
+  NOTE(review): presumably field 0 is the object name and field 4 its source
+  folder -- confirm against get_file_list.
+  """
+  src_path = next(
+    (
+      entry[4]
+      for entry in get_file_list(game_name)
+      if entry[2] == 3 and entry[0] == file_name
+    ),
+    "",
+  )
+  if src_path == "":
+    print("couldn't determine ref path for {}:{}!".format(game_name, file_name))
+    exit(1)
+  return os.path.join(ref_folder, game_name, src_path, "{}_REF.gc".format(file_name))
@@ -3,4 +3,4 @@
 # Directory of this script
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

-"${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
+ "${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
@@ -5,6 +5,7 @@ vars:
  GK_BIN_RELEASE_DIR: './build/game'
  DECOMP_BIN_RELEASE_DIR: './build/decompiler'
  MEMDUMP_BIN_RELEASE_DIR: './build/tools'
+  TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
  OFFLINETEST_BIN_RELEASE_DIR: './build'
  GOALCTEST_BIN_RELEASE_DIR: './build'
  EXE_FILE_EXTENSION: ''
@@ -5,6 +5,7 @@ vars:
  GK_BIN_RELEASE_DIR: './build/game'
  DECOMP_BIN_RELEASE_DIR: './build/decompiler'
  MEMDUMP_BIN_RELEASE_DIR: './build/tools'
+  TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
  OFFLINETEST_BIN_RELEASE_DIR: './build'
  GOALCTEST_BIN_RELEASE_DIR: './build'
  EXE_FILE_EXTENSION: ''
@@ -5,6 +5,7 @@ vars:
  GK_BIN_RELEASE_DIR: './out/build/Release/bin'
  DECOMP_BIN_RELEASE_DIR: './out/build/Release/bin'
  MEMDUMP_BIN_RELEASE_DIR: './out/build/Release/bin'
+  TYPESEARCH_BIN_RELEASE_DIR: './out/build/Release/bin'
  OFFLINETEST_BIN_RELEASE_DIR: './out/build/Release/bin'
  GOALCTEST_BIN_RELEASE_DIR: './out/build/Release/bin'
  EXE_FILE_EXTENSION: '.exe'
@@ -0,0 +1,14 @@
+import argparse
+parser = argparse.ArgumentParser()
+parser.add_argument("--game")
+args = parser.parse_args()
+import os
+import glob
+
+def delete_extension(ext):
+  """Delete every file directly inside ./decompiler_out/<args.game>/ matching *.<ext>."""
+  pattern = './decompiler_out/{}/*.{}'.format(args.game, ext)
+  for stale_path in glob.glob(pattern):
+    os.remove(stale_path)
+
+delete_extension("gc")
+delete_extension("asm")
@@ -0,0 +1,10 @@
+import argparse
+import zipfile
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--file")
+parser.add_argument("--out")
+args = parser.parse_args()
+
+# Unpack every member of the archive given by --file into the --out directory.
+# NOTE(review): extractall does not vet the archive contents -- only point this
+# at archives from a trusted source.
+with zipfile.ZipFile(args.file, 'r') as p2s:
+  p2s.extractall(args.out)
@@ -3,32 +3,43 @@ import glob
 import argparse
 import shutil

+from gsrc.utils import get_ref_path_from_filename
+
 ## Script to update failing _REF.gc files
 ## Instructions:
-##  run offline-test with the `--dump-mode` flag set. This generates a "failures" folder.
+##  run offline-test with the `--dump_current_output` flag set. This generates a "failures" folder.
 ## update reference like this
-##    python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference
+##    python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference --game [jak1|jak2]

-def get_goal_files(root_dir):
-	return [f for file in os.walk(root_dir) for f in glob.glob(os.path.join(file[0], '*.gc'))]
+def get_failures(root_dir):
+    """Return the paths of all ``*.gc`` files found anywhere under ``root_dir``.
+
+    Every directory visited by ``os.walk`` is globbed for ``*.gc``; the
+    matches are concatenated in walk order.
+    """
+    matches = []
+    for dir_path, _subdirs, _filenames in os.walk(root_dir):
+        matches.extend(glob.glob(os.path.join(dir_path, "*.gc")))
+    return matches
+
+# removesuffix only added in python 3.9....
+def removesuffix(self: str, suffix: str, /) -> str:
+    """Return ``self`` without a trailing ``suffix``, like ``str.removesuffix``.
+
+    Backport for interpreters older than Python 3.9. When ``self`` does not
+    end with ``suffix`` -- or ``suffix`` is empty -- the string is returned
+    unchanged.
+    """
+    # The emptiness check matters: every string "ends with" "", and
+    # self[:-0] is self[:0], which would wrongly yield an empty string.
+    if suffix and self.endswith(suffix):
+        return self[: -len(suffix)]
+    return self[:]

 def main():
    parser = argparse.ArgumentParser()
-    parser.add_argument(dest='diff', help='the failures folder')
-    parser.add_argument(dest='reference', help='the test/decompiler/reference folder')
+    parser.add_argument(dest="diff", help="the failures folder")
+    parser.add_argument(dest="reference", help="the test/decompiler/reference folder")
+    parser.add_argument("--game", help="The name of the game (jak1/jak2)", type=str)
    args = parser.parse_args()

-    location_map = {os.path.basename(x) : x for x in get_goal_files(args.reference)}
-
-    for replacement in get_goal_files(args.diff):
-    	base = os.path.basename(replacement)
-    	if base not in location_map:
-    		print("Could not find file {}".format(base))
-    		exit(-1)
-    	print("replace {} with {}".format(location_map[base], replacement))
-    	shutil.copyfile(replacement, location_map[base])
+    for replacement in get_failures(args.diff):
+        obj_name = removesuffix(os.path.basename(replacement), ".gc").replace("_REF", "")

+        # Find gsrc path, given game-name
+        ref_path = get_ref_path_from_filename(args.game, obj_name, args.reference)

+        print("replace {} with {}".format(ref_path, replacement))
+        shutil.copyfile(replacement, ref_path)

 if __name__ == "__main__":
    main()