mirror of
https://github.com/open-goal/jak-project
synced 2026-06-24 01:41:29 -04:00
Merge remote-tracking branch 'open-goal/master' into v/translations
This commit is contained in:
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Start the Release build of goalc with the --auto-lt and --user-auto flags.
out\build\Release\bin\goalc --auto-lt --user-auto
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -1,4 +0,0 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Start the Release build of goalc with the --user-auto flag.
out\build\Release\bin\goalc --user-auto
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -1,4 +1,4 @@
|
||||
@echo off
|
||||
cd ..\..
|
||||
out\build\Release\bin\goalc --auto-lt --user-auto
|
||||
out\build\Release\bin\goalc --user-auto
|
||||
pause
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Start the Release build of goalc with --user-auto, selecting jak2 via --game.
out\build\Release\bin\goalc --user-auto --game jak2
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Start the Release build of gk with the -boot, -fakeiso, -debug, -v and -jak2 flags.
out\build\Release\bin\gk -boot -fakeiso -debug -v -jak2
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Start the Release build of gk with -fakeiso, -debug, -v and -jak2 (no -boot).
out\build\Release\bin\gk -fakeiso -debug -v -jak2
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -1,2 +1,2 @@
|
||||
cd ..\..
|
||||
git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
|
||||
git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
cd ..\..
|
||||
git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
|
||||
git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
@echo off
|
||||
cd ..\..
|
||||
out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak1\ --game jak1
|
||||
scripts\update_decomp_reference.py failures\ test\decompiler\reference\
|
||||
python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak1
|
||||
RMDIR /Q/S failures
|
||||
pause
|
||||
@@ -0,0 +1,6 @@
|
||||
@echo off
REM Move up two directories so the relative paths below resolve.
cd ..\..
REM Run offline-test for jak2 with -d, reading ISO data from iso_data\jak2\.
out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak2\ --game jak2
REM Pass the failures\ directory and the reference directory to update_decomp_reference.py.
python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak2
REM Remove the failures directory and its contents without prompting.
RMDIR /Q/S failures
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative path below resolves.
cd ..\..
REM Run goalc-test restricted by gtest filter to Jak2TypeConsistency.TypeConsistency.
out\build\Release\bin\goalc-test --gtest_filter="Jak2TypeConsistency.TypeConsistency"
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -0,0 +1,4 @@
|
||||
@echo off
REM Move up two directories so the relative paths below resolve.
cd ..\..
REM Run update-gsrc-via-refs.py for jak2, pointing it at the Release decompiler and the jak2_ntsc_v1 config.
python3 scripts\gsrc\update-gsrc-via-refs.py --game jak2 --decompiler out\build\Release\bin\decompiler.exe --decompiler_config .\decompiler\config\jak2_ntsc_v1.jsonc
REM Keep the console window open until a key is pressed.
pause
|
||||
@@ -0,0 +1,15 @@
|
||||
import os
|
||||
import glob
|
||||
|
||||
def get_goal_files(root_dir, ext = "*.gc"):
    """Collect every path below root_dir whose file name matches the glob ext.

    Directories are visited in os.walk order (root_dir itself included), and
    within each directory the matches are kept in the order glob reports them.
    """
    matches = []
    for dir_path, _subdirs, _filenames in os.walk(root_dir):
        pattern = os.path.join(dir_path, ext)
        matches.extend(glob.glob(pattern))
    return matches
|
||||
|
||||
# Print every line that starts with "(def" from the .gc files found under
# the jak2 import output directory.
all_files = get_goal_files("./decompiler_out/jak2/import")
def_lines = []
for path in all_files:
    with open(path) as f:
        def_lines.extend(line for line in f if line.startswith("(def"))
# The collected lines keep their own line endings, so they are joined as-is.
result = "".join(def_lines)
print(result)
|
||||
@@ -4,7 +4,7 @@ import argparse
|
||||
|
||||
|
||||
### Script to track decompilation progress.
|
||||
### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src
|
||||
### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src/jak2
|
||||
|
||||
def get_goal_files(root_dir, ext = "*.gc"):
|
||||
"""Get all GOAL source files under root_dir."""
|
||||
@@ -29,7 +29,7 @@ def print_table(stats, total_gc_files):
|
||||
print("-------------------------------------")
|
||||
print("| {: <24} | {: >6} |".format("TOTAL", total_lines))
|
||||
print("-------------------------------------")
|
||||
estimated_lines = 500000
|
||||
estimated_lines = 1000000
|
||||
print("Progress: {}/{} lines ({:.2f}%)".format(total_lines, estimated_lines, 100. * total_lines / estimated_lines))
|
||||
print("{}/{} files modified from template ({:.2f}%)".format(len(stats), total_gc_files,
|
||||
100. * len(stats) / total_gc_files))
|
||||
@@ -41,8 +41,7 @@ def main():
|
||||
args = parser.parse_args()
|
||||
all_files = get_goal_files(args.goal_src)
|
||||
|
||||
ref_files = get_goal_files(args.goal_src + "/../test/", "*_REF.gc")
|
||||
|
||||
ref_files = get_goal_files(args.goal_src + "/../../test/decompiler/reference/jak2", "*_REF.gc")
|
||||
ref_files_no_ext = [os.path.basename(fn)[:-7] for fn in ref_files]
|
||||
|
||||
|
||||
@@ -62,7 +61,7 @@ def main():
|
||||
|
||||
total_gc_files += 1
|
||||
|
||||
if line_count == 7 or short_name in excluded_files:
|
||||
if line_count < 10 or short_name in excluded_files:
|
||||
# the template has 7 lines, just skip it.
|
||||
continue
|
||||
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
import argparse
import re

# Usage: pass the object file's base name via --file. The script reads
# decompiler_out/jak1/<file>_disasm.gc and decompiler_out/jak1/<file>_ir2.asm
# and prints label entries in the form ["<label>", "<type>", true].
parser = argparse.ArgumentParser()
parser.add_argument("--file")
args = parser.parse_args()

# Collect every label-shaped token (an "L" followed by digits) that appears in
# the disassembly output. A dict is used as an ordered set so duplicates are
# dropped while the first-seen order is kept.
label_pattern = re.compile(r'L\d+')
seen_labels = {}
file_path = "decompiler_out/jak1/{}_disasm.gc".format(args.file)
with open(file_path) as f:
    for line in f:
        for label in label_pattern.findall(line):
            seen_labels.setdefault(label, None)

# let's go try to identify the types from the IR2 file if we can
label_lines = []
typed_labels = set()
file_path = "decompiler_out/jak1/{}_ir2.asm".format(args.file)
with open(file_path) as f:
    content = f.readlines()

next_label_will_be_lambda = False
for i, line in enumerate(content):
    if ".function (anon-function" in line:
        next_label_will_be_lambda = True
    if not line.startswith("L"):
        continue
    # A label definition is "<label>:" at the very start of the line.
    label, colon, _rest = line.partition(":")
    if not colon or label not in seen_labels or label in typed_labels:
        continue
    if next_label_will_be_lambda:
        # If we were expecting a lambda
        the_type = "_lambda_"
        next_label_will_be_lambda = False
    elif "(offset 2)" in line:
        # special case for pairs
        the_type = "pair"
    else:
        # Check if the previous line has a `.type`; the first line of the file
        # has no previous line (do not wrap around to the last one).
        prev_line = content[i - 1] if i > 0 else ""
        if ".type " not in prev_line:
            continue
        the_type = prev_line.split(".type ")[1].strip()
    label_lines.append("[\"{}\", \"{}\", true]".format(label, the_type))
    typed_labels.add(label)

labels_with_no_type = [label for label in seen_labels if label not in typed_labels]

# Print out the labels
print("Here are the labels I couldn't find a type for:")
for label in labels_with_no_type:
    print("- {}".format(label))
print("And here are the ones I could:")
print(",\n".join(label_lines))
|
||||
@@ -0,0 +1,25 @@
|
||||
# Merge tools use specific algorithms or assumptions to detect conflicts
# and not all of them will obviously flag them, even if they use the standard format
#
# So this is to ensure no conflict markers get ignored in goal_src at least
#
# Exit status: 0 when no file under ./goal_src contains a marker, 1 otherwise.
import os
import sys

CONFLICT_MARKER = "<<<<<<<"

files_with_unresolved_conflicts = []

for dirpath, subdirs, files in os.walk("./goal_src"):
    for filename in files:
        full_path = os.path.join(dirpath, filename)
        # The marker is plain ASCII, so bytes that do not decode are replaced
        # instead of letting one odd file abort the whole scan.
        with open(full_path, "r", encoding="utf-8", errors="replace") as f:
            # Stream the file and stop at the first marker found.
            if any(CONFLICT_MARKER in line for line in f):
                files_with_unresolved_conflicts.append(full_path)

if not files_with_unresolved_conflicts:
    sys.exit(0)

print("There are unresolved conflicts in ./goal_src/")
for file in files_with_unresolved_conflicts:
    print(file)
sys.exit(1)
|
||||
@@ -1,67 +0,0 @@
|
||||
import re
|
||||
from jak1_file_list import file_list
|
||||
import argparse
|
||||
import os
|
||||
|
||||
# Scan the goal_src copies of the files named in --files (comma separated) for
# text patterns that indicate a problem in the decompiled output. Exits with
# status 1 if anything was reported.
parser = argparse.ArgumentParser()
parser.add_argument("--files", required=True)
args = parser.parse_args()

files = args.files.split(",")

throw_error = False

# Raw strings: "\d", "\s" and "\(" are regex escapes, not Python string escapes.
method_split_pattern = re.compile(r't9-\d+\s\(method-of-object')
function_split_pattern = re.compile(r'\(t9-\d+\)')
missing_res_tag_pattern = re.compile(r'(sv-\d{2,} int)')
decompiler_error_pattern = re.compile(';; ERROR')
missing_arg = re.compile(r'local-vars.*none\)')

# Checked in this order; only the first matching problem is reported per line.
line_checks = [
    ("method_split", method_split_pattern),
    ("function_split", function_split_pattern),
    ("missing_res_tag", missing_res_tag_pattern),
    ("decompiler_error", decompiler_error_pattern),
    ("missing_arg", missing_arg),
]

for file in files:
    # Only file_list entries whose third field is 3 are considered; the fifth
    # field of the matching entry is the directory below ./goal_src.
    src_path = None
    for f in file_list:
        if f[2] == 3 and f[0] == file:
            src_path = f[4]
            break

    file_path = None
    if src_path is not None:
        file_path = "./goal_src/{}/{}.gc".format(src_path, file)
    # Covers both "not in file_list" and "listed but missing on disk", so the
    # open() below cannot fail on a path that was never there.
    if file_path is None or not os.path.exists(file_path):
        print("{} couldn't find in /goal_src!".format(file))
        throw_error = True
        continue

    with open(file_path) as f:
        for lineno, line in enumerate(f, start=1):
            for problem, pattern in line_checks:
                if pattern.search(line):
                    print("{} - {}:{}".format(problem, file_path, lineno))
                    throw_error = True
                    break

if throw_error:
    print("found potential problems!")
    raise SystemExit(1)
else:
    print("looks good!")
|
||||
@@ -0,0 +1,96 @@
|
||||
from utils import get_alltypes_path_from_game, get_gsrc_path_from_filename
|
||||
|
||||
|
||||
class AllTypesUpdateBlock:
    """One named block of lines, tagged with a file name and a block id."""

    def __init__(self):
        # lines that make up the block body
        self.data = []
        # file the block is destined for
        self.file_name = ""
        # identifier of the block within that file
        self.block_id = ""

    def __str__(self):
        # only the first 20 data lines are shown
        preview = self.data[:20]
        return "%s:%s:%s..." % (self.file_name, self.block_id, preview)
|
||||
|
||||
|
||||
def update_alltypes_named_blocks(game_name):
    """Collect the named blocks for game_name, then write them out.

    The blocks are gathered into a dict keyed by file name so that the update
    step handles each target file once.
    """
    blocks_by_file = dict()
    # Step 1: gather
    get_all_blocks(game_name, blocks_by_file)
    # Step 2: apply
    update_all_blocks(game_name, blocks_by_file)
|
||||
|
||||
|
||||
def get_all_blocks(game_name, block_dict):
    """Read the named blocks from the game's all-types file into block_dict.

    A block starts at a line beginning with ";; +++" that contains a colon, in
    the form ";; +++<file_name>:<block_id>", and runs up to (not including) the
    next line beginning with ";; ---". Each block becomes an
    AllTypesUpdateBlock appended to block_dict[file_name].

    block_id is everything after the first colon, line ending included. A
    block that is never closed extends to the end of the file.
    """
    with open(get_alltypes_path_from_game(game_name)) as f:
        lines = f.readlines()
    i = 0
    while i < len(lines):
        line = lines[i]
        if not (line.startswith(";; +++") and ":" in line):
            i = i + 1
            continue
        info = line.replace(";; +++", "")
        # Split on the first colon only, so a later colon cannot break unpacking.
        file_name, _, block_id = info.partition(":")
        new_block = AllTypesUpdateBlock()
        new_block.file_name = file_name
        new_block.block_id = block_id
        # Collect lines until the terminator; the bounds check comes before the
        # index so an unterminated block cannot run off the end of the list.
        i = i + 1
        while i < len(lines) and not lines[i].startswith(";; ---"):
            new_block.data.append(lines[i])
            i = i + 1
        # Add to the dictionary
        block_dict.setdefault(file_name, []).append(new_block)
        # i now rests on the terminator (or is past the end); the next pass of
        # the outer loop steps over the terminator as an ordinary line.
|
||||
|
||||
|
||||
def update_all_blocks(game_name, block_dict):
    """Rewrite each source file, replacing the bodies of its named blocks.

    For every file_name in block_dict the file at
    get_gsrc_path_from_filename(game_name, file_name) is read, and each
    ";; +++<block_id>" ... ";; ---" region whose id matches one of the supplied
    blocks has its body replaced by that block's data. Regions with no matching
    block are left untouched. Everything from the first line starting with
    ";; decomp begins" (case-insensitive) onwards is copied verbatim. The file
    is then overwritten with the result.
    """
    for file_name, blocks in block_dict.items():
        # Get the file's lines
        path = get_gsrc_path_from_filename(game_name, file_name)
        with open(path) as f:
            lines = f.readlines()
        final_lines = []
        i = 0
        while i < len(lines):
            line = lines[i]
            if line.lower().startswith(";; decomp begins"):
                # Nothing past this marker is touched.
                final_lines.extend(lines[i:])
                break
            final_lines.append(line)
            i = i + 1
            if not line.startswith(";; +++"):
                continue
            # Look to see if we actually have that block
            block_id = line.split(";; +++")[1]
            block = next((b for b in blocks if b.block_id == block_id), None)
            if block is None:
                continue
            # Write the new body, then skip the old one up to its terminator.
            final_lines.extend(block.data)
            while i < len(lines) and not lines[i].startswith(";; ---"):
                i = i + 1
            # Keep the terminator when there is one. Without one, i is now past
            # the end, so the loop stops instead of revisiting the header.
            if i < len(lines):
                final_lines.append(lines[i])
                i = i + 1
        # Update the file contents
        with open(path, "w") as f:
            f.writelines(final_lines)
|
||||
@@ -0,0 +1,808 @@
|
||||
import re
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
|
||||
# RetainedCode or something
|
||||
class CommentMeta:
    """Record for one piece of retained text and where it was found.

    Every field starts empty; the code that builds the record fills in the
    neighbouring symbols/code lines, the padding counts that came with them,
    and the form the text sat in.
    """

    def __init__(self):
        # the retained text itself
        self.data = ""
        # symbols located before / on / after the text
        self.symbol_before = self.symbol_inline = self.symbol_after = None
        # padding counts returned alongside the before/after symbols
        self.symbol_padding_before = self.symbol_padding_after = None
        # NOTE - maybe holding more than just 1 line before/after might help?
        self.code_before = self.code_after = None
        self.code_padding_before = self.code_padding_after = None
        self.line_num_in_form = None  # None == top level
        self.containing_form = None  # none - top level
        self.containing_form_kind = None  # function|method|behaviour
        self.containing_form_func_name = None  # or the method/behaviour
        self.containing_form_type = None
        self.inline = False
        self.code_in_line = None  # only for inline comments
        self.line_in_file = None  # a worst-case scenario fallback

    def __str__(self):
        shown = (self.data, self.symbol_before, self.symbol_after)
        return ":".join(str(part) for part in shown)
|
||||
|
||||
|
||||
def debug_nice_formatted_code(val):
    """Return the first 20 characters of val with outer whitespace removed.

    None is passed through unchanged.
    """
    return None if val is None else val.strip()[:20]
|
||||
|
||||
|
||||
# returns (symbol | None, padding)
|
||||
def backtrack_for_symbol(lines, index):
    """Search upwards from just above lines[index] for a defined symbol.

    A symbol is the token following define / define-extern / defun / defstate /
    deftype on a line. Blank lines are counted as padding and comment lines are
    skipped. The search gives up at the first other non-empty line, or at a
    line containing "decomp begins".

    Returns (symbol, padding) when a symbol is found, otherwise
    (None, padding), where padding is the number of blank lines crossed.
    """
    padding = 0
    # The stop value is -1 so that line 0 is inspected too.
    for i in range(index - 1, -1, -1):
        tline = lines[i].strip()
        matches = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
        )
        if matches is not None:
            return matches.group(1), padding
        if tline == "":
            padding = padding + 1
        elif not tline.startswith(";") or "decomp begins" in tline.lower():
            # we hit a non empty line (but it wasn't a symbol!)
            return None, padding
    return None, padding
|
||||
|
||||
|
||||
def symbol_on_line(line):
    """Return the symbol defined on line, or None if there is none.

    The symbol is the whitespace-delimited token that follows define /
    define-extern / defun / defstate / deftype; it must itself be followed by
    whitespace. Only leading whitespace is removed before matching.
    """
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.lstrip(),
    )
    return found.group(1) if found else None
|
||||
|
||||
|
||||
def lookahead_for_symbol(lines, index):
    """Search downwards from just below lines[index] for a defined symbol.

    Blank lines are counted, comment lines are skipped, and the search stops at
    the first other non-empty line or at a line containing "decomp begins".

    Returns (symbol, blank_count) on a hit and (None, blank_count) otherwise.
    """
    blank_count = 0
    for pos in range(index + 1, len(lines)):
        text = lines[pos].lstrip()
        found = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", text
        )
        if found is not None:
            return found.group(1), blank_count
        bare = text.strip()
        if bare == "":
            blank_count = blank_count + 1
            continue
        if not bare.startswith(";") or "decomp begins" in text.lower():
            # a non-empty line that did not define a symbol ends the search
            return None, blank_count
    return None, blank_count
|
||||
|
||||
|
||||
def backtrack_for_code(lines, index):
    """Search upwards from just above lines[index] for the nearest code line.

    Blank lines are counted as padding and comment lines are skipped. A line
    containing "decomp begins" ends the search without a result.

    Returns (line, padding) for the first code line found, otherwise
    (None, padding). A 2-tuple is always returned because callers unpack it.
    """
    padding = 0
    # The stop value is -1 so that line 0 is inspected too.
    for i in range(index - 1, -1, -1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    # Ran out of lines without finding any code.
    return None, padding
|
||||
|
||||
|
||||
def lookahead_for_code(lines, index):
    """Search downwards from just below lines[index] for the nearest code line.

    Blank lines are counted as padding and comment lines are skipped. A line
    containing "decomp begins" ends the search without a result.

    Returns (line, padding) for the first code line found, otherwise
    (None, padding). A 2-tuple is always returned because callers unpack it.
    """
    padding = 0
    for i in range(index + 1, len(lines), 1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    # Reached the end of the input with only blanks/comments after index.
    return None, padding
|
||||
|
||||
|
||||
# returns form, or none
|
||||
def is_line_start_of_form(line):
    """Return line itself when it opens a form, otherwise None.

    Comment lines and lines containing "(when *debug-segment*" never count.
    Anything else counts when it has an opening paren followed (after optional
    whitespace) by a run of characters and then whitespace.
    """
    is_comment = line.lstrip().startswith(";")
    if is_comment or "(when *debug-segment*" in line:
        return None
    opens_form = re.search(r"\(\s*([^\s.]*)\s+", line) is not None
    return line if opens_form else None
|
||||
|
||||
|
||||
def has_form_ended(stack, line):
    """Feed line's parens into stack and report whether the form is closed.

    Only the text before the first ";" is examined. Each "(" is pushed onto
    stack (which is mutated in place). True is returned as soon as a character
    other than "(" is seen while the stack is empty, or a ")" pops the last
    entry. False means the line was consumed with the form still open (or the
    line had nothing to examine).
    """
    code = line.split(";", 1)[0]
    for ch in code:
        if ch == "(":
            stack.append(ch)
            continue
        if not stack:
            # covers a stray ")" as well as any other character outside a form
            return True
        if ch == ")":
            stack.pop()
            if not stack:
                return True
    return False
|
||||
|
||||
|
||||
def append_form_metadata(comment, form_start_line):
    """Classify form_start_line and store the result on comment.

    Sets comment.containing_form_kind to "function" (defun / defun-debug),
    "behavior" (defbehavior), "method" (defmethod) or "unknown", together with
    containing_form_func_name and containing_form_type. The type is only set
    for behaviors and methods (the token after the name); otherwise it is None.
    """
    kind, name, form_type = "unknown", None, None
    as_function = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_start_line)
    if as_function is not None:
        kind, name = "function", as_function.group(1)
    else:
        two_token_forms = (
            ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
            ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
        )
        for candidate_kind, pattern in two_token_forms:
            hit = re.search(pattern, form_start_line)
            if hit is not None:
                kind, name, form_type = candidate_kind, hit.group(1), hit.group(2)
                break
    comment.containing_form_kind = kind
    comment.containing_form_func_name = name
    comment.containing_form_type = form_type
|
||||
|
||||
|
||||
def process_original_lines(lines):
    """Scan an existing source file and collect its comments with context.

    Nothing is collected until a line containing "decomp begins" has been
    seen. After that, every full-line comment (or "#|" block), together with
    the comment lines that directly follow it, becomes one CommentMeta, and
    every line with a trailing ";" comment becomes an inline CommentMeta. Each
    record stores the neighbouring symbols/code lines and the form it was in.

    Returns (comments, debug_lines): the list of CommentMeta records, and a
    copy of the visited lines with ";; [DEBUG]" annotation lines inserted after
    each recorded comment.
    """
    comments = []
    debug_lines = []
    # track if we are inside a define*/defun/defmethod/deftype/defstate
    within_form = None
    line_num_in_form = None
    form_paren_stack = []
    found_output = False
    i = 0
    while i < len(lines):
        debug_lines.append(lines[i])
        tline = lines[i].lstrip()
        if "decomp begins" in tline.lower():
            found_output = True
            i = i + 1
            continue
        if not found_output:
            i = i + 1
            continue
        # actually process code
        if within_form is None:
            # lets see if we are now in one
            within_form = is_line_start_of_form(lines[i])
            if within_form is not None:
                line_num_in_form = 0
                # a form that opens and closes on the same line is not tracked
                if has_form_ended(form_paren_stack, lines[i]):
                    within_form = None
                    form_paren_stack = []
        elif within_form is not None:
            # check if the form has ended by counting parens
            if has_form_ended(form_paren_stack, lines[i]):
                within_form = None
                form_paren_stack = []
                line_num_in_form = 0
            else:
                line_num_in_form = line_num_in_form + 1

        if tline.startswith(";") or tline.startswith("#|"):
            # treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
            # this is done because there is nothing to match them against (if a comment is inside them for example)
            # so we have to copy them in full
            in_deviation_block = False
            if "decomp deviation" in tline.lower() or tline.startswith("#|"):
                in_deviation_block = True
            current_comment = CommentMeta()
            current_comment.line_in_file = i
            current_comment.data = lines[i]
            # context above the comment is taken from its first line
            (
                current_comment.symbol_before,
                current_comment.symbol_padding_before,
            ) = backtrack_for_symbol(lines, i)
            (
                current_comment.code_before,
                current_comment.code_padding_before,
            ) = backtrack_for_code(lines, i)
            current_comment.containing_form = within_form
            if within_form is not None:
                append_form_metadata(current_comment, within_form)
            current_comment.line_num_in_form = line_num_in_form
            current_comment.inline = False
            # look ahead to handle block comments
            if i + 1 < len(lines):
                next_line = lines[i + 1]
                if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
                    in_deviation_block = False
            # absorb following lines while inside a deviation block or while
            # the next line is itself a comment / block-comment terminator
            while i + 1 < len(lines) and (
                in_deviation_block
                or next_line.lstrip().startswith(";")
                or next_line.lstrip().startswith("|#")
            ):
                debug_lines.append(lines[i + 1])
                i = i + 1
                current_comment.data = current_comment.data + next_line
                if i + 1 < len(lines):
                    next_line = lines[i + 1]
                    if "decomp deviation" in next_line.lower() or next_line.startswith(
                        "|#"
                    ):
                        in_deviation_block = False
            # context below the comment is taken from its last absorbed line
            (
                current_comment.symbol_after,
                current_comment.symbol_padding_after,
            ) = lookahead_for_symbol(lines, i)
            (
                current_comment.code_after,
                current_comment.code_padding_after,
            ) = lookahead_for_code(lines, i)
            comments.append(current_comment)
            debug_lines.append(
                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
                    current_comment.symbol_before,
                    current_comment.symbol_padding_before,
                    current_comment.symbol_after,
                    current_comment.symbol_padding_after,
                    debug_nice_formatted_code(current_comment.code_before),
                    current_comment.code_padding_before,
                    debug_nice_formatted_code(current_comment.code_after),
                    current_comment.code_padding_after,
                )
            )
            debug_lines.append(
                ";; [DEBUG]: in_form - {}...:{}\n".format(
                    debug_nice_formatted_code(current_comment.containing_form),
                    current_comment.line_num_in_form,
                )
            )
        # inline comments
        # TODO - cleanup duplication
        elif ";" in tline:
            current_comment = CommentMeta()
            current_comment.line_in_file = i
            # everything from the first ";" to the end of the line
            current_comment.data = ";" + tline.partition(";")[2]
            (
                current_comment.symbol_before,
                current_comment.symbol_padding_before,
            ) = backtrack_for_symbol(lines, i)
            (
                current_comment.symbol_after,
                current_comment.symbol_padding_after,
            ) = lookahead_for_symbol(lines, i)
            (
                current_comment.code_before,
                current_comment.code_padding_before,
            ) = backtrack_for_code(lines, i)
            (
                current_comment.code_after,
                current_comment.code_padding_after,
            ) = lookahead_for_code(lines, i)
            current_comment.containing_form = within_form
            if within_form is not None:
                append_form_metadata(current_comment, within_form)
            current_comment.line_num_in_form = line_num_in_form
            current_comment.symbol_inline = symbol_on_line(tline)
            current_comment.inline = True
            # the code that shares the line with the comment
            current_comment.code_in_line = tline.partition(";")[0]
            comments.append(current_comment)
            debug_lines.append(
                ";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
                    current_comment.symbol_before,
                    current_comment.symbol_padding_before,
                    current_comment.symbol_after,
                    current_comment.symbol_padding_after,
                    debug_nice_formatted_code(current_comment.code_before),
                    current_comment.code_padding_before,
                    debug_nice_formatted_code(current_comment.code_after),
                    current_comment.code_padding_after,
                )
            )
            debug_lines.append(
                ";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
                    debug_nice_formatted_code(current_comment.containing_form),
                    current_comment.line_num_in_form,
                    debug_nice_formatted_code(current_comment.code_in_line),
                )
            )
        i = i + 1
    return comments, debug_lines
|
||||
|
||||
|
||||
def get_symbol_at_line(line):
    """Return the symbol defined on line, or None if there is none.

    The line is stripped on both sides first, so the token after define /
    define-extern / defun / defstate / deftype only counts when more
    whitespace-separated text follows it on the same line.
    """
    hit = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.strip(),
    )
    if hit is None:
        return None
    return hit.group(1)
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_line_before(line):
    """Take the comments that were recorded directly before line's symbol.

    If line defines a symbol, every entry of the module-level comments list
    whose symbol_after equals it is removed from that list (in place) and
    returned, in list order. Returns [] when line defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    # These comments WILL be placed, so they leave the pending list right away.
    taken = []
    kept = []
    for comment in comments:
        if comment.symbol_after == symbol:
            taken.append(comment)
        else:
            kept.append(comment)
    comments[:] = kept
    return taken
|
||||
|
||||
|
||||
def padding_before_comment(comment):
    """Return the newline padding for a top-level comment with a symbol after it.

    That is symbol_padding_after newlines when the comment has no containing
    form and symbol_after is set; in every other case the empty string.
    """
    at_top_level = comment.containing_form is None
    if at_top_level and comment.symbol_after is not None:
        return "\n" * comment.symbol_padding_after
    return ""
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_inline(line):
    """Take the inline comments that were recorded on line's symbol.

    If line defines a symbol, every entry of the module-level comments list
    whose symbol_inline equals it is removed from that list (in place) and
    returned, in list order. Returns [] when line defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    # These comments WILL be placed, so they leave the pending list right away.
    taken = []
    kept = []
    for comment in comments:
        if comment.symbol_inline == symbol:
            taken.append(comment)
        else:
            kept.append(comment)
    comments[:] = kept
    return taken
|
||||
|
||||
|
||||
def padding_after_comment(comment):
    """Return the newline padding for a top-level comment with a symbol before it.

    That is symbol_padding_before newlines when the comment has no containing
    form and symbol_before is set; in every other case the empty string.
    """
    at_top_level = comment.containing_form is None
    if at_top_level and comment.symbol_before is not None:
        return "\n" * comment.symbol_padding_before
    return ""
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_line_after(line):
    """Take the comments that can only be anchored after line's symbol.

    If line defines a symbol, every entry of the module-level comments list
    whose symbol_before equals it and whose symbol_after is None is removed
    from that list (in place) and returned, in list order. Comments that also
    have a symbol_after are left alone, since placing a comment before a
    symbol is preferred. Returns [] when line defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    # These comments WILL be placed, so they leave the pending list right away.
    taken = []
    kept = []
    for comment in comments:
        if comment.symbol_after is None and comment.symbol_before == symbol:
            taken.append(comment)
        else:
            kept.append(comment)
    comments[:] = kept
    return taken
|
||||
|
||||
|
||||
# the first half of the defmethod/etc lines (before arg list) is less likely to change
|
||||
# so we want to split it to weight it more heavily
|
||||
def split_def_line(line):
    """Split line at its first "(" that is not the very first character.

    Returns (first_part, second_part): the text before that paren and the text
    from that paren to the end. When there is no such paren the whole line is
    the first part and the second part is empty.
    """
    # searching from index 1 leaves an opening paren at index 0 in the first part
    cut = line.find("(", 1)
    if cut == -1:
        return line, ""
    return line[:cut], line[cut:]
|
||||
|
||||
|
||||
def get_form_metadata(form_def_line):
    """Classify a form's opening line.

    Returns (kind, name, type): ("function", name, None) for defun /
    defun-debug, ("behavior", name, type) for defbehavior, ("method", name,
    type) for defmethod, and ("unknown", None, None) for anything else. name
    and type are the first and second tokens after the keyword.
    """
    as_function = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_def_line)
    if as_function is not None:
        return "function", as_function.group(1), None
    two_token_forms = (
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
    )
    for kind, pattern in two_token_forms:
        hit = re.search(pattern, form_def_line)
        if hit is not None:
            return kind, hit.group(1), hit.group(2)
    return "unknown", None, None
|
||||
|
||||
|
||||
# Method names that are treated as fixed when comparing methods by name.
built_in_method_names = [
    "new",
    "delete",
    "print",
    "inspect",
    "length",
    "asize-of",
    "copy",
    "relocate",
    "memusage",
]


def different_method_names(form_func_name, comment_form_func_name):
    """Report whether two method names must be treated as different methods.

    Names only count as different when at least one of them is in
    built_in_method_names and the two are not equal. When neither is a
    built-in name the result is always False.
    """
    involves_built_in = (
        comment_form_func_name in built_in_method_names
        or form_func_name in built_in_method_names
    )
    return involves_built_in and form_func_name != comment_form_func_name
|
||||
|
||||
|
||||
def get_relevant_form_comments(form_def_line):
    """Take the pending in-form comments that belong to the form at form_def_line.

    Walks the module-level comments list. Comments with no containing form are
    skipped. A comment is taken (removed from comments and returned) when its
    containing form's definition line matches form_def_line closely enough:
    either the part before the argument list is identical, or the weighted
    fuzzy score reaches the threshold and no line in decomp_form_def_lines
    scores better.

    Returns the list of taken comments, in list order.
    """
    form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
    code_def_part, code_rest = split_def_line(form_def_line)
    relevant_comments = []
    i = 0
    # index-driven because entries are popped from comments while scanning
    while i < len(comments):
        comment = comments[i]
        if comment.containing_form is None:
            i = i + 1
            continue
        (
            comment_form_kind,
            comment_form_func_name,
            comment_form_type,
        ) = get_form_metadata(comment.containing_form)
        # First disqualify the form if it's obviously unrelated
        if comment_form_kind != "unknown":
            if form_kind != comment_form_kind:
                i = i + 1
                continue
            elif form_kind == "function" and comment_form_func_name != form_func_name:
                i = i + 1
                continue
            elif form_kind == "behavior" and comment_form_func_name != form_func_name:
                i = i + 1
                continue
            elif form_kind == "method" and (
                comment_form_type != form_type
                or different_method_names(form_func_name, comment_form_func_name)
            ):
                i = i + 1
                continue
        # Evaluate its score (comments and current def line)
        # the part before the argument list is weighted 0.65, the rest 0.35
        def_part, rest = split_def_line(comment.containing_form)
        def_score = fuzz.ratio(code_def_part, def_part) * 0.65
        # a full-weight def score on a known form kind is accepted outright
        if def_score == 65.0 and form_kind != "unknown":
            relevant_comments.append(comment)
            comments.pop(i)
            continue
        rest_score = fuzz.ratio(code_rest, rest) * 0.35
        combined_score = def_score + rest_score
        threshold = 50.0
        if combined_score < threshold:
            i = i + 1
            continue
        # Now, let's look at ALL other def lines yet to come from the decomp output
        # if any are a better match, don't add the comment yet -- we'll add it when we get there!
        # TODO - remove lines from the list as we find them so speed this up
        # NOTE(review): the scores below compare each decomp def line against
        # form_def_line (code_def_part / code_rest), not against the comment's
        # own containing form -- confirm this is the intended comparison
        found_better_form = False
        for decomp_def_line in decomp_form_def_lines:
            line_form_kind, line_form_func_name, line_form_type = get_form_metadata(
                decomp_def_line
            )
            if form_kind != "unknown":
                if form_kind != line_form_kind:
                    continue
                elif form_kind == "function" and line_form_func_name != form_func_name:
                    continue
                elif form_kind == "behavior" and line_form_func_name != form_func_name:
                    continue
                elif form_kind == "method" and (
                    line_form_type != form_type
                    or different_method_names(form_func_name, line_form_func_name)
                ):
                    continue
            def_part, rest = split_def_line(decomp_def_line)
            def_score = fuzz.ratio(code_def_part, def_part) * 0.65
            if def_score == 65.0 and form_kind != "unknown":
                found_better_form = True
                break
            rest_score = fuzz.ratio(code_rest, rest) * 0.35
            if combined_score < def_score + rest_score:
                found_better_form = True
                break
        # TODO otherwise? still test?
        if found_better_form:
            i = i + 1
            continue
        relevant_comments.append(comment)
        comments.pop(i)
    return relevant_comments
|
||||
|
||||
|
||||
# Simple fuzz ratio, but removes obvious outliers like empty lines / lines with only a paren
|
||||
def score_alg(line1, line2):
    """Fuzzy-compare two lines, rejecting lines that carry no real content.

    Both lines are stripped first.  When either one is empty or is nothing
    but a single paren, -1 is returned; otherwise the value of
    ``fuzz.ratio`` for the two stripped lines is returned.
    """
    contentless = ("", ")", "(")
    left, right = line1.strip(), line2.strip()
    if left in contentless or right in contentless:
        return -1
    return fuzz.ratio(left, right)
|
||||
|
||||
|
||||
# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
|
||||
def _find_comment_anchor(form_lines, comment, threshold=50.0):
    """Return ``(index, place_kind)`` for the form line that best matches ``comment``.

    Every non-comment line is scored against the comment's ``code_in_line``,
    ``code_before`` and ``code_after`` (in that order).  A candidate only wins
    when it reaches ``threshold`` and strictly beats the best score so far, so
    on ties the earliest line is kept.  ``(-1, None)`` means nothing matched.
    """
    references = (
        (comment.code_in_line, "inline"),
        (comment.code_before, "next_line"),
        (comment.code_after, "before_line"),
    )
    best_index, best_kind, best_score = -1, None, -1
    for index, form_line in enumerate(form_lines):
        # skip any comments that were previously added
        if form_line.lstrip().startswith(";"):
            continue
        for reference, kind in references:
            if reference is None:
                continue
            score = score_alg(form_line, reference)
            if score >= threshold and score > best_score:
                best_index, best_kind, best_score = index, kind, score
    return best_index, best_kind


def _place_form_comment(form_lines, comment):
    """Insert ``comment`` into ``form_lines`` (mutated in place)."""
    index, place_kind = _find_comment_anchor(form_lines, comment)
    last = len(form_lines) - 1
    if index == -1:
        # Nothing matched well enough: fall back to the comment's recorded
        # offset inside its original form.
        if comment.inline:
            # Append to the line at that offset (clamped to the form) rather
            # than overwriting it with the text of the form's last line.
            target = min(comment.line_num_in_form, last)
            form_lines[target] = form_lines[target].rstrip() + " " + comment.data
        else:
            form_lines.insert(comment.line_num_in_form, comment.data)
    elif comment.inline:
        target = index + 1 if place_kind == "next_line" else index
        # The form's closing line is not part of form_lines, so "the next
        # line" may not exist here; stay on the last available line.
        target = min(target, last)
        form_lines[target] = form_lines[target].rstrip() + " " + comment.data
    elif place_kind == "next_line":
        form_lines.insert(index + 1, padding_before_comment(comment) + comment.data)
    else:
        form_lines.insert(index, padding_after_comment(comment) + comment.data)


# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
def merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines):
    """Merge retained comments into freshly decompiled code.

    ``final_lines`` is extended in place and nothing is returned:

    1. Every line of ``gsrc_path`` up to and including the first line that
       starts with ``;; decomp begins`` (case-insensitive, leading whitespace
       ignored) is copied over, except lines containing ``[DEBUG]``.
    2. ``decomp_lines`` is then walked.  A multi-line form is gathered as a
       unit (all lines before its closing line), the comments returned by
       ``get_relevant_form_comments`` for its first line are placed into it,
       and the result is appended.  The closing line of the form, and every
       other line outside a form, is handled as a top-level line: matching
       symbol comments are emitted before it, inline on it, and after it.
    """
    with open(gsrc_path) as f:
        for line in f:
            if "[DEBUG]" in line:
                continue
            final_lines.append(line)
            if line.lower().lstrip().startswith(";; decomp begins"):
                break

    within_form = None
    form_paren_stack = []
    i = 0
    while i < len(decomp_lines):
        line = decomp_lines[i]
        # For every line in the decompiled output, we scan our comment list to see if anything matches
        # if it does, we insert it appropriately and remove the comment from the list
        #
        # This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
        if within_form is None:
            # lets see if we are now in one
            within_form = is_line_start_of_form(line)
            # TODO - check line for symbol matches?
            if within_form is not None:
                if has_form_ended(form_paren_stack, line):
                    # single-line form, handled as a top-level line below
                    within_form = None
                    form_paren_stack = []
                else:
                    # Get all of the lines of the form at once
                    form_start = line
                    form_lines = [form_start]
                    # `i + 1 <` keeps an unterminated final form from
                    # indexing past the end of decomp_lines.
                    while i + 1 < len(decomp_lines):
                        i = i + 1
                        line = decomp_lines[i]
                        if has_form_ended(form_paren_stack, line):
                            within_form = None
                            form_paren_stack = []
                            break
                        form_lines.append(line)
                    # Add any comments needed to the form contents
                    # - first we get all comments that match well with the form's start line (ie. defmethod ....)
                    # - for each comment, we find which line matches it the best,
                    #   if NONE exceed the threshold we default to the line offset
                    for comment in get_relevant_form_comments(form_start):
                        _place_form_comment(form_lines, comment)
                    # Add the lines to the final output
                    final_lines.extend(form_lines)

        # Otherwise, we are at the top-level!
        # (after a gathered form, `line` is that form's closing line)
        if within_form is None:
            for comment in relevant_symbol_comments_for_line_before(line):
                final_lines.append(padding_before_comment(comment) + comment.data)
            inline_comments = relevant_symbol_comments_for_inline(line)
            if len(inline_comments) > 0:
                comment_str = ""
                for comment in inline_comments:
                    comment_str = comment_str + comment.data.strip()
                comment_str = comment_str.replace(";", "")
                # NOTE(review): this entry has no trailing newline, unlike the
                # other entries -- confirm how final_lines is written out.
                final_lines.append(
                    "{} ;; {}".format(line.rstrip(), comment_str.strip())
                )
            else:
                final_lines.append(line)
            for comment in relevant_symbol_comments_for_line_after(line):
                final_lines.append(padding_after_comment(comment) + comment.data)
        # next line
        i = i + 1
|
||||
|
||||
|
||||
def _score_line_for_comment(line, index, comment, threshold, best):
    """Fold one candidate line into the running best match for ``comment``.

    ``best`` is ``(index_to_insert, highest_score, place_comment_after)``.
    A match on ``code_in_line`` leaves ``place_comment_after`` untouched, a
    match on ``code_before`` sets it to True and one on ``code_after`` sets it
    to False.  The updated triple is returned.
    """
    best_index, best_score, place_after = best
    references = (
        (comment.code_in_line, None),
        (comment.code_before, True),
        (comment.code_after, False),
    )
    for reference, after in references:
        if reference is None:
            continue
        score = score_alg(line, reference)
        if score >= threshold and score > best_score:
            best_index, best_score = index, score
            if after is not None:
                place_after = after
    return best_index, best_score, place_after


def handle_dangling_blocks(comments, final_lines, debug_lines):
    """Place every comment in ``comments`` somewhere in ``final_lines``.

    For each comment all of ``final_lines`` is scanned (each scanned line is
    also appended to ``debug_lines``) and the best-scoring line at or above
    the threshold becomes the anchor.  Inline comments are appended to the
    anchor line; other comments are inserted after or before it.  When no
    line qualifies the comment falls back to its original ``line_in_file``.
    ``final_lines`` and ``debug_lines`` are mutated in place.
    """
    threshold = 50.0
    for comment in comments:
        within_form = None
        form_paren_stack = []
        best = (-1, -1, True)
        for i, line in enumerate(final_lines):
            debug_lines.append(line)
            # The marker line itself is never an anchor.  Lines before it are
            # still scanned, as before.
            if "decomp begins" in line.lstrip().lower():
                continue
            # We can try to claw back a bit of efficiency by skipping the inside of forms
            if within_form is not None:
                # check if the form has ended by counting parens
                # NOTE(review): the form's first line is never fed to
                # has_form_ended here, so the stack starts empty on the
                # following line -- confirm this is intended.
                if has_form_ended(form_paren_stack, line):
                    best = _score_line_for_comment(line, i, comment, threshold, best)
                    within_form = None
                    form_paren_stack = []
            else:
                # a form's first line and plain top-level code are both scored
                within_form = is_line_start_of_form(line)
                best = _score_line_for_comment(line, i, comment, threshold, best)

        # add the comment!
        index_to_insert, _, place_comment_after = best
        if index_to_insert == -1:
            if comment.inline and final_lines:
                # clamp so a stale line number cannot index past the end
                target = min(comment.line_in_file, len(final_lines) - 1)
                final_lines[target] = final_lines[target].rstrip() + " " + comment.data
            else:
                final_lines.insert(comment.line_in_file, comment.data)
        elif comment.inline:
            final_lines[index_to_insert] = (
                final_lines[index_to_insert].rstrip() + " " + comment.data
            )
        elif place_comment_after:
            final_lines.insert(
                index_to_insert + 1,
                padding_before_comment(comment) + comment.data,
            )
        else:
            final_lines.insert(
                index_to_insert,
                padding_after_comment(comment) + comment.data,
            )
|
||||
@@ -0,0 +1,33 @@
|
||||
from utils import get_gsrc_path_from_filename, get_ref_path_from_filename, get_file_list
|
||||
import os
|
||||
|
||||
# TODO - hard-coded to jak 2
|
||||
|
||||
# Get all the gsrc files, if they aren't empty -- log if they aren't added to the reference tests as well
|
||||
file_list = get_file_list("jak2")

# TODO - function for getting just the names
missing_files = []
# Only entries whose third field is 3 are considered; the first field is the name.
for file_name in (entry[0] for entry in file_list if entry[2] == 3):
    # check gsrc
    gsrc_path = get_gsrc_path_from_filename("jak2", file_name)
    if not gsrc_path:
        continue
    with open(gsrc_path, 'r') as fp:
        gsrc_length = sum(1 for _ in fp)

    # files of 15 lines or fewer are not checked
    if gsrc_length <= 15:
        continue
    if file_name == "enemy-h":
        print(file_name)
    # check if ref exists
    ref_path = get_ref_path_from_filename("jak2", file_name, "./test/decompiler/reference/")
    if not os.path.exists(ref_path):
        missing_files.append(file_name)

print(missing_files)
|
||||
@@ -0,0 +1,159 @@
|
||||
import re
|
||||
import argparse
|
||||
from utils import get_gsrc_path_from_filename
|
||||
from colorama import just_fix_windows_console, Fore, Back, Style
|
||||
|
||||
# colorama helper, called once before any coloured output is printed
just_fix_windows_console()

# Command-line interface: the game and the file to lint.
parser = argparse.ArgumentParser("lint-gsrc-file")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--file", help="The name of the file", type=str)
args = parser.parse_args()
|
||||
|
||||
|
||||
class LintMatch:
    """One rule violation: the file, the offending line number and context lines."""

    def __init__(self, src_path, offending_lineno, context):
        self.src_path, self.offending_lineno, self.context = (
            src_path,
            offending_lineno,
            context,
        )

    def __str__(self):
        """Render a bright magenta ``@ path:line`` header followed by the context.

        Context lines that consist of a single paren are left out; every other
        line is emitted tab-indented on its own line.
        """
        header = "".join(
            (
                Style.BRIGHT,
                Fore.MAGENTA,
                "@ {}:{}\n".format(self.src_path, self.offending_lineno),
                Fore.RESET,
                Style.RESET_ALL,
            )
        )
        # skip lines that are just brackets
        body = "".join(
            "\t{}\n".format(entry)
            for entry in self.context
            if entry.strip() not in (")", "(")
        )
        return header + body
|
||||
|
||||
|
||||
class LinterRule:
    """A lint rule: severity level, name, compiled regex and context size.

    ``matches`` starts empty and collects the violations found for the rule.
    """

    def __init__(self, level, rule_name, regex_pattern, context_size):
        self.matches = []
        self.level = level
        self.rule_name = rule_name
        self.regex_pattern = regex_pattern
        self.context_size = context_size

    def __str__(self):
        """Render the coloured ``[LEVEL] - name - pattern/g:`` heading."""
        # WARN is yellow, ERROR is red, anything else light blue
        if self.level == "ERROR":
            level_color = Fore.RED
        elif self.level == "WARN":
            level_color = Fore.YELLOW
        else:
            level_color = Fore.LIGHTBLUE_EX
        colored_name = level_color + self.rule_name + Fore.RESET
        heading = "[{}]{} - {} - {}/{}/g".format(
            self.level,
            Fore.RESET,
            colored_name,
            Fore.CYAN,
            self.regex_pattern.pattern,
        )
        return level_color + heading + Fore.RESET + ":"
|
||||
|
||||
|
||||
# Construct all rules
|
||||
# Each rule is (level, name, compiled pattern, number of context lines to show).
# Patterns are raw strings so that regex escapes such as \( \d \s reach the
# regex engine without relying on Python passing unknown string escapes through.
linter_rules = [
    # Infos
    # (none yet)
    # Warnings
    LinterRule("WARN", "method_splits", re.compile(r"method-of-(?:type|object)"), 3),
    LinterRule("WARN", "func_splits", re.compile(r"\(t9-\d+(?:\s+[^\s]+\s*)?\)"), 3),
    LinterRule("WARN", "missing_arg", re.compile(r"local-vars.*[at].*\s+none\)"), 1),
    # Errors
    # NOTE: the leading "." is an unescaped regex dot, so it matches any character
    LinterRule("ERROR", "missing_res_tag", re.compile(r".pcpyud"), 1),
    LinterRule("ERROR", "decomp_error", re.compile(r";; ERROR"), 1),
    LinterRule(
        "ERROR",
        "casting_stack_var",
        re.compile(r"the-as\s+[^\s]*\s+.*\(new 'stack"),
        2,
    ),
]
|
||||
|
||||
# Path of the file to lint, looked up from the --game / --file arguments
src_path = get_gsrc_path_from_filename(args.game, args.file)

# Iterate through the file line by line, check against each rule
# if the rule is violated (it matches) then we append the match with useful details

print("Linting GOAL_SRC File...")
|
||||
|
||||
|
||||
def get_context(lines, match_span, idx, amount_inclusive):
    """Return up to ``amount_inclusive`` lines starting at ``lines[idx]``.

    Trailing whitespace is removed from every line.  The first line's leading
    whitespace is stripped completely, and each following line keeps only the
    indentation it has beyond the first line (never less than none), so the
    relative nesting of the snippet is maintained.  Fewer lines are returned
    when the end of ``lines`` is reached.

    ``match_span`` is accepted for interface compatibility and is not used.
    """
    # TODO - first line, colorize the match using match_span
    if amount_inclusive <= 0:
        return []
    window = [raw.rstrip() for raw in lines[idx : idx + amount_inclusive]]
    if not window:
        return []

    # Strip left pad, while maintaining indent: every line is measured
    # against the first line so deeper nesting stays visible.
    base_indent = len(window[0]) - len(window[0].lstrip())
    lines_grabbed = []
    for text in window:
        stripped_line = text.lstrip()
        relative_indent = max(len(text) - len(stripped_line) - base_indent, 0)
        lines_grabbed.append(" " * relative_indent + stripped_line)
    return lines_grabbed
|
||||
|
||||
|
||||
with open(src_path) as f:
    src_lines = f.readlines()

# Check every line against every rule and record each hit on its rule,
# with the 1-based line number and the context lines for display.
for lineno, line in enumerate(src_lines):
    for rule in linter_rules:
        match = rule.regex_pattern.search(line)
        if match is None:
            continue
        context = get_context(src_lines, match.span(), lineno, rule.context_size)
        rule.matches.append(LintMatch(src_path, lineno + 1, context))

# Iterate through all our linter rules, printing nicely in groups with the
# context surrounding the match
#
# If we find any violations at warning or above, we will ultimately return exit(1)
throw_error = False
for rule in linter_rules:
    if not rule.matches:
        continue
    print(rule)
    if rule.level in ("ERROR", "WARN"):
        throw_error = True
    for match in rule.matches:
        print(match)

if not throw_error:
    print(Fore.GREEN + "Looks good!" + Fore.RESET)
else:
    print(Fore.RED + "Found potential problems, exiting with code 1!" + Fore.RESET)
    exit(1)
|
||||
@@ -1 +1,3 @@
|
||||
rapidfuzz
|
||||
GitPython
|
||||
colorama
|
||||
|
||||
@@ -0,0 +1,38 @@
|
||||
# Creates the `*.gd` files that go in ./goal_src/<game>/dgos
|
||||
# Takes input from the `dgo.txt` file that is generated by the decompiler
|
||||
# Run with all inputs enabled to get all the info!
|
||||
|
||||
# example - python .\scripts\gsrc\skeleton_creation\generate_dgo_files.py --game jak2 --dgotxt .\decompiler_out\jak2\dgo.txt
|
||||
|
||||
import argparse
|
||||
|
||||
parser = argparse.ArgumentParser("generate_dgo_files")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--dgotxt", help="Path to the dgo.txt file", type=str)
args = parser.parse_args()


def _write_dgo_file(game, dgo_name, dgo_lines):
    """Write the collected lines of one DGO to ./goal_src/<game>/dgos/<dgo_name>."""
    path = "./goal_src/{}/dgos/{}".format(game, dgo_name)
    print("writing to {}".format(path))
    with open(path, "w") as wf:
        wf.writelines(dgo_lines)


# Read in the dgo.txt file
with open(args.dgotxt, "r") as f:
    lines = f.readlines()[2:]  # skip the first two lines, assumed to be a comment header and an empty line

# OpenGOAL still doesn't have a data serialization/deserialization format
# so read line by line, assuming each DGO is separated by an empty line
current_dgo_name = None
current_dgo_lines = []
for line in lines:
    if line.strip() == "":
        # Write out contents to the .gd file
        if current_dgo_name is not None:
            _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
        current_dgo_name = None
        current_dgo_lines = []
        continue
    if ".CGO" in line or ".DGO" in line:
        print("found one! - {}".format(line.strip()))
        # figure out the name
        current_dgo_name = line.replace("(", "").replace("\"", "").strip().lower().replace(".dgo", ".gd").replace(".cgo", ".gd")
        print(current_dgo_name)
    if current_dgo_name is not None:
        current_dgo_lines.append(line)

# The last DGO is only followed by a blank line if the file happens to end
# with one; write out whatever is still pending so it is never dropped.
if current_dgo_name is not None:
    _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
|
||||
@@ -0,0 +1,36 @@
|
||||
# Generates the `(cgo-file...` lines for the game.gp file
|
||||
# Attempts to put DGOs in the correct order based on the file order in `all_objs`
|
||||
|
||||
import json
|
||||
|
||||
# Dependency list attached to every generated (cgo-file ...) line
common_deps = '("$OUT/obj/cty-guard-turret-button.o")'

# DGOs that never get a generated line
ignored_dgos = ["ENGINE", "KERNEL", "ART", "COMMON", "GAME", "NO-XGO"]

dgos_encountered = set()
dgos_handled = set()

jak2_files = None
with open("./goal_src/jak2/build/all_objs.json", "r") as f:
    jak2_files = json.load(f)

# Enumerate the files, order is dictated by code files (version 3)
# At the end we will fill in any dgos that weren't considered "required"
lines = []
for file in jak2_files:
    version = file[2]
    dgo_list = file[3]
    dgos_encountered.update(dgo_list)
    # an entry with no DGOs has nothing to contribute to the ordering
    if version == 3 and dgo_list:
        dgo = dgo_list[0]
        if dgo.lower() not in dgos_handled and dgo not in ignored_dgos:
            dgos_handled.add(dgo.lower())
            lines.append('(cgo-file "{}.gd" {})'.format(dgo.lower(), common_deps))

# Set iteration order differs from run to run, so sort the leftovers to keep
# the generated output reproducible; record each one so it is emitted once.
for dgo in sorted(dgos_encountered):
    if dgo.lower() not in dgos_handled and dgo not in ignored_dgos:
        dgos_handled.add(dgo.lower())
        lines.append('(cgo-file "{}.gd" {})'.format(dgo.lower(), common_deps))

for line in lines:
    print(line)
|
||||
@@ -36,41 +36,22 @@
|
||||
# - there are likely ways to make this more efficient
|
||||
|
||||
import argparse
|
||||
import re
|
||||
from rapidfuzz import fuzz
|
||||
import os
|
||||
from code_retention.all_types_retention import update_alltypes_named_blocks
|
||||
from utils import get_gsrc_path_from_filename
|
||||
|
||||
# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
|
||||
# RetainedCode or something
|
||||
class CommentMeta:
    """Metadata recorded for one retained comment (or retained block).

    All fields start as ``None`` except ``data`` (empty string) and
    ``inline`` (False).
    """

    def __init__(self):
        # the retained text itself
        self.data = ""
        self.inline = False
        self.code_in_line = None  # only for inline comments
        self.line_in_file = None  # a worst-case scenario fallback

        # nearby symbols and the padding towards them
        self.symbol_before = self.symbol_inline = self.symbol_after = None
        self.symbol_padding_before = self.symbol_padding_after = None

        # nearby code lines and the padding towards them
        # NOTE - maybe holding more than just 1 line before/after might help?
        self.code_before = self.code_after = None
        self.code_padding_before = self.code_padding_after = None

        # position inside an enclosing form
        self.line_num_in_form = None  # None == top level
        self.containing_form = None  # none - top level
        self.containing_form_kind = None  # function|method|behaviour
        self.containing_form_func_name = None  # or the method/behaviour
        self.containing_form_type = None

    def __str__(self):
        fields = (self.data, self.symbol_before, self.symbol_after)
        return "{}:{}:{}".format(*fields)
|
||||
|
||||
from code_retention.code_retention import *
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
import subprocess
|
||||
|
||||
parser = argparse.ArgumentParser("update-from-decomp")
|
||||
parser.add_argument("--game", help="The name of the game", type=str)
|
||||
parser.add_argument("--file", help="The name of the file", type=str)
|
||||
parser.add_argument(
|
||||
"--preserve",
|
||||
help="Attempt to preserve comments and marked blocks",
|
||||
action="store_true",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--debug", help="Output debug metadata on every block", action="store_true"
|
||||
)
|
||||
@@ -81,150 +62,14 @@ args = parser.parse_args()
|
||||
|
||||
gsrc_path = get_gsrc_path_from_filename(args.game, args.file)
|
||||
|
||||
# Step 1 - Find and update all named blocks from all-types (useful for enums)
|
||||
update_alltypes_named_blocks(args.game)
|
||||
|
||||
comments = []
|
||||
debug_lines = []
|
||||
|
||||
|
||||
def debug_nice_formatted_code(val):
    """Return the first 20 characters of ``val`` after stripping, or None for None."""
    return None if val is None else val.strip()[:20]
|
||||
|
||||
|
||||
# returns (symbol | None, padding)
|
||||
def backtrack_for_symbol(lines, index):
    """Search upwards from ``lines[index]`` for the nearest symbol definition.

    Returns ``(symbol, padding)`` where ``padding`` is the number of blank
    lines crossed.  Comment lines are skipped.  The search stops with
    ``(None, padding)`` at the first non-blank, non-comment line that defines
    no symbol, at a line containing "decomp begins", or at the top of the
    file.
    """
    padding = 0
    # stop value -1 so that the very first line (index 0) is inspected too
    for i in range(index - 1, -1, -1):
        tline = lines[i].strip()
        matches = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
        )
        if matches is not None:
            return matches.group(1), padding
        if tline == "":
            padding = padding + 1
        elif not tline.startswith(";") or "decomp begins" in tline.lower():
            # we hit a non empty line (but it wasn't a symbol!)
            return None, padding
    return None, padding
|
||||
|
||||
|
||||
def symbol_on_line(line):
    """Return the symbol defined on ``line`` or None.

    Only leading whitespace is removed before matching, so the name must be
    followed by a whitespace character (a trailing newline counts).
    """
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.lstrip(),
    )
    return found.group(1) if found is not None else None
|
||||
|
||||
|
||||
def lookahead_for_symbol(lines, index):
    """Search downwards from ``lines[index]`` for the nearest symbol definition.

    Returns ``(symbol, blank_lines_crossed)``.  Comment lines are skipped; the
    search gives up with ``(None, count)`` at the first other non-blank line
    that defines no symbol, at a line containing "decomp begins", or at the
    end of ``lines``.
    """
    blank_count = 0
    for pos in range(index + 1, len(lines)):
        candidate = lines[pos].lstrip()
        hit = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", candidate
        )
        if hit is not None:
            return hit.group(1), blank_count
        text = candidate.strip()
        if text == "":
            blank_count += 1
            continue
        if "decomp begins" in candidate.lower() or not text.startswith(";"):
            # we hit a non empty line (but it wasn't a symbol!)
            return None, blank_count
    return None, blank_count
|
||||
|
||||
|
||||
def backtrack_for_code(lines, index):
    """Search upwards from ``lines[index]`` for the nearest line of code.

    Returns ``(line, padding)`` where ``line`` is the first line found that is
    neither blank nor a comment, and ``padding`` is the number of blank lines
    crossed.  ``(None, padding)`` is returned when a line containing
    "decomp begins" is reached first, or when the top of the file is reached,
    so callers can always unpack the result.
    """
    padding = 0
    # stop value -1 so that the very first line (index 0) is inspected too
    for i in range(index - 1, -1, -1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    return None, padding
|
||||
|
||||
|
||||
def lookahead_for_code(lines, index):
    """Search downwards from ``lines[index]`` for the nearest line of code.

    Returns ``(line, padding)`` where ``line`` is the first line found that is
    neither blank nor a comment, and ``padding`` is the number of blank lines
    crossed.  ``(None, padding)`` is returned when a line containing
    "decomp begins" is reached first, or when the end of ``lines`` is reached
    (for example a comment at the very end of the file), so callers can
    always unpack the result.
    """
    padding = 0
    for i in range(index + 1, len(lines), 1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    return None, padding
|
||||
|
||||
|
||||
# returns form, or none
|
||||
def is_line_start_of_form(line):
    """Return ``line`` if it opens a form, otherwise None.

    Comment lines (first non-whitespace character is ``;``) never start a
    form, even when the comment text contains parens.  Any other line counts
    when it contains ``(``, an optional name and then whitespace.
    """
    # lstrip, not rstrip: indented comments must be recognised as comments too
    if line.lstrip().startswith(";"):
        return None
    if re.search(r"\(\s*([^\s.]*)\s+", line) is not None:
        return line
    return None
|
||||
|
||||
|
||||
def has_form_ended(stack, line):
    """Feed ``line`` into the paren ``stack`` and report whether the form closed.

    Only the text before the first ``;`` is examined.  Each ``(`` is pushed
    and each ``)`` pops.  True is returned as soon as the stack is empty at
    any character other than an opening paren (including a ``)`` met with an
    already empty stack); False when the end of the text is reached first.
    ``stack`` is mutated in place.
    """
    code_part, _, _ = line.partition(";")
    for ch in code_part:
        if ch == "(":
            stack.append(ch)
            continue
        if ch == ")":
            if not stack:
                # unbalanced parens?
                return True
            stack.pop()
        # reached for ")" after popping and for every ordinary character
        if not stack:
            return True
    return False
|
||||
|
||||
|
||||
def append_form_metadata(comment, form_start_line):
    """Classify the form opened by ``form_start_line`` and store it on ``comment``.

    Sets ``containing_form_kind`` to "function" (defun / defun-debug),
    "behavior" (defbehavior), "method" (defmethod) or "unknown", together
    with ``containing_form_func_name`` and ``containing_form_type``.  The type
    is None for functions, and both name and type are None for unknown forms.
    """
    # (kind, pattern, whether the pattern also captures a type), tried in order
    recognisers = (
        ("function", r"\(defun(?:-debug)? ([^\s]*)", False),
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)", True),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)", True),
    )
    kind, func_name, form_type = "unknown", None, None
    for candidate_kind, pattern, captures_type in recognisers:
        found = re.search(pattern, form_start_line)
        if found is None:
            continue
        kind = candidate_kind
        func_name = found.group(1)
        form_type = found.group(2) if captures_type else None
        break
    comment.containing_form_kind = kind
    comment.containing_form_func_name = func_name
    comment.containing_form_type = form_type
|
||||
|
||||
|
||||
decomp_ignore_forms = []
|
||||
decomp_ignore_forms = ["defmethod inspect"]
|
||||
decomp_ignore_errors = False
|
||||
update_with_merge = False
|
||||
|
||||
with open(gsrc_path) as f:
|
||||
lines_temp = f.readlines()
|
||||
@@ -238,161 +83,27 @@ with open(gsrc_path) as f:
|
||||
decomp_ignore_errors = True
|
||||
if "og:ignore-form" in line:
|
||||
decomp_ignore_forms.append(line.partition("ignore-form:")[2].strip())
|
||||
if "og:update-with-merge" in line:
|
||||
update_with_merge = True
|
||||
lines.append(line)
|
||||
# track if we are inside a define*/defun/defmethod/deftype/defstate
|
||||
within_form = None
|
||||
line_num_in_form = None
|
||||
form_paren_stack = []
|
||||
found_output = False
|
||||
i = 0
|
||||
while i < len(lines):
|
||||
debug_lines.append(lines[i])
|
||||
tline = lines[i].lstrip()
|
||||
if "decomp begins" in tline.lower():
|
||||
found_output = True
|
||||
i = i + 1
|
||||
continue
|
||||
if not found_output:
|
||||
i = i + 1
|
||||
continue
|
||||
# actually process code
|
||||
if within_form is None:
|
||||
# lets see if we are now in one
|
||||
within_form = is_line_start_of_form(lines[i])
|
||||
if within_form is not None:
|
||||
line_num_in_form = 0
|
||||
if has_form_ended(form_paren_stack, lines[i]):
|
||||
within_form = None
|
||||
form_paren_stack = []
|
||||
elif within_form is not None:
|
||||
# check if the form has ended by counting parens
|
||||
if has_form_ended(form_paren_stack, lines[i]):
|
||||
within_form = None
|
||||
form_paren_stack = []
|
||||
line_num_in_form = 0
|
||||
else:
|
||||
line_num_in_form = line_num_in_form + 1
|
||||
|
||||
if tline.startswith(";") or tline.startswith("#|"):
|
||||
# treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
|
||||
# this is done because there is nothing to match them against (if a comment is inside them for example)
|
||||
# so we have to copy them in full
|
||||
in_deviation_block = False
|
||||
if "decomp deviation" in tline.lower() or tline.startswith("#|"):
|
||||
in_deviation_block = True
|
||||
current_comment = CommentMeta()
|
||||
current_comment.line_in_file = i
|
||||
current_comment.data = lines[i]
|
||||
(
|
||||
current_comment.symbol_before,
|
||||
current_comment.symbol_padding_before,
|
||||
) = backtrack_for_symbol(lines, i)
|
||||
(
|
||||
current_comment.code_before,
|
||||
current_comment.code_padding_before,
|
||||
) = backtrack_for_code(lines, i)
|
||||
current_comment.containing_form = within_form
|
||||
if within_form is not None:
|
||||
append_form_metadata(current_comment, within_form)
|
||||
current_comment.line_num_in_form = line_num_in_form
|
||||
current_comment.inline = False
|
||||
# look ahead to handle block comments
|
||||
if i + 1 < len(lines):
|
||||
next_line = lines[i + 1]
|
||||
if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
|
||||
in_deviation_block = False
|
||||
while i + 1 < len(lines) and (
|
||||
in_deviation_block
|
||||
or next_line.lstrip().startswith(";")
|
||||
or next_line.lstrip().startswith("|#")
|
||||
):
|
||||
debug_lines.append(lines[i + 1])
|
||||
i = i + 1
|
||||
current_comment.data = current_comment.data + next_line
|
||||
if i + 1 < len(lines):
|
||||
next_line = lines[i + 1]
|
||||
if "decomp deviation" in next_line.lower() or next_line.startswith(
|
||||
"|#"
|
||||
):
|
||||
in_deviation_block = False
|
||||
(
|
||||
current_comment.symbol_after,
|
||||
current_comment.symbol_padding_after,
|
||||
) = lookahead_for_symbol(lines, i)
|
||||
(
|
||||
current_comment.code_after,
|
||||
current_comment.code_padding_after,
|
||||
) = lookahead_for_code(lines, i)
|
||||
comments.append(current_comment)
|
||||
debug_lines.append(
|
||||
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
|
||||
current_comment.symbol_before,
|
||||
current_comment.symbol_padding_before,
|
||||
current_comment.symbol_after,
|
||||
current_comment.symbol_padding_after,
|
||||
debug_nice_formatted_code(current_comment.code_before),
|
||||
current_comment.code_padding_before,
|
||||
debug_nice_formatted_code(current_comment.code_after),
|
||||
current_comment.code_padding_after,
|
||||
)
|
||||
)
|
||||
debug_lines.append(
|
||||
";; [DEBUG]: in_form - {}...:{}\n".format(
|
||||
debug_nice_formatted_code(current_comment.containing_form),
|
||||
current_comment.line_num_in_form,
|
||||
)
|
||||
)
|
||||
# inline comments
|
||||
# TODO - cleanup duplication
|
||||
elif ";" in tline:
|
||||
current_comment = CommentMeta()
|
||||
current_comment.line_in_file = i
|
||||
current_comment.data = ";" + tline.partition(";")[2]
|
||||
(
|
||||
current_comment.symbol_before,
|
||||
current_comment.symbol_padding_before,
|
||||
) = backtrack_for_symbol(lines, i)
|
||||
(
|
||||
current_comment.symbol_after,
|
||||
current_comment.symbol_padding_after,
|
||||
) = lookahead_for_symbol(lines, i)
|
||||
(
|
||||
current_comment.code_before,
|
||||
current_comment.code_padding_before,
|
||||
) = backtrack_for_code(lines, i)
|
||||
(
|
||||
current_comment.code_after,
|
||||
current_comment.code_padding_after,
|
||||
) = lookahead_for_code(lines, i)
|
||||
current_comment.containing_form = within_form
|
||||
if within_form is not None:
|
||||
append_form_metadata(current_comment, within_form)
|
||||
current_comment.line_num_in_form = line_num_in_form
|
||||
current_comment.symbol_inline = symbol_on_line(tline)
|
||||
current_comment.inline = True
|
||||
current_comment.code_in_line = tline.partition(";")[0]
|
||||
comments.append(current_comment)
|
||||
debug_lines.append(
|
||||
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
|
||||
current_comment.symbol_before,
|
||||
current_comment.symbol_padding_before,
|
||||
current_comment.symbol_after,
|
||||
current_comment.symbol_padding_after,
|
||||
debug_nice_formatted_code(current_comment.code_before),
|
||||
current_comment.code_padding_before,
|
||||
debug_nice_formatted_code(current_comment.code_after),
|
||||
current_comment.code_padding_after,
|
||||
)
|
||||
)
|
||||
debug_lines.append(
|
||||
";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
|
||||
debug_nice_formatted_code(current_comment.containing_form),
|
||||
current_comment.line_num_in_form,
|
||||
debug_nice_formatted_code(current_comment.code_in_line),
|
||||
)
|
||||
)
|
||||
i = i + 1
|
||||
if args.preserve:
|
||||
comments, debug_lines = process_original_lines(lines)
|
||||
|
||||
# If we are going to `update_with_merge` then make a backup of the file, and
|
||||
# an empty file to use as the common ancestor.
|
||||
#
|
||||
# This means that all changes will be flagged as a conflict and will not be able to be
|
||||
# merged into the repo without being explicitly resolved
|
||||
if update_with_merge:
|
||||
subprocess.run(
|
||||
[
|
||||
"git",
|
||||
"restore",
|
||||
gsrc_path
|
||||
]
|
||||
)
|
||||
shutil.copyfile(gsrc_path, gsrc_path.replace(".gc", ".before.gc"))
|
||||
Path(gsrc_path.replace(".gc", ".empty.gc")).touch()
|
||||
|
||||
if args.debug:
|
||||
with open(gsrc_path, "w") as f:
|
||||
@@ -409,6 +120,8 @@ lines_to_ignore = [
|
||||
";; failed to figure",
|
||||
";; Used lq/sq",
|
||||
";; this part is debug only",
|
||||
";; WARN: Return type mismatch int vs none",
|
||||
";; WARN: Stack slot offset",
|
||||
]
|
||||
|
||||
if decomp_ignore_errors:
|
||||
@@ -428,7 +141,8 @@ def should_ignore_line(line):
|
||||
return False
|
||||
|
||||
|
||||
# TODO - check for existence probably
|
||||
# TODO - ignore brackets inside strings!
|
||||
|
||||
decomp_file_path = "./decompiler_out/{}/{}_disasm.gc".format(args.game, args.file)
|
||||
with open(decomp_file_path) as f:
|
||||
lines = f.readlines()
|
||||
@@ -459,10 +173,10 @@ with open(decomp_file_path) as f:
|
||||
if not skip_form:
|
||||
decomp_form_def_lines.append(decomp_within_form)
|
||||
decomp_lines.append(line)
|
||||
while i < len(lines):
|
||||
while i + 1 < len(lines):
|
||||
i = i + 1
|
||||
line = lines[i]
|
||||
if not skip_form:
|
||||
if not skip_form and not should_ignore_line(line):
|
||||
decomp_lines.append(line)
|
||||
if has_form_ended(decomp_form_paren_stack, line):
|
||||
decomp_within_form = None
|
||||
@@ -476,492 +190,64 @@ with open(decomp_file_path) as f:
|
||||
|
||||
# Step 3: Start merging the new code + comments
|
||||
final_lines = []
|
||||
decomp_started = False
|
||||
|
||||
|
||||
def get_symbol_at_line(line):
    """Return the symbol introduced by a definition keyword on ``line``.

    The stripped line is searched for ``define``, ``define-extern``,
    ``defun``, ``defstate`` or ``deftype`` followed by whitespace, a run of
    non-whitespace characters (the symbol) and one more whitespace character.

    Returns the symbol text, or ``None`` when the pattern does not occur.
    """
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.strip(),
    )
    return found.group(1) if found is not None else None
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_line_before(line):
    """Claim the comments that sit before the symbol defined on ``line``.

    A comment is claimed when its ``symbol_after`` equals the symbol found by
    ``get_symbol_at_line``. Claimed comments are removed from the module-level
    ``comments`` list (in place) because they are about to be placed.

    Returns the claimed comments in their original order, or an empty list
    when ``line`` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    claimed = []
    unclaimed = []
    for candidate in comments:
        if candidate.symbol_after == symbol:
            claimed.append(candidate)
        else:
            unclaimed.append(candidate)
    # slice-assign so every other holder of the list sees the removal
    comments[:] = unclaimed
    return claimed
|
||||
|
||||
|
||||
def padding_before_comment(comment):
    """Return the blank-line padding to emit in front of ``comment``.

    Padding is produced only for a comment outside any form
    (``containing_form is None``) that has a ``symbol_after``; it is then
    ``symbol_padding_after`` newline characters. Otherwise the result is "".
    """
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_after is not None:
        return "\n" * comment.symbol_padding_after
    return ""
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_inline(line):
    """Claim the comments that were written inline with the symbol on ``line``.

    A comment is claimed when its ``symbol_inline`` equals the symbol found by
    ``get_symbol_at_line``. Claimed comments are removed from the module-level
    ``comments`` list (in place) because they are about to be placed.

    Returns the claimed comments in their original order, or an empty list
    when ``line`` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    claimed = []
    unclaimed = []
    for candidate in comments:
        if candidate.symbol_inline == symbol:
            claimed.append(candidate)
        else:
            unclaimed.append(candidate)
    # slice-assign so every other holder of the list sees the removal
    comments[:] = unclaimed
    return claimed
|
||||
|
||||
|
||||
def padding_after_comment(comment):
    """Return the blank-line padding for a comment placed after its symbol.

    Padding is produced only for a comment outside any form
    (``containing_form is None``) that has a ``symbol_before``; it is then
    ``symbol_padding_before`` newline characters. Otherwise the result is "".
    """
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_before is not None:
        return "\n" * comment.symbol_padding_before
    return ""
|
||||
|
||||
|
||||
def relevant_symbol_comments_for_line_after(line):
    """Claim the comments that can only be placed after the symbol on ``line``.

    A comment is claimed when it has no ``symbol_after`` and its
    ``symbol_before`` equals the symbol found by ``get_symbol_at_line``.
    Comments that do have a ``symbol_after`` are left alone here: placing a
    comment before a symbol is preferred as the more accurate re-creation.
    Claimed comments are removed from the module-level ``comments`` list.

    Returns the claimed comments in their original order, or an empty list
    when ``line`` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    claimed = []
    unclaimed = []
    for candidate in comments:
        if candidate.symbol_after is None and candidate.symbol_before == symbol:
            claimed.append(candidate)
        else:
            unclaimed.append(candidate)
    # slice-assign so every other holder of the list sees the removal
    comments[:] = unclaimed
    return claimed
|
||||
|
||||
|
||||
# the first half of the defmethod/etc lines (before arg list) is less likely to change
|
||||
# so we want to split it to weight it more heavily
|
||||
def split_def_line(line):
    """Split a form's definition line at its first non-leading "(".

    Everything up to (not including) the first "(" found at index 1 or later
    becomes the first part; that paren and the remainder become the second
    part. A "(" at index 0 always stays in the first part. The first part
    (e.g. ``(defmethod name type``) is less likely to change than the
    argument list, so callers weight it more heavily.

    Returns a ``(first_part, second_part)`` tuple; ``second_part`` is "" when
    there is no such paren.
    """
    split_at = line.find("(", 1)
    if split_at == -1:
        return line, ""
    return line[:split_at], line[split_at:]
|
||||
|
||||
|
||||
def get_form_metadata(form_def_line):
    """Classify a form's definition line.

    Returns a ``(kind, first_name, second_name)`` tuple:

    * ``("function", name, None)`` for ``(defun name`` / ``(defun-debug name``
    * ``("behavior", a, b)`` for ``(defbehavior a b``
    * ``("method", a, b)`` for ``(defmethod a b``
    * ``("unknown", None, None)`` when none of the patterns is found

    The patterns are tried in that order and the first hit wins.
    """
    found = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_def_line)
    if found is not None:
        return "function", found.group(1), None

    two_name_forms = (
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
    )
    for kind, pattern in two_name_forms:
        found = re.search(pattern, form_def_line)
        if found is not None:
            return kind, found.group(1), found.group(2)

    return "unknown", None, None
|
||||
|
||||
|
||||
# Method names that get special treatment in different_method_names().
built_in_method_names = [
    "new",
    "delete",
    "print",
    "inspect",
    "length",
    "asize-of",
    "copy",
    "relocate",
    "memusage",
]


def different_method_names(form_func_name, comment_form_func_name):
    """Tell whether two method names must be treated as different methods.

    Names are only compared when at least one of them is listed in
    ``built_in_method_names``; if neither is, the result is always ``False``.
    Otherwise the result is whether the two names differ.
    """
    involves_built_in = (
        comment_form_func_name in built_in_method_names
        or form_func_name in built_in_method_names
    )
    if not involves_built_in:
        return False
    return form_func_name != comment_form_func_name
|
||||
|
||||
|
||||
def get_relevant_form_comments(form_def_line):
    """Claim the comments that belong to the form starting at ``form_def_line``.

    Every comment in the module-level ``comments`` list that has a
    ``containing_form`` is compared against ``form_def_line``:

    1. It is rejected when its containing form is of a known kind that is
       obviously unrelated (different kind, different function/behavior name,
       or different method type / built-in method name).
    2. It is claimed immediately when the definition parts match exactly
       (weighted score of 65.0) and this form's kind is known.
    3. Otherwise the weighted score (65% definition part, 35% rest) must reach
       50.0, and no line in ``decomp_form_def_lines`` may score better against
       this form; if one does, the comment is left for later.

    Claimed comments are removed from ``comments`` (in place) and returned in
    their original order.
    """
    form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
    code_def_part, code_rest = split_def_line(form_def_line)

    def obviously_unrelated(other_kind, other_func_name, other_type):
        # Shared disqualification rules, relative to the form being processed.
        if form_kind != other_kind:
            return True
        if form_kind in ("function", "behavior"):
            return other_func_name != form_func_name
        if form_kind == "method":
            return other_type != form_type or different_method_names(
                form_func_name, other_func_name
            )
        return False

    def better_decomp_line_exists(score_to_beat):
        # Look at ALL def lines from the decomp output; if any is a better
        # match, the comment should not be added here.
        # TODO - remove lines from the list as we find them to speed this up
        for decomp_def_line in decomp_form_def_lines:
            line_kind, line_func_name, line_type = get_form_metadata(decomp_def_line)
            if form_kind != "unknown" and obviously_unrelated(
                line_kind, line_func_name, line_type
            ):
                continue
            line_def_part, line_rest = split_def_line(decomp_def_line)
            line_def_score = fuzz.ratio(code_def_part, line_def_part) * 0.65
            if line_def_score == 65.0 and form_kind != "unknown":
                return True
            line_rest_score = fuzz.ratio(code_rest, line_rest) * 0.35
            if score_to_beat < line_def_score + line_rest_score:
                return True
            # TODO otherwise? still test?
        return False

    threshold = 50.0
    relevant_comments = []
    left_over = []
    for comment in comments:
        owner_form = comment.containing_form
        if owner_form is None:
            left_over.append(comment)
            continue

        owner_kind, owner_func_name, owner_type = get_form_metadata(owner_form)
        # First disqualify the form if it's obviously unrelated
        if owner_kind != "unknown" and obviously_unrelated(
            owner_kind, owner_func_name, owner_type
        ):
            left_over.append(comment)
            continue

        # Evaluate its score (comment's form vs. current def line)
        owner_def_part, owner_rest = split_def_line(owner_form)
        def_score = fuzz.ratio(code_def_part, owner_def_part) * 0.65
        if def_score == 65.0 and form_kind != "unknown":
            relevant_comments.append(comment)
            continue

        combined_score = def_score + fuzz.ratio(code_rest, owner_rest) * 0.35
        if combined_score < threshold or better_decomp_line_exists(combined_score):
            left_over.append(comment)
            continue

        relevant_comments.append(comment)

    # slice-assign so every other holder of the list sees the removal
    comments[:] = left_over
    return relevant_comments
|
||||
|
||||
|
||||
# Simple fuzz ratio, but removes obvious outliers like empty lines / lines with only a paren
|
||||
def score_alg(line1, line2):
    """Score how similar two lines are, ignoring meaningless lines.

    Both lines are stripped first. If either is empty or consists of a single
    paren, -1 is returned so it can never win a comparison; otherwise the
    result is ``fuzz.ratio`` of the two stripped lines.
    """
    outliers = ("", ")", "(")
    tline1 = line1.strip()
    tline2 = line2.strip()
    if tline1 in outliers or tline2 in outliers:
        return -1
    return fuzz.ratio(tline1, tline2)
|
||||
|
||||
|
||||
# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
|
||||
|
||||
with open(gsrc_path) as f:
|
||||
lines = f.readlines()
|
||||
within_form = None
|
||||
line_num_in_form = None
|
||||
form_paren_stack = []
|
||||
for line in lines:
|
||||
if "[DEBUG]" in line:
|
||||
continue
|
||||
if line.lower().lstrip().startswith(";; decomp begins"):
|
||||
decomp_started = True
|
||||
if args.preserve:
|
||||
merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines)
|
||||
else:
|
||||
with open(gsrc_path) as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
final_lines.append(line)
|
||||
break
|
||||
if not decomp_started:
|
||||
if line.lower().startswith(";; decomp begins"):
|
||||
break
|
||||
for line in decomp_lines:
|
||||
final_lines.append(line)
|
||||
continue
|
||||
|
||||
i = 0
|
||||
while i < len(decomp_lines):
|
||||
line = decomp_lines[i]
|
||||
# Otherwise, its a part of the output we have to be more careful about
|
||||
# For every line in the decompiled output, we scan our comment list to see if anything matches
|
||||
# if it does, we insert it appropriately and remove the comment from the list
|
||||
#
|
||||
# This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
|
||||
if within_form is None:
|
||||
# lets see if we are now in one
|
||||
within_form = is_line_start_of_form(line)
|
||||
# TODO - check line for symbol matches?
|
||||
if within_form is not None:
|
||||
line_num_in_form = 0
|
||||
if has_form_ended(form_paren_stack, line):
|
||||
within_form = None
|
||||
form_paren_stack = []
|
||||
else:
|
||||
# Get all of the lines of the form at once
|
||||
form_start = decomp_lines[i]
|
||||
form_lines = [form_start]
|
||||
while i < len(decomp_lines):
|
||||
i = i + 1
|
||||
line = decomp_lines[i]
|
||||
if has_form_ended(form_paren_stack, line):
|
||||
within_form = None
|
||||
form_paren_stack = []
|
||||
break
|
||||
else:
|
||||
form_lines.append(line)
|
||||
# Add any comments needed to the form contents
|
||||
# - first we get all comments that have match well with the form's start line (ie. defmethod ....)
|
||||
form_comments = get_relevant_form_comments(form_start)
|
||||
# - for each comment, let's find which line matches it the best,
|
||||
# if NONE exceed the threshold (if both match the same, pick the first), we default to the line offset
|
||||
for comment in form_comments:
|
||||
highest_score = -1
|
||||
index_to_insert = -1
|
||||
threshold = 50.0
|
||||
place_kind = None
|
||||
for index, form_line in enumerate(form_lines):
|
||||
# skip any comments that were previously added
|
||||
if form_line.lstrip().startswith(";"):
|
||||
continue
|
||||
if comment.code_in_line is not None:
|
||||
score = score_alg(form_line, comment.code_in_line)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = index
|
||||
highest_score = score
|
||||
place_kind = "inline"
|
||||
if comment.code_before is not None:
|
||||
score = score_alg(form_line, comment.code_before)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = index
|
||||
highest_score = score
|
||||
place_comment_after = True
|
||||
place_kind = "next_line"
|
||||
if comment.code_after is not None:
|
||||
score = score_alg(form_line, comment.code_after)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = index
|
||||
highest_score = score
|
||||
place_comment_after = False
|
||||
place_kind = "before_line"
|
||||
# add the comment!
|
||||
if index_to_insert == -1:
|
||||
if comment.inline:
|
||||
form_lines[comment.line_num_in_form] = (
|
||||
form_lines[index_to_insert].rstrip()
|
||||
+ " "
|
||||
+ comment.data
|
||||
)
|
||||
else:
|
||||
form_lines.insert(
|
||||
comment.line_num_in_form, comment.data
|
||||
)
|
||||
elif comment.inline:
|
||||
form_index = index_to_insert
|
||||
if place_kind == "next_line":
|
||||
form_index = index_to_insert + 1
|
||||
form_lines[form_index] = (
|
||||
form_lines[form_index].rstrip() + " " + comment.data
|
||||
)
|
||||
elif place_kind == "next_line":
|
||||
form_lines.insert(
|
||||
index_to_insert + 1,
|
||||
padding_before_comment(comment) + comment.data,
|
||||
)
|
||||
else:
|
||||
form_lines.insert(
|
||||
index_to_insert,
|
||||
padding_after_comment(comment) + comment.data,
|
||||
)
|
||||
# Add the lines to the final output
|
||||
for form_line in form_lines:
|
||||
final_lines.append(form_line)
|
||||
|
||||
# Otherwise, we are at the top-level!
|
||||
if within_form is None:
|
||||
before_comments = relevant_symbol_comments_for_line_before(line)
|
||||
for comment in before_comments:
|
||||
final_lines.append(padding_before_comment(comment) + comment.data)
|
||||
inline_comments = relevant_symbol_comments_for_inline(line)
|
||||
if len(inline_comments) > 0:
|
||||
comment_str = ""
|
||||
for comment in inline_comments:
|
||||
comment_str = comment_str + comment.data.strip()
|
||||
comment_str = comment_str.replace(";", "")
|
||||
final_lines.append(
|
||||
"{} ;; {}".format(line.rstrip(), comment_str.strip())
|
||||
)
|
||||
else:
|
||||
final_lines.append(line)
|
||||
after_comments = relevant_symbol_comments_for_line_after(line)
|
||||
for comment in after_comments:
|
||||
final_lines.append(padding_after_comment(comment) + comment.data)
|
||||
# next line
|
||||
i = i + 1
|
||||
|
||||
# Step 3.b: Handle any remaining top level comments
|
||||
# If we can't find a code line that meets a threshold, default to their line number
|
||||
# - Why is this done after: if a comment is associated with nothing but code, we have no
|
||||
# guarantee where it should go, so we have to wait until all code is populated
|
||||
# This is SUPER inefficient, so hopefully we've processed nearly all comments by this point
|
||||
for comment in comments:
|
||||
within_form = None
|
||||
line_num_in_form = None
|
||||
form_paren_stack = []
|
||||
found_output = True
|
||||
i = 0
|
||||
index_to_insert = -1
|
||||
highest_score = -1
|
||||
place_comment_after = True
|
||||
threshold = 50.0
|
||||
while i < len(final_lines):
|
||||
debug_lines.append(final_lines[i])
|
||||
tline = final_lines[i].lstrip()
|
||||
if "decomp begins" in tline.lower():
|
||||
found_output = True
|
||||
i = i + 1
|
||||
continue
|
||||
if not found_output:
|
||||
i = i + 1
|
||||
continue
|
||||
line = final_lines[i]
|
||||
# We can try to claw back a bit of efficiency by skipping the inside of forms
|
||||
if within_form is not None:
|
||||
# check if the form has ended by counting parens
|
||||
if has_form_ended(form_paren_stack, line):
|
||||
if comment.code_in_line is not None:
|
||||
score = score_alg(line, comment.code_in_line)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
if comment.code_before is not None:
|
||||
score = score_alg(line, comment.code_before)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = True
|
||||
if comment.code_after is not None:
|
||||
score = score_alg(line, comment.code_after)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = False
|
||||
within_form = None
|
||||
form_paren_stack = []
|
||||
line_num_in_form = 0
|
||||
else:
|
||||
line_num_in_form = line_num_in_form + 1
|
||||
else:
|
||||
# lets see if we are now in a form
|
||||
within_form = is_line_start_of_form(line)
|
||||
if within_form is not None:
|
||||
if comment.code_in_line is not None:
|
||||
score = score_alg(line, comment.code_in_line)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
if comment.code_before is not None:
|
||||
score = score_alg(line, comment.code_before)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = True
|
||||
if comment.code_after is not None:
|
||||
score = score_alg(line, comment.code_after)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = False
|
||||
line_num_in_form = 0
|
||||
else:
|
||||
# just normal code, check it
|
||||
if comment.code_in_line is not None:
|
||||
score = score_alg(line, comment.code_in_line)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
if comment.code_before is not None:
|
||||
score = score_alg(line, comment.code_before)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = True
|
||||
if comment.code_after is not None:
|
||||
score = score_alg(line, comment.code_after)
|
||||
if score >= threshold and score > highest_score:
|
||||
index_to_insert = i
|
||||
highest_score = score
|
||||
place_comment_after = False
|
||||
i = i + 1
|
||||
# end of while loop
|
||||
# add the comment!
|
||||
if index_to_insert == -1:
|
||||
if comment.inline:
|
||||
final_lines[comment.line_in_file] = (
|
||||
final_lines[comment.line_in_file].rstrip() + " " + comment.data
|
||||
)
|
||||
else:
|
||||
final_lines.insert(comment.line_in_file, comment.data)
|
||||
elif comment.inline:
|
||||
final_lines[index_to_insert] = (
|
||||
final_lines[index_to_insert].rstrip() + " " + comment.data
|
||||
)
|
||||
elif place_comment_after:
|
||||
final_lines.insert(
|
||||
index_to_insert + 1,
|
||||
padding_before_comment(comment) + comment.data,
|
||||
)
|
||||
else:
|
||||
final_lines.insert(
|
||||
index_to_insert,
|
||||
padding_after_comment(comment) + comment.data,
|
||||
)
|
||||
if args.preserve:
|
||||
handle_dangling_blocks(comments, final_lines, debug_lines)
|
||||
|
||||
# Step 4: Write it out
|
||||
# Step 4.a: Remove excessive new-lines from the end of the output, only leave a single empty new-line
|
||||
lines_to_ignore = 0
|
||||
i = len(final_lines) - 1
|
||||
while i > 0 and (final_lines[i] == "\n" or final_lines[i] == "0\n"):
|
||||
lines_to_ignore = lines_to_ignore + 1
|
||||
i = i - 1
|
||||
|
||||
print("ignoring - {}".format(lines_to_ignore))
|
||||
|
||||
# Step 4.b: Write it out
|
||||
with open(gsrc_path, "w") as f:
|
||||
f.writelines(final_lines)
|
||||
i = 0
|
||||
while i + lines_to_ignore < len(final_lines):
|
||||
f.write(final_lines[i])
|
||||
i = i + 1
|
||||
|
||||
# If we need to merge, now is the time!
|
||||
if update_with_merge:
|
||||
shutil.move(gsrc_path, gsrc_path.replace(".gc", ".after.gc"))
|
||||
shutil.move(gsrc_path.replace(".gc", ".before.gc"), gsrc_path)
|
||||
subprocess.run(
|
||||
[
|
||||
"git",
|
||||
"merge-file",
|
||||
gsrc_path,
|
||||
gsrc_path.replace(".gc", ".empty.gc"),
|
||||
gsrc_path.replace(".gc", ".after.gc"),
|
||||
"-L",
|
||||
"Before Updating",
|
||||
"-L",
|
||||
"ignored",
|
||||
"-L",
|
||||
"After Updating",
|
||||
]
|
||||
)
|
||||
if os.path.exists(gsrc_path.replace(".gc", ".empty.gc")):
|
||||
os.remove(gsrc_path.replace(".gc", ".empty.gc"))
|
||||
if os.path.exists(gsrc_path.replace(".gc", ".before.gc")):
|
||||
os.remove(gsrc_path.replace(".gc", ".before.gc"))
|
||||
if os.path.exists(gsrc_path.replace(".gc", ".after.gc")):
|
||||
os.remove(gsrc_path.replace(".gc", ".after.gc"))
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
# Updates files in gsrc if they are modified in the reference test folder
|
||||
# Uses git
|
||||
import subprocess
|
||||
from git import Repo
|
||||
|
||||
repo = Repo("./")
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import glob
|
||||
|
||||
parser = argparse.ArgumentParser("update-gsrc-via-refs")
|
||||
parser.add_argument("--game", help="The name of the game", type=str)
|
||||
parser.add_argument("--decompiler", help="The path to the decompiler", type=str)
|
||||
parser.add_argument("--decompiler_config", help="The decomp config", type=str)
|
||||
parser.add_argument("--file_pattern", help="Provide a glob pattern to find files, instead of using git status. Relative to the reference test folder", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
def get_files_via_git():
    """Collect object names from reference files that git reports as touched.

    Paths come from ``repo.index.diff(None)`` and ``repo.untracked_files``.
    A path is kept when it contains ``args.game`` and "_REF"; the returned
    name is its basename with "_REF.gc" removed.

    Returns a set of names.
    """
    candidate_paths = [
        item.b_rawpath.decode("utf-8") for item in repo.index.diff(None)
    ]
    candidate_paths.extend(repo.untracked_files)
    return {
        os.path.basename(path).replace("_REF.gc", "")
        for path in candidate_paths
        if args.game in path and "_REF" in path
    }
|
||||
|
||||
def get_files_via_glob():
    """Collect object names from reference files matching ``args.file_pattern``.

    The pattern is resolved (recursively) relative to
    ``./test/decompiler/reference/<args.game>/``. Each match contributes its
    basename with "_REF.gc" removed.

    Returns a set of names.
    """
    pattern = "./test/decompiler/reference/{}/{}".format(
        args.game, args.file_pattern
    )
    return {
        os.path.basename(match).replace("_REF.gc", "")
        for match in glob.glob(pattern, recursive=True)
    }
|
||||
|
||||
# Get a list of changed files, as well as new files.
# An explicit --file_pattern takes priority over asking git.
if args.file_pattern:
    file_names = get_files_via_glob()
else:
    file_names = get_files_via_git()

for file_name in file_names:
    print("Decompiling - {}".format(file_name))
    # Decompile file: the config override restricts the run to this object
    subprocess.run(
        [
            args.decompiler,
            "./decompiler/config/{}".format(args.decompiler_config),
            "./iso_data",
            "./decompiler_out",
            "--config-override",
            '{{"allowed_objects": ["{}"]}}'.format(file_name),
        ]
    )
    print("Updating - {}".format(file_name))
    # Update gsrc. The arguments are passed as a list (no shell), matching the
    # decompiler call above, so names containing spaces or shell
    # metacharacters reach the script unchanged.
    subprocess.run(
        [
            "python",
            "./scripts/gsrc/update-from-decomp.py",
            "--game",
            args.game,
            "--file",
            file_name,
        ]
    )
|
||||
@@ -29,3 +29,24 @@ def get_gsrc_path_from_filename(game_name, file_name):
|
||||
print("{} couldn't find in /goal_src/{}!".format(file_name, game_name))
|
||||
exit(1)
|
||||
return path
|
||||
|
||||
def get_alltypes_path_from_game(game_name):
    """Return the path of the all-types file for ``game_name``.

    "jak1" maps to the top-level config file; any other name maps to the
    jak2 one.
    """
    return (
        "./decompiler/config/all-types.gc"
        if game_name == "jak1"
        else "./decompiler/config/jak2/all-types.gc"
    )
|
||||
|
||||
def get_ref_path_from_filename(game_name, file_name, ref_folder):
    """Build the path of the ``_REF.gc`` reference file for ``file_name``.

    The game's file list is searched for the first entry whose element 2 is 3
    and whose element 0 equals ``file_name``; element 4 of that entry is used
    as the sub-directory. If nothing (or an empty sub-directory) is found, a
    message is printed and the process exits with status 1.

    Returns ``<ref_folder>/<game_name>/<sub-directory>/<file_name>_REF.gc``.
    """
    src_path = next(
        (
            entry[4]
            for entry in get_file_list(game_name)
            if entry[2] == 3 and entry[0] == file_name
        ),
        "",
    )
    if src_path == "":
        print("couldn't determine ref path for {}:{}!".format(game_name, file_name))
        exit(1)
    return os.path.join(
        ref_folder, game_name, src_path, "{}_REF.gc".format(file_name)
    )
|
||||
|
||||
@@ -3,4 +3,4 @@
|
||||
# Directory of this script
|
||||
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||
|
||||
"${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
|
||||
"${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
|
||||
|
||||
@@ -5,6 +5,7 @@ vars:
|
||||
GK_BIN_RELEASE_DIR: './build/game'
|
||||
DECOMP_BIN_RELEASE_DIR: './build/decompiler'
|
||||
MEMDUMP_BIN_RELEASE_DIR: './build/tools'
|
||||
TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
|
||||
OFFLINETEST_BIN_RELEASE_DIR: './build'
|
||||
GOALCTEST_BIN_RELEASE_DIR: './build'
|
||||
EXE_FILE_EXTENSION: ''
|
||||
|
||||
@@ -5,6 +5,7 @@ vars:
|
||||
GK_BIN_RELEASE_DIR: './build/game'
|
||||
DECOMP_BIN_RELEASE_DIR: './build/decompiler'
|
||||
MEMDUMP_BIN_RELEASE_DIR: './build/tools'
|
||||
TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
|
||||
OFFLINETEST_BIN_RELEASE_DIR: './build'
|
||||
GOALCTEST_BIN_RELEASE_DIR: './build'
|
||||
EXE_FILE_EXTENSION: ''
|
||||
|
||||
@@ -5,6 +5,7 @@ vars:
|
||||
GK_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
DECOMP_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
MEMDUMP_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
TYPESEARCH_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
OFFLINETEST_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
GOALCTEST_BIN_RELEASE_DIR: './out/build/Release/bin'
|
||||
EXE_FILE_EXTENSION: '.exe'
|
||||
|
||||
@@ -0,0 +1,14 @@
|
||||
import argparse
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--game")
|
||||
args = parser.parse_args()
|
||||
import os
|
||||
import glob
|
||||
|
||||
def delete_extension(ext):
    """Delete every ``*.<ext>`` file directly inside ``./decompiler_out/<game>/``.

    The game directory name is taken from ``args.game``.
    """
    pattern = "./decompiler_out/{}/*.{}".format(args.game, ext)
    for stale_path in glob.glob(pattern):
        os.remove(stale_path)
|
||||
|
||||
delete_extension("gc")
|
||||
delete_extension("asm")
|
||||
@@ -0,0 +1,10 @@
|
||||
import argparse
|
||||
import zipfile
|
||||
|
||||
# Command line: --file is the zip archive to read, --out the extraction target.
parser = argparse.ArgumentParser()
parser.add_argument("--file")
parser.add_argument("--out")
args = parser.parse_args()

# Unpack the whole archive into the output directory.
with zipfile.ZipFile(args.file, "r") as archive:
    archive.extractall(args.out)
|
||||
@@ -3,32 +3,43 @@ import glob
|
||||
import argparse
|
||||
import shutil
|
||||
|
||||
from gsrc.utils import get_ref_path_from_filename
|
||||
|
||||
## Script to update failing _REF.gc files
|
||||
## Instructions:
|
||||
## run offline-test with the `--dump-mode` flag set. This generates a "failures" folder.
|
||||
## run offline-test with the `--dump_current_output` flag set. This generates a "failures" folder.
|
||||
## update reference like this
|
||||
## python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference
|
||||
## python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference --game [jak1|jak2]
|
||||
|
||||
def get_goal_files(root_dir):
|
||||
return [f for file in os.walk(root_dir) for f in glob.glob(os.path.join(file[0], '*.gc'))]
|
||||
def get_failures(root_dir):
    """Return the paths of all ``*.gc`` files found under ``root_dir``.

    Every directory visited by ``os.walk(root_dir)`` is globbed for ``*.gc``;
    results are returned in walk order.
    """
    found = []
    for dir_path, _dir_names, _file_names in os.walk(root_dir):
        found.extend(glob.glob(os.path.join(dir_path, "*.gc")))
    return found
|
||||
|
||||
# removesuffix only added in python 3.9....
|
||||
def removesuffix(self: str, suffix: str, /) -> str:
    """Backport of ``str.removesuffix`` for Python older than 3.9.

    Returns ``self`` without ``suffix`` when it ends with a non-empty
    ``suffix``; otherwise returns a copy of ``self`` unchanged.
    """
    # An empty suffix must be a no-op: every string "ends with" "", and
    # self[:-0] would be self[:0], i.e. the empty string.
    if suffix and self.endswith(suffix):
        return self[: -len(suffix)]
    return self[:]
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(dest='diff', help='the failures folder')
|
||||
parser.add_argument(dest='reference', help='the test/decompiler/reference folder')
|
||||
parser.add_argument(dest="diff", help="the failures folder")
|
||||
parser.add_argument(dest="reference", help="the test/decompiler/reference folder")
|
||||
parser.add_argument("--game", help="The name of the game (jak1/jak2)", type=str)
|
||||
args = parser.parse_args()
|
||||
|
||||
location_map = {os.path.basename(x) : x for x in get_goal_files(args.reference)}
|
||||
|
||||
for replacement in get_goal_files(args.diff):
|
||||
base = os.path.basename(replacement)
|
||||
if base not in location_map:
|
||||
print("Could not find file {}".format(base))
|
||||
exit(-1)
|
||||
print("replace {} with {}".format(location_map[base], replacement))
|
||||
shutil.copyfile(replacement, location_map[base])
|
||||
for replacement in get_failures(args.diff):
|
||||
obj_name = removesuffix(os.path.basename(replacement), ".gc").replace("_REF", "")
|
||||
|
||||
# Find gsrc path, given game-name
|
||||
ref_path = get_ref_path_from_filename(args.game, obj_name, args.reference)
|
||||
|
||||
print("replace {} with {}".format(ref_path, replacement))
|
||||
shutil.copyfile(replacement, ref_path)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user