Merge remote-tracking branch 'open-goal/master' into v/translations

This commit is contained in:
Tyler Wilding
2023-02-23 17:13:27 -05:00
2735 changed files with 2068207 additions and 46519 deletions
+4
View File
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Start the Release build of goalc with the --auto-lt and --user-auto flags.
out\build\Release\bin\goalc --auto-lt --user-auto
REM Keep the console window open until a key is pressed.
pause
-4
View File
@@ -1,4 +0,0 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Start the Release build of goalc with the --user-auto flag.
out\build\Release\bin\goalc --user-auto
REM Keep the console window open until a key is pressed.
pause
+1 -1
View File
@@ -1,4 +1,4 @@
@echo off
cd ..\..
out\build\Release\bin\goalc --auto-lt --user-auto
out\build\Release\bin\goalc --user-auto
pause
+4
View File
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Start the Release build of goalc with --user-auto, selecting jak2 through --game.
out\build\Release\bin\goalc --user-auto --game jak2
REM Keep the console window open until a key is pressed.
pause
+4
View File
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Start the Release build of gk with the -boot, -fakeiso, -debug, -v and -jak2 flags.
out\build\Release\bin\gk -boot -fakeiso -debug -v -jak2
REM Keep the console window open until a key is pressed.
pause
+4
View File
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Start the Release build of gk with the -fakeiso, -debug, -v and -jak2 flags (no -boot here).
out\build\Release\bin\gk -fakeiso -debug -v -jak2
REM Keep the console window open until a key is pressed.
pause
+1 -1
View File
@@ -1,2 +1,2 @@
cd ..\..
git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
git update-index --assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
+1 -1
View File
@@ -1,2 +1,2 @@
cd ..\..
git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc
git update-index --no-assume-unchanged decompiler\config\jak1_ntsc_black_label.jsonc decompiler\config\jak2_ntsc_v1.jsonc
+1 -1
View File
@@ -1,6 +1,6 @@
@echo off
cd ..\..
out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak1\ --game jak1
scripts\update_decomp_reference.py failures\ test\decompiler\reference\
python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak1
RMDIR /Q/S failures
pause
@@ -0,0 +1,6 @@
@echo off
REM Move two directories up so the relative paths below resolve (presumably the repository root - confirm).
cd ..\..
REM Run the Release build of offline-test for jak2 with -d, reading ISO data from iso_data\jak2\.
out\build\Release\bin\offline-test -d --iso_data_path iso_data\jak2\ --game jak2
REM Hand the failures\ directory and the decompiler reference directory to update_decomp_reference.py for jak2.
python3 scripts\update_decomp_reference.py failures\ test\decompiler\reference\ --game jak2
REM Delete the failures directory and everything below it without prompting.
RMDIR /Q/S failures
REM Keep the console window open until a key is pressed.
pause
+4
View File
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative path below resolves (presumably the repository root - confirm).
cd ..\..
REM Run goalc-test restricted by --gtest_filter to Jak2TypeConsistency.TypeConsistency.
out\build\Release\bin\goalc-test --gtest_filter="Jak2TypeConsistency.TypeConsistency"
REM Keep the console window open until a key is pressed.
pause
@@ -0,0 +1,4 @@
@echo off
REM Move two directories up so the relative paths below resolve (presumably the repository root - confirm).
cd ..\..
REM Run update-gsrc-via-refs.py for jak2, pointing it at the Release decompiler executable and the jak2_ntsc_v1 config.
python3 scripts\gsrc\update-gsrc-via-refs.py --game jak2 --decompiler out\build\Release\bin\decompiler.exe --decompiler_config .\decompiler\config\jak2_ntsc_v1.jsonc
REM Keep the console window open until a key is pressed.
pause
+15
View File
@@ -0,0 +1,15 @@
import os
import glob
def get_goal_files(root_dir, ext = "*.gc"):
    """Get all GOAL source files under root_dir.

    Walks `root_dir` recursively and, in every directory visited, collects the
    entries matching the glob pattern `ext`.

    Args:
        root_dir: directory to search (searched recursively).
        ext: glob pattern applied to the file names in each directory.

    Returns:
        A list of matching paths, each joined onto the directory it was found in.
    """
    goal_files = []
    for dirpath, _dirnames, _filenames in os.walk(root_dir):
        # Escape the directory part so that glob metacharacters in a folder
        # name ("[", "*", "?") are matched literally; only `ext` is a pattern.
        pattern = os.path.join(glob.escape(dirpath), ext)
        goal_files.extend(glob.glob(pattern))
    return goal_files
# Gather every line that starts with "(def" from the files found under the
# jak2 import directory and print them in one go.
all_files = get_goal_files("./decompiler_out/jak2/import")
def_lines = []
for path in all_files:
    with open(path) as handle:
        def_lines.extend(text for text in handle if text.startswith("(def"))
result = "".join(def_lines)
print(result)
+4 -5
View File
@@ -4,7 +4,7 @@ import argparse
### Script to track decompilation progress.
### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src
### Example usage: python3 scripts/decomp_progress.py ~/jak-project/goal_src/jak2
def get_goal_files(root_dir, ext = "*.gc"):
"""Get all GOAL source files under root_dir."""
@@ -29,7 +29,7 @@ def print_table(stats, total_gc_files):
print("-------------------------------------")
print("| {: <24} | {: >6} |".format("TOTAL", total_lines))
print("-------------------------------------")
estimated_lines = 500000
estimated_lines = 1000000
print("Progress: {}/{} lines ({:.2f}%)".format(total_lines, estimated_lines, 100. * total_lines / estimated_lines))
print("{}/{} files modified from template ({:.2f}%)".format(len(stats), total_gc_files,
100. * len(stats) / total_gc_files))
@@ -41,8 +41,7 @@ def main():
args = parser.parse_args()
all_files = get_goal_files(args.goal_src)
ref_files = get_goal_files(args.goal_src + "/../test/", "*_REF.gc")
ref_files = get_goal_files(args.goal_src + "/../../test/decompiler/reference/jak2", "*_REF.gc")
ref_files_no_ext = [os.path.basename(fn)[:-7] for fn in ref_files]
@@ -62,7 +61,7 @@ def main():
total_gc_files += 1
if line_count == 7 or short_name in excluded_files:
if line_count < 10 or short_name in excluded_files:
# files this short are still just the untouched template (7 lines), so skip them.
continue
-57
View File
@@ -1,57 +0,0 @@
# Helper script: collects every `L<digits>` label mentioned in a file's disassembly output
# and tries to work out a type for each label from the matching IR2 listing.
# The results are printed as `["<label>", "<type>", true]` entries.
import argparse
parser = argparse.ArgumentParser()
# --file: base name that is substituted into the decompiler_out/jak1/ paths below
parser.add_argument("--file")
args = parser.parse_args()
import re
labels_with_no_type = []
file_path = "decompiler_out/jak1/{}_disasm.gc".format(args.file)
with open(file_path) as f:
# Find all label references in the disassembly
content = f.readlines()
for line in content:
# every "L" followed by digits counts as a label reference
labels_with_no_type = labels_with_no_type + re.findall(r'L\d+', line)
# dedup list (dict.fromkeys keeps the first-seen order)
labels_with_no_type = list(dict.fromkeys(labels_with_no_type))
# let's go try to identify the types from the IR2 file if we can
label_lines = []
file_path = "decompiler_out/jak1/{}_ir2.asm".format(args.file)
with open(file_path) as f:
# Read the whole IR2 listing so the line before a label can be inspected
content = f.readlines()
prev_line = ""
# set when an anonymous function header is seen; consumed by the next matching label
next_label_will_be_lambda = False
for i, line in enumerate(content):
if ".function (anon-function" in line:
next_label_will_be_lambda = True
if line.startswith("L"):
for label in labels_with_no_type:
# the trailing ":" avoids matching a longer label sharing the same prefix (L1 vs L10)
if line.startswith("{}:".format(label)):
# If we were expecting a lambda
if next_label_will_be_lambda:
label_lines.append("[\"{}\", \"_lambda_\", true]".format(label))
# removing while iterating is only safe because of the immediate break
labels_with_no_type.remove(label)
next_label_will_be_lambda = False
break
# special case for pairs
if "(offset 2)" in line:
label_lines.append("[\"{}\", \"pair\", true]".format(label))
labels_with_no_type.remove(label)
break
# Check if the previous line has a `.type`
# NOTE(review): for i == 0 this reads content[-1], i.e. the last line of the file - confirm that is harmless
prev_line = content[i-1]
if ".type" in prev_line:
the_type = prev_line.split(".type ")[1].strip()
label_lines.append("[\"{}\", \"{}\", true]".format(label, the_type))
labels_with_no_type.remove(label)
break
# Print out the labels
print("Here are the labels I couldn't find a type for:")
for label in labels_with_no_type:
print("- {}".format(label))
print("And here are the ones I could:")
print(",\n".join(label_lines))
+25
View File
@@ -0,0 +1,25 @@
# Merge tools rely on specific algorithms or assumptions to detect conflicts,
# and not all of them will visibly flag every conflict, even when the standard marker format is used.
#
# So this script makes sure that no conflict markers go unnoticed in goal_src, at least.
import os
# Paths of every file under ./goal_src that still contains a conflict start marker.
files_with_unresolved_conflicts = []
for dirpath, subdirs, files in os.walk("./goal_src"):
    for filename in files:
        file_path = os.path.join(dirpath, filename)
        # Decode leniently: a single undecodable byte must not abort the whole
        # check with a UnicodeDecodeError. The marker itself is plain ASCII, so
        # replacing bad bytes cannot hide or invent a match.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            # Stream the file and stop at the first marker instead of loading every line.
            if any("<<<<<<<" in line for line in f):
                files_with_unresolved_conflicts.append(file_path)

# Exit status 0: nothing found. SystemExit is raised directly because the
# `exit` helper is injected by the `site` module and is not always available.
if not files_with_unresolved_conflicts:
    raise SystemExit(0)

# Exit status 1: list every offending file.
print("There are unresolved conflicts in ./goal_src/")
for file in files_with_unresolved_conflicts:
    print(file)
raise SystemExit(1)
-67
View File
@@ -1,67 +0,0 @@
import re
from jak1_file_list import file_list
import argparse
import os
parser = argparse.ArgumentParser()
# --files: comma-separated file names (no extension) to check. Marked as
# required so a missing argument is reported by argparse instead of failing
# later with an AttributeError on `None.split`.
parser.add_argument("--files", required=True)
args = parser.parse_args()
files = args.files.split(",")

throw_error = False

# Raw strings keep the regex escapes (\d, \s, \() out of Python's own string
# escape processing; as plain literals they are invalid escape sequences.
method_split_pattern = re.compile(r't9-\d+\s\(method-of-object')
function_split_pattern = re.compile(r'\(t9-\d+\)')
missing_res_tag_pattern = re.compile(r'(sv-\d{2,} int)')
decompiler_error_pattern = re.compile(r';; ERROR')
missing_arg = re.compile(r'local-vars.*none\)')

# Checked in this order; only the first pattern that matches a line is reported.
line_checks = [
    ("method_split", method_split_pattern),
    ("function_split", function_split_pattern),
    ("missing_res_tag", missing_res_tag_pattern),
    ("decompiler_error", decompiler_error_pattern),
    ("missing_arg", missing_arg),
]

for file in files:
    # Look the file up in file_list: index 0 is compared with the name and
    # index 4 is used as the directory below ./goal_src. Entries whose index 2
    # is not 3 are skipped (meaning of that field is not visible here - confirm).
    src_path = None
    for f in file_list:
        if f[2] != 3:
            continue
        if f[0] == file:
            src_path = f[4]
            break

    # A name that is not in file_list at all is reported like a missing
    # directory; previously it slipped through and crashed in open().
    if src_path is None or not os.path.exists("./goal_src/{}".format(src_path)):
        print("{} couldn't find in /goal_src!".format(file))
        throw_error = True
        continue

    file_path = "./goal_src/{}/{}.gc".format(src_path, file)
    with open(file_path) as f:
        for lineno, line in enumerate(f):
            for check_name, pattern in line_checks:
                if pattern.search(line):
                    # report with a 1-based line number
                    print("{} - {}:{}".format(check_name, file_path, lineno + 1))
                    throw_error = True
                    break

if throw_error:
    print("found potential problems!")
    raise SystemExit(1)
else:
    print("looks good!")
@@ -0,0 +1,96 @@
from utils import get_alltypes_path_from_game, get_gsrc_path_from_filename
class AllTypesUpdateBlock:
    """One named block of lines, tagged with a file name and a block id."""

    def __init__(self):
        # lines that make up the block
        self.data = []
        # file name and block id the block was tagged with
        self.file_name = ""
        self.block_id = ""

    def __str__(self):
        # only the first 20 entries of `data` are shown
        preview = self.data[:20]
        return f"{self.file_name}:{self.block_id}:{preview}..."
def update_alltypes_named_blocks(game_name):
    """Collect the named blocks for `game_name`, then apply them.

    The dictionary filled by `get_all_blocks` is passed unchanged to
    `update_all_blocks`.
    """
    # file name -> list of blocks (grouped by file name to minimize file IO operations)
    blocks_by_file = dict()
    # Step 1: gather
    get_all_blocks(game_name, blocks_by_file)
    # Step 2: apply
    update_all_blocks(game_name, blocks_by_file)
def get_all_blocks(game_name, block_dict):
    """Read the named blocks out of the all-types file for `game_name`.

    A block starts at a line beginning with ";; +++" that also contains a ":".
    The text after the marker is split into "<file name>:<block id>". Every
    following line up to (not including) the next line that starts with
    ";; ---" is stored as the block's data.

    Args:
        game_name: passed to `get_alltypes_path_from_game` to locate the file.
        block_dict: dictionary that is filled in place; maps a file name to the
            list of `AllTypesUpdateBlock` objects found for it.

    Note:
        `block_id` keeps whatever follows the first colon, including the line's
        trailing newline. `update_all_blocks` derives the id it compares
        against from a full line as well, so the two stay comparable.
    """
    with open(get_alltypes_path_from_game(game_name)) as f:
        lines = f.readlines()

    i = 0
    while i < len(lines):
        line = lines[i]
        if not (line.startswith(";; +++") and ":" in line):
            i = i + 1
            continue

        info = line.replace(";; +++", "")
        # Split on the first colon only, so a block id that itself contains a
        # colon no longer raises "too many values to unpack".
        file_name, block_id = info.split(":", 1)
        new_block = AllTypesUpdateBlock()
        new_block.file_name = file_name
        new_block.block_id = block_id

        # Collect lines until the end marker. The bounds check comes before the
        # index access, so an unterminated block at the end of the file simply
        # ends there instead of raising IndexError.
        i = i + 1
        while i < len(lines) and not lines[i].startswith(";; ---"):
            new_block.data.append(lines[i])
            i = i + 1
        # `i` now sits on the end marker (or past the end); the outer loop
        # steps over the marker on its next iteration.

        block_dict.setdefault(file_name, []).append(new_block)
# Rewrites, for every file name in `block_dict`, the file returned by get_gsrc_path_from_filename:
# the content of each ";; +++<id>" ... ";; ---" section is replaced by the data of the block with the same id.
# Everything from the ";; decomp begins" line onwards is copied through untouched.
def update_all_blocks(game_name, block_dict):
for file_name, blocks in block_dict.items():
# Get the file's lines
path = get_gsrc_path_from_filename(game_name, file_name)
lines = []
# the new file content is built up here and written back at the end
final_lines = []
with open(path) as f:
lines = f.readlines()
# Iterate through lines, (before ;; decomp begins) and update the blocks if we find them
i = 0
while i < len(lines):
line = lines[i]
if line.lower().startswith(";; decomp begins"):
final_lines.append(line)
# Add all the rest of the lines until the end
while i + 1 < len(lines):
i = i + 1
next_line = lines[i]
final_lines.append(next_line)
break
if line.startswith(";; +++"):
final_lines.append(line)
# everything after the marker, including the trailing newline, is the id
block_id = line.split(";; +++")[1]
# Look to see if we actually have that block
found_block = False
for block in blocks:
if block.block_id == block_id:
found_block = True
# if we found the block, write the data, then proceed ahead until the end
for block_line in block.data:
final_lines.append(block_line)
# skip the old content; only the closing ";; ---" line is kept
# NOTE(review): if no ";; ---" line follows, `i` is left on the last line - confirm this cannot repeat or loop
while i + 1 < len(lines):
i = i + 1
next_line = lines[i]
if next_line.startswith(";; ---"):
final_lines.append(next_line)
i = i + 1
break
break
# unknown block id: only step past the marker line, the old content is then copied as-is
if not found_block:
i = i + 1
else:
final_lines.append(line)
i = i + 1
# Update the file contents
with open(path, "w") as f:
f.writelines(final_lines)
@@ -0,0 +1,808 @@
import re
from rapidfuzz import fuzz
# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
# RetainedCode or something
class CommentMeta:
    """A retained piece of text plus the surrounding context recorded for it."""

    def __init__(self):
        # the retained text itself
        self.data = ""

        # nearest symbols around the text, and the padding counted on the way to them
        self.symbol_before = None
        self.symbol_inline = None
        self.symbol_after = None
        self.symbol_padding_before = None
        self.symbol_padding_after = None

        # nearest code lines around the text, and the padding counted on the way to them
        # NOTE - maybe holding more than just 1 line before/after might help?
        self.code_before = None
        self.code_after = None
        self.code_padding_before = None
        self.code_padding_after = None

        # position inside the enclosing form (None means top level)
        self.line_num_in_form = None
        self.containing_form = None
        # function | behavior | method | unknown
        self.containing_form_kind = None
        # name of the function / behavior / method
        self.containing_form_func_name = None
        self.containing_form_type = None

        # inline comments additionally remember the code that shares their line
        self.inline = False
        self.code_in_line = None

        # worst-case fallback: index of the line in the file
        self.line_in_file = None

    def __str__(self):
        return f"{self.data}:{self.symbol_before}:{self.symbol_after}"
def debug_nice_formatted_code(val):
    """Return the stripped `val` cut to 20 characters, or None when `val` is None."""
    return None if val is None else val.strip()[:20]
# returns (symbol | None, padding)
def backtrack_for_symbol(lines, index):
    """Search upwards from `index` for the closest symbol definition.

    Lines are inspected from `index - 1` down to and including line 0. Blank
    lines are counted as padding, lines starting with ";" are skipped, and the
    search stops at the first other line or at a "decomp begins" line.

    Args:
        lines: all lines of the file.
        index: index of the line to start from (that line itself is not inspected).

    Returns:
        (symbol, padding): the symbol named by the closest
        define/define-extern/defun/defstate/deftype found, or None, together
        with the number of blank lines passed on the way.
    """
    padding = 0
    # Stop value is -1 so that the very first line (index 0) is inspected too.
    for i in range(index - 1, -1, -1):
        tline = lines[i].strip()
        matches = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
        )
        if matches is not None:
            return matches.group(1), padding
        if tline == "":
            padding = padding + 1
        elif not tline.startswith(";") or "decomp begins" in tline.lower():
            # we hit a non empty line (but it wasn't a symbol!)
            return None, padding
    return None, padding
def symbol_on_line(line):
    """Return the symbol defined on `line`, or None.

    Only leading whitespace is removed before matching, so the line's trailing
    newline can satisfy the whitespace the pattern expects after the name.
    """
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.lstrip(),
    )
    return found.group(1) if found else None
def lookahead_for_symbol(lines, index):
    """Search downwards from `index` for the closest symbol definition.

    Returns (symbol | None, padding), where padding is the number of blank
    lines passed. The search gives up at the first line that is neither blank
    nor a ";" comment, and at a "decomp begins" line.
    """
    blank_count = 0
    for pos in range(index + 1, len(lines)):
        candidate = lines[pos].lstrip()
        found = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", candidate
        )
        if found is not None:
            return found.group(1), blank_count

        is_blank = candidate.strip() == ""
        is_comment = candidate.strip().startswith(";")
        if "decomp begins" in candidate.lower() or not (is_blank or is_comment):
            # a real line that does not define a symbol ends the search
            return None, blank_count
        if is_blank:
            blank_count += 1
    return None, blank_count
def backtrack_for_code(lines, index):
    """Search upwards from `index` for the closest line of code.

    Blank lines are counted as padding and lines starting with ";" are
    skipped. Lines are inspected from `index - 1` down to and including line 0.

    Args:
        lines: all lines of the file.
        index: index of the line to start from (that line itself is not inspected).

    Returns:
        (line, padding): the first code line found (unmodified) and the number
        of blank lines passed on the way. `line` is None when a "decomp begins"
        line is reached first or when no code line exists above `index`.
    """
    padding = 0
    # Stop value is -1 so that the very first line (index 0) is inspected too.
    for i in range(index - 1, -1, -1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    # Nothing found: still return a pair, because callers unpack the result.
    return None, padding
def lookahead_for_code(lines, index):
    """Search downwards from `index` for the closest line of code.

    Blank lines are counted as padding and lines starting with ";" are skipped.

    Args:
        lines: all lines of the file.
        index: index of the line to start from (that line itself is not inspected).

    Returns:
        (line, padding): the first code line found (unmodified) and the number
        of blank lines passed on the way. `line` is None when a "decomp begins"
        line is reached first or when no code line exists below `index`.
    """
    padding = 0
    for i in range(index + 1, len(lines)):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
        elif "decomp begins" in line.lower():
            return None, padding
        elif not line.lstrip().startswith(";"):
            return line, padding
    # Reached the end of the file (e.g. a trailing comment): still return a
    # pair, because callers unpack the result.
    return None, padding
# returns form, or none
def is_line_start_of_form(line):
    """Return `line` itself when it opens a form, otherwise None.

    Comment lines and lines containing "(when *debug-segment*" never count.
    Any other line counts when it contains an opening paren followed by a
    token and whitespace.
    """
    is_comment = line.lstrip().startswith(";")
    if is_comment or "(when *debug-segment*" in line:
        return None
    return line if re.search(r"\(\s*([^\s.]*)\s+", line) else None
# Feeds the non-comment part of `line` through the paren `stack` ("(" pushes, ")" pops)
# and reports whether the form being tracked is considered finished.
# `stack` is mutated in place; callers pass the same list again for the following lines.
def has_form_ended(stack, line):
# if the stack is empty, return true
# everything from the first ";" on is ignored (this also cuts at a ";" inside a string literal)
line_before_comment = line.partition(";")[0]
for char in line_before_comment:
if char == "(":
stack.append(char)
elif char == ")":
if len(stack) == 0:
# unbalanced parens?
return True
stack.pop()
# the pop closed the outermost paren
if len(stack) == 0:
return True
# NOTE(review): this branch only returns when the stack is empty; confirm in the real file which statement this `else` belongs to
else:
if len(stack) == 0:
return True
return False
def append_form_metadata(comment, form_start_line):
    """Classify `form_start_line` and store the result on `comment`.

    Sets `containing_form_kind` ("function", "behavior", "method" or
    "unknown"), `containing_form_func_name` and `containing_form_type`.
    defun / defun-debug is tried first, then defbehavior, then defmethod.
    """
    kind, func_name, form_type = "unknown", None, None

    func_matches = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_start_line)
    if func_matches is not None:
        kind, func_name = "function", func_matches.group(1)
    else:
        # both of these capture two names after the keyword
        two_name_forms = (
            ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
            ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
        )
        for candidate_kind, pattern in two_name_forms:
            hit = re.search(pattern, form_start_line)
            if hit is not None:
                kind, func_name, form_type = candidate_kind, hit.group(1), hit.group(2)
                break

    comment.containing_form_kind = kind
    comment.containing_form_func_name = func_name
    comment.containing_form_type = form_type
# Scans `lines` (everything after the "decomp begins" line) and records every comment as a CommentMeta
# together with the context found around it: nearest symbols, nearest code lines and the enclosing form.
# Returns (comments, debug_lines); debug_lines is a copy of the visited lines with ";; [DEBUG]" lines added
# after each recorded comment.
def process_original_lines(lines):
comments = []
debug_lines = []
# track if we are inside a define*/defun/defmethod/deftype/defstate
within_form = None
line_num_in_form = None
form_paren_stack = []
# nothing is processed until the "decomp begins" line has been seen
found_output = False
i = 0
while i < len(lines):
debug_lines.append(lines[i])
tline = lines[i].lstrip()
if "decomp begins" in tline.lower():
found_output = True
i = i + 1
continue
if not found_output:
i = i + 1
continue
# actually process code
if within_form is None:
# lets see if we are now in one
within_form = is_line_start_of_form(lines[i])
if within_form is not None:
line_num_in_form = 0
# a form that opens and closes on the same line is over immediately
if has_form_ended(form_paren_stack, lines[i]):
within_form = None
form_paren_stack = []
elif within_form is not None:
# check if the form has ended by counting parens
if has_form_ended(form_paren_stack, lines[i]):
within_form = None
form_paren_stack = []
line_num_in_form = 0
else:
line_num_in_form = line_num_in_form + 1
# full-line comments (";") and block comment openers ("#|")
if tline.startswith(";") or tline.startswith("#|"):
# treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
# this is done because there is nothing to match them against (if a comment is inside them for example)
# so we have to copy them in full
in_deviation_block = False
if "decomp deviation" in tline.lower() or tline.startswith("#|"):
in_deviation_block = True
current_comment = CommentMeta()
current_comment.line_in_file = i
current_comment.data = lines[i]
# context above the comment is taken from its first line
(
current_comment.symbol_before,
current_comment.symbol_padding_before,
) = backtrack_for_symbol(lines, i)
(
current_comment.code_before,
current_comment.code_padding_before,
) = backtrack_for_code(lines, i)
current_comment.containing_form = within_form
if within_form is not None:
append_form_metadata(current_comment, within_form)
current_comment.line_num_in_form = line_num_in_form
current_comment.inline = False
# look ahead to handle block comments
if i + 1 < len(lines):
next_line = lines[i + 1]
# a second "decomp deviation" line or a "|#" line closes the deviation block
if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
in_deviation_block = False
# keep absorbing following lines while they belong to the same comment / deviation block
while i + 1 < len(lines) and (
in_deviation_block
or next_line.lstrip().startswith(";")
or next_line.lstrip().startswith("|#")
):
debug_lines.append(lines[i + 1])
i = i + 1
current_comment.data = current_comment.data + next_line
if i + 1 < len(lines):
next_line = lines[i + 1]
if "decomp deviation" in next_line.lower() or next_line.startswith(
"|#"
):
in_deviation_block = False
# context below the comment is taken from its last line (i has advanced past the block)
(
current_comment.symbol_after,
current_comment.symbol_padding_after,
) = lookahead_for_symbol(lines, i)
(
current_comment.code_after,
current_comment.code_padding_after,
) = lookahead_for_code(lines, i)
comments.append(current_comment)
debug_lines.append(
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
current_comment.symbol_before,
current_comment.symbol_padding_before,
current_comment.symbol_after,
current_comment.symbol_padding_after,
debug_nice_formatted_code(current_comment.code_before),
current_comment.code_padding_before,
debug_nice_formatted_code(current_comment.code_after),
current_comment.code_padding_after,
)
)
debug_lines.append(
";; [DEBUG]: in_form - {}...:{}\n".format(
debug_nice_formatted_code(current_comment.containing_form),
current_comment.line_num_in_form,
)
)
# inline comments
# TODO - cleanup duplication
elif ";" in tline:
current_comment = CommentMeta()
current_comment.line_in_file = i
# keep only the text from the first ";" on (note: a ";" inside a string literal is treated the same way)
current_comment.data = ";" + tline.partition(";")[2]
(
current_comment.symbol_before,
current_comment.symbol_padding_before,
) = backtrack_for_symbol(lines, i)
(
current_comment.symbol_after,
current_comment.symbol_padding_after,
) = lookahead_for_symbol(lines, i)
(
current_comment.code_before,
current_comment.code_padding_before,
) = backtrack_for_code(lines, i)
(
current_comment.code_after,
current_comment.code_padding_after,
) = lookahead_for_code(lines, i)
current_comment.containing_form = within_form
if within_form is not None:
append_form_metadata(current_comment, within_form)
current_comment.line_num_in_form = line_num_in_form
current_comment.symbol_inline = symbol_on_line(tline)
current_comment.inline = True
# the code that shares the line with the comment
current_comment.code_in_line = tline.partition(";")[0]
comments.append(current_comment)
debug_lines.append(
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
current_comment.symbol_before,
current_comment.symbol_padding_before,
current_comment.symbol_after,
current_comment.symbol_padding_after,
debug_nice_formatted_code(current_comment.code_before),
current_comment.code_padding_before,
debug_nice_formatted_code(current_comment.code_after),
current_comment.code_padding_after,
)
)
debug_lines.append(
";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
debug_nice_formatted_code(current_comment.containing_form),
current_comment.line_num_in_form,
debug_nice_formatted_code(current_comment.code_in_line),
)
)
i = i + 1
return comments, debug_lines
def get_symbol_at_line(line):
    """Return the symbol defined on `line`, or None.

    The line is stripped on both sides first, so the symbol name has to be
    followed by more text on the same line for the pattern to match.
    """
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.strip(),
    )
    return found.group(1) if found else None
def relevant_symbol_comments_for_line_before(line):
    """Take the comments whose `symbol_after` is the symbol defined on `line`.

    The matching comments are removed from the module-level `comments` list
    (they WILL be placed by the caller) and returned in their original order.
    Returns an empty list when `line` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []

    relevant_comments = [c for c in comments if c.symbol_after == symbol]
    # slice assignment keeps the same list object, only its content shrinks
    comments[:] = [c for c in comments if not (c.symbol_after == symbol)]
    return relevant_comments
def padding_before_comment(comment):
    """Return the newlines to put in front of `comment`.

    Only a top-level comment (no containing form) with a known `symbol_after`
    gets padding: one newline per `symbol_padding_after`. Otherwise "".
    """
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_after is not None:
        return "\n" * comment.symbol_padding_after
    return ""
def relevant_symbol_comments_for_inline(line):
    """Take the comments whose `symbol_inline` is the symbol defined on `line`.

    The matching comments are removed from the module-level `comments` list
    (they WILL be placed by the caller) and returned in their original order.
    Returns an empty list when `line` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []

    relevant_comments = [c for c in comments if c.symbol_inline == symbol]
    # slice assignment keeps the same list object, only its content shrinks
    comments[:] = [c for c in comments if not (c.symbol_inline == symbol)]
    return relevant_comments
def padding_after_comment(comment):
    """Return the newlines that go with `comment` when it follows a symbol.

    Only a top-level comment (no containing form) with a known `symbol_before`
    gets padding: one newline per `symbol_padding_before`. Otherwise "".
    """
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_before is not None:
        return "\n" * comment.symbol_padding_before
    return ""
def relevant_symbol_comments_for_line_after(line):
    """Take the comments that can only be anchored after the symbol on `line`.

    A comment qualifies when it has no `symbol_after` and its `symbol_before`
    is the symbol defined on `line`; placing before a symbol is preferred when
    possible (more accurate re-creation). Matching comments are removed from
    the module-level `comments` list and returned in their original order.
    Returns an empty list when `line` defines no symbol.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []

    def belongs_after(comment):
        return comment.symbol_after is None and comment.symbol_before == symbol

    relevant_comments = [c for c in comments if belongs_after(c)]
    # slice assignment keeps the same list object, only its content shrinks
    comments[:] = [c for c in comments if not belongs_after(c)]
    return relevant_comments
# the first half of the defmethod/etc lines (before arg list) is less likely to change
# so we want to split it to weight it more heavily
def split_def_line(line):
    """Split `line` in two at the first "(" that is not its very first character.

    Returns (first_part, second_part); second_part starts with that paren and
    is "" when there is no such paren.
    """
    cut = line.find("(", 1)
    if cut == -1:
        return line, ""
    return line[:cut], line[cut:]
def get_form_metadata(form_def_line):
    """Classify a form definition line.

    Returns (kind, name, type): kind is "function", "behavior", "method" or
    "unknown". defun / defun-debug is tried first, then defbehavior, then
    defmethod. `type` is None for functions, and name and type are both None
    for unknown forms.
    """
    func = re.search(r"\(defun(?:-debug)? ([^\s]*)", form_def_line)
    if func is not None:
        return "function", func.group(1), None

    # both of these capture two names after the keyword
    two_name_forms = (
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)"),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)"),
    )
    for kind, pattern in two_name_forms:
        hit = re.search(pattern, form_def_line)
        if hit is not None:
            return kind, hit.group(1), hit.group(2)

    return "unknown", None, None
# method names that get special treatment in different_method_names
built_in_method_names = [
    "new",
    "delete",
    "print",
    "inspect",
    "length",
    "asize-of",
    "copy",
    "relocate",
    "memusage",
]


def different_method_names(form_func_name, comment_form_func_name):
    """Tell whether two method names must be treated as different.

    The names are only compared when at least one of them appears in
    `built_in_method_names`; if neither does, the result is always False.
    """
    names = (comment_form_func_name, form_func_name)
    if not any(name in built_in_method_names for name in names):
        return False
    return form_func_name != comment_form_func_name
# Picks, from the module-level `comments` list, the comments that belong to the form starting at `form_def_line`.
# Selected comments are removed from `comments` and returned.
# A comment is selected when its own containing form is compatible with this form and no other line in the
# module-level `decomp_form_def_lines` is an even better match for it.
def get_relevant_form_comments(form_def_line):
form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
code_def_part, code_rest = split_def_line(form_def_line)
relevant_comments = []
i = 0
while i < len(comments):
comment = comments[i]
# top-level comments are not handled here
if comment.containing_form is None:
i = i + 1
continue
(
comment_form_kind,
comment_form_func_name,
comment_form_type,
) = get_form_metadata(comment.containing_form)
# First disqualify the form if it's obviously unrelated
if comment_form_kind != "unknown":
if form_kind != comment_form_kind:
i = i + 1
continue
elif form_kind == "function" and comment_form_func_name != form_func_name:
i = i + 1
continue
elif form_kind == "behavior" and comment_form_func_name != form_func_name:
i = i + 1
continue
elif form_kind == "method" and (
comment_form_type != form_type
or different_method_names(form_func_name, comment_form_func_name)
):
i = i + 1
continue
# Evaluate it's score (comments and current def line)
# the definition part weighs 65% of the score, the rest of the line 35%
def_part, rest = split_def_line(comment.containing_form)
def_score = fuzz.ratio(code_def_part, def_part) * 0.65
# 65.0 means fuzz.ratio returned 100 for the definition part (presumably an exact match): accept right away
if def_score == 65.0 and form_kind != "unknown":
relevant_comments.append(comment)
comments.pop(i)
continue
rest_score = fuzz.ratio(code_rest, rest) * 0.35
combined_score = def_score + rest_score
threshold = 50.0
if combined_score < threshold:
i = i + 1
continue
# Now, let's look at ALL other def lines yet to come from the decomp output
# if any are a better match, don't add the comment yet -- we'll add it when we get there!
# TODO - remove lines from the list as we find them so speed this up
found_better_form = False
for decomp_def_line in decomp_form_def_lines:
line_form_kind, line_form_func_name, line_form_type = get_form_metadata(
decomp_def_line
)
# only def lines compatible with the current form are compared
if form_kind != "unknown":
if form_kind != line_form_kind:
continue
elif form_kind == "function" and line_form_func_name != form_func_name:
continue
elif form_kind == "behavior" and line_form_func_name != form_func_name:
continue
elif form_kind == "method" and (
line_form_type != form_type
or different_method_names(form_func_name, line_form_func_name)
):
continue
# NOTE(review): this scores the other def line against the current def line (code_def_part / code_rest),
# not against the comment's own containing form - confirm that is the intended comparison
def_part, rest = split_def_line(decomp_def_line)
def_score = fuzz.ratio(code_def_part, def_part) * 0.65
if def_score == 65.0 and form_kind != "unknown":
found_better_form = True
break
rest_score = fuzz.ratio(code_rest, rest) * 0.35
if combined_score < def_score + rest_score:
found_better_form = True
break
# TODO otherwise? still test?
if found_better_form:
i = i + 1
continue
# popping keeps `i` pointing at the next unvisited comment, so it is not incremented here
relevant_comments.append(comment)
comments.pop(i)
return relevant_comments
# Simple fuzz ratio, but removes obvious outliers like empty lines / lines with only a paren
def score_alg(line1, line2):
    """Score how similar two lines are after stripping surrounding whitespace.

    Returns -1 when either stripped line is empty or a lone "(" / ")";
    otherwise returns `fuzz.ratio` of the two stripped lines.
    """
    outliers = ("", ")", "(")
    stripped_a, stripped_b = line1.strip(), line2.strip()
    if stripped_a in outliers or stripped_b in outliers:
        return -1
    return fuzz.ratio(stripped_a, stripped_b)
# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
# Builds the merged file content in `final_lines` (appended to in place):
#  1. the lines of the file at `gsrc_path` up to and including its ";; decomp begins" line are copied
#     (lines containing "[DEBUG]" are dropped),
#  2. then `decomp_lines` are appended, with the retained comments from the module-level `comments`
#     list re-inserted where they match best.
def merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines):
decomp_started = False
with open(gsrc_path) as f:
lines = f.readlines()
within_form = None
line_num_in_form = None
form_paren_stack = []
# copy the existing header, stop at the ";; decomp begins" line
for line in lines:
if "[DEBUG]" in line:
continue
if line.lower().lstrip().startswith(";; decomp begins"):
decomp_started = True
final_lines.append(line)
break
if not decomp_started:
final_lines.append(line)
continue
i = 0
while i < len(decomp_lines):
line = decomp_lines[i]
# Otherwise, its a part of the output we have to be more careful about
# For every line in the decompiled output, we scan our comment list to see if anything matches
# if it does, we insert it appropriately and remove the comment from the list
#
# This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
if within_form is None:
# lets see if we are now in one
within_form = is_line_start_of_form(line)
# TODO - check line for symbol matches?
if within_form is not None:
line_num_in_form = 0
# a form that opens and closes on one line is handled like a top-level line further down
if has_form_ended(form_paren_stack, line):
within_form = None
form_paren_stack = []
else:
# Get all of the lines of the form at once
form_start = decomp_lines[i]
form_lines = [form_start]
# NOTE(review): `i` is incremented before indexing, so a form that never closes appears to index past the end of decomp_lines - confirm
while i < len(decomp_lines):
i = i + 1
line = decomp_lines[i]
if has_form_ended(form_paren_stack, line):
within_form = None
form_paren_stack = []
break
else:
form_lines.append(line)
# Add any comments needed to the form contents
# - first we get all comments that have match well with the form's start line (ie. defmethod ....)
form_comments = get_relevant_form_comments(form_start)
# - for each comment, let's find which line matches it the best,
# if NONE exceed the threshold (if both match the same, pick the first), we default to the line offset
for comment in form_comments:
highest_score = -1
# -1 means "no line scored high enough"
index_to_insert = -1
threshold = 50.0
# which of the comment's reference lines produced the best match: inline / next_line / before_line
place_kind = None
for index, form_line in enumerate(form_lines):
# skip any comments that were previously added
if form_line.lstrip().startswith(";"):
continue
if comment.code_in_line is not None:
score = score_alg(form_line, comment.code_in_line)
if score >= threshold and score > highest_score:
index_to_insert = index
highest_score = score
place_kind = "inline"
if comment.code_before is not None:
score = score_alg(form_line, comment.code_before)
if score >= threshold and score > highest_score:
index_to_insert = index
highest_score = score
# NOTE(review): place_comment_after is assigned here but not read anywhere in this function
place_comment_after = True
place_kind = "next_line"
if comment.code_after is not None:
score = score_alg(form_line, comment.code_after)
if score >= threshold and score > highest_score:
index_to_insert = index
highest_score = score
place_comment_after = False
place_kind = "before_line"
# add the comment!
# fallback: no good match, fall back to the recorded line offset inside the form
if index_to_insert == -1:
if comment.inline:
# NOTE(review): index_to_insert is -1 here, so this reads the LAST form line while writing to the
# recorded offset; the offset may also lie beyond the collected lines - confirm the intent
form_lines[comment.line_num_in_form] = (
form_lines[index_to_insert].rstrip()
+ " "
+ comment.data
)
else:
form_lines.insert(
comment.line_num_in_form, comment.data
)
elif comment.inline:
# inline comments are appended to the matched line (or the one after it)
form_index = index_to_insert
if place_kind == "next_line":
form_index = index_to_insert + 1
form_lines[form_index] = (
form_lines[form_index].rstrip() + " " + comment.data
)
elif place_kind == "next_line":
form_lines.insert(
index_to_insert + 1,
padding_before_comment(comment) + comment.data,
)
else:
form_lines.insert(
index_to_insert,
padding_after_comment(comment) + comment.data,
)
# Add the lines to the final output
for form_line in form_lines:
final_lines.append(form_line)
# Otherwise, we are at the top-level!
# (after a multi-line form, `line` is the line on which that form ended)
if within_form is None:
before_comments = relevant_symbol_comments_for_line_before(line)
for comment in before_comments:
final_lines.append(padding_before_comment(comment) + comment.data)
inline_comments = relevant_symbol_comments_for_inline(line)
if len(inline_comments) > 0:
# all inline comments for the line are merged into one; every ";" in them is removed first
comment_str = ""
for comment in inline_comments:
comment_str = comment_str + comment.data.strip()
comment_str = comment_str.replace(";", "")
final_lines.append(
"{} ;; {}".format(line.rstrip(), comment_str.strip())
)
else:
final_lines.append(line)
after_comments = relevant_symbol_comments_for_line_after(line)
for comment in after_comments:
final_lines.append(padding_after_comment(comment) + comment.data)
# next line
i = i + 1
def _score_line_for_comment(comment, line, line_index, best, threshold):
    """Score `line` against the comment's reference snippets.

    `best` is the running `(index_to_insert, highest_score, place_comment_after)`
    tuple; the (possibly updated) tuple is returned.
    """
    index_to_insert, highest_score, place_comment_after = best
    # (reference snippet, placement to record on a win; None keeps the current one)
    candidates = (
        (comment.code_in_line, None),
        (comment.code_before, True),
        (comment.code_after, False),
    )
    for snippet, place_after in candidates:
        if snippet is None:
            continue
        score = score_alg(line, snippet)
        if score >= threshold and score > highest_score:
            index_to_insert = line_index
            highest_score = score
            if place_after is not None:
                place_comment_after = place_after
    return index_to_insert, highest_score, place_comment_after


def handle_dangling_blocks(comments, final_lines, debug_lines):
    """Insert every remaining comment into `final_lines` at its best-matching line.

    For each comment, every line of `final_lines` that is not skipped is scored
    (via `score_alg`) against the comment's `code_in_line`, `code_before` and
    `code_after`. The highest score at or above the threshold decides where the
    comment goes. When nothing matches, the comment falls back to its original
    `line_in_file` position.

    Args:
        comments: comment objects still waiting to be placed.
        final_lines: output lines; mutated in place.
        debug_lines: receives a copy of every line visited, once per comment.
    """
    threshold = 50.0
    for comment in comments:
        within_form = None
        form_paren_stack = []
        # (index_to_insert, highest_score, place_comment_after)
        best = (-1, -1, True)
        for i, line in enumerate(final_lines):
            debug_lines.append(line)
            if "decomp begins" in line.lstrip().lower():
                continue
            if within_form is not None:
                # We can try to claw back a bit of efficiency by skipping the inside of forms
                if not has_form_ended(form_paren_stack, line):
                    continue
                within_form = None
                form_paren_stack = []
            else:
                # lets see if we are now in a form
                within_form = is_line_start_of_form(line)
            best = _score_line_for_comment(comment, line, i, best, threshold)

        # add the comment!
        index_to_insert, _, place_comment_after = best
        if index_to_insert == -1:
            # Nothing matched well enough: fall back to the original line number.
            if comment.inline:
                try:
                    final_lines[comment.line_in_file] = (
                        final_lines[comment.line_in_file].rstrip() + " " + comment.data
                    )
                except IndexError:
                    # The new output is shorter than the original file, so the
                    # remembered line no longer exists; keep the comment by
                    # appending it instead of crashing.
                    final_lines.append(comment.data)
            else:
                final_lines.insert(comment.line_in_file, comment.data)
        elif comment.inline:
            final_lines[index_to_insert] = (
                final_lines[index_to_insert].rstrip() + " " + comment.data
            )
        elif place_comment_after:
            final_lines.insert(
                index_to_insert + 1,
                padding_before_comment(comment) + comment.data,
            )
        else:
            final_lines.insert(
                index_to_insert,
                padding_after_comment(comment) + comment.data,
            )
+33
View File
@@ -0,0 +1,33 @@
from utils import get_gsrc_path_from_filename, get_ref_path_from_filename, get_file_list
import os
# TODO - hard-coded to jak 2
GAME = "jak2"
# gsrc files with this many lines or fewer are not checked for a reference file
MIN_GSRC_LINES = 15

# Go through every version-3 file: if its gsrc file has more than MIN_GSRC_LINES
# lines but no reference test file exists for it, record it as missing.
file_list = get_file_list(GAME)

# TODO - function for getting just the names
missing_files = []
for entry in file_list:
    if entry[2] != 3:
        continue
    file_name = entry[0]
    # check gsrc
    gsrc_path = get_gsrc_path_from_filename(GAME, file_name)
    if not gsrc_path:
        continue
    with open(gsrc_path, "r") as fp:
        # count lines without loading the whole file into memory
        gsrc_length = sum(1 for _ in fp)
    if gsrc_length <= MIN_GSRC_LINES:
        continue
    # check if ref exists
    ref_path = get_ref_path_from_filename(
        GAME, file_name, "./test/decompiler/reference/"
    )
    if not os.path.exists(ref_path):
        missing_files.append(file_name)

print(missing_files)
+159
View File
@@ -0,0 +1,159 @@
import re
import argparse
from utils import get_gsrc_path_from_filename
from colorama import just_fix_windows_console, Fore, Back, Style
just_fix_windows_console()
parser = argparse.ArgumentParser("lint-gsrc-file")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--file", help="The name of the file", type=str)
args = parser.parse_args()
class LintMatch:
    """One rule hit: the file, the 1-based line number and the context lines."""

    def __init__(self, src_path, offending_lineno, context):
        self.src_path = src_path
        self.offending_lineno = offending_lineno
        self.context = context

    def __str__(self):
        """Render a coloured `@ path:line` header followed by the context lines."""
        header = "".join(
            (
                Style.BRIGHT,
                Fore.MAGENTA,
                "@ {}:{}\n".format(self.src_path, self.offending_lineno),
                Fore.RESET,
                Style.RESET_ALL,
            )
        )
        # lines that are just a bracket are left out
        shown = [
            "\t{}\n".format(line)
            for line in self.context
            if line.strip() not in (")", "(")
        ]
        return header + "".join(shown)
class LinterRule:
    """A lint rule: severity level, name, compiled regex and context size.

    `matches` collects the hits recorded for this rule.
    """

    def __init__(self, level, rule_name, regex_pattern, context_size):
        self.level = level
        self.rule_name = rule_name
        self.regex_pattern = regex_pattern
        self.context_size = context_size
        self.matches = []

    def __str__(self):
        """Render the coloured `[LEVEL] - name - /pattern/g:` heading."""
        # any level other than WARN / ERROR is shown in light blue
        level_color = {"WARN": Fore.YELLOW, "ERROR": Fore.RED}.get(
            self.level, Fore.LIGHTBLUE_EX
        )
        colored_name = level_color + self.rule_name + Fore.RESET
        heading = "[{}]{} - {} - {}/{}/g".format(
            self.level,
            Fore.RESET,
            colored_name,
            Fore.CYAN,
            self.regex_pattern.pattern,
        )
        return level_color + heading + Fore.RESET + ":"
# Construct all rules
#
# Patterns are raw strings so that regex escapes such as `\(`, `\d` and `\s`
# reach the `re` module untouched instead of being (invalid) string escapes.
linter_rules = [
    # Infos
    # Warnings
    LinterRule("WARN", "method_splits", re.compile(r"method-of-(?:type|object)"), 3),
    LinterRule(
        "WARN", "func_splits", re.compile(r"\(t9-\d+(?:\s+[^\s]+\s*)?\)"), 3
    ),
    LinterRule(
        "WARN", "missing_arg", re.compile(r"local-vars.*[at].*\s+none\)"), 1
    ),
    # Errors
    # NOTE(review): the leading `.` is an unescaped regex wildcard, so this also
    # matches any other character followed by "pcpyud" - confirm whether a
    # literal dot was intended.
    LinterRule("ERROR", "missing_res_tag", re.compile(r".pcpyud"), 1),
    LinterRule("ERROR", "decomp_error", re.compile(r";; ERROR"), 1),
    LinterRule(
        "ERROR",
        "casting_stack_var",
        re.compile(r"the-as\s+[^\s]*\s+.*\(new 'stack"),
        2,
    ),
]
src_path = get_gsrc_path_from_filename(args.game, args.file)
if not src_path:
    # Fail here with a readable message rather than later inside open()
    print(
        Fore.RED
        + "Could not find a goal_src file for game '{}' and file '{}'".format(
            args.game, args.file
        )
        + Fore.RESET
    )
    raise SystemExit(1)

# Iterate through the file line by line, check against each rule
# if the rule is violated (it matches) then we append the match with useful details
print("Linting GOAL_SRC File...")
def get_context(lines, match_span, idx, amount_inclusive):
    """Return up to `amount_inclusive` lines starting at `lines[idx]`.

    Trailing whitespace is removed. The first line's leading whitespace is
    stripped completely and every following line keeps only the indentation it
    has *relative to the first line* (never less than zero), so the nesting of
    the snippet is preserved.

    Args:
        lines: all lines of the file.
        match_span: span of the regex match on the first line (currently unused).
        idx: 0-based index of the first line to grab.
        amount_inclusive: maximum number of lines to return.

    Returns:
        A list of the re-indented context lines (possibly shorter than
        `amount_inclusive` near the end of the file).
    """
    # TODO - first line, colorize the match
    if amount_inclusive <= 0:
        return []
    window = [raw.rstrip() for raw in lines[idx : idx + amount_inclusive]]
    if not window:
        return []
    base_indent = len(window[0]) - len(window[0].lstrip())
    lines_grabbed = []
    for line in window:
        stripped_line = line.lstrip()
        # indentation relative to the first line; dedented lines get none
        relative_indent = max(0, len(line) - len(stripped_line) - base_indent)
        lines_grabbed.append(" " * relative_indent + stripped_line)
    return lines_grabbed
with open(src_path) as f:
    src_lines = f.readlines()

# Test every line against every rule; each hit is stored on its rule together
# with the 1-based line number and the context lines starting at the hit.
for adjusted_lineno, line in enumerate(src_lines, start=1):
    for rule in linter_rules:
        match = rule.regex_pattern.search(line)
        if match is None:
            continue
        context = get_context(
            src_lines, match.span(), adjusted_lineno - 1, rule.context_size
        )
        rule.matches.append(LintMatch(src_path, adjusted_lineno, context))
# Iterate through all our linter rules, printing nicely in groups with the
# context surrounding the match
#
# If we find any violations at warning or above, we will ultimately exit with code 1
throw_error = False
for rule in linter_rules:
    if not rule.matches:
        continue
    print(rule)
    if rule.level in ("ERROR", "WARN"):
        throw_error = True
    # Iterate through violations
    for match in rule.matches:
        print(match)

if throw_error:
    print(Fore.RED + "Found potential problems, exiting with code 1!" + Fore.RESET)
    # raise SystemExit directly: the `exit()` builtin is an interactive helper
    # injected by the `site` module and is not guaranteed to exist
    raise SystemExit(1)
else:
    print(Fore.GREEN + "Looks good!" + Fore.RESET)
+2
View File
@@ -1 +1,3 @@
rapidfuzz
GitPython
colorama
@@ -0,0 +1,38 @@
# Creates the `*.gd` files that go in ./goal_src/<game>/dgos
# Takes input from the `dgo.txt` file that is generated by the decompiler
# Run with all inputs enabled to get all the info!
# example - python .\scripts\gsrc\skeleton_creation\generate_dgo_files.py --game jak2 --dgotxt .\decompiler_out\jak2\dgo.txt
import argparse
parser = argparse.ArgumentParser("generate_dgo_files")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--dgotxt", help="Path to the dgo.txt file", type=str)
args = parser.parse_args()


def _write_dgo_file(game, dgo_name, dgo_lines):
    """Write the collected lines of one DGO to ./goal_src/<game>/dgos/<dgo_name>."""
    path = "./goal_src/{}/dgos/{}".format(game, dgo_name)
    print("writing to {}".format(path))
    with open(path, "w") as wf:
        wf.writelines(dgo_lines)


# Read in the dgo.txt file
with open(args.dgotxt, "r") as f:
    # skip the first two lines, assumed to be a comment header and an empty line
    lines = f.readlines()[2:]

# OpenGOAL still doesn't have a data serialization/deserialization format
# so read line by line, assuming each DGO is separated by an empty line
current_dgo_name = None
current_dgo_lines = []
for line in lines:
    if line.strip() == "":
        # Write out contents to the .gd file
        if current_dgo_name is not None:
            _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
        current_dgo_name = None
        current_dgo_lines = []
        continue
    if ".CGO" in line or ".DGO" in line:
        print("found one! - {}".format(line.strip()))
        # figure out the name
        current_dgo_name = (
            line.replace("(", "")
            .replace("\"", "")
            .strip()
            .lower()
            .replace(".dgo", ".gd")
            .replace(".cgo", ".gd")
        )
        print(current_dgo_name)
    if current_dgo_name is not None:
        current_dgo_lines.append(line)

# The input may not end with an empty line; make sure the last DGO is written too
if current_dgo_name is not None:
    _write_dgo_file(args.game, current_dgo_name, current_dgo_lines)
@@ -0,0 +1,36 @@
# Generates the `(cgo-file...` lines for the game.gp file
# Attempts to put DGOs in the correct order based on the file order in `all_objs`
import json
common_deps = '("$OUT/obj/cty-guard-turret-button.o")'
ignored_dgos = ["ENGINE", "KERNEL", "ART", "COMMON", "GAME", "NO-XGO"]

dgos_encountered = set()
dgos_handled = set()

jak2_files = None
with open("./goal_src/jak2/build/all_objs.json", "r") as f:
    jak2_files = json.load(f)

lines = []


def _emit_cgo_line(dgo):
    """Append the `(cgo-file ...)` line for `dgo` unless it is ignored or already handled."""
    if dgo.lower() in dgos_handled or dgo in ignored_dgos:
        return
    dgos_handled.add(dgo.lower())
    lines.append('(cgo-file "{}.gd" {})'.format(dgo.lower(), common_deps))


# Enumerate the files, order is dictated by code files (version 3)
# At the end we will fill in any dgos that weren't considered "required"
for file in jak2_files:
    version = file[2]
    dgo_list = file[3]
    dgos_encountered.update(dgo_list)
    # a code file with no DGOs has nothing to contribute to the ordering
    if version == 3 and dgo_list:
        _emit_cgo_line(dgo_list[0])

# Set iteration order differs between runs; sort so the output is reproducible
for dgo in sorted(dgos_encountered):
    _emit_cgo_line(dgo)

for line in lines:
    print(line)
+92 -806
View File
@@ -36,41 +36,22 @@
# - there are likely ways to make this more efficient
import argparse
import re
from rapidfuzz import fuzz
import os
from code_retention.all_types_retention import update_alltypes_named_blocks
from utils import get_gsrc_path_from_filename
# TODO - rename and refactor all usages, it's not _always_ a comment anymore!
# RetainedCode or something
class CommentMeta:
    """Metadata describing one retained comment (or retained block of text).

    Every field starts out empty; the scanner fills in whatever it can find.
    """

    def __init__(self):
        # the retained text itself
        self.data = ""

        # nearest symbols around the comment, plus the padding counted on the way
        self.symbol_before = None
        self.symbol_inline = None
        self.symbol_after = None
        self.symbol_padding_before = None
        self.symbol_padding_after = None

        # nearest code lines around the comment
        # NOTE - maybe holding more than just 1 line before/after might help?
        self.code_before = None
        self.code_after = None
        self.code_padding_before = None
        self.code_padding_after = None

        # form the comment lives in
        self.line_num_in_form = None  # None == top level
        self.containing_form = None  # none - top level
        self.containing_form_kind = None  # function|method|behaviour
        self.containing_form_func_name = None  # or the method/behaviour
        self.containing_form_type = None

        self.inline = False
        self.code_in_line = None  # only for inline comments
        self.line_in_file = None  # a worst-case scenario fallback

    def __str__(self):
        return ":".join(
            "{}".format(part)
            for part in (self.data, self.symbol_before, self.symbol_after)
        )
from code_retention.code_retention import *
import shutil
from pathlib import Path
import subprocess
parser = argparse.ArgumentParser("update-from-decomp")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--file", help="The name of the file", type=str)
parser.add_argument(
"--preserve",
help="Attempt to preserve comments and marked blocks",
action="store_true",
)
parser.add_argument(
"--debug", help="Output debug metadata on every block", action="store_true"
)
@@ -81,150 +62,14 @@ args = parser.parse_args()
gsrc_path = get_gsrc_path_from_filename(args.game, args.file)
# Step 1 - Find and update all named blocks from all-types (useful for enums)
update_alltypes_named_blocks(args.game)
comments = []
debug_lines = []
def debug_nice_formatted_code(val):
    """Return at most the first 20 characters of the stripped value (None stays None)."""
    return None if val is None else val.strip()[:20]
# returns (symbol | None, padding)
def backtrack_for_symbol(lines, index):
    """Walk upwards from `lines[index]` looking for a symbol definition.

    Blank lines are counted as padding and comment lines are skipped. The search
    stops at the first line that defines a symbol (define / define-extern /
    defun / defstate / deftype), or at the first other non-comment line or
    "decomp begins" marker, in which case no symbol is returned.

    Returns:
        (symbol name or None, number of blank lines passed)
    """
    padding = 0
    # walk all the way up to and including the first line of the file
    for i in range(index - 1, -1, -1):
        tline = lines[i].strip()
        matches = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", tline
        )
        if matches is not None:
            return matches.group(1), padding
        is_blank = tline == ""
        if (
            not is_blank and not tline.startswith(";")
        ) or "decomp begins" in tline.lower():
            # we hit a non empty line (but it wasn't a symbol!)
            return None, padding
        if is_blank:
            padding = padding + 1
    return None, padding
def symbol_on_line(line):
    """Return the symbol defined on this line, or None when the line defines none."""
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.lstrip(),
    )
    return found.group(1) if found else None
def lookahead_for_symbol(lines, index):
    """Walk downwards from `lines[index]` looking for a symbol definition.

    Returns (symbol name or None, number of blank lines passed). Comment lines
    are skipped; any other non-blank line, or a "decomp begins" marker, ends the
    search without a symbol.
    """
    blank_count = 0
    for i in range(index + 1, len(lines)):
        text = lines[i].lstrip()
        found = re.search(
            r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s", text
        )
        if found is not None:
            return found.group(1), blank_count
        bare = text.strip()
        is_code = bare != "" and not bare.startswith(";")
        if is_code or "decomp begins" in text.lower():
            # we hit a non empty line (but it wasn't a symbol!)
            return None, blank_count
        if bare == "":
            blank_count += 1
    return None, blank_count
def backtrack_for_code(lines, index):
    """Walk upwards from `lines[index]` to the nearest line of code.

    Blank lines are counted as padding and comment lines are skipped. A
    "decomp begins" marker ends the search.

    Returns:
        (code line or None, number of blank lines passed). A tuple is always
        returned, so callers can safely unpack the result.
    """
    padding = 0
    # walk all the way up to and including the first line of the file
    for i in range(index - 1, -1, -1):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
            continue
        if "decomp begins" in line.lower():
            return None, padding
        if line.lstrip().startswith(";"):
            continue
        return line, padding
    # ran out of lines without finding any code
    return None, padding
def lookahead_for_code(lines, index):
    """Walk downwards from `lines[index]` to the nearest line of code.

    Blank lines are counted as padding and comment lines are skipped. A
    "decomp begins" marker ends the search.

    Returns:
        (code line or None, number of blank lines passed). A tuple is always
        returned, so callers can safely unpack the result.
    """
    padding = 0
    for i in range(index + 1, len(lines)):
        line = lines[i]
        if line.strip() == "":
            padding = padding + 1
            continue
        if "decomp begins" in line.lower():
            return None, padding
        if line.lstrip().startswith(";"):
            continue
        return line, padding
    # ran out of lines without finding any code
    return None, padding
# returns form, or none
def is_line_start_of_form(line):
    """Return `line` when it opens a form, otherwise None.

    A line counts as opening a form when it contains an opening paren followed by
    a head and whitespace. Comment lines never start a form, no matter how far
    they are indented.
    """
    # lstrip (not rstrip) so that indented comment lines are recognised too
    if line.lstrip().startswith(";"):
        return None
    if re.search(r"\(\s*([^\s.]*)\s+", line) is not None:
        return line
    return None
# Tracks paren depth across calls: `stack` holds one entry per "(" that is still
# open and is mutated in place. Returns True once the tracked form is closed.
# Only the text before the first ";" on the line is examined.
def has_form_ended(stack, line):
# if the stack is empty, return true
# NOTE: a ";" inside a string literal also cuts the line here
line_before_comment = line.partition(";")[0]
for char in line_before_comment:
if char == "(":
stack.append(char)
elif char == ")":
if len(stack) == 0:
# unbalanced parens?
return True
stack.pop()
# this ")" closed the outermost open paren
if len(stack) == 0:
return True
# NOTE(review): indentation is not visible in this view, so it cannot be
# confirmed here whether this `else` belongs to the `if`/`elif` chain (checked
# for every other character) or to the `for` loop (checked once after the
# last character) - verify against the real file before changing it.
else:
if len(stack) == 0:
return True
return False
def append_form_metadata(comment, form_start_line):
    """Record on `comment` what kind of form `form_start_line` opens.

    Sets `containing_form_kind` ("function", "behavior", "method" or "unknown"),
    `containing_form_func_name` and `containing_form_type` (only behaviors and
    methods have a type).
    """
    # (kind, pattern, whether the second capture group is the type); first hit wins
    form_patterns = (
        ("function", r"\(defun(?:-debug)? ([^\s]*)", False),
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)", True),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)", True),
    )
    kind, func_name, form_type = "unknown", None, None
    for candidate_kind, pattern, has_type in form_patterns:
        found = re.search(pattern, form_start_line)
        if found is None:
            continue
        kind = candidate_kind
        func_name = found.group(1)
        form_type = found.group(2) if has_type else None
        break
    comment.containing_form_kind = kind
    comment.containing_form_func_name = func_name
    comment.containing_form_type = form_type
decomp_ignore_forms = []
decomp_ignore_forms = ["defmethod inspect"]
decomp_ignore_errors = False
update_with_merge = False
with open(gsrc_path) as f:
lines_temp = f.readlines()
@@ -238,161 +83,27 @@ with open(gsrc_path) as f:
decomp_ignore_errors = True
if "og:ignore-form" in line:
decomp_ignore_forms.append(line.partition("ignore-form:")[2].strip())
if "og:update-with-merge" in line:
update_with_merge = True
lines.append(line)
# track if we are inside a define*/defun/defmethod/deftype/defstate
within_form = None
line_num_in_form = None
form_paren_stack = []
found_output = False
i = 0
while i < len(lines):
debug_lines.append(lines[i])
tline = lines[i].lstrip()
if "decomp begins" in tline.lower():
found_output = True
i = i + 1
continue
if not found_output:
i = i + 1
continue
# actually process code
if within_form is None:
# lets see if we are now in one
within_form = is_line_start_of_form(lines[i])
if within_form is not None:
line_num_in_form = 0
if has_form_ended(form_paren_stack, lines[i]):
within_form = None
form_paren_stack = []
elif within_form is not None:
# check if the form has ended by counting parens
if has_form_ended(form_paren_stack, lines[i]):
within_form = None
form_paren_stack = []
line_num_in_form = 0
else:
line_num_in_form = line_num_in_form + 1
if tline.startswith(";") or tline.startswith("#|"):
# treat decomp deviation blocks as essentially comments as well, so include them in a block comment if appropriate
# this is done because there is nothing to match them against (if a comment is inside them for example)
# so we have to copy them in full
in_deviation_block = False
if "decomp deviation" in tline.lower() or tline.startswith("#|"):
in_deviation_block = True
current_comment = CommentMeta()
current_comment.line_in_file = i
current_comment.data = lines[i]
(
current_comment.symbol_before,
current_comment.symbol_padding_before,
) = backtrack_for_symbol(lines, i)
(
current_comment.code_before,
current_comment.code_padding_before,
) = backtrack_for_code(lines, i)
current_comment.containing_form = within_form
if within_form is not None:
append_form_metadata(current_comment, within_form)
current_comment.line_num_in_form = line_num_in_form
current_comment.inline = False
# look ahead to handle block comments
if i + 1 < len(lines):
next_line = lines[i + 1]
if "decomp deviation" in next_line.lower() or next_line.startswith("|#"):
in_deviation_block = False
while i + 1 < len(lines) and (
in_deviation_block
or next_line.lstrip().startswith(";")
or next_line.lstrip().startswith("|#")
):
debug_lines.append(lines[i + 1])
i = i + 1
current_comment.data = current_comment.data + next_line
if i + 1 < len(lines):
next_line = lines[i + 1]
if "decomp deviation" in next_line.lower() or next_line.startswith(
"|#"
):
in_deviation_block = False
(
current_comment.symbol_after,
current_comment.symbol_padding_after,
) = lookahead_for_symbol(lines, i)
(
current_comment.code_after,
current_comment.code_padding_after,
) = lookahead_for_code(lines, i)
comments.append(current_comment)
debug_lines.append(
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
current_comment.symbol_before,
current_comment.symbol_padding_before,
current_comment.symbol_after,
current_comment.symbol_padding_after,
debug_nice_formatted_code(current_comment.code_before),
current_comment.code_padding_before,
debug_nice_formatted_code(current_comment.code_after),
current_comment.code_padding_after,
)
)
debug_lines.append(
";; [DEBUG]: in_form - {}...:{}\n".format(
debug_nice_formatted_code(current_comment.containing_form),
current_comment.line_num_in_form,
)
)
# inline comments
# TODO - cleanup duplication
elif ";" in tline:
current_comment = CommentMeta()
current_comment.line_in_file = i
current_comment.data = ";" + tline.partition(";")[2]
(
current_comment.symbol_before,
current_comment.symbol_padding_before,
) = backtrack_for_symbol(lines, i)
(
current_comment.symbol_after,
current_comment.symbol_padding_after,
) = lookahead_for_symbol(lines, i)
(
current_comment.code_before,
current_comment.code_padding_before,
) = backtrack_for_code(lines, i)
(
current_comment.code_after,
current_comment.code_padding_after,
) = lookahead_for_code(lines, i)
current_comment.containing_form = within_form
if within_form is not None:
append_form_metadata(current_comment, within_form)
current_comment.line_num_in_form = line_num_in_form
current_comment.symbol_inline = symbol_on_line(tline)
current_comment.inline = True
current_comment.code_in_line = tline.partition(";")[0]
comments.append(current_comment)
debug_lines.append(
";; [DEBUG]: sym - {}:{} | {}:{} || code - {}...:{} | {}...:{}\n".format(
current_comment.symbol_before,
current_comment.symbol_padding_before,
current_comment.symbol_after,
current_comment.symbol_padding_after,
debug_nice_formatted_code(current_comment.code_before),
current_comment.code_padding_before,
debug_nice_formatted_code(current_comment.code_after),
current_comment.code_padding_after,
)
)
debug_lines.append(
";; [DEBUG]: in_form - {}...:{} || inline_code - {}...\n".format(
debug_nice_formatted_code(current_comment.containing_form),
current_comment.line_num_in_form,
debug_nice_formatted_code(current_comment.code_in_line),
)
)
i = i + 1
if args.preserve:
comments, debug_lines = process_original_lines(lines)
# If we are going to `update_with_merge` then make a backup of the file, and
# an empty file to use as the common ancestor.
#
# This means that all changes will be flagged as a conflict and will not be able to be
# merged into the repo without being explicitly resolved
if update_with_merge:
subprocess.run(
[
"git",
"restore",
gsrc_path
]
)
shutil.copyfile(gsrc_path, gsrc_path.replace(".gc", ".before.gc"))
Path(gsrc_path.replace(".gc", ".empty.gc")).touch()
if args.debug:
with open(gsrc_path, "w") as f:
@@ -409,6 +120,8 @@ lines_to_ignore = [
";; failed to figure",
";; Used lq/sq",
";; this part is debug only",
";; WARN: Return type mismatch int vs none",
";; WARN: Stack slot offset",
]
if decomp_ignore_errors:
@@ -428,7 +141,8 @@ def should_ignore_line(line):
return False
# TODO - check for existence probably
# TODO - ignore brackets inside strings!
decomp_file_path = "./decompiler_out/{}/{}_disasm.gc".format(args.game, args.file)
with open(decomp_file_path) as f:
lines = f.readlines()
@@ -459,10 +173,10 @@ with open(decomp_file_path) as f:
if not skip_form:
decomp_form_def_lines.append(decomp_within_form)
decomp_lines.append(line)
while i < len(lines):
while i + 1 < len(lines):
i = i + 1
line = lines[i]
if not skip_form:
if not skip_form and not should_ignore_line(line):
decomp_lines.append(line)
if has_form_ended(decomp_form_paren_stack, line):
decomp_within_form = None
@@ -476,492 +190,64 @@ with open(decomp_file_path) as f:
# Step 3: Start merging the new code + comments
final_lines = []
decomp_started = False
def get_symbol_at_line(line):
    """Return the symbol defined on this (fully stripped) line, or None."""
    found = re.search(
        r"(?:define|define-extern|defun|defstate|deftype)\s+([^\s]*)\s",
        line.strip(),
    )
    return found.group(1) if found else None
def relevant_symbol_comments_for_line_before(line):
    """Take the comments whose `symbol_after` is the symbol defined on `line`.

    The returned comments are removed from the module-level `comments` list.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    # Split the pending comments into those that match the symbol and the rest;
    # matches WILL be placed, so they leave the pending list right away
    taken, kept = [], []
    for candidate in comments:
        bucket = taken if candidate.symbol_after == symbol else kept
        bucket.append(candidate)
    comments[:] = kept
    return taken
def padding_before_comment(comment):
    """Return the newlines to put in front of a top-level comment that has a symbol after it."""
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_after is not None:
        return "\n" * comment.symbol_padding_after
    return ""
def relevant_symbol_comments_for_inline(line):
    """Take the comments whose `symbol_inline` is the symbol defined on `line`.

    The returned comments are removed from the module-level `comments` list.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    # Split the pending comments into those that match the symbol and the rest;
    # matches WILL be placed, so they leave the pending list right away
    taken, kept = [], []
    for candidate in comments:
        bucket = taken if candidate.symbol_inline == symbol else kept
        bucket.append(candidate)
    comments[:] = kept
    return taken
def padding_after_comment(comment):
    """Return the newlines to put in front of a top-level comment that has a symbol before it."""
    is_top_level = comment.containing_form is None
    if is_top_level and comment.symbol_before is not None:
        return "\n" * comment.symbol_padding_before
    return ""
def relevant_symbol_comments_for_line_after(line):
    """Take the comments that can only be anchored after the symbol defined on `line`.

    A comment qualifies when it has no `symbol_after` and its `symbol_before` is
    the line's symbol. The returned comments are removed from the module-level
    `comments` list.
    """
    symbol = get_symbol_at_line(line)
    if symbol is None:
        return []
    taken, kept = [], []
    for candidate in comments:
        # if we can, we prefer to put comments before not after (more accurate re-creation)
        anchored_after = (
            candidate.symbol_after is None and candidate.symbol_before == symbol
        )
        (taken if anchored_after else kept).append(candidate)
    comments[:] = kept
    return taken
# the first half of the defmethod/etc lines (before arg list) is less likely to change
# so we want to split it to weight it more heavily
def split_def_line(line):
    """Split a definition line at the first "(" that is not its very first character.

    Returns (text before that paren, text from that paren onwards); the second
    part is empty when there is no such paren.
    """
    split_at = line.find("(", 1)
    if split_at == -1:
        return line, ""
    return line[:split_at], line[split_at:]
def get_form_metadata(form_def_line):
    """Classify a form's definition line.

    Returns (kind, name, type) where kind is "function", "behavior", "method" or
    "unknown"; the type is only set for behaviors and methods.
    """
    # (kind, pattern, whether the second capture group is the type); first hit wins
    form_patterns = (
        ("function", r"\(defun(?:-debug)? ([^\s]*)", False),
        ("behavior", r"\((?:defbehavior) ([^\s]*) ([^\s]*)", True),
        ("method", r"\((?:defmethod) ([^\s]*) ([^\s]*)", True),
    )
    for kind, pattern, has_type in form_patterns:
        found = re.search(pattern, form_def_line)
        if found is not None:
            return kind, found.group(1), (found.group(2) if has_type else None)
    return "unknown", None, None
built_in_method_names = [
    "new",
    "delete",
    "print",
    "inspect",
    "length",
    "asize-of",
    "copy",
    "relocate",
    "memusage",
]


def different_method_names(form_func_name, comment_form_func_name):
    """Return True when the two method names differ and at least one is a built-in.

    When neither name is in `built_in_method_names` the names are never
    reported as different.
    """
    involves_built_in = (
        comment_form_func_name in built_in_method_names
        or form_func_name in built_in_method_names
    )
    return involves_built_in and form_func_name != comment_form_func_name
# Collects the comments that belong to the form whose definition line is
# `form_def_line`. Every comment that is selected is popped from the module-level
# `comments` list and returned; comments without a containing form are ignored.
# Relies on get_form_metadata, split_def_line, different_method_names, fuzz.ratio
# and the module-level `decomp_form_def_lines`.
def get_relevant_form_comments(form_def_line):
form_kind, form_func_name, form_type = get_form_metadata(form_def_line)
code_def_part, code_rest = split_def_line(form_def_line)
relevant_comments = []
# `i` only advances when the current comment is kept in `comments`; after a pop
# the next comment has moved into slot `i`
i = 0
while i < len(comments):
comment = comments[i]
if comment.containing_form is None:
i = i + 1
continue
(
comment_form_kind,
comment_form_func_name,
comment_form_type,
) = get_form_metadata(comment.containing_form)
# First disqualify the form if it's obviously unrelated
if comment_form_kind != "unknown":
if form_kind != comment_form_kind:
i = i + 1
continue
elif form_kind == "function" and comment_form_func_name != form_func_name:
i = i + 1
continue
elif form_kind == "behavior" and comment_form_func_name != form_func_name:
i = i + 1
continue
elif form_kind == "method" and (
comment_form_type != form_type
or different_method_names(form_func_name, comment_form_func_name)
):
i = i + 1
continue
# Evaluate its score (comment's form def line vs. the current def line)
# The part before the arg list is weighted 0.65, the rest 0.35
def_part, rest = split_def_line(comment.containing_form)
def_score = fuzz.ratio(code_def_part, def_part) * 0.65
# a def_score of exactly 65.0 on a known form kind is accepted immediately
if def_score == 65.0 and form_kind != "unknown":
relevant_comments.append(comment)
comments.pop(i)
continue
rest_score = fuzz.ratio(code_rest, rest) * 0.35
combined_score = def_score + rest_score
threshold = 50.0
if combined_score < threshold:
i = i + 1
continue
# Now, let's look at ALL other def lines yet to come from the decomp output
# if any are a better match, don't add the comment yet -- we'll add it when we get there!
# TODO - remove lines from the list as we find them so speed this up
# NOTE(review): the scores in this loop compare the *current* form's def line
# (code_def_part / code_rest) with each decomp def line, not the comment's
# containing form - confirm that this is the intended comparison.
found_better_form = False
for decomp_def_line in decomp_form_def_lines:
line_form_kind, line_form_func_name, line_form_type = get_form_metadata(
decomp_def_line
)
if form_kind != "unknown":
if form_kind != line_form_kind:
continue
elif form_kind == "function" and line_form_func_name != form_func_name:
continue
elif form_kind == "behavior" and line_form_func_name != form_func_name:
continue
elif form_kind == "method" and (
line_form_type != form_type
or different_method_names(form_func_name, line_form_func_name)
):
continue
# def_part / rest are reused here and now refer to the decomp def line
def_part, rest = split_def_line(decomp_def_line)
def_score = fuzz.ratio(code_def_part, def_part) * 0.65
if def_score == 65.0 and form_kind != "unknown":
found_better_form = True
break
rest_score = fuzz.ratio(code_rest, rest) * 0.35
if combined_score < def_score + rest_score:
found_better_form = True
break
# TODO otherwise? still test?
if found_better_form:
i = i + 1
continue
relevant_comments.append(comment)
comments.pop(i)
return relevant_comments
# Simple fuzz ratio, but removes obvious outliers like empty lines / lines with only a paren
def score_alg(line1, line2):
    """Return `fuzz.ratio` of the two stripped lines, or -1 when either is trivial.

    A line is trivial when, after stripping, it is empty or a single paren.
    """
    trivial_lines = ("", ")", "(")
    first = line1.strip()
    second = line2.strip()
    if first in trivial_lines or second in trivial_lines:
        return -1
    return fuzz.ratio(first, second)
# TODO - improvement on comparison - a higher score on a longer line == better? some sort of weighting approach here too?
with open(gsrc_path) as f:
lines = f.readlines()
within_form = None
line_num_in_form = None
form_paren_stack = []
for line in lines:
if "[DEBUG]" in line:
continue
if line.lower().lstrip().startswith(";; decomp begins"):
decomp_started = True
if args.preserve:
merge_retained_code_and_new_code(gsrc_path, decomp_lines, final_lines)
else:
with open(gsrc_path) as f:
lines = f.readlines()
for line in lines:
final_lines.append(line)
break
if not decomp_started:
if line.lower().startswith(";; decomp begins"):
break
for line in decomp_lines:
final_lines.append(line)
continue
# Step 3.a: walk the decompiler output line by line.
# - A multi-line form is collected in one go, its comments are placed inside it,
#   and the form's lines are appended to final_lines.
# - Every other line (including the closing line of a collected form) is treated
#   as top-level code and gets its symbol comments attached.
i = 0
while i < len(decomp_lines):
    line = decomp_lines[i]
    # Otherwise, its a part of the output we have to be more careful about
    # For every line in the decompiled output, we scan our comment list to see if anything matches
    # if it does, we insert it appropriately and remove the comment from the list
    #
    # This is the main source of inefficiency, but the process gets progressively faster as comments are eliminated
    if within_form is None:
        # lets see if we are now in one
        within_form = is_line_start_of_form(line)
        # TODO - check line for symbol matches?
        if within_form is not None:
            line_num_in_form = 0
            if has_form_ended(form_paren_stack, line):
                # The form opens and closes on the same line
                within_form = None
                form_paren_stack = []
            else:
                # Get all of the lines of the form at once
                form_start = decomp_lines[i]
                form_lines = [form_start]
                # Guard on i + 1 because the index is advanced before it is used;
                # an unterminated form at the end of the output must not raise IndexError
                while i + 1 < len(decomp_lines):
                    i = i + 1
                    line = decomp_lines[i]
                    if has_form_ended(form_paren_stack, line):
                        within_form = None
                        form_paren_stack = []
                        break
                    form_lines.append(line)
                # Add any comments needed to the form contents
                # - first we get all comments that match well with the form's start line (ie. defmethod ....)
                form_comments = get_relevant_form_comments(form_start)
                # - for each comment, let's find which line matches it the best,
                # if NONE exceed the threshold (if both match the same, pick the first), we default to the line offset
                for comment in form_comments:
                    highest_score = -1
                    index_to_insert = -1
                    threshold = 50.0
                    place_kind = None
                    for index, form_line in enumerate(form_lines):
                        # skip any comments that were previously added
                        if form_line.lstrip().startswith(";"):
                            continue
                        if comment.code_in_line is not None:
                            score = score_alg(form_line, comment.code_in_line)
                            if score >= threshold and score > highest_score:
                                index_to_insert = index
                                highest_score = score
                                place_kind = "inline"
                        if comment.code_before is not None:
                            score = score_alg(form_line, comment.code_before)
                            if score >= threshold and score > highest_score:
                                index_to_insert = index
                                highest_score = score
                                place_kind = "next_line"
                        if comment.code_after is not None:
                            score = score_alg(form_line, comment.code_after)
                            if score >= threshold and score > highest_score:
                                index_to_insert = index
                                highest_score = score
                                place_kind = "before_line"
                    # add the comment!
                    if index_to_insert == -1:
                        # Nothing scored above the threshold: fall back to the recorded line offset
                        if comment.inline:
                            # Append to the line at the recorded offset itself
                            # (index_to_insert is -1 here and must not be used as an index)
                            form_lines[comment.line_num_in_form] = (
                                form_lines[comment.line_num_in_form].rstrip()
                                + " "
                                + comment.data
                            )
                        else:
                            form_lines.insert(comment.line_num_in_form, comment.data)
                    elif comment.inline:
                        form_index = index_to_insert
                        if place_kind == "next_line":
                            form_index = index_to_insert + 1
                        form_lines[form_index] = (
                            form_lines[form_index].rstrip() + " " + comment.data
                        )
                    elif place_kind == "next_line":
                        form_lines.insert(
                            index_to_insert + 1,
                            padding_before_comment(comment) + comment.data,
                        )
                    else:
                        form_lines.insert(
                            index_to_insert,
                            padding_after_comment(comment) + comment.data,
                        )
                # Add the lines to the final output
                for form_line in form_lines:
                    final_lines.append(form_line)
    # Otherwise, we are at the top-level!
    if within_form is None:
        before_comments = relevant_symbol_comments_for_line_before(line)
        for comment in before_comments:
            final_lines.append(padding_before_comment(comment) + comment.data)
        inline_comments = relevant_symbol_comments_for_inline(line)
        if len(inline_comments) > 0:
            # Merge all inline comments into a single trailing ";;" comment
            comment_str = "".join(
                comment.data.strip() for comment in inline_comments
            ).replace(";", "")
            final_lines.append(
                "{} ;; {}".format(line.rstrip(), comment_str.strip())
            )
        else:
            final_lines.append(line)
        after_comments = relevant_symbol_comments_for_line_after(line)
        for comment in after_comments:
            final_lines.append(padding_after_comment(comment) + comment.data)
    # next line
    i = i + 1
# Step 3.b: Place whatever top-level comments are still unclaimed
# Each leftover comment is scored against the already-assembled output; if no
# line reaches the threshold the comment falls back to its original line number.
# - This has to run last: a comment tied to nothing but code has no guaranteed
#   position until every code line has been emitted.
# This is a full rescan of the output per comment, so it is slow; ideally almost
# every comment has been consumed before we get here.
for comment in comments:
    # Fresh scan state for every comment
    within_form = None
    line_num_in_form = None
    form_paren_stack = []
    found_output = True
    index_to_insert = -1
    highest_score = -1
    place_comment_after = True
    threshold = 50.0
    # (anchor text, placement): None keeps the current placement, True places the
    # comment after the matched line, False places it before
    anchors = (
        (comment.code_in_line, None),
        (comment.code_before, True),
        (comment.code_after, False),
    )
    i = 0
    while i < len(final_lines):
        debug_lines.append(final_lines[i])
        tline = final_lines[i].lstrip()
        if "decomp begins" in tline.lower():
            found_output = True
            i = i + 1
            continue
        if not found_output:
            i = i + 1
            continue
        line = final_lines[i]
        # Lines strictly inside a form are skipped to claw back some efficiency;
        # the opening line, the closing line and plain top-level code are scored
        if within_form is None:
            # lets see if we are now in a form
            within_form = is_line_start_of_form(line)
            if within_form is not None:
                line_num_in_form = 0
            score_this_line = True
        elif has_form_ended(form_paren_stack, line):
            # the form closed on this line (determined by counting parens)
            within_form = None
            form_paren_stack = []
            line_num_in_form = 0
            score_this_line = True
        else:
            line_num_in_form = line_num_in_form + 1
            score_this_line = False
        if score_this_line:
            for anchor, goes_after in anchors:
                if anchor is None:
                    continue
                score = score_alg(line, anchor)
                if score >= threshold and score > highest_score:
                    index_to_insert = i
                    highest_score = score
                    if goes_after is not None:
                        place_comment_after = goes_after
        i = i + 1
    # Scan finished - add the comment!
    if index_to_insert == -1:
        # no line matched well enough, use the comment's recorded position
        if comment.inline:
            final_lines[comment.line_in_file] = (
                final_lines[comment.line_in_file].rstrip() + " " + comment.data
            )
        else:
            final_lines.insert(comment.line_in_file, comment.data)
    elif comment.inline:
        final_lines[index_to_insert] = (
            final_lines[index_to_insert].rstrip() + " " + comment.data
        )
    elif place_comment_after:
        final_lines.insert(
            index_to_insert + 1,
            padding_before_comment(comment) + comment.data,
        )
    else:
        final_lines.insert(
            index_to_insert,
            padding_after_comment(comment) + comment.data,
        )
if args.preserve:
handle_dangling_blocks(comments, final_lines, debug_lines)
# Step 4: Write it out
# Step 4.a: Count the trailing lines to drop from the end of the output.
# A trailing line is dropped when it is blank ("\n") or a bare "0\n".
# The first line (index 0) is never dropped.
lines_to_ignore = 0
i = len(final_lines) - 1
while i > 0 and final_lines[i] in ("\n", "0\n"):
    lines_to_ignore = lines_to_ignore + 1
    i = i - 1
print("ignoring - {}".format(lines_to_ignore))
# Step 4.b: Write everything except the ignored tail, exactly once.
# Writing the full list first and then the trimmed list again would duplicate
# the file contents and keep the trailing lines we just decided to drop.
with open(gsrc_path, "w") as f:
    f.writelines(final_lines[: len(final_lines) - lines_to_ignore])
# If we need to merge, now is the time!
# The freshly written output is moved aside as "<name>.after.gc", the
# "<name>.before.gc" copy is restored as gsrc_path, and `git merge-file` is run with
# gsrc_path as the current file, "<name>.empty.gc" as the base and
# "<name>.after.gc" as the other side.
# NOTE(review): str.replace rewrites every ".gc" occurrence in the path, not only
# the extension - presumably paths contain it just once; confirm.
if update_with_merge:
shutil.move(gsrc_path, gsrc_path.replace(".gc", ".after.gc"))
shutil.move(gsrc_path.replace(".gc", ".before.gc"), gsrc_path)
subprocess.run(
[
"git",
"merge-file",
gsrc_path,
gsrc_path.replace(".gc", ".empty.gc"),
gsrc_path.replace(".gc", ".after.gc"),
# The three -L labels name, in order, the current, base and other files in conflict markers
"-L",
"Before Updating",
"-L",
"ignored",
"-L",
"After Updating",
]
)
# Remove the temporary merge inputs; each is only deleted if it actually exists
if os.path.exists(gsrc_path.replace(".gc", ".empty.gc")):
os.remove(gsrc_path.replace(".gc", ".empty.gc"))
if os.path.exists(gsrc_path.replace(".gc", ".before.gc")):
os.remove(gsrc_path.replace(".gc", ".before.gc"))
if os.path.exists(gsrc_path.replace(".gc", ".after.gc")):
os.remove(gsrc_path.replace(".gc", ".after.gc"))
+64
View File
@@ -0,0 +1,64 @@
# Updates files in gsrc if they are modified in the reference test folder
# Uses git
import subprocess
from git import Repo
repo = Repo("./")
import argparse
import os
import glob
parser = argparse.ArgumentParser("update-gsrc-via-refs")
parser.add_argument("--game", help="The name of the game", type=str)
parser.add_argument("--decompiler", help="The path to the decompiler", type=str)
parser.add_argument("--decompiler_config", help="The decomp config", type=str)
parser.add_argument("--file_pattern", help="Provide a glob pattern to find files, instead of using git status. Relative to the reference test folder", type=str)
args = parser.parse_args()
def get_files_via_git():
    """Collect object names of reference files that git reports as changed or new.

    Looks at the working-tree diff against the index and at untracked files,
    keeps paths that contain both the game name and "_REF", and returns the
    set of their base names with "_REF.gc" removed.
    """
    # Modified (tracked) files first, then untracked ones
    candidate_paths = [
        item.b_rawpath.decode("utf-8") for item in repo.index.diff(None)
    ]
    candidate_paths.extend(repo.untracked_files)
    return {
        os.path.basename(path).replace("_REF.gc", "")
        for path in candidate_paths
        if args.game in path and "_REF" in path
    }
def get_files_via_glob():
    """Collect object names of reference files matching the --file_pattern glob.

    The pattern is resolved relative to the game's reference test folder; the
    result is the set of matched base names with "_REF.gc" removed.
    """
    pattern = "./test/decompiler/reference/{}/{}".format(args.game, args.file_pattern)
    return {
        os.path.basename(match).replace("_REF.gc", "")
        for match in glob.glob(pattern, recursive=True)
    }
# Get a list of changed files, as well as new files
# (an explicit --file_pattern takes precedence over asking git)
if args.file_pattern:
    file_names = get_files_via_glob()
else:
    file_names = get_files_via_git()

for file_name in file_names:
    print("Decompiling - {}".format(file_name))
    # Decompile file - the config override restricts the run to this one object
    subprocess.run(
        [
            args.decompiler,
            "./decompiler/config/{}".format(args.decompiler_config),
            "./iso_data",
            "./decompiler_out",
            "--config-override",
            '{{"allowed_objects": ["{}"]}}'.format(file_name),
        ]
    )
    print("Updating - {}".format(file_name))
    # Update gsrc
    # Arguments are passed as a list (no shell), so names containing spaces or
    # shell metacharacters are forwarded verbatim instead of being re-parsed
    subprocess.run(
        [
            "python",
            "./scripts/gsrc/update-from-decomp.py",
            "--game",
            args.game,
            "--file",
            file_name,
        ]
    )
+21
View File
@@ -29,3 +29,24 @@ def get_gsrc_path_from_filename(game_name, file_name):
print("{} couldn't find in /goal_src/{}!".format(file_name, game_name))
exit(1)
return path
def get_alltypes_path_from_game(game_name):
    """Return the all-types.gc path for a game.

    "jak1" maps to the top-level config file; every other value maps to the
    jak2 config folder.
    """
    is_jak1 = game_name == "jak1"
    return (
        "./decompiler/config/all-types.gc"
        if is_jak1
        else "./decompiler/config/jak2/all-types.gc"
    )
def get_ref_path_from_filename(game_name, file_name, ref_folder):
    """Build the path of the _REF.gc reference file for an object.

    Searches the game's file list for the first entry whose element 2 equals 3
    and whose element 0 equals ``file_name``, and uses that entry's element 4
    as the sub-folder. Prints a message and exits with status 1 when no
    sub-folder can be determined.
    """
    src_path = next(
        (
            entry[4]
            for entry in get_file_list(game_name)
            if entry[2] == 3 and entry[0] == file_name
        ),
        "",
    )
    if src_path == "":
        print("couldn't determine ref path for {}:{}!".format(game_name, file_name))
        exit(1)
    return os.path.join(ref_folder, game_name, src_path, "{}_REF.gc".format(file_name))
+1 -1
View File
@@ -3,4 +3,4 @@
# Directory of this script
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
"${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
"${DIR}"/../../build/decompiler/decompiler "${DIR}"/../../decompiler/config/jak1_ntsc_black_label.jsonc "${DIR}"/../../iso_data "${DIR}"/../../decompiler_out
+1
View File
@@ -5,6 +5,7 @@ vars:
GK_BIN_RELEASE_DIR: './build/game'
DECOMP_BIN_RELEASE_DIR: './build/decompiler'
MEMDUMP_BIN_RELEASE_DIR: './build/tools'
TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
OFFLINETEST_BIN_RELEASE_DIR: './build'
GOALCTEST_BIN_RELEASE_DIR: './build'
EXE_FILE_EXTENSION: ''
+1
View File
@@ -5,6 +5,7 @@ vars:
GK_BIN_RELEASE_DIR: './build/game'
DECOMP_BIN_RELEASE_DIR: './build/decompiler'
MEMDUMP_BIN_RELEASE_DIR: './build/tools'
TYPESEARCH_BIN_RELEASE_DIR: './build/tools'
OFFLINETEST_BIN_RELEASE_DIR: './build'
GOALCTEST_BIN_RELEASE_DIR: './build'
EXE_FILE_EXTENSION: ''
+1
View File
@@ -5,6 +5,7 @@ vars:
GK_BIN_RELEASE_DIR: './out/build/Release/bin'
DECOMP_BIN_RELEASE_DIR: './out/build/Release/bin'
MEMDUMP_BIN_RELEASE_DIR: './out/build/Release/bin'
TYPESEARCH_BIN_RELEASE_DIR: './out/build/Release/bin'
OFFLINETEST_BIN_RELEASE_DIR: './out/build/Release/bin'
GOALCTEST_BIN_RELEASE_DIR: './out/build/Release/bin'
EXE_FILE_EXTENSION: '.exe'
+14
View File
@@ -0,0 +1,14 @@
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--game")
args = parser.parse_args()
import os
import glob
def delete_extension(ext):
    """Delete every file with extension ``ext`` directly inside ./decompiler_out/<game>/."""
    pattern = './decompiler_out/{}/*.{}'.format(args.game, ext)
    for stale_path in glob.glob(pattern):
        os.remove(stale_path)
delete_extension("gc")
delete_extension("asm")
+10
View File
@@ -0,0 +1,10 @@
import argparse
import zipfile
# Command line: --file is the archive to read, --out is where it gets unpacked
parser = argparse.ArgumentParser()
for flag in ("--file", "--out"):
    parser.add_argument(flag)
args = parser.parse_args()

# Unpack every member of the archive into the output location
with zipfile.ZipFile(args.file, 'r') as archive:
    archive.extractall(args.out)
+26 -15
View File
@@ -3,32 +3,43 @@ import glob
import argparse
import shutil
from gsrc.utils import get_ref_path_from_filename
## Script to update failing _REF.gc files
## Instructions:
## run offline-test with the `--dump-mode` flag set. This generates a "failures" folder.
## run offline-test with the `--dump_current_output` flag set. This generates a "failures" folder.
## update reference like this
## python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference
## python3 ../scripts/update_decomp_reference.py ./failures ../test/decompiler/reference --game [jak1|jak2]
def get_goal_files(root_dir):
    """Return the paths of all *.gc files found in root_dir and its sub-directories."""
    goal_files = []
    for dir_path, _dir_names, _file_names in os.walk(root_dir):
        goal_files += glob.glob(os.path.join(dir_path, '*.gc'))
    return goal_files
def get_failures(root_dir):
    """Return the paths of all *.gc files under root_dir, searched recursively."""
    failures = []
    for walk_entry in os.walk(root_dir):
        current_dir = walk_entry[0]
        failures.extend(glob.glob(os.path.join(current_dir, "*.gc")))
    return failures
# removesuffix only added in python 3.9....
def removesuffix(self: str, suffix: str, /) -> str:
    """Return ``self`` without a trailing ``suffix``, mirroring ``str.removesuffix``.

    The string is returned unchanged when it does not end with ``suffix`` or
    when ``suffix`` is empty.
    """
    # The emptiness check matters: every string "ends with" "", and
    # self[:-0] is self[:0], which would wrongly return an empty string
    if suffix and self.endswith(suffix):
        return self[: -len(suffix)]
    return self[:]
def main():
    """Copy every dumped failure file over its matching _REF.gc reference file.

    Command line: ``<failures folder> <reference folder> --game <jak1|jak2>``.
    Each *.gc file found under the failures folder is mapped to its reference
    path through the game's file list and copied there.
    """
    parser = argparse.ArgumentParser()
    # Each positional is declared exactly once; declaring them twice would make
    # argparse demand four positional arguments
    parser.add_argument(dest="diff", help="the failures folder")
    parser.add_argument(dest="reference", help="the test/decompiler/reference folder")
    parser.add_argument("--game", help="The name of the game (jak1/jak2)", type=str)
    args = parser.parse_args()

    for replacement in get_failures(args.diff):
        # "foo_REF.gc" -> "foo"
        obj_name = removesuffix(os.path.basename(replacement), ".gc").replace("_REF", "")
        # Find the reference path, given the game name
        ref_path = get_ref_path_from_filename(args.game, obj_name, args.reference)
        print("replace {} with {}".format(ref_path, replacement))
        shutil.copyfile(replacement, ref_path)
if __name__ == "__main__":
main()