diff --git a/Makefile b/Makefile index 3aa9940f7d..38c712c2a9 100644 --- a/Makefile +++ b/Makefile @@ -105,7 +105,7 @@ build/comp/code.yaz0: code.bin python3 yaz0.py -i $< -o $@ disasm: -# python3 disasm.py + @python3 ./tools/disasm.py -d ./asm -e ./include -u . -l ./tables/files.py -f ./tables/functions.py -o ./tables/objects.py -v ./tables/variables.py @while read -r file; do \ python3 ./tools/split_asm.py ./asm/$$file.asm ./asm/nonmatching/$$file; \ done < ./tables/files_with_nonmatching.txt diff --git a/files.py b/tables/files.py similarity index 98% rename from files.py rename to tables/files.py index a265b96842..b9ba581878 100644 --- a/files.py +++ b/tables/files.py @@ -1,4 +1,5 @@ -known_files = { +# Follows the format of (File Location, Name, Start Address, .data/.rodata Segments (Start, End Inclusive)) +{ ('baserom/boot', 'boot', 0x80080060, ((0x800969C0, 0x800A5ABF),)), ('decomp/code', 'code', 0x800A5AC0, ((0x80186028, 0x80186A6F),(0x801AAAB0, 0x801E3F9C),)), ('decomp/ovl_title', 'ovl_title', 0x80800000, ((0x80800860, 0x8080090C),)), diff --git a/tables/files_with_nonmatching.txt b/tables/files_with_nonmatching.txt new file mode 100644 index 0000000000..3e2bc9d62d --- /dev/null +++ b/tables/files_with_nonmatching.txt @@ -0,0 +1,6 @@ +ovl_Bg_Fu_Kaiten_0x80ACB400 +boot_0x800805E0 +boot_0x800968B0 +z_std_dma +z_effect_soft_sprite +z_lib diff --git a/functions.py b/tables/functions.py similarity index 98% rename from functions.py rename to tables/functions.py index c5705f5439..9c391738a7 100644 --- a/functions.py +++ b/tables/functions.py @@ -1,4 +1,5 @@ -known_funcs = { +# Follows the format of Entry Point Address:(Name, Return, Args) +{ 0x80080060:("start","UNK_RET","void"), 0x80080150:("Init_ClearMemory","void","UNK_PTR, UNK_PTR"), 0x80080180:("Idle_InitFramebuffer","void","UNK_TYPE*, UNK_TYPE, UNK_TYPE"), diff --git a/objects.py b/tables/objects.py similarity index 96% rename from objects.py rename to tables/objects.py index 3121a894b7..4e0eb27a6b 100644 --- a/objects.py +++ b/tables/objects.py @@ -1,4 +1,4 @@ -known_objects = { +{ 0x80080150:"idle", 0x80080790:"z_std_dma", 0x80081250:"", diff --git a/variables.py b/tables/variables.py similarity index 93% rename from variables.py rename to tables/variables.py index bf7342fbd2..f5da1a7f88 100644 --- a/variables.py +++ b/tables/variables.py @@ -1,4 +1,5 @@ -known_vars = { +# Follows the format of Address:(Name, Type, Is Array) +{ 0x0001A500:("dmadataRomStart","u32",False), # Start of dmadata 0x00020700:("dmadataRomEnd","u32",False), # Byte immediately after end of dmadata 0x00AC4000:("","UNK_TYPE",False), # this seems low @@ -215,12 +216,3 @@ known_vars = { 0x80BD561C:("bgIkanaRayCompInit","z_ActorCompInitEntry",True), 0x80BD562C:("","UNK_TYPE",False) } - -# these are extra variables needed for one reason or another, they should probably be deleted if possible -extra_vars = ( - ("D_800980D0_","UNK_PTR"), # needed to match? - ("D_80099AD0_","UNK_TYPE"), # needed to match? - ("D_8009A670_","UNK_TYPE"), # needed to match? - ("D_8009B140_","UNK_TYPE"), # needed to match? - ("(*D_801BE960[12])(u8*, z_ActorCompInitEntry*)",""), # TODO better function pointer representation - ) diff --git a/disasm.py b/tools/disasm.py similarity index 86% rename from disasm.py rename to tools/disasm.py index b6ca371193..10df1ba089 100644 --- a/disasm.py +++ b/tools/disasm.py @@ -1,16 +1,20 @@ -import os -import struct +import argparse, os, struct, ast -SPLIT_FILES = True -GENERATE_HEADERS = True +SPLIT_FILES = True # TODO this should be a flag somewhere loadHighRefs = {} loadLowRefs = {} -from objects import * -from functions import * -from variables import * -from files import * +#from tables.objects import * +#from tables.functions import * +#from tables.variables import * +#from tables.files import * + +known_files = {} +known_funcs = dict() +known_objects = dict() +known_vars = dict() +extra_vars = {} regs = { 0:"$zero", 1:"$at", 2:"$v0", 3:"$v1", 4:"$a0", 5:"$a1", 6:"$a2", 7:"$a3", @@ -171,6 +175,8 @@ class Disassembler: self.vars = set() self.data_regions = list() + self.has_done_first_pass = False + self.is_data_cache = {} self.is_code_cache = {} @@ -310,18 +316,18 @@ class Disassembler: if self.is_in_data(addr) and self.is_in_code(word): self.add_function(word) - - def disassemble(self, path): + self.first_pass() + self.second_pass(path) + + def first_pass(self): + if self.has_done_first_pass == True: + return + # TODO keep sorted self.files = sorted(self.files, key = lambda file: file.vaddr) self.data_regions = sorted(self.data_regions, key = lambda region: region[0]) - self.__first_pass() - self.guess_functions_from_data() - self.__second_pass(path) - - def __first_pass(self): for file in self.files: for i in range(0, file.size // 4): inst = file.get_inst(i) @@ -341,8 +347,10 @@ class Disassembler: # don't split if it's the start of a data section, it's probably the same object if not self.is_in_data_or_undef(new_object_start): self.add_object(new_object_start) + self.guess_functions_from_data() + self.has_done_first_pass = True - def __second_pass(self, path): + def second_pass(self, path): for file in self.files: filename = path + '/%s.asm' % self.get_object_name(file.vaddr, file.vaddr); @@ -590,7 +598,8 @@ class Disassembler: return dis def generate_headers(self, path): - with open(path + "functions.h", 'w', newline='\n') as f: + self.first_pass() # find functions and variables + with open(path + "/functions.h", 'w', newline='\n') as f: f.write("#ifndef _FUNCTIONS_H_\n#define _FUNCTIONS_H_\n\n"); f.write('#include \n#include \n#include \n#include \n#include \n#include \n\n'); @@ -603,7 +612,7 @@ class Disassembler: f.write("\n#endif\n"); - with open(path + "variables.h", 'w', newline='\n') as f: + with open(path + "/variables.h", 'w', newline='\n') as f: f.write("#ifndef _VARIABLES_H_\n#define _VARIABLES_H_\n\n"); f.write('#include \n#include \n#include \n#include \n#include \n#include \n\n'); @@ -620,19 +629,58 @@ class Disassembler: f.write("\n#endif\n"); - with open("undef.txt", 'w', newline='\n') as f: - for addr in sorted(self.vars): - f.write("%s = 0x%08X;\n" % (self.make_load(addr), addr)); + def generate_undefined(self, path): + self.first_pass() # find functions and variables + with open(path + "/undef.txt", 'w', newline='\n') as f: + for addr in sorted(self.vars): + f.write("%s = 0x%08X;\n" % (self.make_load(addr), addr)); - # TODO not hard code - f.write(''' + # TODO not hard code + f.write(''' D_80099AD0_ = 0x80099AD0;''' - ) - + ) +# TODO -a --analyze flag? Only when its set will new symbols be added, otherwise use only the supplied ones if __name__ == "__main__": - dis = Disassembler() - dis.load_defaults() - dis.disassemble('./asm/') - dis.generate_headers('./') + parser = argparse.ArgumentParser() + parser.add_argument('-e', '--export-headers', help='export functions and variables into .h files', metavar='path') + parser.add_argument('-u', '--undefined', help='create linker script for undefined symbols', metavar='path') + parser.add_argument('-d', '--disassemble', help='disassemble supplied code files', metavar='path') + parser.add_argument('-l', '--files', help='list of files to disassemble', metavar='filename') + parser.add_argument('-f', '--functions', help='predefined functions', metavar='filename') + parser.add_argument('-o', '--objects', help='predefined code objects', metavar='filename') + parser.add_argument('-v', '--variables', help='predefined variables', metavar='filename') + args = parser.parse_args() + + if args.files != None: + with open(args.files, 'r') as f: + known_files = ast.literal_eval(f.read()) + if args.functions != None: + with open(args.functions, 'r') as f: + known_funcs = ast.literal_eval(f.read()) + if args.objects != None: + with open(args.objects, 'r') as f: + known_objects = ast.literal_eval(f.read()) + if args.variables != None: + with open(args.variables, 'r') as f: + known_vars = ast.literal_eval(f.read()) + # these are extra variables needed for one reason or another, they should probably be deleted if possible + extra_vars = ( + ("D_800980D0_","UNK_PTR"), # needed to match? + ("D_80099AD0_","UNK_TYPE"), # needed to match? + ("D_8009A670_","UNK_TYPE"), # needed to match? + ("D_8009B140_","UNK_TYPE"), # needed to match? + ("(*D_801BE960[12])(u8*, z_ActorCompInitEntry*)",""), # TODO better function pointer representation + ) + dis = Disassembler() + dis.load_defaults() # TODO file loading code should go in here + if args.disassemble != None: + os.makedirs(args.disassemble, exist_ok=True) + dis.disassemble(args.disassemble) + if args.export_headers != None: + os.makedirs(args.export_headers, exist_ok=True) + dis.generate_headers(args.export_headers) + if args.undefined != None: + os.makedirs(args.undefined, exist_ok=True) + dis.generate_undefined(args.undefined)