From dc7b8cc5b4ad54ff71fbc7bc5e641911421f6f1d Mon Sep 17 00:00:00 2001 From: Rozelette Date: Thu, 17 Sep 2020 16:11:59 -0500 Subject: [PATCH] Add some OOT tools (#17) * Add first_diff.py and sym_info.py * Add c tools from OOT. Replace yaz0 tool --- Makefile | 5 +- first_diff.py | 255 +++++++++++++++++ linker_scripts/code_script.txt | 2 +- sym_info.py | 139 ++++++++++ tools/.gitignore | 7 + tools/Makefile | 28 ++ tools/elf2rom.c | 267 ++++++++++++++++++ tools/elf32.c | 191 +++++++++++++ tools/elf32.h | 72 +++++ tools/makeromfs.c | 342 +++++++++++++++++++++++ tools/mkldscript.c | 492 +++++++++++++++++++++++++++++++++ tools/n64chksum.c | 85 ++++++ tools/n64chksum.h | 6 + tools/util.c | 87 ++++++ tools/util.h | 17 ++ tools/vtxdis.c | 257 +++++++++++++++++ tools/yaz0.c | 241 ++++++++++++++++ tools/yaz0.h | 10 + tools/yaz0.py | 181 ------------ tools/yaz0tool.c | 201 ++++++++++++++ 20 files changed, 2702 insertions(+), 183 deletions(-) create mode 100755 first_diff.py create mode 100755 sym_info.py create mode 100644 tools/.gitignore create mode 100644 tools/Makefile create mode 100644 tools/elf2rom.c create mode 100644 tools/elf32.c create mode 100644 tools/elf32.h create mode 100644 tools/makeromfs.c create mode 100644 tools/mkldscript.c create mode 100644 tools/n64chksum.c create mode 100644 tools/n64chksum.h create mode 100644 tools/util.c create mode 100644 tools/util.h create mode 100644 tools/vtxdis.c create mode 100644 tools/yaz0.c create mode 100644 tools/yaz0.h delete mode 100755 tools/yaz0.py create mode 100644 tools/yaz0tool.c diff --git a/Makefile b/Makefile index 57c4b5cd06..9105ec444c 100644 --- a/Makefile +++ b/Makefile @@ -158,11 +158,14 @@ clean: setup: git submodule update --init --recursive python3 -m pip install -r requirements.txt + make -C tools diff-init: all rm -rf expected/ mkdir -p expected/ cp -r build expected/build + cp $(UNCOMPRESSED_ROM) expected/$(UNCOMPRESSED_ROM) + cp $(ROM) expected/$(ROM) init: setup all diff-init @@ -197,5 +200,5 @@ 
build/decomp/%: decomp/% cp $< $@ build/comp/%.yaz0: build/decomp/% - ./tools/yaz0.py $< $@ + ./tools/yaz0 $< $@ diff --git a/first_diff.py b/first_diff.py new file mode 100755 index 0000000000..bb13b605dc --- /dev/null +++ b/first_diff.py @@ -0,0 +1,255 @@ +#!/usr/bin/env python3 + +import os.path +import argparse +from subprocess import check_call + +parser = argparse.ArgumentParser( + description="Find the first difference(s) between the built ROM and the base ROM." +) +parser.add_argument( + "-c", + "--count", + type=int, + default=5, + help="find up to this many instruction difference(s)", +) +parser.add_argument( + "-d", + "--diff", + dest="diff_args", + nargs="?", + action="store", + default=False, + const="prompt", + help="run diff.py on the result with the provided arguments" +) +parser.add_argument( + "-m", "--make", help="run make before finding difference(s)", action="store_true" +) +args = parser.parse_args() + +diff_count = args.count + +if args.make: + check_call(["make", "-j4", "COMPARE=0"]) + +baseimg = f"expected/rom_uncompressed.z64" +basemap = f"expected/build/mm.map" + +myimg = f"rom_uncompressed.z64" +mymap = f"build/mm.map" + +if not os.path.isfile(baseimg): + print(f"{baseimg} must exist.") + exit(1) +if not os.path.isfile(myimg) or not os.path.isfile(mymap): + print(f"{myimg} and {mymap} must exist.") + exit(1) + +mybin = open(myimg, "rb").read() +basebin = open(baseimg, "rb").read() + +if len(mybin) != len(basebin): + print("Modified ROM has different size...") + exit(1) + +if mybin == basebin: + print("No differences!") + exit(0) + + +def search_rom_address(target_addr): + ram_offset = None + prev_ram = 0 + prev_rom = 0 + prev_sym = "" + cur_file = "" + prev_file = cur_file + prev_line = "" + with open(mymap) as f: + for line in f: + if "load address" in line: + # Ignore .bss sections since we're looking for a ROM address + if ".bss" in line or ".bss" in prev_line: + ram_offset = None + continue + ram = int(line[16 : 16 + 18], 0) + rom = 
def parse_map(map_fname):
    """Collect the symbols listed in a linker map file.

    Returns a dict mapping each symbol name to a tuple
    ``(rom_address, object_file, previous_symbol, ram_address)``, where
    ``previous_symbol`` is the symbol recorded just before it (``None`` for
    the first one) and ``object_file`` is the most recent path-like entry.
    """

    def address_at(text, column):
        # Addresses occupy 18 characters ("0x" + 16 hex digits) at a fixed column.
        return int(text[column : column + 18], 0)

    symbols = {}
    ram_to_rom = None  # RAM minus ROM for the current section; None = skip lines
    current_file = ""
    last_symbol = None
    with open(map_fname) as map_file:
        for entry in map_file:
            if "load address" in entry:
                # Section header: RAM address at column 16, ROM address at column 59.
                ram_to_rom = address_at(entry, 16) - address_at(entry, 59)
                continue
            if ram_to_rom is None:
                continue
            if "=" in entry or "*fill*" in entry or " 0x" not in entry:
                continue

            ram = address_at(entry, 16)
            name = entry.split()[-1]
            if "/" in name:
                # Last field is a path: remember it as the current object file.
                current_file = name
            elif "0x" in name:
                # Last field is a number, not a name: ignore everything until
                # the next "load address" line.
                ram_to_rom = None
            else:
                symbols[name] = (ram - ram_to_rom, current_file, last_symbol, ram)
                last_symbol = name

    return symbols
def hexbytes(bs):
    """Render an iterable of byte values as colon-separated uppercase hex pairs."""
    return ":".join(map("{:02X}".format, bs))
+ ) + elif not map_diff(): + print(f"No ROM shift{' (!?)' if definite_shift else ''}") + +if args.diff_args: + if len(found_instr_diff) < 1: + print(f"No instruction difference to run diff.py on") + exit() + + diff_sym = search_rom_address(found_instr_diff[0]).split()[0] + if args.diff_args == "prompt": + diff_args = input("Call diff.py with which arguments? ") or "--" + else: + diff_args = args.diff_args + if diff_args[0] != "-": + diff_args = "-" + diff_args + check_call( + [ + "python3", + "diff.py", + diff_args, + diff_sym, + ] + ) diff --git a/linker_scripts/code_script.txt b/linker_scripts/code_script.txt index 84cc40e74e..53b25aa830 100644 --- a/linker_scripts/code_script.txt +++ b/linker_scripts/code_script.txt @@ -309,7 +309,7 @@ SECTIONS . += 0x10; boot_bss_end = .; - RomLocation = 0xB3C000; + RomLocation = 0xA8BB20; . = 0x800A5AC0; SegmentStart = .; code : AT(RomLocation) diff --git a/sym_info.py b/sym_info.py new file mode 100755 index 0000000000..beeadd5c37 --- /dev/null +++ b/sym_info.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 + +import os.path +import argparse + +parser = argparse.ArgumentParser( + description="Display various information about a symbol or address." 
def search_address(target_addr):
    """Describe what the map file says lives at ``target_addr``.

    The map file read is the module-level ``mymap``. An address with bit 31
    set is additionally compared against RAM addresses; every address is
    compared against ROM addresses.

    Returns a string: the symbol's name and addresses on an exact match, an
    "at 0x.. bytes inside <previous symbol>" description as soon as a symbol
    beyond the target is reached, or "at end of rom?" if the map runs out.
    """
    # Non-zero when bit 31 is set; used below as "this is a RAM address".
    is_ram = target_addr & 0x80000000
    # RAM minus ROM for the current section; None means "skip symbol lines".
    ram_offset = None
    prev_ram = 0
    prev_rom = 0
    prev_sym = ""
    cur_file = ""
    prev_file = cur_file
    prev_line = ""
    with open(mymap) as f:
        for line in f:
            if "load address" in line:
                # Ignore .bss sections if we're looking for a ROM address
                if not is_ram and (".bss" in line or ".bss" in prev_line):
                    ram_offset = None
                    continue
                # Section line: RAM address at column 16, ROM address at
                # column 59, each 18 characters wide.
                ram = int(line[16 : 16 + 18], 0)
                rom = int(line[59 : 59 + 18], 0)
                ram_offset = ram - rom
                continue

            # Remembered so the .bss test above can also see the line that
            # preceded a "load address" line.
            prev_line = line

            # Skip assignments, fill entries and lines without an address.
            if (
                ram_offset is None
                or "=" in line
                or "*fill*" in line
                or " 0x" not in line
            ):
                continue

            ram = int(line[16 : 16 + 18], 0)
            rom = ram - ram_offset
            sym = line.split()[-1]

            # Last field is a number rather than a name or path: stop
            # processing lines until the next "load address" line.
            if "0x" in sym and "/" not in sym:
                ram_offset = None
                continue
            # Last field is a path: it becomes the current object file.
            if "/" in sym:
                cur_file = sym
                continue

            if rom == target_addr or (is_ram and ram == target_addr):
                return f"{sym} (RAM 0x{ram:X}, ROM 0x{rom:X}, {cur_file})"
            # First symbol past the target: the target lies inside the
            # previously seen symbol.
            if rom > target_addr or (is_ram and ram > target_addr):
                offset = target_addr - prev_ram if is_ram else target_addr - prev_rom
                return f"at 0x{offset:X} bytes inside {prev_sym} (RAM 0x{prev_ram:X}, ROM 0x{prev_rom:X}, {prev_file})"

            prev_ram = ram
            prev_rom = rom
            prev_sym = sym
            prev_file = cur_file

    return "at end of rom?"
def search_symbol(target_sym):
    """Look up ``target_sym`` in the map file named by the module-level ``mymap``.

    Returns ``(rom_address, object_file, ram_address)`` for the first symbol
    line whose last field equals ``target_sym``, or ``None`` if there is none.
    """
    ram_to_rom = None  # RAM minus ROM for the current section; None = skip lines
    current_file = ""
    with open(mymap) as map_file:
        for entry in map_file:
            if "load address" in entry:
                # Section header: RAM address at column 16, ROM address at column 59.
                section_ram = int(entry[16:34], 0)
                section_rom = int(entry[59:77], 0)
                ram_to_rom = section_ram - section_rom
                continue
            if ram_to_rom is None:
                continue
            if "=" in entry or "*fill*" in entry or " 0x" not in entry:
                continue

            ram = int(entry[16:34], 0)
            name = entry.split()[-1]
            if "/" in name:
                # Path-like last field: remember it as the current object file.
                current_file = name
            elif "0x" in name:
                # Numeric last field: ignore lines until the next section header.
                ram_to_rom = None
            elif name == target_sym:
                return (ram - ram_to_rom, current_file, ram)

    return None
// Parses the whole of 'str' as an integer (decimal, 0x-prefixed hex or
// 0-prefixed octal, as accepted by strtol with base 0).
// Returns true and stores the value in *num on success. Returns false and
// leaves *num untouched if 'str' holds no digits, has trailing characters
// after the number, or the value does not survive conversion to int.
static bool parse_number(const char *str, int *num)
{
    char *endptr;
    long int n = strtol(str, &endptr, 0);
    int converted;

    if (endptr == str)
        return false; // no digits at all
    if (*endptr != 0)
        return false; // junk after the number, e.g. "6105abc"

    // Where long is wider than int, reject values that would be truncated.
    converted = (int)n;
    if ((long int)converted != n)
        return false;

    *num = converted;
    return true;
}
+{ + va_list args; + int size; + char *buffer; + + va_start(args, fmt); + size = vsnprintf(NULL, 0, fmt, args) + 1; + va_end(args); + + buffer = malloc(size); + + va_start(args, fmt); + vsprintf(buffer, fmt, args); + va_end(args); + + return buffer; +} + +static struct RomSegment *add_rom_segment(const char *name) +{ + int index = g_romSegmentsCount; + + g_romSegmentsCount++; + g_romSegments = realloc(g_romSegments, g_romSegmentsCount * sizeof(*g_romSegments)); + g_romSegments[index].name = name; + return &g_romSegments[index]; +} + +static int find_symbol_value(struct Elf32_Symbol *syms, int numsymbols, const char *name) +{ + struct Elf32_Symbol *sym; + int lo, hi, mid, cmp; + + // Binary search for the symbol. We maintain the invariant that [lo, hi) is + // the interval that remains to search. + lo = 0; + hi = numsymbols; + while (lo < hi) + { + mid = lo + (hi - lo) / 2; + sym = &syms[mid]; + cmp = strcmp(sym->name, name); + + if (cmp == 0) + return (int) sym->value; + else if (cmp < 0) + lo = mid + 1; + else + hi = mid; + } + + util_fatal_error("Symbol %s is not defined\n", name); +} + +static int find_rom_address(struct Elf32_Symbol *syms, int numsymbols, const char *name, const char *suffix) +{ + char *symName = sprintf_alloc("_%sSegmentRom%s", name, suffix); + int ret = find_symbol_value(syms, numsymbols, symName); + free(symName); + return ret; +} + +static int cmp_symbol_by_name(const void *a, const void *b) +{ + return strcmp( + ((struct Elf32_Symbol *)a)->name, + ((struct Elf32_Symbol *)b)->name); +} + +static void parse_input_file(const char *filename) +{ + struct Elf32 elf; + struct Elf32_Symbol *syms; + const void *data; + size_t size; + int numRomSymbols; + int i; + + data = util_read_whole_file(filename, &size); + + if (!elf32_init(&elf, data, size) || elf.machine != ELF_MACHINE_MIPS) + util_fatal_error("%s is not a valid 32-bit MIPS ELF file", filename); + + // sort all symbols that contain the substring "Rom" for fast access + // (sorting all 
symbols costs 0.1s, might as well avoid that) + syms = malloc(elf.numsymbols * sizeof(struct Elf32_Symbol)); + numRomSymbols = 0; + for (i = 0; i < elf.numsymbols; i++) + { + if (!elf32_get_symbol(&elf, &syms[numRomSymbols], i)) + util_fatal_error("invalid or corrupt ELF file"); + if (strstr(syms[numRomSymbols].name, "Rom")) + numRomSymbols++; + } + qsort(syms, numRomSymbols, sizeof(struct Elf32_Symbol), cmp_symbol_by_name); + + // get ROM segments + // sections of type SHT_PROGBITS and whose name is ..secname are considered ROM segments + for (i = 0; i < elf.shnum; i++) + { + struct Elf32_Section sec; + struct RomSegment *segment; + + if (!elf32_get_section(&elf, &sec, i)) + util_fatal_error("invalid or corrupt ELF file"); + if (sec.type == SHT_PROGBITS && sec.name[0] == '.' && sec.name[1] == '.' + // HACK! ld sometimes marks NOLOAD sections as SHT_PROGBITS for no apparent reason, + // so we must ignore the ..secname.bss sections explicitly + && strchr(sec.name + 2, '.') == NULL) + { + segment = add_rom_segment(sec.name + 2); + segment->data = elf.data + sec.offset; + segment->romStart = find_rom_address(syms, numRomSymbols, segment->name, "Start"); + segment->romEnd = find_rom_address(syms, numRomSymbols, segment->name, "End"); + } + + } + + g_romSize = find_symbol_value(syms, numRomSymbols, "_RomSize"); + + free(syms); +} + +// Writes the N64 ROM, padding the file size to a multiple of 1 MiB +static void write_rom_file(const char *filename, int cicType) +{ + size_t fileSize = round_up(g_romSize, 0x100000); + uint8_t *buffer = calloc(fileSize, 1); + int i; + uint32_t chksum[2]; + + // write segments + for (i = 0; i < g_romSegmentsCount; i++) + { + int size = g_romSegments[i].romEnd - g_romSegments[i].romStart; + + memcpy(buffer + g_romSegments[i].romStart, g_romSegments[i].data, size); + } + + // pad the remaining space with 0xFF + memset(buffer + g_romSize, 0xFF, fileSize - g_romSize); + + // write checksum + if (!n64chksum_calculate(buffer, cicType, chksum)) + 
util_fatal_error("invalid cic type %i", cicType); + util_write_uint32_be(buffer + 0x10, chksum[0]); + util_write_uint32_be(buffer + 0x14, chksum[1]); + + util_write_whole_file(filename, buffer, fileSize); + free(buffer); +} + +static void usage(const char *execname) +{ + printf("usage: %s -cic input.elf output.z64\n", execname); +} + +int main(int argc, char **argv) +{ + int i; + const char *inputFileName = NULL; + const char *outputFileName = NULL; + int cicType = -1; + + for (i = 1; i < argc; i++) + { + if (argv[i][0] == '-') + { + if (strcmp(argv[i], "-cic") == 0) + { + i++; + if (i >= argc || !parse_number(argv[i], &cicType)) + { + fputs("error: expected number after -cic\n", stderr); + goto bad_args; + } + } + else if (strcmp(argv[i], "-help") == 0) + { + usage(argv[0]); + return 0; + } + else + { + fprintf(stderr, "unknown option %s\n", argv[i]); + goto bad_args; + } + } + else + { + if (inputFileName == NULL) + inputFileName = argv[i]; + else if (outputFileName == NULL) + outputFileName = argv[i]; + else + { + fputs("error: too many parameters specified\n", stderr); + goto bad_args; + } + } + } + if (inputFileName == NULL) + { + fputs("error: no input file specified\n", stderr); + goto bad_args; + } + if (outputFileName == NULL) + { + fputs("error: no output file specified\n", stderr); + goto bad_args; + } + if (cicType == -1) + { + fputs("error: no CIC type specified\n", stderr); + goto bad_args; + } + + parse_input_file(inputFileName); + write_rom_file(outputFileName, cicType); + return 0; + +bad_args: + usage(argv[0]); + return 1; +} diff --git a/tools/elf32.c b/tools/elf32.c new file mode 100644 index 0000000000..f5dcafc0dc --- /dev/null +++ b/tools/elf32.c @@ -0,0 +1,191 @@ +#include +#include +#include +#include + +#include "elf32.h" + +static uint16_t read16_le(const uint8_t *data) +{ + return data[0] << 0 + | data[1] << 8; +} + +static uint32_t read32_le(const uint8_t *data) +{ + return data[0] << 0 + | data[1] << 8 + | data[2] << 16 + | data[3] << 
// Reads a big-endian 32-bit value from the four bytes at 'data'.
static uint32_t read32_be(const uint8_t *data)
{
    // Widen each byte before shifting: a uint8_t promotes to (signed) int, and
    // shifting a byte >= 0x80 left by 24 would overflow it.
    return (uint32_t)data[0] << 24
         | (uint32_t)data[1] << 16
         | (uint32_t)data[2] << 8
         | (uint32_t)data[3] << 0;
}
const uint8_t *sechdr = get_section_header(e, i); + uint32_t type = e->read32(sechdr + 0x04); + + if (type == SHT_SYMTAB) + { + e->symtabndx = i; + break; + } + } + + // find .strtab section + e->strtabndx = -1; + for (i = 0; i < e->shnum; i++) + { + const uint8_t *sechdr = get_section_header(e, i); + uint32_t type = e->read32(sechdr + 0x04); + + if (type == SHT_STRTAB) + { + const char *strings = get_section_contents(e, e->shstrndx); + const char *secname = strings + e->read32(sechdr + 0); + + if (strcmp(secname, ".strtab") == 0) + { + e->strtabndx = i; + break; + } + } + } + + e->numsymbols = 0; + if (e->symtabndx != -1) + { + const uint8_t *sechdr = get_section_header(e, e->symtabndx); + //const uint8_t *symtab = get_section_contents(e, e->symtabndx); + + e->numsymbols = e->read32(sechdr + 0x14) / e->read32(sechdr + 0x24); + } + + if (e->shoff + e->shstrndx * 0x28 >= e->dataSize) + return false; + + return true; +} + +bool elf32_get_section(struct Elf32 *e, struct Elf32_Section *sec, int secnum) +{ + const uint8_t *sechdr = get_section_header(e, secnum); + const char *strings = get_section_contents(e, e->shstrndx); + + sec->name = strings + e->read32(sechdr + 0); + sec->type = e->read32(sechdr + 0x04); + sec->flags = e->read32(sechdr + 0x08); + sec->addr = e->read32(sechdr + 0x0C); + sec->offset = e->read32(sechdr + 0x10); + sec->addralign = e->read32(sechdr + 0x20); + sec->entsize = e->read32(sechdr + 0x24); + return true; +} + +bool elf32_get_symbol(struct Elf32 *e, struct Elf32_Symbol *sym, int symnum) +{ + const uint8_t *sechdr; + const uint8_t *symtab; + const char *strings; + int symcount; + + if (e->symtabndx == -1) + return false; + + sechdr = get_section_header(e, e->symtabndx); + symtab = get_section_contents(e, e->symtabndx); + strings = get_section_contents(e, e->strtabndx); + + symcount = e->read32(sechdr + 0x14) / e->read32(sechdr + 0x24); + if (symnum >= symcount) + return false; + + sym->name = strings + e->read32(symtab + symnum * 0x10); + 
// Declarations for the small 32-bit ELF reader implemented in elf32.c.
#ifndef ELF32_H
#define ELF32_H

// The declarations below use bool, size_t and the fixed-width integer types;
// include them here so the header does not depend on the includer's order.
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

// Values of Elf32.machine
enum
{
    ELF_MACHINE_NONE = 0,
    ELF_MACHINE_MIPS = 8,
};

// Presumably the values of Elf32.type (not referenced by the code in view)
enum
{
    ELF_TYPE_RELOC = 1,
    ELF_TYPE_EXEC,
    ELF_TYPE_SHARED,
    ELF_TYPE_CORE,
};

// An ELF file held in memory, filled in by elf32_init()
struct Elf32
{
    // fields decoded from the ELF header
    uint8_t endian;
    uint16_t type;
    uint16_t machine;
    uint32_t version;
    uint32_t entry;
    uint32_t phoff;
    uint32_t shoff;
    uint16_t ehsize;
    uint16_t phentsize;
    uint16_t phnum;
    uint16_t shentsize;
    uint16_t shnum;
    uint16_t shstrndx;

    int symtabndx;  // index of the first SHT_SYMTAB section, or -1
    int strtabndx;  // index of the section named ".strtab", or -1
    int numsymbols; // number of symbol table entries (0 without a symbol table)

    const uint8_t *data; // the raw file contents (not owned by this struct)
    size_t dataSize;     // size of 'data' in bytes

    // readers matching the file's byte order
    uint16_t (*read16)(const uint8_t *);
    uint32_t (*read32)(const uint8_t *);
};

// Values of Elf32_Section.type
enum
{
    SHT_NULL = 0,
    SHT_PROGBITS,
    SHT_SYMTAB,
    SHT_STRTAB,
};

// A decoded section header, filled in by elf32_get_section()
struct Elf32_Section
{
    const char *name; // points into the ELF data
    uint32_t type;
    uint32_t flags;
    uint32_t addr;
    uint32_t offset;
    uint32_t addralign;
    uint32_t entsize;
};

// A decoded symbol table entry, filled in by elf32_get_symbol()
struct Elf32_Symbol
{
    const char *name; // points into the ELF data
    uint32_t value;
};

// Each function returns true on success and false on failure.
bool elf32_init(struct Elf32 *e, const void *data, size_t size);
bool elf32_get_section(struct Elf32 *e, struct Elf32_Section *sec, int secnum);
bool elf32_get_symbol(struct Elf32 *e, struct Elf32_Symbol *sym, int symnum);

#endif
struct InputFile *g_inputFiles = NULL; +static int g_inputFilesCount = 0; + +static unsigned int round_up(unsigned int num, unsigned int multiple) +{ + num += multiple - 1; + return num / multiple * multiple; +} + +static bool is_yaz0_header(const uint8_t *data) +{ + return data[0] == 'Y' + && data[1] == 'a' + && data[2] == 'z' + && data[3] == '0'; +} + +static void compute_offsets(void) +{ + size_t physOffset = 0; + size_t virtOffset = 0; + int i; + + for (i = 0; i < g_inputFilesCount; i++) + { + bool compressed = false; + + if (g_inputFiles[i].type == OBJ_FILE) + { + if (is_yaz0_header(g_inputFiles[i].data)) + compressed = true; + } + else if (g_inputFiles[i].type == OBJ_TABLE) + { + g_inputFiles[i].size = g_inputFilesCount * 16; + } + + virtOffset = round_up(virtOffset, g_inputFiles[i].valign); + + if (g_inputFiles[i].type == OBJ_NULL) + { + g_inputFiles[i].virtStart = 0; + g_inputFiles[i].virtEnd = 0; + g_inputFiles[i].physStart = 0; + g_inputFiles[i].physEnd = 0; + } + else if (compressed) + { + size_t compSize = round_up(g_inputFiles[i].size, 16); + size_t uncompSize = util_read_uint32_be(g_inputFiles[i].data + 4); + + g_inputFiles[i].virtStart = virtOffset; + g_inputFiles[i].virtEnd = virtOffset + uncompSize; + g_inputFiles[i].physStart = physOffset; + g_inputFiles[i].physEnd = physOffset + compSize; + + physOffset += compSize; + virtOffset += uncompSize; + } + else + { + size_t size = g_inputFiles[i].size; + + g_inputFiles[i].virtStart = virtOffset; + g_inputFiles[i].virtEnd = virtOffset + size; + g_inputFiles[i].physStart = physOffset; + g_inputFiles[i].physEnd = 0; + + physOffset += size; + virtOffset += size; + } + } +} + +static void build_rom(const char *filename) +{ + uint8_t *romData = calloc(ROM_SIZE, 1); + size_t pos = 0; + int i; + int j; + uint32_t chksum[2]; + FILE *outFile; + + for (i = 0; i < g_inputFilesCount; i++) + { + size_t size = g_inputFiles[i].size; + + if (pos + round_up(size, 16) > ROM_SIZE) + util_fatal_error("size exceeds max ROM 
// null terminates the current token and returns a pointer to the next token
// (or to the terminating null character if there is no next token)
static char *token_split(char *str)
{
    // The <ctype.h> functions require an unsigned char value; passing a plain
    // char is undefined for bytes >= 0x80 where char is signed.
    while (*str != 0 && !isspace((unsigned char)*str))
        str++;

    if (*str == 0)
        return str; // end of string

    *str = 0; // terminate token
    str++;

    // skip remaining whitespace
    while (isspace((unsigned char)*str))
        str++;
    return str;
}
+ str++; + } + *str = 0; // terminate line + return str + 1; +} + +static void parse_line(char *line, int lineNum) +{ + char *token = line; + int i = 0; + + char *filename = NULL; + enum InputObjType type = -1; + int valign = 1; + struct InputFile *file; + + // iterate through each token + while (token[0] != 0) + { + char *nextToken = token_split(token); + + if (token[0] == '#') // comment - ignore rest of line + return; + + switch (i) + { + case 0: + if (strcmp(token, "file") == 0) + type = OBJ_FILE; + else if (strcmp(token, "filetable") == 0) + type = OBJ_TABLE; + else if (strcmp(token, "null") == 0) + type = OBJ_NULL; + else + util_fatal_error("unknown object type '%s' on line %i", token, lineNum); + break; + case 1: + filename = token; + break; + case 2: + { + int n; + + if (sscanf(token, "align(%i)", &n) == 1) + valign = n; + else + goto junk; + } + break; + default: + junk: + util_fatal_error("junk '%s' on line %i", token, lineNum); + break; + } + + token = nextToken; + i++; + } + + if (i == 0) // empty line + return; + + file = new_file(); + file->valign = valign; + + switch (type) + { + case OBJ_FILE: + if (filename == NULL) + util_fatal_error("no filename specified on line %i", lineNum); + file->type = OBJ_FILE; + file->data = util_read_whole_file(filename, &file->size); + break; + case OBJ_TABLE: + file->type = OBJ_TABLE; + break; + case OBJ_NULL: + file->type = OBJ_NULL; + file->size = 0; + break; + } +} + +static void parse_list(char *list) +{ + char *line = list; + int lineNum = 1; + + // iterate through each line + while (line[0] != 0) + { + char *nextLine = line_split(line); + + parse_line(line, lineNum); + + line = nextLine; + lineNum++; + } +} + +static void usage(const char *execName) +{ + printf("usage: %s FILE_LIST OUTPUT_FILE\n" + "where FILE_LIST is a list of files to include\n" + "and OUTPUT_FILE is the name of the output ROM\n" + "note that 'dmadata' refers to the file list itself and not an external file\n", + execName); +} + +int main(int 
// returns a pointer to the first non-whitespace character at or after 'str'
// (the terminating null character if only whitespace remains)
static char *skip_whitespace(char *str)
{
    // The <ctype.h> functions require an unsigned char value; passing a plain
    // char is undefined for bytes >= 0x80 where char is signed.
    while (isspace((unsigned char)*str))
        str++;
    return str;
}
// Returns true when n is a power of two, i.e. has exactly one bit set.
//
// Zero is rejected explicitly: (0 & (0 - 1)) == 0, so the bit trick alone
// would report 0 as a power of two. This function is used to validate the
// 'align' and 'romalign' statements, where an alignment of 0 is meaningless.
static bool is_pow_of_2(unsigned int n)
{
    return n != 0 && (n & (n - 1)) == 0;
}
goto got_stmt; + util_fatal_error("line %i: unknown statement '%s'", lineNum, stmtName); + got_stmt: + + if (currSeg != NULL) + { + // ensure no duplicates (except for 'include') + if (stmt != STMT_include && (currSeg->fields & (1 << stmt))) + util_fatal_error("line %i: duplicate '%s' statement", lineNum, stmtName); + + currSeg->fields |= 1 << stmt; + + // statements valid within a segment definition + switch (stmt) + { + case STMT_beginseg: + util_fatal_error("line %i: '%s' inside of a segment definition", lineNum, stmtName); + break; + case STMT_endseg: + // verify segment data + if (currSeg->name == NULL) + util_fatal_error("line %i: no name specified for segment", lineNum); + if (currSeg->includesCount == 0) + util_fatal_error("line %i: no includes specified for segment", lineNum); + currSeg = NULL; + break; + case STMT_name: + if (!parse_quoted_string(args, &currSeg->name)) + util_fatal_error("line %i: invalid name", lineNum); + break; + case STMT_after: + if (!parse_quoted_string(args, &currSeg->after)) + util_fatal_error("line %i: invalid name for 'after'", lineNum); + break; + case STMT_address: + if (!parse_number(args, &currSeg->address)) + util_fatal_error("line %i: expected number after 'address'", lineNum); + break; + case STMT_number: + if (!parse_number(args, &currSeg->number)) + util_fatal_error("line %i: expected number after 'number'", lineNum); + break; + case STMT_flags: + if (!parse_flags(args, &currSeg->flags)) + util_fatal_error("line %i: invalid flags", lineNum); + break; + case STMT_align: + if (!parse_number(args, &currSeg->align)) + util_fatal_error("line %i: expected number after 'align'", lineNum); + if (!is_pow_of_2(currSeg->align)) + util_fatal_error("line %i: alignment is not a power of two", lineNum); + break; + case STMT_romalign: + if (!parse_number(args, &currSeg->romalign)) + util_fatal_error("line %i: expected number after 'romalign'", lineNum); + if (!is_pow_of_2(currSeg->romalign)) + util_fatal_error("line %i: alignment is 
not a power of two", lineNum); + break; + case STMT_include: + currSeg->includesCount++; + currSeg->includes = realloc(currSeg->includes, currSeg->includesCount * sizeof(*currSeg->includes)); + if (!parse_quoted_string(args, &currSeg->includes[currSeg->includesCount - 1])) + util_fatal_error("line %i: invalid filename", lineNum); + break; + case STMT_increment: + if (!parse_number(args, &currSeg->increment)) + util_fatal_error("line %i: expected number after 'increment'", lineNum); + break; + default: + fprintf(stderr, "warning: '%s' is not implemented\n", stmtName); + break; + } + } + else + { + // commands valid outside a segment definition + switch (stmt) + { + case STMT_beginseg: + currSeg = add_segment(); + break; + case STMT_endseg: + util_fatal_error("line %i: '%s' outside of a segment definition", lineNum, stmtName); + break; + default: + fprintf(stderr, "warning: '%s' is not implemented\n", stmtName); + break; + } + } + } + + line = nextLine; + lineNum++; + } +} + +static void write_ld_script(void) +{ + int i; + int j; + + fputs("SECTIONS {\n" + " _RomSize = 0;\n" + " _RomStart = _RomSize;\n\n", + fout); + + for (i = 0; i < g_segmentsCount; i++) + { + const struct Segment *seg = &g_segments[i]; + + // align start of ROM segment + if (seg->fields & (1 << STMT_romalign)) + fprintf(fout, " _RomSize = (_RomSize + %i) & ~ %i;\n", seg->romalign - 1, seg->romalign - 1); + + // initialized data (.text, .data, .rodata, .sdata) + + // Increment the start of the section + //if (seg->fields & (1 << STMT_increment)) + //fprintf(fout, " . 
+= 0x%08X;\n", seg->increment); + + fprintf(fout, " _%sSegmentRomStart = _RomSize;\n" + " ..%s ", seg->name, seg->name); + + if (seg->fields & (1 << STMT_after)) + fprintf(fout, "_%sSegmentEnd ", seg->after); + else if (seg->fields & (1 << STMT_number)) + fprintf(fout, "0x%02X000000 ", seg->number); + else if (seg->fields & (1 << STMT_address)) + fprintf(fout, "0x%08X ", seg->address); + + // (AT(_RomSize) isn't necessary, but adds useful "load address" lines to the map file) + fprintf(fout, ": AT(_RomSize)\n {\n" + " _%sSegmentStart = .;\n" + " . = ALIGN(0x10);\n" + " _%sSegmentTextStart = .;\n", + seg->name, seg->name); + + if (seg->fields & (1 << STMT_align)) + fprintf(fout, " . = ALIGN(0x%X);\n", seg->align); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.text)\n", seg->includes[j]); + + fprintf(fout, " _%sSegmentTextEnd = .;\n", seg->name); + + fprintf(fout, " _%sSegmentTextSize = ABSOLUTE( _%sSegmentTextEnd - _%sSegmentTextStart );\n", seg->name, seg->name, seg->name); + + fprintf(fout, " _%sSegmentDataStart = .;\n", seg->name); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.data)\n", seg->includes[j]); + + /* + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.rodata)\n", seg->includes[j]); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.sdata)\n", seg->includes[j]); + */ + + //fprintf(fout, " . = ALIGN(0x10);\n"); + fprintf(fout, " _%sSegmentDataEnd = .;\n", seg->name); + + fprintf(fout, " _%sSegmentDataSize = ABSOLUTE( _%sSegmentDataEnd - _%sSegmentDataStart );\n", seg->name, seg->name, seg->name); + + fprintf(fout, " _%sSegmentRoDataStart = .;\n", seg->name); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.rodata)\n", seg->includes[j]); + + //fprintf(fout, " . 
= ALIGN(0x10);\n"); + + fprintf(fout, " _%sSegmentRoDataEnd = .;\n", seg->name); + + fprintf(fout, " _%sSegmentRoDataSize = ABSOLUTE( _%sSegmentRoDataEnd - _%sSegmentRoDataStart );\n", seg->name, seg->name, seg->name); + + fprintf(fout, " _%sSegmentSDataStart = .;\n", seg->name); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.sdata)\n", seg->includes[j]); + + fprintf(fout, " . = ALIGN(0x10);\n"); + + fprintf(fout, " _%sSegmentSDataEnd = .;\n", seg->name); + + fprintf(fout, " _%sSegmentOvlStart = .;\n", seg->name); + + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.ovl)\n", seg->includes[j]); + + fprintf(fout, " . = ALIGN(0x10);\n"); + + fprintf(fout, " _%sSegmentOvlEnd = .;\n", seg->name); + + if (seg->fields & (1 << STMT_increment)) + fprintf(fout, " . += 0x%08X;\n", seg->increment); + + + fputs(" }\n", fout); + //fprintf(fout, " _RomSize += ( _%sSegmentDataEnd - _%sSegmentTextStart );\n", seg->name, seg->name); + fprintf(fout, " _RomSize += ( _%sSegmentOvlEnd - _%sSegmentTextStart );\n", seg->name, seg->name); + + fprintf(fout, " _%sSegmentRomEnd = _RomSize;\n\n", seg->name); + + // algn end of ROM segment + if (seg->fields & (1 << STMT_romalign)) + fprintf(fout, " _RomSize = (_RomSize + %i) & ~ %i;\n", seg->romalign - 1, seg->romalign - 1); + + // uninitialized data (.sbss, .scommon, .bss, COMMON) + fprintf(fout, " ..%s.bss ADDR(..%s) + SIZEOF(..%s) (NOLOAD) :\n" + /*" ..%s.bss :\n"*/ + " {\n" + " . = ALIGN(0x10);\n" + " _%sSegmentBssStart = .;\n", + seg->name, seg->name, seg->name, seg->name); + if (seg->fields & (1 << STMT_align)) + fprintf(fout, " . 
= ALIGN(0x%X);\n", seg->align); + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.sbss)\n", seg->includes[j]); + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.scommon)\n", seg->includes[j]); + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (.bss)\n", seg->includes[j]); + for (j = 0; j < seg->includesCount; j++) + fprintf(fout, " %s (COMMON)\n", seg->includes[j]); + fprintf(fout, " . = ALIGN(0x10);\n" + " _%sSegmentBssEnd = .;\n" + " _%sSegmentEnd = .;\n" + " }\n" + " _%sSegmentBssSize = ABSOLUTE( _%sSegmentBssEnd - _%sSegmentBssStart );\n\n", + seg->name, seg->name, seg->name, seg->name, seg->name); + + // Increment the end of the segment + //if (seg->fields & (1 << STMT_increment)) + //fprintf(fout, " . += 0x%08X;\n", seg->increment); + + //fprintf(fout, " ..%s.ovl ADDR(..%s) + SIZEOF(..%s) :\n" + // /*" ..%s.bss :\n"*/ + // " {\n", + // seg->name, seg->name, seg->name); + //fprintf(fout, " _%sSegmentOvlStart = .;\n", seg->name); + + //for (j = 0; j < seg->includesCount; j++) + // fprintf(fout, " %s (.ovl)\n", seg->includes[j]); + + ////fprintf(fout, " . 
= ALIGN(0x10);\n"); + + //fprintf(fout, " _%sSegmentOvlEnd = .;\n", seg->name); + + //fprintf(fout, "\n }\n"); + } + + + fputs(" _RomEnd = _RomSize;\n}\n", fout); +} + +static void usage(const char *execname) +{ + fprintf(stderr, "Nintendo 64 linker script generation tool v0.01\n" + "usage: %s SPEC_FILE LD_SCRIPT\n" + "SPEC_FILE file describing the organization of object files into segments\n" + "LD_SCRIPT filename of output linker script\n", + execname); +} + +int main(int argc, char **argv) +{ + void *spec; + size_t size; + + if (argc != 3) + { + usage(argv[0]); + return 1; + } + + spec = util_read_whole_file(argv[1], &size); + parse_rom_spec(spec); + fout = fopen(argv[2], "w"); + if (fout == NULL) + util_fatal_error("failed to open file '%s' for writing", argv[2]); + write_ld_script(); + free(spec); + fclose(fout); + + return 0; +} diff --git a/tools/n64chksum.c b/tools/n64chksum.c new file mode 100644 index 0000000000..64cfd3aad5 --- /dev/null +++ b/tools/n64chksum.c @@ -0,0 +1,85 @@ +#include +#include +#include +#include + +#include "n64chksum.h" +#include "util.h" + +//Based on uCON64's N64 checksum algorithm by Andreas Sterbenz + +#define ROL(i, b) (((i) << (b)) | ((i) >> (32 - (b)))) + +bool n64chksum_calculate(const uint8_t *romData, int cicType, uint32_t *chksum) +{ + unsigned int seed; + unsigned int t1, t2, t3, t4, t5, t6; + size_t pos; + + const size_t START = 0x1000; + const size_t END = START + 0x100000; + + // determine initial seed + switch (cicType) + { + case 6101: + case 6102: + seed = 0xF8CA4DDC; + break; + case 6103: + seed = 0xA3886759; + break; + case 6105: + seed = 0xDF26F436; + break; + case 6106: + seed = 0x1FEA617A; + break; + default: + return false; // unknown CIC type + } + + t1 = t2 = t3 = t4 = t5 = t6 = seed; + + for (pos = START; pos < END; pos += 4) + { + unsigned int d = util_read_uint32_be(romData + pos); + unsigned int r = ROL(d, (d & 0x1F)); + + // increment t4 if t6 overflows + if ((t6 + d) < t6) + t4++; + + t6 += d; + t3 ^= 
d; + t5 += r; + + if (t2 > d) + t2 ^= r; + else + t2 ^= t6 ^ d; + + if (cicType == 6105) + t1 += util_read_uint32_be(&romData[0x0750 + (pos & 0xFF)]) ^ d; + else + t1 += t5 ^ d; + } + + if (cicType == 6103) + { + chksum[0] = (t6 ^ t4) + t3; + chksum[1] = (t5 ^ t2) + t1; + } + else if (cicType == 6106) + { + chksum[0] = (t6 * t4) + t3; + chksum[1] = (t5 * t2) + t1; + } + else + { + chksum[0] = t6 ^ t4 ^ t3; + chksum[1] = t5 ^ t2 ^ t1; + } + + return true; +} diff --git a/tools/n64chksum.h b/tools/n64chksum.h new file mode 100644 index 0000000000..0ca13a819b --- /dev/null +++ b/tools/n64chksum.h @@ -0,0 +1,6 @@ +#ifndef _N64CHKSUM_H_ +#define _N64CHKSUM_H_ + +bool n64chksum_calculate(const uint8_t *romData, int cicType, uint32_t *chksum); + +#endif diff --git a/tools/util.c b/tools/util.c new file mode 100644 index 0000000000..e826906cd3 --- /dev/null +++ b/tools/util.c @@ -0,0 +1,87 @@ +#include +#include +#include +#include +#include +#include + +#include "util.h" + +// displays an error message and exits +void util_fatal_error(const char *msgfmt, ...) 
// reads a big-endian 32-bit integer from the four bytes starting at data
uint32_t util_read_uint32_be(const uint8_t *data)
{
    // Widen every byte to uint32_t before shifting. A uint8_t operand is
    // promoted to (signed) int, so 'data[0] << 24' overflows int whenever
    // data[0] >= 0x80, which is undefined behaviour.
    return ((uint32_t)data[0] << 24)
         | ((uint32_t)data[1] << 16)
         | ((uint32_t)data[2] << 8)
         | ((uint32_t)data[3] << 0);
}
/*
 * Parses a numeric command-line argument.
 *
 * - An empty string yields (uint32_t)-1. main() stores the result in an int
 *   and rejects negative values, so this acts as the "invalid" sentinel.
 * - A string longer than two characters that starts with "0x"/"0X" is parsed
 *   as hexadecimal.
 * - Anything else is parsed as decimal.
 *
 * Text that does not start with a number yields 0; trailing characters after
 * the number are ignored.
 *
 * The number is parsed in place with strtoul(). The previous version copied
 * the argument into a 20-byte buffer with strncpy(), which leaves the buffer
 * unterminated for arguments of 20 or more characters.
 */
static uint32_t parse_int(const char *num)
{
    const char *digits = num;
    int base = 10;

    if (num[0] == '\0')
        return (uint32_t)-1;

    if (strlen(num) > 2 && num[0] == '0' && (num[1] == 'x' || num[1] == 'X'))
    {
        digits = num + 2; /* skip the "0x" prefix */
        base = 16;
    }

    return (uint32_t)strtoul(digits, NULL, base);
}
reading vertex data.\n" + " -?, --help Prints this help message\n" + " --version Prints the current version\n" + ); +} + +static void print_version(void){ + puts("Version: " VTXDIS_VER); +} + +static void print_vtx_data(Vtx *vtx, int vtx_cnt) +{ + printf("{\n"); + for(int i = 0; i < vtx_cnt; i++) + { + Vtx *v = &vtx[i]; + + printf(" VTX(%d, %d, %d, %d, %d, 0x%02X, 0x%02X, 0x%02X, 0x%02X),\n", v->pos[0], v->pos[1], v->pos[2], v->tpos[0], v->tpos[1], v->cn[0], v->cn[1], v->cn[2], v->cn[3]); + } + printf("}\n"); +} + +static void parse_file(void) +{ + int alloc_size = 0; + struct stat sbuffer; + stat(filename, &sbuffer); + if(errno != 0){ + perror("Count not stat file."); + exit(1); + } + + /* sanity checks */ + if(count > 0) + { + alloc_size = sizeof(Vtx) * count; + if((offset > 0 && (offset + alloc_size) > sbuffer.st_size) || alloc_size > sbuffer.st_size) + { + printf("Requested data is beyond file boundaries."); + exit(1); + } + } + else if(data_len > 0) + { + alloc_size = data_len; + + if((offset > 0 && (offset + alloc_size) > sbuffer.st_size) || alloc_size > sbuffer.st_size) + { + printf("Requested data is beyond file boundaries."); + exit(1); + } + } + else + { + if (offset > 0) + { + alloc_size = sbuffer.st_size - offset; + } + else + { + alloc_size = sbuffer.st_size; + } + } + + if(alloc_size % sizeof(Vtx) != 0) + { + printf("Requested data size is not a multiple of sizeof(Vtx). 
Requested size is %8x", alloc_size); + exit(1); + } + + FILE *file = fopen(filename, "rb"); + if(!file){ + perror("Could not open file"); + exit(1); + } + + if(offset > 0){ + if(fseek(file, offset, SEEK_SET)){ + perror("Could not seek file"); + fclose(file); + exit(1); + } + } + + Vtx *data = NULL; + data = malloc(alloc_size); + if(!data){ + fclose(file); + perror("Could not allocate vtx data"); + exit(1); + } + + if(!fread(data, alloc_size, 1, file)){ + perror("Could not read from file"); + fclose(file); + free(data); + exit(1); + } + + fclose(file); + + int vtx_cnt = alloc_size / sizeof(Vtx); + for(int i = 0; i < vtx_cnt; i++){ + Vtx *v = &data[i]; + + v->pos[0] = SWAP16(v->pos[0]); + v->pos[1] = SWAP16(v->pos[1]); + v->pos[2] = SWAP16(v->pos[2]); + v->flag = SWAP16(v->flag); + v->tpos[0] = SWAP16(v->tpos[0]); + v->tpos[1] = SWAP16(v->tpos[1]); + } + + print_vtx_data(data, vtx_cnt); + free(data); +} + +int main(int argc, char **argv) +{ + int opt; + int argv_idx = 0; + while(1){ + argv_idx++; + opt = getopt_long(argc, argv, "o:l:f:c:v?", cmdline_opts, NULL); + if(opt == -1){ + break; + } + switch(opt){ + case 'd': + data = optarg; + break; + case '~': + print_version(); + return 0; + case '?': + print_usage(); + return 0; + case 'l': + data_len = parse_int(optarg); + break; + case 'o': + offset = parse_int(optarg); + break; + case 'f': + filename = optarg; + break; + case 'c': + count = parse_int(optarg); + break; + } + } + + if (filename == NULL && data == NULL) + { + printf("Must specify -f or -d\n"); + print_usage(); + exit(1); + } + + if(data_len < 0) + { + printf("Invalid -l/--length parameter passed."); + print_usage(); + exit(1); + } + + if(offset < 0) + { + printf("Invalid -o/--offset parameter passed."); + print_usage(); + exit(1); + } + + if(count < 0) + { + printf("Invalid -c/--count parameter passed."); + print_usage(); + exit(1); + } + + if(count > 0 && data_len > 0) + { + printf("Cannot specify both -c/--count and -l/--length."); + print_usage(); + 
// decoder implementation by thakis of http://www.amnoid.de

// src points to the yaz0 source data (to the "real" source data, not at the header!)
// dst points to a buffer uncompressedSize bytes large (you get uncompressedSize from
// the second 4 bytes in the Yaz0 header).
//
// Never writes more than uncompressedSize bytes to dst: a back-reference run
// that would extend past the end of the output is truncated. Decoding stops
// early if a back-reference points before the start of dst (corrupt stream).
// The length of src is not known here, so reads from src are not bounds-checked.
void yaz0_decode(uint8_t* src, uint8_t* dst, int uncompressedSize)
{
    int srcPlace = 0, dstPlace = 0; // current read/write positions

    unsigned int validBitCount = 0; // number of valid bits left in "code" byte
    uint8_t currCodeByte = 0;

    while (dstPlace < uncompressedSize)
    {
        // read new "code" byte if the current one is used up
        if (validBitCount == 0)
        {
            currCodeByte = src[srcPlace];
            ++srcPlace;
            validBitCount = 8;
        }

        if ((currCodeByte & 0x80) != 0)
        {
            // straight copy
            dst[dstPlace] = src[srcPlace];
            dstPlace++;
            srcPlace++;
        }
        else
        {
            // RLE part
            uint8_t byte1 = src[srcPlace];
            uint8_t byte2 = src[srcPlace + 1];
            srcPlace += 2;

            int dist = ((byte1 & 0xF) << 8) | byte2;
            int copySource = dstPlace - (dist + 1);

            int numBytes = byte1 >> 4;
            if (numBytes == 0)
            {
                // 3 byte encoding: the run length is stored in an extra byte
                numBytes = src[srcPlace] + 0x12;
                srcPlace++;
            }
            else
            {
                numBytes += 2;
            }

            // a reference to data before the start of the output is invalid
            if (copySource < 0)
                return;

            // never write past the end of the output buffer
            if (numBytes > uncompressedSize - dstPlace)
                numBytes = uncompressedSize - dstPlace;

            // copy run (byte by byte: source and destination may overlap)
            for (int i = 0; i < numBytes; ++i)
            {
                dst[dstPlace] = dst[copySource];
                copySource++;
                dstPlace++;
            }
        }

        // use next bit from "code" byte
        currCodeByte <<= 1;
        validBitCount -= 1;
    }
}
*src, int size, int pos, uint32_t *pMatchPos) +{ + int numBytes = 1; + int matchPos = 0; + + int startPos = pos - 0x1000; + int end = size - pos; + + if (startPos < 0) + startPos = 0; + + // maximum runlength for 3 byte encoding + if (end > MAX_RUNLEN) + end = MAX_RUNLEN; + + for (int i = startPos; i < pos; i++) + { + int j; + + for (j = 0; j < end; j++) + { + if (src[i + j] != src[j + pos]) + break; + } + if (j > numBytes) + { + numBytes = j; + matchPos = i; + } + } + + *pMatchPos = matchPos; + + if (numBytes == 2) + numBytes = 1; + + return numBytes; +} + +// a lookahead encoding scheme for ngc Yaz0 +static uint32_t nintendoEnc(uint8_t *src, int size, int pos, uint32_t *pMatchPos) +{ + uint32_t numBytes = 1; + static uint32_t numBytes1; + static uint32_t matchPos; + static int prevFlag = 0; + + // if prevFlag is set, it means that the previous position + // was determined by look-ahead try. + // so just use it. this is not the best optimization, + // but nintendo's choice for speed. + if (prevFlag == 1) + { + *pMatchPos = matchPos; + prevFlag = 0; + return numBytes1; + } + + prevFlag = 0; + numBytes = simpleEnc(src, size, pos, &matchPos); + *pMatchPos = matchPos; + + // if this position is RLE encoded, then compare to copying 1 byte and next position(pos+1) encoding + if (numBytes >= 3) + { + numBytes1 = simpleEnc(src, size, pos + 1, &matchPos); + // if the next position encoding is +2 longer than current position, choose it. + // this does not guarantee the best optimization, but fairly good optimization with speed. 
+ if (numBytes1 >= numBytes + 2) + { + numBytes = 1; + prevFlag = 1; + } + } + return numBytes; +} + +int yaz0_encode(uint8_t *src, uint8_t *dst, int srcSize) +{ + int srcPos = 0; + int dstPos = 0; + int bufPos = 0; + + uint8_t buf[24]; // 8 codes * 3 bytes maximum + + uint32_t validBitCount = 0; // number of valid bits left in "code" byte + uint8_t currCodeByte = 0; // a bitfield, set bits meaning copy, unset meaning RLE + + while (srcPos < srcSize) + { + uint32_t numBytes; + uint32_t matchPos; + + numBytes = nintendoEnc(src, srcSize, srcPos, &matchPos); + if (numBytes < 3) + { + // straight copy + buf[bufPos] = src[srcPos]; + bufPos++; + srcPos++; + //set flag for straight copy + currCodeByte |= (0x80 >> validBitCount); + } + else + { + //RLE part + uint32_t dist = srcPos - matchPos - 1; + uint8_t byte1, byte2, byte3; + + if (numBytes >= 0x12) // 3 byte encoding + { + byte1 = 0 | (dist >> 8); + byte2 = dist & 0xFF; + buf[bufPos++] = byte1; + buf[bufPos++] = byte2; + // maximum runlength for 3 byte encoding + if (numBytes > MAX_RUNLEN) + numBytes = MAX_RUNLEN; + byte3 = numBytes - 0x12; + buf[bufPos++] = byte3; + } + else // 2 byte encoding + { + byte1 = ((numBytes - 2) << 4) | (dist >> 8); + byte2 = dist & 0xFF; + buf[bufPos++] = byte1; + buf[bufPos++] = byte2; + } + srcPos += numBytes; + } + + validBitCount++; + + // write eight codes + if (validBitCount == 8) + { + dst[dstPos++] = currCodeByte; + for (int j = 0; j < bufPos; j++) + dst[dstPos++] = buf[j]; + + currCodeByte = 0; + validBitCount = 0; + bufPos = 0; + } + } + + if (validBitCount > 0) + { + dst[dstPos++] = currCodeByte; + for (int j = 0; j < bufPos; j++) + dst[dstPos++] = buf[j]; + + currCodeByte = 0; + validBitCount = 0; + bufPos = 0; + } + + while ((dstPos % 16) != 0) + dst[dstPos++] = 0; + + return dstPos; +} diff --git a/tools/yaz0.h b/tools/yaz0.h new file mode 100644 index 0000000000..0cc3703ec9 --- /dev/null +++ b/tools/yaz0.h @@ -0,0 +1,10 @@ +#ifndef _YAZ0_H_ +#define _YAZ0_H_ + +int 
yaz0_encode2(uint8_t *src, uint8_t *dest, int uncompressedSize); + +void yaz0_decode(uint8_t* src, uint8_t* dst, int uncompressedSize); + +int yaz0_encode(uint8_t *src, uint8_t *dest, int srcSize); + +#endif // _YAZ0_H_ diff --git a/tools/yaz0.py b/tools/yaz0.py deleted file mode 100755 index 67042f6333..0000000000 --- a/tools/yaz0.py +++ /dev/null @@ -1,181 +0,0 @@ -#!/usr/bin/env python3 -import os, sys, argparse - -def read_file(name): - file_data=[] - - try: - with open(name, 'rb') as f: - file_data = f.read() - except IOError: - print('failed to read file ' + name) - sys.exit(2) - return file_data - - -def write_file(name, file_data): - try: - with open(name, 'wb') as f: - f.write(file_data) - except IOError: - print('failed to write file ' + name) - sys.exit(2) - - -def yaz0_decompress(input): - output = bytearray() - - return output - - -max_len = 0xFF + 0x12 -def back_seach(input, size, start_pos): - best_len = 1 - match_pos = 0 - search_pos = max(start_pos - 0x1000, 0) - end_pos = min(size, start_pos + max_len) - - # Seach for substrings that are at least 3 bytes long (the smallest size resulting in a compressed chunk) - token_end_pos = min(start_pos + 3, size) - seatch_len = token_end_pos - start_pos - token = input[start_pos:token_end_pos] - - while search_pos < start_pos: - search_pos = input.find(token, search_pos, start_pos + seatch_len - 1) - if search_pos == -1: - break - - pos1 = search_pos + seatch_len - pos2 = start_pos + seatch_len - - # Find how many more bytes match - while pos2 < end_pos and input[pos1] == input[pos2]: - pos1 += 1 - pos2 += 1 - - found_len = pos2 - start_pos - - if found_len > best_len: - best_len = found_len - seatch_len = found_len - match_pos = search_pos - - if best_len == max_len: - break - - token_end_pos = start_pos + seatch_len - token = input[start_pos:start_pos + seatch_len] - - search_pos += 1 - - return best_len, match_pos - - -prev_flag = False -prev_len = 0 -prev_pos = 0 -def cached_encode(input, size, pos): - 
global prev_flag - global prev_len - global prev_pos - - # If a previous search found that it was better to have an uncompressed byte, return the position and length that we already found - if prev_flag: - prev_flag = False - return prev_len, prev_pos - - comp_len, comp_pos = back_seach(input, size, pos) - - # Check that it wouldn't be better to have an uncompressed byte then compressing the following data - if comp_len >= 3: - prev_len, prev_pos = back_seach(input, size, pos + 1) - if prev_len >= comp_len + 2: # +2 to account for the uncompressed byte plus 1 more to see if it's better compression - comp_len = 1 - prev_flag = True - - return comp_len, comp_pos - - -def write_yaz0_header(output, size): - output += 'Yaz0'.encode() - - output.append((size & 0xFF000000) >> 24) - output.append((size & 0x00FF0000) >> 16) - output.append((size & 0x0000FF00) >> 8) - output.append( size & 0x000000FF) - - output += '\0\0\0\0\0\0\0\0'.encode() - - -def yaz0_compress(input): - output = bytearray() - - decompressed_size = len(input) - - write_yaz0_header(output, decompressed_size) - - curr_pos = 0 - chunk_bits = 0 - chunk_num_bits = 0 - chunk_data = bytearray() - while curr_pos < decompressed_size: - num_bytes, match_pos = cached_encode(input, decompressed_size, curr_pos) - - if num_bytes < 3: - chunk_data.append(input[curr_pos]) - curr_pos += 1 - chunk_bits |= (0x80 >> chunk_num_bits) - else: - dist = curr_pos - match_pos - 1 - - if num_bytes >= 0x12: - chunk_data.append(dist >> 8) - chunk_data.append(dist & 0xFF) - chunk_data.append(num_bytes - 0x12) - else: - chunk_data.append(((num_bytes - 2) << 4) | (dist >> 8)) - chunk_data.append(dist & 0xFF) - - curr_pos += num_bytes - - chunk_num_bits += 1 - - if chunk_num_bits == 8: - output.append(chunk_bits) - output += chunk_data - - chunk_bits = 0 - chunk_num_bits = 0 - chunk_data = bytearray() - - if chunk_num_bits > 0: - output.append(chunk_bits) - output += chunk_data - - output_size = len(output) - output_padding_amount = 
// Compresses inputFileName with Yaz0 and writes the result to outputFileName:
// a 16-byte header ("Yaz0", big-endian uncompressed size, 8 zero bytes)
// followed by the encoded payload. When verbose is set, the file name,
// compression ratio and elapsed time are printed.
static void compress_file(const char *inputFileName, const char *outputFileName, bool verbose)
{
    size_t uncompSize;
    uint8_t *input = util_read_whole_file(inputFileName, &uncompSize);
    unsigned long int time = 0;

    // Worst case for yaz0_encode: every source byte is emitted as a literal
    // (1 byte each) plus one code byte per group of 8, and the total is then
    // padded to a multiple of 16. 'uncompSize * 2' is smaller than that for
    // inputs of 1 to 7 bytes.
    size_t maxCompSize = uncompSize + (uncompSize + 7) / 8;
    maxCompSize = (maxCompSize + 15) & ~(size_t)15;
    if (maxCompSize == 0)
        maxCompSize = 16; // avoid malloc(0) for an empty input file

    uint8_t *output = malloc(maxCompSize);
    if (output == NULL)
        util_fatal_error("out of memory while compressing '%s'", inputFileName);

    if (verbose)
    {
        printf("compressing %s\n", inputFileName);
        time = get_time_milliseconds();
    }

    // compress data
    size_t compSize = yaz0_encode(input, output, uncompSize);

    if (verbose)
        time = get_time_milliseconds() - time;

    // make Yaz0 header
    uint8_t header[16] = {0};
    header[0] = 'Y';
    header[1] = 'a';
    header[2] = 'z';
    header[3] = '0';
    util_write_uint32_be(header + 4, uncompSize);

    // write output file
    FILE *outFile = fopen(outputFileName, "wb");
    if (outFile == NULL)
        util_fatal_error("failed to open file '%s' for writing", outputFileName);
    if (fwrite(header, sizeof(header), 1, outFile) != 1)
        util_fatal_error("error writing to file '%s'", outputFileName);
    // fwrite reports 0 items for a zero-sized payload, so only check real writes
    if (compSize > 0 && fwrite(output, compSize, 1, outFile) != 1)
        util_fatal_error("error writing to file '%s'", outputFileName);
    if (fclose(outFile) != 0)
        util_fatal_error("error writing to file '%s'", outputFileName);

    free(input);
    free(output);

    if (verbose)
        print_report(time, compSize, uncompSize);
}
to OUTPUT_FILE\n" + "Available options:\n" + "-d: decompresses INPUT_FILE, a Yaz0 compressed file, and writes decompressed\n" + " output to OUTPUT_FILE\n" + "-v: prints verbose output (compression ratio and time)\n" + "-h: shows this help message\n", + execName); +} + +int main(int argc, char **argv) +{ + int i; + const char *inputFileName = NULL; + const char *outputFileName = NULL; + bool decompress = false; + bool verbose = false; + + // parse arguments + for (i = 1; i < argc; i++) + { + char *arg = argv[i]; + + if (arg[0] == '-') + { + if (strcmp(arg, "-d") == 0) + decompress = true; + else if (strcmp(arg, "-v") == 0) + verbose = true; + else if (strcmp(arg, "-h") == 0) + { + usage(argv[0]); + return 0; + } + else + { + printf("unknown option %s\n", arg); + usage(argv[0]); + return 1; + } + } + else + { + if (inputFileName == NULL) + inputFileName = arg; + else if (outputFileName == NULL) + outputFileName = arg; + else + { + puts("too many files specified"); + usage(argv[0]); + return 1; + } + } + } + + if (inputFileName == NULL) + { + puts("no input file specified"); + usage(argv[0]); + return 1; + } + if (outputFileName == NULL) + { + puts("no output file specified"); + usage(argv[0]); + return 1; + } + + if (decompress) + decompress_file(inputFileName, outputFileName, verbose); + else + compress_file(inputFileName, outputFileName, verbose); + + return 0; +}