#!/usr/bin/env python3
"""Disassemble a ``dw``-style MIPS asm dump and hand the listing to decomp.me.

The script reads the ``dw <hex>`` words of one ``.s`` file, disassembles them
with capstone, optionally rewrites some immediates as symbol references, and
then copies the listing to the clipboard (or prints it under WSL).
"""

# Not 100% accurate but it can be useful for decomp.me

# When true, main() runs the listing through patchSymbolsVars().
RESOLVE_VARIABLES = False

#-------------------------------------------------------------------------------

import struct
import re
import os
import subprocess
import sys
import platform
from glob import glob
from iterfzf import iterfzf
from capstone import Cs, CS_ARCH_MIPS, CS_MODE_MIPS32
from capstone.mips import *


def clipboard(data):
    """Copy the string *data* to the system clipboard.

    The encoded text is piped to ``clip`` when ``os.name`` is ``'nt'`` and to
    ``xclip -selection clipboard`` everywhere else.
    """
    if os.name == 'nt':
        cmd = ['clip']
    else:
        cmd = ['xclip', '-selection', 'clipboard']
    subprocess.run(cmd, input=data.encode())


# def hexdump(data):
#     return ' '.join(['{:02X}'.format(x) for x in data])


def dw_to_code(path):
    """Return the ``dw <hex>`` words found in the file at *path* as bytes.

    A line contributes one word when, after stripping, its first
    space-separated token is ``dw``; the second token is parsed as base-16.
    All other lines are ignored.
    """
    words = []
    with open(path) as f:
        for line in f:
            tok = line.strip().split(' ')
            if len(tok) >= 2 and tok[0] == 'dw':
                num = int(tok[1], 16)
                # NOTE(review): the pack format and the rest of this function
                # were lost from the source text and are reconstructed.
                # '<I' (little-endian 32-bit) is assumed because disasm()
                # prints raw words as code[3], code[2], code[1], code[0].
                words.append(struct.pack('<I', num))
    return b''.join(words)


# NOTE(review): a span of the original file is missing at this point (it ran
# from inside the struct.pack() call above to the middle of the first loop of
# disasm()).  The surviving code still refers to the following names, which
# must be restored from version control -- they are intentionally NOT
# re-invented here:
#   * get_map(path)        called by main(); its result is used as a mapping
#                          from integer address to symbol name (sym_map)
#   * branch_inst          container of capstone instruction ids (disasm)
#   * branch_inst_1op_imm  container of capstone instruction ids (disasm)


def disasm(code, addr, name):
    """Disassemble *code* as MIPS located at *addr* and return listing lines.

    Two passes are made over the bytes: the first records a ``.LAB_0x...``
    label for every branch target that falls inside the function, the second
    emits one ``/* offset address */ mnemonic operands`` line per instruction,
    substituting symbol names from ``sym_map`` and the collected labels.
    Words capstone does not decode, and the ``mfc2``/``cfc2``/``mtc2``/``ctc2``
    instructions, are emitted as ``.long`` directives instead.

    Args:
        code: machine code bytes.
        addr: address of the first byte of *code*.
        name: passed by main(); not used by the surviving part of this
            function.

    Returns:
        list of str, one entry per label or instruction line.
    """
    # NOTE(review): the signature and everything down to the end of the first
    # pass were partly lost from the source text.  They are reconstructed from
    # main()'s call, from the names the second pass uses, and from the
    # surviving tail of the first pass -- verify against version control.
    md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32)
    md.detail = True  # operands are inspected below
    ret = []
    labels = {}
    end = addr + len(code)

    fp_code = code
    processing_addr = addr
    last_processed = addr
    while processing_addr < end:
        # first pass: collect in-function branch targets
        for inst in md.disasm(fp_code, processing_addr):
            processing_addr += 4
            if inst.id in branch_inst:
                val = inst.operands[-1].value.imm
                if addr <= val < end:
                    labels[val] = f'.LAB_0x{val:x}'
        # step over the word the disassembler stopped on and resume after it
        processing_addr += 4
        fp_code = fp_code[processing_addr - last_processed:]
        last_processed = processing_addr

    # these are due to bad disassembly of arguments
    raw_only = ("mfc2", "cfc2", "mtc2", "ctc2")

    processing_addr = addr
    last_processed = addr
    while processing_addr < end:
        # second pass
        for inst in md.disasm(code, processing_addr):
            off = inst.address - addr
            replace = None

            # syms
            if inst.id in branch_inst_1op_imm:
                val = inst.operands[-1].value.imm
                sym = sym_map.get(val)
                if sym:
                    replace = (hex(val), sym)

            # labels
            if not replace and inst.id in branch_inst:
                val = inst.operands[-1].value.imm
                label = labels.get(val)
                if label:
                    replace = (hex(val), label)

            op = inst.op_str
            if replace:
                old, new = replace
                op = op.replace(old, new)

            # leave the loop without advancing so the word is emitted raw below
            if inst.mnemonic in raw_only:
                break

            this_label = labels.get(inst.address)
            if this_label:
                ret.append(this_label + ':')

            ret.append('/* 0x{:04x} 0x{:x} */ {} {}'.format(off, inst.address, inst.mnemonic, op))
            processing_addr += 4

        if processing_addr >= end:
            break

        # check if there's a label for this unsupported instruction
        this_label = labels.get(processing_addr)
        if this_label:
            ret.append(this_label + ':')

        # drop what the loop above consumed so code[0:4] is the raw word
        code = code[processing_addr - last_processed:]
        # TODO: disassemble manually
        # decompme will do it for us anyway so maybe not?
        ret.append('/* 0x{:04x} 0x{:x} */ .long 0x{:02x}{:02x}{:02x}{:02x}'.format(processing_addr - addr, processing_addr, code[3], code[2], code[1], code[0]))
        processing_addr += 4
        last_processed = processing_addr
        code = code[4:]

    return ret


def rreplace(s, old, new):
    """Return *s* with its last occurrence of *old* replaced by *new*."""
    # Reverse everything so str.replace(..., 1) hits the last occurrence.
    return (s[::-1].replace(old[::-1], new[::-1], 1))[::-1]


def patchSymbolsVars(lines, commentsLen = 4):
    """Rewrite immediates in *lines* as ``%gp_rel``/``%hi``/``%lo`` references.

    Uses the module-level ``sym_map`` (address -> symbol name).

    Args:
        lines: listing lines as produced by disasm().
        commentsLen: number of whitespace-separated tokens that precede the
            mnemonic on each line (the ``/* off addr */`` prefix is 4).

    Returns:
        A new list; *lines* itself is not modified.
    """
    loads = {}  # register -> index of the line that loaded its upper half
    out = lines.copy()

    for lineNumber, line in enumerate(lines):
        words = line.replace(",", "").split()
        if len(words) < commentsLen + 2 or ".LAB" in line:
            continue

        instr = words[commentsLen]
        val = words[-1]
        reg = words[-2]
        if "0x" not in val:
            continue

        # "imm(reg)" operand: split it into the immediate and the register
        pPos = val.find("(")
        if pPos > 0:
            reg = val[pPos + 1:-1]
            val = val[:pPos]

        # gp variables
        if reg == "$gp":
            gp_base = 0x800AB2E4
            addr = gp_base + int(val, 16)
            if addr in sym_map:
                symbol = sym_map[addr]
                out[lineNumber] = rreplace(lines[lineNumber], val, "%gp_rel({0})".format(symbol))

        # Store hi values per register
        elif instr.startswith("l") and val.startswith("0x800"):
            loads[reg] = lineNumber

        # Process hi and lo values
        elif reg in loads:
            if (
                "t" in instr
                or instr.startswith("an")
                or ('l' in instr and instr.rindex('l') > 1)
            ):
                continue

            val = val.replace("({0})".format(reg), "")
            hiLine = loads[reg]
            hiTok = lines[hiLine].split()[-1]
            try:
                addr = int(hiTok + "0000", base=16) + int(val, base=16)
            except ValueError:
                # only int() can fail here; anything else should propagate
                print("Error: Could not read address from line: ", lines[hiLine])
                print("debug: ", hiTok, val)
                del loads[reg]
                continue

            offset = ""
            if addr in sym_map:
                # Address match
                symbol = sym_map[addr]
            else:
                # Find closest address: the highest non-zero symbol below addr
                currAddr = max((a for a in sym_map if 0 < a < addr), default=0)
                symbol = sym_map[currAddr] if currAddr else ""
                diff = addr - currAddr
                if symbol == "" or diff > 9000:
                    # dunno what offset limit to set
                    print("Error with symbol on line: ", line)
                    del loads[reg]
                    continue
                offset = "+{0}".format(hex(diff))

            if out[hiLine] == lines[hiLine]:
                # don't replace hi value more than once
                out[hiLine] = rreplace(lines[hiLine], hiTok, "%hi({0})".format(symbol))
            out[lineNumber] = rreplace(lines[lineNumber], val, "%lo({0}{1})".format(symbol, offset))

        else:
            # If we're setting a saved reg, delete saved reg
            # (maybe add checks for specific instr like 'sw' ?)
            firstReg = words[commentsLen + 1]
            if firstReg in loads:
                del loads[firstReg]

    return out


# address -> symbol name; filled in by main() and read by disasm() and
# patchSymbolsVars()
sym_map = {}


def main(path):
    """Disassemble the asm file at *path* and put the result on the clipboard.

    When *path* is empty or does not exist, the user picks one of
    ``../asm/**/*.s`` interactively.  The file name must end in
    ``_<8 hex digits>.s``; those digits are the load address.  Under WSL the
    listing is printed instead of being copied.
    """
    global sym_map

    if not path or not os.path.exists(path):
        asms = glob('../asm/**/*.s', recursive=True)
        path = iterfzf(asms)

    if not path:
        print("Error: no path provided")
        return

    sym_map = get_map(path)

    # the dot is escaped so only a literal ".s" suffix is accepted
    m = re.search(r'_([a-fA-F0-9]{8})\.s$', path)
    if not m:
        print('Error: filename', path, 'should end with a 32-bit hex address suffix (for example sub_80027384.s)')
        return

    addr = int(m.group(1), 16)
    # strip only the trailing extension, not every ".s" inside the name
    name = os.path.splitext(os.path.basename(path))[0]
    code = dw_to_code(path)
    lines = disasm(code, addr, name)
    if RESOLVE_VARIABLES:
        lines = patchSymbolsVars(lines)
    text = '\n'.join(lines) + '\n'

    # xclip causes crashes under WSL
    if "microsoft-standard" in platform.uname().release:
        print(text)
    else:
        clipboard(text)
        print('asm is now on your clipboard to paste into decomp.me')


if __name__ == '__main__':
    main(sys.argv[1] if len(sys.argv) > 1 else None)