mgs_reversing/build/decompme_asm.py

308 lines
9.0 KiB
Python
Executable File

#!/usr/bin/env python3
# Not 100% accurate but it can be useful for decomp.me
RESOLVE_VARIABLES = False
#-------------------------------------------------------------------------------
import struct
import re
import os
import subprocess
import sys
import platform
from glob import glob
from iterfzf import iterfzf
from capstone import Cs, CS_ARCH_MIPS, CS_MODE_MIPS32
from capstone.mips import *
def clipboard(data):
    """Copy the string *data* to the system clipboard via an external tool.

    Runs ``clip`` when ``os.name`` is ``'nt'`` and
    ``xclip -selection clipboard`` otherwise. The text is encoded with
    ``str.encode()`` and passed to the tool on stdin.
    """
    command = ['clip'] if os.name == 'nt' else ['xclip', '-selection', 'clipboard']
    subprocess.run(command, input=data.encode())
# def hexdump(data):
# return ' '.join(['{:02X}'.format(x) for x in data])
def dw_to_code(path):
    """Assemble the ``dw`` directives of an assembly listing into raw bytes.

    Each line whose first whitespace-separated token is ``dw`` contributes
    one 32-bit little-endian word, parsed from the second token as
    hexadecimal. Every other line is ignored, as are tokens after the value.

    Lines are split on any run of whitespace, so tab-separated or
    column-aligned ``dw`` lines are accepted as well as single-space ones.

    Args:
        path: Path of the text file to read.

    Returns:
        The concatenated words as a ``bytes`` object (empty when the file
        contains no ``dw`` lines).

    Raises:
        ValueError: If a ``dw`` value is not valid hexadecimal.
        struct.error: If a ``dw`` value does not fit in an unsigned 32-bit word.
    """
    words = []
    with open(path) as f:
        for line in f:
            tok = line.split()
            if len(tok) >= 2 and tok[0] == 'dw':
                words.append(struct.pack('<I', int(tok[1], 16)))
    # Join once instead of growing a bytes object word by word.
    return b''.join(words)
# Resolved absolute path of "<directory of this script>/../obj"; get_map()
# looks for its .map files in this directory.
root_dir = os.path.realpath(os.path.join(os.path.dirname(__file__), '../obj'))
def get_map(path):
    """Load the address -> symbol-name table that applies to *path*.

    The table is read from ``asm.map`` inside ``root_dir``. If *path* has a
    component named ``overlays``, the component right after it is taken as
    the overlay name, and ``asm_<overlay>_lhs.map`` is read instead when
    that file exists.

    Args:
        path: Path of the assembly file being processed; only its
            components are inspected, the file itself is not opened here.

    Returns:
        A dict mapping integer addresses to symbol-name strings.

    Raises:
        OSError: If the selected map file cannot be opened.
        ValueError: If a three-field line has a second field that is not
            valid hexadecimal.
    """
    map_file = os.path.join(root_dir, "asm.map")
    # Overlay? Take the path component that directly follows a component
    # named "overlays" as the overlay name (None when there is none).
    path = os.path.normpath(path).split(os.path.sep)
    overlay_name = next((t for s, t in zip(path, path[1:]) if s == "overlays"), None)
    overlay_map = os.path.join(root_dir, f"asm_{overlay_name}_lhs.map")
    # Keep the default asm.map when the overlay has no map file of its own.
    if overlay_name and os.path.exists(overlay_map):
        map_file = overlay_map
    ret = {}
    with open(map_file) as f:
        for line in f:
            # NOTE(review): as written this replace() swaps a single space
            # for a single space, i.e. it is a no-op. Presumably it was meant
            # to collapse a wider gap (double space or tab) -- confirm
            # against the map file format before changing it.
            line = line.rstrip().replace(' ', ' ')
            tok = line.split(' ')
            # Only lines with exactly three space-separated fields are used:
            # tok[1] is parsed as a hex address and tok[2] is stored as its
            # symbol name; tok[0] is ignored.
            if len(tok) == 3:
                ret[int(tok[1], 16)] = tok[2]
    return ret
# branch_inst_1op_reg = [
# MIPS_INS_JR,
# MIPS_INS_JALR,
# ]
# Capstone instruction ids that disasm() treats as jumps/branches. The lists
# are named after their operand count; in every case disasm() reads the
# target address from the last operand.
branch_inst_1op_imm = [MIPS_INS_J, MIPS_INS_JAL, MIPS_INS_B]

branch_inst_3op = [MIPS_INS_BEQ, MIPS_INS_BNE]

branch_inst_2op = [
    MIPS_INS_BEQZ, MIPS_INS_BNEZ,
    MIPS_INS_BGEZ, MIPS_INS_BGTZ,
    MIPS_INS_BLEZ, MIPS_INS_BLTZ,
    MIPS_INS_BGEZAL, MIPS_INS_BLTZAL,
]

# Every branch-like id, used for label discovery and label substitution.
branch_inst = [*branch_inst_3op, *branch_inst_2op, *branch_inst_1op_imm]
def disasm(code, addr, name):
    """Disassemble *code* into a list of assembly text lines.

    The output starts with ``glabel <name>``. Each decoded instruction
    becomes ``/* <offset> <address> */ <mnemonic> <operands>``; a word that
    is not decoded (or is one of the mfc2/cfc2/mtc2/ctc2 mnemonics) is
    emitted as a ``.long`` literal instead. Branch targets that fall inside
    the function are replaced by ``.LAB_0x<address>`` labels, and targets of
    the ids in ``branch_inst_1op_imm`` are replaced by their name from the
    module-level ``sym_map`` when one is found.

    Args:
        code: Raw machine-code bytes of the function.
        addr: Address of the first byte of *code*.
        name: Name written on the ``glabel`` line.

    Returns:
        A list of strings, one per output line (no trailing newlines).
    """
    md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS32)
    md.detail = True  # detail mode on; inst.operands is read below
    ret = []
    labels = {}  # branch target address -> local label text
    l = len(code)
    ret.append(f'glabel {name}')
    processing_addr = addr
    last_processed = addr
    fp_code = code
    # first pass: collect every branch target that lies inside this function
    while processing_addr < (addr + l):
        for inst in md.disasm(fp_code, processing_addr):
            processing_addr += 4
            if inst.id in branch_inst:
                # the target is taken from the last operand
                val = inst.operands[len(inst.operands)-1].value.imm
                if val >= addr and val < addr + l:
                    labels[val] = f'.LAB_0x{val:x}'
        # The disassembly loop ended: step over one more word and trim
        # fp_code so that it starts at processing_addr again.
        processing_addr += 4
        fp_code = fp_code[processing_addr - last_processed:]
        last_processed = processing_addr
    processing_addr = addr
    last_processed = addr
    while processing_addr < (addr + l):
        # second pass: emit the text, substituting symbols and labels
        for inst in md.disasm(code, processing_addr):
            off = inst.address - addr
            replace = None
            # syms
            if inst.id in branch_inst_1op_imm:
                val = inst.operands[len(inst.operands)-1].value.imm
                # NOTE: this rebinds the `name` parameter; the glabel line
                # that needed the original value was already appended.
                name = sym_map.get(val)
                if name:
                    orig = hex(val)
                    replace = (orig, name)
            # labels (only when no symbol name was found above)
            if not replace and inst.id in branch_inst:
                val = inst.operands[len(inst.operands)-1].value.imm
                label = labels.get(val)
                if label:
                    orig = hex(val)
                    replace = (orig, label)
            op = inst.op_str
            if replace:
                # swap the hex target in the operand text for the name
                a, b = replace
                op = op.replace(a, b)
            # these are due to bad disassembly of arguments
            # (leave the loop so the word is emitted as a raw .long below)
            if inst.mnemonic in ["mfc2", "cfc2", "mtc2", "ctc2"]:
                break
            this_label = labels.get(inst.address)
            if this_label:
                ret.append(this_label + ':')
            ret.append('/* 0x{:04x} 0x{:x} */ {} {}'.format(off, inst.address, inst.mnemonic, op))
            processing_addr += 4
        if processing_addr >= (addr + l):
            break
        # check if there's a label for this unsupported instruction
        this_label = labels.get(processing_addr)
        if this_label:
            ret.append(this_label + ':')
        # drop the bytes already emitted so code[0:4] is the current word
        code = code[processing_addr - last_processed:]
        # TODO: disassemble manually
        # decompme will do it for us anyway so maybe not?
        # The four bytes are printed highest index first, i.e. the word is
        # read as little-endian.
        ret.append('/* 0x{:04x} 0x{:x} */ .long 0x{:02x}{:02x}{:02x}{:02x}'.format(processing_addr - addr, processing_addr, code[3], code[2], code[1], code[0]))
        processing_addr += 4
        last_processed = processing_addr
        code = code[4:]
    return ret
def rreplace(s, old, new):
    """Return *s* with its last occurrence of *old* replaced by *new*.

    *s* is returned unchanged when *old* does not occur in it.
    """
    start = s.rfind(old)
    if start < 0:
        return s
    return s[:start] + new + s[start + len(old):]
def patchSymbolsVars(lines, commentsLen = 4):
    """Rewrite raw addresses in disassembly lines as symbol references.

    Works on the text produced by ``disasm`` and uses the module-level
    ``sym_map`` (address -> name). Three rewrites are attempted:

    * ``$gp``-relative operands whose ``gp_base + offset`` is a known
      address become ``%gp_rel(symbol)``.
    * An instruction starting with ``l`` whose last operand starts with
      ``0x800`` is remembered as the upper-half load for its register.
    * A later instruction using that register has its immediate replaced by
      ``%lo(symbol[+offset])`` and the remembered line gets
      ``%hi(symbol)``. When the exact address is unknown, the closest lower
      symbol is used with an offset.

    Args:
        lines: List of disassembly text lines; it is not modified.
        commentsLen: Number of whitespace-separated tokens taken up by the
            leading ``/* offset address */`` comment of each line.

    Returns:
        A new list with the same number of lines, patched where possible.
    """
    loads = {}  # register -> index of the line that loaded its upper half
    out = lines.copy()
    for lineNumber, line in enumerate(lines):
        words = line.replace(",", "").split()
        # Too short to be "<comment> instr a b", or a line mentioning a
        # local label: nothing to patch.
        if len(words) < commentsLen + 2 or ".LAB" in line:
            continue
        instr = words[commentsLen]
        val = words[-1]
        reg = words[-2]
        if "0x" not in val:
            continue
        # "offset(reg)" form: split it into the offset and the base register
        pPos = val.find("(")
        if pPos > 0:
            reg = val[pPos + 1:-1]
            val = val[:pPos]
        # gp variables
        if reg == "$gp":
            gp_base = 0x800AB2E4  # hard-coded base the $gp offsets are added to
            addr = gp_base + int(val, 16)
            if addr in sym_map:
                symbol = sym_map[addr]
                out[lineNumber] = rreplace(lines[lineNumber], val, "%gp_rel({0})".format(symbol))
        # Store hi values per register
        elif instr.startswith("l") and val.startswith("0x800"):
            loads[reg] = lineNumber
        # Process hi and lo values
        elif reg in loads:
            # NOTE(review): mnemonics containing "t", starting with "an", or
            # with an "l" past index 1 are skipped -- presumably to leave
            # compare/logic/shift immediates alone; confirm the intent.
            if (instr.find("t") != -1 or instr.startswith("an") or
                    ('l' in instr and instr.rindex('l') > 1)):
                continue
            val = val.replace("({0})".format(reg), "")
            hiLine = lines[loads[reg]]
            hiVal = hiLine.split()[-1]
            try:
                # upper half shifted left by 16 bits, plus the lower half
                addr = int(hiVal + "0000", base=16) + int(val, base=16)
            except ValueError:
                # Only a malformed number is expected here; anything else
                # (e.g. KeyboardInterrupt) must propagate.
                print("Error: Could not read address from line: ", hiLine)
                print("debug: ", hiVal, val)
                del loads[reg]
                continue
            offset = ""
            if addr in sym_map:
                # Address match
                symbol = sym_map[addr]
            else:
                # Find closest address below the target
                currAddr = 0
                symbol = ""
                for symAddr in sym_map:
                    if currAddr < symAddr < addr:
                        currAddr = symAddr
                        symbol = sym_map[symAddr]
                diff = addr - currAddr
                if symbol == "" or diff > 9000:  # dunno what offset limit to set
                    print("Error with symbol on line: ", line)
                    del loads[reg]
                    continue
                offset = "+{0}".format(hex(diff))
            if out[loads[reg]] == lines[loads[reg]]:  # don't replace hi value more than once
                out[loads[reg]] = rreplace(lines[loads[reg]], hiVal, "%hi({0})".format(symbol))
            out[lineNumber] = rreplace(lines[lineNumber], val, "%lo({0}{1})".format(symbol, offset))
        else:
            # If we're setting a saved reg, delete saved reg
            # (maybe add checks for specific instr like 'sw' ?)
            firstReg = words[commentsLen + 1]
            if firstReg in loads:
                del loads[firstReg]
    return out
# Address -> symbol-name table read by disasm() and patchSymbolsVars();
# empty until main() rebinds it to the result of get_map().
sym_map = {}
def main(path):
    """Convert one ``.s`` file into decomp.me-ready assembly text.

    When *path* is empty or does not exist, the ``.s`` files found under
    ``../asm`` (relative to the current working directory) are offered
    through ``iterfzf`` for interactive selection. The file name must end
    in ``_<8 hex digits>.s``; those digits are the function's address.

    The resulting text is copied to the clipboard, except when the kernel
    release string contains ``microsoft-standard`` (WSL), where it is
    printed instead.

    Args:
        path: Path of the assembly file, or None/empty to pick one
            interactively.
    """
    global sym_map

    if not path or not os.path.exists(path):
        asms = glob('../asm/**/*.s', recursive=True)
        path = iterfzf(asms)
    if not path:
        print("Error: no path provided")
        return

    sym_map = get_map(path)

    # The dot is escaped so only a real ".s" extension matches.
    m = re.search(r'_([a-fA-F0-9]{8})\.s$', path)
    if not m:
        print('Error: filename', path, 'should end with a 32-bit hex address suffix (for example sub_80027384.s)')
        return

    addr = int(m.group(1), 16)
    # Strip only the extension; a ".s" elsewhere in the name is kept.
    name = os.path.splitext(os.path.basename(path))[0]
    code = dw_to_code(path)
    lines = disasm(code, addr, name)
    if RESOLVE_VARIABLES:
        lines = patchSymbolsVars(lines)
    text = '\n'.join(lines) + '\n'

    # xclip causes crashes under WSL
    if "microsoft-standard" in platform.uname().release:
        print(text)
    else:
        clipboard(text)
        print('asm is now on your clipboard to paste into decomp.me')
if __name__ == '__main__':
    # The first command-line argument, when given, is the .s file to convert.
    cli_args = sys.argv[1:]
    main(cli_args[0] if cli_args else None)