Files
jak-project/decompiler/analysis/analyze_inspect_method.cpp
T
Hat Kid 85039fe2d6 jak3: fix all-types file order (#3316)
In #3300, `per_object` in `ObjectFileDB::ir2_analyze_all_types()` was
changed to an `unordered_map`, which messed with the object file order
in `all-types`. This fixes the issue by going through each object file
in order and then looking up the type info from the map instead of
iterating through the map.

This also removes the method IDs from the old method comments and adds a
stub for the `new` method if there is an old method comment for it, in
order to stay consistent with the rest of the method list.
2024-01-18 14:57:10 +01:00

1983 lines
62 KiB
C++

#include "analyze_inspect_method.h"
#include "common/log/log.h"
#include "decompiler/Disasm/InstructionMatching.h"
#include "decompiler/ObjectFile/LinkedObjectFile.h"
namespace decompiler {
// Jak 3 type names that are listed as having duplicate inspect methods.
static const std::vector<std::string> g_duplicate_inspects_jak3 = {
    "sky-vertex",        //
    "shadow-edge",       //
    "hfrag-poly4",       //
    "hfrag-poly9",       //
    "hfrag-poly25",      //
    "hfrag-mip-packet",  //
    "sprite-aux-list",   //
    "game-save",         //
};
// Returns true when op is a SetVarOp that writes the integer constant `value` to register `dst`
// through an IDENTITY expression.
bool is_set_reg_to_int(AtomicOp* op, Register dst, s64 value) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return false;
  }

  // the written register has to be the requested one
  if (as_set->dst().reg() != dst) {
    return false;
  }

  // the source has to be a plain atom, not a math expression
  const auto& expr = as_set->src();
  if (expr.kind() != SimpleExpression::Kind::IDENTITY) {
    return false;
  }

  const auto& atom = expr.get_arg(0);
  return atom.is_int() && atom.get_int() == value;
}
// Returns true when op is a SetVarOp whose IDENTITY source is the value of the symbol named
// `value`. When `dst` holds a register, the op must also write to that register; an empty `dst`
// accepts any destination.
bool is_set_reg_to_symbol_value(AtomicOp* op,
                                std::optional<Register> dst,
                                const std::string& value) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return false;
  }

  // only constrain the destination when the caller asked for a specific register
  if (dst.has_value() && *dst != as_set->dst().reg()) {
    return false;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return false;
  }

  auto atom = as_set->src().get_arg(0);
  return atom.is_sym_val(value);
}
// Returns true when op is a SetVarOp whose IDENTITY source is a pointer to the symbol named
// `value`. When `dst` holds a register, the op must also write to that register; an empty `dst`
// accepts any destination.
bool is_set_reg_to_symbol_ptr(AtomicOp* op, std::optional<Register> dst, const std::string& value) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return false;
  }

  // only constrain the destination when the caller asked for a specific register
  if (dst.has_value() && *dst != as_set->dst().reg()) {
    return false;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return false;
  }

  auto atom = as_set->src().get_arg(0);
  return atom.is_sym_ptr(value);
}
// If op is a SetVarOp whose IDENTITY source is a symbol pointer, returns the string held by that
// atom; otherwise returns an empty optional. When `dst` holds a register, the op must also write
// to that register.
std::optional<std::string> get_set_reg_to_symbol_ptr(AtomicOp* op, std::optional<Register> dst) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return std::nullopt;
  }

  // only constrain the destination when the caller asked for a specific register
  if (dst.has_value() && *dst != as_set->dst().reg()) {
    return std::nullopt;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return std::nullopt;
  }

  auto atom = as_set->src().get_arg(0);
  if (atom.is_sym_ptr()) {
    return atom.get_str();
  }
  return std::nullopt;
}
// If op is a SetVarOp whose IDENTITY source is a symbol value, returns the string held by that
// atom; otherwise returns an empty optional. When `dst` holds a register, the op must also write
// to that register.
std::optional<std::string> get_set_reg_to_symbol_value(AtomicOp* op, std::optional<Register> dst) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return std::nullopt;
  }

  // only constrain the destination when the caller asked for a specific register
  if (dst.has_value() && *dst != as_set->dst().reg()) {
    return std::nullopt;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return std::nullopt;
  }

  auto atom = as_set->src().get_arg(0);
  if (atom.is_sym_val()) {
    return atom.get_str();
  }
  return std::nullopt;
}
// Returns true when op is a LoadVarOp that writes `dst` with an unsigned 4-byte load whose
// address get_as_reg_offset decomposes into a register plus exactly `offset`.
bool is_set_reg_to_load(AtomicOp* op, Register dst, int offset) {
  auto* load = dynamic_cast<LoadVarOp*>(op);
  if (load == nullptr) {
    return false;
  }

  // the loaded-into register has to be the requested one
  if (load->get_set_destination().reg() != dst) {
    return false;
  }

  // only unsigned 4-byte loads qualify
  const bool is_unsigned_word = load->kind() == LoadVarOp::Kind::UNSIGNED && load->size() == 4;
  if (!is_unsigned_word) {
    return false;
  }

  IR2_RegOffset reg_offset;
  return get_as_reg_offset(load->src(), &reg_offset) && reg_offset.offset == offset;
}
// If op is an 8-byte LoadVarOp into `dst` whose IDENTITY source is a label, reads the two
// consecutive words at that label from `file` and returns them combined as a 64-bit value (word
// at the label in the low half, the following word in the high half). Returns an empty optional
// when the op does not have this shape or either word is not PLAIN_DATA.
std::optional<u64> get_set_reg_to_u64_load(AtomicOp* op,
                                           Register dst,
                                           const LinkedObjectFile& file) {
  auto* load = dynamic_cast<LoadVarOp*>(op);
  if (load == nullptr) {
    return std::nullopt;
  }

  if (load->get_set_destination().reg() != dst) {
    return std::nullopt;
  }

  if (load->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return std::nullopt;
  }

  if (load->size() != 8) {
    return std::nullopt;
  }

  const auto& atom = load->src().get_arg(0);
  if (!atom.is_label()) {
    return std::nullopt;
  }

  // label offsets are in bytes, the per-segment word list is indexed in 4-byte words
  const auto& label = file.labels.at(atom.label());
  const auto word_idx = label.offset / 4;
  const auto& low_word = file.words_by_seg.at(label.target_segment).at(word_idx);
  const auto& high_word = file.words_by_seg.at(label.target_segment).at(word_idx + 1);

  const bool both_plain = low_word.kind() == LinkedWord::PLAIN_DATA &&
                          high_word.kind() == LinkedWord::PLAIN_DATA;
  if (!both_plain) {
    return std::nullopt;
  }

  return ((u64)low_word.data) | (((u64)high_word.data) << 32);
}
// If op is a SetVarOp into `dst` whose IDENTITY source is a label, reads the two consecutive
// words at that label from `file` and returns them combined as a 64-bit value (word at the label
// in the low half, the following word in the high half). Returns an empty optional when the op
// does not have this shape or either word is not PLAIN_DATA.
std::optional<u64> get_set_reg_to_lui(AtomicOp* op, Register dst, const LinkedObjectFile& file) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return std::nullopt;
  }

  if (as_set->get_set_destination().reg() != dst) {
    return std::nullopt;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return std::nullopt;
  }

  const auto& atom = as_set->src().get_arg(0);
  if (!atom.is_label()) {
    return std::nullopt;
  }

  // label offsets are in bytes, the per-segment word list is indexed in 4-byte words
  const auto& label = file.labels.at(atom.label());
  const auto word_idx = label.offset / 4;
  const auto& low_word = file.words_by_seg.at(label.target_segment).at(word_idx);
  const auto& high_word = file.words_by_seg.at(label.target_segment).at(word_idx + 1);

  const bool both_plain = low_word.kind() == LinkedWord::PLAIN_DATA &&
                          high_word.kind() == LinkedWord::PLAIN_DATA;
  if (!both_plain) {
    return std::nullopt;
  }

  return ((u64)low_word.data) | (((u64)high_word.data) << 32);
}
// If op is a SetVarOp that sets `reg` to a label through an IDENTITY expression, returns the
// GOAL string that `file` reports for that label; otherwise returns an empty optional.
std::optional<std::string> get_string_loaded_to_reg(AtomicOp* op,
                                                    Register reg,
                                                    LinkedObjectFile& file) {
  auto* as_set = dynamic_cast<SetVarOp*>(op);
  if (as_set == nullptr) {
    return std::nullopt;
  }

  // the written register has to be the requested one
  if (as_set->dst().reg() != reg) {
    return std::nullopt;
  }

  if (as_set->src().kind() != SimpleExpression::Kind::IDENTITY) {
    return std::nullopt;
  }

  auto& label_atom = as_set->src().get_arg(0);
  if (label_atom.is_label()) {
    return file.get_goal_string_by_label(label_atom.label());
  }
  return std::nullopt;
}
// What was learned about a field from the format string used to print it in an inspect method.
struct FieldPrint {
  // special values for array_size
  static constexpr int NO_ARR = -1;            // default: no array size recorded
  static constexpr int DYNAMIC_ARRAY = -2;     // the size was printed with ~D
  static constexpr int UNKNOWN_ARR_SIZE = -3;  // an array whose size could not be determined

  char format{'\0'};              // format directive character, '\0' until one is parsed
  std::string field_name{};       // name of the field as it appears in the print
  std::string field_type_name{};  // type name, only filled when the print spells one out
  bool has_array{false};          // true when the name was followed by a [...] suffix
  int array_size{NO_ARR};         // element count, or one of the special values above
};
// Fallback for format strings that do not follow the usual field-print layout: hands back a copy
// of what has been parsed so far instead of asserting, since custom prints have too many edge
// cases to handle individually. The format string itself is ignored.
FieldPrint handle_custom_prints(FieldPrint& fp, const std::string& /*str*/) {
  return FieldPrint(fp);
}
// Parse the format string of a single inspect-method field print (for example "~Tname: ~D~%")
// into a FieldPrint: the field name, an optional array size, the format directive and, when the
// string spells one out, the field's type name.
// Strings that deviate from the expected layout are returned early, partially filled, through
// handle_custom_prints. A value format that matches none of the known patterns is logged and the
// FieldPrint is returned with whatever was parsed up to that point.
// Note: next() and peek() use std::string::at, so reading past the end of str throws
// std::out_of_range instead of returning early.
FieldPrint get_field_print(const std::string& str) {
int idx = 0;
// next() consumes one character, peek(off) looks ahead relative to the current position
auto next = [&]() { return str.at(idx++); };
auto peek = [&](int off) { return str.at(idx + off); };
FieldPrint field_print;
// first is ~T
char c0 = next();
ASSERT(c0 == '~');
char c1 = next();
// a single '1' or '2' is allowed between '~' and 'T' (as in "~2T")
if (c1 == '1' || c1 == '2') {
c1 = next();
}
ASSERT(c1 == 'T');
// next the name:
char name_char = next();
if (name_char == '~') {
return handle_custom_prints(field_print, str);
}
// the name runs until ':', '[' or ' '
while (name_char != ':' && name_char != '[' && name_char != ' ') {
field_print.field_name.push_back(name_char);
name_char = next();
}
// possibly array thing
if (name_char == '[') {
int size = 0;
char num_char = next();
// dynamic array using a ~D print
// (format "~Tstack[~D] @ #x~X~%" (-> obj allocated-length) (-> obj stack))
if (num_char == '~') {
num_char = next();
ASSERT(num_char == 'D');
num_char = next();
ASSERT(num_char == ']');
// distinguish from dynamic arrays that are set to size 0
field_print.array_size = size = FieldPrint::DYNAMIC_ARRAY;
}
// decimal array size; does not run in the ~D case because num_char is ']' there
while (num_char >= '0' && num_char <= '9') {
size = size * 10 + (num_char - '0');
num_char = next();
}
field_print.has_array = true;
field_print.array_size = size;
ASSERT(num_char == ']');
char c = next();
// (method 3 array) and some others have a colon instead of a space here
if (c == ':') {
c = next();
}
// after the brackets the string must continue with " @ #x"
if (c != ' ') {
return handle_custom_prints(field_print, str);
}
c = next();
if (c != '@') {
return handle_custom_prints(field_print, str);
}
c = next();
if (c != ' ') {
return handle_custom_prints(field_print, str);
}
c = next();
if (c != '#') {
return handle_custom_prints(field_print, str);
}
c = next();
if (c != 'x') {
return handle_custom_prints(field_print, str);
}
} else {
// next a space
char space_char = next();
if (space_char != ' ') {
return handle_custom_prints(field_print, str);
}
}
// next the format
char fmt1 = next();
// if there are extra spaces
if (fmt1 == ' ') {
while (fmt1 == ' ') {
fmt1 = next();
}
}
if (fmt1 == '~' && peek(0) != '`') { // normal ~_~%
char fmt_code = next();
field_print.format = fmt_code;
char end1 = next();
if (end1 != '~') {
return handle_custom_prints(field_print, str);
}
char end2 = next();
if (end2 != '%') {
return handle_custom_prints(field_print, str);
}
ASSERT(idx == (int)str.size());
// NOTE(review): the branch below cannot be taken. Any '~' that is not followed by '`' is already
// handled by the branch above, whose body is identical, so ~g/~G are parsed there.
} else if (fmt1 == '~' && (peek(0) == 'g' || peek(0) == 'G')) { // ~g~%
char fmt_code = next();
field_print.format = fmt_code;
char end1 = next();
if (end1 != '~') {
return handle_custom_prints(field_print, str);
}
char end2 = next();
if (end2 != '%') {
return handle_custom_prints(field_print, str);
}
ASSERT(idx == (int)str.size());
} else if (fmt1 == '#' && peek(0) == '<') { // struct #<my-struct @ #x~X>~%
next();
// the type name runs until the first space
char type_name_c = next();
while (type_name_c != ' ') {
field_print.field_type_name += type_name_c;
type_name_c = next();
}
std::string expect_end = "@ #x~X>~%";
for (char i : expect_end) {
char c = next();
ASSERT(i == c);
}
field_print.format = 'X';
ASSERT(idx == (int)str.size());
} else if (fmt1 == '#' && peek(4) == ':') { // #x~X : (enum-name
// OR
// #x~X : ~S~%
if (peek(6) != '(' && peek(7) == 'S') {
next();
std::string expect_end = "~X : ~S~%";
for (char i : expect_end) {
char c = next();
ASSERT(i == c);
}
field_print.format = 'X';
} else {
// skip to paren
for (int i = 0; i < 7; i++) {
next();
}
// everything after the skipped prefix, minus the last character, is taken as the type name
auto name = str.substr(idx);
// some of these don't have the enum name
if (!name.empty()) {
name.pop_back();
field_print.field_type_name = name;
}
field_print.format = 'X';
}
} else if (fmt1 == '#' && peek(0) == 'x') { // #x~X~%
next();
std::string expect_end = "~X~%";
for (char i : expect_end) {
char c = next();
ASSERT(i == c);
}
field_print.format = 'X';
} else if (fmt1 == '~' && peek(0) == '`') { // ~`my-type-with-overriden-print`P~%
next();
// the type name is everything between the two backticks
char type_name_c = next();
while (type_name_c != '`') {
field_print.field_type_name += type_name_c;
type_name_c = next();
}
std::string expect_end = "P~%";
for (char i : expect_end) {
char c = next();
ASSERT(i == c);
}
field_print.format = 'P';
ASSERT(idx == (int)str.size());
// the remaining patterns compare the rest of the string, starting at fmt1, as a whole
} else if (str.substr(idx - 1) == "(meters ~m)~%") {
field_print.format = 'm';
} else if (str.substr(idx - 1) == "(deg ~r)~%") {
field_print.format = 'r';
} else if (str.substr(idx - 1) == "(seconds ~e)~%") {
field_print.format = 'e';
}
else {
// unknown value format: log it and return what was parsed, format stays '\0'
// throw std::runtime_error("other format nyi in get_field_print " + str.substr(idx));
lg::print("other format nyi in get_field_print {}\n", str.substr(idx));
}
return field_print;
}
// Check the prologue of an inspect method for a child of process and locate the first field
// print.
//
// The expected shape is the two-block null check on gp followed by a call through the parent
// type's method table with gp as the argument. When parent_type is "UNKNOWN", the parent name is
// recovered from the printed form of the op that loads the parent type and stored in
// result->parent_type_name. result->is_basic is set for the hard-coded process cases, for
// parent_type "process-focusable", and whenever result->parent_type_name is not "structure".
//
// Returns the index of the first atomic op after the parent call, 7 for the hard-coded
// (method 3 process-tree) / (method 3 process) cases, or -1 when the prologue does not match.
int get_start_idx_process(Function& function,
                          const std::string& parent_type,
                          Env& env,
                          TypeInspectorResult* result) {
  // hack
  if (function.name() == "(method 3 process-tree)" || function.name() == "(method 3 process)") {
    result->is_basic = true;
    return 7;
  }

  if (parent_type == "process-focusable") {
    result->is_basic = true;
  }

  if (function.basic_blocks.size() != 5) {
    lg::print("[iim] inspect {} had {} basic blocks, expected 5\n", function.name(),
              function.basic_blocks.size());
    // Report failure like every other mismatch in this function. This path used to return 1,
    // which cannot be told apart from a valid op index.
    return -1;
  }

  if (!function.ir2.atomic_ops) {
    lg::print("[iim] no atomic ops in {}\n", function.name());
    return -1;
  }
  auto& aos = *function.ir2.atomic_ops;
  int op_idx = 0;

  // block 0:
  /*
   * (set! gp a0)
   * (b! (truthy gp) L370 (set! v1 #f))
   */
  if (aos.block_id_to_end_atomic_op.at(0) != 2) {
    lg::print("[iim] block 0 had the wrong number of ops: {} for {}\n",
              aos.block_id_to_end_atomic_op.at(0), function.name());
    return -1;
  }

  if (!is_op_2(aos.ops.at(op_idx).get(), SimpleExpression::Kind::IDENTITY,
               Register(Reg::GPR, Reg::GP), Register(Reg::GPR, Reg::A0))) {
    lg::print("[iim] block 0 op 0 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  auto br = dynamic_cast<BranchOp*>(aos.ops.at(op_idx).get());
  if (!br) {
    lg::print("[iim] block 0 op 1 bad in {}: {} (not branch)\n",
              aos.ops.at(op_idx)->to_string(env), function.name());
    return -1;
  }

  if (br->likely() || br->condition().kind() != IR2_Condition::Kind::TRUTHY ||
      !br->condition().src(0).is_var() ||
      br->condition().src(0).var().reg() != Register(Reg::GPR, Reg::GP) ||
      br->branch_delay().kind() != IR2_BranchDelay::Kind::SET_REG_FALSE ||
      br->branch_delay().var(0).reg() != Register(Reg::GPR, Reg::V1)) {
    lg::print("[iim] block 0 op 1 bad in {}: {} (bad branch)\n",
              aos.ops.at(op_idx)->to_string(env), function.name());
    return -1;
  }
  op_idx++;

  // block 1:
  /*
   * (set! gp gp)
   * (b! #t L371 (nop!))
   */
  if (aos.block_id_to_end_atomic_op.at(1) != 4) {
    lg::print("[iim] block 1 had the wrong number of ops: {} for {}\n",
              aos.block_id_to_end_atomic_op.at(1), function.name());
    return -1;
  }

  if (!is_op_2(aos.ops.at(op_idx).get(), SimpleExpression::Kind ::IDENTITY,
               Register(Reg::GPR, Reg::GP), Register(Reg::GPR, Reg::GP))) {
    lg::print("[iim] op 2 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env), function.name());
    return -1;
  }
  op_idx++;

  auto br2 = dynamic_cast<BranchOp*>(aos.ops.at(op_idx).get());
  if (!br2) {
    lg::print("[iim] op 3 bad in {}: {} (not branch)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }

  if (br2->likely() || br2->condition().kind() != IR2_Condition::Kind::ALWAYS ||
      br2->branch_delay().kind() != IR2_BranchDelay::Kind::NOP) {
    lg::print("[iim] op3 bad in {}: {} (bad branch)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  /*
  B2:
    or v1, r0, r0             ;; [  4] (set! v1 0)
  B3:
  L2:
    lw v1, process(s7)        ;; [  5] (set! v1 process)
    lwu t9, 28(v1)            ;; [  6] (set! t9 (l.wu (+ v1 28)))
    or a0, gp, r0             ;; [  7] (set! a0 gp)
    jalr ra, t9               ;; [  8] (call!)
    sll v0, ra, 0
   */
  if (!is_set_reg_to_int(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::V1), 0)) {
    lg::print("[iim] op4 bad in {}: {} (bad set 0)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  // try to determine parent type for far labels
  if (parent_type == "UNKNOWN") {
    // assumes the op prints as "(set! v1 <type>)": drop the 9-character "(set! v1 " prefix and
    // the closing paren to leave the type name
    auto parent_op_str = aos.ops.at(op_idx)->to_string(env);
    auto parent_type_str = parent_op_str.substr(9);
    parent_type_str.pop_back();
    result->parent_type_name = parent_type_str;
  } else {
    if (!is_set_reg_to_symbol_value(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::V1),
                                    parent_type)) {
      lg::print("[iim] op5 bad in {}: {} (bad set parent type)\n",
                aos.ops.at(op_idx)->to_string(env), function.name());
      return -1;
    }
  }

  // TODO check if this catches all cases or if there are false positives
  // hack to get correct field offsets for children of process
  if (result->parent_type_name != "structure") {
    result->is_basic = true;
  }
  op_idx++;

  if (aos.ops.at(op_idx).get()->to_string(env) != "(set! t9 (l.wu (+ v1 28)))") {
    lg::print("[iim] op6 bad in {}: {} (bad load inspect)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  if (aos.ops.at(op_idx).get()->to_string(env) != "(set! a0 gp)") {
    lg::print("[iim] op7 bad in {}: {} (bad set arg)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  if (aos.ops.at(op_idx).get()->to_string(env) != "(call!)") {
    lg::print("[iim] op8 bad in {}: {} (bad call)\n", aos.ops.at(op_idx)->to_string(env),
              function.name());
    return -1;
  }
  op_idx++;

  // what follows is one format call per field, for example:
  /*
    lw t9, format(s7)         ;; [  9] (set! t9 format)
    daddiu a0, s7, #t         ;; [ 10] (set! a0 #t)
    daddiu a1, fp, L16        ;; [ 11] (set! a1 L16) "~2Tformation: ~A~%"
    lwu a2, 124(gp)           ;; [ 12] (set! a2 (l.wu (+ gp 124)))
    jalr ra, t9               ;; [ 13] (call!)
    sll v0, ra, 0
    ...
  B4:
  L3:
    or v0, gp, r0             ;; [ 24] (set! v0 gp)
    ld ra, 0(sp)
   */
  return op_idx;
}
// Check the prologue of a non-process inspect method and locate the first field print.
//
// The expected prologue is the two-block null check on gp followed by a format call that prints
// the "[~8x] ~A~%" header line. result->is_basic is derived from how a3 is set for that call:
// a symbol pointer to type_name clears it, a 4-byte load from offset -4 of the object sets it.
// When the function does not have 5 basic blocks, is_basic is only set if parent_type is "basic".
//
// Returns the index of the first atomic op after the header format call, or -1 on a mismatch.
int get_start_idx(Function& function,
LinkedObjectFile& file,
TypeInspectorResult* result,
const std::string& parent_type,
const std::string& type_name,
Env& env) {
if (function.basic_blocks.size() != 5) {
lg::print("[iim] inspect {} had {} basic blocks, expected 5\n", function.name(),
function.basic_blocks.size());
if (parent_type == "basic") {
result->is_basic = true;
}
return -1;
}
if (!function.ir2.atomic_ops) {
lg::print("[iim] no atomic ops in {}\n", function.name());
return -1;
}
auto& aos = *function.ir2.atomic_ops;
// index of the op currently being matched
int op_idx = 0;
// block 0:
/*
* (set! gp a0)
* (b! (truthy gp) L370 (set! v1 #f))
*/
if (aos.block_id_to_end_atomic_op.at(0) != 2) {
lg::print("[iim] block 0 had the wrong number of ops: {} for {}\n",
aos.block_id_to_end_atomic_op.at(0), function.name());
return -1;
}
if (!is_op_2(aos.ops.at(op_idx).get(), SimpleExpression::Kind::IDENTITY,
Register(Reg::GPR, Reg::GP), Register(Reg::GPR, Reg::A0))) {
lg::print("[iim] block 0 op 0 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env),
function.name());
return -1;
}
op_idx++;
auto br = dynamic_cast<BranchOp*>(aos.ops.at(op_idx).get());
if (!br) {
lg::print("[iim] block 0 op 1 bad in {}: {} (not branch)\n", aos.ops.at(1)->to_string(env),
function.name());
return -1;
}
// must be a non-likely branch on (truthy gp) whose delay slot sets v1 to #f
if (br->likely() || br->condition().kind() != IR2_Condition::Kind::TRUTHY ||
!br->condition().src(0).is_var() ||
br->condition().src(0).var().reg() != Register(Reg::GPR, Reg::GP) ||
br->branch_delay().kind() != IR2_BranchDelay::Kind::SET_REG_FALSE ||
br->branch_delay().var(0).reg() != Register(Reg::GPR, Reg::V1)) {
lg::print("[iim] block 0 op 1 bad in {}: {} (bad branch)\n", aos.ops.at(1)->to_string(env),
function.name());
return -1;
}
op_idx++;
// block 1:
/*
* (set! gp gp)
* (b! #t L371 (nop!))
*/
if (aos.block_id_to_end_atomic_op.at(1) != 4) {
lg::print("[iim] block 1 had the wrong number of ops: {} for {}\n",
aos.block_id_to_end_atomic_op.at(1), function.name());
return -1;
}
if (!is_op_2(aos.ops.at(op_idx).get(), SimpleExpression::Kind ::IDENTITY,
Register(Reg::GPR, Reg::GP), Register(Reg::GPR, Reg::GP))) {
lg::print("[iim] op 2 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env), function.name());
return -1;
}
op_idx++;
auto br2 = dynamic_cast<BranchOp*>(aos.ops.at(op_idx).get());
if (!br2) {
lg::print("[iim] op 3 bad in {}: {} (not branch)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
return -1;
}
// must be a non-likely unconditional branch with a nop in the delay slot
if (br2->likely() || br2->condition().kind() != IR2_Condition::Kind::ALWAYS ||
br2->branch_delay().kind() != IR2_BranchDelay::Kind::NOP) {
lg::print("[iim] op3 bad in {}: {} (bad branch)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
return -1;
}
op_idx++;
// setup
/*
(set! v1 0)
(set! t9 format)
(set! a0 #t)
(set! a1 L386)
(set! a2 gp)
(set! a3 'vector) (also can be (set! a3 (l.wu (+ gp -4))))
(call!)
*/
// NOTE(review): mismatches on ops 4 through 7 are only logged; unlike the checks before and
// after them they do not return -1. Confirm that this leniency is intended.
if (!is_set_reg_to_int(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::V1), 0)) {
lg::print("[iim] op4 bad in {}: {} (bad set 0)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
}
op_idx++;
if (!is_set_reg_to_symbol_value(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::T9),
"format")) {
lg::print("[iim] op5 bad in {}: {} (bad set format)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
}
op_idx++;
if (!is_set_reg_to_symbol_ptr(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::A0), "#t")) {
lg::print("[iim] op6 bad in {}: {} (bad set #t)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
}
op_idx++;
// the string loaded into a1 should be the header format "[~8x] ~A~%"
auto type_name_str =
get_string_loaded_to_reg(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::A1), file);
if (!type_name_str) {
lg::print("[iim] op7 bad in {}: {} (bad string)\n", aos.ops.at(op_idx)->to_string(env),
function.name());
} else if (type_name_str != "[~8x] ~A~%") {
lg::print("[iim] op7 bad in {}: {} (bad string: {})\n", aos.ops.at(op_idx)->to_string(env),
function.name(), *type_name_str);
}
op_idx++;
if (!is_op_2(aos.ops.at(op_idx).get(), SimpleExpression::Kind ::IDENTITY,
Register(Reg::GPR, Reg::A2), Register(Reg::GPR, Reg::GP))) {
lg::print("[iim] op 8 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env), function.name());
return -1;
}
op_idx++;
// a3 decides is_basic: a symbol pointer to the type name clears it, a load from offset -4 of
// the object (matched by its printed form, in three register-naming variants) sets it
if (is_set_reg_to_symbol_ptr(aos.ops.at(op_idx).get(), Register(Reg::GPR, Reg::A3), type_name)) {
result->is_basic = false;
} else if (aos.ops.at(op_idx)->to_string(env) == "(set! a3 (l.wu (+ gp -4)))" ||
aos.ops.at(op_idx)->to_string(env) == "(set! a3-0 (l.wu (+ a0-0 -4)))" ||
aos.ops.at(op_idx)->to_string(env) == "(set! a3-0 (l.wu (+ obj -4)))") {
result->is_basic = true;
} else {
lg::print("[iim] op 9 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env), function.name());
return -1;
}
op_idx++;
// the header format call itself
if (!dynamic_cast<CallOp*>(aos.ops.at(op_idx).get())) {
lg::print("[iim] op 10 bad in {}: {}\n", aos.ops.at(op_idx)->to_string(env), function.name());
return -1;
}
op_idx++;
return op_idx;
}
// Split a load address expression into its base register and constant offset.
// Two shapes are accepted: a bare variable (offset 0) and (+ variable integer-constant).
// Any other expression is a hard error: it trips ASSERT(false) and, should the assert not stop
// execution, throws std::runtime_error so that control never runs off the end of the function.
std::pair<Register, int> get_base_of_load(const SimpleExpression& load_addr) {
  if (load_addr.kind() == SimpleExpression::Kind::IDENTITY) {
    const auto& src = load_addr.get_arg(0);
    if (src.is_var()) {
      return {src.var().reg(), 0};
    }
  }

  if (load_addr.kind() == SimpleExpression::Kind::ADD) {
    const auto& src0 = load_addr.get_arg(0);
    const auto& src1 = load_addr.get_arg(1);
    if (src1.get_kind() == SimpleAtom::Kind::INTEGER_CONSTANT &&
        src0.get_kind() == SimpleAtom::Kind::VARIABLE) {
      return {src0.var().reg(), src1.get_int()};
    }
  }

  ASSERT(false);
  // NOTE(review): presumably ASSERT can be disabled in some build configuration. Without this
  // throw, that would leave a non-void function with no return on this path (undefined
  // behavior).
  throw std::runtime_error("get_base_of_load: unsupported load address expression");
}
bool is_load_with_base(const SimpleExpression& expr, Register base) {
return get_base_of_load(expr).first == base;
}
// Returns true when `ir` is a LoadVarOp that writes `dst` and whose address is based on `base`.
bool is_get_load(AtomicOp* ir, Register dst, Register base) {
  auto* load = dynamic_cast<LoadVarOp*>(ir);
  if (!load) {
    return false;
  }
  // the address is only examined once the destination has matched
  if (!(load->get_set_destination().reg() == dst)) {
    return false;
  }
  return is_load_with_base(load->src(), base);
}
// The properties of a load that the field identification needs.
struct LoadInfo {
  int offset{0};         // constant offset from the base register
  int size{0};           // size of the load as reported by the LoadVarOp
  LoadVarOp::Kind kind;  // load kind; has no default, get_load_info_from_set always assigns it
};
// Collect kind, size and constant offset of a load. `load` must be a LoadVarOp (asserted) and
// its address must be a shape that get_base_of_load accepts.
LoadInfo get_load_info_from_set(AtomicOp* load) {
  auto* load_op = dynamic_cast<LoadVarOp*>(load);
  ASSERT(load_op);

  LoadInfo result;
  result.kind = load_op->kind();
  result.size = load_op->size();
  // only the offset part of the (base register, offset) pair is kept
  result.offset = get_base_of_load(load_op->src()).second;
  return result;
}
// Add an integer field to `result`, based on the load at op `idx`.
// The type name is built from the signedness and size of the load ("uint32", "int16", ...),
// except for the 'e' format, which is recorded as "seconds" and must be an 8-byte load.
// Throws std::runtime_error for a load size other than 1, 2, 4, 8 or 16.
// Returns the index of the op after the load.
int identify_int_field(int idx,
                       Function& function,
                       TypeInspectorResult* result,
                       FieldPrint& print_info) {
  const auto load = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());

  // signedness prefix
  std::string type_name;
  if (load.kind == LoadVarOp::Kind::UNSIGNED) {
    type_name = "uint";
  } else {
    if (load.kind == LoadVarOp::Kind::FLOAT) {
      ASSERT(false);  // ...
    }
    type_name = "int";
  }

  // width suffix
  switch (load.size) {
    case 1:
      type_name += "8";
      break;
    case 2:
      type_name += "16";
      break;
    case 4:
      type_name += "32";
      break;
    case 8:
      type_name += "64";
      break;
    case 16:
      type_name += "128";
      break;
    default:
      throw std::runtime_error("unknown load op size in identify int field " +
                               std::to_string((int)load.size));
  }

  // a value printed with ~e is a time
  if (print_info.format == 'e') {
    type_name = "seconds";
    ASSERT(load.size == 8);
  }

  const int field_offset = result->is_basic ? load.offset + BASIC_OFFSET : load.offset;
  Field field(print_info.field_name, TypeSpec(type_name), field_offset);
  result->fields_of_type.push_back(field);
  return idx;
}
int identify_float_field(int idx,
Function& function,
TypeInspectorResult* result,
FieldPrint& print_info) {
auto load_info = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());
ASSERT(load_info.size == 4);
ASSERT(load_info.kind == LoadVarOp::Kind::FLOAT);
auto& float_move = function.ir2.atomic_ops->ops.at(idx++);
if (!is_op_2(float_move.get(), SimpleExpression::Kind::FPR_TO_GPR, make_gpr(Reg::A2),
make_fpr(0))) {
printf("bad float move: %s\n", float_move->to_string(function.ir2.env).c_str());
ASSERT(false);
}
std::string type;
switch (print_info.format) {
case 'f':
type = "float";
break;
case 'm':
type = "meters";
break;
case 'r':
type = "deg";
break;
case 'X':
type = "float";
result->warnings += "field " + print_info.field_name + " is a float printed as hex? ";
break;
default:
ASSERT(false);
}
int offset = load_info.offset;
if (result->is_basic) {
offset += BASIC_OFFSET;
}
Field field(print_info.field_name, TypeSpec(type), offset);
result->fields_of_type.push_back(field);
return idx;
}
// Add a field of type "pointer" to `result`, based on the load at op `idx`, which must be an
// unsigned 4-byte load (asserted). Returns the index of the op after the load.
int identify_pointer_field(int idx,
                           Function& function,
                           TypeInspectorResult* result,
                           FieldPrint& print_info) {
  const auto load = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());
  ASSERT(load.size == 4);
  ASSERT(load.kind == LoadVarOp::Kind::UNSIGNED);

  const int field_offset = result->is_basic ? load.offset + BASIC_OFFSET : load.offset;
  Field field(print_info.field_name, TypeSpec("pointer"), field_offset);
  result->fields_of_type.push_back(field);
  return idx;
}
bool get_ptr_offset_constant_nonzero(const SimpleExpression& math, Register base, int* result) {
// if (!is_reg(math->arg0.get(), base)) {
if (!math.get_arg(0).is_var() || math.get_arg(0).var().reg() != base) {
return false;
}
if (!math.get_arg(1).is_int()) {
return false;
}
*result = math.get_arg(1).get_int();
return true;
}
// Returns true when `ir` is a SetVarOp that writes `dst` and whose source passes
// get_ptr_offset_constant_nonzero for `base`; the matched offset is stored in *result.
bool get_ptr_offset(AtomicOp* ir, Register dst, Register base, int* result) {
  auto* set_op = dynamic_cast<SetVarOp*>(ir);
  if (set_op == nullptr || set_op->dst().reg() != dst) {
    return false;
  }
  return get_ptr_offset_constant_nonzero(set_op->src(), base, result);
}
// Add an array field of type "UNKNOWN" to `result`. The field offset comes from the op that
// computes the array's address relative to gp.
//
// For a normal array print that op is at `idx` and writes a2. For a dynamic array printed with
// ~D, one op is skipped first (presumably the one loading the length argument) and the address
// is expected in a3.
//
// A positive array_size makes the field a fixed-size array, anything else a dynamic one.
// Asserts when the address op does not have the expected form.
// Returns the index of the op after the address computation.
int identify_array_field(int idx,
                         Function& function,
                         TypeInspectorResult* result,
                         FieldPrint& print_info) {
  const bool is_dynamic_print = print_info.array_size == FieldPrint::DYNAMIC_ARRAY;

  // dynamic array with ~D inspect print: one extra op comes before the address computation
  if (is_dynamic_print) {
    idx++;
  }

  // Consume the address op in both cases. The dynamic path used to read it without advancing
  // idx, so the returned index still pointed at this op instead of past it.
  AtomicOp* get_op = function.ir2.atomic_ops->ops.at(idx++).get();

  int offset = 0;
  bool ptr;
  if (is_dynamic_print) {
    ptr = get_ptr_offset(get_op, make_gpr(Reg::A3), make_gpr(Reg::GP), &offset);
  } else {
    ptr = get_ptr_offset(get_op, make_gpr(Reg::A2), make_gpr(Reg::GP), &offset);
  }

  if (!ptr) {
    printf("bad get ptr offset %s\n", get_op->to_string(function.ir2.env).c_str());
    ASSERT(false);
  }

  if (result->is_basic) {
    offset += BASIC_OFFSET;
  }

  Field field(print_info.field_name, TypeSpec("UNKNOWN"), offset);
  if (print_info.array_size > 0) {
    field.set_array(print_info.array_size);
  } else {
    field.set_dynamic();
  }
  result->fields_of_type.push_back(field);
  return idx;
}
// Add a field whose type name was taken from the print string and whose value is loaded at op
// `idx`. A load that is not an unsigned 4-byte load adds a warning that the type is likely a
// value type. Returns the index of the op after the load.
int identify_struct_not_inline_field(int idx,
                                     Function& function,
                                     TypeInspectorResult* result,
                                     FieldPrint& print_info) {
  const auto load = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());

  if (load.size != 4 || load.kind != LoadVarOp::Kind::UNSIGNED) {
    result->warnings += "field " + print_info.field_type_name + " is likely a value type. ";
  }

  const int field_offset = result->is_basic ? load.offset + BASIC_OFFSET : load.offset;
  Field field(print_info.field_name, TypeSpec(print_info.field_type_name), field_offset);
  result->fields_of_type.push_back(field);
  return idx;
}
int identify_struct_inline_field(int idx,
Function& function,
TypeInspectorResult* result,
FieldPrint& print_info) {
auto& get_op = function.ir2.atomic_ops->ops.at(idx++);
int offset = 0;
if (!get_ptr_offset(get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP), &offset)) {
printf("bad get ptr offset %s\n", get_op->to_string(function.ir2.env).c_str());
// ASSERT(false);
}
if (result->is_basic) {
offset += BASIC_OFFSET;
}
Field field(print_info.field_name, TypeSpec(print_info.field_type_name), offset);
field.set_inline();
result->fields_of_type.push_back(field);
return idx;
}
// Add a field that is printed with ~A, based on the load at op `idx`. A 4-byte load gives a
// "basic" field (with a warning if the load is signed); an 8-byte load gives a "uint64" field
// with a warning. Other sizes, and loads that are neither signed nor unsigned, assert.
// `file` is unused. Returns the index of the op after the load.
int identify_basic_field(int idx,
                         Function& function,
                         LinkedObjectFile& file,
                         TypeInspectorResult* result,
                         FieldPrint& print_info) {
  (void)file;
  const auto load = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());
  ASSERT(load.kind == LoadVarOp::Kind::UNSIGNED || load.kind == LoadVarOp::Kind::SIGNED);

  TypeSpec field_type("basic");
  switch (load.size) {
    case 8:
      result->warnings += "field " + print_info.field_name + " uses ~A with a 64-bit load. ";
      field_type = TypeSpec("uint64");
      break;
    case 4:
      // I wonder if this actually "object", or some other type?  It seems to be
      if (load.kind == LoadVarOp::Kind::SIGNED) {
        result->warnings += "field " + print_info.field_name + " uses ~A with a signed load. ";
      }
      break;
    default:
      ASSERT(false);
  }

  const int field_offset = result->is_basic ? load.offset + BASIC_OFFSET : load.offset;
  Field field(print_info.field_name, field_type, field_offset);
  result->fields_of_type.push_back(field);
  return idx;
}
// Add a field that is printed with ~S, based on the load at op `idx`. A 4-byte load gives a
// "string" field (with a warning if the load is signed); an 8-byte load gives a "uint64" field
// with a warning. Other sizes, and loads that are neither signed nor unsigned, assert.
// `file` is unused. Returns the index of the op after the load.
int identify_string_field(int idx,
                          Function& function,
                          LinkedObjectFile& file,
                          TypeInspectorResult* result,
                          FieldPrint& print_info) {
  (void)file;
  const auto load = get_load_info_from_set(function.ir2.atomic_ops->ops.at(idx++).get());
  ASSERT(load.kind == LoadVarOp::Kind::UNSIGNED || load.kind == LoadVarOp::Kind::SIGNED);

  TypeSpec field_type("string");
  switch (load.size) {
    case 8:
      result->warnings += "field " + print_info.field_name + " uses ~S with a 64-bit load. ";
      field_type = TypeSpec("uint64");
      break;
    case 4:
      // I wonder if this actually "object", or some other type?  It seems to be
      if (load.kind == LoadVarOp::Kind::SIGNED) {
        result->warnings += "field " + print_info.field_name + " uses ~S with a signed load. ";
      }
      break;
    default:
      ASSERT(false);
  }

  const int field_offset = result->is_basic ? load.offset + BASIC_OFFSET : load.offset;
  Field field(print_info.field_name, field_type, field_offset);
  result->fields_of_type.push_back(field);
  return idx;
}
int identify_cstring_field(int idx,
Function& function,
LinkedObjectFile& file,
TypeInspectorResult* result,
FieldPrint& print_info) {
(void)file;
auto& get_op = function.ir2.atomic_ops->ops.at(idx++);
// assuming unknown array size at first
int size = FieldPrint::UNKNOWN_ARR_SIZE;
int offset = 0;
std::string comment;
// usually either a daddiu or lq
if (!get_ptr_offset(get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP), &offset)) {
// daddiu failed, try lq
auto load_info = get_load_info_from_set(get_op.get());
if (load_info.size == 16) {
size = 16;
offset = load_info.offset;
comment = "field uses ~g print with a quadword load!";
} else {
printf("bad get ptr offset %s\n", get_op->to_string(function.ir2.env).c_str());
ASSERT(false);
}
}
if (result->is_basic) {
offset += BASIC_OFFSET;
}
Field field(print_info.field_name, TypeSpec("uint8"), offset);
field.set_array(size);
if (!comment.empty()) {
field.set_comment(comment);
}
result->fields_of_type.push_back(field);
return idx;
}
int detect(int idx, Function& function, LinkedObjectFile& file, TypeInspectorResult* result) {
auto& get_format_op = function.ir2.atomic_ops->ops.at(idx++);
if (!is_set_reg_to_symbol_value(get_format_op.get(), make_gpr(Reg::T9), "format")) {
return idx;
ASSERT_MSG(false,
fmt::format("bad get format: {}\n", get_format_op->to_string(function.ir2.env)));
}
auto& get_true = function.ir2.atomic_ops->ops.at(idx++);
if (!is_set_reg_to_symbol_ptr(get_true.get(), make_gpr(Reg::A0), "#t")) {
ASSERT_MSG(false, "bad get true");
}
auto sstr = get_string_loaded_to_reg(function.ir2.atomic_ops->ops.at(idx++).get(),
make_gpr(Reg::A1), file);
if (!sstr) {
ASSERT_MSG(false, "bad sstr");
}
// hack to ignore format print from enum->string and other unexpected stuff
if (sstr->substr(0, 1) != "~" || sstr == "~T [~D]~2Tactor-group: ~`actor-group`P~%" ||
sstr == "~T [~D]~2Tbuffer: ~A~%") {
return idx;
}
auto info = get_field_print(*sstr);
auto& first_get_op = function.ir2.atomic_ops->ops.at(idx);
// v1 load (process pointer):
// lw t9, format(s7)
// daddiu a0, s7, #t
// daddiu a1, fp, L389
// lwu v1, 12(gp)
// beq s7, v1, L281
// or a2, s7, r0
// B1:
// lwu v1, 0(v1)
// lwu a2, 28(v1)
// B2:
// L281:
// jalr ra, t9
auto load_a2_gp = is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP));
auto load_v1_gp = is_get_load(first_get_op.get(), make_gpr(Reg::V1), make_gpr(Reg::GP));
if ((load_a2_gp || load_v1_gp) &&
(info.format == 'D' || info.format == 'd' || info.format == 'X' || info.format == 'e') &&
!info.has_array && info.field_type_name.empty()) {
idx = identify_int_field(idx, function, result, info);
// it's a load!
} else if (is_get_load(first_get_op.get(), make_fpr(0), make_gpr(Reg::GP)) &&
(info.format == 'f' || info.format == 'm' || info.format == 'r' ||
info.format == 'X') &&
!info.has_array && info.field_type_name.empty()) {
idx = identify_float_field(idx, function, result, info);
} else if ((load_a2_gp || load_v1_gp) && info.format == 'A' && !info.has_array &&
info.field_type_name.empty()) {
idx = identify_basic_field(idx, function, file, result, info);
} else if ((load_a2_gp || load_v1_gp) && info.format == 'S' && !info.has_array &&
info.field_type_name.empty()) {
idx = identify_string_field(idx, function, file, result, info);
} else if ((load_a2_gp || load_v1_gp) && (info.format == 'G' || info.format == 'g') &&
!info.has_array && info.field_type_name.empty()) {
idx = identify_cstring_field(idx, function, file, result, info);
} else if ((load_a2_gp || load_v1_gp) && info.format == 'X' && !info.has_array &&
info.field_type_name.empty()) {
idx = identify_pointer_field(idx, function, result, info);
} else if (info.has_array && (info.format == 'X' || info.format == 'P') &&
info.field_type_name.empty()) {
idx = identify_array_field(idx, function, result, info);
} else if (!info.has_array && (info.format == 'X' || info.format == 'P') &&
!info.field_type_name.empty()) {
// structure.
if (is_get_load(first_get_op.get(), make_gpr(Reg::A2), make_gpr(Reg::GP))) {
// not inline
idx = identify_struct_not_inline_field(idx, function, result, info);
} else {
idx = identify_struct_inline_field(idx, function, result, info);
}
}
else {
printf("couldn't do %s, %s, adding unknown field\n", sstr->c_str(),
first_get_op->to_string(function.ir2.env).c_str());
// if all else fails, create an unknown field so the rest of the inspect can pass.
Field unknown("UNKNOWN", TypeSpec("UNKNOWN"), -1);
unknown.set_comment("field could not be read.");
result->fields_of_type.push_back(unknown);
return idx;
}
CallOp* call_op;
if (load_v1_gp) {
call_op = dynamic_cast<CallOp*>(function.ir2.atomic_ops->ops.at(idx = idx + 3).get());
} else {
// dynamic array with ~D inspect print
if (info.array_size == FieldPrint::DYNAMIC_ARRAY) {
idx++;
call_op = dynamic_cast<CallOp*>(function.ir2.atomic_ops->ops.at(idx++).get());
} else {
call_op = dynamic_cast<CallOp*>(function.ir2.atomic_ops->ops.at(idx++).get());
}
}
// inspect strings like "#x~X : ~S~%" load the field twice, once into a2, then into v1,
// then have a bunch of string branches, so we just skip this, since we already have the field.
//
// lw t9, format(s7) ;; [218] (set! t9 format)
// daddiu a0, s7, #t ;; [219] (set! a0 #t)
// daddiu a1, fp, L503 ;; [220] (set! a1 L503) "~1Tclass: #x~X : ~S~%"
// lhu a2, 26(gp) ;; [221] (set! a2 (l.hu (+ gp 26)))
// lhu v1, 26(gp) ;; [222] (set! v1 (l.hu (+ gp 26)))
// addiu a3, r0, 149 ;; [223] (set! a3 149)
// bne v1, a3, L70 ;; [224] (b! (!= v1 a3) L70 (nop!))
// sll r0, r0, 0
if (load_a2_gp) {
auto load_v1_gp_again = is_get_load(function.ir2.atomic_ops->ops.at(idx - 1).get(),
make_gpr(Reg::V1), make_gpr(Reg::GP));
if (load_v1_gp_again) {
return idx;
}
}
if (!call_op) {
printf("bad call\n");
// ASSERT(false);
return -1;
}
return idx;
}
/*
 * Analyze the inspect method of type_name and return a guessed deftype for it as text.
 *
 * The type flags and parent are looked up in dts, then the atomic ops of inspect_method are
 * scanned for `format` calls, each of which is handed to detect() to recover one field.
 * The result is stored in ti_cache.previous_results under type_name and printed with
 * print_as_deftype(). If the type was already inspected (and is not listed in
 * g_duplicate_inspects_jak3), only an "already defined" comment line is returned.
 */
std::string inspect_inspect_method(Function& inspect_method,
                                   const std::string& type_name,
                                   DecompilerTypeSystem& dts,
                                   LinkedObjectFile& file,
                                   DecompilerTypeSystem& previous_game_ts,
                                   TypeInspectorCache& ti_cache,
                                   ObjectFileDB::PerObjectAllTypeInfo& object_file_meta) {
  lg::print(" iim: {}\n", inspect_method.name());
  TypeInspectorResult result;
  ASSERT(type_name == inspect_method.guessed_name.type_name);

  TypeFlags flags;
  flags.flag = 0;
  result.found_flags = dts.lookup_flags(type_name, &flags.flag);
  result.type_name = type_name;
  result.parent_type_name = dts.lookup_parent_from_inspects(type_name);
  result.flags = flags.flag;
  result.type_size = flags.size;
  result.type_method_count = flags.methods;

  // ignore duplicate inspects
  if (ti_cache.previous_results.find(type_name) != ti_cache.previous_results.end() &&
      !(std::find(g_duplicate_inspects_jak3.begin(), g_duplicate_inspects_jak3.end(), type_name) !=
        g_duplicate_inspects_jak3.end())) {
    return fmt::format(";; {} is already defined!\n", type_name);
  }

  // Only set heap-base if it's different from the automatic one
  // A child (or child of a child) of process ALWAYS has heap-base set.
  if (flags.heap_base > 0) {
    auto process_type = dts.ts.get_type_of_type<BasicType>("process");
    auto auto_hb = (flags.size - process_type->size() + 0xf) & ~0xf;
    if (auto_hb != flags.heap_base) {
      result.type_heap_base = std::make_optional(flags.heap_base);
    }
  }

  {
    TypeFlags parent_flags;
    parent_flags.flag = 0;
    if (result.parent_type_name != "UNKNOWN" &&
        dts.lookup_flags(result.parent_type_name, &parent_flags.flag)) {
      result.parent_method_count = parent_flags.methods;
    }
  }

  if (!result.found_flags) {
    lg::print("[iim] no flags found for {}, maybe defined in the kernel\n", type_name);
  }

  // find where the field prints start; fall back to the process-style prologue matcher.
  int idx = get_start_idx(inspect_method, file, &result, result.parent_type_name, type_name,
                          inspect_method.ir2.env);
  if (idx < 0) {
    idx = get_start_idx_process(inspect_method, result.parent_type_name, inspect_method.ir2.env,
                                &result);
  }

  StructureType* old_game_type = nullptr;
  if (previous_game_ts.ts.fully_defined_type_exists(type_name)) {
    old_game_type = dynamic_cast<StructureType*>(previous_game_ts.ts.lookup_type(type_name));
  }

  if (idx <= 0) {
    // can't get any field...
    result.warnings += "Failed to read fields.";
    ti_cache.previous_results[type_name] = result;
    return result.print_as_deftype(old_game_type, ti_cache.previous_results, previous_game_ts,
                                   object_file_meta);
  }

  auto& ops = inspect_method.ir2.atomic_ops->ops;
  const std::string format_suffix = "format)";
  while (idx < int(ops.size()) - 2 && idx != -1) {
    // skip over non-format calls in inspects
    const auto op_str = ops.at(idx)->to_string(inspect_method.ir2.env);
    // length check first: an op string shorter than the suffix must not reach the compare,
    // a start position past the end of the string throws std::out_of_range.
    const bool loads_format = op_str.size() >= format_suffix.size() &&
                              op_str.compare(op_str.size() - format_suffix.size(),
                                             format_suffix.size(), format_suffix) == 0;
    if (!loads_format) {
      idx++;
      continue;
    }
    // detect() returns -1 when it fails to find the format call, which ends the loop.
    idx = detect(idx, inspect_method, file, &result);
  }

  if (idx == -1) {
    result.warnings += "Failed to read some fields.";
  }

  ti_cache.previous_results[type_name] = result;
  return result.print_as_deftype(old_game_type, ti_cache.previous_results, previous_game_ts,
                                 object_file_meta);
}
/*
 * Format a method from the previous game's type as text to place next to a guessed method.
 * With omit_comment set, the leading ";;" comment marker is left out.
 * Methods whose type is not a `function` or `state` with at least one argument are printed
 * with their full type and marked as "weird method".
 */
std::string old_method_string(const MethodInfo& info, const bool omit_comment = false) {
  const bool has_args = info.type.arg_count() > 0;
  const bool is_callable =
      has_args && (info.type.base_type() == "function" || info.type.base_type() == "state");

  if (!is_callable) {
    return omit_comment
               ? fmt::format(" ({} {}) weird method", info.name, info.type.print())
               : fmt::format(" ;; ({} {}) weird method", info.name, info.type.print());
  }

  std::string text = omit_comment ? fmt::format(" ({} (", info.name)
                                  : fmt::format(" ;; ({} (", info.name);

  // every type argument except the last one is a parameter, separated by single spaces.
  const int return_idx = (int)info.type.arg_count() - 1;
  for (int arg = 0; arg < return_idx; arg++) {
    if (arg > 0) {
      text += ' ';
    }
    text += info.type.get_arg(arg).print();
  }

  // the last type argument is printed after the parameter list.
  text += ") ";
  text += info.type.get_arg(info.type.arg_count() - 1).print();
  if (info.type.base_type() == "state") {
    text += " :state";
  }
  text += ")";
  return text;
}
/*
 * Should the type of this field be replaced with the type the previous game used for it?
 * True for UNKNOWN fields, and for non-inline fields whose type prints as "basic" or "uint32".
 */
bool allow_guess(const Field& field) {
  // allow anything UNKNOWN because we have no idea
  const bool is_unknown = field.type().base_type() == "UNKNOWN";
  if (is_unknown) {
    return true;
  }

  // don't allow known inline's because we get that right.
  if (field.is_inline()) {
    return false;
  }

  const auto printed_type = field.type().print();
  return printed_type == "basic" || printed_type == "uint32";
}
/*
 * Print this result as a deftype wrapped in a block comment (#| ... |#).
 *
 * Fields that are also found in the parent's entry of previous_results are left out.
 * Fields accepted by allow_guess() take the type of the same-named field of old_game_type when
 * that type differs, and are marked as guessed. Method stubs, state methods and non-virtual
 * states are appended using object_file_meta, annotated with information from the previous
 * game where available.
 *
 * Note that this updates the types stored in fields_of_type for guessed fields.
 *
 * old_game_type may be null
 */
std::string TypeInspectorResult::print_as_deftype(
    StructureType* old_game_type,
    std::unordered_map<std::string, TypeInspectorResult>& previous_results,
    DecompilerTypeSystem& previous_game_ts,
    ObjectFileDB::PerObjectAllTypeInfo& object_file_meta) {
  std::string result;
  result += "#|\n";
  result += fmt::format("(deftype {} ({})\n (", type_name, parent_type_name);

  int longest_field_name = 0;
  int longest_type_name = 0;
  int longest_mods = 0;
  std::string inline_string = ":inline";
  std::string dynamic_string = ":dynamic";
  std::vector<bool> needed, was_guess;

  // a field is only needed if the parent doesn't already have it.
  {
    const auto& prev_it = previous_results.find(parent_type_name);
    if (prev_it != previous_results.end()) {
      auto& prev_fields = prev_it->second.fields_of_type;
      for (auto& field : fields_of_type) {
        auto field_it = std::find(prev_fields.begin(), prev_fields.end(), field);
        needed.push_back(field_it == prev_fields.end());
      }
    } else {
      needed.resize(fields_of_type.size(), true);
    }
  }

  // replace weakly-typed fields with the type from the previous game, where one exists.
  for (auto& field : fields_of_type) {
    if (!allow_guess(field)) {
      was_guess.push_back(false);
      continue;
    }
    if (old_game_type) {
      Field old_field;
      if (old_game_type->lookup_field(field.name(), &old_field) &&
          field.type() != old_field.type()) {
        field.type() = old_field.type();
        was_guess.push_back(true);
      } else {
        was_guess.push_back(false);
      }
    } else {
      was_guess.push_back(false);
    }
  }

  // first pass: column widths.
  for (size_t field_idx = 0; field_idx < fields_of_type.size(); field_idx++) {
    if (!needed[field_idx]) {
      continue;
    }
    auto& field = fields_of_type[field_idx];
    longest_field_name = std::max(longest_field_name, int(field.name().size()));
    longest_type_name = std::max(longest_type_name, int(field.type().print().size()));
    int mods = 0;
    // mods are array size, :inline, :dynamic
    if (field.is_array() && !field.is_dynamic()) {
      // "??" for unknown array size
      if (field.array_size() == FieldPrint::UNKNOWN_ARR_SIZE) {
        mods += 2;
      } else {
        mods += std::to_string(field.array_size()).size();
      }
    }
    if (field.is_inline()) {
      if (mods) {
        mods++;  // space
      }
      mods += inline_string.size();
    }
    if (field.is_dynamic()) {
      if (mods) {
        mods++;  // space
      }
      mods += dynamic_string.size();
    }
    longest_mods = std::max(longest_mods, mods);
  }

  // second pass: print the fields.
  for (size_t field_idx = 0; field_idx < fields_of_type.size(); field_idx++) {
    if (!needed[field_idx]) {
      continue;
    }
    auto& field = fields_of_type[field_idx];
    result += "(";
    result += field.name();
    result.append(1 + (longest_field_name - int(field.name().size())), ' ');
    result += field.type().print();
    result.append(1 + (longest_type_name - int(field.type().print().size())), ' ');

    std::string mods;
    if (field.is_array() && !field.is_dynamic()) {
      if (field.array_size() == FieldPrint::UNKNOWN_ARR_SIZE) {
        mods += "??";
        mods += " ";
      } else {
        mods += std::to_string(field.array_size());
        mods += " ";
      }
    }
    if (field.is_inline()) {
      mods += inline_string;
      mods += " ";
    }
    if (field.is_dynamic()) {
      mods += dynamic_string;
      mods += " ";
    }
    result.append(mods);
    // pad the mods column to longest_mods + 1 characters. A non-empty mods string already
    // ends in one space. The subtraction is done on signed ints so that an empty mods string
    // gives -1 instead of wrapping around as an unsigned size.
    const int mods_width = int(mods.size()) - 1;
    result.append(longest_mods - mods_width, ' ');
    result.append(":offset-assert ");
    result.append(std::to_string(field.offset()));
    result.append(")");

    // mention what the previous game had, if it differs.
    if (old_game_type) {
      Field old_field;
      if (old_game_type->lookup_field(field.name(), &old_field)) {
        if (old_field.type() != field.type()) {
          result += fmt::format(" ;; {}", old_field.type().print());
          if (old_field.is_array() && !old_field.is_dynamic()) {
            result += fmt::format(" {}", old_field.array_size());
          }
          if (old_field.is_inline()) {
            result += " :inline";
          }
          if (old_field.is_dynamic()) {
            result += " :dynamic";
          }
        }
      }
    }
    if (field.has_comment()) {
      result += fmt::format(" ;; {}", field.comment());
    }
    if (was_guess[field_idx]) {
      result += " ;; guessed by decompiler";
    }
    result.append("\n ");
  }
  result.append(")\n");

  result.append(fmt::format(" :method-count-assert {}\n", type_method_count));
  result.append(fmt::format(" :size-assert #x{:x}\n", type_size));
  if (type_heap_base.has_value()) {
    result.append(fmt::format(" :heap-base #x{:x}\n", type_heap_base.value()));
  }
  result.append(fmt::format(" :flag-assert #x{:x}\n ", flags));
  if (!warnings.empty()) {
    result.append(";; ");
    result.append(warnings);
    result.append("\n ");
  }

  std::string state_methods_list;
  std::unordered_map<int, std::string> method_states = {};
  if (object_file_meta.state_methods.count(type_name) != 0) {
    method_states = object_file_meta.state_methods.at(type_name);
    // print in ascending method id order: the iteration order of the unordered_map is
    // unspecified, which would make the generated text differ from run to run.
    std::vector<std::pair<int, std::string>> ordered_states(method_states.begin(),
                                                            method_states.end());
    std::sort(ordered_states.begin(), ordered_states.end());
    for (const auto& [method_id, state_name] : ordered_states) {
      MethodInfo info;
      state_methods_list += fmt::format(" {} ;; {}", state_name, method_id);
      if (old_game_type && old_game_type->get_my_method(method_id, &info)) {
        state_methods_list += ", old:" + old_method_string(info, true);
      }
      state_methods_list += "\n";
    }
  }

  std::string methods_list;
  if (type_method_count > 9) {
    MethodInfo old_new_method;
    if (old_game_type && old_game_type->get_my_new_method(&old_new_method)) {
      methods_list.append(" (new (symbol type) _type_) ;; 0");
      methods_list.append(old_method_string(old_new_method));
      methods_list.push_back('\n');
    }
    for (int i = parent_method_count; i < type_method_count; i++) {
      // If it's a state-method (virtual state) skip it
      if (method_states.find(i) != method_states.end()) {
        continue;
      }
      methods_list.append(fmt::format(" ({}-method-{} () none) ;; {}", type_name, i, i));
      if (old_game_type) {
        MethodInfo info;
        if (old_game_type->get_my_method(i, &info)) {
          methods_list += old_method_string(info);
        }
      }
      methods_list.push_back('\n');
    }
  }

  // non-virtual states
  std::string non_virtual_states_list;
  for (const auto& [state_name, guessed_type_name] : object_file_meta.non_virtual_state_guesses) {
    if (type_name == guessed_type_name) {
      std::string line;
      line += fmt::format(" {}", state_name);
      auto it = previous_game_ts.symbol_types.find(state_name);
      if (it != previous_game_ts.symbol_types.end()) {
        line += fmt::format(" ;; associated process guessed by decompiler, old: {}",
                            it->second.print());
      }
      non_virtual_states_list.append(line + "\n");
    }
  }

  // methods and virtual states
  if (!methods_list.empty()) {
    result.append("(:methods\n");
    result.append(methods_list);
    result.append(" )\n ");
  }
  if (!state_methods_list.empty()) {
    result.append("(:state-methods\n");
    result.append(state_methods_list);
    result.append(" )\n ");
  }
  // non-virtual states
  if (!non_virtual_states_list.empty()) {
    result.append("(:states\n");
    result.append(non_virtual_states_list);
    result.append(" )\n ");
  }
  result.append(")\n");
  result += "|#\n";
  return result;
}
/*
 * Search form with regex and return the text of capture group 1.
 * Returns an empty string when there is no match, or when the match does not consist of
 * exactly two sub-matches (the whole match plus a single capture group).
 */
std::string get_regex_match(const std::string& form, const std::regex& regex) {
  std::smatch found;
  const bool hit = std::regex_search(form, found, regex);
  if (!hit || found.size() != 2) {
    return "";
  }
  return found.str(1);
}
/*
 * Look at the data behind label_name: if the word just before the label is a type pointer to
 * `state` and the word at the label is a symbol pointer, return that symbol's name.
 * Returns an empty string in every other case, including when a lookup throws.
 */
std::string get_state_symbol_name(LinkedObjectFile& file, const std::string& label_name) {
  try {
    auto& lbl = file.get_label_by_name(label_name);
    auto& seg_words = file.words_by_seg.at(lbl.target_segment);

    // the type tag is the word directly before the labelled word.
    const int tag_idx = (lbl.offset / 4) - 1;
    auto& tag_word = seg_words.at(tag_idx);
    const bool tagged_as_state =
        tag_word.kind() == LinkedWord::TYPE_PTR && tag_word.symbol_name() == "state";
    if (!tagged_as_state) {
      return "";
    }

    auto& name_word = seg_words.at(tag_idx + 1);
    if (name_word.kind() == LinkedWord::SYM_PTR) {
      return name_word.symbol_name();
    }
    return "";
  } catch (const std::exception&) {
    // unknown label or out-of-range word index: not a state.
    return "";
  }
}
/*
 * Return the type name stored in the word just before label_name, if that word is a type
 * pointer. Returns an empty string otherwise, including when a lookup throws.
 */
std::string get_label_type_name(LinkedObjectFile& file, const std::string& label_name) {
  try {
    auto& lbl = file.get_label_by_name(label_name);
    auto& seg_words = file.words_by_seg.at(lbl.target_segment);

    // the type tag is the word directly before the labelled word.
    const int tag_idx = (lbl.offset / 4) - 1;
    auto& tag_word = seg_words.at(tag_idx);
    if (tag_word.kind() == LinkedWord::TYPE_PTR) {
      return tag_word.symbol_name();
    }
    return "";
  } catch (const std::exception&) {
    // unknown label or out-of-range word index.
    return "";
  }
}
/*
 * Scan the atomic ops of a top-level function and record what they reveal in objectFile:
 *  - symbol_types: symbols that a label with a type tag is stored to,
 *  - state_methods: states installed as methods with method-set!,
 *  - type_info / type_names_in_order: types created by a call to the `new` method of `type`.
 * Ops are matched on their printed form, so this relies on the text produced by to_string.
 * Does nothing if the function has no atomic ops.
 */
void inspect_top_level_for_metadata(Function& top_level,
                                    LinkedObjectFile& file,
                                    DecompilerTypeSystem& /*dts*/,
                                    DecompilerTypeSystem& /*previous_game_ts*/,
                                    ObjectFileDB::PerObjectAllTypeInfo& objectFile) {
  // State as a method:
  /*
  lui v1, L267 ;; [ 77] (set! gp-0 L267) [] -> [gp: <uninitialized> ]
  ori gp, v1, L267
  lw t9, method-set!(s7) ;; [ 78] (set! t9-12 method-set!) [] -> [t9: <uninitialized> ]
  lw a0, com-airlock(s7) ;; [ 79] (set! a0-12 com-airlock) [] -> [a0: <uninitialized> ]
  addiu a1, r0, 21 ;; [ 80] (set! a1-10 21) [] -> [a1: <uninitialized> ]
  or a2, gp, r0 ;; [ 81] (set! a2-10 gp-0) [gp: <uninitialized> ] -> [a2:
  <uninitialized> ]
  */
  // State as symbol:
  /*
  lui v1, L753 ;; [354] (set! v1-38 L753) [] -> [v1: <uninitialized> ]
  ori v1, v1, L753
  sw v1, target-roll(s7) ;; [355] (s.w! target-roll v1-38) [v1: <uninitialized> ] -> []
  */
  if (!top_level.ir2.atomic_ops) {
    return;
  }

  auto& ops = top_level.ir2.atomic_ops->ops;
  auto& env = top_level.ir2.env;
  const int op_count = (int)ops.size();

  // build the patterns once per call instead of once per op.
  const std::regex label_set_regex("\\(set!\\s[^\\s]*\\s(L.*)\\)");
  const std::regex store_symbol_regex("\\(s\\.w!\\s([^\\(\\)\\s]*)\\s");
  const std::regex any_set_regex("\\(set!\\s[^\\s]*\\s(.*)\\)");
  const std::regex int_set_regex("\\(set!\\s[^\\s]*\\s(\\d*)\\)");

  // Check for non-method states
  std::string last_seen_label;
  for (int i = 0; i < op_count; i++) {
    const std::string as_str = ops.at(i)->to_string(env);
    // Keep track of the last seen label so we can easily reference it if a later operation uses
    // it
    auto label_match = get_regex_match(as_str, label_set_regex);
    if (!label_match.empty()) {
      last_seen_label = label_match;
      // Check if the next operation is storing the label
      if (i + 1 >= op_count) {
        // the label is set by the very last op, so there is no store to look at.
        continue;
      }
      std::string curr_op = ops.at(i + 1)->to_string(env);
      auto symbol_name = get_regex_match(curr_op, store_symbol_regex);
      if (symbol_name.empty()) {
        continue;
      }
      // Look up the type tag of the label's data
      auto label_type_name = get_label_type_name(file, last_seen_label);
      if (label_type_name.empty()) {
        continue;
      }
      objectFile.symbol_types[symbol_name] = label_type_name;
    }

    if (as_str.find("method-set!") != std::string::npos) {
      // The next operation should have the type name
      i++;
      if (i >= op_count) {
        break;  // ran out of ops before the type name
      }
      std::string curr_op = ops.at(i)->to_string(env);
      auto type_match = get_regex_match(curr_op, any_set_regex);
      if (type_match.empty()) {
        continue;
      }
      i++;
      if (i >= op_count) {
        break;  // ran out of ops before the method id
      }
      // The next operation should have the method id
      curr_op = ops.at(i)->to_string(env);
      auto method_id_match = get_regex_match(curr_op, int_set_regex);
      if (method_id_match.empty()) {
        continue;
      }
      int method_id = std::stoi(method_id_match);
      // Now check the last seen label to see if it's a state
      auto state_name = get_state_symbol_name(file, last_seen_label);
      if (state_name.empty()) {
        continue;
      }
      // Ensure there are no labels between now and when the `method-set!` is actually called
      bool was_another_label = false;
      for (int j = i; j < op_count; j++) {
        const std::string temp_as_str = ops.at(j)->to_string(env);
        if (temp_as_str.find("call!") != std::string::npos) {
          break;
        }
        auto temp_label_match = get_regex_match(temp_as_str, label_set_regex);
        if (!temp_label_match.empty()) {
          was_another_label = true;
          break;
        }
      }
      if (!was_another_label) {
        objectFile.state_methods[type_match][method_id] = state_name;
      }
    }
  }

  // Check for types
  // if there's no inspect method, we can use just use the call to the type's new method
  // to find the type
  for (int i = 0; i < op_count - 5; i++) {
    // lw v1, type(s7) ;; [ 20] (set! v1-10 type) [] -> [v1: <the etype type> ]
    const auto& aop_0 = ops.at(i);
    if (!is_set_reg_to_symbol_value(aop_0.get(), {}, "type")) {
      continue;
    }
    // lwu t9, 16(v1) ;; [ 21] (set! t9-0 (l.wu (+ v1-10 16)))
    // ;; [v1: <the etype type> ] -> [t9: (function symbol type int
    // type)
    const auto& aop_1 = ops.at(i + 1);
    if (!is_set_reg_to_load(aop_1.get(), Register(Reg::GPR, Reg::T9), 16)) {
      continue;
    }
    // daddiu a0, s7, float-type ;; [ 22] (set! a0-0 'float-type) [] -> [a0: symbol ]
    const auto& aop_2 = ops.at(i + 2);
    auto type_name = get_set_reg_to_symbol_ptr(aop_2.get(), Register(Reg::GPR, Reg::A0));
    if (!type_name) {
      continue;
    }
    // lw a1, uint32(s7) ;; [ 23] (set! a1-0 uint32) [] -> [a1: <the etype uint32> ]
    const auto& aop_3 = ops.at(i + 3);
    auto parent_name = get_set_reg_to_symbol_value(aop_3.get(), Register(Reg::GPR, Reg::A1));
    if (!parent_name) {
      continue;
    }
    // ld a2, L117(fp) ;; [ 24] (set! a2-0 (l.d L117)) [] -> [a2: uint ]
    const auto& aop_4 = ops.at(i + 4);
    auto flags = get_set_reg_to_u64_load(aop_4.get(), Register(Reg::GPR, Reg::A2), file);
    if (!flags) {
      // far label load
      // lui v1, L1352 ;; [ 24] (set! v1-10 L1352)
      // ori v1, v1, L1352
      // addu v1, fp, v1
      // ld a2, 0(v1) ;; [ 25] (set! a2-0 (l.d v1-10))
      flags = get_set_reg_to_lui(aop_4.get(), Register(Reg::GPR, Reg::V1), file);
      if (!flags) {
        continue;
      }
    }
    // jalr ra, t9 ;; [ 25] (call! a0-0 a1-0 a2-0)
    const auto& aop_5 = ops.at(i + 5);
    if (!dynamic_cast<CallOp*>(aop_5.get())) {
      // far labels: the call is one op later. The loop bound only guarantees that op i + 5
      // exists, so check before looking at i + 6.
      if (i + 6 >= op_count) {
        continue;
      }
      const auto& aop_6 = ops.at(i + 6);
      if (!dynamic_cast<CallOp*>(aop_6.get())) {
        continue;
      }
    }

    if (objectFile.type_info.count(*type_name) == 0) {
      objectFile.type_names_in_order.push_back(*type_name);
    }
    auto& info = objectFile.type_info[*type_name];
    if (!info.from_inspect_method) {
      // no inspect method! generate a deftype.
      info.type_definition = fmt::format(
          ";; (deftype {} ({})\n"
          ";; ()\n"
          ";; :flag-assert #x{:x}\n"
          ";; )\n",
          *type_name, *parent_name, *flags);
    }
    info.parent = *parent_name;
    info.flags = *flags;
  }
}
/*
 * Build commented-out `define-extern` lines for the symbols that a top-level function stores to.
 * A symbol is only handled the first time it is seen (tracked in
 * object_file_meta.already_seen_symbols), and symbols that name a partially defined type in dts
 * are not printed. The type is taken from object_file_meta.symbol_types when known, otherwise
 * it is `object`. If the previous game has a type for the symbol, it is added as a comment.
 */
std::string inspect_top_level_symbol_defines(Function& top_level,
                                             LinkedObjectFile& /*file*/,
                                             DecompilerTypeSystem& dts,
                                             DecompilerTypeSystem& previous_game_ts,
                                             ObjectFileDB::PerObjectAllTypeInfo& object_file_meta) {
  if (!top_level.ir2.atomic_ops) {
    return {};
  }

  std::string defines;
  for (auto& aop : top_level.ir2.atomic_ops->ops) {
    // only stores directly to a symbol value are interesting.
    auto* store = dynamic_cast<StoreOp*>(aop.get());
    if (!store) {
      continue;
    }
    if (store->addr().kind() != SimpleExpression::Kind::IDENTITY) {
      continue;
    }
    if (!store->addr().get_arg(0).is_sym_val()) {
      continue;
    }

    auto& sym_name = store->addr().get_arg(0).get_str();
    auto& seen = object_file_meta.already_seen_symbols;
    if (seen.find(sym_name) != seen.end()) {
      continue;
    }
    seen.insert(sym_name);

    if (dts.ts.partially_defined_type_exists(sym_name)) {
      continue;
    }

    // Look to see if we know the type name
    std::string sym_type = "object";
    if (object_file_meta.symbol_types.count(sym_name) != 0) {
      sym_type = object_file_meta.symbol_types.at(sym_name);
    }
    defines += fmt::format(";; (define-extern {} {})", sym_name, sym_type);

    auto old_type = previous_game_ts.symbol_types.find(sym_name);
    if (old_type != previous_game_ts.symbol_types.end()) {
      defines += fmt::format(" ;; {}", old_type->second.print());
    }
    defines += '\n';
  }
  return defines;
}
} // namespace decompiler