diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index 5c954eabac..e71ad1b292 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -9,12 +9,15 @@ #include "decompiler/Disasm/Instruction.h" #include "decompiler/IR2/IR2_common.h" #include "Env.h" +#include "common/util/CopyOnWrite.h" namespace decompiler { class FormElement; class ConditionElement; class FormPool; class DecompilerTypeSystem; +struct RegisterTypeState; +class InstrTypeState; /*! * An atomic operation represents a single operation from the point of view of the IR2 system. @@ -733,6 +736,7 @@ class StackSpillStoreOp : public AtomicOp { const Env& env, DecompilerTypeSystem& dts) override; void collect_vars(RegAccessSet& vars) const override; + int offset() const { return m_offset; } private: SimpleAtom m_value; diff --git a/decompiler/IR2/MultiTypeAnalysis.cpp b/decompiler/IR2/MultiTypeAnalysis.cpp index e13fab789d..cce3c19113 100644 --- a/decompiler/IR2/MultiTypeAnalysis.cpp +++ b/decompiler/IR2/MultiTypeAnalysis.cpp @@ -21,209 +21,693 @@ #include "decompiler/Function/Warnings.h" #include "MultiTypeAnalysis.h" #include "decompiler/IR2/Env.h" +#include "decompiler/util/DecompilerTypeSystem.h" +#include "decompiler/Function/Function.h" namespace decompiler { -using RegState = CopyOnWrite; +//RegisterTypeState* TypeAnalysisGraph::alloc_regstate() { +// node_pool.push_back(std::make_unique()); +// return node_pool.back().get(); +//} +// +//TypeAnalysisGraph make_analysis_graph(const TypeSpec& my_type, +// DecompilerTypeSystem& dts, +// Function& func, +// bool verbose) { +// TypeAnalysisGraph result; +// auto clobber_type = result.alloc_regstate(); +// *clobber_type = RegisterTypeState(PossibleType(TP_Type::make_uninitialized())); +// +// InstrTypeState default_state; +// for (auto& slot : func.ir2.env.stack_spills().map()) { +// default_state.add_stack_slot(slot.first); +// } +// +// // approximate the size and complain if it's huge. 
+// int state_size = +// sizeof(InstrTypeState) + default_state.stack_slot_count() * (sizeof(RegisterNode) + 4); +// +// result.block_start_types.resize(func.basic_blocks.size()); +// result.after_op_types.resize(func.ir2.atomic_ops->ops.size()); +// +// int instr_count = result.block_start_types.size() + result.after_op_types.size(); +// int ref_size_kb = (instr_count * state_size) / 1024; +// +// if (verbose) { +// if (ref_size_kb > 500) { +// lg::info( +// "Func {} has {} instr states, each {} bytes, for a total of {} kb in just references.", +// func.guessed_name.to_string(), instr_count, state_size, ref_size_kb); +// } +// } +// +// result.topo_sort = func.bb_topo_sort(); +// if (verbose) { +// if (result.topo_sort.vist_order.size() > 100) { +// lg::info("Func {} has {} basic blocks.", func.guessed_name.to_string(), +// result.topo_sort.vist_order.size()); +// } +// } +// +// // set up the initial state: +// int allocation_count = 0; +// int uid = 1; +// +// auto& initial_state = result.block_start_types.at(0); +// for (auto& r : initial_state.regs()) { +// // okay to leave these as uninitialized - the function setup stuff will take care of this. 
+// r.set_alloc(result.alloc_regstate()); +// r.set_uid(uid++); +// allocation_count++; +// } +// for (auto& s : initial_state.slots()) { +// s.second.set_alloc(result.alloc_regstate()); +// s.second.set_uid(uid++); +// allocation_count++; +// } +// +// // do allocations +// auto& aop = func.ir2.atomic_ops; +// bool run_again = true; +// int iterations = 0; +// while (run_again) { +// iterations++; +// run_again = false; +// // do each block in the topological sort order: +// for (auto block_id : result.topo_sort.vist_order) { +// auto& block = func.basic_blocks.at(block_id); +// auto* init_types = &result.block_start_types.at(block_id); +// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); +// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { +// AtomicOp* op = aop->ops.at(op_id).get(); +// +// result.after_op_types.at(op_id) = *init_types; +// +// // todo write stack slots. +// auto* op_as_stack_store = dynamic_cast(op); +// if (op_as_stack_store) { +// auto& state = init_types->get_slot(op_as_stack_store->offset()); +// if (!state.alloc()) { +// allocation_count++; +// run_again = true; +// state.set_alloc(result.alloc_regstate()); +// } +// state.set_uid(uid++); +// } +// +// for (const auto& reg : op->write_regs()) { +// if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { +// continue; +// } +// auto& state = init_types->get(reg); +// if (!state.alloc()) { +// allocation_count++; +// run_again = true; +// state.set_alloc(result.alloc_regstate()); +// } +// state.set_uid(uid++); +// } +// +// for (const auto& reg : op->clobber_regs()) { +// if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { +// continue; +// } +// auto& c = init_types->get(reg); +// c.set_alloc(clobber_type); +// c.set_uid(uid++); +// } +// +// // for the next op... 
+// init_types = &result.after_op_types.at(op_id); +// } +// +// // propagate the types: for each possible succ +// for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { +// if (succ_block_id != -1) { +// // set types to LCA (current, new) +// auto& succ_types = result.block_start_types.at(succ_block_id); +// for (size_t i = 0; i < succ_types.regs().size(); i++) { +// auto& succ = succ_types.regs()[i]; +// auto& end = init_types->regs()[i]; +// +// if (succ.uid() == -1) { +// succ.set_uid(end.uid()); +// } else { +// if (succ.uid() == end.uid()) { +// // nice!! +// // lg::info("Saved allocation"); +// } else { +// succ.set_uid(uid++); +// if (!succ.alloc()) { +// run_again = true; +// succ.set_alloc(result.alloc_regstate()); +// allocation_count++; +// } +// } +// } +// } +// } +// } +// } +// } +// +// int allocations_size_kb = (allocation_count * sizeof(RegisterTypeState)) / 1024; +// int total_size_new_method_kb = allocations_size_kb + ref_size_kb; +// int total_size_old_method_kb = (instr_count * 64 * sizeof(RegisterTypeState)) / 1024; +// +// if (total_size_old_method_kb > 1000) { +// lg::info("Function {} new {} kb old {} kb, {} allocs", func.guessed_name.to_string(), +// total_size_new_method_kb, total_size_old_method_kb, allocation_count); +// } +// +// return result; +//} -bool DerefHint::matches(const FieldReverseLookupOutput& value) const { - if (value.tokens.size() != tokens.size()) { - return false; - } - - for (size_t i = 0; i < value.tokens.size(); i++) { - if (!tokens[i].matches(value.tokens[i])) { - return false; - } - } - - return true; -} - -bool DerefHint::Token::matches(const FieldReverseLookupOutput::Token& other) const { - switch (kind) { - case Kind::INTEGER: - return other.kind == FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX && - other.idx == integer; - case Kind::FIELD: - return other.kind == FieldReverseLookupOutput::Token::Kind::FIELD && other.name == name; - case Kind::VAR: - return other.kind == 
FieldReverseLookupOutput::Token::Kind::VAR_IDX; - default: - assert(false); - } -} - -/*! - * Safely access the decision referenced by this TypeDecisionParent. - * This will work even if the actual RegisterTypeState has been modified since the reference was - * created. - */ -const PossibleType& TypeDecisionParent::get() const { - return instruction->get_const(reg).possible_types.at(type_index); -} - -/*! - * Figure out if this has been eliminated or not. Caches the result to avoid looking it up again and - * again. Elimination cannot be undone. - */ -bool PossibleType::is_valid() const { - if (!m_valid_cache) { - return false; - } - - if (parent.instruction) { - // we have a parent in the tree, check if that parent is eliminated. - if (!parent.get().is_valid()) { - m_valid_cache = false; - return false; - } - } - - return true; -} - -/*! - * If we have multiple types, pick the one with the highest deref path score. - * If warnings is set, and we have to throw away a valid type, prints a warning that we made a - * somewhat arbitrary decision to throw a possible type. - * - * After calling this, you can use get_single_tp_type and get_single_type_decision. - */ -void RegisterTypeState::reduce_to_single_type(DecompWarnings* warnings, - int op_idx, - const DerefHint* hint) { - double best_score = -std::numeric_limits::infinity(); - int best_idx = -1; - bool printed_first_warning = false; - std::string warning_string; - - // find the highest score that's valid. - for (int i = 0; i < (int)possible_types.size(); i++) { - if (possible_types[i].deref_score > best_score && possible_types[i].is_valid()) { - best_idx = i; - best_score = possible_types[i].deref_score; - } - - // if we match the hint, just use that. - if (possible_types[i].deref_path && hint->matches(*possible_types[i].deref_path)) { - best_idx = i; - warnings = nullptr; // never warn if we take the hint - break; - } - } - assert(best_idx != -1); - - // eliminate stuff that isn't the best. 
- for (int i = 0; i < (int)possible_types.size(); i++) { - if (i != best_idx) { - // warn if we eliminate something that is possibly valid. - if (warnings && possible_types[i].is_valid()) { - if (!printed_first_warning) { - warning_string += fmt::format("Ambiguous type selection at op {}\n", op_idx); - printed_first_warning = true; - } - if (possible_types[best_idx].deref_path) { - warning_string += fmt::format(" {}\n", possible_types[best_idx].deref_path->print()); - } else { - warning_string += fmt::format(" {}\n", possible_types[best_idx].type.print()); - } - } - - possible_types[i].eliminate(); - } - } - - // cache the winner - single_type_cache = best_idx; - - if (warnings && printed_first_warning) { - warnings->general_warning(warning_string); - } -} - -/*! - * After this has been pruned to a single type, gets that type decision. - */ -const PossibleType& RegisterTypeState::get_single_type_decision() const { - assert(single_type_cache.has_value()); - assert(possible_types.at(*single_type_cache).is_valid()); // todo remove. - return possible_types[*single_type_cache]; -} - -/*! - * After this has been pruned to a single type, gets it as a TP_Type. - */ -const TP_Type& RegisterTypeState::get_single_tp_type() const { - return get_single_type_decision().type; -} - -/*! - * If there is at least one possibility to get a desired_type, removes anything that's not a - * desired_type. If it's not possible to get a desired type, does nothing. 
- */ -void RegisterTypeState::try_elimination(const TypeSpec& desired_types, const TypeSystem& ts) { - std::vector to_eliminate; - int keep_count = 0; - for (int i = 0; i < (int)possible_types.size(); i++) { - const auto& possibility = possible_types[i]; - if (possibility.is_valid()) { - if (ts.tc(desired_types, possibility.type.typespec())) { - keep_count++; - } else { - to_eliminate.push_back(i); - } - } - } - - if (keep_count > 0) { - for (auto idx : to_eliminate) { - possible_types.at(idx).eliminate(); - } - } -} - -namespace { - -/*! - * Create a register type state with no parent and the given typespec. - */ -RegState make_typespec_parent_regstate(const TypeSpec& typespec) { - return make_cow(TP_Type::make_from_ts(typespec)); -} - -/*! - * Create an instruction type state for the first instruction of a function. - */ -InstrTypeState construct_initial_typestate(const TypeSpec& function_type, - const TypeSpec& behavior_type, - const Env& env, - const RegState& uninitialized) { - // start with everything uninitialized - InstrTypeState result(uninitialized); - assert(function_type.base_type() == "function"); - assert(function_type.arg_count() >= 1); // must know the function type. - assert(function_type.arg_count() <= 8 + 1); // 8 args + 1 return. - - for (int i = 0; i < int(function_type.arg_count()) - 1; i++) { - auto reg_id = Register::get_arg_reg(i); - const auto& reg_type = function_type.get_arg(i); - result.get(reg_id) = make_cow(TP_Type::make_from_ts(reg_type)); - } - - if (behavior_type != TypeSpec("none")) { - result.get(Register(Reg::GPR, Reg::S6)) = - make_cow(TP_Type::make_from_ts(behavior_type)); - } - - // set stack slots as uninitialized too. 
- for (auto slot_info : env.stack_spills().map()) { - result.add_stack_slot(slot_info.first, uninitialized); - } - - return result; -} - -} // namespace +// using RegState = CopyOnWrite; +// +// bool DerefHint::matches(const FieldReverseLookupOutput& value) const { +// if (value.tokens.size() != tokens.size()) { +// return false; +// } +// +// for (size_t i = 0; i < value.tokens.size(); i++) { +// if (!tokens[i].matches(value.tokens[i])) { +// return false; +// } +// } +// +// return true; +// } +// +// bool DerefHint::Token::matches(const FieldReverseLookupOutput::Token& other) const { +// switch (kind) { +// case Kind::INTEGER: +// return other.kind == FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX && +// other.idx == integer; +// case Kind::FIELD: +// return other.kind == FieldReverseLookupOutput::Token::Kind::FIELD && other.name == name; +// case Kind::VAR: +// return other.kind == FieldReverseLookupOutput::Token::Kind::VAR_IDX; +// default: +// assert(false); +// } +// } +// +///*! +// * Safely access the decision referenced by this TypeDecisionParent. +// * This will work even if the actual RegisterTypeState has been modified since the reference was +// * created. +// */ +// const PossibleType& TypeDecisionParent::get() const { +// return instruction->get_const(reg).possible_types.at(type_index); +//} +// +// PossibleType& TypeDecisionParent::get() { +// return instruction->get(reg).mut()->possible_types.at(type_index); +//} +// +///*! +// * Figure out if this has been eliminated or not. Caches the result to avoid looking it up again +// and +// * again. Elimination cannot be undone. +// */ +// bool PossibleType::is_valid() const { +// if (!m_valid_cache) { +// return false; +// } +// +// if (child_count == 0) { +// m_valid_cache = false; +// return false; +// } +// +// if (parent.instruction) { +// // we have a parent in the tree, check if that parent is eliminated. 
+// if (!parent.get().is_valid()) { +// m_valid_cache = false; +// return false; +// } +// } +// +// return true; +//} +// +// void PossibleType::eliminate() { +// assert(is_valid()); +// if (parent.instruction) { +// auto& par = parent.get(); +// par.child_count--; +// assert(par.child_count >= 0); +// if (!par.child_count) { +// par.eliminate(); +// } +// } +//} +// +///*! +// * If we have multiple types, pick the one with the highest deref path score. +// * If warnings is set, and we have to throw away a valid type, prints a warning that we made a +// * somewhat arbitrary decision to throw a possible type. +// * +// * After calling this, you can use get_single_tp_type and get_single_type_decision. +// */ +// void RegisterTypeState::reduce_to_single_type(DecompWarnings* warnings, +// int op_idx, +// const DerefHint* hint) { +// double best_score = -std::numeric_limits::infinity(); +// int best_idx = -1; +// bool printed_first_warning = false; +// std::string warning_string; +// +// // find the highest score that's valid. +// for (int i = 0; i < (int)possible_types.size(); i++) { +// if (possible_types[i].deref_score > best_score && possible_types[i].is_valid()) { +// best_idx = i; +// best_score = possible_types[i].deref_score; +// } +// +// // if we match the hint, just use that. +// if (possible_types[i].deref_path && hint->matches(*possible_types[i].deref_path)) { +// best_idx = i; +// warnings = nullptr; // never warn if we take the hint +// break; +// } +// } +// assert(best_idx != -1); +// +// // eliminate stuff that isn't the best. +// for (int i = 0; i < (int)possible_types.size(); i++) { +// if (i != best_idx) { +// // warn if we eliminate something that is possibly valid. 
+// if (warnings && possible_types[i].is_valid()) { +// if (!printed_first_warning) { +// warning_string += fmt::format("Ambiguous type selection at op {}\n", op_idx); +// printed_first_warning = true; +// } +// if (possible_types[best_idx].deref_path) { +// warning_string += fmt::format(" {}\n", possible_types[best_idx].deref_path->print()); +// } else { +// warning_string += fmt::format(" {}\n", possible_types[best_idx].type.print()); +// } +// } +// +// possible_types[i].eliminate(); +// } +// } +// +// // cache the winner +// single_type_cache = best_idx; +// +// if (warnings && printed_first_warning) { +// warnings->general_warning(warning_string); +// } +//} +// +///*! +// * After this has been pruned to a single type, gets that type decision. +// */ +// const PossibleType& RegisterTypeState::get_single_type_decision() const { +// assert(single_type_cache.has_value()); +// assert(possible_types.at(*single_type_cache).is_valid()); // todo remove. +// return possible_types[*single_type_cache]; +//} +// +///*! +// * After this has been pruned to a single type, gets it as a TP_Type. +// */ +// const TP_Type& RegisterTypeState::get_single_tp_type() const { +// return get_single_type_decision().type; +//} +// +///*! +// * If there is at least one possibility to get a desired_type, removes anything that's not a +// * desired_type. If it's not possible to get a desired type, does nothing. 
+// */ +// bool RegisterTypeState::try_elimination(const TypeSpec& desired_types, const TypeSystem& ts) { +// std::vector to_eliminate; +// int keep_count = 0; +// for (int i = 0; i < (int)possible_types.size(); i++) { +// const auto& possibility = possible_types[i]; +// if (possibility.is_valid()) { +// if (ts.tc(desired_types, possibility.type.typespec())) { +// keep_count++; +// } else { +// to_eliminate.push_back(i); +// } +// } +// } +// +// if (keep_count > 0) { +// for (auto idx : to_eliminate) { +// possible_types.at(idx).eliminate(); +// } +// return true; +// } +// return false; +//} +// +// bool RegisterTypeState::can_eliminate_to_get(const TypeSpec& desired_types, +// const TypeSystem& ts) const { +// for (int i = 0; i < (int)possible_types.size(); i++) { +// const auto& possibility = possible_types[i]; +// if (possibility.is_valid()) { +// if (ts.tc(desired_types, possibility.type.typespec())) { +// return true; +// } +// } +// } +// return false; +//} +// +// void InstrTypeState::inherit(InstrTypeState& prev) { +// for (size_t i = 0; i < m_regs.size(); i++) { +// +// } +//} +// +// namespace { +// +///*! +// * Create a register type state with no parent and the given typespec. +// */ +// RegState make_typespec_parent_regstate(const TypeSpec& typespec) { +// auto result = make_cow(TP_Type::make_from_ts(typespec)); +// result.mut()->reduce_to_single_type(nullptr, -1, nullptr); +// return result; +//} +// +///*! +// * Create a register type state with no parent and the given typespec. +// */ +// RegState make_typespec_parent_regstate(const TP_Type& typespec) { +// auto result = make_cow(typespec); +// result.mut()->reduce_to_single_type(nullptr, -1, nullptr); +// return result; +//} +// +///*! +// * Create an instruction type state for the first instruction of a function. 
+// */ +// InstrTypeState construct_initial_typestate(const TypeSpec& function_type, +// const TypeSpec& behavior_type, +// const Env& env, +// const RegState& uninitialized) { +// // start with everything uninitialized +// InstrTypeState result(uninitialized); +// assert(function_type.base_type() == "function"); +// assert(function_type.arg_count() >= 1); // must know the function type. +// assert(function_type.arg_count() <= 8 + 1); // 8 args + 1 return. +// +// for (int i = 0; i < int(function_type.arg_count()) - 1; i++) { +// auto reg_id = Register::get_arg_reg(i); +// const auto& reg_type = function_type.get_arg(i); +// result.get(reg_id) = make_typespec_parent_regstate(reg_type); +// } +// +// if (behavior_type != TypeSpec("none")) { +// result.get(Register(Reg::GPR, Reg::S6)) = make_typespec_parent_regstate(behavior_type); +// } +// +// // set stack slots as uninitialized too. +// for (auto slot_info : env.stack_spills().map()) { +// result.add_stack_slot(slot_info.first, uninitialized); +// } +// +// return result; +//} +// +///*! +// * Modify the state to include user cases. Will prune as needed. +// * If we can't make it with pruning, modify. +// */ +// InstrTypeState get_input_types_with_user_casts( +// const std::vector* user_casts, +// const std::unordered_map* stack_casts, +// InstrTypeState& state, +// const DecompilerTypeSystem& dts) { +// // we parse a string from a JSON config file here, so do this in a try/catch +// try { +// // first, see if pruning can help us get closer... +// if (user_casts) { +// for (const auto& cast : *user_casts) { +// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); +// // first, let's see if we can just prune the tree: +// // TODO: maybe there should be an option to avoid this? +// if (state.get_const(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { +// // we can! Just prune. This modifies the input, which is what we want. 
+// bool success = state.get(cast.reg).mut()->try_elimination(type_from_cast, dts.ts); +// assert(success); +// } +// } +// } +// +// if (stack_casts) { +// for (const auto& [offset, cast] : *stack_casts) { +// auto stack_state = state.get_stack_slot_const(offset); +// if (!stack_state) { +// throw std::runtime_error(fmt::format( +// "Got a stack cast at offset {}, but didn't find a variable there.", offset)); +// } +// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); +// if (stack_state->can_eliminate_to_get(type_from_cast, dts.ts)) { +// bool success = +// state.get_stack_slot(offset)->mut()->try_elimination(type_from_cast, dts.ts); +// assert(success); +// } +// } +// } +// +// // now we need to make modifications: +// InstrTypeState result = state; +// +// if (user_casts) { +// for (const auto& cast : *user_casts) { +// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); +// +// if (!state.get_const(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { +// // nope we can't make it work. +// // need to make a change here. It's fine to lose our decision history here because +// // we showed that there is no way to get what the user wants by pruning. 
+// result.get(cast.reg) = make_typespec_parent_regstate(type_from_cast); +// } +// } +// } +// +// if (stack_casts) { +// for (const auto& [offset, cast] : *stack_casts) { +// auto stack_state = state.get_stack_slot_const(offset); +// assert(stack_state); +// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); +// if (!stack_state->can_eliminate_to_get(type_from_cast, dts.ts)) { +// *result.get_stack_slot(offset) = make_typespec_parent_regstate(type_from_cast); +// } +// } +// } +// +// return result; +// +// } catch (std::exception& e) { +// lg::die("Failed to parse type cast hint: {}\n", e.what()); +// throw; +// } +//} +// +// +// +// void simplify_to_single(int idx, DecompWarnings* warnings, DerefHint* hint, InstrTypeState& +// state) { +// for (auto& reg : state.reg_array()) { +// reg.mut()->reduce_to_single_type(warnings, idx, hint); +// } +// +// for (auto& stack : state.stack_slots()) { +// stack.second.mut()->reduce_to_single_type(warnings, idx, hint); +// } +//} +// +///*! +// * Set combined to lca(combined, add) and do single simplification. +// */ +// bool multi_lca(InstrTypeState& combined, +// InstrTypeState& add, +// int pred_idx, +// int succ_idx, +// DecompWarnings* warnings, +// DecompilerTypeSystem& dts) { +// bool result = false; +// // first, simplify add: +// simplify_to_single(pred_idx, warnings, nullptr, add); +// +// for (size_t idx = 0; idx < add.reg_array().size(); idx++) { +// bool diff = false; +// auto new_type = dts.tp_lca(combined.reg_array()[idx]->get_single_tp_type(), +// add.reg_array()[idx]->get_single_tp_type(), &diff); +// if (diff) { +// result = true; +// combined.reg_array()[idx] = make_typespec_parent_regstate(new_type); +// } +// } +//} +// +// +// +//} // namespace +// +// bool run_multi_type_analysis(const TypeSpec& my_type, DecompilerTypeSystem& dts, Function& func) +// { +// // STEP 0 - set decompiler type system settings for this function. these should be cleaned up +// // eventually... 
+// if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { +// dts.type_prop_settings.current_method_type = func.guessed_name.type_name; +// } +// +// // set up none-returning function junk. +// if (my_type.last_arg() == TypeSpec("none")) { +// auto as_end = dynamic_cast(func.ir2.atomic_ops->ops.back().get()); +// assert(as_end); +// as_end->mark_function_as_no_return_value(); +// } +// +// std::vector block_init_types, op_types; +// block_init_types.resize(func.basic_blocks.size()); +// op_types.resize(func.ir2.atomic_ops->ops.size()); +// auto& aop = func.ir2.atomic_ops; +// +// // STEP 1 - topological sort the blocks. This gives us an order where we: +// // - never visit unreachable blocks (we can't type propagate these) +// // - always visit at least one predecessor of a block before that block +// auto order = func.bb_topo_sort(); +// assert(!order.vist_order.empty()); +// assert(order.vist_order.front() == 0); +// +// // STEP 2 - initialize type state for the first block to the function argument types. +// auto uninitialized = make_cow(PossibleType(TP_Type::make_uninitialized())); +// // TODO: behavior types. +// block_init_types.at(0) = +// construct_initial_typestate(my_type, TypeSpec("process"), func.ir2.env, uninitialized); +// +// // STEP 3 - propagate types until the result stops changing +// bool run_again = true; +// while (run_again) { +// run_again = false; +// // do each block in the topological sort order: +// for (auto block_id : order.vist_order) { +// auto& block = func.basic_blocks.at(block_id); +// // pointer to the types (no user casts) before the op. 
+// auto* preceding_types = &block_init_types.at(block_id); +// +// // ops in block, in order +// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); +// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { +// auto& op = aop->ops.at(op_id); +// // look for hints: +// const std::vector* user_casts = nullptr; +// const std::unordered_map* stack_casts = nullptr; +// const auto& cast_it = func.ir2.env.casts().find(op_id); +// if (cast_it != func.ir2.env.casts().end()) { +// user_casts = &cast_it->second; +// } +// +// if (!func.ir2.env.stack_casts().empty()) { +// stack_casts = &func.ir2.env.stack_casts(); +// } +// +// try { +// std::vector>> reg_updates; +// std::vector>> stack_updates; +// if (stack_casts || user_casts) { +// auto casted = +// get_input_types_with_user_casts(user_casts, stack_casts, *preceding_types, dts); +// op->multi_types(casted, preceding_types, &reg_updates, &stack_updates); +// } else { +// op->multi_types(*preceding_types, preceding_types, &reg_updates, &stack_updates); +// } +// auto& dest = op_types.at(op_id); +// dest = *preceding_types; +// for (auto& update : reg_updates) { +// dest.get(update.first) = update.second; +// } +// for (auto& update : stack_updates) { +// *dest.get_stack_slot(update.first) = update.second; +// } +// } catch (std::runtime_error& e) { +// lg::warn("Function {} failed type prop at op {}: {}", func.guessed_name.to_string(), +// op_id, e.what()); +// func.warnings.type_prop_warning("{}", e.what()); +// // func.ir2.env.set_types(block_init_types, op_types, *func.ir2.atomic_ops, my_type); +// return false; +// } +// +// // for the next op... +// preceding_types = &op_types.at(op_id); +// } +// +// // propagate the types: for each possible succ +// for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { +// if (succ_block_id != -1) { +// // set types to LCA (current, new) +// if (dts.tp_lca(&block_init_types.at(succ_block_id), *preceding_types)) { +// // if something changed, run again! 
+// run_again = true; +// } +// } +// } +// } +// } +// +// auto last_type = op_types.back().get(Register(Reg::GPR, Reg::V0)).typespec(); +// if (last_type != my_type.last_arg()) { +// func.warnings.info("Return type mismatch {} vs {}.", last_type.print(), +// my_type.last_arg().print()); +// } +// +// // and apply final casts: +// for (auto block_id : order.vist_order) { +// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); +// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { +// if (op_id == aop->block_id_to_first_atomic_op.at(block_id)) { +// try_modify_input_types_for_casts(op_id, func.ir2.env.casts(), func.ir2.env.stack_casts(), +// &block_init_types.at(block_id), nullptr, dts); +// } else { +// try_modify_input_types_for_casts(op_id, func.ir2.env.casts(), func.ir2.env.stack_casts(), +// &op_types.at(op_id - 1), nullptr, dts); +// } +// } +// } +// +// // figure out the types of stack spill variables: +// auto& env = func.ir2.env; +// bool changed; +// for (auto& type_info : op_types) { +// for (auto& spill : type_info.spill_slots) { +// auto& slot_info = env.stack_slot_entries[spill.first]; +// slot_info.tp_type = +// dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, spill.second, &changed); +// slot_info.offset = spill.first; +// } +// } +// +// for (auto& type_info : block_init_types) { +// for (auto& spill : type_info.spill_slots) { +// auto& slot_info = env.stack_slot_entries[spill.first]; +// slot_info.tp_type = +// dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, spill.second, &changed); +// slot_info.offset = spill.first; +// } +// } +// +// // convert to typespec +// for (auto& info : env.stack_slot_entries) { +// info.second.typespec = info.second.tp_type.typespec(); +// // debug +// // fmt::print("STACK {} : {} ({})\n", info.first, info.second.typespec.print(), +// // info.second.tp_type.print()); +// } +// +// func.ir2.env.set_types(block_init_types, op_types, *func.ir2.atomic_ops, my_type); +// +// return 
true; +//} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/MultiTypeAnalysis.h b/decompiler/IR2/MultiTypeAnalysis.h index 78c0284ed1..62f317324e 100644 --- a/decompiler/IR2/MultiTypeAnalysis.h +++ b/decompiler/IR2/MultiTypeAnalysis.h @@ -2,7 +2,7 @@ #include #include -#include "common/util/CopyOnWrite.h" +#include "decompiler/Function/BasicBlocks.h" #include "decompiler/Disasm/Register.h" #include "decompiler/util/TP_Type.h" #include "common/type_system/TypeSystem.h" @@ -12,6 +12,7 @@ namespace decompiler { class InstrTypeState; class DecompWarnings; struct PossibleType; +struct RegisterTypeState; struct DerefHint { struct Token { @@ -28,12 +29,9 @@ struct DerefHint { /*! * Represents a reference to a type decision made on a previous instruction. */ -struct TypeDecisionParent { - InstrTypeState* instruction = nullptr; - Register reg; - int type_index = -1; - - const PossibleType& get() const; +struct TypeChoiceParent { + RegisterTypeState* reg_type = nullptr; + int idx_in_parent = -1; }; /*! @@ -42,11 +40,18 @@ struct TypeDecisionParent { * Use is_valid to check that it hasn't been eliminated. */ struct PossibleType { - TP_Type type; // the actual type. - std::optional deref_path; // the field accessed to get here - double deref_score = 0.; - TypeDecisionParent parent; // the decision we made to allow this. - void eliminate() { m_valid_cache = false; } + TP_Type type; // the actual type. + std::optional + deref_path; // the field accessed to get here, assuming we did a deref. + double score = 0.; // the sum of scores of all derefs to get here. + + // if we are a child, 0. + // otherwise, the number of children who have a reference to us. + int child_count = 0; + + TypeChoiceParent parent; // the type we used to get this type. + + void eliminate(); // if possible, prune to avoid using this type. bool is_valid() const; // true, unless we were eliminated. 
PossibleType(const TP_Type& tp_type) : type(tp_type) {} @@ -59,58 +64,106 @@ struct PossibleType { * The set of all possible types in a register. */ struct RegisterTypeState { - std::optional override_type; // this is just for printing errors. - std::optional single_type_cache; + // this is just for printing errors, and isn't used by the analysis + std::optional override_type; + + // if we're simplified to a single type, this will hold the index in the possible types vector. + + // the types we can be. std::vector possible_types; - RegisterTypeState() = delete; + RegisterTypeState() = default; RegisterTypeState(const PossibleType& single_type) : possible_types({single_type}) {} - void reduce_to_single_type(DecompWarnings* warnings, int op_idx, const DerefHint* hint); + void reduce_to_single_best_type(DecompWarnings* warnings, int op_idx, const DerefHint* hint); + bool is_single_type() const; const PossibleType& get_single_type_decision() const; const TP_Type& get_single_tp_type() const; - void try_elimination(const TypeSpec& desired_types, const TypeSystem& ts); + + bool try_elimination(const TypeSpec& desired_types, const TypeSystem& ts); + bool can_eliminate_to_get(const TypeSpec& desired_types, const TypeSystem& ts) const; + + private: + mutable std::optional single_type_cache; +}; + +/*! + * During setup, this contains an alloc flag and a uid. + * While it's running, it contains a pointer. 
+ */ +/* +struct RegisterNode { + RegisterTypeState* ptr() { return (RegisterTypeState*)data; } + bool alloc() { return data & 1; } + u64 uid() { return data >> 32; } + void set_alloc() { data |= 1; } + void set_uid(u64 uid) { data |= (uid << 32); } + +private: + uintptr_t data = 0; + static_assert(sizeof(uintptr_t) == 8); +}; + */ + +struct RegisterNode { + RegisterTypeState* ptr() { return m_ptr; } + void set_ptr(RegisterTypeState* ptr) { m_ptr = ptr; } + bool alloc() const { return !!m_ptr; } + void set_alloc(RegisterTypeState* state) { + m_ptr = state; + m_alloc_point = true; + } + bool is_alloc_point() const { return m_alloc_point; } + s64 uid() const { return m_uid; } + void set_uid(s64 val) { m_uid = val; } + + private: + RegisterTypeState* m_ptr = nullptr; + s32 m_uid = 0; + bool m_alloc_point = false; }; class InstrTypeState { public: - explicit InstrTypeState(const CopyOnWrite& default_value) { - m_regs.fill(default_value); + void add_stack_slot(int offset) { m_stack_slots.emplace_back(offset, RegisterNode()); } + int stack_slot_count() const { return m_stack_slots.size(); } + std::array& regs() { return m_regs; } + std::vector>& slots() { return m_stack_slots; } + + RegisterNode& get_slot(int offset) { + for(auto& s : m_stack_slots) { + if (s.first == offset) { + return s.second; + } + } + assert(false); } - const RegisterTypeState& get_const(const Register& reg) const { - assert(reg.reg_id() < Reg::MAX_VAR_REG_ID); - return *m_regs[reg.reg_id()]; - } - - CopyOnWrite& get(const Register& reg) { + RegisterNode& get(const Register& reg) { assert(reg.reg_id() < Reg::MAX_VAR_REG_ID); return m_regs[reg.reg_id()]; } - CopyOnWrite& get_stack_slot(int offset) { - for (auto& slot : m_stack_slots) { - if (slot.first == offset) { - return slot.second; - } - } - assert(false); - } - - const RegisterTypeState& get_stack_slot_const(int offset) const { - for (auto& slot : m_stack_slots) { - if (slot.first == offset) { - return *slot.second; - } - } - assert(false); - } 
- - void add_stack_slot(int offset, const CopyOnWrite& value) { - m_stack_slots.emplace_back(offset, value); - } - private: - std::array, Reg::MAX_VAR_REG_ID> m_regs; - std::vector>> m_stack_slots; + std::array m_regs; + std::vector> m_stack_slots; }; + +struct TypeAnalysisGraph { + std::vector after_op_types; + std::vector block_start_types; + + BlockTopologicalSort topo_sort; + + RegisterTypeState* alloc_regstate(); + + std::vector> node_pool; +}; + +class Function; +class DecompilerTypeSystem; +TypeAnalysisGraph make_analysis_graph(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + Function& func, + bool verbose); + } // namespace decompiler \ No newline at end of file diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 86db7687b4..5569afec6d 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -22,6 +22,7 @@ #include "decompiler/analysis/symbol_def_map.h" #include "common/goos/PrettyPrinter.h" #include "decompiler/IR2/Form.h" +#include "decompiler/IR2/MultiTypeAnalysis.h" namespace decompiler { @@ -379,6 +380,10 @@ void ObjectFileDB::ir2_type_analysis_pass(const Config& config) { } func.ir2.env.set_stack_structure_hints( try_lookup(config.stack_structure_hints_by_function, func_name)); + + // experimental multi-type pass, for debugging. + auto tg = make_analysis_graph(ts, dts, func, true); + if (run_type_analysis_ir2(ts, dts, func)) { successful_functions++; func.ir2.env.types_succeeded = true; diff --git a/decompiler/analysis/type_analysis.cpp b/decompiler/analysis/type_analysis.cpp index d84ee2ede7..4eafcca2f3 100644 --- a/decompiler/analysis/type_analysis.cpp +++ b/decompiler/analysis/type_analysis.cpp @@ -40,7 +40,7 @@ void modify_input_types_for_casts( // type_from_cast.print()); if (original_type != type_from_cast) { // the cast will have an effect on types. 
If we are removing the original type, remember it - if (changed_types && changed_types->find(cast.reg) == changed_types->end()) { + if (changed_types) { (*changed_types)[cast.reg] = original_type; }