diff --git a/decompiler/IR2/AtomicOp.h b/decompiler/IR2/AtomicOp.h index e71ad1b292..3bf097d933 100644 --- a/decompiler/IR2/AtomicOp.h +++ b/decompiler/IR2/AtomicOp.h @@ -77,6 +77,11 @@ class AtomicOp { TypeState propagate_types(const TypeState& input, const Env& env, DecompilerTypeSystem& dts); + void multi_types(InstrTypeState* output, + InstrTypeState& input, + const Env& env, + DecompilerTypeSystem& dts); + int op_id() const { return m_my_idx; } const std::vector& read_regs() const { return m_read_regs; } const std::vector& write_regs() const { return m_write_regs; } @@ -97,6 +102,11 @@ class AtomicOp { virtual TypeState propagate_types_internal(const TypeState& input, const Env& env, DecompilerTypeSystem& dts) = 0; + + virtual void multi_types_internal(InstrTypeState* output, + InstrTypeState& input, + const Env& env, + DecompilerTypeSystem& dts); void clobber_temps(); // the register values that are read (at the start of this op) diff --git a/decompiler/IR2/Env.h b/decompiler/IR2/Env.h index e18d5ab451..33e83c0370 100644 --- a/decompiler/IR2/Env.h +++ b/decompiler/IR2/Env.h @@ -10,6 +10,7 @@ #include "decompiler/IR2/IR2_common.h" #include "decompiler/analysis/reg_usage.h" #include "decompiler/config.h" +#include "decompiler/IR2/MultiTypeAnalysis.h" namespace decompiler { class LinkedObjectFile; @@ -208,6 +209,18 @@ class Env { // hacks: bool aggressively_reject_cond_to_value_rewrite = false; + void set_type_graph(std::shared_ptr tg) { + m_tg = std::move(tg); + m_has_new_types = true; + } + + const TypeAnalysisGraph& type_graph() const { + assert(m_has_new_types); + return *m_tg; + } + + bool has_type_graph() const { return m_has_new_types; } + private: RegisterAccess m_end_var; @@ -235,5 +248,8 @@ class Env { std::optional m_type_analysis_return_type; StackSpillMap m_stack_spill_map; + + bool m_has_new_types = false; + std::shared_ptr m_tg; }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/FormExpressionAnalysis.cpp b/decompiler/IR2/FormExpressionAnalysis.cpp index fe4ce01f6b..249554afca 100644 --- a/decompiler/IR2/FormExpressionAnalysis.cpp +++ b/decompiler/IR2/FormExpressionAnalysis.cpp @@ -2822,6 +2822,15 @@ FormElement* ConditionElement::make_generic(const Env& env, casted); } + case IR2_Condition::Kind::LESS_THAN_ZERO_UNSIGNED: { + auto casted = make_casts_if_needed(source_forms, types, TypeSpec("uint"), pool, env); + auto zero = pool.alloc_single_element_form( + nullptr, SimpleAtom::make_int_constant(0)); + casted.push_back(zero); + return pool.alloc_element(GenericOperator::make_fixed(FixedOperatorKind::LT), + casted); + } + case IR2_Condition::Kind::GREATER_THAN_ZERO_SIGNED: { auto casted = make_casts_if_needed(source_forms, types, TypeSpec("int"), pool, env); auto zero = pool.alloc_single_element_form( diff --git a/decompiler/IR2/MultiTypeAnalysis.cpp b/decompiler/IR2/MultiTypeAnalysis.cpp index cce3c19113..1bfeb37a36 100644 --- a/decompiler/IR2/MultiTypeAnalysis.cpp +++ b/decompiler/IR2/MultiTypeAnalysis.cpp @@ -26,688 +26,785 @@ namespace decompiler { -//RegisterTypeState* TypeAnalysisGraph::alloc_regstate() { -// node_pool.push_back(std::make_unique()); -// return node_pool.back().get(); -//} -// -//TypeAnalysisGraph make_analysis_graph(const TypeSpec& my_type, -// DecompilerTypeSystem& dts, -// Function& func, -// bool verbose) { -// TypeAnalysisGraph result; -// auto clobber_type = result.alloc_regstate(); -// *clobber_type = RegisterTypeState(PossibleType(TP_Type::make_uninitialized())); -// -// InstrTypeState default_state; -// for (auto& slot : func.ir2.env.stack_spills().map()) { -// default_state.add_stack_slot(slot.first); -// } -// -// // approximate the size and complain if it's huge. -// int state_size = -// sizeof(InstrTypeState) + default_state.stack_slot_count() * (sizeof(RegisterNode) + 4); -// -// result.block_start_types.resize(func.basic_blocks.size()); -// result.after_op_types.resize(func.ir2.atomic_ops->ops.size()); -// -// int instr_count = result.block_start_types.size() + result.after_op_types.size(); -// int ref_size_kb = (instr_count * state_size) / 1024; -// -// if (verbose) { -// if (ref_size_kb > 500) { -// lg::info( -// "Func {} has {} instr states, each {} bytes, for a total of {} kb in just references.", -// func.guessed_name.to_string(), instr_count, state_size, ref_size_kb); -// } -// } -// -// result.topo_sort = func.bb_topo_sort(); -// if (verbose) { -// if (result.topo_sort.vist_order.size() > 100) { -// lg::info("Func {} has {} basic blocks.", func.guessed_name.to_string(), -// result.topo_sort.vist_order.size()); -// } -// } -// -// // set up the initial state: -// int allocation_count = 0; -// int uid = 1; -// -// auto& initial_state = result.block_start_types.at(0); -// for (auto& r : initial_state.regs()) { -// // okay to leave these as uninitialized - the function setup stuff will take care of this. -// r.set_alloc(result.alloc_regstate()); -// r.set_uid(uid++); -// allocation_count++; -// } -// for (auto& s : initial_state.slots()) { -// s.second.set_alloc(result.alloc_regstate()); -// s.second.set_uid(uid++); -// allocation_count++; -// } -// -// // do allocations -// auto& aop = func.ir2.atomic_ops; -// bool run_again = true; -// int iterations = 0; -// while (run_again) { -// iterations++; -// run_again = false; -// // do each block in the topological sort order: -// for (auto block_id : result.topo_sort.vist_order) { -// auto& block = func.basic_blocks.at(block_id); -// auto* init_types = &result.block_start_types.at(block_id); -// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); -// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { -// AtomicOp* op = aop->ops.at(op_id).get(); -// -// result.after_op_types.at(op_id) = *init_types; -// -// // todo write stack slots. -// auto* op_as_stack_store = dynamic_cast(op); -// if (op_as_stack_store) { -// auto& state = init_types->get_slot(op_as_stack_store->offset()); -// if (!state.alloc()) { -// allocation_count++; -// run_again = true; -// state.set_alloc(result.alloc_regstate()); -// } -// state.set_uid(uid++); -// } -// -// for (const auto& reg : op->write_regs()) { -// if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { -// continue; -// } -// auto& state = init_types->get(reg); -// if (!state.alloc()) { -// allocation_count++; -// run_again = true; -// state.set_alloc(result.alloc_regstate()); -// } -// state.set_uid(uid++); -// } -// -// for (const auto& reg : op->clobber_regs()) { -// if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { -// continue; -// } -// auto& c = init_types->get(reg); -// c.set_alloc(clobber_type); -// c.set_uid(uid++); -// } -// -// // for the next op... -// init_types = &result.after_op_types.at(op_id); -// } -// -// // propagate the types: for each possible succ -// for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { -// if (succ_block_id != -1) { -// // set types to LCA (current, new) -// auto& succ_types = result.block_start_types.at(succ_block_id); -// for (size_t i = 0; i < succ_types.regs().size(); i++) { -// auto& succ = succ_types.regs()[i]; -// auto& end = init_types->regs()[i]; -// -// if (succ.uid() == -1) { -// succ.set_uid(end.uid()); -// } else { -// if (succ.uid() == end.uid()) { -// // nice!! -// // lg::info("Saved allocation"); -// } else { -// succ.set_uid(uid++); -// if (!succ.alloc()) { -// run_again = true; -// succ.set_alloc(result.alloc_regstate()); -// allocation_count++; -// } -// } -// } -// } -// } -// } -// } -// } -// -// int allocations_size_kb = (allocation_count * sizeof(RegisterTypeState)) / 1024; -// int total_size_new_method_kb = allocations_size_kb + ref_size_kb; -// int total_size_old_method_kb = (instr_count * 64 * sizeof(RegisterTypeState)) / 1024; -// -// if (total_size_old_method_kb > 1000) { -// lg::info("Function {} new {} kb old {} kb, {} allocs", func.guessed_name.to_string(), -// total_size_new_method_kb, total_size_old_method_kb, allocation_count); -// } -// -// return result; -//} +RegisterTypeState* TypeAnalysisGraph::alloc_regstate() { + node_pool.push_back(std::make_unique()); + return node_pool.back().get(); +} -// using RegState = CopyOnWrite; -// -// bool DerefHint::matches(const FieldReverseLookupOutput& value) const { -// if (value.tokens.size() != tokens.size()) { -// return false; -// } -// -// for (size_t i = 0; i < value.tokens.size(); i++) { -// if (!tokens[i].matches(value.tokens[i])) { -// return false; -// } -// } -// -// return true; -// } -// -// bool DerefHint::Token::matches(const FieldReverseLookupOutput::Token& other) const { -// switch (kind) { -// case Kind::INTEGER: -// return other.kind == FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX && -// other.idx == integer; -// case Kind::FIELD: -// return other.kind == FieldReverseLookupOutput::Token::Kind::FIELD && other.name == name; -// case Kind::VAR: -// return other.kind == FieldReverseLookupOutput::Token::Kind::VAR_IDX; -// default: -// assert(false); -// } -// } -// -///*! -// * Safely access the decision referenced by this TypeDecisionParent. -// * This will work even if the actual RegisterTypeState has been modified since the reference was -// * created. -// */ -// const PossibleType& TypeDecisionParent::get() const { -// return instruction->get_const(reg).possible_types.at(type_index); -//} -// -// PossibleType& TypeDecisionParent::get() { -// return instruction->get(reg).mut()->possible_types.at(type_index); -//} -// -///*! -// * Figure out if this has been eliminated or not. Caches the result to avoid looking it up again -// and -// * again. Elimination cannot be undone. -// */ -// bool PossibleType::is_valid() const { -// if (!m_valid_cache) { -// return false; -// } -// -// if (child_count == 0) { -// m_valid_cache = false; -// return false; -// } -// -// if (parent.instruction) { -// // we have a parent in the tree, check if that parent is eliminated. -// if (!parent.get().is_valid()) { -// m_valid_cache = false; -// return false; -// } -// } -// -// return true; -//} -// -// void PossibleType::eliminate() { -// assert(is_valid()); -// if (parent.instruction) { -// auto& par = parent.get(); -// par.child_count--; -// assert(par.child_count >= 0); -// if (!par.child_count) { -// par.eliminate(); -// } -// } -//} -// -///*! -// * If we have multiple types, pick the one with the highest deref path score. -// * If warnings is set, and we have to throw away a valid type, prints a warning that we made a -// * somewhat arbitrary decision to throw a possible type. -// * -// * After calling this, you can use get_single_tp_type and get_single_type_decision. -// */ -// void RegisterTypeState::reduce_to_single_type(DecompWarnings* warnings, -// int op_idx, -// const DerefHint* hint) { -// double best_score = -std::numeric_limits::infinity(); -// int best_idx = -1; -// bool printed_first_warning = false; -// std::string warning_string; -// -// // find the highest score that's valid. -// for (int i = 0; i < (int)possible_types.size(); i++) { -// if (possible_types[i].deref_score > best_score && possible_types[i].is_valid()) { -// best_idx = i; -// best_score = possible_types[i].deref_score; -// } -// -// // if we match the hint, just use that. -// if (possible_types[i].deref_path && hint->matches(*possible_types[i].deref_path)) { -// best_idx = i; -// warnings = nullptr; // never warn if we take the hint -// break; -// } -// } -// assert(best_idx != -1); -// -// // eliminate stuff that isn't the best. -// for (int i = 0; i < (int)possible_types.size(); i++) { -// if (i != best_idx) { -// // warn if we eliminate something that is possibly valid. -// if (warnings && possible_types[i].is_valid()) { -// if (!printed_first_warning) { -// warning_string += fmt::format("Ambiguous type selection at op {}\n", op_idx); -// printed_first_warning = true; -// } -// if (possible_types[best_idx].deref_path) { -// warning_string += fmt::format(" {}\n", possible_types[best_idx].deref_path->print()); -// } else { -// warning_string += fmt::format(" {}\n", possible_types[best_idx].type.print()); -// } -// } -// -// possible_types[i].eliminate(); -// } -// } -// -// // cache the winner -// single_type_cache = best_idx; -// -// if (warnings && printed_first_warning) { -// warnings->general_warning(warning_string); -// } -//} -// -///*! -// * After this has been pruned to a single type, gets that type decision. -// */ -// const PossibleType& RegisterTypeState::get_single_type_decision() const { -// assert(single_type_cache.has_value()); -// assert(possible_types.at(*single_type_cache).is_valid()); // todo remove. -// return possible_types[*single_type_cache]; -//} -// -///*! -// * After this has been pruned to a single type, gets it as a TP_Type. -// */ -// const TP_Type& RegisterTypeState::get_single_tp_type() const { -// return get_single_type_decision().type; -//} -// -///*! -// * If there is at least one possibility to get a desired_type, removes anything that's not a -// * desired_type. If it's not possible to get a desired type, does nothing. -// */ -// bool RegisterTypeState::try_elimination(const TypeSpec& desired_types, const TypeSystem& ts) { -// std::vector to_eliminate; -// int keep_count = 0; -// for (int i = 0; i < (int)possible_types.size(); i++) { -// const auto& possibility = possible_types[i]; -// if (possibility.is_valid()) { -// if (ts.tc(desired_types, possibility.type.typespec())) { -// keep_count++; -// } else { -// to_eliminate.push_back(i); -// } -// } -// } -// -// if (keep_count > 0) { -// for (auto idx : to_eliminate) { -// possible_types.at(idx).eliminate(); -// } -// return true; -// } -// return false; -//} -// -// bool RegisterTypeState::can_eliminate_to_get(const TypeSpec& desired_types, -// const TypeSystem& ts) const { -// for (int i = 0; i < (int)possible_types.size(); i++) { -// const auto& possibility = possible_types[i]; -// if (possibility.is_valid()) { -// if (ts.tc(desired_types, possibility.type.typespec())) { -// return true; -// } -// } -// } -// return false; -//} -// -// void InstrTypeState::inherit(InstrTypeState& prev) { -// for (size_t i = 0; i < m_regs.size(); i++) { -// -// } -//} -// -// namespace { -// -///*! -// * Create a register type state with no parent and the given typespec. -// */ -// RegState make_typespec_parent_regstate(const TypeSpec& typespec) { -// auto result = make_cow(TP_Type::make_from_ts(typespec)); -// result.mut()->reduce_to_single_type(nullptr, -1, nullptr); -// return result; -//} -// -///*! -// * Create a register type state with no parent and the given typespec. -// */ -// RegState make_typespec_parent_regstate(const TP_Type& typespec) { -// auto result = make_cow(typespec); -// result.mut()->reduce_to_single_type(nullptr, -1, nullptr); -// return result; -//} -// -///*! -// * Create an instruction type state for the first instruction of a function. -// */ -// InstrTypeState construct_initial_typestate(const TypeSpec& function_type, -// const TypeSpec& behavior_type, -// const Env& env, -// const RegState& uninitialized) { -// // start with everything uninitialized -// InstrTypeState result(uninitialized); -// assert(function_type.base_type() == "function"); -// assert(function_type.arg_count() >= 1); // must know the function type. -// assert(function_type.arg_count() <= 8 + 1); // 8 args + 1 return. -// -// for (int i = 0; i < int(function_type.arg_count()) - 1; i++) { -// auto reg_id = Register::get_arg_reg(i); -// const auto& reg_type = function_type.get_arg(i); -// result.get(reg_id) = make_typespec_parent_regstate(reg_type); -// } -// -// if (behavior_type != TypeSpec("none")) { -// result.get(Register(Reg::GPR, Reg::S6)) = make_typespec_parent_regstate(behavior_type); -// } -// -// // set stack slots as uninitialized too. -// for (auto slot_info : env.stack_spills().map()) { -// result.add_stack_slot(slot_info.first, uninitialized); -// } -// -// return result; -//} -// -///*! -// * Modify the state to include user cases. Will prune as needed. -// * If we can't make it with pruning, modify. -// */ -// InstrTypeState get_input_types_with_user_casts( -// const std::vector* user_casts, -// const std::unordered_map* stack_casts, -// InstrTypeState& state, -// const DecompilerTypeSystem& dts) { -// // we parse a string from a JSON config file here, so do this in a try/catch -// try { -// // first, see if pruning can help us get closer... -// if (user_casts) { -// for (const auto& cast : *user_casts) { -// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); -// // first, let's see if we can just prune the tree: -// // TODO: maybe there should be an option to avoid this? -// if (state.get_const(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { -// // we can! Just prune. This modifies the input, which is what we want. -// bool success = state.get(cast.reg).mut()->try_elimination(type_from_cast, dts.ts); -// assert(success); -// } -// } -// } -// -// if (stack_casts) { -// for (const auto& [offset, cast] : *stack_casts) { -// auto stack_state = state.get_stack_slot_const(offset); -// if (!stack_state) { -// throw std::runtime_error(fmt::format( -// "Got a stack cast at offset {}, but didn't find a variable there.", offset)); -// } -// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); -// if (stack_state->can_eliminate_to_get(type_from_cast, dts.ts)) { -// bool success = -// state.get_stack_slot(offset)->mut()->try_elimination(type_from_cast, dts.ts); -// assert(success); -// } -// } -// } -// -// // now we need to make modifications: -// InstrTypeState result = state; -// -// if (user_casts) { -// for (const auto& cast : *user_casts) { -// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); -// -// if (!state.get_const(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { -// // nope we can't make it work. -// // need to make a change here. It's fine to lose our decision history here because -// // we showed that there is no way to get what the user wants by pruning. -// result.get(cast.reg) = make_typespec_parent_regstate(type_from_cast); -// } -// } -// } -// -// if (stack_casts) { -// for (const auto& [offset, cast] : *stack_casts) { -// auto stack_state = state.get_stack_slot_const(offset); -// assert(stack_state); -// TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); -// if (!stack_state->can_eliminate_to_get(type_from_cast, dts.ts)) { -// *result.get_stack_slot(offset) = make_typespec_parent_regstate(type_from_cast); -// } -// } -// } -// -// return result; -// -// } catch (std::exception& e) { -// lg::die("Failed to parse type cast hint: {}\n", e.what()); -// throw; -// } -//} -// -// -// -// void simplify_to_single(int idx, DecompWarnings* warnings, DerefHint* hint, InstrTypeState& -// state) { -// for (auto& reg : state.reg_array()) { -// reg.mut()->reduce_to_single_type(warnings, idx, hint); -// } -// -// for (auto& stack : state.stack_slots()) { -// stack.second.mut()->reduce_to_single_type(warnings, idx, hint); -// } -//} -// -///*! -// * Set combined to lca(combined, add) and do single simplification. -// */ -// bool multi_lca(InstrTypeState& combined, -// InstrTypeState& add, -// int pred_idx, -// int succ_idx, -// DecompWarnings* warnings, -// DecompilerTypeSystem& dts) { -// bool result = false; -// // first, simplify add: -// simplify_to_single(pred_idx, warnings, nullptr, add); -// -// for (size_t idx = 0; idx < add.reg_array().size(); idx++) { -// bool diff = false; -// auto new_type = dts.tp_lca(combined.reg_array()[idx]->get_single_tp_type(), -// add.reg_array()[idx]->get_single_tp_type(), &diff); -// if (diff) { -// result = true; -// combined.reg_array()[idx] = make_typespec_parent_regstate(new_type); -// } -// } -//} -// -// -// -//} // namespace -// -// bool run_multi_type_analysis(const TypeSpec& my_type, DecompilerTypeSystem& dts, Function& func) -// { -// // STEP 0 - set decompiler type system settings for this function. these should be cleaned up -// // eventually... -// if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { -// dts.type_prop_settings.current_method_type = func.guessed_name.type_name; -// } -// -// // set up none-returning function junk. -// if (my_type.last_arg() == TypeSpec("none")) { -// auto as_end = dynamic_cast(func.ir2.atomic_ops->ops.back().get()); -// assert(as_end); -// as_end->mark_function_as_no_return_value(); -// } -// -// std::vector block_init_types, op_types; -// block_init_types.resize(func.basic_blocks.size()); -// op_types.resize(func.ir2.atomic_ops->ops.size()); -// auto& aop = func.ir2.atomic_ops; -// -// // STEP 1 - topological sort the blocks. This gives us an order where we: -// // - never visit unreachable blocks (we can't type propagate these) -// // - always visit at least one predecessor of a block before that block -// auto order = func.bb_topo_sort(); -// assert(!order.vist_order.empty()); -// assert(order.vist_order.front() == 0); -// -// // STEP 2 - initialize type state for the first block to the function argument types. -// auto uninitialized = make_cow(PossibleType(TP_Type::make_uninitialized())); -// // TODO: behavior types. -// block_init_types.at(0) = -// construct_initial_typestate(my_type, TypeSpec("process"), func.ir2.env, uninitialized); -// -// // STEP 3 - propagate types until the result stops changing -// bool run_again = true; -// while (run_again) { -// run_again = false; -// // do each block in the topological sort order: -// for (auto block_id : order.vist_order) { -// auto& block = func.basic_blocks.at(block_id); -// // pointer to the types (no user casts) before the op. -// auto* preceding_types = &block_init_types.at(block_id); -// -// // ops in block, in order -// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); -// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { -// auto& op = aop->ops.at(op_id); -// // look for hints: -// const std::vector* user_casts = nullptr; -// const std::unordered_map* stack_casts = nullptr; -// const auto& cast_it = func.ir2.env.casts().find(op_id); -// if (cast_it != func.ir2.env.casts().end()) { -// user_casts = &cast_it->second; -// } -// -// if (!func.ir2.env.stack_casts().empty()) { -// stack_casts = &func.ir2.env.stack_casts(); -// } -// -// try { -// std::vector>> reg_updates; -// std::vector>> stack_updates; -// if (stack_casts || user_casts) { -// auto casted = -// get_input_types_with_user_casts(user_casts, stack_casts, *preceding_types, dts); -// op->multi_types(casted, preceding_types, ®_updates, &stack_updates); -// } else { -// op->multi_types(*preceding_types, preceding_types, ®_updates, &stack_updates); -// } -// auto& dest = op_types.at(op_id); -// dest = *preceding_types; -// for (auto& update : reg_updates) { -// dest.get(update.first) = update.second; -// } -// for (auto& update : stack_updates) { -// *dest.get_stack_slot(update.first) = update.second; -// } -// } catch (std::runtime_error& e) { -// lg::warn("Function {} failed type prop at op {}: {}", func.guessed_name.to_string(), -// op_id, e.what()); -// func.warnings.type_prop_warning("{}", e.what()); -// // func.ir2.env.set_types(block_init_types, op_types, *func.ir2.atomic_ops, my_type); -// return false; -// } -// -// // for the next op... -// preceding_types = &op_types.at(op_id); -// } -// -// // propagate the types: for each possible succ -// for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { -// if (succ_block_id != -1) { -// // set types to LCA (current, new) -// if (dts.tp_lca(&block_init_types.at(succ_block_id), *preceding_types)) { -// // if something changed, run again! -// run_again = true; -// } -// } -// } -// } -// } -// -// auto last_type = op_types.back().get(Register(Reg::GPR, Reg::V0)).typespec(); -// if (last_type != my_type.last_arg()) { -// func.warnings.info("Return type mismatch {} vs {}.", last_type.print(), -// my_type.last_arg().print()); -// } -// -// // and apply final casts: -// for (auto block_id : order.vist_order) { -// for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); -// op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { -// if (op_id == aop->block_id_to_first_atomic_op.at(block_id)) { -// try_modify_input_types_for_casts(op_id, func.ir2.env.casts(), func.ir2.env.stack_casts(), -// &block_init_types.at(block_id), nullptr, dts); -// } else { -// try_modify_input_types_for_casts(op_id, func.ir2.env.casts(), func.ir2.env.stack_casts(), -// &op_types.at(op_id - 1), nullptr, dts); -// } -// } -// } -// -// // figure out the types of stack spill variables: -// auto& env = func.ir2.env; -// bool changed; -// for (auto& type_info : op_types) { -// for (auto& spill : type_info.spill_slots) { -// auto& slot_info = env.stack_slot_entries[spill.first]; -// slot_info.tp_type = -// dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, spill.second, &changed); -// slot_info.offset = spill.first; -// } -// } -// -// for (auto& type_info : block_init_types) { -// for (auto& spill : type_info.spill_slots) { -// auto& slot_info = env.stack_slot_entries[spill.first]; -// slot_info.tp_type = -// dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, spill.second, &changed); -// slot_info.offset = spill.first; -// } -// } -// -// // convert to typespec -// for (auto& info : env.stack_slot_entries) { -// info.second.typespec = info.second.tp_type.typespec(); -// // debug -// // fmt::print("STACK {} : {} ({})\n", info.first, info.second.typespec.print(), -// // info.second.tp_type.print()); -// } -// -// func.ir2.env.set_types(block_init_types, op_types, *func.ir2.atomic_ops, my_type); -// -// return true; -//} +std::shared_ptr allocate_analysis_graph(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + Function& func, + bool verbose) { + auto result = std::make_unique(); + auto clobber_type = result->alloc_regstate(); + *clobber_type = RegisterTypeState(PossibleType(TP_Type::make_uninitialized())); + + InstrTypeState default_state; + for (auto& slot : func.ir2.env.stack_spills().map()) { + default_state.add_stack_slot(slot.first); + } + + // approximate the size and complain if it's huge. + int state_size = + sizeof(InstrTypeState) + default_state.stack_slot_count() * (sizeof(RegisterNode) + 4); + + result->block_start_types.resize(func.basic_blocks.size(), default_state); + result->after_op_types.resize(func.ir2.atomic_ops->ops.size(), default_state); + + int instr_count = result->block_start_types.size() + result->after_op_types.size(); + int ref_size_kb = (instr_count * state_size) / 1024; + + if (verbose) { + if (ref_size_kb > 500) { + lg::info( + "Func {} has {} instr states, each {} bytes, for a total of {} kb in just references.", + func.guessed_name.to_string(), instr_count, state_size, ref_size_kb); + } + } + + result->topo_sort = func.bb_topo_sort(); + if (verbose) { + if (result->topo_sort.vist_order.size() > 100) { + lg::info("Func {} has {} basic blocks.", func.guessed_name.to_string(), + result->topo_sort.vist_order.size()); + } + } + + // set up the initial state: + int allocation_count = 0; + int uid = 1; + + auto& initial_state = result->block_start_types.at(0); + for (auto& r : initial_state.regs()) { + // okay to leave these as uninitialized - the function setup stuff will take care of this. + r.set_alloc(result->alloc_regstate()); + r.set_uid(uid++); + allocation_count++; + } + for (auto& s : initial_state.slots()) { + s.second.set_alloc(result->alloc_regstate()); + s.second.set_uid(uid++); + allocation_count++; + } + + // do allocations + auto& aop = func.ir2.atomic_ops; + bool run_again = true; + int iterations = 0; + + auto alloc_and_write_here = [&](RegisterNode& node) { + if (!node.alloc()) { + allocation_count++; + run_again = true; + node.set_alloc(result->alloc_regstate()); + } + node.set_uid(uid++); + }; + + while (run_again) { + iterations++; + run_again = false; + // do each block in the topological sort order: + for (auto block_id : result->topo_sort.vist_order) { + auto& block = func.basic_blocks.at(block_id); + auto* init_types = &result->block_start_types.at(block_id); + for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); + op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { + AtomicOp* op = aop->ops.at(op_id).get(); + + result->after_op_types.at(op_id) = *init_types; + auto& after_types = result->after_op_types.at(op_id); + + // if we're a stack store, alloc a node for our write to the stack + auto* op_as_stack_store = dynamic_cast(op); + if (op_as_stack_store) { + alloc_and_write_here(after_types.get_slot(op_as_stack_store->offset())); + } + + // if we're a register write, alloc nodes for our writes + for (const auto& reg : op->write_regs()) { + if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { + continue; + } + alloc_and_write_here(after_types.get(reg)); + } + + // trick for clobbers. + for (const auto& reg : op->clobber_regs()) { + if (reg.reg_id() >= Reg::MAX_VAR_REG_ID) { + continue; + } + auto& c = after_types.get(reg); + c.set_clobber(clobber_type); + c.set_uid(uid++); + } + + // for the next op... + init_types = &after_types; + } + + // propagate the types: for each possible succ + for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { + if (succ_block_id != -1) { + // set types to LCA (current, new) + auto& succ_types = result->block_start_types.at(succ_block_id); + for (size_t i = 0; i < succ_types.regs().size(); i++) { + auto& succ = succ_types.regs()[i]; + auto& end = init_types->regs()[i]; + + if (succ.uid() == -1) { + succ.set_uid(end.uid()); + } else { + if (succ.uid() == end.uid()) { + // nice!! + // lg::info("Saved allocation"); + } else { + succ.set_uid(uid++); + if (!succ.alloc()) { + run_again = true; + succ.set_alloc(result->alloc_regstate()); + allocation_count++; + } + } + } + } + + for (size_t i = 0; i < succ_types.slots().size(); i++) { + auto& succ = succ_types.slots()[i]; + auto& end = init_types->slots().at(i); + + if (succ.second.uid() == -1) { + succ.second.set_uid(end.second.uid()); + } else { + if (succ.second.uid() == end.second.uid()) { + // nice!! + // lg::info("Saved allocation"); + } else { + succ.second.set_uid(uid++); + if (!succ.second.alloc()) { + run_again = true; + succ.second.set_alloc(result->alloc_regstate()); + allocation_count++; + } + } + } + } + } + } + } + } + + int allocations_size_kb = (allocation_count * sizeof(RegisterTypeState)) / 1024; + int total_size_new_method_kb = allocations_size_kb + ref_size_kb; + int total_size_old_method_kb = (instr_count * 64 * sizeof(RegisterTypeState)) / 1024; + + if (total_size_old_method_kb > 1000) { + lg::info("Function {} new {} kb old {} kb, {} allocs", func.guessed_name.to_string(), + total_size_new_method_kb, total_size_old_method_kb, allocation_count); + } + + return result; +} + +bool DerefHint::matches(const FieldReverseLookupOutput& value) const { + if (value.tokens.size() != tokens.size()) { + return false; + } + + for (size_t i = 0; i < value.tokens.size(); i++) { + if (!tokens[i].matches(value.tokens[i])) { + return false; + } + } + + return true; +} + +bool DerefHint::Token::matches(const FieldReverseLookupOutput::Token& other) const { + switch (kind) { + case Kind::INTEGER: + return other.kind == FieldReverseLookupOutput::Token::Kind::CONSTANT_IDX && + other.idx == integer; + case Kind::FIELD: + return other.kind == FieldReverseLookupOutput::Token::Kind::FIELD && other.name == name; + case Kind::VAR: + return other.kind == FieldReverseLookupOutput::Token::Kind::VAR_IDX; + default: + assert(false); + } +} + +const PossibleType& TypeChoiceParent::get() const { + return reg_type->possible_types.at(idx_in_parent); +} + +PossibleType& TypeChoiceParent::get() { + return reg_type->possible_types.at(idx_in_parent); +} + +void TypeChoiceParent::remove_ref() { + assert(get().child_count > 0); + get().child_count--; + if (get().child_count == 0) { + get().eliminate(); + } +} + +/*! + * Have we been eliminated or not? + */ +bool PossibleType::is_valid() const { + if (!m_valid_cache) { + // either explicitly eliminated, or we cached this from last time. + return false; + } + + if (parent.reg_type) { + if (!parent.get().is_valid()) { + // some parent is eliminated, so are we. cache it. + m_valid_cache = false; + return false; + } + } + + return true; +} + +void PossibleType::eliminate() { + assert(is_valid()); // todo remove + // make us invalid + m_valid_cache = false; + if (parent.reg_type) { + parent.remove_ref(); + } +} + +/*! + * If we have multiple types, pick the one with the highest deref path score. + * If warnings is set, and we have to throw away a valid type, prints a warning that we made a + * somewhat arbitrary decision to throw a possible type. + * + * After calling this, you can use get_single_tp_type and get_single_type_decision. + */ +void RegisterTypeState::reduce_to_single_best_type(DecompWarnings* warnings, + int op_idx, + const DerefHint* hint) { + if (is_temp_node) { + assert(single_type_cache); + return; + } + double best_score = -std::numeric_limits::infinity(); + int best_idx = -1; + bool printed_first_warning = false; + std::string warning_string; + + // find the highest score that's valid. + for (int i = 0; i < (int)possible_types.size(); i++) { + if (possible_types[i].score > best_score && possible_types[i].is_valid()) { + best_idx = i; + best_score = possible_types[i].score; + } + + // if we match the hint, just use that. + if (hint && possible_types[i].deref_path && hint->matches(*possible_types[i].deref_path)) { + best_idx = i; + warnings = nullptr; // never warn if we take the hint + break; + } + } + assert(best_idx != -1); + + // eliminate stuff that isn't the best. + for (int i = 0; i < (int)possible_types.size(); i++) { + if (i != best_idx) { + // warn if we eliminate something that is possibly valid. + if (warnings && possible_types[i].is_valid()) { + if (!printed_first_warning) { + warning_string += fmt::format("Ambiguous type selection at op {}\n", op_idx); + printed_first_warning = true; + } + if (possible_types[best_idx].deref_path) { + warning_string += fmt::format(" {}\n", possible_types[best_idx].deref_path->print()); + } else { + warning_string += fmt::format(" {}\n", possible_types[best_idx].type.print()); + } + } + + possible_types[i].eliminate(); + } + } + + // cache the winner + single_type_cache = best_idx; + + if (warnings && printed_first_warning) { + warnings->general_warning(warning_string); + } +} + +/*! + * After this has been pruned to a single type, gets that type decision. + */ +const PossibleType& RegisterTypeState::get_single_type_decision() const { + assert(single_type_cache.has_value()); + assert(possible_types.at(*single_type_cache).is_valid()); // todo remove. + return possible_types[*single_type_cache]; +} + +/*! + * After this has been pruned to a single type, gets it as a TP_Type. + */ +const TP_Type& RegisterTypeState::get_single_tp_type() const { + return get_single_type_decision().type; +} + +/*! + * If there is at least one possibility to get a desired_type, removes anything that's not a + * desired_type. If it's not possible to get a desired type, does nothing. + */ +bool RegisterTypeState::try_elimination(const TypeSpec& desired_types, const TypeSystem& ts) { + std::vector to_eliminate; + int keep_count = 0; + for (int i = 0; i < (int)possible_types.size(); i++) { + const auto& possibility = possible_types[i]; + if (possibility.is_valid()) { + if (ts.tc(desired_types, possibility.type.typespec())) { + keep_count++; + } else { + to_eliminate.push_back(i); + } + } + } + + if (keep_count > 0) { + for (auto idx : to_eliminate) { + possible_types.at(idx).eliminate(); + } + return true; + } + return false; +} + +bool RegisterTypeState::can_eliminate_to_get(const TypeSpec& desired_types, + const TypeSystem& ts) const { + for (int i = 0; i < (int)possible_types.size(); i++) { + const auto& possibility = possible_types[i]; + if (possibility.is_valid()) { + if (ts.tc(desired_types, possibility.type.typespec())) { + return true; + } + } + } + return false; +} + +void InstrTypeState::assign(const Register& reg, const RegisterTypeState& value) { + auto& node = get(reg); + assert(node.is_alloc_point()); + *node.ptr() = value; +} + +namespace { + +template +void for_each_regnode(InstrTypeState& state, const T& f) { + for (auto& reg : state.regs()) { + f(reg); + } + + for (auto& slot : state.slots()) { + f(slot.second); + } +} + +template +void for_each_regnode_pair(InstrTypeState& state_a, InstrTypeState& state_b, const T& f) { + for (size_t i = 0; i < state_a.regs().size(); i++) { + f(state_a.regs()[i], state_b.regs()[i]); + } + + assert(state_a.slots().size() == state_b.slots().size()); + for (size_t i = 0; i < state_a.slots().size(); i++) { + auto& a = state_a.slots()[i]; + auto& b = state_b.slots()[i]; + assert(a.first == b.first); + f(a.second, b.second); + } +} + +/*! + * Create a register type state with no parent and the given typespec. + */ +RegisterTypeState make_typespec_parent_regstate(const TypeSpec& typespec) { + return RegisterTypeState(TP_Type::make_from_ts(typespec)); +} + +/*! + * Create a register type state with no parent and the given typespec. + */ +RegisterTypeState make_typespec_parent_regstate(const TP_Type& typespec) { + return RegisterTypeState(typespec); +} + +/*! + * Create an instruction type state for the first instruction of a function. + */ +void construct_initial_typestate(InstrTypeState* result, + const TypeSpec& function_type, + const TypeSpec& behavior_type, + const Env& env, + const RegisterTypeState& uninitialized) { + // start with everything uninitialized + for_each_regnode(*result, [&](RegisterNode& node) { + assert(node.is_alloc_point()); + *node.ptr() = uninitialized; + }); + + assert(function_type.base_type() == "function"); + assert(function_type.arg_count() >= 1); // must know the function type. + assert(function_type.arg_count() <= 8 + 1); // 8 args + 1 return. + + for (int i = 0; i < int(function_type.arg_count()) - 1; i++) { + auto reg_id = Register::get_arg_reg(i); + const auto& reg_type = function_type.get_arg(i); + result->assign(reg_id, make_typespec_parent_regstate(reg_type)); + ; + } + + if (behavior_type != TypeSpec("none")) { + result->assign(Register(Reg::GPR, Reg::S6), make_typespec_parent_regstate(behavior_type)); + } +} + +/*! + * Modify the state to include user cases. Will prune as needed. + * If we can't make it with pruning, modify. + * TODO: I don't know if this temp nodes thing is a good idea or not + */ +InstrTypeState get_input_types_with_user_casts( + const std::vector* user_casts, + const std::unordered_map* stack_casts, + InstrTypeState& state, + const DecompilerTypeSystem& dts, + std::vector>& temp_nodes) { + // we parse a string from a JSON config file here, so do this in a try/catch + try { + // first, see if pruning can help us get closer... + if (user_casts) { + for (const auto& cast : *user_casts) { + TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); + // first, let's see if we can just prune the tree: + // TODO: maybe there should be an option to avoid this? + if (state.get_state(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { + // we can! Just prune. This modifies the input, which is what we want. + bool success = state.get_state(cast.reg).try_elimination(type_from_cast, dts.ts); + assert(success); + } + } + } + + if (stack_casts) { + for (const auto& [offset, cast] : *stack_casts) { + auto stack_state = state.get_slot_state(offset); + + TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); + if (stack_state.can_eliminate_to_get(type_from_cast, dts.ts)) { + bool success = stack_state.try_elimination(type_from_cast, dts.ts); + assert(success); + } + } + } + + // now we need to make modifications: + InstrTypeState result = state; + + auto make_temp = [&]() { + temp_nodes.push_back(std::make_unique()); + auto* result = temp_nodes.back().get(); + result->is_temp_node = true; + return result; + }; + + if (user_casts) { + for (const auto& cast : *user_casts) { + TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); + + if (!state.get_state(cast.reg).can_eliminate_to_get(type_from_cast, dts.ts)) { + // nope we can't make it work. + // need to make a change here. It's fine to lose our decision history here because + // we showed that there is no way to get what the user wants by pruning. + auto temp = make_temp(); + *temp = make_typespec_parent_regstate(type_from_cast); + result.get(cast.reg).set_cast_temp_ptr(temp); + } + } + } + + if (stack_casts) { + for (const auto& [offset, cast] : *stack_casts) { + auto& stack_state = state.get_slot_state(offset); + TypeSpec type_from_cast = dts.parse_type_spec(cast.type_name); + if (!stack_state.can_eliminate_to_get(type_from_cast, dts.ts)) { + auto temp = make_temp(); + *temp = make_typespec_parent_regstate(type_from_cast); + result.get_slot(offset).set_cast_temp_ptr(temp); + } + } + } + + return result; + + } catch (std::exception& e) { + lg::die("Failed to parse type cast hint: {}\n", e.what()); + throw; + } +} + +void simplify_to_single(int idx, DecompWarnings* warnings, DerefHint* hint, InstrTypeState& state) { + for_each_regnode(state, [&](RegisterNode& node) { + node.ptr()->reduce_to_single_best_type(warnings, idx, hint); + }); +} + +/*! + * Set combined to lca(combined, add) and do single simplification. + */ +bool multi_lca(InstrTypeState& combined, + InstrTypeState& add, + int pred_idx, + int succ_idx, + DecompWarnings* warnings, + DecompilerTypeSystem& dts) { + bool result = false; + // first, simplify add: + simplify_to_single(pred_idx, warnings, nullptr, add); + + for_each_regnode_pair(combined, add, [&](RegisterNode& c, const RegisterNode& a) { + assert(c.is_alloc_point()); + + bool diff = false; + auto new_type = dts.tp_lca(c.ptr()->get_single_tp_type(), a.ptr()->get_single_tp_type(), &diff); + if (diff) { + result = true; + // we checked is_alloc_point above. + *c.ptr() = make_typespec_parent_regstate(new_type); + } + }); + return result; +} + +TypeState convert_to_old_format(const InstrTypeState& input) { + TypeState t; + for (int regid = 0; regid < Reg::MAX_VAR_REG_ID; regid++) { + t.get(Register(regid)) = input.regs().at(regid).ptr()->get_single_tp_type(); + } + + for (const auto& [offset, slot] : input.slots()) { + t.get_slot(offset) = slot.ptr()->get_single_tp_type(); + } + return t; +} + +std::vector convert_to_old_format(const std::vector& input) { + std::vector result; + result.reserve(input.size()); + for (auto& x : input) { + result.push_back(convert_to_old_format(x)); + } + return result; +} + + +bool dbg_types = true; +} // namespace + +bool run_multi_type_analysis(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + Function& func, + TypeAnalysisGraph& graph) { + if (dbg_types) { + fmt::print("mtyp {}\n", func.guessed_name.to_string()); + } + // STEP 0 - set decompiler type system settings for this function. these should be cleaned up + // eventually... + if (func.guessed_name.kind == FunctionName::FunctionKind::METHOD) { + dts.type_prop_settings.current_method_type = func.guessed_name.type_name; + } + + // set up none-returning function junk. + if (my_type.last_arg() == TypeSpec("none")) { + auto as_end = dynamic_cast(func.ir2.atomic_ops->ops.back().get()); + assert(as_end); + as_end->mark_function_as_no_return_value(); + } + + auto& block_init_types = graph.block_start_types; + auto& op_types = graph.after_op_types; + auto& aop = func.ir2.atomic_ops; + + // STEP 1 - topological sort the blocks. This gives us an order where we: + // - never visit unreachable blocks (we can't type propagate these) + // - always visit at least one predecessor of a block before that block + const auto& order = graph.topo_sort; + assert(!order.vist_order.empty()); + assert(order.vist_order.front() == 0); + + // STEP 2 - initialize type state for the first block to the function argument types. + auto uninitialized = RegisterTypeState(PossibleType(TP_Type::make_uninitialized())); + construct_initial_typestate(&block_init_types.at(0), my_type, TypeSpec("process"), func.ir2.env, + uninitialized); + + // STEP 3 - propagate types until the result stops changing + bool run_again = true; + while (run_again) { + fmt::print("ITER\n"); + run_again = false; + // do each block in the topological sort order: + for (auto block_id : order.vist_order) { + fmt::print("BLOCK {}\n", block_id); + auto& block = func.basic_blocks.at(block_id); + // pointer to the types (no user casts) before the op. + auto* preceding_types = &block_init_types.at(block_id); + + // ops in block, in order + for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); + op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { + auto& op = aop->ops.at(op_id); + // look for hints: + const std::vector* user_casts = nullptr; + const std::unordered_map* stack_casts = nullptr; + const auto& cast_it = func.ir2.env.casts().find(op_id); + if (cast_it != func.ir2.env.casts().end()) { + user_casts = &cast_it->second; + } + + if (!func.ir2.env.stack_casts().empty()) { + stack_casts = &func.ir2.env.stack_casts(); + } + + try { + auto& dest = op_types.at(op_id); + dest = *preceding_types; + if (stack_casts || user_casts) { + std::vector> temp_nodes; + auto casted = get_input_types_with_user_casts(user_casts, stack_casts, *preceding_types, + dts, temp_nodes); + op->multi_types(&dest, casted, func.ir2.env, dts); + } else { + op->multi_types(&dest, *preceding_types, func.ir2.env, dts); + } + } catch (std::runtime_error& e) { + lg::warn("Function {} failed type prop at op {}: {}", func.guessed_name.to_string(), + op_id, e.what()); + func.warnings.type_prop_warning("{}", e.what()); + // func.ir2.env.set_types(block_init_types, op_types, *func.ir2.atomic_ops, my_type); + return false; + } + + // for the next op... + preceding_types = &op_types.at(op_id); + } + + // propagate the types: for each possible succ + for (auto succ_block_id : {block.succ_ft, block.succ_branch}) { + if (succ_block_id != -1) { + // set types to LCA (current, new) + if (multi_lca(block_init_types.at(succ_block_id), *preceding_types, block_id, + succ_block_id, nullptr, dts)) { + run_again = true; + } + } + } + } + } + + auto last_type = + op_types.back().get(Register(Reg::GPR, Reg::V0)).ptr()->get_single_tp_type().typespec(); + if (last_type != my_type.last_arg()) { + func.warnings.info("Return type mismatch {} vs {}.", last_type.print(), + my_type.last_arg().print()); + } + + // and apply final casts: + for (auto block_id : order.vist_order) { + for (int op_id = aop->block_id_to_first_atomic_op.at(block_id); + op_id < aop->block_id_to_end_atomic_op.at(block_id); op_id++) { + const std::vector* user_casts = nullptr; + const std::unordered_map* stack_casts = nullptr; + const auto& cast_it = func.ir2.env.casts().find(op_id); + if (cast_it != func.ir2.env.casts().end()) { + user_casts = &cast_it->second; + } + + if (!func.ir2.env.stack_casts().empty()) { + stack_casts = &func.ir2.env.stack_casts(); + } + + if (user_casts || stack_casts) { + if (op_id == aop->block_id_to_first_atomic_op.at(block_id)) { + block_init_types.at(block_id) = get_input_types_with_user_casts( + user_casts, stack_casts, block_init_types.at(block_id), dts, graph.final_cast_nodes); + + } else { + op_types.at(op_id - 1) = get_input_types_with_user_casts( + user_casts, stack_casts, op_types.at(op_id - 1), dts, graph.final_cast_nodes); + } + } + } + } + + // figure out the types of stack spill variables: + auto& env = func.ir2.env; + bool changed; + for (auto& type_info : op_types) { + for (auto& spill : type_info.slots()) { + auto& slot_info = env.stack_slot_entries[spill.first]; + slot_info.tp_type = dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, + spill.second.ptr()->get_single_tp_type(), &changed); + slot_info.offset = spill.first; + } + } + + for (auto& type_info : block_init_types) { + for (auto& spill : type_info.slots()) { + auto& slot_info = env.stack_slot_entries[spill.first]; + slot_info.tp_type = dts.tp_lca(env.stack_slot_entries[spill.first].tp_type, + spill.second.ptr()->get_single_tp_type(), &changed); + slot_info.offset = spill.first; + } + } + + // convert to typespec + for (auto& info : env.stack_slot_entries) { + info.second.typespec = info.second.tp_type.typespec(); + // debug + // fmt::print("STACK {} : {} ({})\n", info.first, info.second.typespec.print(), + // info.second.tp_type.print()); + } + + func.ir2.env.set_types(convert_to_old_format(block_init_types), convert_to_old_format(op_types), + *func.ir2.atomic_ops, my_type); + + return true; +} + +void AtomicOp::multi_types(InstrTypeState* output, + InstrTypeState& input, + const Env& env, + DecompilerTypeSystem& dts) { + for (auto& reg : clobber_regs()) { + assert(output->get(reg).is_clobber()); + } + + multi_types_internal(output, input, env, dts); +} + +void AtomicOp::multi_types_internal(InstrTypeState*, + InstrTypeState&, + const Env&, + DecompilerTypeSystem&) { + throw std::runtime_error( + fmt::format("multi_type_internal not yet implemented for {}", typeid(*this).name())); +} } // namespace decompiler \ No newline at end of file diff --git a/decompiler/IR2/MultiTypeAnalysis.h b/decompiler/IR2/MultiTypeAnalysis.h index 62f317324e..97f233ab4e 100644 --- a/decompiler/IR2/MultiTypeAnalysis.h +++ b/decompiler/IR2/MultiTypeAnalysis.h @@ -32,6 +32,9 @@ struct DerefHint { struct TypeChoiceParent { RegisterTypeState* reg_type = nullptr; int idx_in_parent = -1; + const PossibleType& get() const; + PossibleType& get(); + void remove_ref(); }; /*! @@ -41,9 +44,13 @@ struct TypeChoiceParent { */ struct PossibleType { TP_Type type; // the actual type. - std::optional - deref_path; // the field accessed to get here, assuming we did a deref. - double score = 0.; // the sum of scores of all derefs to get here. + + // the field accessed to get here, assuming we did a deref. + std::optional deref_path; + + // the sum of scores of all derefs to get here. + // this can be used to compare us to others in the same RegisterTypeState. + double score = 0.; // if we are a child, 0. // otherwise, the number of children who have a reference to us. @@ -68,12 +75,15 @@ struct RegisterTypeState { std::optional override_type; // if we're simplified to a single type, this will hold in the index in the possible types vector. - // the types we can be. std::vector possible_types; + bool is_temp_node = false; + RegisterTypeState() = default; - RegisterTypeState(const PossibleType& single_type) : possible_types({single_type}) {} + RegisterTypeState(const PossibleType& single_type) : possible_types({single_type}) { + single_type_cache = 0; + } void reduce_to_single_best_type(DecompWarnings* warnings, int op_idx, const DerefHint* hint); bool is_single_type() const; const PossibleType& get_single_type_decision() const; @@ -90,36 +100,37 @@ struct RegisterTypeState { * During setup, this contains a alloc flag and a uid. * While it's running, it contains a pointer. */ -/* -struct RegisterNode { - RegisterTypeState* ptr() { return (RegisterTypeState*)data; } - bool alloc() { return data & 1; } - u64 uid() { return data >> 32; } - void set_alloc() { data |= 1; } - void set_uid(u64 uid) { data |= (uid << 32); } - -private: - uintptr_t data = 0; - static_assert(sizeof(uintptr_t) == 8); -}; - */ struct RegisterNode { RegisterTypeState* ptr() { return m_ptr; } - void set_ptr(RegisterTypeState* ptr) { m_ptr = ptr; } + const RegisterTypeState* ptr() const { return m_ptr; } + void set_cast_temp_ptr(RegisterTypeState* ptr) { + m_ptr = ptr; + m_flags |= FLAG_CAST_TEMP; + } bool alloc() const { return !!m_ptr; } void set_alloc(RegisterTypeState* state) { m_ptr = state; - m_alloc_point = true; + m_flags |= FLAG_ALLOC_POINT; } - bool is_alloc_point() const { return m_alloc_point; } + void set_clobber(RegisterTypeState* state) { + m_ptr = state; + m_flags |= FLAG_CLOBBER; + } + bool is_alloc_point() const { return m_flags & FLAG_ALLOC_POINT; } + bool is_clobber() const { return m_flags & FLAG_CLOBBER; } s64 uid() const { return m_uid; } void set_uid(s64 val) { m_uid = val; } private: RegisterTypeState* m_ptr = nullptr; s32 m_uid = 0; - bool m_alloc_point = false; + + u8 m_flags = 0; + static constexpr u8 FLAG_ALLOC_POINT = 1; + static constexpr u8 FLAG_CLOBBER = 2; + static constexpr u8 FLAG_CAST_TEMP = 4; + static constexpr u8 FLAG_CAST_FINAL = 8; }; class InstrTypeState { @@ -128,9 +139,11 @@ class InstrTypeState { int stack_slot_count() const { return m_stack_slots.size(); } std::array& regs() { return m_regs; } std::vector>& slots() { return m_stack_slots; } + const std::array& regs() const { return m_regs; } + const std::vector>& slots() const { return m_stack_slots; } RegisterNode& get_slot(int offset) { - for(auto& s : m_stack_slots) { + for (auto& s : m_stack_slots) { if (s.first == offset) { return s.second; } @@ -143,6 +156,10 @@ class InstrTypeState { return m_regs[reg.reg_id()]; } + RegisterTypeState& get_state(const Register& reg) { return *get(reg).ptr(); } + RegisterTypeState& get_slot_state(int offset) { return *get_slot(offset).ptr(); } + void assign(const Register& reg, const RegisterTypeState& value); + private: std::array m_regs; std::vector> m_stack_slots; @@ -151,6 +168,7 @@ class InstrTypeState { struct TypeAnalysisGraph { std::vector after_op_types; std::vector block_start_types; + std::vector> final_cast_nodes; BlockTopologicalSort topo_sort; @@ -161,9 +179,13 @@ struct TypeAnalysisGraph { class Function; class DecompilerTypeSystem; -TypeAnalysisGraph make_analysis_graph(const TypeSpec& my_type, - DecompilerTypeSystem& dts, - Function& func, - bool verbose); +std::shared_ptr allocate_analysis_graph(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + Function& func, + bool verbose); +bool run_multi_type_analysis(const TypeSpec& my_type, + DecompilerTypeSystem& dts, + Function& func, + TypeAnalysisGraph& graph); } // namespace decompiler \ No newline at end of file diff --git a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp index 5569afec6d..afd856c4bf 100644 --- a/decompiler/ObjectFile/ObjectFileDB_IR2.cpp +++ b/decompiler/ObjectFile/ObjectFileDB_IR2.cpp @@ -382,7 +382,7 @@ void ObjectFileDB::ir2_type_analysis_pass(const Config& config) { try_lookup(config.stack_structure_hints_by_function, func_name)); // experimental multi-type pass, for debugging. - auto tg = make_analysis_graph(ts, dts, func, true); + auto tg = allocate_analysis_graph(ts, dts, func, true); if (run_type_analysis_ir2(ts, dts, func)) { successful_functions++; @@ -989,7 +989,7 @@ std::string ObjectFileDB::ir2_final_out(ObjectFileData& data, result += ";;-*-Lisp-*-\n"; result += "(in-package goal)\n\n"; assert(data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).size() == 1); - auto top_level = data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).at(0); + auto& top_level = data.linked_data.functions_by_seg.at(TOP_LEVEL_SEGMENT).at(0); result += write_from_top_level(top_level, dts, data.linked_data, skip_functions); result += "\n\n"; return result; diff --git a/test/decompiler/FormRegressionTest.cpp b/test/decompiler/FormRegressionTest.cpp index d2dba22108..459d8e989a 100644 --- a/test/decompiler/FormRegressionTest.cpp +++ b/test/decompiler/FormRegressionTest.cpp @@ -185,8 +185,12 @@ std::unique_ptr FormRegressionTest::make_function( } // analyze types - EXPECT_TRUE(run_type_analysis_ir2(function_type, *dts, test->func)); - test->func.ir2.env.types_succeeded = true; + // EXPECT_TRUE(run_type_analysis_ir2(function_type, *dts, test->func)); + auto tg = allocate_analysis_graph(function_type, *dts, test->func, true); + bool ok = run_multi_type_analysis(function_type, *dts, test->func, *tg); + EXPECT_TRUE(ok); + test->func.ir2.env.set_type_graph(tg); + test->func.ir2.env.types_succeeded = ok; // analyze registers test->func.ir2.env.set_reg_use(analyze_ir2_register_usage(test->func));