d/j3: all-types: guess at associated process for non-virtual states and fix some issues along the way (#3300)

This will make a best-effort attempt at guessing which process a
non-virtual state belongs to:
![Screenshot 2024-01-08
195309](https://github.com/open-goal/jak-project/assets/13153231/69132f10-823d-4df5-b2d6-662d4dd754a0)

I also noticed some issues while working on this, mainly around how
virtual states were being output. They were being duplicated, for
example:
![Screenshot 2024-01-08
184733](https://github.com/open-goal/jak-project/assets/13153231/1edb0f1a-3ac7-46cb-96cd-cf93d42fb01f)
or
![Screenshot 2024-01-08
193730](https://github.com/open-goal/jak-project/assets/13153231/45673653-4000-45bb-af00-9baa6e2a70ae)

I think I've fixed that, but @Hat-Kid I defer to you to see if I've done
something terrible.
![Screenshot 2024-01-08
194513](https://github.com/open-goal/jak-project/assets/13153231/75543d2e-69da-4bbd-b143-2f824b9d8dde)
This commit is contained in:
Tyler Wilding
2024-01-09 16:50:05 -05:00
committed by GitHub
parent 9d680a0aba
commit 4cccaf2645
6 changed files with 176 additions and 115 deletions
+60 -7
View File
@@ -10,6 +10,7 @@
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include "common/util/Timer.h"
#include "common/util/string_util.h"
#include "decompiler/IR2/Form.h"
#include "decompiler/analysis/analyze_inspect_method.h"
@@ -319,7 +320,7 @@ void ObjectFileDB::ir2_top_level_pass(const Config& config) {
void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
const std::optional<std::string>& previous_game_types,
const std::unordered_set<std::string>& bad_types) {
std::vector<PerObjectAllTypeInfo> per_object;
std::unordered_map<std::string, PerObjectAllTypeInfo> per_object;
DecompilerTypeSystem previous_game_ts(GameVersion::Jak2); // version here doesn't matter.
if (previous_game_types) {
@@ -328,16 +329,66 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
TypeInspectorCache ti_cache;
// Do a first pass to initialize all types and symbols
for_each_obj([&](ObjectFileData& data) {
if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) {
auto& object_result = per_object.emplace_back();
object_result.object_name = data.to_unique_name();
per_object[data.to_unique_name()] = PerObjectAllTypeInfo();
// Go through the top-level segment first to identify the type names associated with each
// symbol def
for_each_function_in_seg_in_obj(TOP_LEVEL_SEGMENT, data, [&](Function& f) {
inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts, object_result);
inspect_top_level_for_metadata(f, data.linked_data, dts, previous_game_ts,
per_object.at(data.to_unique_name()));
});
}
});
// Guess at non-virtual state types:
//
// Collect all type names. Since the DTS doesn't know the actual type tree (all-types is empty!),
// we can't filter by what is actually a process type (with existing code).
std::unordered_map<std::string, std::vector<std::string>> all_type_names;
for (auto& [obj_name, obj_info] : per_object) {
for (const auto& type_name : obj_info.type_names_in_order) {
if (all_type_names.find(obj_name) == all_type_names.end()) {
all_type_names[obj_name] = {};
}
all_type_names[obj_name].push_back(type_name);
}
}
std::unordered_map<std::string, std::string> state_to_type_map;
for (auto& [obj_name, obj_info] : per_object) {
for (const auto& [sym_name, sym_type] : obj_info.symbol_types) {
if (sym_type == "state") {
int longest_match_length = 0;
std::string longest_match = "";
std::string longest_match_object_name = "";
// Make a best effort guess by finding the longest prefix match
for (const auto& [obj_name, type_names] : all_type_names) {
for (const auto& type_name : type_names) {
if (str_util::starts_with(sym_name, type_name) &&
type_name.length() > longest_match_length) {
longest_match_length = type_name.length();
longest_match = type_name;
longest_match_object_name = obj_name;
}
}
}
if (longest_match != "") {
if (per_object.find(longest_match_object_name) != per_object.end()) {
per_object.at(longest_match_object_name).non_virtual_state_guesses[sym_name] =
longest_match;
obj_info.already_seen_symbols.insert(sym_name);
}
}
}
}
}
// Then another to actually setup the definitions
for_each_obj([&](ObjectFileData& data) {
if (data.obj_version == 3 || (data.obj_version == 5 && data.linked_data.has_any_functions())) {
auto& object_result = per_object.at(data.to_unique_name());
// Handle the top level last, which is fine as all symbol_defs are always written after
// typedefs
@@ -368,12 +419,14 @@ void ObjectFileDB::ir2_analyze_all_types(const fs::path& output_file,
}
});
// Output result
std::string result;
result += ";; All Types\n\n";
for (auto& obj : per_object) {
for (auto& [obj_name, obj] : per_object) {
result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n");
result += fmt::format(";; {:30s} ;;\n", obj.object_name);
result += fmt::format(";; {:30s} ;;\n", obj_name);
result += fmt::format(";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;\n\n");
for (const auto& type_name : obj.type_names_in_order) {
auto& info = obj.type_info.at(type_name);