Files
jak-project/decompiler/extractor/extractor_util.hpp
T
Tyler Wilding 6446389263 extractor: cleanup, support unicode properly, and add multi-game support (#1609)
* extractor: refactor and cleanup for multi-game support

* deps: switch to `ghc::filesystem` as it is utf-8 everywhere by default

* extractor: finally working with unicode

* unicode: fix unicode cli args on windows in all `main` functions
2022-07-05 20:38:13 -04:00

252 lines
9.0 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#pragma once
#include <optional>
#include <regex>
#include <unordered_map>
#include "common/log/log.h"
#include "common/util/FileUtil.h"
#include <common/util/json_util.h>
#include <common/util/read_iso_file.h>
#include <third-party/json.hpp>
#include "third-party/xxhash.hpp"
// Result codes for the extractor. The identifier prefix names the stage the code belongs to
// (VALIDATION_*, DECOMPILATION_*, EXTRACTION_*, COMPILATION_*); SUCCESS is the only zero value.
enum class ExtractorErrorCode {
  SUCCESS = 0,

  // command-line handling
  INVALID_CLI_INPUT = 3990,

  // validation: identifying the game from its serial / ELF
  VALIDATION_CANT_LOCATE_ELF = 4000,
  VALIDATION_SERIAL_MISSING_FROM_DB = 4001,
  VALIDATION_ELF_MISSING_FROM_DB = 4002,

  // validation: checking the extracted contents
  VALIDATION_BAD_ISO_CONTENTS = 4010,
  VALIDATION_INCORRECT_EXTRACTION_COUNT = 4011,
  VALIDATION_FILE_CONTENTS_UNEXPECTED = 4012,
  VALIDATION_BAD_EXTRACTION = 4020,

  // decompilation
  DECOMPILATION_GENERIC_ERROR = 4030,

  // extraction of the ISO itself
  EXTRACTION_INVALID_ISO_PATH = 4040,
  EXTRACTION_ISO_UNEXPECTED_SIZE = 4041,

  // compilation
  COMPILATION_BAD_PROJECT_PATH = 4050,
};
// Bit flags for ISO-specific quirks; every enumerator owns a distinct bit.
enum GameIsoFlags {
  FLAG_JAK1_BLACK_LABEL = 1 << 0,
};

// Textual flag name -> flag value. The name "jak1-black-label" is the same spelling that the
// black label ISOMetadata entry carries in its `flags` list.
static const std::unordered_map<std::string, GameIsoFlags> sGameIsoFlagNames{
    {"jak1-black-label", FLAG_JAK1_BLACK_LABEL},
};
// Sub-folder name per game, used for decompiler_out/<jak1> and iso_data/<jak1>.
// (presumably keyed by ISOMetadata::game_name - confirm against callers)
//
// Declared `inline` because this is a mutable global defined in a header: without it, every
// translation unit that includes this file emits its own external definition and the link fails
// with a duplicate symbol. It is intentionally left non-const so callers that index it with
// operator[] keep compiling.
inline std::unordered_map<std::string, std::string> data_subfolders = {{"jak1", "jak1"}};
// Description of one known game release, as stored in the ISO database.
// Field order matters: the database entries are written as positional aggregate initialisers.
struct ISOMetadata {
  // Human readable title of the release.
  std::string canonical_name;
  // Region label, e.g. "NTSC-U", "PAL" or "NTSC-J".
  std::string region;
  // File count recorded for this release (the "new database entry" hint fills this slot with the
  // number of extracted files). Zero-initialised so a default-constructed value is never
  // indeterminate, matching how BuildInfo initialises its members.
  int num_files = 0;
  // Hash recorded for the release contents (the hint fills this slot with the contents hash).
  // Zero-initialised for the same reason as num_files.
  xxh::hash64_t contents_hash = 0;
  // Name of the decompiler config for this release, without file extension.
  std::string decomp_config;
  // Game identifier such as "jak1".
  std::string game_name;
  // Optional flag names, e.g. "jak1-black-label".
  std::vector<std::string> flags;
};
// The minimal key needed to look a release up in the database again: serial + ELF hash.
// Should two releases ever collide on this pair, the database layout itself is no longer
// sufficient and has to be redesigned.
struct BuildInfo {
  // Game serial; empty means "unknown".
  std::string serial{};
  // Hash of the game's ELF; 0 means "unknown".
  xxh::hash64_t elf_hash{0};
};
// nlohmann::json serialisation hook: stores `info` in `j` as an object with the keys
// "serial" and "elf_hash".
void to_json(nlohmann::json& j, const BuildInfo& info) {
  j = nlohmann::json::object();
  j["serial"] = info.serial;
  j["elf_hash"] = info.elf_hash;
}
// nlohmann::json deserialisation hook: fills `info` from the "serial" and "elf_hash" keys.
//
// Two document shapes are accepted:
//  - an array whose first element is the object (the only shape the previous code handled), and
//  - the bare object itself, which is what to_json() produces.
// All lookups are bounds/key checked, so an empty array or a missing key raises a json exception
// instead of performing an unchecked index on a const json value.
void from_json(const nlohmann::json& j, BuildInfo& info) {
  const nlohmann::json& fields = j.is_array() ? j.at(0) : j;
  fields.at("serial").get_to(info.serial);
  fields.at("elf_hash").get_to(info.elf_hash);
}
// Loads <iso_data_path>/buildinfo.json and converts it to a BuildInfo.
// Returns an empty optional when the file does not exist, or when reading/parsing/converting it
// raises a std::exception (the error text is logged in that case).
std::optional<BuildInfo> get_buildinfo_from_path(fs::path iso_data_path) {
  const fs::path candidate = iso_data_path / "buildinfo.json";
  if (!fs::exists(candidate)) {
    return std::nullopt;
  }

  const auto buildinfo_path = candidate.string();
  try {
    const auto parsed =
        parse_commented_json(file_util::read_text_file(buildinfo_path), buildinfo_path);
    return parsed.get<BuildInfo>();
  } catch (const std::exception& e) {
    lg::error("JSON parsing error on buildinfo.json - {}", e.what());
    return std::nullopt;
  }
}
// Metadata of the Jak 1 NTSC-U black label release. Kept as a named constant because it is both
// a database entry and the value used when no better match is available.
static const ISOMetadata jak1_ntsc_black_label_info{
    "Jak & Daxter™: The Precursor Legacy (Black Label)",  // canonical_name
    "NTSC-U",                                              // region
    337,                                                   // num_files
    11363853835861842434U,                                 // contents_hash
    "jak1_ntsc_black_label",                               // decomp_config
    "jak1",                                                // game_name
    {"jak1-black-label"},                                  // flags
};
// Database of known releases, laid out as { SERIAL : { ELF_HASH : ISOMetadata } }.
// The outer key is the game serial (e.g. "SCUS-97124"); the inner key is the hash of that
// release's ELF, so several revisions sharing one serial can be told apart.
// Each ISOMetadata is written positionally: canonical_name, region, num_files, contents_hash,
// decomp_config, game_name, flags.
// NOTE(review): the second "SCUS-97124" entry is region "NTSC-U" but uses the "jak1_jp" decomp
// config - confirm this is intentional.
static const std::unordered_map<std::string, std::unordered_map<xxh::hash64_t, ISOMetadata>>
isoDatabase{{"SCUS-97124",
{{7280758013604870207U, jak1_ntsc_black_label_info},
{744661860962747854,
{"Jak & Daxter™: The Precursor Legacy",
"NTSC-U",
338,
8538304367812415885U,
"jak1_jp",
"jak1",
{}}}}},
{"SCES-50361",
{{12150718117852276522U,
{"Jak & Daxter™: The Precursor Legacy",
"PAL",
338,
16850370297611763875U,
"jak1_pal",
"jak1",
{}}}}},
{"SCPS-15021",
{{16909372048085114219U,
{"ジャックXダクスター  旧世界の遺産",
"NTSC-J",
338,
1262350561338887717,
"jak1_jp",
"jak1",
{}}}}}};
std::optional<ISOMetadata> get_version_info_from_build_info(const BuildInfo& build_info) {
if (build_info.serial.empty() || build_info.elf_hash == 0) {
return {};
}
auto dbEntry = isoDatabase.find(build_info.serial);
if (dbEntry == isoDatabase.end()) {
return {};
}
auto& metaMap = dbEntry->second;
auto meta_entry = metaMap.find(build_info.elf_hash);
if (meta_entry == metaMap.end()) {
return {};
}
return std::make_optional(meta_entry->second);
}
// Resolves the release metadata for an extracted ISO directory via its buildinfo.json.
// Whenever the build info cannot be obtained, or does not match a database entry, a warning is
// logged and the Jak 1 NTSC black label metadata is returned instead.
ISOMetadata get_version_info_or_default(const fs::path& iso_data_path) {
  const auto build_info = get_buildinfo_from_path(iso_data_path);
  if (!build_info) {
    lg::warn(
        "unable locate buildinfo.json file in iso data path, defaulting to Jak 1 - NTSC "
        "Black Label");
    return jak1_ntsc_black_label_info;
  }

  const auto resolved = get_version_info_from_build_info(*build_info);
  if (!resolved) {
    lg::warn(
        "unable to determine game version from buildinfo.json file, defaulting to Jak 1 - NTSC "
        "Black Label");
    return jak1_ntsc_black_label_info;
  }

  return *resolved;
}
// Scans the top level of an extracted ISO directory for the game's ELF file and reports its
// serial and content hash.
//
// A file counts as the ELF when its name has the shape XXXX_YYY.ZZ (4 chars, '_', 3 chars, '.',
// 2 chars); the serial is rebuilt from it as "XXXX-YYYZZ", which is the key format used by the
// ISO database (e.g. "SCUS-97124"). Only the first matching directory entry is used.
//
// Returns {serial, elf_hash}; both are empty when no file matches.
//
// The file is read through file_util::read_binary_file - the same helper the directory overload
// of calculate_extraction_hash uses - instead of hand-written fopen/ftell/fread. The manual code
// never checked the FILE* for null, did not handle ftell's error value, indexed element 0 of an
// empty buffer for a zero-length file, and ignored the fread result.
// NOTE(review): read failures are now whatever read_binary_file reports - confirm callers are
// fine with that.
std::tuple<std::optional<std::string>, std::optional<xxh::hash64_t>> findElfFile(
    const fs::path& extracted_iso_path) {
  // Compiled once rather than once per directory entry.
  static const std::regex elf_name_pattern(".{4}_.{3}\\..{2}");

  std::optional<std::string> serial = std::nullopt;
  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
  for (const auto& entry : fs::directory_iterator(extracted_iso_path)) {
    const auto file_name = entry.path().filename().string();
    if (!std::regex_match(file_name, elf_name_pattern)) {
      continue;
    }
    serial = std::make_optional(fmt::format("{}-{}", file_name.substr(0, 4),
                                            file_name.substr(5, 3) + file_name.substr(9, 2)));
    // We already found the path, so hash it while we're here
    auto buffer = file_util::read_binary_file(entry.path().string());
    elf_hash = std::make_optional(xxh::xxhash<64>(buffer));
    break;
  }
  return {serial, elf_hash};
}
// Logs a ready-to-paste database entry for a release that failed validation because it is
// unknown, so that support for it can be added easily.
//
//  - VALIDATION_SERIAL_MISSING_FROM_DB: logs a complete { SERIAL : { ELF_HASH : metadata } } entry.
//  - VALIDATION_ELF_MISSING_FROM_DB:    logs an { ELF_HASH : metadata } entry for an existing serial.
//  - any other code: logs nothing.
//
// The format strings use the `{{` / `}}` brace escapes. The literal empty flags list at the end
// of each entry must therefore be written as `{{}}`; it used to be a bare `{}`, which is a
// placeholder, leaving each string with one more placeholder than arguments. The hint text also
// had a "DECOMP_CONFIF" typo, and the ELF-only hint lacked the `U` suffix on the hash that the
// serial hint and the database literals use.
void log_potential_new_db_entry(ExtractorErrorCode error_code,
                                const std::string& serial,
                                const xxh::hash64_t elf_hash,
                                const int files_extracted,
                                const xxh::hash64_t contents_hash) {
  // Generate the map entry to make things simple, just a convenience
  if (error_code == ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB) {
    lg::info(
        "If this is a new release or version that should be supported, consider adding the "
        "following serial entry to the database:");
    // Last two pieces: `{{}}` prints the empty flags list, then eight `}` print the four braces
    // closing the metadata, the hash pair, the hash map and the serial pair.
    lg::info(
        "\t'{{\"{}\", {{{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\", \"jak1|jak2|jak3|jakx\", {{}}"
        "}}}}}}}}'",
        serial, elf_hash, files_extracted, contents_hash);
  } else if (error_code == ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB) {
    lg::info(
        "If this is a new release or version that should be supported, consider adding the "
        "following ELF entry to the database under the '{}' serial:",
        serial);
    // Last two pieces: `{{}}` prints the empty flags list, then four `}` print the two braces
    // closing the metadata and the hash pair.
    lg::info(
        "\t'{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\", \"jak1|jak2|jak3|jakx\", {{}}"
        "}}}}'",
        elf_hash, files_extracted, contents_hash);
  }
}
// Performs cheap sanity checks on a file that is supposed to be a game ISO.
//
// Returns {true, SUCCESS} when the file passes, otherwise {false, <reason>}:
//  - EXTRACTION_INVALID_ISO_PATH    the extension is not ".iso" (compared case-insensitively)
//  - EXTRACTION_ISO_UNEXPECTED_SIZE the file is smaller than 1,000,000,000 bytes
// Both failures are logged. fs::file_size is called without an error_code, so filesystem errors
// (e.g. a path that does not exist) propagate as exceptions, as before.
std::tuple<bool, ExtractorErrorCode> is_iso_file(fs::path path_to_supposed_iso) {
  // The extension check is case-insensitive so that spellings such as ".Iso" are accepted too;
  // previously only the all-lowercase and all-uppercase forms matched. Built once and reused.
  static const std::regex iso_extension("\\.iso", std::regex::icase);

  const std::string ext = path_to_supposed_iso.extension().string();
  if (!std::regex_match(ext, iso_extension)) {
    lg::error("Provided game data path contains a file that isn't a .ISO!");
    return {false, ExtractorErrorCode::EXTRACTION_INVALID_ISO_PATH};
  }

  // make sure the .iso is at least 1GB (10^9 bytes) in size
  // to-do: verify game header data as well
  constexpr unsigned long long kMinimumIsoBytes = 1000000000ULL;
  const auto iso_size = fs::file_size(path_to_supposed_iso);  // queried once, reused in the log
  if (iso_size < kMinimumIsoBytes) {
    lg::error("Provided game data file appears to be too small or corrupted! Size is: {}",
              iso_size);
    return {false, ExtractorErrorCode::EXTRACTION_ISO_UNEXPECTED_SIZE};
  }

  return {true, ExtractorErrorCode::SUCCESS};
}
std::tuple<xxh::hash64_t, int> calculate_extraction_hash(const IsoFile& iso_file) {
// - XOR all hashes together and hash the result. This makes the ordering of the hashes (aka
// files) irrelevant
xxh::hash64_t combined_hash = 0;
for (const auto& hash : iso_file.hashes) {
combined_hash ^= hash;
}
return {xxh::xxhash<64>({combined_hash}), iso_file.hashes.size()};
}
// Hashes every regular file found (recursively) under `extracted_iso_path` and combines the
// results into one order-independent value: the per-file hashes are XORed together and the
// folded value is hashed once more. Returns {combined hash, number of regular files}.
std::tuple<xxh::hash64_t, int> calculate_extraction_hash(const fs::path& extracted_iso_path) {
  xxh::hash64_t xor_of_hashes = 0;
  int regular_file_count = 0;

  for (const auto& item : fs::recursive_directory_iterator(extracted_iso_path)) {
    if (!item.is_regular_file()) {
      continue;  // directories and other non-regular entries contribute nothing
    }
    auto contents = file_util::read_binary_file(item.path().string());
    xor_of_hashes ^= xxh::xxhash<64>(contents);
    ++regular_file_count;
  }

  return {xxh::xxhash<64>({xor_of_hashes}), regular_file_count};
}