mirror of
https://github.com/open-goal/jak-project
synced 2026-05-26 07:39:12 -04:00
6446389263
* extractor: refactor and cleanup for multi-game support * deps: switch to `ghc::filesystem` as it is utf-8 everywhere by default * extractor: finally working with unicode * unicode: fix unicode cli args on windows in all `main` functions
252 lines
9.0 KiB
C++
252 lines
9.0 KiB
C++
#pragma once
|
||
|
||
#include <optional>
|
||
#include <regex>
|
||
#include <unordered_map>
|
||
|
||
#include "common/log/log.h"
|
||
#include "common/util/FileUtil.h"
|
||
#include <common/util/json_util.h>
|
||
#include <common/util/read_iso_file.h>
|
||
|
||
#include <third-party/json.hpp>
|
||
|
||
#include "third-party/xxhash.hpp"
|
||
|
||
// Status codes used by the extractor. The numeric ranges group the codes by the
// stage they belong to (CLI input, validation, decompilation, extraction, compilation).
enum class ExtractorErrorCode {
  // --- general ---
  SUCCESS = 0,
  INVALID_CLI_INPUT = 3990,

  // --- validation: identifying the game ---
  VALIDATION_CANT_LOCATE_ELF = 4000,
  VALIDATION_SERIAL_MISSING_FROM_DB = 4001,
  VALIDATION_ELF_MISSING_FROM_DB = 4002,

  // --- validation: checking the extracted contents ---
  VALIDATION_BAD_ISO_CONTENTS = 4010,
  VALIDATION_INCORRECT_EXTRACTION_COUNT = 4011,
  VALIDATION_FILE_CONTENTS_UNEXPECTED = 4012,
  VALIDATION_BAD_EXTRACTION = 4020,

  // --- decompilation ---
  DECOMPILATION_GENERIC_ERROR = 4030,

  // --- extraction ---
  EXTRACTION_INVALID_ISO_PATH = 4040,
  EXTRACTION_ISO_UNEXPECTED_SIZE = 4041,

  // --- compilation ---
  COMPILATION_BAD_PROJECT_PATH = 4050,
};
|
||
|
||
// Bit flags that mark special properties of a particular game ISO.
// Each enumerator occupies its own bit so that flags can be OR-ed together.
enum GameIsoFlags {
  FLAG_JAK1_BLACK_LABEL = (1 << 0),
};

// Maps the textual name of a flag to its GameIsoFlags value.
static const std::unordered_map<std::string, GameIsoFlags> sGameIsoFlagNames = {
    {"jak1-black-label", FLAG_JAK1_BLACK_LABEL},
};
|
||
|
||
// used for - decompiler_out/<jak1> and iso_data/<jak1>
// Maps a game name to the name of its data sub-folder.
// Declared `inline` (C++17) because this definition lives in a header: every translation
// unit that includes it then shares one map instead of producing a multiple-definition
// link error.
inline std::unordered_map<std::string, std::string> data_subfolders = {{"jak1", "jak1"}};
|
||
|
||
struct ISOMetadata {
|
||
std::string canonical_name;
|
||
std::string region;
|
||
int num_files;
|
||
xxh::hash64_t contents_hash;
|
||
std::string decomp_config;
|
||
std::string game_name;
|
||
std::vector<std::string> flags;
|
||
};
|
||
|
||
// This is all we need to re-fetch info from the database
|
||
// - if this changes such that we have a collision in the future,
|
||
// then the database isn't adequate and everything must change
|
||
struct BuildInfo {
|
||
std::string serial = "";
|
||
xxh::hash64_t elf_hash = 0;
|
||
};
|
||
|
||
// nlohmann::json serialization hook for BuildInfo: stores the "serial" and "elf_hash"
// members under keys of the same name.
// `inline` because this is defined in a header and would otherwise be a duplicate symbol
// as soon as two translation units include it.
inline void to_json(nlohmann::json& j, const BuildInfo& info) {
  j = nlohmann::json{{"serial", info.serial}, {"elf_hash", info.elf_hash}};
}
|
||
|
||
// nlohmann::json deserialization hook for BuildInfo.
// Accepts either
//   - an array whose first element is the object (the layout the original code read), or
//   - the bare object with "serial" / "elf_hash" keys (the layout to_json produces).
// Throws an nlohmann::json exception if the array is empty or a key is missing/mistyped;
// the bounds-checked at() replaces an unchecked operator[] so an empty array can no longer
// cause undefined behaviour.
// `inline` because this is defined in a header.
inline void from_json(const nlohmann::json& j, BuildInfo& info) {
  const nlohmann::json& entry = j.is_array() ? j.at(std::size_t{0}) : j;
  entry.at("serial").get_to(info.serial);
  entry.at("elf_hash").get_to(info.elf_hash);
}
|
||
|
||
// Reads `<iso_data_path>/buildinfo.json` and converts it to a BuildInfo.
// Returns an empty optional when the file does not exist, or when reading/parsing it
// throws a std::exception (the error is logged in that case).
// `inline` because this is defined in a header.
inline std::optional<BuildInfo> get_buildinfo_from_path(fs::path iso_data_path) {
  // Build the path once and reuse it for both the existence check and the read.
  const fs::path buildinfo_file = iso_data_path / "buildinfo.json";
  if (!fs::exists(buildinfo_file)) {
    return {};
  }
  const auto buildinfo_path = buildinfo_file.string();
  try {
    return parse_commented_json(file_util::read_text_file(buildinfo_path), buildinfo_path)
        .get<BuildInfo>();
  } catch (const std::exception& e) {
    lg::error("JSON parsing error on buildinfo.json - {}", e.what());
    return {};
  }
}
|
||
|
||
// Metadata for the NTSC-U "Black Label" release of Jak 1.
// Kept as a named constant because it is both a database entry and the fallback used
// when the version cannot be determined.
static const ISOMetadata jak1_ntsc_black_label_info = {
    /* canonical_name */ "Jak & Daxter™: The Precursor Legacy (Black Label)",
    /* region         */ "NTSC-U",
    /* num_files      */ 337,
    /* contents_hash  */ 11363853835861842434U,
    /* decomp_config  */ "jak1_ntsc_black_label",
    /* game_name      */ "jak1",
    /* flags          */ {"jak1-black-label"},
};
|
||
|
||
// { SERIAL : { ELF_HASH : ISOMetadataDatabase } }
|
||
static const std::unordered_map<std::string, std::unordered_map<xxh::hash64_t, ISOMetadata>>
|
||
isoDatabase{{"SCUS-97124",
|
||
{{7280758013604870207U, jak1_ntsc_black_label_info},
|
||
{744661860962747854,
|
||
{"Jak & Daxter™: The Precursor Legacy",
|
||
"NTSC-U",
|
||
338,
|
||
8538304367812415885U,
|
||
"jak1_jp",
|
||
"jak1",
|
||
{}}}}},
|
||
{"SCES-50361",
|
||
{{12150718117852276522U,
|
||
{"Jak & Daxter™: The Precursor Legacy",
|
||
"PAL",
|
||
338,
|
||
16850370297611763875U,
|
||
"jak1_pal",
|
||
"jak1",
|
||
{}}}}},
|
||
{"SCPS-15021",
|
||
{{16909372048085114219U,
|
||
{"ジャックXダクスター ~ 旧世界の遺産",
|
||
"NTSC-J",
|
||
338,
|
||
1262350561338887717,
|
||
"jak1_jp",
|
||
"jak1",
|
||
{}}}}}};
|
||
|
||
std::optional<ISOMetadata> get_version_info_from_build_info(const BuildInfo& build_info) {
|
||
if (build_info.serial.empty() || build_info.elf_hash == 0) {
|
||
return {};
|
||
}
|
||
auto dbEntry = isoDatabase.find(build_info.serial);
|
||
if (dbEntry == isoDatabase.end()) {
|
||
return {};
|
||
}
|
||
|
||
auto& metaMap = dbEntry->second;
|
||
auto meta_entry = metaMap.find(build_info.elf_hash);
|
||
if (meta_entry == metaMap.end()) {
|
||
return {};
|
||
}
|
||
return std::make_optional(meta_entry->second);
|
||
}
|
||
|
||
// Determines the game version from the buildinfo.json found in `iso_data_path`.
// Falls back to the Jak 1 NTSC Black Label metadata (with a warning) when buildinfo.json
// cannot be loaded or when its serial / ELF hash is not in the database.
// `inline` because this is defined in a header.
inline ISOMetadata get_version_info_or_default(const fs::path& iso_data_path) {
  const auto build_info = get_buildinfo_from_path(iso_data_path);
  if (!build_info) {
    lg::warn(
        "unable to locate buildinfo.json file in iso data path, defaulting to Jak 1 - NTSC "
        "Black Label");
    return jak1_ntsc_black_label_info;
  }

  const auto maybe_version_info = get_version_info_from_build_info(build_info.value());
  if (!maybe_version_info) {
    lg::warn(
        "unable to determine game version from buildinfo.json file, defaulting to Jak 1 - NTSC "
        "Black Label");
    return jak1_ntsc_black_label_info;
  }
  return maybe_version_info.value();
}
|
||
|
||
// Searches the top level of an extracted ISO for the game's ELF and returns
// { serial, elf_hash }.
// The ELF is recognised by its file name shape `XXXX_XXX.XX` (e.g. "SCUS_971.24"), from
// which the serial "XXXX-XXXXX" (e.g. "SCUS-97124") is derived. Both optionals are empty
// when no such file is found. Only the first match is used.
// Reading is delegated to file_util::read_binary_file (the same helper the directory-based
// calculate_extraction_hash uses), so there is no raw FILE* to null-check or leak and no
// indexing into a possibly empty buffer; failures are handled however that helper does.
// `inline` because this is defined in a header.
inline std::tuple<std::optional<std::string>, std::optional<xxh::hash64_t>> findElfFile(
    const fs::path& extracted_iso_path) {
  // Compiled once; std::regex construction is expensive and the pattern never changes.
  static const std::regex elf_name_pattern(".{4}_.{3}\\..{2}");

  std::optional<std::string> serial = std::nullopt;
  std::optional<xxh::hash64_t> elf_hash = std::nullopt;
  for (const auto& entry : fs::directory_iterator(extracted_iso_path)) {
    // A directory that happens to match the name pattern cannot be the ELF.
    if (!entry.is_regular_file()) {
      continue;
    }
    const auto as_str = entry.path().filename().string();
    if (!std::regex_match(as_str, elf_name_pattern)) {
      continue;
    }
    // The regex guarantees exactly 11 characters, so these substr calls are in range:
    // [0,4) prefix, [5,8) and [9,11) digits around the '.'.
    serial = std::make_optional(
        fmt::format("{}-{}", as_str.substr(0, 4), as_str.substr(5, 3) + as_str.substr(9, 2)));
    // We already found the path, so hash it while we're here
    const auto buffer = file_util::read_binary_file(entry.path().string());
    elf_hash = std::make_optional(xxh::xxhash<64>(buffer));
    break;
  }
  return {serial, elf_hash};
}
|
||
|
||
// Logs a ready-to-paste database entry for a release that is not in the ISO database yet.
// Purely a convenience for whoever adds the new entry; nothing is returned.
//   - VALIDATION_SERIAL_MISSING_FROM_DB: prints a complete `{serial, {{elf_hash, {...}}}}`
//     entry.
//   - VALIDATION_ELF_MISSING_FROM_DB: prints only the `{elf_hash, {...}}` entry to add
//     under the existing serial.
//   - any other code: logs nothing.
// The empty flags list at the end of each template is a literal "{}" and therefore has to
// be escaped as "{{}}"; left unescaped it is one more replacement field than there are
// arguments.
// `inline` because this is defined in a header.
inline void log_potential_new_db_entry(ExtractorErrorCode error_code,
                                       const std::string& serial,
                                       const xxh::hash64_t elf_hash,
                                       const int files_extracted,
                                       const xxh::hash64_t contents_hash) {
  if (error_code == ExtractorErrorCode::VALIDATION_SERIAL_MISSING_FROM_DB) {
    lg::info(
        "If this is a new release or version that should be supported, consider adding the "
        "following serial entry to the database:");
    lg::info(
        "\t'{{\"{}\", {{{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\", \"jak1|jak2|jak3|jakx\", {{}}}}}}}}}}'",
        serial, elf_hash, files_extracted, contents_hash);
  } else if (error_code == ExtractorErrorCode::VALIDATION_ELF_MISSING_FROM_DB) {
    lg::info(
        "If this is a new release or version that should be supported, consider adding the "
        "following ELF entry to the database under the '{}' serial:",
        serial);
    lg::info(
        "\t'{{{}U, {{\"GAME_TITLE\", \"NTSC-U/PAL/NTSC-J\", {}, {}U, "
        "\"DECOMP_CONFIG_FILENAME_NO_EXTENSION\", \"jak1|jak2|jak3|jakx\", {{}}}}}}'",
        elf_hash, files_extracted, contents_hash);
  }
}
|
||
|
||
// Performs basic sanity checks on a path that is supposed to be a game ISO.
// Returns
//   {false, EXTRACTION_INVALID_ISO_PATH}    if the extension is not ".iso" (any casing),
//   {false, EXTRACTION_ISO_UNEXPECTED_SIZE} if the file is smaller than 1,000,000,000 bytes,
//   {true,  SUCCESS}                        otherwise.
// fs::file_size is called without an error_code, so a missing/unreadable file surfaces as
// whatever that call raises.
// `inline` because this is defined in a header.
inline std::tuple<bool, ExtractorErrorCode> is_iso_file(fs::path path_to_supposed_iso) {
  // Case-insensitive so that mixed-case extensions such as ".Iso" are accepted as well.
  // Compiled once; the pattern never changes.
  static const std::regex iso_extension("\\.iso", std::regex::ECMAScript | std::regex::icase);

  const std::string ext = path_to_supposed_iso.extension().string();
  if (!std::regex_match(ext, iso_extension)) {
    lg::error("Provided game data path contains a file that isn't a .ISO!");
    return {false, ExtractorErrorCode::EXTRACTION_INVALID_ISO_PATH};
  }

  // make sure the .iso is at least 1GB in size
  // to-do: verify game header data as well
  const auto iso_size = fs::file_size(path_to_supposed_iso);
  if (iso_size < 1000000000) {
    lg::error("Provided game data file appears to be too small or corrupted! Size is: {}",
              iso_size);
    return {false, ExtractorErrorCode::EXTRACTION_ISO_UNEXPECTED_SIZE};
  }
  return {true, ExtractorErrorCode::SUCCESS};
}
|
||
|
||
std::tuple<xxh::hash64_t, int> calculate_extraction_hash(const IsoFile& iso_file) {
|
||
// - XOR all hashes together and hash the result. This makes the ordering of the hashes (aka
|
||
// files) irrelevant
|
||
xxh::hash64_t combined_hash = 0;
|
||
for (const auto& hash : iso_file.hashes) {
|
||
combined_hash ^= hash;
|
||
}
|
||
return {xxh::xxhash<64>({combined_hash}), iso_file.hashes.size()};
|
||
}
|
||
|
||
// Computes the contents hash for an already-extracted ISO directory.
// Every regular file below `extracted_iso_path` (recursively) is read and hashed.
// Returns { combined hash, number of files hashed }.
// - All hashes are XOR-ed together and the result is hashed once more. XOR is commutative,
//   which makes the ordering of the hashes (aka files) irrelevant.
// `inline` because this is defined in a header.
inline std::tuple<xxh::hash64_t, int> calculate_extraction_hash(
    const fs::path& extracted_iso_path) {
  xxh::hash64_t combined_hash = 0;
  int filec = 0;
  for (const auto& dir_entry : fs::recursive_directory_iterator(extracted_iso_path)) {
    if (!dir_entry.is_regular_file()) {
      continue;
    }
    const auto buffer = file_util::read_binary_file(dir_entry.path().string());
    combined_hash ^= xxh::xxhash<64>(buffer);
    ++filec;
  }
  return {xxh::xxhash<64>({combined_hash}), filec};
}
|