diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 05f6127624..8a586fd037 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -28,6 +28,7 @@ add_library(common util/Assert.cpp util/BitUtils.cpp util/compress.cpp + util/crc32.cpp util/dgo_util.cpp util/DgoReader.cpp util/DgoWriter.cpp @@ -35,6 +36,7 @@ add_library(common util/FileUtil.cpp util/json_util.cpp util/read_iso_file.cpp + util/SimpleThreadGroup.cpp util/Timer.cpp util/os.cpp util/print_float.cpp diff --git a/common/util/FileUtil.cpp b/common/util/FileUtil.cpp index 9345c4b9f0..5f93c3d18b 100644 --- a/common/util/FileUtil.cpp +++ b/common/util/FileUtil.cpp @@ -277,32 +277,6 @@ std::string base_name(const std::string& filename) { return filename.substr(pos); } -static bool sInitCrc = false; -static uint32_t crc_table[0x100]; - -void init_crc() { - for (uint32_t i = 0; i < 0x100; i++) { - uint32_t n = i << 24u; - for (uint32_t j = 0; j < 8; j++) - n = n & 0x80000000 ? (n << 1u) ^ 0x04c11db7u : (n << 1u); - crc_table[i] = n; - } - sInitCrc = true; -} - -uint32_t crc32(const uint8_t* data, size_t size) { - ASSERT(sInitCrc); - uint32_t crc = 0; - for (size_t i = size; i != 0; i--, data++) { - crc = crc_table[crc >> 24u] ^ ((crc << 8u) | *data); - } - return ~crc; -} - -uint32_t crc32(const std::vector& data) { - return crc32(data.data(), data.size()); -} - void ISONameFromAnimationName(char* dst, const char* src) { // The Animation Name is a bunch of words separated by dashes diff --git a/common/util/FileUtil.h b/common/util/FileUtil.h index f672b57992..8abeee9715 100644 --- a/common/util/FileUtil.h +++ b/common/util/FileUtil.h @@ -33,9 +33,6 @@ bool is_printable_char(char c); std::string combine_path(const std::string& parent, const std::string& child); bool file_exists(const std::string& path); std::string base_name(const std::string& filename); -void init_crc(); -uint32_t crc32(const uint8_t* data, size_t size); -uint32_t crc32(const std::vector& data); void MakeISOName(char* dst, const char* src); void ISONameFromAnimationName(char* dst, const char* src); void assert_file_exists(const char* path, const char* error_message); diff --git a/common/util/Serializer.h b/common/util/Serializer.h index fa410f607b..a960c7a609 100644 --- a/common/util/Serializer.h +++ b/common/util/Serializer.h @@ -36,7 +36,7 @@ class Serializer { * later be accessed with get_save_result. */ Serializer() : m_writing(true) { - constexpr size_t initial_size = 32 * 1024 * 1024; + constexpr size_t initial_size = 32; m_data = (u8*)malloc(initial_size); m_size = initial_size; } @@ -202,7 +202,8 @@ class Serializer { if (m_writing) { // if we would overflow, just resize the buffer. if (m_offset + size > m_size) { - m_data = (u8*)realloc(m_data, (m_offset + size) * 2); + m_size = (m_offset + size) * 2; + m_data = (u8*)realloc(m_data, m_size); } memcpy(m_data + m_offset, data, size); } else { diff --git a/common/util/SimpleThreadGroup.cpp b/common/util/SimpleThreadGroup.cpp new file mode 100644 index 0000000000..a647799546 --- /dev/null +++ b/common/util/SimpleThreadGroup.cpp @@ -0,0 +1,41 @@ +#include "SimpleThreadGroup.h" +#include "common/util/Assert.h" + +void SimpleThreadGroup::run(const std::function& func, int num_runs) { + int num_workers = std::min(num_runs, (int)std::thread::hardware_concurrency()); + run(func, num_runs, num_workers); +} + +void SimpleThreadGroup::run(const std::function& func, int num_runs, int num_workers) { + ASSERT(m_joined); + + // copy the function, in case the user gives us a temporary function. + m_func = func; + + int start_idx = 0; + for (int thread_idx = 0; thread_idx < num_workers; thread_idx++) { + int end_idx = ((thread_idx + 1) * num_runs) / num_workers; + if (thread_idx == (num_workers - 1)) { + end_idx = num_runs; + } + + m_threads.emplace_back([&, start_idx, end_idx]() { + for (int i = start_idx; i < end_idx; i++) { + m_func(i); + } + }); + + start_idx = end_idx; + } + + m_joined = false; +} + +void SimpleThreadGroup::join() { + ASSERT(!m_joined); + for (auto& t : m_threads) { + t.join(); + } + m_threads.clear(); + m_joined = true; +} \ No newline at end of file diff --git a/common/util/SimpleThreadGroup.h b/common/util/SimpleThreadGroup.h new file mode 100644 index 0000000000..b800648e3a --- /dev/null +++ b/common/util/SimpleThreadGroup.h @@ -0,0 +1,29 @@ +#include +#include +#include + +/*! + * Very simple group of threads. + * Does: + * for (int i = 0; i < num_runs; i++) { + * func(i); + * } + * but in parallel. + * + * Two things to watch out for: + * - you must call join before this object is destroyed. The pattern of "join in the destructor" + * can cause confusing issues where resources used by threads are destroyed before the threads + * are joined, if you aren't careful about the order you declare variables. + * - the function is copied (once) + */ +class SimpleThreadGroup { + public: + void run(const std::function& func, int num_runs, int num_workers); + void run(const std::function& func, int num_runs); + void join(); + + private: + bool m_joined = true; + std::vector m_threads; + std::function m_func; +}; \ No newline at end of file diff --git a/common/util/crc32.cpp b/common/util/crc32.cpp new file mode 100644 index 0000000000..dc75238cc6 --- /dev/null +++ b/common/util/crc32.cpp @@ -0,0 +1,21 @@ +#include "crc32.h" + +#include +#include + +u32 crc32(const u8* data, size_t size) { + u32 result = 0xffffffff; + while (size >= 4) { + u32 x; + memcpy(&x, data, 4); + data += 4; + size -= 4; + result = _mm_crc32_u32(result, x); + } + while (size) { + result = _mm_crc32_u8(result, *data); + data++; + size--; + } + return ~result; +} \ No newline at end of file diff --git a/common/util/crc32.h b/common/util/crc32.h new file mode 100644 index 0000000000..d0f3f845c0 --- /dev/null +++ b/common/util/crc32.h @@ -0,0 +1,4 @@ +#include +#include "common/common_types.h" + +u32 crc32(const u8* data, size_t size); \ No newline at end of file diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index dda918c543..6f29e39332 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -26,6 +26,7 @@ #include "decompiler/Function/BasicBlocks.h" #include "common/log/log.h" #include "common/util/json_util.h" +#include "common/util/crc32.h" namespace decompiler { namespace { @@ -89,10 +90,10 @@ std::string ObjectFileData::to_unique_name() const { } } -ObjectFileData& ObjectFileDB::lookup_record(const ObjectFileRecord& rec) { - ObjectFileData* result = nullptr; +const ObjectFileData& ObjectFileDB::lookup_record(const ObjectFileRecord& rec) const { + const ObjectFileData* result = nullptr; - for (auto& x : obj_files_by_name[rec.name]) { + for (auto& x : obj_files_by_name.at(rec.name)) { if (x.record.version == rec.version) { ASSERT(x.record.hash == rec.hash); ASSERT(!result); @@ -270,7 +271,7 @@ void ObjectFileDB::add_obj_from_dgo(const std::string& obj_name, stats.total_obj_files++; ASSERT(obj_size > 128); uint16_t version = *(const uint16_t*)(obj_data + 8); - auto hash = file_util::crc32(obj_data, obj_size); + auto hash = crc32(obj_data, obj_size); bool duplicated = false; // first, check to see if we already got it... diff --git a/decompiler/ObjectFile/ObjectFileDB.h b/decompiler/ObjectFile/ObjectFileDB.h index 51a2e5f5ec..23751098b4 100644 --- a/decompiler/ObjectFile/ObjectFileDB.h +++ b/decompiler/ObjectFile/ObjectFileDB.h @@ -101,7 +101,7 @@ class ObjectFileDB { std::string process_game_count_file(); std::string process_game_text_files(const Config& cfg); - ObjectFileData& lookup_record(const ObjectFileRecord& rec); + const ObjectFileData& lookup_record(const ObjectFileRecord& rec) const; DecompilerTypeSystem dts; bool lookup_function_type(const FunctionName& name, diff --git a/decompiler/config/jak1_ntsc_black_label.jsonc b/decompiler/config/jak1_ntsc_black_label.jsonc index ef42a305e6..2c90382797 100644 --- a/decompiler/config/jak1_ntsc_black_label.jsonc +++ b/decompiler/config/jak1_ntsc_black_label.jsonc @@ -18,7 +18,7 @@ "disassemble_code": false, // Run the decompiler - "decompile_code": true, + "decompile_code": false, //////////////////////////// // DATA ANALYSIS OPTIONS diff --git a/decompiler/extractor/main.cpp b/decompiler/extractor/main.cpp index 83d92c1984..eb27c210d3 100644 --- a/decompiler/extractor/main.cpp +++ b/decompiler/extractor/main.cpp @@ -9,7 +9,6 @@ #include "common/util/read_iso_file.h" void setup_global_decompiler_stuff(std::optional project_path_override) { - file_util::init_crc(); decompiler::init_opcode_info(); file_util::setup_project_path(project_path_override); } @@ -105,10 +104,8 @@ void decompile(std::filesystem::path jak1_input_files) { // levels { - extract_common(db, tex_db, "GAME.CGO"); - for (auto& lev : config.levels_to_extract) { - extract_from_level(db, tex_db, lev, config.hacks, config.rip_levels); - } + extract_all_levels(db, tex_db, config.levels_to_extract, "GAME.CGO", config.hacks, + config.rip_levels); } } diff --git a/decompiler/level_extractor/extract_level.cpp b/decompiler/level_extractor/extract_level.cpp index 2ea8d28d33..3889fa2598 100644 --- a/decompiler/level_extractor/extract_level.cpp +++ b/decompiler/level_extractor/extract_level.cpp @@ -1,4 +1,5 @@ #include +#include #include "extract_level.h" #include "decompiler/level_extractor/BspHeader.h" @@ -7,6 +8,7 @@ #include "decompiler/level_extractor/extract_shrub.h" #include "common/util/compress.h" #include "common/util/FileUtil.h" +#include "common/util/SimpleThreadGroup.h" namespace decompiler { @@ -93,27 +95,30 @@ void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { void add_all_textures_from_level(tfrag3::Level& lev, const std::string& level_name, - TextureDB& tex_db) { + const TextureDB& tex_db) { ASSERT(lev.textures.empty()); - for (auto id : tex_db.texture_ids_per_level[level_name]) { - const auto& tex = tex_db.textures.at(id); - lev.textures.emplace_back(); - auto& new_tex = lev.textures.back(); - new_tex.combo_id = id; - new_tex.w = tex.w; - new_tex.h = tex.h; - new_tex.debug_tpage_name = tex_db.tpage_names.at(tex.page); - new_tex.debug_name = new_tex.debug_tpage_name + tex.name; - new_tex.data = tex.rgba_bytes; - new_tex.combo_id = id; - new_tex.load_to_pool = true; + const auto& level_it = tex_db.texture_ids_per_level.find(level_name); + if (level_it != tex_db.texture_ids_per_level.end()) { + for (auto id : level_it->second) { + const auto& tex = tex_db.textures.at(id); + lev.textures.emplace_back(); + auto& new_tex = lev.textures.back(); + new_tex.combo_id = id; + new_tex.w = tex.w; + new_tex.h = tex.h; + new_tex.debug_tpage_name = tex_db.tpage_names.at(tex.page); + new_tex.debug_name = new_tex.debug_tpage_name + tex.name; + new_tex.data = tex.rgba_bytes; + new_tex.combo_id = id; + new_tex.load_to_pool = true; + } } } -void confirm_textures_identical(TextureDB& tex_db) { +void confirm_textures_identical(const TextureDB& tex_db) { std::unordered_map> tex_dupl; for (auto& tex : tex_db.textures) { - auto name = tex_db.tpage_names[tex.second.page] + tex.second.name; + auto name = tex_db.tpage_names.at(tex.second.page) + tex.second.name; auto it = tex_dupl.find(name); if (it == tex_dupl.end()) { tex_dupl.insert({name, tex.second.rgba_bytes}); @@ -130,7 +135,7 @@ void confirm_textures_identical(TextureDB& tex_db) { /*! * Extract common textures found in GAME.CGO */ -void extract_common(ObjectFileDB& db, TextureDB& tex_db, const std::string& dgo_name) { +void extract_common(const ObjectFileDB& db, const TextureDB& tex_db, const std::string& dgo_name) { if (db.obj_files_by_dgo.count(dgo_name) == 0) { lg::warn("Skipping common extract for {} because the DGO was not part of the input", dgo_name); return; @@ -157,8 +162,8 @@ void extract_common(ObjectFileDB& db, TextureDB& tex_db, const std::string& dgo_ compressed.data(), compressed.size()); } -void extract_from_level(ObjectFileDB& db, - TextureDB& tex_db, +void extract_from_level(const ObjectFileDB& db, + const TextureDB& tex_db, const std::string& dgo_name, const DecompileHacks& hacks, bool dump_level) { @@ -175,7 +180,7 @@ void extract_from_level(ObjectFileDB& db, std::string level_name = bsp_rec->name.substr(0, bsp_rec->name.length() - 4); fmt::print("Processing level {} ({})\n", dgo_name, level_name); - auto& bsp_file = db.lookup_record(*bsp_rec); + const auto& bsp_file = db.lookup_record(*bsp_rec); bool ok = is_valid_bsp(bsp_file.linked_data); ASSERT(ok); @@ -238,4 +243,19 @@ void extract_from_level(ObjectFileDB& db, "assets/{}.fr3", dgo_name.substr(0, dgo_name.length() - 4))}), compressed.data(), compressed.size()); } + +void extract_all_levels(const ObjectFileDB& db, + const TextureDB& tex_db, + const std::vector& dgo_names, + const std::string& common_name, + const DecompileHacks& hacks, + bool debug_dump_level) { + extract_common(db, tex_db, common_name); + SimpleThreadGroup threads; + threads.run( + [&](int idx) { extract_from_level(db, tex_db, dgo_names[idx], hacks, debug_dump_level); }, + dgo_names.size()); + threads.join(); +} + } // namespace decompiler diff --git a/decompiler/level_extractor/extract_level.h b/decompiler/level_extractor/extract_level.h index 6952053c40..a97fcf37d3 100644 --- a/decompiler/level_extractor/extract_level.h +++ b/decompiler/level_extractor/extract_level.h @@ -6,10 +6,18 @@ #include "decompiler/ObjectFile/ObjectFileDB.h" namespace decompiler { -void extract_from_level(ObjectFileDB& db, - TextureDB& tex_db, +void extract_from_level(const ObjectFileDB& db, + const TextureDB& tex_db, const std::string& dgo_name, const DecompileHacks& hacks, bool dump_level); -void extract_common(ObjectFileDB& db, TextureDB& tex_db, const std::string& dgo_name); +void extract_common(const ObjectFileDB& db, const TextureDB& tex_db, const std::string& dgo_name); + +// extract everything +void extract_all_levels(const ObjectFileDB& db, + const TextureDB& tex_db, + const std::vector& dgo_names, + const std::string& common_name, + const DecompileHacks& hacks, + bool debug_dump_level); } // namespace decompiler diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 574eac0b8b..bede66b8a0 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -25,7 +25,6 @@ int main(int argc, char** argv) { lg::initialize(); lg::info("GOAL Decompiler version {}\n", versions::DECOMPILER_VERSION); - file_util::init_crc(); init_opcode_info(); if (argc < 4) { @@ -207,10 +206,8 @@ int main(int argc, char** argv) { } if (config.levels_extract) { - extract_common(db, tex_db, "GAME.CGO"); - for (auto& lev : config.levels_to_extract) { - extract_from_level(db, tex_db, lev, config.hacks, config.rip_levels); - } + extract_all_levels(db, tex_db, config.levels_to_extract, "GAME.CGO", config.hacks, + config.rip_levels); } fmt::print("[Mem] After extraction: {} MB\n", get_peak_rss() / (1024 * 1024)); diff --git a/test/offline/offline_test_main.cpp b/test/offline/offline_test_main.cpp index 72e8757089..97083142d3 100644 --- a/test/offline/offline_test_main.cpp +++ b/test/offline/offline_test_main.cpp @@ -179,7 +179,6 @@ Decompiler setup_decompiler(const std::vector& files, const OfflineTestArgs& args, const OfflineTestConfig& offline_config) { Decompiler dc; - file_util::init_crc(); decompiler::init_opcode_info(); dc.config = std::make_unique(decompiler::read_config_file( (file_util::get_jak_project_dir() / "decompiler" / "config" / "jak1_ntsc_black_label.jsonc") diff --git a/test/test_common_util.cpp b/test/test_common_util.cpp index a01f187f9d..bee59a1a06 100644 --- a/test/test_common_util.cpp +++ b/test/test_common_util.cpp @@ -15,6 +15,7 @@ #include "common/util/print_float.h" #include "common/util/CopyOnWrite.h" #include "common/util/SmallVector.h" +#include "common/util/crc32.h" TEST(CommonUtil, CpuInfo) { setup_cpu_info(); @@ -396,5 +397,26 @@ TEST(Assert, Death) { EXPECT_DEATH(private_assert_failed("foo", "bar", 12, "aaa"), ""); } +uint32_t crc_reference(const u8* data, size_t size) { + u32 crc = 0xffffffff; + while (size--) { + crc ^= *data++; + for (int k = 0; k < 8; k++) + crc = crc & 1 ? (crc >> 1) ^ 0x82f63b78 : crc >> 1; + } + return ~crc; +} + +TEST(CRC32, Reference) { + for (u32 so = 0; so < 7; so++) { + std::vector test_data; + for (u32 i = 0; i < 1024 + so; i++) { + test_data.push_back(i & 0xff); + } + EXPECT_EQ(crc_reference(test_data.data(), test_data.size()), + crc32(test_data.data(), test_data.size())); + } +} + } // namespace test } // namespace cu \ No newline at end of file