From 26da66b29c225abb22ff0366e7ca9029418ddfb2 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Sat, 6 Mar 2021 16:06:08 -0500 Subject: [PATCH] move dgo decompression to common utils and support it in the dgo unpacker (#312) --- common/CMakeLists.txt | 2 +- common/util/DgoReader.cpp | 7 +++ common/util/FileUtil.cpp | 63 +++++++++++++++++++++++++- common/util/FileUtil.h | 3 ++ decompiler/ObjectFile/ObjectFileDB.cpp | 48 +------------------- tools/dgo_unpacker.cpp | 7 +++ 6 files changed, 82 insertions(+), 48 deletions(-) diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index a3f845cbd0..0859ab4bc2 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -22,7 +22,7 @@ add_library(common util/Timer.cpp ) -target_link_libraries(common fmt) +target_link_libraries(common fmt lzokay) if(WIN32) target_link_libraries(common wsock32 ws2_32) diff --git a/common/util/DgoReader.cpp b/common/util/DgoReader.cpp index fb25f10abf..fe81cd88b6 100644 --- a/common/util/DgoReader.cpp +++ b/common/util/DgoReader.cpp @@ -22,7 +22,14 @@ DgoReader::DgoReader(std::string file_name, const std::vector& data) DgoDataEntry entry; entry.internal_name = obj_header.name; + entry.unique_name = get_object_file_name(entry.internal_name, reader.here(), obj_header.size); + if (all_unique_names.find(entry.unique_name) != all_unique_names.end()) { + printf("Warning: there are multiple files named %s\n", entry.unique_name.c_str()); + entry.unique_name += '-'; + entry.unique_name += std::to_string(obj_header.size); + } + all_unique_names.insert(entry.unique_name); entry.data.resize(obj_header.size); diff --git a/common/util/FileUtil.cpp b/common/util/FileUtil.cpp index 9e9e394780..65678226c0 100644 --- a/common/util/FileUtil.cpp +++ b/common/util/FileUtil.cpp @@ -10,10 +10,12 @@ #include #include #include +#include +#include "common/util/BinaryReader.h" #include "BinaryWriter.h" #include "common/common_types.h" #include "third-party/svpng.h" -#include +#include "third-party/lzokay/lzokay.hpp" #ifdef _WIN32 #include @@ -327,4 +329,63 @@ void assert_file_exists(const char* path, const char* error_message) { } } +/*! + * Check if the given DGO header (or entire file) is compressed. + */ +bool dgo_header_is_compressed(const std::vector& data) { + const char compressed_header[] = "oZlB"; + bool is_compressed = true; + for (int i = 0; i < 4; i++) { + if (compressed_header[i] != data.at(i)) { + is_compressed = false; + } + } + return is_compressed; +} + +/*! + * Decompress a DGO. Resulting data will start at the DGO header. + */ +std::vector decompress_dgo(const std::vector& data_in) { + constexpr int MAX_CHUNK_SIZE = 0x8000; + BinaryReader compressed_reader(data_in); + // seek past oZlB + compressed_reader.ffwd(4); + std::size_t decompressed_size = compressed_reader.read(); + std::vector decompressed_data; + decompressed_data.resize(decompressed_size); + size_t output_offset = 0; + while (true) { + // seek past alignment bytes and read the next chunk size + uint32_t chunk_size = 0; + while (!chunk_size) { + chunk_size = compressed_reader.read(); + } + + if (chunk_size < MAX_CHUNK_SIZE) { + std::size_t bytes_written = 0; + lzokay::EResult ok = lzokay::decompress( + compressed_reader.here(), chunk_size, decompressed_data.data() + output_offset, + decompressed_data.size() - output_offset, bytes_written); + assert(ok == lzokay::EResult::Success); + compressed_reader.ffwd(chunk_size); + output_offset += bytes_written; + } else { + // nope - sometimes chunk_size is bigger than MAX, but we should still use max. + // assert(chunk_size == MAX_CHUNK_SIZE); + memcpy(decompressed_data.data() + output_offset, compressed_reader.here(), MAX_CHUNK_SIZE); + compressed_reader.ffwd(MAX_CHUNK_SIZE); + output_offset += MAX_CHUNK_SIZE; + } + + if (output_offset >= decompressed_size) + break; + while (compressed_reader.get_seek() % 4) { + compressed_reader.ffwd(1); + } + } + + return decompressed_data; +} + } // namespace file_util diff --git a/common/util/FileUtil.h b/common/util/FileUtil.h index 3127207afa..6dcb35201d 100644 --- a/common/util/FileUtil.h +++ b/common/util/FileUtil.h @@ -8,6 +8,7 @@ #include #include #include +#include "common/common_types.h" namespace file_util { std::filesystem::path get_user_home_dir(); @@ -28,4 +29,6 @@ uint32_t crc32(const std::vector& data); void MakeISOName(char* dst, const char* src); void ISONameFromAnimationName(char* dst, const char* src); void assert_file_exists(const char* path, const char* error_message); +bool dgo_header_is_compressed(const std::vector& data); +std::vector decompress_dgo(const std::vector& data_in); } // namespace file_util diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index 6c515d911a..a84264f52c 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -195,52 +195,8 @@ void ObjectFileDB::get_objs_from_dgo(const std::string& filename) { auto dgo_data = file_util::read_binary_file(filename); stats.total_dgo_bytes += dgo_data.size(); - const char jak2_header[] = "oZlB"; - bool is_jak2 = true; - for (int i = 0; i < 4; i++) { - if (jak2_header[i] != dgo_data[i]) { - is_jak2 = false; - } - } - - if (is_jak2) { - BinaryReader compressed_reader(dgo_data); - // seek past oZlB - compressed_reader.ffwd(4); - std::size_t decompressed_size = compressed_reader.read(); - std::vector decompressed_data; - decompressed_data.resize(decompressed_size); - size_t output_offset = 0; - while (true) { - // seek past alignment bytes and read the next chunk size - uint32_t chunk_size = 0; - while (!chunk_size) { - chunk_size = compressed_reader.read(); - } - - if (chunk_size < MAX_CHUNK_SIZE) { - std::size_t bytes_written = 0; - lzokay::EResult ok = lzokay::decompress( - compressed_reader.here(), chunk_size, decompressed_data.data() + output_offset, - decompressed_data.size() - output_offset, bytes_written); - assert(ok == lzokay::EResult::Success); - compressed_reader.ffwd(chunk_size); - output_offset += bytes_written; - } else { - // nope - sometimes chunk_size is bigger than MAX, but we should still use max. - // assert(chunk_size == MAX_CHUNK_SIZE); - memcpy(decompressed_data.data() + output_offset, compressed_reader.here(), MAX_CHUNK_SIZE); - compressed_reader.ffwd(MAX_CHUNK_SIZE); - output_offset += MAX_CHUNK_SIZE; - } - - if (output_offset >= decompressed_size) - break; - while (compressed_reader.get_seek() % 4) { - compressed_reader.ffwd(1); - } - } - dgo_data = decompressed_data; + if (file_util::dgo_header_is_compressed(dgo_data)) { + dgo_data = file_util::decompress_dgo(dgo_data); } BinaryReader reader(dgo_data); diff --git a/tools/dgo_unpacker.cpp b/tools/dgo_unpacker.cpp index c3453ec2e7..289a28afe1 100644 --- a/tools/dgo_unpacker.cpp +++ b/tools/dgo_unpacker.cpp @@ -20,6 +20,13 @@ int main(int argc, char** argv) { printf("Unpacking %s\n", base.c_str()); // read the file auto data = file_util::read_binary_file(file_name); + if (file_util::dgo_header_is_compressed(data)) { + printf(" Detected compressed dgo, decompressing...\n"); + auto original_size = data.size(); + data = file_util::decompress_dgo(data); + printf(" Decompressed from %d to %d bytes (%.2f%% compression)\n", int(original_size), + int(data.size()), 100.f * original_size / data.size()); + } // read as a DGO auto dgo = DgoReader(base, data); // write dgo description