From a06348fa9f26bf9666104fcb8d20968a846d378f Mon Sep 17 00:00:00 2001 From: water111 Date: Sun, 15 Jun 2025 18:26:56 -0400 Subject: [PATCH] wip --- common/custom_data/TFrag3Data.cpp | 35 +- common/custom_data/Tfrag3Data.h | 34 +- decompiler/CMakeLists.txt | 1 + decompiler/level_extractor/extract_level.cpp | 3 + decompiler/level_extractor/extract_shadow.cpp | 369 ++++++ decompiler/level_extractor/extract_shadow.h | 13 + decompiler/util/goal_data_reader.cpp | 3 +- decompiler/util/goal_data_reader.h | 2 +- docs/progress-notes/shadow.md | 1032 +++++++++++++++++ game/CMakeLists.txt | 1 + .../graphics/opengl_renderer/BucketRenderer.h | 2 + .../opengl_renderer/OpenGLRenderer.cpp | 6 +- .../graphics/opengl_renderer/OpenGLRenderer.h | 2 + game/graphics/opengl_renderer/Shader.cpp | 1 + game/graphics/opengl_renderer/Shader.h | 1 + .../opengl_renderer/ShadowRenderer.cpp | 19 +- .../graphics/opengl_renderer/ShadowRenderer.h | 9 +- .../background/background_common.cpp | 2 + .../opengl_renderer/foreground/Shadow3.cpp | 378 ++++++ .../opengl_renderer/foreground/Shadow3.h | 81 ++ .../opengl_renderer/loader/Loader.cpp | 29 + game/graphics/opengl_renderer/loader/Loader.h | 2 + .../opengl_renderer/loader/LoaderStages.cpp | 63 + .../opengl_renderer/loader/LoaderStages.h | 13 + game/graphics/opengl_renderer/loader/common.h | 14 + .../opengl_renderer/shaders/shadow3.frag | 8 + .../opengl_renderer/shaders/shadow3.vert | 102 ++ goal_src/jak1/engine/draw/drawable.gc | 10 +- goal_src/jak1/engine/gfx/foreground/bones.gc | 114 +- .../jak1/engine/gfx/shadow/shadow-cpu-h.gc | 29 + goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc | 31 + .../sublime_text/lispindent.sublime-settings | 2 +- 32 files changed, 2381 insertions(+), 30 deletions(-) create mode 100644 decompiler/level_extractor/extract_shadow.cpp create mode 100644 decompiler/level_extractor/extract_shadow.h create mode 100644 docs/progress-notes/shadow.md create mode 100644 game/graphics/opengl_renderer/foreground/Shadow3.cpp create mode 
100644 game/graphics/opengl_renderer/foreground/Shadow3.h create mode 100644 game/graphics/opengl_renderer/shaders/shadow3.frag create mode 100644 game/graphics/opengl_renderer/shaders/shadow3.vert diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp index 12cb53a3dc..9b3a887216 100644 --- a/common/custom_data/TFrag3Data.cpp +++ b/common/custom_data/TFrag3Data.cpp @@ -587,6 +587,28 @@ void MercModelGroup::serialize(Serializer& ser) { ser.from_pod_vector(&vertices); } +void ShadowModel::serialize(Serializer& ser) { + ser.from_str(&name); + ser.from_ptr(&max_bones); + ser.from_ptr(&single_tris); + ser.from_ptr(&double_tris); + ser.from_ptr(&single_edges); + ser.from_ptr(&double_edges); +} + +void ShadowModelGroup::serialize(Serializer& ser) { + ser.from_pod_vector(&vertices); + ser.from_pod_vector(&indices); + if (ser.is_saving()) { + ser.save(models.size()); + } else { + models.resize(ser.load()); + } + for (auto& model : models) { + model.serialize(ser); + } +} + void Level::serialize(Serializer& ser) { ser.from_ptr(&version); if (ser.is_loading() && version != TFRAG3_VERSION) { @@ -647,9 +669,9 @@ void Level::serialize(Serializer& ser) { } hfrag.serialize(ser); - collision.serialize(ser); merc_data.serialize(ser); + shadow_data.serialize(ser); ser.from_ptr(&version2); if (ser.is_loading() && version2 != TFRAG3_VERSION) { @@ -770,6 +792,11 @@ void Hfragment::memory_usage(tfrag3::MemoryUsageTracker* tracker) const { tracker->add(MemoryUsageCategory::HFRAG_CORNERS, corners.size() * sizeof(HfragmentCorner)); } +void ShadowModelGroup::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(SHADOW_VERTS, vertices.size() * sizeof(ShadowVertex)); + tracker->add(SHADOW_INDEX, indices.size() * sizeof(u32)); +} + void Level::memory_usage(MemoryUsageTracker* tracker) const { for (const auto& texture : textures) { texture.memory_usage(tracker); @@ -793,6 +820,7 @@ void Level::memory_usage(MemoryUsageTracker* tracker) const { 
hfrag.memory_usage(tracker); collision.memory_usage(tracker); merc_data.memory_usage(tracker); + shadow_data.memory_usage(tracker); } void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { @@ -837,8 +865,9 @@ void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { {"hfrag-verts", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_VERTS]}, {"hfrag-index", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_INDEX]}, {"hfrag-time-of-day", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_TIME_OF_DAY]}, - {"hfrag-corners", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_CORNERS]} - + {"hfrag-corners", mem_use.data[tfrag3::MemoryUsageCategory::HFRAG_CORNERS]}, + {"shadow-vert", mem_use.data[SHADOW_VERTS]}, + {"shadow-ind", mem_use.data[SHADOW_INDEX]}, }; for (auto& known : known_categories) { total_accounted += known.second; diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h index 2dac7cf479..907f86e3c3 100644 --- a/common/custom_data/Tfrag3Data.h +++ b/common/custom_data/Tfrag3Data.h @@ -18,7 +18,7 @@ namespace tfrag3 { // - if changing any large things (vertices, vis, bvh, colors, textures) update get_memory_usage // - if adding a new category to the memory usage, update extract_level to print it. 
-constexpr int TFRAG3_VERSION = 43; +constexpr int TFRAG3_VERSION = 44; enum MemoryUsageCategory { TEXTURE, @@ -66,6 +66,9 @@ enum MemoryUsageCategory { HFRAG_TIME_OF_DAY, HFRAG_CORNERS, + SHADOW_VERTS, + SHADOW_INDEX, + COLLISION, NUM_CATEGORIES @@ -614,7 +617,33 @@ struct MercModelGroup { void memory_usage(MemoryUsageTracker* tracker) const; }; -// +struct ShadowVertex { + float pos[3]; + float weight; + u8 mats[2]; + u8 flags; +}; + +struct ShadowModel { + std::string name; + u32 max_bones; + + struct Run { + u32 first_index; + u32 count; + }; + Run single_tris, double_tris, single_edges, double_edges; + + void serialize(Serializer& ser); +}; + +struct ShadowModelGroup { + std::vector vertices; + std::vector indices; + std::vector models; + void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; +}; constexpr int TFRAG_GEOS = 3; constexpr int TIE_GEOS = 4; @@ -630,6 +659,7 @@ struct Level { Hfragment hfrag; CollisionMesh collision; MercModelGroup merc_data; + ShadowModelGroup shadow_data; u16 version2 = TFRAG3_VERSION; void serialize(Serializer& ser); void memory_usage(MemoryUsageTracker* tracker) const; diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index 7e6f1403d6..a31ea2c3b1 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -62,6 +62,7 @@ add_library( level_extractor/extract_joint_group.cpp level_extractor/extract_level.cpp level_extractor/extract_merc.cpp + level_extractor/extract_shadow.cpp level_extractor/extract_tfrag.cpp level_extractor/extract_tie.cpp level_extractor/extract_shrub.cpp diff --git a/decompiler/level_extractor/extract_level.cpp b/decompiler/level_extractor/extract_level.cpp index 43e792dcbd..3d40099be7 100644 --- a/decompiler/level_extractor/extract_level.cpp +++ b/decompiler/level_extractor/extract_level.cpp @@ -3,6 +3,8 @@ #include #include +#include "extract_shadow.h" + #include "common/log/log.h" #include "common/util/FileUtil.h" #include 
"common/util/SimpleThreadGroup.h" @@ -129,6 +131,7 @@ void extract_art_groups_from_level(const ObjectFileDB& db, extract_merc(ag_file, tex_db, db.dts, tex_remap, level_data, false, db.version(), swapped_info); extract_joint_group(ag_file, db.dts, db.version(), art_group_data); + extract_shadow(ag_file, db.dts, level_data, false, db.version()); } } } diff --git a/decompiler/level_extractor/extract_shadow.cpp b/decompiler/level_extractor/extract_shadow.cpp new file mode 100644 index 0000000000..546dd7565e --- /dev/null +++ b/decompiler/level_extractor/extract_shadow.cpp @@ -0,0 +1,369 @@ +#include "extract_shadow.h" + +#include "common/log/log.h" +#include "common/util/BitUtils.h" + +#include "decompiler/util/goal_data_reader.h" + +namespace decompiler { + +/* +*(deftype shadow-header (structure) +((qwc-data uint32 :offset-assert 0) +(num-joints uint32 :offset-assert 4) +(num-verts uint16 :offset-assert 8) +(num-twos uint16 :offset-assert 10) +(num-single-tris uint16 :offset-assert 12) +(num-single-edges uint16 :offset-assert 14) +(num-double-tris uint16 :offset-assert 16) +(num-double-edges uint16 :offset-assert 18) +(ofs-verts uint32 :offset-assert 20) +(ofs-refs uint32 :offset-assert 24) +(ofs-single-tris uint32 :offset-assert 28) +(ofs-single-edges uint32 :offset-assert 32) +(ofs-double-tris uint32 :offset-assert 36) +(ofs-double-edges uint32 :offset-assert 40) +) +:method-count-assert 9 +:size-assert #x2c +:flag-assert #x90000002c +) + +(deftype shadow-geo (art-element) +((total-size uint32 :offset-assert 32) +(header shadow-header :inline :offset 32) +(rest uint64 :dynamic :offset-assert 80) +) +:method-count-assert 13 +:size-assert #x50 +:flag-assert #xd00000050 +)*/ + +struct ShadowVertex { + math::Vector3f pos; + float weight; +}; + +struct ShadowRef { + uint8_t joint_0 = 0; + uint8_t joint_1 = 0; +}; + +struct ShadowTri { + uint8_t verts[3]; + uint8_t faces; +}; + +struct ShadowEdge { + uint8_t ind[2]; + uint8_t tri[2]; +}; + +struct ShadowData { + 
std::string name; + uint32_t num_joints = 0; + std::vector one_bone_vertices; + std::vector two_bone_vertices; + std::vector refs; + std::vector single_tris, double_tris; + std::vector single_edges, double_edges; +}; + +std::string debug_dump_to_ply(const ShadowData& data) { + int num_verts = data.one_bone_vertices.size() + data.two_bone_vertices.size(); + std::string result = fmt::format( + "ply\nformat ascii 1.0\nelement vertex {}\nproperty float x\nproperty float y\nproperty " + "float z\nproperty uchar red\nproperty uchar green\nproperty uchar blue\nelement face " + "{}\nproperty list uchar int vertex_index\nend_header\n", + 2 * num_verts, data.single_tris.size() + data.double_tris.size()); + + for (auto& vtx : data.one_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 128, 128); + } + for (auto& vtx : data.two_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 128, 128); + } + for (auto& vtx : data.one_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 256, 128); + } + for (auto& vtx : data.two_bone_vertices) { + result += fmt::format("{} {} {} {} {} {}\n", vtx.pos.x() / 1024.f, vtx.pos.y() / 1024.f, + vtx.pos.z() / 1024.f, 128, 256, 128); + } + + for (auto& face : data.single_tris) { + result += fmt::format("3 {} {} {}\n", face.verts[0], face.verts[1], face.verts[2]); + } + + for (auto& face : data.double_tris) { + result += fmt::format("3 {} {} {}\n", face.verts[0] + num_verts, face.verts[1] + num_verts, + face.verts[2] + num_verts); + } + + return result; +} + +ShadowData extract_shadow_data(const LinkedObjectFile& file, + const DecompilerTypeSystem& dts, + int word_idx) { + Ref ref; + ref.data = &file; + ref.seg = 0; + ref.byte_offset = word_idx * 4; + auto tr = typed_ref_from_basic(ref, dts); + 
constexpr int kHeaderSize = 48; + + ShadowData shadow_data; + + auto header_ref = TypedRef(get_field_ref(tr, "header", dts), dts.ts.lookup_type("shadow-header")); + u32 size_qwc = read_plain_data_field(header_ref, "qwc-data", dts); + ASSERT(size_qwc < 1024 * 1024); // something reasonable + std::vector data(size_qwc * 16); + Ref shadow_ref = header_ref.ref; + shadow_ref.byte_offset += kHeaderSize; + memcpy_from_plain_data(data.data(), shadow_ref, size_qwc * 16 - kHeaderSize); + + lg::info("name is {}, has {} joints, size {} bytes", read_string_field(tr, "name", dts, false), + read_plain_data_field(header_ref, "num-joints", dts), data.size()); + + shadow_data.name = read_string_field(tr, "name", dts, false); + shadow_data.num_joints = read_plain_data_field(header_ref, "num-joints", dts); + + const u32 num_verts = read_plain_data_field(header_ref, "num-verts", dts); + const u32 num_twos = read_plain_data_field(header_ref, "num-twos", dts); + ASSERT(num_verts >= num_twos); + const u32 num_ones = num_verts - num_twos; + lg::info(" vert counts {} {}", num_ones, num_twos); + + const u32 ofs_verts = read_plain_data_field(header_ref, "ofs-verts", dts); + const u32 ofs_refs = read_plain_data_field(header_ref, "ofs-refs", dts); + const u32 ofs_single_tris = read_plain_data_field(header_ref, "ofs-single-tris", dts); + const u32 ofs_single_edges = read_plain_data_field(header_ref, "ofs-single-edges", dts); + const u32 ofs_double_tris = read_plain_data_field(header_ref, "ofs-double-tris", dts); + const u32 ofs_double_edges = read_plain_data_field(header_ref, "ofs-double-edges", dts); + + const u32 num_single_tris = read_plain_data_field(header_ref, "num-single-tris", dts); + const u32 num_single_edges = read_plain_data_field(header_ref, "num-single-edges", dts); + const u32 num_double_tris = read_plain_data_field(header_ref, "num-double-tris", dts); + const u32 num_double_edges = read_plain_data_field(header_ref, "num-double-edges", dts); + + ASSERT(ofs_verts == kHeaderSize); 
// verts always right after the header + + lg::info(" offsets {} {} {} {} {} {}", ofs_verts, ofs_refs, ofs_single_tris, ofs_single_edges, + ofs_double_tris, ofs_double_edges); + + // vertices + ASSERT(ofs_refs - ofs_verts == 16 * num_verts); + shadow_data.one_bone_vertices.resize(num_ones); + memcpy_from_plain_data(shadow_data.one_bone_vertices.data(), shadow_ref, num_ones * 16); + shadow_ref.byte_offset += num_ones * 16; + for (const auto& x : shadow_data.one_bone_vertices) { + ASSERT(x.weight == 1); + } + + shadow_data.two_bone_vertices.resize(num_twos); + memcpy_from_plain_data(shadow_data.two_bone_vertices.data(), shadow_ref, num_twos * 16); + shadow_ref.byte_offset += num_twos * 16; + for (auto x : shadow_data.two_bone_vertices) { + ASSERT(x.weight > 0 && x.weight < 1); + } + + // refs + ASSERT(ofs_single_tris - ofs_refs == align16(num_verts * 2)); + shadow_data.refs.resize(num_verts); + memcpy_from_plain_data(shadow_data.refs.data(), shadow_ref, num_verts * 2); + shadow_ref.byte_offset += ofs_single_tris - ofs_refs; + for (size_t i = 0; i < num_verts; i++) { + ASSERT(shadow_data.refs[i].joint_0 < shadow_data.num_joints); + if (i < num_ones) { + ASSERT(shadow_data.refs[i].joint_1 == 255); + } else { + ASSERT(shadow_data.refs[i].joint_1 < shadow_data.num_joints); + ASSERT(shadow_data.refs[i].joint_1 != shadow_data.refs[i].joint_0); + } + } + + // single tris + ASSERT(ofs_single_edges - ofs_single_tris == align16(num_single_tris * 4)); + shadow_data.single_tris.resize(num_single_tris); + memcpy_from_plain_data(shadow_data.single_tris.data(), shadow_ref, num_single_tris * 4); + shadow_ref.byte_offset += ofs_single_edges - ofs_single_tris; + for (auto& tri : shadow_data.single_tris) { + for (auto v : tri.verts) { + ASSERT(v < num_verts); + } + ASSERT(tri.faces == 0); + } + + // single edges + ASSERT(ofs_double_tris - ofs_single_edges == align16(num_single_edges * 4)); + shadow_data.single_edges.resize(num_single_edges); + 
memcpy_from_plain_data(shadow_data.single_edges.data(), shadow_ref, num_single_edges * 4); + shadow_ref.byte_offset += ofs_double_tris - ofs_single_edges; + for (auto& edge : shadow_data.single_edges) { + for (auto x : edge.ind) { + ASSERT(x < num_verts); + } + ASSERT(edge.tri[0] != 255); + for (auto x : edge.tri) { + ASSERT(x == 255 || x < shadow_data.single_tris.size()); + } + } + + // double tris + ASSERT(ofs_double_edges - ofs_double_tris == align16(num_double_tris * 4)); + shadow_data.double_tris.resize(num_double_tris); + memcpy_from_plain_data(shadow_data.double_tris.data(), shadow_ref, num_double_tris * 4); + shadow_ref.byte_offset += ofs_double_edges - ofs_double_tris; + for (auto& tri : shadow_data.double_tris) { + for (auto v : tri.verts) { + ASSERT(v < num_verts); + } + ASSERT(tri.faces == 0); + } + + // double edges + ASSERT(size_qwc * 16 - ofs_double_edges == align16(num_double_edges * 4)); + shadow_data.double_edges.resize(num_double_edges); + memcpy_from_plain_data(shadow_data.double_edges.data(), shadow_ref, num_double_edges * 4); + for (auto& edge : shadow_data.double_edges) { + for (auto x : edge.ind) { + ASSERT(x < num_verts); + } + ASSERT(edge.tri[0] != 255); + for (auto x : edge.tri) { + ASSERT(x == 255 || x < shadow_data.double_tris.size()); + } + } + return shadow_data; +} + +std::vector convert_vertices(const ShadowData& data) { + std::vector result; + + for (size_t i = 0; i < data.one_bone_vertices.size(); i++) { + const auto& in = data.one_bone_vertices[i]; + auto& out = result.emplace_back(); + out.pos[0] = in.pos.x(); + out.pos[1] = in.pos.y(); + out.pos[2] = in.pos.z(); + out.weight = 1.f; + out.mats[0] = data.refs.at(i).joint_0; + out.mats[1] = data.refs.at(i).joint_1; + ASSERT(out.mats[1] == 255); + ASSERT(in.weight == 1.f); + out.flags = 0; + } + + for (size_t i = 0; i < data.two_bone_vertices.size(); i++) { + const auto& in = data.two_bone_vertices[i]; + auto& out = result.emplace_back(); + out.pos[0] = in.pos.x(); + out.pos[1] = 
in.pos.y(); + out.pos[2] = in.pos.z(); + out.weight = in.weight; + ASSERT(out.weight != 1.f && out.weight != 0.f); + out.mats[0] = data.refs.at(data.one_bone_vertices.size() + i).joint_0; + out.mats[1] = data.refs.at(data.one_bone_vertices.size() + i).joint_1; + ASSERT(out.mats[0] != 255); + ASSERT(out.mats[1] != 255); + out.flags = 0; + } + + return result; +} + +void extract_shadow(const ObjectFileData& ag_data, + const DecompilerTypeSystem& dts, + tfrag3::Level& out, + bool dump_level, + GameVersion version) { + // hack + // dump_level = true; + + if (dump_level) { + file_util::create_dir_if_needed(file_util::get_file_path({"debug_out/shadow"})); + } + auto geo_locations = find_objects_with_type(ag_data.linked_data, "shadow-geo"); + if (!geo_locations.empty()) { + lg::error("{} has {} shadows", ag_data.name_in_dgo, geo_locations.size()); + } + + int i = 0; + auto& sd = out.shadow_data; + for (auto loc : geo_locations) { + const ShadowData data = extract_shadow_data(ag_data.linked_data, dts, loc); + + const u32 vertex_offset = sd.vertices.size(); + const u32 num_vertices = data.one_bone_vertices.size() + data.two_bone_vertices.size(); + + // insert top vertices + auto vertices = convert_vertices(data); + sd.vertices.insert(sd.vertices.end(), vertices.begin(), vertices.end()); + + // bottom vertices + for (auto& v : vertices) { + v.flags = 1; + } + sd.vertices.insert(sd.vertices.end(), vertices.begin(), vertices.end()); + + auto& model = sd.models.emplace_back(); + model.name = data.name; + model.max_bones = data.num_joints; + + // single triangles + model.single_tris.first_index = sd.indices.size(); + for (auto& stri : data.single_tris) { + sd.indices.push_back(static_cast(stri.verts[0]) + vertex_offset); + sd.indices.push_back(static_cast(stri.verts[1]) + vertex_offset); + sd.indices.push_back(static_cast(stri.verts[2]) + vertex_offset); + } + model.single_tris.count = sd.indices.size() - model.single_tris.first_index; + + // double triangles + 
model.double_tris.first_index = sd.indices.size(); + for (auto& dtri : data.double_tris) { + sd.indices.push_back(static_cast(dtri.verts[0]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(dtri.verts[1]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(dtri.verts[2]) + vertex_offset + num_vertices); + } + model.double_tris.count = sd.indices.size() - model.double_tris.first_index; + + // single edges + model.single_edges.first_index = sd.indices.size(); + for (auto& se : data.single_edges) { + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset); + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset + num_vertices); + + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset); + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset); + } + model.single_edges.count = sd.indices.size() - model.single_edges.first_index; + + model.double_edges.first_index = sd.indices.size(); + for (auto& se : data.double_edges) { + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset); + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset + num_vertices); + + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset + num_vertices); + sd.indices.push_back(static_cast(se.ind[0]) + vertex_offset); + sd.indices.push_back(static_cast(se.ind[1]) + vertex_offset); + } + model.double_edges.count = sd.indices.size() - model.double_edges.first_index; + + if (dump_level) { + auto file_path = file_util::get_file_path( + {"debug_out/shadow", fmt::format("{}_{}.ply", ag_data.name_in_dgo, i)}); + file_util::create_dir_if_needed_for_file(file_path); + file_util::write_text_file(file_path, debug_dump_to_ply(data)); + } + i++; + } +} +} // namespace decompiler \ No newline at end of file 
diff --git a/decompiler/level_extractor/extract_shadow.h b/decompiler/level_extractor/extract_shadow.h new file mode 100644 index 0000000000..6def0f1cbd --- /dev/null +++ b/decompiler/level_extractor/extract_shadow.h @@ -0,0 +1,13 @@ +#pragma once + +#include "common/custom_data/Tfrag3Data.h" +#include "decompiler/ObjectFile/ObjectFileDB.h" + +namespace decompiler { + +void extract_shadow(const ObjectFileData& ag_data, + const DecompilerTypeSystem& dts, + tfrag3::Level& out, + bool dump_level, + GameVersion version); +} // namespace decompiler \ No newline at end of file diff --git a/decompiler/util/goal_data_reader.cpp b/decompiler/util/goal_data_reader.cpp index 12777b8495..102b8988c6 100644 --- a/decompiler/util/goal_data_reader.cpp +++ b/decompiler/util/goal_data_reader.cpp @@ -51,7 +51,8 @@ void read_plain_data_field(const TypedRef& object, } } -void memcpy_from_plain_data(u8* dest, const Ref& source, int size_bytes) { +void memcpy_from_plain_data(void* _dest, const Ref& source, int size_bytes) { + u8* dest = (u8*)_dest; const auto& words = source.data->words_by_seg.at(source.seg); for (int byte = 0; byte < size_bytes; byte++) { int byte_in_words = byte + source.byte_offset; diff --git a/decompiler/util/goal_data_reader.h b/decompiler/util/goal_data_reader.h index 8a3ad5339e..e3aa1a2b26 100644 --- a/decompiler/util/goal_data_reader.h +++ b/decompiler/util/goal_data_reader.h @@ -45,7 +45,7 @@ T read_plain_data_field(const TypedRef& object, return result; } -void memcpy_from_plain_data(u8* dest, const Ref& source, int size_bytes); +void memcpy_from_plain_data(void* dest, const Ref& source, int size_bytes); std::vector bytes_from_plain_data(const Ref& source, int size_bytes); decompiler::LinkedWord::Kind get_word_kind_for_field(const TypedRef& object, diff --git a/docs/progress-notes/shadow.md b/docs/progress-notes/shadow.md new file mode 100644 index 0000000000..ba355bdb37 --- /dev/null +++ b/docs/progress-notes/shadow.md @@ -0,0 +1,1032 @@ +# Shadow Renderer + 
+The shadow renderer works by darkening the intersection between the "shadow volume" and the world. There's a clever trick sometimes called "Carmack's Reverse" to accomplish this, but it requires drawing the "shadow volume". + +The game builds the shadow volume mesh in `shadow-cpu.gc`, then submits it to a VU1 renderer. This part is a MIPS2C mess and we want to redo it in C++. + +## Drawing Procedure Jak 1 + +## Setup + +The shadow is not drawn if the `disable-draw` flag is set. + +The `center` fields stored in `shadow-settings` and `shadow-dcache` have different meanings. + +The `center` in `shadow-settings` is set from `draw-bones-shadow`, which is a joint point. The `center` in `shadow-dcache` is: + +``` +dcache.center = settings.center + settings.dir * settings.dist-to-locus +``` + +There are both top and bottom clipping planes. If the `shdf02` flag is set, the planes in `settings` are treated as global. Otherwise, they are treated as "relative". However, computing the final plane assumes the planes have a y normal: + +``` +(set! (-> dcache plane w) (- (-> settings bot-plane w) (-> settings center y))) +``` + +If `shdf00` is set, the shadow is discarded if the camera is below the plane: +``` + (let ((v1-16 (camera-pos))) + (if (< (+ (* (-> v1-16 x) (-> dcache plane x)) + (* (-> v1-16 y) (-> dcache plane y)) + (* (-> v1-16 z) (-> dcache plane z)) + (-> dcache plane w) + ) + 0.0 + ) + (set! s1-0 #t) + ) + ) +``` + +The shadow plane is adjusted (again assuming it's +y normal) to make sure the shadow center is inside the volume: +``` + (let ((f0-25 (+ (* (-> dcache center x) (-> dcache plane x)) + (* (-> dcache center y) (-> dcache plane y)) + (* (-> dcache center z) (-> dcache plane z)) + ) + ) + ) + (if (< 0.0 (+ f0-25 (-> dcache plane w))) + (set! (-> dcache plane w) (- f0-25)) + ) + ) +``` + +Final setup of dcache: +``` + (set! (-> dcache light-dir quad) (-> settings shadow-dir quad)) + (set! (-> dcache near-plane x) 0.0) + (set! (-> dcache near-plane y) 0.0) + (set! 
(-> dcache near-plane z) 1.0) + (set! (-> dcache near-plane w) (* -2.0 (-> *math-camera* d))) + (set! (-> dcache dcache-top) (the-as uint (-> dcache data))) +``` + +## Stages + +The stages are: + +- `xform-verts` transform mesh vertices into camera space (no perspective) +- `init-vars` transform settings to camera space +- `calc-dual-verts` project vertices to plane +- `scissor-top` (only executed if shdf03 is set), clip vertices to top plane, if above +- `scissor-edges`, clip vertices to near plane +- `find-facing-single-tris`, set face bit to indicate orientation, cull backward ones +- `find-single-edges`, find edges that, when extruded, should be drawn +- `find-facing-double-tris`, set face bit to indicate orientation. Double-sided tris, so no culling +- `find-double-edges`, find edges to extrude from the double-sided tris +- `add-verts` +- `add-facing-single-tris` +- `add-single-edges` +- `add-double-tris` +- `add-double-edges` + + +## Transform Verts + +This needs access to only the `num-joints` in the header and the bone matrices. + +```asm +L98: + lw v1, 0(a0) ;; v1 = qwc-data + lw a2, 20(a0) ;; a2 = ofs-verts + dsll v1, v1, 4 ;; v1 = 16 * qwc-data + lw t0, 24(a0) ;; t0 = ofs-refs + daddu a2, a2, a0 ;; a2 = verts-in-ptr + lh a3, 8(a0) ;; a3 = num-verts + daddu t0, t0, a0 ;; t0 = refs-ptr + lw t1, 4(a0) ;; t1 = num-joints + daddu v1, a0, v1 ;; v1 = dest-start?? + sw a2, 0(a1) ;; store vtx-table in shadow-dcache + daddiu v1, v1, 144 ;; v1 = dest-start + 144... 
+ or a1, t0, r0 ;; a1 = refs-ptr + lh t0, 10(a0) ;; t0 = num-twos + or a2, a2, r0 ;; no effect + dsubu a3, a3, t0 ;; a3 = num-verts - num-twos + lui t0, 28672 + ori t0, t0, 2608 ;; 0xa30 offset in spad + beq a3, r0, L100 + +;; transform ones +B1: +L99: + daddiu a3, a3, -1 ;; decrement num-ones counter + lbu t0, 0(a1) ;; t0 = ref[0] + lbu t1, 1(a1) ;; t1 = ref[1] + daddiu a1, a1, 2 ;; increment ref + dsll t0, t0, 7 ;; t0 = mat0-idx * 128 + daddu t0, t0, v1 ;; t0 = matrix pointer + lqc2 vf1, 0(t0) ;; load transformation matrix! + lqc2 vf2, 16(t0) + lqc2 vf3, 32(t0) + lqc2 vf4, 48(t0) + lqc2 vf9, 0(a2) ;; load vertex + vmulaw.xyzw acc, vf4, vf0 ;; transform!! + vmaddax.xyzw acc, vf1, vf9 + vmadday.xyzw acc, vf2, vf9 + vmaddz.xyz vf9, vf3, vf9 + sqc2 vf9, 0(a2) ;; store! + daddiu a2, a2, 16 + bne a3, r0, L99 + +B2: +L100: + lh a0, 10(a0) ;; num-twos + beq a0, r0, L102 + sll r0, r0, 0 + +B3: +L101: + daddiu a0, a0, -1 ;; decrement remaining count + lbu t0, 0(a1) ;; load mat0 + lbu a3, 1(a1) ;; load mat1 + dsll t0, t0, 7 ;; mat0_idx * 128 + daddiu a1, a1, 2 ;; increment refs ptr + dsll a3, a3, 7 ;; mat1_idx * 128 + daddu t0, t0, v1 ;; t0 = mat0_ptr + daddu a3, a3, v1 ;; a3 = mat1_ptr + lqc2 vf1, 0(t0) ;; load mat0 + lqc2 vf2, 16(t0) + lqc2 vf3, 32(t0) + lqc2 vf4, 48(t0) + lqc2 vf9, 0(a2) ;; load vertex + lqc2 vf5, 0(a3) ;; load mat1 + lqc2 vf6, 16(a3) + lqc2 vf7, 32(a3) + lqc2 vf8, 48(a3) + vsubw.w vf10, vf0, vf9 ;; vf10.w = 1 - vertex.w + vmulaw.xyzw acc, vf4, vf0 ;; xform 0 to vf10.xyz + vmaddax.xyzw acc, vf1, vf9 + vmadday.xyzw acc, vf2, vf9 + vmaddz.xyz vf10, vf3, vf9 + + vmulaw.xyzw acc, vf8, vf0 ;; xform 1 to vf9.xyz + vmaddax.xyzw acc, vf5, vf9 + vmadday.xyzw acc, vf6, vf9 + vmaddz.xyz vf9, vf7, vf9 + + vmulaw.xyz acc, vf10, vf9 ;; combine + vmaddw.xyz vf9, vf9, vf10 + vaddx.w vf9, vf0, vf0 ;; make sure w = 1. 
+ + sqc2 vf9, 0(a2) + daddiu a2, a2, 16 + bne a0, r0, L101 + sll r0, r0, 0 + +B4: + sll r0, r0, 0 + sll r0, r0, 0 +B5: +L102: + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Init Vars + +This function just transforms light-dir, plane, top-plane, and center into the camera frame. +See details of transformation below. + +- `vf7 = cam_rot[0]` +- `vf8 = cam_rot[1]` +- `vf9 = cam_rot[2]` +- `vf10 = cam_rot[3]` +- `vf1 = light-dir` +- `vf11 = plane` +- `vf12 = top-plane` +- `vf2 = center` + +`vf1`, `vf11`, `vf12` (light-dir, both planes) are rotated by `cam-rot` +`vf2`:`center` is transformed by `cam-rot` + +``` + lw v1, *math-camera*(s7) + or v1, v1, r0 + lqc2 vf7, 364(v1) + lqc2 vf8, 380(v1) + lqc2 vf9, 396(v1) + lqc2 vf10, 412(v1) + lqc2 vf1, 128(a1) + lqc2 vf11, 80(a1) + lqc2 vf12, 96(a1) + lqc2 vf2, 64(a1) + + vmulax.xyzw acc, vf7, vf1 ;; rotate light-dir + vmadday.xyzw acc, vf8, vf1 + vmaddz.xyzw vf1, vf9, vf1 + + vmulax.xyzw acc, vf7, vf11 ;; rotate plane + vmadday.xyzw acc, vf8, vf11 + vmaddz.xyz vf11, vf9, vf11 + + vmulax.xyzw acc, vf7, vf12 ;; rotate top-plane + vmadday.xyzw acc, vf8, vf12 + vmaddz.xyz vf12, vf9, vf12 + + vmul.xyzw vf13, vf10, vf11 ;; vf13 = dot(cam_pos, plane) + + vmulaw.xyzw acc, vf10, vf0 ;; acc = cam_pos + vmaddax.xyzw acc, vf7, vf2 ;; acc = cam_pos + cam_rot_x*center + + vmul.xyzw vf14, vf10, vf12 ;; vf14 = dot(cam_pos, top-plane) + + vsubx.w vf13, vf13, vf13 ;; vf13 = dot(cam_pos, plane) - [0, 0, 0, cam.x*plane.x] + vsubx.w vf14, vf14, vf14 ;; vf14 = dot(cam_pos, top-plane) - [0, 0, 0, cam.x*plane.x] + + vmadday.xyzw acc, vf8, vf2 ;; acc = cam_pos + cam_rot_x*center + cam_rot_y*center + vmaddz.xyzw vf2, vf9, vf2 + + vsuby.w vf13, vf13, vf13 + vsuby.w vf14, vf14, vf14 + vsubz.w vf11, vf13, vf13 + vsubz.w vf12, vf14, vf14 + sqc2 vf2, 64(a1) + sqc2 vf1, 128(a1) + sqc2 vf11, 80(a1) + sqc2 vf12, 96(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Calc Dual Verts +This runs each vertex on program 28. 
It takes two cycles through the program!! +``` + nop | mul.xyzw vf27, vf20, Q N | V1-10 + div Q, vf13.x, vf17.x | sub.xyzw vf19, vf01, vf03 V2-9 | V0-0 + move.xyzw vf23, vf07 | sub.xyzw vf20, vf01, vf04 ?? | V1-0 + nop | sub.xyzw vf21, vf01, vf05 N | V2-0 + move.xyzw vf25, vf09 | sub.xyzw vf22, vf01, vf06 ?? | V3-0 + move.xyzw vf26, vf10 | sub.xyzw vf24, vf08, vf27 ?? | V1-11 + nop | mul.xyzw vf11, vf03, vf02 N | V0-1 + nop | mul.xyz vf15, vf19, vf02 N | V0-2 + div Q, vf14.x, vf18.x | mul.xyzw vf12, vf04, vf02 V3-9 | V1-1 + move.xyzw vf07, vf03 | mul.xyzw vf28, vf28, Q V0-3 | V2-10 + move.xyzw vf08, vf04 | mul.xyz vf16, vf20, vf02 V1-3 | V1-2 + move.xyzw vf09, vf05 | addy.x vf11, vf11, vf11 V2-3 | V0-4 + move.xyzw vf10, vf06 | addy.x vf15, vf15, vf15 V3-3 | V0-5 + nop | sub.xyzw vf25, vf25, vf28 N | V2-11 + nop | addy.x vf12, vf12, vf12 N | V1-4 + nop | mul.xyzw vf29, vf29, Q N | V3-10 + nop | addy.x vf16, vf16, vf16 N | V1-5 + nop | addz.x vf11, vf11, vf11 N | V0-6 + nop | addz.x vf15, vf15, vf15 N | V0-7 + nop | sub.xyzw vf26, vf26, vf29 N | V3-11 + nop | addz.x vf12, vf12, vf12 N | V1-6 + nop | addz.x vf16, vf16, vf16 N | V1-7 + nop | addw.x vf11, vf11, vf11 N | V9-8 + nop | mul.xyzw vf13, vf09, vf02 N | V2-1 + nop | addw.x vf12, vf12, vf12 N | V1-8 + nop | mul.xyz vf17, vf21, vf02 N | V2-2 + nop | mul.xyzw vf14, vf10, vf02 N | V3-1 + div Q, vf11.x, vf15.x | mul.xyz vf18, vf22, vf02 V0-9 | V3-2 + nop | addy.x vf13, vf13, vf13 N | V2-4 + nop | addy.x vf17, vf17, vf17 N | V2-5 + nop | addy.x vf14, vf14, vf14 N | V3-4 + nop | addy.x vf18, vf18, vf18 N | V3-5 + nop | addz.x vf13, vf13, vf13 N | V2-6 + nop | addz.x vf17, vf17, vf17 N | V2-7 + div Q, vf12.x, vf16.x | addz.x vf14, vf14, vf14 V1-9 | V3-6 + nop | mul.xyzw vf19, vf19, Q N | V0-10 + move.xyzw vf28, vf21 | addz.x vf18, vf18, vf18 ~ | V3-7 + move.xyzw vf29, vf22 | addw.x vf13, vf13, vf13 ~ | V2-8 + nop | addw.x vf14, vf14, vf14 :e N | V3-8 + nop | sub.xyzw vf07, vf07, vf19 N | V0-11 +``` + +`vf03`'s path: +- 
0 `sub.xyzw vf19, vf01, vf03` : `vf19 = center - vert`
+- 1 `mul.xyzw vf11, vf03, vf02` : `vf11 = dot(vert, plane)`
+- 2 `mul.xyz vf15, vf19, vf02` : `vf15 = dot3(center - vert, plane)`
+- 3 `move.xyzw vf07, vf03` : `vf07 = vert`
+- 4 `addy.x vf11, vf11, vf11` : `vf11.x += vf11.y`
+- 5 `addy.x vf15, vf15, vf15` : `vf15.x += vf15.y`
+- 6 `addz.x vf11, vf11, vf11` : `vf11.x += vf11.z`
+- 7 `addz.x vf15, vf15, vf15` : `vf15.x += vf15.z`
+- 8 `addw.x vf11, vf11, vf11` : `vf11.x += vf11.w`
+- 9 `div Q, vf11.x, vf15.x` : `Q = dot(vert, plane) / dot3(center - vert, plane)`
+- 10 `mul.xyzw vf19, vf19, Q` : `vf19 = (center - vert) * Q`
+- 11 `sub.xyzw vf07, vf07, vf19`: `vf07 = vert - (center - vert) * Q`
+
+This projects the vertex onto the plane, moving it along the line through `center` and the vertex!
+
+```
+L93:
+    lw v1, 16(a1)        ;; v1 = dcache-top
+    lw a2, 0(a1)         ;; a2 = vtx-table
+    daddiu v1, v1, 15    ;; v1 = dcache-top + 15
+    lqc2 vf1, 64(a1)     ;; vf1 = center
+    dsra v1, v1, 4       ;; aligning dcache ptr
+    lqc2 vf2, 80(a1)     ;; vf2 = plane
+    dsll a3, v1, 4       ;; aligning dcache ptr
+    lh a0, 8(a0)         ;; a0 = num-verts
+    or v1, a3, r0        ;; v1 = dest-ptr
+    sw a3, 44(a1)        ;; storing ptr-dual-verts
+    or a2, a2, r0        ;; no effect
+    beq a0, r0, L97
+    sll r0, r0, 0
+
+B1:
+    lq a3, 0(a2)         ;; a3 = vtx0
+    lq t0, 16(a2)        ;; t0 = vtx1
+    lq t1, 32(a2)        ;; t1 = vtx2
+    lq t2, 48(a2)        ;; t2 = vtx3
+    daddiu a2, a2, 64    ;; inc vtx ptr
+    qmtc2.i vf3, a3      ;; set vertex to vf3, vf4, vf5, vf6
+    qmtc2.ni vf4, t0
+    qmtc2.ni vf5, t1
+    qmtc2.ni vf6, t2
+    vcallms 28           ;; run program 28
+    sll r0, r0, 0
+    daddiu a0, a0, -4    ;; decrement vertex count by 4.
+ lq a3, 0(a2) ;; start loading next + blez a0, L95 ;; leftovers loop + lq t0, 16(a2) + +B2: + lq t1, 32(a2) + lq t2, 48(a2) + daddiu a2, a2, 64 + qmtc2.i vf3, a3 + qmtc2.ni vf4, t0 + qmtc2.ni vf5, t1 + qmtc2.ni vf6, t2 +B3: +L94: + vcallms 28 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i a3, vf23 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 0(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf24 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 16(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf25 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 32(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.ni a3, vf26 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sq a3, 48(v1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + lq a3, 0(a2) + sll r0, r0, 0 + lq t0, 16(a2) + sll r0, r0, 0 + lq t1, 32(a2) + daddiu a0, a0, -4 + lq t2, 48(a2) + daddiu a2, a2, 64 + daddiu v1, v1, 64 + sll r0, r0, 0 + qmtc2.ni vf3, a3 + sll r0, r0, 0 + qmtc2.ni vf4, t0 + sll r0, r0, 0 + qmtc2.ni vf5, t1 + bgtz a0, L94 + qmtc2.ni vf6, t2 + +B4: +L95: + vcallms 68 + sll r0, r0, 0 + vnop + sll r0, r0, 0 + daddiu a2, a0, 3 + qmfc2.i a3, vf23 + daddiu t0, a0, 2 + qmfc2.i t1, vf24 + daddiu t2, a0, 1 + qmfc2.i t3, vf25 + daddiu a0, a0, 4 + qmfc2.i t4, vf26 + beq a2, r0, L96 + sq a3, 0(v1) + +B5: + beq t0, r0, L96 + sq t1, 16(v1) + +B6: + beq t2, r0, L96 + sq t3, 32(v1) + +B7: + sll r0, r0, 0 + sq t4, 48(v1) +B8: +L96: + dsll 
a0, a0, 4
+    sll r0, r0, 0
+    daddu v1, v1, a0
+    sll r0, r0, 0
+B9:
+L97:
+    sw v1, 16(a1)        ;; dcache top store
+    or v0, r0, r0
+    jr ra
+    daddu sp, sp, r0
+
+    sll r0, r0, 0
+    sll r0, r0, 0
+    sll r0, r0, 0
+```
+
+## Scissor Top
+If a vertex is past the top plane, it is projected back onto the top plane. It is moved along the line from the vertex to its dual vertex (its projection onto the bottom plane).
+
+```
+B0:
+L83:
+    lw a2, 44(a1)             ;; a2 = dual verts
+    lw v1, 0(a1)              ;; v1 = normal verts
+    lqc2 vf3, 96(a1)          ;; vf3 = top plane
+    lh a0, 8(a0)              ;; a0 = num-verts
+    or a1, a2, r0             ;; a1 = dual-verts
+    or v1, v1, r0
+    beq a0, r0, L86
+    sll r0, r0, 0
+
+B1:
+L84:
+    lqc2 vf1, 0(v1)           ;; vf1 = vert
+    lqc2 vf2, 0(a1)           ;; vf2 = dual vert
+    vsub.xyzw vf4, vf2, vf1   ;; vf4 = dual - orig
+    vmul.xyzw vf5, vf1, vf3   ;; dot4(vert, top_plane)
+    vmul.xyz vf6, vf4, vf3    ;; dot3(dual-orig, top_plane)
+    vaddx.y vf5, vf5, vf5     ;; adds for dots
+    vaddy.x vf6, vf6, vf6
+    vaddz.y vf5, vf5, vf5
+    vaddz.x vf6, vf6, vf6
+    vaddw.y vf5, vf5, vf5
+    qmfc2.i a2, vf5           ;; checking the dot4 to see which side of top plane we're on
+    bltz a2, L85
+    sll r0, r0, 0
+
+B2:
+    vdiv Q, vf5.y, vf6.x      ;; we're past the top plane, need to project original vertex.
+    vwaitq
+    vmulq.xyzw vf4, vf4, Q
+    vsub.xyzw vf1, vf1, vf4
+    sqc2 vf1, 0(v1)
+B3:
+L85:
+    daddiu v1, v1, 16
+    daddiu a1, a1, 16
+    daddiu a0, a0, -1
+    bne a0, r0, L84
+    sll r0, r0, 0
+
+B4:
+L86:
+    or v0, r0, r0
+    jr ra
+    daddu sp, sp, r0
+```
+
+## Scissor Edges
+
+This function prevents the shadow edges from passing through the camera near plane.
+``` +L87: + lw a3, 44(a1) ;; a3 = dual-verts + lw a2, 0(a1) ;; a2 = verts + lqc2 vf3, 112(a1) ;; vf3 = near plane + lh v1, 8(a0) ;; v1 = num-verts + or a0, a3, r0 ;; a0 = duals + or a1, a2, r0 ;; a1 = verts + beq v1, r0, L92 + sll r0, r0, 0 + +B1: +L88: + lqc2 vf1, 0(a1) ;; vf1 = vert + lqc2 vf2, 0(a0) ;; vf2 = dual vert + vaddw.z vf7, vf1, vf3 ;; vf7.z = vert.z + near_plane.w + vaddw.z vf8, vf2, vf3 ;; vf8.z = dual.z + near_plane.w + vsubz.z vf6, vf1, vf2 ;; vf6.z = vert.z - dual.z + vaddw.z vf5, vf1, vf3 ;; vf5.z = vert.z + near_plane.w (?? again) + + vaddz.y vf7, vf0, vf7 ;; vf7.y = vert.z + near_plane.w + vaddz.y vf8, vf0, vf8 ;; vf8.y = dual.z + near_plane.w + vsub.xyz vf4, vf2, vf1 ;; vf4 = dual - vert + qmfc2.i a2, vf7 ;; a2 = compare of vert + qmfc2.i a3, vf8 ;; a3 = compare of dual + bltz a2, L89 + sll r0, r0, 0 + +B2: + bgtz a3, L91 + sll r0, r0, 0 + +B3: + beq r0, r0, L90 + sll r0, r0, 0 + +B4: +L89: + bltz a3, L91 + sll r0, r0, 0 + +B5: + vdiv Q, vf5.z, vf6.z + vwaitq + vmulq.xyzw vf4, vf4, Q + vnop + vnop + vnop + vadd.xyzw vf1, vf1, vf4 + beq r0, r0, L91 + sqc2 vf1, 0(a1) + +B6: +L90: + vdiv Q, vf5.z, vf6.z + vwaitq + vmulq.xyzw vf4, vf4, Q + vnop + vnop + vnop + vadd.xyzw vf1, vf1, vf4 + beq r0, r0, L91 + sqc2 vf1, 0(a0) + +B7: +L91: + daddiu a1, a1, 16 + daddiu a0, a0, 16 + daddiu v1, v1, -1 + bne v1, r0, L88 + sll r0, r0, 0 + +B8: +L92: + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Find Facing Single Tris + +``` + daddiu sp, sp, -64 + sd ra, 0(sp) + sq s4, 16(sp) + sq s5, 32(sp) + sq gp, 48(sp) + + lw v1, 16(a1) ;; dcache top (so we're writing something out!) 
+ lh t0, 12(a0) ;; t0 = num-single-tris + or a2, v1, r0 + lw a3, 28(a0) + daddu a0, a3, a0 + or a3, a0, r0 ;; a3 = single tris + lqc2 vf2, 64(a1) ;; vf2 = center + lqc2 vf1, 128(a1) ;; vf1 = light-dir + lqc2 vf11, 80(a1) ;; vf11 = plane + lw a0, 0(a1) ;; a0 = vtx-ptr + pextlw a0, a0, a0 ;; a0 = [vtx-ptr, vtx-ptr, vtx-ptr, vtx-ptr] + pextlw a0, a0, a0 + daddiu t0, t0, -4 ;; 4 tris at a time I guess. + addiu t1, r0, 1 ;; t1 = 1 + bltz t0, L78 + daddiu t0, t0, 4 + +B1: + lq t3, 0(a3) + pextub t2, r0, t3 + mfc1 r0, f31 + pextlb t3, r0, t3 + mfc1 r0, f31 + psllh t2, t2, 4 + mfc1 r0, f31 + psllh t4, t3, 4 + mfc1 r0, f31 + pextuh t3, r0, t4 + mfc1 r0, f31 + pextlh t4, r0, t4 + mfc1 r0, f31 + pextuh t7, r0, t2 + mfc1 r0, f31 + pextlh t5, r0, t2 + mfc1 r0, f31 + paddw t6, t4, a0 + mfc1 r0, f31 + pcpyud t4, t6, r0 + lq t2, 0(t6) + paddw t8, t3, a0 + lq t3, 0(t4) + pcpyud t9, t8, r0 + lq t4, 0(t8) + dsra32 t6, t6, 0 + dsra32 t8, t8, 0 + paddw s5, t5, a0 + lq t5, 0(t9) + pcpyud t9, s5, r0 + lq t6, 0(t6) + paddw gp, t7, a0 + lq t7, 0(t8) + pcpyud ra, gp, r0 + lq t8, 0(s5) + dsra32 s5, s5, 0 + dsra32 s4, gp, 0 + lq s5, 0(s5) + lq t9, 0(t9) + lq gp, 0(gp) + lq s4, 0(s4) + lq ra, 0(ra) + qmtc2.ni vf2, t2 + qmtc2.ni vf3, t6 + qmtc2.ni vf4, t3 + qmtc2.ni vf7, t4 + qmtc2.ni vf8, t7 + qmtc2.ni vf9, t5 + qmtc2.ni vf12, t8 + qmtc2.ni vf13, s5 + qmtc2.ni vf14, t9 + qmtc2.ni vf17, gp + qmtc2.ni vf18, s4 + qmtc2.ni vf19, ra +B2: +L73: + lq t3, 16(a3) + daddiu t0, t0, -4 + vcallms 0 + pextub t2, r0, t3 + mfc1 r0, f31 + pextlb t3, r0, t3 + mfc1 r0, f31 + psllh t2, t2, 4 + mfc1 r0, f31 + psllh t4, t3, 4 + mfc1 r0, f31 + pextuh t3, r0, t4 + mfc1 r0, f31 + pextlh t4, r0, t4 + mfc1 r0, f31 + pextuh t7, r0, t2 + mfc1 r0, f31 + pextlh t5, r0, t2 + mfc1 r0, f31 + paddw t6, t4, a0 + mfc1 r0, f31 + pcpyud t4, t6, r0 + lq t2, 0(t6) + paddw t8, t3, a0 + lq t3, 0(t4) + pcpyud t9, t8, r0 + lq t4, 0(t8) + dsra32 t6, t6, 0 + dsra32 t8, t8, 0 + paddw s5, t5, a0 + lq t5, 0(t9) + pcpyud t9, s5, r0 + lq t6, 
0(t6) + paddw gp, t7, a0 + lq t7, 0(t8) + pcpyud ra, gp, r0 + lq t8, 0(s5) + dsra32 s5, s5, 0 + dsra32 s4, gp, 0 + lq s5, 0(s5) + lq t9, 0(t9) + lq gp, 0(gp) + lq s4, 0(s4) + lq ra, 0(ra) + qmtc2.ni vf2, t2 + qmtc2.ni vf3, t6 + qmtc2.ni vf4, t3 + qmtc2.ni vf7, t4 + qmtc2.ni vf8, t7 + qmtc2.ni vf9, t5 + qmtc2.ni vf12, t8 + qmtc2.ni vf13, s5 + qmtc2.ni vf14, t9 + qmtc2.ni vf17, gp + qmtc2.ni vf18, s4 + qmtc2.ni vf19, ra + qmfc2.ni t3, vf22 + qmfc2.ni t4, vf23 + qmfc2.ni t2, vf24 + bgez t3, L74 + qmfc2.ni t3, vf25 + +B3: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B4: +L74: + bgez t4, L75 + daddiu a3, a3, 4 + +B5: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B6: +L75: + bgez t2, L76 + daddiu a3, a3, 4 + +B7: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B8: +L76: + bgez t3, L77 + daddiu a3, a3, 4 + +B9: + sb t1, 3(a3) + sw a3, 0(a2) + daddiu a2, a2, 4 +B10: +L77: + bgtz t0, L73 + daddiu a3, a3, 4 + +B11: +L78: + blez t0, L81 + sll r0, r0, 0 + +B12: +L79: + lbu t2, 0(a3) ;; t2 = ind-0 + lbu t3, 1(a3) ;; t3 = ind-1 + lbu t1, 2(a3) ;; t1 = ind-2 + dsll t2, t2, 4 ;; multiply by 16 + dsll t3, t3, 4 + dsll t1, t1, 4 + daddu t2, t2, a0 ;; offset, get original vertex + daddu t3, t3, a0 + daddu t1, t1, a0 + lqc2 vf2, 0(t2) + lqc2 vf3, 0(t3) + lqc2 vf4, 0(t1) + vsub.xyzw vf5, vf3, vf2 + vsub.xyzw vf6, vf4, vf2 + vopmula.xyz acc, vf5, vf6 + vopmsub.xyz vf5, vf6, vf5 ;; vf5 is the normal + vmul.xyz vf5, vf5, vf1 ;; dot with the light-dir + vaddx.y vf5, vf5, vf5 + vaddz.y vf5, vf5, vf5 + qmfc2.i t1, vf5 + sll r0, r0, 0 + bgez t1, L80 + addiu t1, r0, 1 + +B13: + sw a3, 0(a2) ;; output this triangle (as a pointer to the shadow-tri) + daddiu a2, a2, 4 + sb t1, 3(a3) ;; store a faces = 1 in the tri itself. 
+B14: +L80: + daddiu t0, t0, -1 + bne t0, r0, L79 + daddiu a3, a3, 4 + +B15: +L81: + dsubu a0, a2, v1 + dsra a0, a0, 2 + sw a0, 20(a1) ;; num facing-single-tris + sw v1, 32(a1) ;; single tri list + sw a2, 16(a1) ;; dcache top + or v0, r0, r0 + ld ra, 0(sp) + lq gp, 48(sp) + lq s5, 32(sp) + lq s4, 16(sp) + jr ra + daddiu sp, sp, 64 +``` + +## Find Single Edges + +``` +L66: + lw a2, 16(a1) ;; top + lh a3, 14(a0) ;; a3 = num-single-edges + or v1, a2, r0 ;; v1 = dcache top + lw t0, 32(a0) ;; t0 = ofs-single-edges + beq a3, r0, L71 ;; exit if none + lw t1, 28(a0) ;; t1 = ofs-single-tris + +B1: + daddu t0, t0, a0 ;; t0 = single edge table + sw a2, 36(a1) ;; set single-edge-list + daddu a0, t1, a0 ;; a0 = orig vertices + sw t0, 4(a1) ;; set single-edge-table + or t1, t0, r0 ;; t1 = single edges + addiu t2, r0, 255 ;; t2 = 255 + sll r0, r0, 0 +B2: +L67: + daddiu a3, a3, -1 ;; dec counter + lbu t4, 3(t1) ;; t4 = edge.tri-1 + sll r0, r0, 0 + lbu t5, 2(t1) ;; t5 = edge.tri-0 + beq t4, t2, L68 ;; goto L68 if tri-1 is 255. + or t3, r0, r0 ;; t3 = 0 + +B3: ;; case where both tris are set. + dsll t3, t5, 2 ;; t3 = tri-0 + dsll t4, t4, 2 + daddu t3, t3, a0 + daddu t5, t4, a0 ;; t5 = tri-1 + sll r0, r0, 0 + lbu t4, 3(t3) ;; t4 = tri-0.faces + sll r0, r0, 0 + lbu t5, 3(t5) ;; t5 = tri-1.faces + sltiu t3, t4, 1 ;; t3 = tri-0.faces < 1 + sll r0, r0, 0 + beq t4, t5, L70 ;; if facing is equal skip this. + sll r0, r0, 0 + +B4: + beq r0, r0, L69 + sll r0, r0, 0 + +B5: +L68: ;; case where tri 1 is 255 + dsll t4, t5, 2 ;; t4 = tri-0 + sll r0, r0, 0 + daddu t4, t4, a0 ;; t4 = tri0 + sll r0, r0, 0 + sll r0, r0, 0 + lbu t4, 3(t4) ;; t4 = tri-0.faces: + beq t4, r0, L70 ;; if facing isn't set, skip this. + sll r0, r0, 0 + +B6: +L69: + dsubu t4, t1, t0 ;; t4 = edge idx + sh t3, 2(v1) ;; store (0, or, tri0.faces < 1) + sh t4, 0(v1) ;; store the edge idx. 
+ daddiu v1, v1, 4 +B7: +L70: + bne a3, r0, L67 + daddiu t1, t1, 4 + +B8: +L71: + dsubu a0, v1, a2 + dsra a0, a0, 2 + sw a0, 24(a1) + sw v1, 16(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 +``` + +## Find Facing Double Tris + +Same as single, but we don't build a list. + +## Find Double Edges + +``` + lw a2, 16(a1) + lh a3, 18(a0) ;; num-double-edges + or v1, a2, r0 + lw t1, 40(a0) + beq a3, r0, L55 + lw t0, 12(a1) + +B1: + daddu a0, t1, a0 + sw a2, 40(a1) + sw a0, 8(a1) + or t1, a0, r0 + addiu t2, r0, 255 +B2: +L52: + daddiu a3, a3, -1 + lbu t3, 3(t1) + sll r0, r0, 0 + lbu t4, 2(t1) + beq t3, t2, L53 + or t5, r0, r0 + +B3: + dsll t4, t4, 2 + dsll t3, t3, 2 + daddu t4, t4, t0 + daddu t3, t3, t0 + sll r0, r0, 0 + lbu t4, 3(t4) + sll r0, r0, 0 + lbu t3, 3(t3) + beq t4, t3, L54 + sll r0, r0, 0 + +B4: + sltiu t4, t4, 1 + sll r0, r0, 0 + sltu t3, r0, t3 + sll r0, r0, 0 + sll r0, r0, 0 + sh t4, 2(v1) + dsubu t4, t1, a0 + sh t3, 6(v1) + sll r0, r0, 0 + sh t4, 0(v1) + sll r0, r0, 0 + sh t4, 4(v1) + beq r0, r0, L54 + daddiu v1, v1, 8 + +B5: +L53: + dsll t3, t4, 2 + sll r0, r0, 0 + daddu t3, t3, t0 + sll r0, r0, 0 + sll r0, r0, 0 + lbu t3, 3(t3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sltiu t3, t3, 1 + dsubu t4, t1, a0 + sh t3, 2(v1) + sh t4, 0(v1) + daddiu v1, v1, 4 +B6: +L54: + bne a3, r0, L52 + daddiu t1, t1, 4 + +B7: +L55: + dsubu a0, v1, a2 + dsra a0, a0, 2 + sw a0, 28(a1) + sw v1, 16(a1) + or v0, r0, r0 + jr ra + daddu sp, sp, r0 + + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 +``` \ No newline at end of file diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index ba6da7cce1..a02c504612 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -55,6 +55,7 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/foreground/Merc2.cpp graphics/opengl_renderer/foreground/Merc2BucketRenderer.cpp graphics/opengl_renderer/foreground/Shadow2.cpp + graphics/opengl_renderer/foreground/Shadow3.cpp graphics/opengl_renderer/loader/Loader.cpp 
graphics/opengl_renderer/loader/LoaderStages.cpp graphics/opengl_renderer/ocean/CommonOceanRenderer.cpp diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 207f003180..126e869109 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -55,6 +55,8 @@ struct SharedRenderState { // including transformation, rotation, perspective math::Vector4f camera_matrix[4]; + math::Vector4f camera_rot[4]; + math::Vector4f perspective[4]; math::Vector4f camera_hvdf_off; math::Vector4f camera_fog; math::Vector4f camera_pos; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 5b38618f90..a9b9c81a7d 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -112,6 +112,7 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr texture_pool, } m_merc2 = std::make_shared(m_render_state.shaders, anim_slot_array()); + m_shadow3 = std::make_shared(m_render_state.shaders); m_generic2 = std::make_shared(m_render_state.shaders); // initialize all renderers @@ -757,8 +758,9 @@ void OpenGLRenderer::init_bucket_renderers_jak1() { init_bucket_renderer("common-alpha-generic", BucketCategory::GENERIC, BucketId::GENERIC_ALPHA, m_generic2, - Generic2::Mode::NORMAL); // 46 - init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW); // 47 + Generic2::Mode::NORMAL); // 46 + init_bucket_renderer("shadow", BucketCategory::OTHER, BucketId::SHADOW, + m_shadow3); // 47 //----------------------- // LEVEL 0 pris texture diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.h b/game/graphics/opengl_renderer/OpenGLRenderer.h index 667fe2fecf..fdd7e07c50 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.h +++ b/game/graphics/opengl_renderer/OpenGLRenderer.h @@ -13,6 +13,7 @@ #include "game/graphics/opengl_renderer/TextureAnimator.h" #include 
"game/graphics/opengl_renderer/foreground/Generic2.h" #include "game/graphics/opengl_renderer/foreground/Merc2.h" +#include "game/graphics/opengl_renderer/foreground/Shadow3.h" #include "game/graphics/opengl_renderer/opengl_utils.h" #include "game/tools/filter_menu/filter_menu.h" #include "game/tools/subtitle_editor/subtitle_editor.h" @@ -112,6 +113,7 @@ class OpenGLRenderer { std::shared_ptr m_merc2; std::shared_ptr m_generic2; + std::shared_ptr m_shadow3; std::shared_ptr m_texture_animator; std::vector> m_bucket_renderers; std::vector m_bucket_categories; diff --git a/game/graphics/opengl_renderer/Shader.cpp b/game/graphics/opengl_renderer/Shader.cpp index 9e18e7e9d2..2d1b14e491 100644 --- a/game/graphics/opengl_renderer/Shader.cpp +++ b/game/graphics/opengl_renderer/Shader.cpp @@ -132,6 +132,7 @@ ShaderLibrary::ShaderLibrary(GameVersion version) { at(ShaderId::HFRAG_MONTAGE) = {"hfrag_montage", version}; at(ShaderId::PLAIN_TEXTURE) = {"plain_texture", version}; at(ShaderId::TIE_WIND) = {"tie_wind", version}; + at(ShaderId::SHADOW3) = {"shadow3", version}; for (auto& shader : m_shaders) { ASSERT_MSG(shader.okay(), "error compiling shader"); diff --git a/game/graphics/opengl_renderer/Shader.h b/game/graphics/opengl_renderer/Shader.h index 99acd7b186..59e050f096 100644 --- a/game/graphics/opengl_renderer/Shader.h +++ b/game/graphics/opengl_renderer/Shader.h @@ -65,6 +65,7 @@ enum class ShaderId { HFRAG_MONTAGE = 38, PLAIN_TEXTURE = 39, TIE_WIND = 40, + SHADOW3 = 41, MAX_SHADERS }; diff --git a/game/graphics/opengl_renderer/ShadowRenderer.cpp b/game/graphics/opengl_renderer/ShadowRenderer.cpp index 8f3fa3f646..ed00574ef2 100644 --- a/game/graphics/opengl_renderer/ShadowRenderer.cpp +++ b/game/graphics/opengl_renderer/ShadowRenderer.cpp @@ -4,7 +4,8 @@ #include "third-party/imgui/imgui.h" -ShadowRenderer::ShadowRenderer(const std::string& name, int my_id) : BucketRenderer(name, my_id) { +ShadowRenderer::ShadowRenderer(const std::string& name, int my_id, 
std::shared_ptr shadow3) + : BucketRenderer(name, my_id), m_shadow3(shadow3) { // create OpenGL objects glGenBuffers(1, &m_ogl.vertex_buffer); @@ -35,9 +36,13 @@ ShadowRenderer::ShadowRenderer(const std::string& name, int my_id) : BucketRende } void ShadowRenderer::draw_debug_window() { - ImGui::Checkbox("Volume", &m_debug_draw_volume); - ImGui::Text("Vert: %d, Front: %d, Back: %d\n", m_next_vertex, m_next_front_index, - m_next_back_index); + if (m_using_shadow3) { + m_shadow3->draw_debug_window(); + } else { + ImGui::Checkbox("Volume", &m_debug_draw_volume); + ImGui::Text("Vert: %d, Front: %d, Back: %d\n", m_next_vertex, m_next_front_index, + m_next_back_index); + } } ShadowRenderer::~ShadowRenderer() { @@ -201,6 +206,12 @@ void ShadowRenderer::render(DmaFollower& dma, return; } + m_using_shadow3 = dma.current_tag_vifcode0().kind != VifCode::Kind::STCYCL; + if (m_using_shadow3) { + m_shadow3->render_jak1(dma, render_state, prof); + return; + } + { // constants auto constants = dma.read_and_advance(); diff --git a/game/graphics/opengl_renderer/ShadowRenderer.h b/game/graphics/opengl_renderer/ShadowRenderer.h index 70afcf44da..32722b91e6 100644 --- a/game/graphics/opengl_renderer/ShadowRenderer.h +++ b/game/graphics/opengl_renderer/ShadowRenderer.h @@ -2,10 +2,15 @@ #include "game/common/vu.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/opengl_renderer/foreground/Shadow3.h" +/*! + * Jak 1 shadow renderer. This uses mips2c'd VU1 code and isn't very efficient. + * If it detects PC shadow enabled, it will instead render with Shadow3. 
+ */ class ShadowRenderer : public BucketRenderer { public: - ShadowRenderer(const std::string& name, int my_id); + ShadowRenderer(const std::string& name, int my_id, std::shared_ptr shadow3); ~ShadowRenderer(); void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; void draw_debug_window() override; @@ -127,4 +132,6 @@ class ShadowRenderer : public BucketRenderer { } m_ogl; bool m_debug_draw_volume = false; + std::shared_ptr m_shadow3; + bool m_using_shadow3 = false; }; diff --git a/game/graphics/opengl_renderer/background/background_common.cpp b/game/graphics/opengl_renderer/background/background_common.cpp index b3f9f6ca56..d14723a777 100644 --- a/game/graphics/opengl_renderer/background/background_common.cpp +++ b/game/graphics/opengl_renderer/background/background_common.cpp @@ -829,6 +829,8 @@ void update_render_state_from_pc_settings(SharedRenderState* state, const TfragP for (int i = 0; i < 4; i++) { state->camera_planes[i] = data.camera.planes[i]; state->camera_matrix[i] = data.camera.camera[i]; + state->camera_rot[i] = data.camera.rot[i]; + state->perspective[i] = data.camera.perspective[i]; } state->camera_pos = data.camera.trans; state->camera_hvdf_off = data.camera.hvdf_off; diff --git a/game/graphics/opengl_renderer/foreground/Shadow3.cpp b/game/graphics/opengl_renderer/foreground/Shadow3.cpp new file mode 100644 index 0000000000..09d5e5d49d --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3.cpp @@ -0,0 +1,378 @@ +#include "Shadow3.h" + +#include "game/runtime.h" + +Shadow3::Shadow3(ShaderLibrary& shaders) { + glGenVertexArrays(1, &m_opengl.vao); + glBindVertexArray(m_opengl.vao); + + glGenBuffers(1, &m_opengl.bones_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, m_opengl.bones_buffer); + + GLint val; + glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &val); + if (val <= 16) { + m_opengl.buffer_alignment = 1; + } else { + m_opengl.buffer_alignment = val / 16; + if (m_opengl.buffer_alignment * 
16 != (u32)val) { + ASSERT_MSG(false, + fmt::format("opengl uniform buffer alignment is {}, which is strange\n", val)); + } + } + { + auto& shader = shaders.at(ShaderId::SHADOW3); + shader.activate(); + auto id = shader.id(); + m_uniforms.camera_rot = glGetUniformLocation(id, "camera_rot"); + m_uniforms.fog_constants = glGetUniformLocation(id, "fog_constants"); + m_uniforms.hvdf_offset = glGetUniformLocation(id, "hvdf_offset"); + m_uniforms.perspective_matrix = glGetUniformLocation(id, "perspective_matrix"); + m_uniforms.debug_color = glGetUniformLocation(id, "debug_color"); + m_uniforms.origin = glGetUniformLocation(id, "origin"); + m_uniforms.top_plane = glGetUniformLocation(id, "top_plane"); + m_uniforms.bottom_plane = glGetUniformLocation(id, "bottom_plane"); + m_uniforms.bottom_cap = glGetUniformLocation(id, "bottom_cap"); + } + + std::vector temp(MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f)); + glBufferData(GL_UNIFORM_BUFFER, MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f), temp.data(), + GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); +} + +Shadow3::~Shadow3() { + glDeleteBuffers(1, &m_opengl.bones_buffer); + glDeleteVertexArrays(1, &m_opengl.vao); +} + +void Shadow3::setup_for_level(SharedRenderState* render_state, const LevelData* level_data) { + glBindVertexArray(m_opengl.vao); + glBindBuffer(GL_ARRAY_BUFFER, level_data->shadow_vertices); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, level_data->shadow_indices); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_GEQUAL); + + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::ShadowVertex), // stride + (void*)offsetof(tfrag3::ShadowVertex, pos) // offset (0) + ); + + glVertexAttribPointer(1, // location 1 in the + 1, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + 
sizeof(tfrag3::ShadowVertex),                  // stride
+                        (void*)offsetof(tfrag3::ShadowVertex, weight)  // byte offset of weight
+  );
+
+  glVertexAttribIPointer(2,                                              // location 2 in the shader
+                         2,                                              // 2 values per vert
+                         GL_UNSIGNED_BYTE,                               // u8's
+                         sizeof(tfrag3::ShadowVertex),                   // stride
+                         (void*)offsetof(tfrag3::ShadowVertex, mats[0])  // offset in array
+  );
+
+  glVertexAttribIPointer(3,                                            // location 3 in the shader
+                         1,                                            // 1 value per vert
+                         GL_UNSIGNED_BYTE,                             // u8's
+                         sizeof(tfrag3::ShadowVertex),                 // stride
+                         (void*)offsetof(tfrag3::ShadowVertex, flags)  // offset in array
+  );
+}
+
+namespace {
+void set_uniform(GLuint uniform, const math::Vector3f& val) {  // upload as a vec3 uniform
+  glUniform3f(uniform, val.x(), val.y(), val.z());
+}
+void set_uniform(GLuint uniform, const math::Vector4f& val) {  // upload as a vec4 uniform
+  glUniform4f(uniform, val.x(), val.y(), val.z(), val.w());
+}
+}  // namespace
+// Draw one request into the stencil buffer: one pass increments, a second pass decrements.
+void Shadow3::draw_model(SharedRenderState* render_state,
+                         ShadowRequest* request,
+                         ScopedProfilerNode& prof) {
+  glBindBufferRange(GL_UNIFORM_BUFFER, 1, m_opengl.bones_buffer,
+                    sizeof(math::Vector4f) * request->bone_idx, 128 * 16 * 4);  // offset, size (bytes)
+  const auto* geo = request->model.model;
+  // NOTE(review): GL needs the offset above aligned to GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT - confirm.
+  set_uniform(m_uniforms.origin, request->origin);
+  set_uniform(m_uniforms.top_plane, request->top_plane);
+  set_uniform(m_uniforms.bottom_plane, request->bottom_plane);
+
+  // enable stencil!
+  glEnable(GL_STENCIL_TEST);
+  glStencilMask(0xFF);  // allow writes to every stencil bit
+  glEnable(GL_DEPTH_TEST);
+  glDisable(GL_BLEND);
+  glDepthFunc(GL_GEQUAL);
+  // glDepthMask(GL_FALSE); // no depth writes.
+  // draw one index run of the level's shadow index buffer, tinted with the given debug color.
+  auto do_draw = [&](const tfrag3::ShadowModel::Run& run, const math::Vector3f& color) {
+    set_uniform(m_uniforms.debug_color, color);
+    glDrawElements(GL_TRIANGLES, run.count, GL_UNSIGNED_INT,
+                   (void*)(sizeof(u32) * run.first_index));
+    glDisable(GL_BLEND);
+    set_uniform(m_uniforms.debug_color,color);  // NOTE(review): repeats the call above
+    // glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
+    // glDrawElements(GL_TRIANGLES, run.count, GL_UNSIGNED_INT,
+    // (void*)(sizeof(u32) * run.first_index));
+    // glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+  };
+  // draw the runs that are currently enabled: single tris and single edges, bottom cap off.
+  auto do_all_draws = [&]( const math::Vector3f& color) {
+    glUniform1i(m_uniforms.bottom_cap, 0);
+    do_draw(geo->single_tris, color);
+    // do_draw(geo->double_tris, math::Vector3f(0.8, 0.5, 0.5));
+    do_draw(geo->single_edges, color);
+    // do_draw(geo->double_edges, math::Vector3f(0.5, 0.8, 0.5));
+    // glUniform1i(m_uniforms.bottom_cap, 1);
+    // do_draw(geo->single_tris, math::Vector3f(0.5, 0.5, 0.8));
+    // do_draw(geo->double_tris, math::Vector3f(0.5, 0.5, 0.8));
+  };
+
+  // using glCullFace(GL_FRONT) seems to give us back faces.
+
+  glEnable(GL_CULL_FACE);
+  glCullFace(GL_BACK);  // first pass: back faces are culled
+  glStencilFunc(GL_ALWAYS, 0, 0);          // always pass stencil
+  glStencilOp(GL_KEEP, GL_KEEP, GL_INCR);  // increment on depth pass.
+  do_all_draws({0.1f, 0.1f, 0.8f});
+  glCullFace(GL_FRONT);  // second pass: front faces are culled
+  glStencilFunc(GL_ALWAYS, 0, 0);          // always pass stencil
+  glStencilOp(GL_KEEP, GL_KEEP, GL_DECR);  // decrement on depth pass.
+ do_all_draws({0.8f, 0.1f, 0.1f}); + glDisable(GL_CULL_FACE); +} + +void Shadow3::finish(SharedRenderState* render_state, ScopedProfilerNode& prof) { + +} + +void Shadow3::flush_requests(SharedRenderState* render_state, ScopedProfilerNode& prof) { + if (m_next_request == 0) { + return; + } + + if (!m_did_first_time_setup) { + first_time_setup(render_state); + m_did_first_time_setup = true; + } + + glBindBuffer(GL_UNIFORM_BUFFER, m_opengl.bones_buffer); + glBufferSubData(GL_UNIFORM_BUFFER, 0, m_next_free_bone_vector * sizeof(math::Vector4f), + m_shader_bone_vector_buffer); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + + for (auto& c : m_level_chains) { + if (!c.head) + continue; + setup_for_level(render_state, c.level); + ShadowRequest* iter = c.head; + while (iter) { + draw_model(render_state, iter, prof); + iter = iter->next; + } + } + + for (auto& c : m_level_chains) { + c.level = nullptr; + c.head = nullptr; + } + m_next_request = 0; + m_next_free_bone_vector = 0; +} + +void Shadow3::first_time_setup(SharedRenderState* render_state) { + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + + render_state->shaders[ShaderId::SHADOW3].activate(); + glUniformMatrix4fv(m_uniforms.camera_rot, 1, GL_FALSE, &render_state->camera_rot[0].x()); + glUniformMatrix4fv(m_uniforms.perspective_matrix, 1, GL_FALSE, &render_state->perspective[0].x()); + set_uniform(m_uniforms.fog_constants, render_state->camera_fog); + set_uniform(m_uniforms.hvdf_offset, render_state->camera_hvdf_off); +} + +void Shadow3::draw_debug_window() {} + +void Shadow3::render_jak1(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + printf("Jak1 shadow render\n"); + m_did_first_time_setup = false; + + while (dma.current_tag_offset() != render_state->next_bucket) { + auto dmatag = dma.current_tag(); + auto data = dma.read_and_advance(); + int run_idx = 0; + if (data.vifcode0().kind == VifCode::Kind::PC_PORT) { + printf(" Run %d start\n", run_idx); + u32 next = data.data_offset; 
+      while (next) {
+        Jak1ShadowRequest game_request;
+        memcpy(&game_request, g_ee_main_mem + next, sizeof(Jak1ShadowRequest));
+        next = game_request.next;  // advance now, so every `continue` below moves to the next request
+
+        char name[128];
+        strncpy(name, (const char*)(g_ee_main_mem) + 4 + game_request.geo_name, 128);
+        name[127] = 0;  // strncpy does not terminate when the source fills the buffer
+        printf(" draw %s\n", name);
+
+        auto model = render_state->loader->get_shadow_model(name);
+        if (!model) {
+          printf(" SKIP: no model data\n");
+          continue;
+        }
+        /*
+         * flag bits of settings.flags, listed from bit 0 upward (original notes kept):
+         *  1  (shdf00) ;; unused            -> used below as kCullWhenUnderPlane
+         *  2  (disable-fade)
+         *  4  (shdf02) ;; only set, never used. -> used below as kAbsolutePlanes
+         *  8  (shdf03), 16 (shdf04) ;; unused
+         *  32 (disable-draw)
+         */
+        constexpr u32 kCullWhenUnderPlane = 1;
+        constexpr u32 kAbsolutePlanes = 4;
+        constexpr u32 kDisableDraw = 32;
+
+        if (game_request.settings.flags & kDisableDraw) {
+          printf(" SKIP: disable flag set\n");
+          continue;
+        }
+
+        // flush first if the bones (plus alignment padding) or the request pool would overflow.
+        const u32 align = m_opengl.buffer_alignment;  // UBO offset alignment, in Vector4f units
+        const u32 bones_needed = game_request.num_joints * 4 + align;
+        if (bones_needed + m_next_free_bone_vector >= MAX_SHADER_BONE_VECTORS ||
+            static_cast<size_t>(m_next_request) == m_requests.size()) {
+          flush_requests(render_state, prof);
+        }
+
+        LevelChain* chain = nullptr;
+        for (auto& c : m_level_chains) {
+          if (c.level == model->level || !c.level) {
+            chain = &c;
+            chain->level = model->level;
+            break;
+          }
+        }
+
+        if (!chain) {
+          ASSERT_NOT_REACHED();
+        }
+
+        // grab the next request and link it to the chain for the level.
+        auto& request = m_requests[m_next_request++];
+        request.next = chain->head;
+        chain->head = &request;
+
+        request.model = *model;
+        // the origin of "light" for the shadow is found by starting at the "center" point
+        // (somewhere in the model) and following the shadow direction backward.
+        request.origin = game_request.settings.center +
+                         game_request.settings.shadow_dir * game_request.settings.dist_to_locus;
+
+        // copy bones to buffer, starting at an offset that glBindBufferRange will accept
+        constexpr int in_stride = 8 * 4 * sizeof(float);
+        constexpr int out_stride = 4 * 4 * sizeof(float);
+        constexpr int in_offset = 3 * in_stride;
+        m_next_free_bone_vector = (m_next_free_bone_vector + align - 1) / align * align;
+        request.bone_idx = m_next_free_bone_vector;
+        for (u32 i = 0; i < game_request.num_joints; i++) {
+          memcpy(&m_shader_bone_vector_buffer[m_next_free_bone_vector].x(),
+                 g_ee_main_mem + game_request.mtx + in_offset + i * in_stride, out_stride);
+          m_next_free_bone_vector += 4;
+        }
+
+        // the clipping planes for the shadow
+        request.top_plane = game_request.settings.top_plane;
+        request.bottom_plane = game_request.settings.bot_plane;
+        if (!(kAbsolutePlanes & game_request.settings.flags)) {
+          printf("relative plane mode, base is %f, move by %f\n",
+                 -request.bottom_plane.w() / 4096.0, game_request.settings.center.y() / 4096.0);
+          // in relative planes mode, the height of the plane is adjusted to be relative to the
+          // height of the center, so the planes move up and down with the model
+          request.top_plane.w() -= game_request.settings.center.y();
+          request.bottom_plane.w() -= game_request.settings.center.y();
+        }
+
+        // skip drawing if the camera is below the lower clipping plane
+        if (kCullWhenUnderPlane & game_request.settings.flags) {
+          if (render_state->camera_pos.xyz().dot(request.bottom_plane.xyz()) +
+                      request.bottom_plane.w() <
+                  0) {
+            printf(" SKIP: camera below lower clipping plane.\n");
+            chain->head = request.next;  // unlink, or the reused slot would link to itself
+            m_next_free_bone_vector = request.bone_idx;  // give back the bones copied above
+            m_next_request--;
+            continue;
+          }
+        }
+
+        // detect if the origin is below the clipping plane and if so, move it up.
+ const float dot = request.bottom_plane.xyz().dot(request.origin); + if (dot + request.bottom_plane.w() > 0) { + printf(" the origin is below the clipping plane, moving it up.\n"); + printf(" center was %s\n", game_request.settings.center.to_string_aligned().c_str()); + printf(" dir was %s\n", game_request.settings.shadow_dir.to_string_aligned().c_str()); + printf(" locus %f\n", game_request.settings.dist_to_locus); + printf(" bottom plane was %s\n", + game_request.settings.bot_plane.to_string_aligned().c_str()); + printf(" adjusted bottom plane was %s\n", + request.bottom_plane.to_string_aligned().c_str()); + printf(" abs flag %d\n", game_request.settings.flags & kAbsolutePlanes); + + request.bottom_plane.w() = -dot; + } + + const auto& cam_rot = render_state->camera_rot; + const auto& cam_pos = render_state->camera_pos; + + request.light_dir = game_request.settings.shadow_dir; + + // transform to camera frame + auto rotate = [&](const math::Vector3f& in) { + return (cam_rot[0] * in[0] + cam_rot[1] * in[1] + cam_rot[2] * in[2]).xyz(); + }; + + auto transform = [&](const math::Vector3f& in) { + return (cam_rot[0] * in[0] + cam_rot[1] * in[1] + cam_rot[2] * in[2] + cam_rot[3]).xyz(); + }; + + auto rotate_plane = [&](const math::Vector4f& in) { + auto xyz = rotate(in.xyz()); + return math::Vector4f(xyz.x(), xyz.y(), xyz.z(), in.w() - xyz.dot(cam_rot[3].xyz())); + }; + + printf("plane offset before: %f\n", + game_request.settings.center.dot(request.bottom_plane.xyz()) + + request.bottom_plane.w()); + + request.light_dir = rotate(request.light_dir); + request.top_plane = rotate_plane(request.top_plane); + request.bottom_plane = rotate_plane(request.bottom_plane); + request.origin = transform(request.origin); + + printf("plane offset after: %f\n", + transform(game_request.settings.center).dot(request.bottom_plane.xyz()) + + request.bottom_plane.w()); + printf("rot3: %s\n", cam_rot[3].to_string_aligned().c_str()); + printf(" 2: %s\n", 
cam_pos.to_string_aligned().c_str()); + + // printf(" origin: %s\n", (request.origin / 4096.f).to_string_aligned().c_str()); + } + } + } + + flush_requests(render_state, prof); + finish(render_state, prof); +} diff --git a/game/graphics/opengl_renderer/foreground/Shadow3.h b/game/graphics/opengl_renderer/foreground/Shadow3.h new file mode 100644 index 0000000000..154bf2e7c1 --- /dev/null +++ b/game/graphics/opengl_renderer/foreground/Shadow3.h @@ -0,0 +1,81 @@ +#pragma once +#include "game/graphics/opengl_renderer/BucketRenderer.h" + +struct Jak1ShadowSettings { // raw copy of the GOAL shadow-settings: pc-draw-bones-shadow mem-copies it into each request + math::Vector center; // GOAL overwrites x/y/z with the position of the shadow joint before queueing + u32 flags; + math::Vector shadow_dir; + float dist_to_locus; + math::Vector4f bot_plane; + math::Vector4f top_plane; + float fade_dist; + float fade_start; + s32 dummy2; + s32 dummy3; +}; +static_assert(sizeof(Jak1ShadowSettings) == 5 * 16); + +struct Jak1ShadowRequest { // C++ view of the GOAL pc-shadow-request stored in the DMA buffer; field order must match the deftype + u8 dma[16]; // the GOAL dma-next packet (a next tag that skips over this request) + Jak1ShadowSettings settings; + u32 geo_name; // GOAL string pointer: name of the shadow geometry + u32 mtx; // GOAL pointer to the bone data, used by render_jak1 as an offset into g_ee_main_mem + u32 num_joints; + u32 next; // GOAL pointer to the next request of the run, 0 for the last one +}; + +class Shadow3 { // draws the shadow requests queued by the GOAL code; entry point is render_jak1 + public: + Shadow3(ShaderLibrary& shaders); + ~Shadow3(); + void render_jak1(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); + void draw_debug_window(); + + private: + struct ShadowRequest { + ShadowRef model; + math::Vector origin; + math::Vector4f top_plane, bottom_plane; + math::Vector3f light_dir; + ShadowRequest* next = nullptr; + u32 bone_idx = 0; // index of this request's first vector in m_shader_bone_vector_buffer + }; + + struct LevelChain { + const LevelData* level = nullptr; + ShadowRequest* head = nullptr; + }; + void flush_requests(SharedRenderState* render_state, ScopedProfilerNode& prof); + void first_time_setup(SharedRenderState* render_state); + void setup_for_level(SharedRenderState* render_state, const LevelData* level_data); + void draw_model(SharedRenderState* render_state, + ShadowRequest* request, + ScopedProfilerNode& prof); + void finish(SharedRenderState* render_state, ScopedProfilerNode& prof); + std::array m_requests; + std::array m_level_chains; + int m_next_request = 0; + + static constexpr int 
MAX_SHADER_BONE_VECTORS = 1024 * 16; // ?? (render_jak1 consumes 4 vectors per joint and does not check this limit) + math::Vector4f m_shader_bone_vector_buffer[MAX_SHADER_BONE_VECTORS]; + u32 m_next_free_bone_vector = 0; // next unused slot in m_shader_bone_vector_buffer + + struct { + GLuint vao = -1; + GLuint bones_buffer = -1; + int buffer_alignment = 0; + } m_opengl; + + struct { + GLuint hvdf_offset = 0; + GLuint fog_constants = 0; + GLuint perspective_matrix = 0; + GLuint camera_rot = 0; + GLuint debug_color = 0; + GLuint bottom_plane = 0; + GLuint top_plane = 0; + GLuint origin = 0; + GLuint bottom_cap = 0; + } m_uniforms; + bool m_did_first_time_setup = false; +}; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/loader/Loader.cpp b/game/graphics/opengl_renderer/loader/Loader.cpp index e0d7beddae..b08b1e71f7 100644 --- a/game/graphics/opengl_renderer/loader/Loader.cpp +++ b/game/graphics/opengl_renderer/loader/Loader.cpp @@ -152,6 +152,7 @@ void Loader::draw_debug_window() { lev.second->frames_since_last_used); ImGui::Text(" %d textures", (int)lev.second->textures.size()); ImGui::Text(" %d merc", (int)lev.second->merc_model_lookup.size()); + ImGui::Text(" %d shadow", (int)lev.second->shadow_model_lookup.size()); } ImGui::NewLine(); ImGui::Separator(); @@ -267,14 +268,20 @@ const tfrag3::Level& Loader::load_common(TexturePool& tex_pool, const std::strin Timer tim; MercLoaderStage mls; + ShadowLoaderStage sls; LoaderInput input; input.tex_pool = &tex_pool; input.mercs = &m_all_merc_models; + input.shadows = &m_all_shadow_models; input.lev_data = &m_common_level; bool done = false; while (!done) { done = mls.run(tim, input); } + done = false; + while (!done) { + done = sls.run(tim, input); + } return *m_common_level.level; } @@ -416,6 +423,7 @@ void Loader::update(TexturePool& texture_pool) { LoaderInput loader_input; loader_input.lev_data = lev.get(); loader_input.mercs = &m_all_merc_models; + loader_input.shadows = &m_all_shadow_models; loader_input.tex_pool = &texture_pool; for (auto& stage : m_loader_stages) { @@ -496,6 +504,8 @@ void 
Loader::update(TexturePool& texture_pool) { m_garbage_buffers.push_back(lev->collide_vertices); m_garbage_buffers.push_back(lev->merc_vertices); m_garbage_buffers.push_back(lev->merc_indices); + m_garbage_buffers.push_back(lev->shadow_indices); + m_garbage_buffers.push_back(lev->shadow_vertices); for (auto& model : lev->level->merc_data.models) { auto& mercs = m_all_merc_models.at(model.name); @@ -505,6 +515,14 @@ void Loader::update(TexturePool& texture_pool) { mercs.erase(it); } + for (auto& model : lev->level->shadow_data.models) { + auto& shadows = m_all_shadow_models.at(model.name); + ShadowRef ref{&model, lev->load_id}; + auto it = std::find(shadows.begin(), shadows.end(), ref); + ASSERT_MSG(it != shadows.end(), fmt::format("missing shadow: {}\n", model.name)); + shadows.erase(it); + } + m_loaded_tfrag3_levels.erase(*to_unload); } } @@ -544,3 +562,14 @@ std::optional Loader::get_merc_model(const char* model_name) { return std::nullopt; } } + +std::optional Loader::get_shadow_model(const char* model_name) { + // don't think we need to lock here... 
const auto& it = m_all_shadow_models.find(model_name); + if (it != m_all_shadow_models.end() && !it->second.empty()) { + // it->second.front().parent_level->frames_since_last_used = 0; + return it->second.front(); + } else { + return std::nullopt; + } +} diff --git a/game/graphics/opengl_renderer/loader/Loader.h b/game/graphics/opengl_renderer/loader/Loader.h index a6fde83998..dc273e7903 100644 --- a/game/graphics/opengl_renderer/loader/Loader.h +++ b/game/graphics/opengl_renderer/loader/Loader.h @@ -21,6 +21,7 @@ class Loader { void update_blocking(TexturePool& tex_pool); const LevelData* get_tfrag3_level(const std::string& level_name); std::optional get_merc_model(const char* model_name); + std::optional get_shadow_model(const char* model_name); const tfrag3::Level& load_common(TexturePool& tex_pool, const std::string& name); void set_want_levels(const std::vector& levels); void set_active_levels(const std::vector& levels); @@ -52,6 +53,7 @@ class Loader { std::unordered_map> m_loaded_tfrag3_levels; std::unordered_map> m_all_merc_models; + std::unordered_map> m_all_shadow_models; std::vector m_desired_levels; std::vector m_active_levels; diff --git a/game/graphics/opengl_renderer/loader/LoaderStages.cpp b/game/graphics/opengl_renderer/loader/LoaderStages.cpp index 2a3ff372eb..c378ae19ba 100644 --- a/game/graphics/opengl_renderer/loader/LoaderStages.cpp +++ b/game/graphics/opengl_renderer/loader/LoaderStages.cpp @@ -689,6 +689,68 @@ bool MercLoaderStage::run(Timer& /*timer*/, LoaderInput& data) { return true; } +ShadowLoaderStage::ShadowLoaderStage() : LoaderStage("shadow") {} +void ShadowLoaderStage::reset() { // return to the initial state so the stage can upload another level + m_done = false; + m_opengl = false; + m_vtx_uploaded = false; + m_idx = 0; +} + +bool ShadowLoaderStage::run(Timer& /*timer*/, LoaderInput& data) { // incremental upload of the level's shadow data: returns false while work remains, true once finished + if (m_done) { + return true; + } + + if (!m_opengl) { // first call only: create both GL buffers at their final size, without data + glGenBuffers(1, &data.lev_data->shadow_indices); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, data.lev_data->shadow_indices); + 
glBufferData(GL_ELEMENT_ARRAY_BUFFER, + data.lev_data->level->shadow_data.indices.size() * sizeof(u32), nullptr, + GL_STATIC_DRAW); + + glGenBuffers(1, &data.lev_data->shadow_vertices); + glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->shadow_vertices); + glBufferData(GL_ARRAY_BUFFER, + data.lev_data->level->shadow_data.vertices.size() * sizeof(tfrag3::ShadowVertex), + nullptr, GL_STATIC_DRAW); + m_opengl = true; + } + + if (!m_vtx_uploaded) { // NOTE: despite its name, this flag guards the *index* upload (at most 32768 indices per call) + u32 start = m_idx; + m_idx = std::min(start + 32768, (u32)data.lev_data->level->shadow_data.indices.size()); + glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->shadow_indices); + glBufferSubData(GL_ARRAY_BUFFER, start * sizeof(u32), (m_idx - start) * sizeof(u32), + data.lev_data->level->shadow_data.indices.data() + start); + if (m_idx != data.lev_data->level->shadow_data.indices.size()) { + return false; + } else { + m_idx = 0; // reuse the cursor for the vertex upload below + m_vtx_uploaded = true; + } + } + + u32 start = m_idx; // vertex upload, at most 32768 vertices per call + m_idx = std::min(start + 32768, (u32)data.lev_data->level->shadow_data.vertices.size()); + glBindBuffer(GL_ARRAY_BUFFER, data.lev_data->shadow_vertices); + glBufferSubData(GL_ARRAY_BUFFER, start * sizeof(tfrag3::ShadowVertex), + (m_idx - start) * sizeof(tfrag3::ShadowVertex), + data.lev_data->level->shadow_data.vertices.data() + start); + + if (m_idx != data.lev_data->level->shadow_data.vertices.size()) { + return false; + } else { + m_done = true; // everything is uploaded: register the models in the per-level and global lookups + for (auto& model : data.lev_data->level->shadow_data.models) { + data.lev_data->shadow_model_lookup[model.name] = &model; + (*data.shadows)[model.name].push_back({&model, data.lev_data->load_id, data.lev_data}); + } + return true; + } + return true; // unreachable: both branches above return +} + std::vector> make_loader_stages() { std::vector> ret; ret.push_back(std::make_unique()); @@ -697,6 +759,7 @@ std::vector> make_loader_stages() { ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); + ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); ret.push_back(std::make_unique()); 
return ret; diff --git a/game/graphics/opengl_renderer/loader/LoaderStages.h b/game/graphics/opengl_renderer/loader/LoaderStages.h index a2e1f43e41..7bf783ee8b 100644 --- a/game/graphics/opengl_renderer/loader/LoaderStages.h +++ b/game/graphics/opengl_renderer/loader/LoaderStages.h @@ -16,4 +16,17 @@ class MercLoaderStage : public LoaderStage { bool m_opengl = false; bool m_vtx_uploaded = false; u32 m_idx = 0; +}; + +class ShadowLoaderStage : public LoaderStage { +public: + ShadowLoaderStage(); + bool run(Timer& timer, LoaderInput& data) override; + void reset() override; + +private: + bool m_done = false; + bool m_opengl = false; + bool m_vtx_uploaded = false; + u32 m_idx = 0; }; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/loader/common.h b/game/graphics/opengl_renderer/loader/common.h index 31424bb095..10ac57e2c3 100644 --- a/game/graphics/opengl_renderer/loader/common.h +++ b/game/graphics/opengl_renderer/loader/common.h @@ -28,6 +28,10 @@ struct LevelData { GLuint merc_indices; std::unordered_map merc_model_lookup; + GLuint shadow_vertices; + GLuint shadow_indices; + std::unordered_map shadow_model_lookup; + GLuint hfrag_vertices; GLuint hfrag_indices; @@ -43,10 +47,20 @@ struct MercRef { } }; +struct ShadowRef { + const tfrag3::ShadowModel* model = nullptr; + u64 load_id = 0; + const LevelData* level = nullptr; + bool operator==(const ShadowRef& other) const { + return model == other.model && load_id == other.load_id; + } +}; + struct LoaderInput { LevelData* lev_data; TexturePool* tex_pool; std::unordered_map>* mercs; + std::unordered_map>* shadows; }; class LoaderStage { diff --git a/game/graphics/opengl_renderer/shaders/shadow3.frag b/game/graphics/opengl_renderer/shaders/shadow3.frag new file mode 100644 index 0000000000..a66dc91093 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/shadow3.frag @@ -0,0 +1,8 @@ +#version 410 core + +out vec4 color; +in vec4 vtx_color; + +void main() { + color = vtx_color; +} diff --git 
a/game/graphics/opengl_renderer/shaders/shadow3.vert b/game/graphics/opengl_renderer/shaders/shadow3.vert new file mode 100644 index 0000000000..49169d8691 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/shadow3.vert @@ -0,0 +1,102 @@ +#version 410 core + +// shadow vertex definition (presumably matches tfrag3::ShadowVertex - confirm against the VAO setup) +layout (location = 0) in vec3 position_in; +layout (location = 1) in float weight_in; +layout (location = 2) in uvec2 mats; +layout (location = 3) in uint flags; + +// camera control +uniform vec4 hvdf_offset; +uniform vec4 fog_constants; +uniform mat4 perspective_matrix; +uniform mat4 camera_rot; +uniform vec3 debug_color; +uniform vec4 bottom_plane; +uniform vec4 top_plane; +uniform vec3 origin; +uniform bool bottom_cap; + +// output +out vec4 vtx_color; + +int offset = 0; + +struct MercMatrixData { + mat4 X; +}; + +layout (std140) uniform ub_bones { + MercMatrixData bones[128]; +}; + +/* +- 0 `sub.xyzw vf19, vf01, vf03` : `vf19 = center - vert` +- 1 `mul.xyzw vf11, vf03, vf02` : `vf11 = dot(vert, plane)` +- 2 `mul.xyz vf15, vf19, vf02` : `vf15 = dot3(center - vert, plane)` +- 3 `move.xyzw vf07, vf03` : `vf07 = vert` +- 4 `addy.x vf11, vf11, vf11` : `vf11.x += vf11.y` +- 5 `addy.x vf15, vf15, vf15` : `vf15.x += vf15.y` +- 6 `addz.x vf11, vf11, vf11` : `vf11.x += vf11.z` +- 7 `addz.x vf15, vf15, vf15` : `vf15.x += vf15.z` +- 8 `addw.x vf11, vf11, vf11` : `vf11.x += vf11.w` +- 9 `div Q, vf11.x, vf15.x` : `Q = dot(vert, plane) / dot3(center - vert, plane)` +- 10 `mul.xyzw vf19, vf19, Q` : +- 11 `sub.xyzw vf07, vf07, vf19`: +*/ + +vec4 dual(vec4 p, vec4 plane) { // GLSL port of the VU listing above, with the origin uniform in the role of center + vec4 offset = vec4(origin, 1) - p; + return p - offset * dot(p, plane) / dot(offset.xyz, plane.xyz); +} + +vec4 scissor(vec4 p, vec4 plane) { // same projection as dual(), applied only when dot(p, plane) is positive; other points pass through + float plane_offset = dot(p, plane); + if (plane_offset > 0) { + vec4 offset = vec4(origin, 1) - p; + return p - offset * plane_offset / dot(offset.xyz, plane.xyz); + } else { + return p; + } +} + +void main() { + vec4 p = vec4(position_in, 1); + + vec4 vtx_pos = -bones[mats[0] 
+ offset].X * p * weight_in; + + if (weight_in < 1) { // two-bone vertex: the second bone gets the remaining weight (was "> 1", which never ran) + vtx_pos += -bones[mats[1] + offset].X * p * (1.f - weight_in); + } + + + if (bottom_cap) { + vtx_pos = dual(vtx_pos, bottom_plane); + } else { + if ((flags & uint(1)) != 0) { + vtx_pos = dual(vtx_pos, bottom_plane); + } else { + vtx_pos = scissor(vtx_pos, top_plane); + } + } + + + + vec4 transformed = perspective_matrix * vtx_pos; + + float Q = fog_constants.x / transformed[3]; + + transformed.xyz *= Q; + transformed.xyz += hvdf_offset.xyz; + transformed.xy -= (2048.); + transformed.z /= (8388608); + transformed.z -= 1; + transformed.x /= (256); + transformed.y /= -(128); + transformed.xyz *= transformed.w; + transformed.y *= SCISSOR_ADJUST * HEIGHT_SCALE; + gl_Position = transformed; + + + vtx_color = vec4(debug_color, 1.0); +} diff --git a/goal_src/jak1/engine/draw/drawable.gc b/goal_src/jak1/engine/draw/drawable.gc index 381192d523..59e226ca73 100644 --- a/goal_src/jak1/engine/draw/drawable.gc +++ b/goal_src/jak1/engine/draw/drawable.gc @@ -718,7 +718,10 @@ (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s4-1))) (set! (-> a0-26 data 86 total) (-> a0-26 data 86 used))))))) (when #t - (let ((v1-41 *shadow-queue*)) (+! (-> v1-41 cur-run) 1))) + (let ((v1-41 *shadow-queue*)) (+! (-> v1-41 cur-run) 1)) + ;; og:preserve-this + (+! (-> *pc-shadow-queue* cur-run) 1) + ) 0 (none)) @@ -856,7 +859,10 @@ (with-profiler "merc" (set! (-> *merc-global-array* count) (the-as uint 0)) (set! *merc-globals* (the-as merc-globals (-> *merc-global-array* globals))) - (set! (-> *shadow-queue* cur-run) (the-as uint 0))) + (set! (-> *shadow-queue* cur-run) (the-as uint 0)) + ;; og:preserve-this + (set! (-> *pc-shadow-queue* cur-run) 0) + ) ;; draw the background! 
(with-profiler "background" (init-background) diff --git a/goal_src/jak1/engine/gfx/foreground/bones.gc b/goal_src/jak1/engine/gfx/foreground/bones.gc index cadfe3ed59..0b1b3f9f04 100644 --- a/goal_src/jak1/engine/gfx/foreground/bones.gc +++ b/goal_src/jak1/engine/gfx/foreground/bones.gc @@ -27,6 +27,27 @@ (defglobalconstant BACKWARD_COMPAT_MERC_CLIP #f) +;; when set, render some environment mapped stuff with jak 2's emerc. +;; this is much faster, and does significantly speed up the game thread on finalboss. +(define *emerc-hack* #t) + +;; when set, use merc for blerc instead of generic. +(define *blerc-hack* #t) + +;; when true, uses the PC float blerc implementation. +(define *use-fp-blerc* #t) + +(define *texscroll-force-generic* #f) + +(define *ripple-force-generic* #f) + +;; when set, use the rewritten PC shadow render (faster) +(define *use-pc-shadow* #f) + +;; use rewritten bones math (GOAL asm instead of mips2c) +(define *use-new-bones* #t) + + ;;;;;;;;;;;;;;;;;; ;; calc list ;;;;;;;;;;;;;;;;;; @@ -326,6 +347,10 @@ (let ((v1-13 (-> gp-1 run (-> gp-1 cur-run)))) (set! (-> v1-13 first) (the-as dma-packet 0)) (set! (-> v1-13 next) (the-as (pointer dma-packet) 0)))) + ;; og:preserve-this + (set! (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run) first) (the pc-shadow-request 0)) + (set! (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run) next) (the (pointer pc-shadow-request) 0)) + 0 (none)) @@ -464,7 +489,6 @@ (.svf (&-> out n-mtx quad 2) nmat2)))) (none)) -(define *use-new-bones* #t) (defun bones-mtx-calc-execute () "Do all pending bone calculations" @@ -660,7 +684,83 @@ (new 'static 'plane :y 1.0 :w 4096.0) :fade-dist 409600.0)) +;; og:preserve-this +(defun pc-draw-bones-shadow ((dc draw-control) (mtx pointer) (dma-ptr pointer)) + "Add shadows for this draw-control to the *pc-shadow-queue* to be drawn in pc-shadow-execute-all. 
+ This places a pc-shadow-request in the DMA buffer and adds it to the linked list of requests + for the currently selected run in *pc-shadow-queue*" + (let* ((pse (the pc-shadow-request dma-ptr)) + (sgeo (-> dc shadow)) + (settings (if (-> dc shadow-ctrl) (-> dc shadow-ctrl settings) *default-shadow-settings*)) + (flags (-> settings flags)) + ) + + ;; if fade is enabled, and we're all the way faded out, disable draw + (when (not (logtest? flags (shadow-flags disable-fade))) + (let ((dist (-> (scratchpad-object terrain-context) work foreground bone-mem work distance w))) + (if (< (-> settings fade-dist) dist) + (logior! flags (shadow-flags disable-draw)) + ) + ) + ) + + ;; if disabled, early return + (if (logtest? flags (shadow-flags disable-draw)) + (return dma-ptr) + ) + + + ;; settings + (mem-copy! (the pointer (-> pse settings)) (the pointer settings) (size-of shadow-settings)) + + ;; update the "center" position. + (let ((center-pos (-> dc skeleton bones (-> dc shadow-joint-index) position))) + (set! (-> pse settings center x) (-> center-pos x)) + (set! (-> pse settings center y) (-> center-pos y)) + (set! (-> pse settings center z) (-> center-pos z)) + ) + + ;; set the other properties + (set! (-> pse geo-name) (-> sgeo name)) + (set! (-> pse mtx) mtx) + (set! (-> pse num-joints) (-> sgeo header num-joints)) + + ;; set up linked list. + (let* ((run (-> *pc-shadow-queue* run (-> *pc-shadow-queue* cur-run))) + (next (-> run next)) + ) + + ;; if we're the first in the list, store in the run + (if (zero? (-> run first)) (set! (-> run first) pse)) + + ;; patch next pointer of previous + (if (nonzero? next) (set! (-> next 0) pse)) + + ;; remember where to patch for the next one + (set! (-> run next) (&-> pse next)) + + ;; clear our next pointer in case we're last + (set! (-> pse next) (the pc-shadow-request 0)) + + ;; set up next tag at the start, to skip over this data. 
+ ;; this is a bit of a hack, this function gets called when building merc chains, + ;; and inserts a bit of shadow dma that will later be referenced by the shadow bucket. + ;; but the original game did the same thing! + (&+! dma-ptr (size-of pc-shadow-request)) + (set! (-> pse dma-next dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (the-as int dma-ptr))) + (set! (-> pse dma-next vif0) (new 'static 'vif-tag)) + (set! (-> pse dma-next vif1) (new 'static 'vif-tag)) + ) + + dma-ptr + ) + ) + (defun draw-bones-shadow ((arg0 draw-control) (arg1 pointer) (arg2 pointer)) + ;; og:preserve-this + (when *use-pc-shadow* + (return (pc-draw-bones-shadow arg0 arg1 arg2)) + ) ;; (local-vars (ra-0 int)) ;; the dma packet we'll use for shadow in the end. (let* ((v1-0 (the-as dma-packet (&+ arg2 0))) @@ -1025,19 +1125,7 @@ (set! dma-buf (the pointer (&+ packet 16)))) dma-buf)) -;; when set, render some environment mapped stuff with jak 2's emerc. -;; this is much faster, and does significantly speed up the game thread on finalboss. -(define *emerc-hack* #t) -;; when set, use merc for blerc instead of generic. -(define *blerc-hack* #t) - -;; when true, uses the PC float blerc implementation. -(define *use-fp-blerc* #t) - -(define *texscroll-force-generic* #f) - -(define *ripple-force-generic* #f) (defun draw-bones ((arg0 draw-control) (dma-buf dma-buffer) (arg2 float)) "Main draw function for all bone-related renderers. Will set up merc, generic and shadow. diff --git a/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc b/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc index 5f9a61523c..fd3f6e39ec 100644 --- a/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc +++ b/goal_src/jak1/engine/gfx/shadow/shadow-cpu-h.gc @@ -103,6 +103,35 @@ (define *shadow-queue* (new 'global 'shadow-queue)) +;; og:preserve-this +;; new shadow queue for drawing PC shadows. 
+(declare-type pc-shadow-request structure) +(deftype pc-shadow-request (structure) + ( + (dma-next dma-packet :inline) + (settings shadow-settings :inline) + (geo-name string) ;; name to send to PC renderer + (mtx pointer) ;; pointer to DMA memory that will contain bones + (num-joints uint32) ;; number of joints needed for shadow + (next pc-shadow-request) + ) + ) + + +(deftype pc-shadow-run (structure) + ((first pc-shadow-request) + (next (pointer pc-shadow-request))) + ) + +(deftype pc-shadow-queue (structure) + ((cur-run uint32) + (run pc-shadow-run 16 :inline) + ) + ) + +(define *pc-shadow-queue* (new 'global 'pc-shadow-queue)) + + (deftype shadow-vertex (structure) ((x float) (y float) diff --git a/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc b/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc index a0eeda0a7b..d6474108be 100644 --- a/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc +++ b/goal_src/jak1/engine/gfx/shadow/shadow-cpu.gc @@ -390,7 +390,38 @@ 0 (none)) +;; og:preserve-this +(defun pc-shadow-execute-all () + "Send PC shadow queue to the PC shadow renderer." + + ;; bail if disabled + (if (not (logtest? *vu1-enable-user* (vu1-renderer-mask shadow))) + (return #f) + ) + + ;; TODO: plus1 here? + (dotimes (run-idx (-> *pc-shadow-queue* cur-run)) + (when (nonzero? 
(-> *pc-shadow-queue* run run-idx first)) + (with-dma-buffer-add-bucket ((dma-buf (-> (current-frame) global-buf)) (bucket-id shadow)) + (dma-buffer-add-ref-vif2 + dma-buf + 6 + (-> *pc-shadow-queue* run run-idx first) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + (new 'static 'vif-tag :cmd (vif-cmd pc-port)) + ) + ) + ) + ) + (none) + ) + (defun shadow-execute-all ((arg0 dma-buffer) (arg1 shadow-queue)) + ;; og:preserve-this + (when *use-pc-shadow* + (pc-shadow-execute-all) + (return #f) + ) (if *debug-segment* (add-frame (-> *display* frames (-> *display* on-screen) frame profile-bar 0) 'draw diff --git a/scripts/sublime_text/lispindent.sublime-settings b/scripts/sublime_text/lispindent.sublime-settings index 836528e45f..1275c0717f 100644 --- a/scripts/sublime_text/lispindent.sublime-settings +++ b/scripts/sublime_text/lispindent.sublime-settings @@ -6,7 +6,7 @@ "regex": ["(with-gensyms|defenum|countdown|while|defglobalconstant|desfun|defsmacro|catch|defvar|defclass|defconstant|defcustom|defparameter|defconst|define-condition|define-modify-macro|", "defsetf|defun|defgeneric|define-setf-method|define-self-expander|defmacro|defsubst|deftype|defmethod|", - "defpackage|defstruct|dolist|dotimes|lambda|let|let\\*|prog1|prog2|unless|when)$"] + "defpackage|defstruct|dolist|dotimes|lambda|let|let\\*|prog1|prog2|unless|when|with-dma-buffer-add-bucket)$"] } } }