From 71cb1aef6fb5cd6f839ad8d70b31343e58a90e77 Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Tue, 31 Jan 2023 18:23:39 -0500 Subject: [PATCH] [merc2] support vertex updates, use this for blerc in jak 1 and jak 2 (#2179) This PR adds a feature to merc2 to update vertices. This will be needed to efficiently do effects like blerc/ripple/texture scroll. It's enabled for blerc in jak 1 and jak 2, but with a few disclaimers: - currently we still use the mips2c blerc implementation, which is slow and has some "jittering" because of integer precision. When porting to PC, there was an additional synchronization problem because blerc overwrites the merc data as it's being read by the renderers. I _think_ this wasn't an issue on PS2 because the blerc dma is higher priority than the VIF1 DMA, but I'm not certain. Either way, I had to add a mutex for this on PC to avoid very slight flickering/gaps. This isn't ideal for performance, but still beats generic by a significant amount in every place I tested. If you see merc taking 2ms to draw, it is likely because it is stuck waiting on blerc to finish. This will go away once blerc itself is ported to C++. - in jak 1, we end up using generic in some cases where we could use merc. In particular maia in village3 hut. This will be fixed later once we can use merc in more places. I don't want to mess with the merc/generic selection logic when we're hopefully going to get rid of it soon. - There is no support for ripple or texture scroll. These use generic on jak 1, and remain broken on jak 2. - Like with `emerc`, jak 1 has a toggle to go back to the old behavior `*blerc-hack*`. - In most cases, toggling this causes no visual differences. One exception is Gol's teeth. I believe this is caused by texture coordinate rounding issues, where generic has an additional float -> int -> float compared to PC merc. It is very hard to notice so I'm not going to worry about it.
--- common/custom_data/TFrag3Data.cpp | 275 ++++-- common/custom_data/Tfrag3Data.h | 55 +- common/global_profiler/GlobalProfiler.h | 2 +- common/type_system/TypeSystem.cpp | 2 +- decompiler/level_extractor/MercData.cpp | 62 ++ decompiler/level_extractor/MercData.h | 13 + decompiler/level_extractor/extract_merc.cpp | 268 +++++- decompiler/level_extractor/fr3_to_gltf.cpp | 6 +- .../opengl_renderer/foreground/Merc2.cpp | 826 +++++++++++++----- .../opengl_renderer/foreground/Merc2.h | 79 +- .../opengl_renderer/ocean/OceanMidAndFar.cpp | 8 +- .../opengl_renderer/ocean/OceanTexture_PC.cpp | 8 +- game/mips2c/jak1_functions/bones.cpp | 1 - .../jak2_functions/merc_blend_shape.cpp | 17 +- game/mips2c/jak2_functions/ripple.cpp | 3 - game/mips2c/mips2c_private.h | 2 +- goal_src/jak1/engine/gfx/foreground/bones.gc | 28 +- goal_src/jak2/engine/game/main.gc | 8 +- .../jak2/engine/gfx/foreground/foreground.gc | 37 +- 19 files changed, 1327 insertions(+), 373 deletions(-) diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp index 4ae886e973..36c65f0e46 100644 --- a/common/custom_data/TFrag3Data.cpp +++ b/common/custom_data/TFrag3Data.cpp @@ -256,19 +256,46 @@ void MercDraw::serialize(Serializer& ser) { ser.from_ptr(&num_triangles); } -void MercEffect::serialize(Serializer& ser) { +void MercModifiableDrawGroup::serialize(Serializer& ser) { if (ser.is_saving()) { - ser.save(draws.size()); + ser.save(mod_draw.size()); } else { - draws.resize(ser.load()); + mod_draw.resize(ser.load()); } - for (auto& draw : draws) { + for (auto& draw : mod_draw) { draw.serialize(ser); } + if (ser.is_saving()) { + ser.save(fix_draw.size()); + } else { + fix_draw.resize(ser.load()); + } + for (auto& draw : fix_draw) { + draw.serialize(ser); + } + ser.from_pod_vector(&vertices); + ser.from_pod_vector(&vertex_lump4_addr); + ser.from_pod_vector(&fragment_mask); + ser.from_ptr(&expect_vidx_end); +} + +void MercEffect::serialize(Serializer& ser) { + if (ser.is_saving()) { + 
ser.save(all_draws.size()); + } else { + all_draws.resize(ser.load()); + } + for (auto& draw : all_draws) { + draw.serialize(ser); + } + + mod.serialize(ser); + ser.from_ptr(&envmap_mode); ser.from_ptr(&envmap_texture); ser.from_ptr(&has_envmap); + ser.from_ptr(&has_mod_draw); } void MercModel::serialize(Serializer& ser) { @@ -283,6 +310,8 @@ void MercModel::serialize(Serializer& ser) { } ser.from_ptr(&max_draws); ser.from_ptr(&max_bones); + ser.from_ptr(&st_vif_add); + ser.from_ptr(&xyz_scale); } void MercModelGroup::serialize(Serializer& ser) { @@ -360,109 +389,163 @@ void Level::serialize(Serializer& ser) { } } -std::array Level::get_memory_usage() const { - std::array result; - result.fill(0); +void MercModifiableDrawGroup::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_MOD_VERT, sizeof(MercVertex) * vertices.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_DRAW_1, sizeof(MercDraw) * fix_draw.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_DRAW_2, sizeof(MercDraw) * mod_draw.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_TABLE, sizeof(u16) * vertex_lump4_addr.size()); +} - // textures - for (const auto& tex : textures) { - result[TEXTURE] += tex.data.size() * sizeof(u32); +void MercEffect::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_DRAW, sizeof(MercDraw) * all_draws.size()); + mod.memory_usage(tracker); +} + +void MercModel::memory_usage(MemoryUsageTracker* tracker) const { + for (auto& effect : effects) { + effect.memory_usage(tracker); } +} - // tfrag - for (const auto& tfrag_tree_geoms : tfrag_trees) { - for (const auto& tfrag_tree : tfrag_tree_geoms) { - for (const auto& draw : tfrag_tree.draws) { - result[TFRAG_INDEX] += draw.runs.size() * sizeof(StripDraw::VertexRun); - result[TFRAG_INDEX] += draw.plain_indices.size() * sizeof(u32); - result[TFRAG_VIS] += draw.vis_groups.size() * sizeof(StripDraw::VisGroup); - } - result[TFRAG_VERTS] += - 
tfrag_tree.packed_vertices.vertices.size() * sizeof(PackedTfragVertices::Vertex); - result[TFRAG_CLUSTER] += - tfrag_tree.packed_vertices.cluster_origins.size() * sizeof(math::Vector); - result[TFRAG_TIME_OF_DAY] += tfrag_tree.colors.size() * sizeof(TimeOfDayColor); - result[TFRAG_BVH] += tfrag_tree.bvh.vis_nodes.size() * sizeof(VisNode); +void MercModelGroup::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_VERT, sizeof(MercVertex) * vertices.size()); + tracker->add(MemoryUsageCategory::MERC_INDEX, sizeof(u32) * indices.size()); + for (auto& model : models) { + model.memory_usage(tracker); + } +} + +void CollisionMesh::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::COLLISION, sizeof(Vertex) * vertices.size()); +} + +void PackedShrubVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::SHRUB_VERT, 64 * matrices.size()); + tracker->add(MemoryUsageCategory::SHRUB_VERT, sizeof(InstanceGroup) * instance_groups.size()); + tracker->add(MemoryUsageCategory::SHRUB_VERT, sizeof(Vertex) * vertices.size()); +} + +void ShrubTree::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::SHRUB_TIME_OF_DAY, + sizeof(TimeOfDayColor) * time_of_day_colors.size()); + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::SHRUB_DRAW, sizeof(ShrubDraw) * static_draws.size()); + tracker->add(MemoryUsageCategory::SHRUB_IND, sizeof(u32) * indices.size()); +} + +void InstancedStripDraw::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_INST_INDEX, sizeof(u32) * vertex_index_stream.size()); + tracker->add(MemoryUsageCategory::TIE_INST_VIS, sizeof(InstanceGroup) * instance_groups.size()); +} + +void PackedTieVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_CIDX, sizeof(u16) * color_indices.size()); + 
tracker->add(MemoryUsageCategory::TIE_MATRICES, 64 * matrices.size()); + tracker->add(MemoryUsageCategory::TIE_GRPS, sizeof(MatrixGroup) * matrix_groups.size()); + tracker->add(MemoryUsageCategory::TIE_VERTS, sizeof(Vertex) * vertices.size()); +} + +void TieTree::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_BVH, sizeof(VisNode) * bvh.vis_nodes.size()); + for (auto& draw : static_draws) { + tracker->add(MemoryUsageCategory::TIE_DEINST_INDEX, + draw.runs.size() * sizeof(StripDraw::VertexRun)); + tracker->add(MemoryUsageCategory::TIE_DEINST_INDEX, draw.plain_indices.size() * sizeof(u32)); + tracker->add(MemoryUsageCategory::TIE_DEINST_VIS, + draw.vis_groups.size() * sizeof(StripDraw::VisGroup)); + } + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::TIE_TIME_OF_DAY, sizeof(TimeOfDayColor) * colors.size()); + + for (auto& draw : instanced_wind_draws) { + draw.memory_usage(tracker); + } + tracker->add(MemoryUsageCategory::TIE_WIND_INSTANCE_INFO, + sizeof(TieWindInstance) * wind_instance_info.size()); +} + +void PackedTfragVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TFRAG_VERTS, + sizeof(PackedTfragVertices::Vertex) * vertices.size()); + tracker->add(MemoryUsageCategory::TFRAG_CLUSTER, + sizeof(math::Vector) * cluster_origins.size()); +} + +void TfragTree::memory_usage(MemoryUsageTracker* tracker) const { + for (auto& draw : draws) { + tracker->add(MemoryUsageCategory::TFRAG_INDEX, draw.runs.size() * sizeof(StripDraw::VertexRun)); + tracker->add(MemoryUsageCategory::TFRAG_INDEX, draw.plain_indices.size() * sizeof(u32)); + tracker->add(MemoryUsageCategory::TFRAG_VIS, + draw.vis_groups.size() * sizeof(StripDraw::VisGroup)); + } + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::TFRAG_TIME_OF_DAY, sizeof(TimeOfDayColor) * colors.size()); + tracker->add(MemoryUsageCategory::TFRAG_BVH, sizeof(VisNode) * bvh.vis_nodes.size()); +} 
+ +void Texture::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TEXTURE, data.size() * sizeof(u32)); +} + +void Level::memory_usage(MemoryUsageTracker* tracker) const { + for (const auto& texture : textures) { + texture.memory_usage(tracker); + } + for (const auto& tftk : tfrag_trees) { + for (const auto& tree : tftk) { + tree.memory_usage(tracker); } } - - // tie - for (const auto& tie_tree_geoms : tie_trees) { - for (const auto& tie_tree : tie_tree_geoms) { - result[TIE_BVH] += tie_tree.bvh.vis_nodes.size(); - for (const auto& draw : tie_tree.static_draws) { - result[TIE_DEINST_INDEX] += draw.runs.size() * sizeof(StripDraw::VertexRun); - result[TIE_DEINST_VIS] += draw.vis_groups.size() * sizeof(StripDraw::VisGroup); - } - result[TIE_VERTS] += - tie_tree.packed_vertices.vertices.size() * sizeof(PackedTieVertices::Vertex); - result[TIE_CIDX] += tie_tree.packed_vertices.color_indices.size() * sizeof(u16); - result[TIE_MATRICES] += tie_tree.packed_vertices.matrices.size() * 4 * 4 * 4; - result[TIE_GRPS] += - tie_tree.packed_vertices.matrix_groups.size() * sizeof(PackedTieVertices::MatrixGroup); - result[TIE_TIME_OF_DAY] += tie_tree.colors.size() * sizeof(TimeOfDayColor); - - for (const auto& draw : tie_tree.instanced_wind_draws) { - result[TIE_INST_INDEX] += draw.vertex_index_stream.size() * sizeof(u32); - result[TIE_INST_VIS] += - draw.instance_groups.size() * sizeof(InstancedStripDraw::InstanceGroup); - } - result[TIE_WIND_INSTANCE_INFO] += - tie_tree.wind_instance_info.size() * sizeof(TieWindInstance); + for (const auto& ttk : tie_trees) { + for (const auto& tree : ttk) { + tree.memory_usage(tracker); } } - - // shrub - for (const auto& shrub_tree : shrub_trees) { - result[SHRUB_TIME_OF_DAY] += shrub_tree.time_of_day_colors.size() * sizeof(TimeOfDayColor); - result[SHRUB_VERT] += shrub_tree.packed_vertices.matrices.size() * 4 * 4 * 4; - result[SHRUB_VERT] += - shrub_tree.packed_vertices.vertices.size() * 
sizeof(PackedShrubVertices::Vertex); - result[SHRUB_VERT] += shrub_tree.packed_vertices.instance_groups.size() * - sizeof(PackedShrubVertices::InstanceGroup); - result[SHRUB_IND] += sizeof(u32) * shrub_tree.indices.size(); + for (const auto& tree : shrub_trees) { + tree.memory_usage(tracker); } - - // merc - result[MERC_INDEX] += merc_data.indices.size() * sizeof(u32); - result[MERC_VERT] += merc_data.vertices.size() * sizeof(MercVertex); - - // collision - result[COLLISION] += sizeof(CollisionMesh::Vertex) * collision.vertices.size(); - - return result; + collision.memory_usage(tracker); + merc_data.memory_usage(tracker); } void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { int total_accounted = 0; - auto memory_use_by_category = lev.get_memory_usage(); + MemoryUsageTracker mem_use; + lev.memory_usage(&mem_use); std::vector> known_categories = { - {"texture", memory_use_by_category[tfrag3::MemoryUsageCategory::TEXTURE]}, - {"tie-deinst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]}, - {"tie-deinst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]}, - {"tie-inst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_VIS]}, - {"tie-inst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]}, - {"tie-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_BVH]}, - {"tie-verts", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_VERTS]}, - {"tie-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]}, - {"tie-wind-inst-info", - memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]}, - {"tie-cidx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_CIDX]}, - {"tie-mats", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_MATRICES]}, - {"tie-grps", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_GRPS]}, - {"tfrag-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VIS]}, - 
{"tfrag-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_INDEX]}, - {"tfrag-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VERTS]}, - {"tfrag-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]}, - {"tfrag-cluster", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]}, - {"tfrag-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_BVH]}, - {"shrub-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]}, - {"shrub-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_VERT]}, - {"shrub-ind", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_IND]}, - {"collision", memory_use_by_category[tfrag3::MemoryUsageCategory::COLLISION]}, - {"merc-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_VERT]}, - {"merc-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_INDEX]}}; + {"texture", mem_use.data[tfrag3::MemoryUsageCategory::TEXTURE]}, + {"tie-deinst-vis", mem_use.data[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]}, + {"tie-deinst-idx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]}, + {"tie-inst-vis", mem_use.data[tfrag3::MemoryUsageCategory::TIE_INST_VIS]}, + {"tie-inst-idx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]}, + {"tie-bvh", mem_use.data[tfrag3::MemoryUsageCategory::TIE_BVH]}, + {"tie-verts", mem_use.data[tfrag3::MemoryUsageCategory::TIE_VERTS]}, + {"tie-colors", mem_use.data[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]}, + {"tie-wind-inst-info", mem_use.data[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]}, + {"tie-cidx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_CIDX]}, + {"tie-mats", mem_use.data[tfrag3::MemoryUsageCategory::TIE_MATRICES]}, + {"tie-grps", mem_use.data[tfrag3::MemoryUsageCategory::TIE_GRPS]}, + {"tfrag-vis", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_VIS]}, + {"tfrag-idx", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_INDEX]}, + {"tfrag-vert", 
mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_VERTS]}, + {"tfrag-colors", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]}, + {"tfrag-cluster", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]}, + {"tfrag-bvh", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_BVH]}, + {"shrub-colors", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]}, + {"shrub-vert", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_VERT]}, + {"shrub-ind", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_IND]}, + {"shrub-draw", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_DRAW]}, + {"collision", mem_use.data[tfrag3::MemoryUsageCategory::COLLISION]}, + {"merc-vert", mem_use.data[tfrag3::MemoryUsageCategory::MERC_VERT]}, + {"merc-idx", mem_use.data[tfrag3::MemoryUsageCategory::MERC_INDEX]}, + {"merc-draw", mem_use.data[tfrag3::MemoryUsageCategory::MERC_DRAW]}, + {"merc-mod-vert", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_VERT]}, + {"merc-mod-ind", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_IND]}, + {"merc-mod-table", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_TABLE]}, + {"merc-mod-draw-1", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_DRAW_1]}, + {"merc-mod-draw-2", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_DRAW_2]}, + }; for (auto& known : known_categories) { total_accounted += known.second; } @@ -473,8 +556,10 @@ void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { [](const auto& a, const auto& b) { return a.second > b.second; }); for (const auto& x : known_categories) { - fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024, - 100.f * (float)x.second / uncompressed_data_size); + if (x.second) { + fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024, + 100.f * (float)x.second / uncompressed_data_size); + } } } diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h index e4dec9ef00..83133d0596 100644 --- a/common/custom_data/Tfrag3Data.h +++ 
b/common/custom_data/Tfrag3Data.h @@ -44,16 +44,36 @@ enum MemoryUsageCategory { SHRUB_TIME_OF_DAY, SHRUB_VERT, SHRUB_IND, + SHRUB_DRAW, MERC_VERT, MERC_INDEX, + MERC_DRAW, + + MERC_MOD_DRAW_1, + MERC_MOD_DRAW_2, + MERC_MOD_VERT, + MERC_MOD_IND, + MERC_MOD_TABLE, COLLISION, NUM_CATEGORIES }; -constexpr int TFRAG3_VERSION = 22; +struct MemoryUsageTracker { + u32 data[MemoryUsageCategory::NUM_CATEGORIES]; + + MemoryUsageTracker() { + for (auto& x : data) { + x = 0; + } + } + + void add(MemoryUsageCategory category, u32 size_bytes) { data[category] += size_bytes; } +}; + +constexpr int TFRAG3_VERSION = 24; // These vertices should be uploaded to the GPU at load time and don't change struct PreloadedVertex { @@ -93,6 +113,7 @@ struct PackedTieVertices { std::vector matrix_groups; // todo pack std::vector vertices; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct PackedTfragVertices { @@ -102,7 +123,7 @@ struct PackedTfragVertices { s16 s, t; u16 color_index; }; - + void memory_usage(MemoryUsageTracker* tracker) const; std::vector vertices; std::vector> cluster_origins; }; @@ -135,7 +156,7 @@ struct PackedShrubVertices { std::vector instance_groups; // todo pack std::vector vertices; u32 total_vertex_count; - + void memory_usage(MemoryUsageTracker* tracker) const; void serialize(Serializer& ser); }; @@ -207,6 +228,7 @@ struct InstancedStripDraw { // for debug counting. u32 num_triangles = 0; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // node in the BVH. 
@@ -256,6 +278,7 @@ struct Texture { std::string debug_tpage_name; bool load_to_pool = false; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // Tfrag trees have several kinds: @@ -279,6 +302,7 @@ struct TfragTree { } unpacked; void unpack(); void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct TieWindInstance { @@ -305,6 +329,7 @@ struct TieTree { } unpacked; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; void unpack(); }; @@ -321,6 +346,7 @@ struct ShrubTree { } unpacked; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; void unpack(); }; @@ -336,6 +362,7 @@ struct CollisionMesh { static_assert(sizeof(Vertex) == 32); std::vector vertices; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // MERC @@ -368,12 +395,25 @@ struct MercDraw { void serialize(Serializer& ser); }; +struct MercModifiableDrawGroup { + std::vector vertices; + std::vector vertex_lump4_addr; + std::vector fix_draw, mod_draw; + std::vector fragment_mask; + u32 expect_vidx_end = 0; + void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; +}; + struct MercEffect { - std::vector draws; + std::vector all_draws; + MercModifiableDrawGroup mod; DrawMode envmap_mode; u32 envmap_texture; bool has_envmap = false; + bool has_mod_draw = false; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct MercModel { @@ -381,7 +421,10 @@ struct MercModel { std::vector effects; u32 max_draws; u32 max_bones; + u32 st_vif_add; + float xyz_scale; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct MercModelGroup { @@ -389,6 +432,7 @@ struct MercModelGroup { std::vector indices; std::vector models; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; 
// @@ -407,8 +451,7 @@ struct Level { MercModelGroup merc_data; u16 version2 = TFRAG3_VERSION; void serialize(Serializer& ser); - - std::array get_memory_usage() const; + void memory_usage(MemoryUsageTracker* tracker) const; }; void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size); diff --git a/common/global_profiler/GlobalProfiler.h b/common/global_profiler/GlobalProfiler.h index a156517a04..e8fcb3407c 100644 --- a/common/global_profiler/GlobalProfiler.h +++ b/common/global_profiler/GlobalProfiler.h @@ -9,7 +9,7 @@ struct ProfNode { u64 ts; u64 tid; - char name[32]; + char name[128]; enum Kind : u8 { BEGIN, END, INSTANT, UNUSED } kind = UNUSED; }; diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index c44f8d0049..1c023ea876 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -516,7 +516,7 @@ int TypeSystem::get_load_size_allow_partial_def(const TypeSpec& ts) const { } MethodInfo TypeSystem::override_method(Type* type, - const std::string& type_name, + const std::string& /*type_name*/, const int method_id, const std::optional& docstring) { // Lookup the method from the parent type diff --git a/decompiler/level_extractor/MercData.cpp b/decompiler/level_extractor/MercData.cpp index 5d932ebdaf..267ae1d050 100644 --- a/decompiler/level_extractor/MercData.cpp +++ b/decompiler/level_extractor/MercData.cpp @@ -356,6 +356,10 @@ void MercEffect::from_ref(TypedRef tr, envmap_or_effect_usage = read_plain_data_field(tr, "effect-usage", dts); } + if (type->lookup_field("texture-index", &temp)) { + texture_index = read_plain_data_field(tr, "texture-index", dts); + } + // do frag-ctrls TypedRef fc(deref_label(get_field_ref(tr, "frag-ctrl", dts)), dts.ts.lookup_type("merc-fragment-control")); @@ -369,6 +373,15 @@ void MercEffect::from_ref(TypedRef tr, f = frag_geo.emplace_back().from_ref(f, dts, frag_ctrl.at(i), main_control); } + // do blend ctrls + if (blend_frag_count) { + TypedRef 
bc(deref_label(get_field_ref(tr, "blend-ctrl", dts)), + dts.ts.lookup_type("merc-blend-ctrl")); + for (u32 i = 0; i < blend_frag_count; i++) { + bc = blend_ctrl.emplace_back().from_ref(bc, dts, main_control.blend_target_count); + } + } + // do extra info auto fr = get_field_ref(tr, "extra-info", dts); const auto& word = fr.data->words_by_seg.at(fr.seg).at(fr.byte_offset / 4); @@ -414,6 +427,55 @@ void MercCtrl::from_ref(TypedRef tr, const DecompilerTypeSystem& dts) { effects.emplace_back().from_ref(eff_ref, dts, header); eff_ref.ref.byte_offset += 32; // } + // debug_print_blerc(); +} + +void MercCtrl::debug_print_blerc() { + int total_verts = 0; + int blerc_verts = 0; + int total_frags = 0; + int blerc_frags = 0; + int total_effects = effects.size(); + int blerc_effects = 0; + + for (auto& effect : effects) { + bool effect_has_blerc = false; + for (size_t frag_idx = 0; frag_idx < effect.frag_count; frag_idx++) { + total_frags++; + auto& fc = effect.frag_ctrl.at(frag_idx); + total_verts += fc.lump_four_count; + + if (frag_idx < effect.blend_ctrl.size()) { + auto& bfc = effect.blend_ctrl.at(frag_idx); + if (bfc.blend_vtx_count) { + effect_has_blerc = true; + blerc_frags++; + blerc_verts += fc.lump_four_count; + } + } + } + + if (effect_has_blerc) { + blerc_effects++; + } + } + if (blerc_effects) { + fmt::print("BLERC: {}, {}/{} e, {}/{} f, {}/{} v\n", name, blerc_effects, total_effects, + blerc_frags, total_frags, blerc_verts, total_verts); + } +} + +TypedRef MercBlendCtrl::from_ref(TypedRef tr, + const DecompilerTypeSystem& dts, + int blend_target_count) { + blend_vtx_count = read_plain_data_field(tr, "blend-vtx-count", dts); + nonzero_index_count = read_plain_data_field(tr, "nonzero-index-count", dts); + tr.ref.byte_offset += 2; + for (int i = 0; i < blend_target_count; i++) { + bt_index.push_back(deref_u8(tr.ref, 0)); + tr.ref.byte_offset += 1; + } + return tr; } std::string MercCtrl::print() { diff --git a/decompiler/level_extractor/MercData.h 
b/decompiler/level_extractor/MercData.h index 146fa16809..9536ea893c 100644 --- a/decompiler/level_extractor/MercData.h +++ b/decompiler/level_extractor/MercData.h @@ -162,16 +162,26 @@ struct MercFragment { std::string print() const; }; +struct MercBlendCtrl { + u8 blend_vtx_count; + u8 nonzero_index_count; + std::vector bt_index; + TypedRef from_ref(TypedRef tr, const DecompilerTypeSystem& dts, int blend_target_count); +}; + struct MercExtraInfo { std::optional shader; }; +constexpr int kRippleEffectBit = 4; // true in jak 1 and jak 2 + struct MercEffect { //((frag-geo merc-fragment :offset-assert 0) ;; ? std::vector frag_geo; // (frag-ctrl merc-fragment-control :offset-assert 4) std::vector frag_ctrl; // (blend-data merc-blend-data :offset-assert 8) ?? + std::vector blend_ctrl; // (blend-ctrl merc-blend-ctrl :offset-assert 12) ?? // (dummy0 uint8 :offset-assert 16) ?? u8 effect_bits; @@ -184,6 +194,8 @@ struct MercEffect { // (extra-info merc-extra-info :offset-assert 28) ?? MercExtraInfo extra_info; + u8 texture_index = -1; // jak 2 only + void from_ref(TypedRef tr, const DecompilerTypeSystem& dts, const MercCtrlHeader& main_control); std::string print(); }; @@ -195,6 +207,7 @@ struct MercCtrl { std::vector effects; void from_ref(TypedRef tr, const DecompilerTypeSystem& dts); + void debug_print_blerc(); std::string print(); }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/level_extractor/extract_merc.cpp b/decompiler/level_extractor/extract_merc.cpp index de998e195a..2869bc8e0e 100644 --- a/decompiler/level_extractor/extract_merc.cpp +++ b/decompiler/level_extractor/extract_merc.cpp @@ -94,6 +94,12 @@ struct MercUnpackedVtx { u16 dst0; u16 dst1; + + bool can_be_modified = false; + int idx_in_combined_lump4 = -1; // divided by 3 + + int flump4 = -1; + int frag = -1; }; /*! @@ -107,6 +113,7 @@ struct ConvertedMercEffect { // draws from all fragments. 
std::vector draws; std::vector vertices; + std::vector verts_per_frag; bool has_envmap = false; DrawMode envmap_mode; u32 envmap_texture; @@ -216,7 +223,9 @@ void update_mode_from_alpha1(GsAlpha reg, DrawMode& mode) { /*! * Convert merc shader to PC draw mode */ -DrawMode process_draw_mode(const MercShader& info, bool enable_alpha_test) { +DrawMode process_draw_mode(const MercShader& info, + bool enable_alpha_test, + bool enable_alpha_blend) { DrawMode mode; /* * (new 'static 'gs-test @@ -237,7 +246,7 @@ DrawMode process_draw_mode(const MercShader& info, bool enable_alpha_test) { mode.set_depth_test(GsTest::ZTest::GEQUAL); // check these - mode.disable_ab(); + mode.set_ab(enable_alpha_blend); mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST); mode.set_tcc(info.tex0.tcc()); mode.set_decal(info.tex0.tfx() == GsTex0::TextureFunction::DECAL); @@ -306,7 +315,10 @@ void handle_frag(const std::string& debug_name, const MercFragmentControl& frag_ctrl, const MercState& state, std::vector& effect_vertices, - MercMemory& memory) { + MercMemory& memory, + bool can_be_modified, + int base_lump4, + int frag_idx) { (void)frag_ctrl; (void)debug_name; // lg::print("handling frag: {}\n", debug_name); @@ -330,6 +342,10 @@ void handle_frag(const std::string& debug_name, for (size_t i = 0; i < mat123_cnt; i++) { u32 current_vtx_idx = effect_vertices.size(); // idx in effect vertex list. 
auto& vtx = effect_vertices.emplace_back(); + vtx.can_be_modified = can_be_modified; + vtx.idx_in_combined_lump4 = lump_ptr / 3 + base_lump4; + vtx.frag = frag_idx; + vtx.flump4 = lump_ptr / 3; if (i < mat1_cnt) { vtx.kind = 1; // 1 matrix @@ -712,7 +728,7 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, result.effect_idx = effect_idx; if (input_effect.extra_info.shader) { result.has_envmap = true; - result.envmap_mode = process_draw_mode(*input_effect.extra_info.shader, false); + result.envmap_mode = process_draw_mode(*input_effect.extra_info.shader, false, false); result.envmap_mode.set_ab(true); u32 new_tex = remap_texture(input_effect.extra_info.shader->original_tex, map); ASSERT(result.envmap_mode.get_tcc_enable()); @@ -763,6 +779,12 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, result.envmap_mode = mode; result.envmap_mode.set_ab(true); } + + bool use_alpha_blend = false; + if (version == GameVersion::Jak2) { + use_alpha_blend = input_effect.texture_index == 4; // water + } + // full reset of state per effect. // we have no idea what the previous effect draw will be - it might be given to // mercneric. @@ -771,6 +793,8 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, MercMemory merc_memories[2]; // double buffered output int memory_buffer_toggle = 0; // which output we're in + int combined_lump4_addr = 0; + for (size_t fi = 0; fi < input_effect.frag_ctrl.size(); fi++) { const auto& frag = input_effect.frag_geo[fi]; const auto& frag_ctrl = input_effect.frag_ctrl[fi]; @@ -792,9 +816,20 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, // run the frag. // this will add vertices to the per-effect vertex lists and also update the merc memory // to point to these. 
+ bool can_be_modified = false; + if (fi < input_effect.blend_ctrl.size()) { + can_be_modified = input_effect.blend_ctrl.at(fi).blend_vtx_count > 0; + } + + if (input_effect.effect_bits & kRippleEffectBit) { + can_be_modified = true; + } handle_frag(debug_name, ctrl_header, frag, frag_ctrl, merc_state, result.vertices, - merc_memories[memory_buffer_toggle]); + merc_memories[memory_buffer_toggle], can_be_modified, combined_lump4_addr, fi); + u32 vert_count = frag.lump4_unpacked.size() / 3; + combined_lump4_addr += vert_count; + result.verts_per_frag.push_back(vert_count); // we'll add draws after this draw, but wait to actually populate the index lists until // we've processed all the vertices. @@ -819,7 +854,8 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, for (size_t i = 0; i < frag.fp_header.shader_cnt; i++) { const auto& shader = frag.shaders.at(i); // update merc state from shader (will hold over to next fragment, if needed) - merc_state.merc_draw_mode.mode = process_draw_mode(shader, result.has_envmap); + merc_state.merc_draw_mode.mode = + process_draw_mode(shader, result.has_envmap, use_alpha_blend); if (!merc_state.merc_draw_mode.mode.get_tcc_enable()) { ASSERT(false); } @@ -956,6 +992,193 @@ tfrag3::MercVertex convert_vertex(const MercUnpackedVtx& vtx, float xyz_scale) { return out; } +struct VertexSourceInfo { + int combined_lump4; + int frag; + int flump4; +}; + +void create_modifiable_vertex_data( + const std::vector& vtx_mod_flag, + const std::vector& vtx_srcs, + tfrag3::MercModelGroup& out, + size_t first_out_vertex, + size_t first_out_model, + const std::vector>& all_effects) { + ASSERT(vtx_mod_flag.size() + first_out_vertex == out.vertices.size()); + + // we need to be able to modify some vertices at runtime. + // this can be detected vertex-by-vertex + // the plan is to find MercEffects that contain modifiable vertices, and provide an alternate way + // to draw them. 
In the case where no vertices should be modified, we can fall back to the normal + // merc drawing path. + + // In this modifiable draw path, there will be a list of "fixed draws", which draw vertices that + // cannot be modified. This set is known at build-time. + // The "mod draws" will draw the modifiable vertices. These use the normal index buffer, but + // index into a per-effect modifiable vertex buffer. + + // std::vector fixed_draws, mod_draws; + + // some stats + int num_tris = 0; // all triangles + int mod_tris = 0; // triangles in mod draws + + // loop over models added from this art-group + for (size_t mi = first_out_model; mi < out.models.size(); mi++) { + auto& model = out.models.at(mi); + // loop over "effects" within this model. the pc format merges all fragments in an effect + // together. + + for (size_t ei = 0; ei < model.effects.size(); ei++) { + auto& effect = model.effects[ei]; + + std::vector> inds_per_mod_draw; + + for (const auto& draw : effect.all_draws) { + num_tris += draw.num_triangles; + + // first check to see what's in this draw + bool found_mod = false; + bool found_fixed = false; + for (int i = 0; i < (int)draw.index_count; i++) { + u32 idx = out.indices.at(draw.first_index + i); + if (idx == UINT32_MAX) { + continue; + } + ASSERT(idx >= first_out_vertex); + if (vtx_mod_flag.at(idx - first_out_vertex)) { + found_mod = true; + } else { + found_fixed = true; + } + } + + if (!found_fixed && !found_mod) { + // nothing found at all, bad + ASSERT_NOT_REACHED(); + } else if (found_fixed && !found_mod) { + // only fixed. can just copy the fixed draw + effect.mod.fix_draw.push_back(draw); + } else if (found_mod && !found_fixed) { + // only mod + effect.mod.mod_draw.push_back(draw); + auto& inds_out = inds_per_mod_draw.emplace_back(); + for (u32 i = 0; i < draw.index_count; i++) { + inds_out.push_back(out.indices.at(draw.first_index + i)); + } + mod_tris += draw.num_triangles; + } else { + // it's a mix... 
+ std::vector> strips; + strips.emplace_back(); + for (u32 i = 0; i < draw.index_count; i++) { + u32 val = out.indices.at(draw.first_index + i); + if (val == UINT32_MAX) { + if (!strips.back().empty()) { + strips.emplace_back(); + } + } else { + strips.back().push_back(val); + } + } + + tfrag3::MercDraw mod = draw; + tfrag3::MercDraw fix = draw; + std::vector mod_ind, fix_ind; + for (auto& strip : strips) { + bool strip_has_mod = false; + for (auto ind : strip) { + if (vtx_mod_flag.at(ind - first_out_vertex)) { + strip_has_mod = true; + break; + } + } + if (strip_has_mod) { + mod_ind.insert(mod_ind.end(), strip.begin(), strip.end()); + mod_ind.push_back(UINT32_MAX); + } else { + fix_ind.insert(fix_ind.end(), strip.begin(), strip.end()); + fix_ind.push_back(UINT32_MAX); + } + } + + mod.index_count = mod_ind.size(); + inds_per_mod_draw.push_back(mod_ind); + fix.first_index = out.indices.size(); + fix.index_count = fix_ind.size(); + out.indices.insert(out.indices.end(), fix_ind.begin(), fix_ind.end()); + + effect.mod.mod_draw.push_back(mod); + effect.mod.fix_draw.push_back(fix); + } + } // for draw + + // if there are no modifiable draws, we can't possible modify anything, so not worth + // storing the fixed draws + if (effect.mod.mod_draw.empty()) { + effect.mod.fix_draw.clear(); + } else { + effect.has_mod_draw = true; + // need to set up the vertex buffer for the modifiable draws + // map of original vertex indices to mod buffer index + std::unordered_map vtx_to_mod_vtx; + for (size_t mdi = 0; mdi < effect.mod.mod_draw.size(); mdi++) { + auto& draw = effect.mod.mod_draw[mdi]; + auto& orig_inds = inds_per_mod_draw.at(mdi); + u32 new_first_index = out.indices.size(); + for (auto vidx : orig_inds) { + if (vidx == UINT32_MAX) { + out.indices.push_back(UINT32_MAX); + continue; // strip restart + } + const auto& existing = vtx_to_mod_vtx.find(vidx); + if (existing == vtx_to_mod_vtx.end()) { + // add vertex to mod buffer + auto idx = effect.mod.vertices.size(); + 
vtx_to_mod_vtx[vidx] = idx; + effect.mod.vertices.push_back(out.vertices.at(vidx)); + auto src = vtx_srcs.at(vidx - first_out_vertex); + ASSERT(src.combined_lump4 < UINT16_MAX); + effect.mod.vertex_lump4_addr.push_back(src.combined_lump4); + u32 frag_idx = src.frag; + if (frag_idx >= effect.mod.fragment_mask.size()) { + effect.mod.fragment_mask.resize(frag_idx + 1); + } + effect.mod.fragment_mask[frag_idx] = true; + out.indices.push_back(idx); + } else { + out.indices.push_back(existing->second); + } + } + draw.first_index = new_first_index; + } + + // splice out masked fragments, the renderer won't index them + const auto& frag_counts = all_effects.at(mi - first_out_model).at(ei).verts_per_frag; + std::unordered_map old_to_new; + u32 old_idx = 0; + u32 new_idx = 0; + for (size_t fi = 0; fi < effect.mod.fragment_mask.size(); fi++) { + if (effect.mod.fragment_mask[fi]) { + for (u32 vi = 0; vi < frag_counts.at(fi); vi++) { + old_to_new[old_idx] = new_idx; + old_idx++; + new_idx++; + } + } else { + old_idx += frag_counts.at(fi); + } + } + effect.mod.expect_vidx_end = new_idx; + for (auto& v : effect.mod.vertex_lump4_addr) { + v = old_to_new.at(v); + } + } + } + } +} + /*! 
* Top-level merc extraction */ @@ -990,9 +1213,12 @@ void extract_merc(const ObjectFileData& ag_data, } } + size_t first_out_vertex = out.merc_data.vertices.size(); // convert to PC format // first pass, before merging indices u32 first_model = out.merc_data.models.size(); + std::vector vertex_modify_flags; + std::vector vertex_srcs; std::vector>>> indices_temp; // ctrl, effect, draw, vtx for (size_t ci = 0; ci < ctrls.size(); ci++) { indices_temp.emplace_back(); @@ -1002,6 +1228,8 @@ void extract_merc(const ObjectFileData& ag_data, pc_ctrl.name = ctrl.name; pc_ctrl.max_draws = 0; pc_ctrl.max_bones = 0; + pc_ctrl.st_vif_add = ctrl.header.st_vif_add; + pc_ctrl.xyz_scale = ctrl.header.xyz_scale; for (size_t ei = 0; ei < ctrls[ci].effects.size(); ei++) { indices_temp[ci].emplace_back(); @@ -1013,6 +1241,8 @@ void extract_merc(const ObjectFileData& ag_data, u32 first_vertex = out.merc_data.vertices.size(); for (auto& vtx : effect.vertices) { auto cvtx = convert_vertex(vtx, ctrl.header.xyz_scale); + vertex_modify_flags.push_back(vtx.can_be_modified); + vertex_srcs.push_back({vtx.idx_in_combined_lump4, vtx.frag, vtx.flump4}); out.merc_data.vertices.push_back(cvtx); for (int i = 0; i < 3; i++) { pc_ctrl.max_bones = std::max(pc_ctrl.max_bones, (u32)cvtx.mats[i]); @@ -1023,22 +1253,21 @@ void extract_merc(const ObjectFileData& ag_data, std::map draw_mode_dedup; for (auto& draw : effect.draws) { - pc_ctrl.max_draws++; indices_temp[ci][ei].emplace_back(); // find draw to add to, or create a new one const auto& existing = draw_mode_dedup.find(draw.state.merc_draw_mode.as_u64()); tfrag3::MercDraw* pc_draw = nullptr; u64 pc_draw_idx = -1; if (existing == draw_mode_dedup.end()) { - pc_draw_idx = pc_effect.draws.size(); + pc_draw_idx = pc_effect.all_draws.size(); draw_mode_dedup[draw.state.merc_draw_mode.as_u64()] = pc_draw_idx; - pc_draw = &pc_effect.draws.emplace_back(); + pc_draw = &pc_effect.all_draws.emplace_back(); pc_draw->mode = draw.state.merc_draw_mode.mode; 
pc_draw->tree_tex_id = find_or_add_texture_to_level( out, tex_db, ctrl.name, draw.state.merc_draw_mode.pc_combo_tex_id); } else { pc_draw_idx = existing->second; - pc_draw = &pc_effect.draws.at(pc_draw_idx); + pc_draw = &pc_effect.all_draws.at(pc_draw_idx); } for (auto idx : draw.indices) { @@ -1057,8 +1286,8 @@ void extract_merc(const ObjectFileData& ag_data, auto& pc_ctrl = out.merc_data.models.at(ci + first_model); for (size_t ei = 0; ei < ctrls[ci].effects.size(); ei++) { auto& pc_effect = pc_ctrl.effects.at(ei); - for (size_t di = 0; di < pc_effect.draws.size(); di++) { - auto& pc_draw = pc_effect.draws.at(di); + for (size_t di = 0; di < pc_effect.all_draws.size(); di++) { + auto& pc_draw = pc_effect.all_draws.at(di); auto& inds = indices_temp[ci][ei][di]; pc_draw.num_triangles = clean_up_vertex_indices(inds); pc_draw.first_index = out.merc_data.indices.size(); @@ -1067,5 +1296,20 @@ void extract_merc(const ObjectFileData& ag_data, } } } + + create_modifiable_vertex_data(vertex_modify_flags, vertex_srcs, out.merc_data, first_out_vertex, + first_model, all_effects); + + // compute max draws + for (u32 mi = first_model; mi < out.merc_data.models.size(); mi++) { + auto& model = out.merc_data.models[mi]; + model.max_draws = 0; + for (auto& e : model.effects) { + model.max_draws += e.all_draws.size(); + if (e.has_mod_draw) { + model.max_draws += e.mod.mod_draw.size() + e.mod.fix_draw.size(); + } + } + } } } // namespace decompiler diff --git a/decompiler/level_extractor/fr3_to_gltf.cpp b/decompiler/level_extractor/fr3_to_gltf.cpp index 21f0f459a6..ffeb7cc0eb 100644 --- a/decompiler/level_extractor/fr3_to_gltf.cpp +++ b/decompiler/level_extractor/fr3_to_gltf.cpp @@ -64,7 +64,7 @@ void unstrip_merc_draws(const std::vector& stripped_indices, for (auto& effect : model.effects) { auto& effect_dts = model_dts.emplace_back(); auto& effect_dtc = model_dtc.emplace_back(); - for (auto& draw : effect.draws) { + for (auto& draw : effect.all_draws) { 
effect_dts.push_back(unstripped.size()); for (size_t i = 2; i < draw.index_count; i++) { @@ -690,8 +690,8 @@ void add_merc(const tfrag3::Level& level, for (size_t effect_idx = 0; effect_idx < mmodel.effects.size(); effect_idx++) { const auto& effect = mmodel.effects[effect_idx]; - for (size_t draw_idx = 0; draw_idx < effect.draws.size(); draw_idx++) { - const auto& draw = effect.draws[draw_idx]; + for (size_t draw_idx = 0; draw_idx < effect.all_draws.size(); draw_idx++) { + const auto& draw = effect.all_draws[draw_idx]; auto& prim = mesh.primitives.emplace_back(); prim.material = add_material_for_tex(level, model, draw.tree_tex_id, tex_image_map, draw.mode); diff --git a/game/graphics/opengl_renderer/foreground/Merc2.cpp b/game/graphics/opengl_renderer/foreground/Merc2.cpp index bd5801140d..87e8b1b61b 100644 --- a/game/graphics/opengl_renderer/foreground/Merc2.cpp +++ b/game/graphics/opengl_renderer/foreground/Merc2.cpp @@ -1,20 +1,69 @@ #include "Merc2.h" +#include "common/global_profiler/GlobalProfiler.h" + #include "game/graphics/opengl_renderer/background/background_common.h" #include "third-party/imgui/imgui.h" +/* Merc 2 renderer: + The merc2 renderer is the main "foreground" renderer, which draws characters, collectables, + and even some water. + + The PC format renderer does the usual tricks of buffering stuff head of time as much as possible. + The main trick here is to buffer up draws and upload "bones" (skinning matrix) for many draws all + at once. + + The other tricky part is "mod vertices", which may be modified by the game. + We know ahead of time which vertices could be modified, and have a way to upload only those + vertices. + + Each "merc model" corresponds to a merc-ctrl in game. There's one merc-ctrl per LOD of an + art-group. So generally, this will be something like "jak" or "orb" or "some enemy". + + Each model is made up of "effect"s. There are a number of per-effect settings, like environment + mapping. 
Generally, the purpose of an "effect" is to divide up a model into parts that should be + rendered with a different configuration. + + Within each model, there are fragments. These correspond to how much data can be uploaded to VU1 + memory. For the most part, fragments are not considered by the PC renderer. The only exception is + updating vertices - we must read the data from the game, which is stored in fragments. + + Per level, there is an FR3 file loaded by the loader. Each merc renderer can access multiple + levels. +*/ + +/*! + * Remaining ideas for optimization: + * - port blerc to C++, do it in the rendering thread and avoid the lock. + * - combine envmap draws per effect (might require some funky indexing stuff, or multidraw) + * - smaller vertex formats for mod-vertex + * - AVX version of vertex conversion math + * - eliminate the "copy" step of vertex modification + * - batch uploading the vertex modification data + */ + +std::mutex g_merc_data_mutex; + Merc2::Merc2(const std::string& name, int my_id) : BucketRenderer(name, my_id) { + // Set up main vertex array. This will point to the data stored in the .FR3 level file, and will + // be uploaded to the GPU by the Loader. glGenVertexArrays(1, &m_vao); glBindVertexArray(m_vao); + // Bone buffer to store skinning matrices for multiple draws glGenBuffers(1, &m_bones_buffer); glBindBuffer(GL_UNIFORM_BUFFER, m_bones_buffer); + + // zero initialize the bone buffer. std::vector temp(MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f)); glBufferData(GL_UNIFORM_BUFFER, MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f), temp.data(), GL_DYNAMIC_DRAW); glBindBuffer(GL_UNIFORM_BUFFER, 0); + // annoyingly, glBindBufferRange can have alignment restrictions that vary per platform. + // the GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT gives us the minimum alignment for views into the bone + // buffer. The bone buffer stores things per-16-byte "quadword". 
GLint val; glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &val); if (val <= 16) { @@ -28,82 +77,432 @@ Merc2::Merc2(const std::string& name, int my_id) : BucketRenderer(name, my_id) { } } + // initialize draw buffers, these will store lists of draws to flush. for (int i = 0; i < MAX_LEVELS; i++) { auto& draws = m_level_draw_buckets.emplace_back(); draws.draws.resize(MAX_DRAWS_PER_LEVEL); draws.envmap_draws.resize(MAX_ENVMAP_DRAWS_PER_LEVEL); } + + m_mod_vtx_temp.resize(MAX_MOD_VTX); + m_mod_vtx_unpack_temp.resize(MAX_MOD_VTX * 2); + + for (auto& x : m_effect_debug_mask) { + x = true; + } } +Merc2::~Merc2() { + for (auto& x : m_mod_vtx_buffers) { + glDeleteBuffers(1, &x.vertex); + glDeleteVertexArrays(1, &x.vao); + } + + glDeleteBuffers(1, &m_bones_buffer); + glDeleteVertexArrays(1, &m_vao); +} + +// We can run into a problem where adding a PC model would overflow the +// preallocated draw/bone buffers. +// So we break this part into two functions: +// - init_pc_model, which doesn't allocate bones/draws + /*! - * Handle the merc renderer switching to a different model. + * Setup draws for a model, given the DMA data generated by the GOAL code. 
*/ -void Merc2::init_pc_model(const DmaTransfer& setup, SharedRenderState* render_state) { +void Merc2::handle_pc_model(const DmaTransfer& setup, + SharedRenderState* render_state, + ScopedProfilerNode& proff) { + auto p = scoped_prof("init-pc"); + + // the format of the data is: // ;; name (128 char, 8 qw) // ;; lights (7 qw x 1) // ;; matrix slot string (128 char, 8 qw) // ;; matrices (7 qw x N) // ;; flags (num-effects, effect-alpha-ignore, effect-disable) // ;; fades (u32 x N), padding to qw aligned + // ;; pointers (u32 x N), padding - // Part 1: name + // Get the name const u8* input_data = setup.data; + ASSERT(strlen((const char*)input_data) < 127); char name[128]; strcpy(name, (const char*)setup.data); - m_current_model = render_state->loader->get_merc_model(name); input_data += 128; - // Part 2: lights - memcpy(&m_current_lights, input_data, sizeof(VuLights)); + // Look up the model by name in the loader. + // This will return a reference to this model's data, plus a reference to the level's data + // for stuff shared between models of the same level + auto model_ref = render_state->loader->get_merc_model(name); + if (!model_ref) { + // it can fail, if the game is faster than the loader. In this case, we just don't draw. + m_stats.num_missing_models++; + return; + } + + // next, we need to check if we have enough room to draw this effect. + const LevelData* lev = model_ref->level; + const tfrag3::MercModel* model = model_ref->model; + + // each model uses only 1 light. + if (m_next_free_light >= MAX_LIGHTS) { + fmt::print("MERC2 out of lights, consider increasing MAX_LIGHTS\n"); + flush_draw_buckets(render_state, proff); + } + + // models use many bones. 
First check if we need to flush: + int bone_count = model->max_bones + 1; + if (m_next_free_bone_vector + m_opengl_buffer_alignment + bone_count * 8 > + MAX_SHADER_BONE_VECTORS) { + fmt::print("MERC2 out of bones, consider increasing MAX_SHADER_BONE_VECTORS\n"); + flush_draw_buckets(render_state, proff); + } + + // also sanity check that we have enough to draw the model + if (m_opengl_buffer_alignment + bone_count * 8 > MAX_SHADER_BONE_VECTORS) { + fmt::print( + "MERC2 doesn't have enough bones to draw a model, increase MAX_SHADER_BONE_VECTORS\n"); + ASSERT_NOT_REACHED(); + } + + // next, we need to find a bucket that holds draws for this level (will have the right buffers + // bound for drawing) + LevelDrawBucket* lev_bucket = nullptr; + for (u32 i = 0; i < m_next_free_level_bucket; i++) { + if (m_level_draw_buckets[i].level == lev) { + lev_bucket = &m_level_draw_buckets[i]; + break; + } + } + + if (!lev_bucket) { + // no existing bucket, allocate a new one. + if (m_next_free_level_bucket >= m_level_draw_buckets.size()) { + // out of room, flush + // fmt::print("MERC2 out of levels, consider increasing MAX_LEVELS\n"); + flush_draw_buckets(render_state, proff); + } + // alloc a new one + lev_bucket = &m_level_draw_buckets[m_next_free_level_bucket++]; + lev_bucket->reset(); + lev_bucket->level = lev; + } + + // next check draws: + if (lev_bucket->next_free_draw + model->max_draws >= lev_bucket->draws.size()) { + // out of room, flush + fmt::print("MERC2 out of draws, consider increasing MAX_DRAWS_PER_LEVEL\n"); + flush_draw_buckets(render_state, proff); + if (model->max_draws >= lev_bucket->draws.size()) { + ASSERT_NOT_REACHED_MSG("MERC2 draw buffer not big enough"); + } + } + + // same for envmap draws + if (lev_bucket->next_free_envmap_draw + model->max_draws >= lev_bucket->envmap_draws.size()) { + // out of room, flush + fmt::print("MERC2 out of envmap draws, consider increasing MAX_ENVMAP_DRAWS_PER_LEVEL\n"); + flush_draw_buckets(render_state, proff); + if 
(model->max_draws >= lev_bucket->envmap_draws.size()) { + ASSERT_NOT_REACHED_MSG("MERC2 envmap draw buffer not big enough"); + } + } + + // Next part of input data is the lights + VuLights current_lights; + memcpy(¤t_lights, input_data, sizeof(VuLights)); input_data += sizeof(VuLights); - // Part 3: matrix slot string + // Next part is the matrix slot string. The game sends us a bunch of bone matrices, + // but they may not be in order, or include all bones. The matrix slot string tells + // us which bones go where. (the game doesn't go in order because it follows the merc format) + ShaderMercMat skel_matrix_buffer[MAX_SKEL_BONES]; auto* matrix_array = (const u32*)(input_data + 128); int i; for (i = 0; i < 128; i++) { - if (input_data[i] == 0xff) { + if (input_data[i] == 0xff) { // indicates end of string. break; } + // read goal addr of matrix (matrix data isn't known at merc dma time, bones runs after) u32 addr; memcpy(&addr, &matrix_array[i * 4], 4); const u8* real_addr = setup.data - setup.data_offset + addr; - memcpy(&m_skel_matrix_buffer[input_data[i]], real_addr, sizeof(MercMat)); + ASSERT(input_data[i] < MAX_SKEL_BONES); + // get the matrix data + memcpy(&skel_matrix_buffer[input_data[i]], real_addr, sizeof(MercMat)); } input_data += 128 + 16 * i; - // Part 4: flags + // Next part is some flags auto* flags = (const u32*)input_data; - int num_effects = flags[0]; - m_current_ignore_alpha_bits = flags[1]; - m_current_effect_enable_bits = flags[2]; + int num_effects = flags[0]; // mostly just a sanity check + ASSERT(num_effects < kMaxEffect); + u32 current_ignore_alpha_bits = flags[1]; // shader settings + u32 current_effect_enable_bits = flags[2]; // mask for game to disable an effect + bool model_uses_mod = flags[3]; // if we should update vertices from game. 
input_data += 16; - // Part 5: fades + // Next is "fade data", indicating the color/intensity of envmap effect + u8 fade_buffer[4 * kMaxEffect]; for (int ei = 0; ei < num_effects; ei++) { for (int j = 0; j < 4; j++) { - m_fade_buffer[ei * 4 + j] = input_data[ei * 4 + j]; + fade_buffer[ei * 4 + j] = input_data[ei * 4 + j]; + } + } + input_data += (((num_effects * 4) + 15) / 16) * 16; + + // Next is pointers to merc data, needed so we can update vertices + + // will hold opengl buffers for the updated vertices + ModBuffers mod_opengl_buffers[kMaxEffect]; + if (model_uses_mod) { // only if we've enabled, this path is slow. + auto p = scoped_prof("update-verts"); + + // loop over effects. Mod vertices are done per effect (possibly a bad idea?) + for (int ei = 0; ei < num_effects; ei++) { + const auto& effect = model_ref->model->effects[ei]; + // some effects might have no mod draw info, and no modifiable vertices + if (effect.mod.mod_draw.empty()) { + continue; + } + + prof().begin_event("start1"); + // grab opengl buffer + auto opengl_buffers = alloc_mod_vtx_buffer(model_ref->level); + mod_opengl_buffers[ei] = opengl_buffers; + + // check that we have enough room for the finished thing. + if (effect.mod.vertices.size() > MAX_MOD_VTX) { + fmt::print("More mod vertices than MAX_MOD_VTX. {} > {}\n", effect.mod.vertices.size(), + MAX_MOD_VTX); + ASSERT_NOT_REACHED(); + } + + // check that we have enough room for unpack + if (effect.mod.expect_vidx_end > MAX_MOD_VTX) { + fmt::print("More mod vertices (temp) than MAX_MOD_VTX. 
{} > {}\n", + effect.mod.expect_vidx_end, MAX_MOD_VTX); + ASSERT_NOT_REACHED(); + } + + // start with the "correct" vertices from the model data: + memcpy(m_mod_vtx_temp.data(), effect.mod.vertices.data(), + sizeof(tfrag3::MercVertex) * effect.mod.vertices.size()); + + // get pointers to the fragment and fragment control data + u32 goal_addr; + memcpy(&goal_addr, input_data + 4 * ei, 4); + const u8* ee0 = setup.data - setup.data_offset; + const u8* merc_effect = ee0 + goal_addr; + u16 frag_cnt; + memcpy(&frag_cnt, merc_effect + 18, 2); + ASSERT(frag_cnt >= effect.mod.fragment_mask.size()); + u32 frag_goal; + memcpy(&frag_goal, merc_effect, 4); + u32 frag_ctrl_goal; + memcpy(&frag_ctrl_goal, merc_effect + 4, 4); + const u8* frag = ee0 + frag_goal; + const u8* frag_ctrl = ee0 + frag_ctrl_goal; + + // loop over frags + u32 vidx = 0; + // u32 st_vif_add = model->st_vif_add; + float xyz_scale = model->xyz_scale; + prof().end_event(); + { + // we're going to look at data that the game may be modifying. + // in the original game, they didn't have any lock, but I think that the + // scratchpad access from the EE would effectively block the VIF1 DMA, so you'd + // hopefully never get a partially updated model (which causes obvious holes). + // this lock is not ideal, and can block the rendering thread while blerc_execute runs, + // which can take up to 2ms on really blerc-heavy scenes + std::unique_lock lk(g_merc_data_mutex); + int frags_done = 0; + auto p = scoped_prof("vert-math"); + + // loop over fragments + for (u32 fi = 0; fi < effect.mod.fragment_mask.size(); fi++) { + frags_done++; + u8 mat_xfer_count = frag_ctrl[3]; + + // we create a mask of fragments to skip because they have no vertices. + // the indexing data assumes that we skip the other fragments. 
+ if (effect.mod.fragment_mask[fi]) { + // read fragment metadata + u8 unsigned_four_count = frag_ctrl[0]; + u8 lump_four_count = frag_ctrl[1]; + u32 mm_qwc_off = frag[10]; + float float_offsets[3]; + memcpy(float_offsets, &frag[mm_qwc_off * 16], 12); + u32 my_u4_count = ((unsigned_four_count + 3) / 4) * 16; + u32 my_l4_count = my_u4_count + ((lump_four_count + 3) / 4) * 16; + + // loop over vertices in the fragment and unpack + for (u32 w = my_u4_count / 4; w < (my_l4_count / 4) - 2; w += 3) { + // just want positions for now. + u32 q0w = 0x4b010000 + frag[w * 4 + (0 * 4) + 3]; + u32 q1w = 0x4b010000 + frag[w * 4 + (1 * 4) + 3]; + u32 q2w = 0x4b010000 + frag[w * 4 + (2 * 4) + 3]; + + // and maybe normals + u32 q0z = 0x47800000 + frag[w * 4 + (0 * 4) + 2]; + u32 q1z = 0x47800000 + frag[w * 4 + (1 * 4) + 2]; + u32 q2z = 0x47800000 + frag[w * 4 + (2 * 4) + 2]; + + auto* pos_array = m_mod_vtx_unpack_temp[vidx].pos; + memcpy(&pos_array[0], &q0w, 4); + memcpy(&pos_array[1], &q1w, 4); + memcpy(&pos_array[2], &q2w, 4); + pos_array[0] += float_offsets[0]; + pos_array[1] += float_offsets[1]; + pos_array[2] += float_offsets[2]; + pos_array[0] *= xyz_scale; + pos_array[1] *= xyz_scale; + pos_array[2] *= xyz_scale; + + auto* nrm_array = m_mod_vtx_unpack_temp[vidx].nrm; + memcpy(&nrm_array[0], &q0z, 4); + memcpy(&nrm_array[1], &q1z, 4); + memcpy(&nrm_array[2], &q2z, 4); + nrm_array[0] += -65537; + nrm_array[1] += -65537; + nrm_array[2] += -65537; + vidx++; + } + } + + // next control + frag_ctrl += 4 + 2 * mat_xfer_count; + + // next frag + u32 mm_qwc_count = frag[11]; + frag += mm_qwc_count * 16; + } + + // sanity check + if (effect.mod.expect_vidx_end != vidx) { + fmt::print("---------- BAD {}/{}\n", effect.mod.expect_vidx_end, vidx); + ASSERT(false); + } + } + + { + auto pp = scoped_prof("copy"); + // now copy the data in merc original vertex order to the output. 
+ for (u32 vi = 0; vi < effect.mod.vertices.size(); vi++) { + u32 addr = effect.mod.vertex_lump4_addr[vi]; + if (addr < vidx) { + memcpy(&m_mod_vtx_temp[vi], &m_mod_vtx_unpack_temp[addr], 32); + } + } + } + + // and upload to GPU + m_stats.num_uploads++; + m_stats.num_upload_bytes += effect.mod.vertices.size() * sizeof(tfrag3::MercVertex); + { + auto pp = scoped_prof("update-verts-upload"); + glBindBuffer(GL_ARRAY_BUFFER, opengl_buffers.vertex); + glBufferData(GL_ARRAY_BUFFER, effect.mod.vertices.size() * sizeof(tfrag3::MercVertex), + m_mod_vtx_temp.data(), GL_DYNAMIC_DRAW); + } } } - if (m_current_model) { - m_stats.num_models++; - for (const auto& effect : m_current_model->model->effects) { - bool envmap = effect.has_envmap; - m_stats.num_effects++; - m_stats.num_predicted_draws += effect.draws.size(); + // stats + m_stats.num_models++; + for (const auto& effect : model_ref->model->effects) { + bool envmap = effect.has_envmap; + m_stats.num_effects++; + m_stats.num_predicted_draws += effect.all_draws.size(); + if (envmap) { + m_stats.num_envmap_effects++; + m_stats.num_predicted_draws += effect.all_draws.size(); + } + for (const auto& draw : effect.all_draws) { + m_stats.num_predicted_tris += draw.num_triangles; if (envmap) { - m_stats.num_envmap_effects++; - m_stats.num_predicted_draws += effect.draws.size(); - } - for (const auto& draw : effect.draws) { m_stats.num_predicted_tris += draw.num_triangles; - if (envmap) { - m_stats.num_predicted_tris += draw.num_triangles; - } } } - } else { - m_stats.num_missing_models++; + } + + if (m_debug_mode) { + auto& d = m_debug.model_list.emplace_back(); + d.name = model->name; + d.level = model_ref->level->level->level_name; + for (auto& e : model->effects) { + auto& de = d.effects.emplace_back(); + de.envmap = e.has_envmap; + de.envmap_mode = e.envmap_mode; + for (auto& draw : e.all_draws) { + auto& dd = de.draws.emplace_back(); + dd.mode = draw.mode; + dd.num_tris = draw.num_triangles; + } + } + } + + // allocate bones 
in shared bone buffer to be sent to GPU at flush-time + u32 first_bone = alloc_bones(bone_count, skel_matrix_buffer); + + // allocate lights + u32 lights = alloc_lights(current_lights); + + // loop over effects, creating draws for each + for (size_t ei = 0; ei < model->effects.size(); ei++) { + // game has disabled it? + if (!(current_effect_enable_bits & (1 << ei))) { + continue; + } + + // imgui menu disabled it? + if (!m_effect_debug_mask[ei]) { + continue; + } + + u8 ignore_alpha = (current_ignore_alpha_bits & (1 << ei)); + auto& effect = model->effects[ei]; + + bool should_envmap = effect.has_envmap; + bool should_mod = model_uses_mod && effect.has_mod_draw; + + if (should_mod) { + // draw as two parts, fixed and mod + + // do fixed draws: + for (auto& fdraw : effect.mod.fix_draw) { + alloc_normal_draw(fdraw, ignore_alpha, lev_bucket, first_bone, lights); + if (should_envmap) { + try_alloc_envmap_draw(fdraw, effect.envmap_mode, effect.envmap_texture, lev_bucket, + fade_buffer + 4 * ei, first_bone, lights); + } + } + + // do mod draws + for (auto& mdraw : effect.mod.mod_draw) { + auto n = alloc_normal_draw(mdraw, ignore_alpha, lev_bucket, first_bone, lights); + // modify the draw, set the mod flag and point it to the opengl buffer + n->flags |= MOD_VTX; + n->mod_vtx_buffer = mod_opengl_buffers[ei]; + if (should_envmap) { + auto e = try_alloc_envmap_draw(mdraw, effect.envmap_mode, effect.envmap_texture, + lev_bucket, fade_buffer + 4 * ei, first_bone, lights); + e->flags |= MOD_VTX; + e->mod_vtx_buffer = mod_opengl_buffers[ei]; + } + } + } else { + // no mod, just do all_draws + for (auto& draw : effect.all_draws) { + if (should_envmap) { + try_alloc_envmap_draw(draw, effect.envmap_mode, effect.envmap_texture, lev_bucket, + fade_buffer + 4 * ei, first_bone, lights); + } + alloc_normal_draw(draw, ignore_alpha, lev_bucket, first_bone, lights); + } + } } } @@ -118,6 +517,30 @@ void Merc2::draw_debug_window() { ImGui::Text("EEffects : %d", 
m_stats.num_envmap_effects); ImGui::Text("ETris : %d", m_stats.num_envmap_tris); + + ImGui::Text("Uploads : %d", m_stats.num_uploads); + ImGui::Text("Upload kB: %d", m_stats.num_upload_bytes / 1024); + + ImGui::Checkbox("Debug", &m_debug_mode); + + if (m_debug_mode) { + for (int i = 0; i < kMaxEffect; i++) { + ImGui::Checkbox(fmt::format("e{:02d}", i).c_str(), &m_effect_debug_mask[i]); + } + + for (const auto& model : m_debug.model_list) { + if (ImGui::TreeNode(model.name.c_str())) { + ImGui::Text("Level: %s\n", model.level.c_str()); + for (const auto& e : model.effects) { + for (const auto& d : e.draws) { + ImGui::Text("%s", d.mode.to_string().c_str()); + } + ImGui::Separator(); + } + ImGui::TreePop(); + } + } + } } void Merc2::init_shaders(ShaderLibrary& shaders) { @@ -175,6 +598,9 @@ void Merc2::switch_to_emerc(SharedRenderState* render_state) { */ void Merc2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { m_stats = {}; + if (m_debug_mode) { + m_debug = {}; + } // skip if disabled if (!m_enabled) { @@ -183,16 +609,19 @@ void Merc2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProf } return; } - m_current_model = std::nullopt; switch_to_merc2(render_state); - // iterate through the dma chain, filling buckets - handle_all_dma(dma, render_state, prof); + { + auto pp = scoped_prof("handle-all-dma"); + // iterate through the dma chain, filling buckets + handle_all_dma(dma, render_state, prof); + } - // flush model data to buckets - flush_pending_model(render_state, prof); - // flush buckets to draws - flush_draw_buckets(render_state, prof); + { + auto pp = scoped_prof("flush-buckets"); + // flush buckets to draws + flush_draw_buckets(render_state, prof); + } } u32 Merc2::alloc_lights(const VuLights& lights) { @@ -367,8 +796,8 @@ void Merc2::handle_merc_chain(DmaFollower& dma, } while (init.vifcode1().kind == VifCode::Kind::PC_PORT) { - flush_pending_model(render_state, prof); - init_pc_model(init, 
render_state); + // flush_pending_model(render_state, prof); + handle_pc_model(init, render_state, prof); for (int i = 0; i < skip_count; i++) { auto link = dma.read_and_advance(); ASSERT(link.vifcode0().kind == VifCode::Kind::NOP); @@ -393,7 +822,7 @@ void Merc2::handle_merc_chain(DmaFollower& dma, * Queue up some bones to be included in the bone buffer. * Returns the index of the first bone vector. */ -u32 Merc2::alloc_bones(int count) { +u32 Merc2::alloc_bones(int count, ShaderMercMat* data) { u32 first_bone_vector = m_next_free_bone_vector; ASSERT(count * 8 + first_bone_vector <= MAX_SHADER_BONE_VECTORS); @@ -402,7 +831,7 @@ u32 Merc2::alloc_bones(int count) { // iterate over each bone we need for (int i = 0; i < count; i++) { - auto& skel_mat = m_skel_matrix_buffer[i]; + auto& skel_mat = data[i]; auto* shader_mat = &m_shader_bone_vector_buffer[m_next_free_bone_vector]; int bv = 0; @@ -426,198 +855,147 @@ u32 Merc2::alloc_bones(int count) { ASSERT(first_bone_vector + count * 8 <= m_next_free_bone_vector); return first_bone_vector; } -/*! 
- * Flush a model to draw buckets - */ -void Merc2::flush_pending_model(SharedRenderState* render_state, ScopedProfilerNode& prof) { - if (!m_current_model) { - return; + +Merc2::ModBuffers Merc2::alloc_mod_vtx_buffer(const LevelData* lev) { + if (m_next_mod_vtx_buffer >= m_mod_vtx_buffers.size()) { + GLuint b; + glGenBuffers(1, &b); + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + glBindBuffer(GL_ARRAY_BUFFER, b); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + setup_merc_vao(); + m_mod_vtx_buffers.push_back({vao, b}); } + return m_mod_vtx_buffers[m_next_mod_vtx_buffer++]; +} - const LevelData* lev = m_current_model->level; - const tfrag3::MercModel* model = m_current_model->model; - - int bone_count = model->max_bones + 1; - - if (m_next_free_light >= MAX_LIGHTS) { - fmt::print("MERC2 out of lights, consider increasing MAX_LIGHTS\n"); - flush_draw_buckets(render_state, prof); - } - - if (m_next_free_bone_vector + m_opengl_buffer_alignment + bone_count * 8 > - MAX_SHADER_BONE_VECTORS) { - fmt::print("MERC2 out of bones, consider increasing MAX_SHADER_BONE_VECTORS\n"); - flush_draw_buckets(render_state, prof); - } - - // find a level bucket - LevelDrawBucket* lev_bucket = nullptr; - for (u32 i = 0; i < m_next_free_level_bucket; i++) { - if (m_level_draw_buckets[i].level == lev) { - lev_bucket = &m_level_draw_buckets[i]; +Merc2::Draw* Merc2::try_alloc_envmap_draw(const tfrag3::MercDraw& mdraw, + const DrawMode& envmap_mode, + u32 envmap_texture, + LevelDrawBucket* lev_bucket, + const u8* fade, + u32 first_bone, + u32 lights) { + bool nonzero_fade = false; + for (int i = 0; i < 4; i++) { + if (fade[i]) { + nonzero_fade = true; break; } } - - if (!lev_bucket) { - // no existing bucket - if (m_next_free_level_bucket >= m_level_draw_buckets.size()) { - // out of room, flush - // fmt::print("MERC2 out of levels, consider increasing MAX_LEVELS\n"); - flush_draw_buckets(render_state, prof); - // and retry the whole thing. 
- flush_pending_model(render_state, prof); - return; - } - // alloc a new one - lev_bucket = &m_level_draw_buckets[m_next_free_level_bucket++]; - lev_bucket->reset(); - lev_bucket->level = lev; + if (!nonzero_fade) { + return nullptr; } - if (lev_bucket->next_free_draw + model->max_draws >= lev_bucket->draws.size()) { - // out of room, flush - fmt::print("MERC2 out of draws, consider increasing MAX_DRAWS_PER_LEVEL\n"); - flush_draw_buckets(render_state, prof); - // and retry the whole thing. - flush_pending_model(render_state, prof); - return; + Draw* draw = &lev_bucket->envmap_draws[lev_bucket->next_free_envmap_draw++]; + draw->flags = 0; + draw->first_index = mdraw.first_index; + draw->index_count = mdraw.index_count; + draw->mode = envmap_mode; + draw->texture = envmap_texture; + draw->first_bone = first_bone; + draw->light_idx = lights; + draw->num_triangles = mdraw.num_triangles; + for (int i = 0; i < 4; i++) { + draw->fade[i] = fade[i]; } + return draw; +} - if (lev_bucket->next_free_envmap_draw + model->max_draws >= lev_bucket->envmap_draws.size()) { - // out of room, flush - fmt::print("MERC2 out of envmap draws, consider increasing MAX_ENVMAP_DRAWS_PER_LEVEL\n"); - // or, use a more accurate max_draws for envmap. - flush_draw_buckets(render_state, prof); - // and retry the whole thing. 
- flush_pending_model(render_state, prof); - return; +Merc2::Draw* Merc2::alloc_normal_draw(const tfrag3::MercDraw& mdraw, + bool ignore_alpha, + LevelDrawBucket* lev_bucket, + u32 first_bone, + u32 lights) { + Draw* draw = &lev_bucket->draws[lev_bucket->next_free_draw++]; + draw->flags = 0; + draw->first_index = mdraw.first_index; + draw->index_count = mdraw.index_count; + draw->mode = mdraw.mode; + draw->texture = mdraw.tree_tex_id; + draw->first_bone = first_bone; + draw->light_idx = lights; + draw->num_triangles = mdraw.num_triangles; + if (ignore_alpha) { + draw->flags |= IGNORE_ALPHA; } - - u32 first_bone = alloc_bones(bone_count); - - // allocate lights - u32 lights = alloc_lights(m_current_lights); - // - for (size_t ei = 0; ei < model->effects.size(); ei++) { - if (!(m_current_effect_enable_bits & (1 << ei))) { - continue; - } - - u8 ignore_alpha = (m_current_ignore_alpha_bits & (1 << ei)); - auto& effect = model->effects[ei]; - if (effect.has_envmap) { - bool nonzero_fade = false; - for (int i = 0; i < 4; i++) { - if (m_fade_buffer[4 * ei + i]) { - nonzero_fade = true; - break; - } - } - if (nonzero_fade) { - for (auto& mdraw : effect.draws) { - Draw* draw = &lev_bucket->envmap_draws[lev_bucket->next_free_envmap_draw++]; - draw->first_index = mdraw.first_index; - draw->index_count = mdraw.index_count; - draw->mode = effect.envmap_mode; - draw->texture = effect.envmap_texture; - draw->first_bone = first_bone; - draw->light_idx = lights; - draw->num_triangles = mdraw.num_triangles; - draw->ignore_alpha = false; - for (int i = 0; i < 4; i++) { - draw->fade[i] = m_fade_buffer[4 * ei + i]; - } - } - } - } - for (auto& mdraw : effect.draws) { - Draw* draw = &lev_bucket->draws[lev_bucket->next_free_draw++]; - draw->first_index = mdraw.first_index; - draw->index_count = mdraw.index_count; - draw->mode = mdraw.mode; - draw->texture = mdraw.tree_tex_id; - draw->first_bone = first_bone; - draw->light_idx = lights; - draw->num_triangles = mdraw.num_triangles; - 
draw->ignore_alpha = ignore_alpha; - for (int i = 0; i < 4; i++) { - draw->fade[i] = 0; - } - } + for (int i = 0; i < 4; i++) { + draw->fade[i] = 0; } + return draw; +} - m_current_model = std::nullopt; +void Merc2::setup_merc_vao() { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(UINT32_MAX); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + glEnableVertexAttribArray(5); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_GEQUAL); + + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, pos) // offset (0) + ); + + glVertexAttribPointer(1, // location 1 in the + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, normal[0]) // offset (0) + ); + + glVertexAttribPointer(2, // location 1 in the + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, weights[0]) // offset (0) + ); + + glVertexAttribPointer(3, // location 1 in the shader + 2, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, st[0]) // offset (0) + ); + + glVertexAttribPointer(4, // location 1 in the shader + 4, // 3 values per vert + GL_UNSIGNED_BYTE, // floats + GL_TRUE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, rgba[0]) // offset (0) + ); + + glVertexAttribIPointer(5, // location 0 in the + 4, // 3 floats per vert + GL_UNSIGNED_BYTE, // u8's + sizeof(tfrag3::MercVertex), // + (void*)offsetof(tfrag3::MercVertex, mats[0]) // offset in array + ); } void Merc2::flush_draw_buckets(SharedRenderState* 
render_state, ScopedProfilerNode& prof) { m_stats.num_draw_flush++; - for (u32 li = 0; li < m_next_free_level_bucket; li++) { const auto& lev_bucket = m_level_draw_buckets[li]; const auto* lev = lev_bucket.level; glBindVertexArray(m_vao); glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); - - glEnable(GL_PRIMITIVE_RESTART); - glPrimitiveRestartIndex(UINT32_MAX); - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); - glEnableVertexAttribArray(2); - glEnableVertexAttribArray(3); - glEnableVertexAttribArray(4); - glEnableVertexAttribArray(5); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_GEQUAL); - - glVertexAttribPointer(0, // location 0 in the shader - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, pos) // offset (0) - ); - - glVertexAttribPointer(1, // location 1 in the - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, normal[0]) // offset (0) - ); - - glVertexAttribPointer(2, // location 1 in the - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, weights[0]) // offset (0) - ); - - glVertexAttribPointer(3, // location 1 in the shader - 2, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, st[0]) // offset (0) - ); - - glVertexAttribPointer(4, // location 1 in the shader - 4, // 3 values per vert - GL_UNSIGNED_BYTE, // floats - GL_TRUE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, rgba[0]) // offset (0) - ); - - glVertexAttribIPointer(5, // location 0 in the - 4, // 3 floats per vert - GL_UNSIGNED_BYTE, // u8's - sizeof(tfrag3::MercVertex), // - 
(void*)offsetof(tfrag3::MercVertex, mats[0]) // offset in array - ); - + setup_merc_vao(); m_stats.num_bones_uploaded += m_next_free_bone_vector; glBindBuffer(GL_UNIFORM_BUFFER, m_bones_buffer); @@ -638,6 +1016,7 @@ void Merc2::flush_draw_buckets(SharedRenderState* render_state, ScopedProfilerNo m_next_free_light = 0; m_next_free_bone_vector = 0; m_next_free_level_bucket = 0; + m_next_mod_vtx_buffer = 0; } void Merc2::do_draws(const Draw* draw_array, @@ -646,12 +1025,27 @@ void Merc2::do_draws(const Draw* draw_array, const Uniforms& uniforms, ScopedProfilerNode& prof, bool set_fade, - SharedRenderState* render_state) { + SharedRenderState*) { + glBindVertexArray(m_vao); int last_tex = -1; int last_light = -1; + bool normal_vtx_buffer_bound = true; for (u32 di = 0; di < num_draws; di++) { auto& draw = draw_array[di]; - glUniform1i(uniforms.ignore_alpha, draw.ignore_alpha); + if (draw.flags & MOD_VTX) { + glBindVertexArray(draw.mod_vtx_buffer.vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + normal_vtx_buffer_bound = false; + } else { + if (!normal_vtx_buffer_bound) { + glBindVertexArray(m_vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + normal_vtx_buffer_bound = true; + } + } + glUniform1i(uniforms.ignore_alpha, draw.flags & DrawFlags::IGNORE_ALPHA); if ((int)draw.texture != last_tex) { if (draw.texture < lev->textures.size()) { glBindTexture(GL_TEXTURE_2D, lev->textures.at(draw.texture)); @@ -690,4 +1084,10 @@ void Merc2::do_draws(const Draw* draw_array, glDrawElements(GL_TRIANGLE_STRIP, draw.index_count, GL_UNSIGNED_INT, (void*)(sizeof(u32) * draw.first_index)); } + + if (!normal_vtx_buffer_bound) { + glBindVertexArray(m_vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + } } \ No newline at end of file diff --git 
a/game/graphics/opengl_renderer/foreground/Merc2.h b/game/graphics/opengl_renderer/foreground/Merc2.h index 4b79aa9460..d57b8de3b1 100644 --- a/game/graphics/opengl_renderer/foreground/Merc2.h +++ b/game/graphics/opengl_renderer/foreground/Merc2.h @@ -4,11 +4,30 @@ class Merc2 : public BucketRenderer { public: Merc2(const std::string& name, int my_id); + ~Merc2(); void draw_debug_window() override; void init_shaders(ShaderLibrary& shaders) override; void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; private: + bool m_debug_mode = false; + struct DrawDebug { + DrawMode mode; + int num_tris; + }; + struct EffectDebug { + bool envmap = false; + DrawMode envmap_mode; + std::vector draws; + }; + struct ModelDebug { + std::string name; + std::string level; + std::vector effects; + }; + struct { + std::vector model_list; + } m_debug; enum MercDataMemory { LOW_MEMORY = 0, BUFFER_BASE = 442, @@ -38,16 +57,17 @@ class Merc2 : public BucketRenderer { math::Vector4f ambient; }; - void init_pc_model(const DmaTransfer& setup, SharedRenderState* render_state); + void handle_pc_model(const DmaTransfer& setup, + SharedRenderState* render_state, + ScopedProfilerNode& prof); u32 alloc_lights(const VuLights& lights); - u32 alloc_bones(int count); + struct ModBuffers { + GLuint vao, vertex; + }; - std::optional m_current_model = std::nullopt; - u16 m_current_effect_enable_bits = 0; - u16 m_current_ignore_alpha_bits = 0; - static constexpr int kMaxEffect = 16; - u8 m_fade_buffer[4 * kMaxEffect]; + static constexpr int kMaxEffect = 32; + bool m_effect_debug_mask[kMaxEffect]; struct MercMat { math::Vector4f tmat[4]; @@ -60,7 +80,7 @@ class Merc2 : public BucketRenderer { math::Vector4f pad; std::string to_string() const; }; - + u32 alloc_bones(int count, ShaderMercMat* data); static constexpr int MAX_SKEL_BONES = 128; static constexpr int BONE_VECTORS_PER_BONE = 7; static constexpr int MAX_SHADER_BONE_VECTORS = 1024 * 32; // ?? 
@@ -70,7 +90,6 @@ class Merc2 : public BucketRenderer { static constexpr int MAX_ENVMAP_DRAWS_PER_LEVEL = 1024; math::Vector4f m_shader_bone_vector_buffer[MAX_SHADER_BONE_VECTORS]; - ShaderMercMat m_skel_matrix_buffer[MAX_SKEL_BONES]; struct Uniforms { GLuint light_direction[3]; @@ -97,10 +116,8 @@ class Merc2 : public BucketRenderer { Uniforms m_merc_uniforms, m_emerc_uniforms; void init_shader_common(Shader& shader, Uniforms* uniforms, bool include_lights); - void init_for_frame(SharedRenderState* render_state, ShaderId shader); void handle_setup_dma(DmaFollower& dma, SharedRenderState* render_state); void handle_all_dma(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); - void flush_pending_model(SharedRenderState* render_state, ScopedProfilerNode& prof); void handle_merc_chain(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); @@ -110,6 +127,22 @@ class Merc2 : public BucketRenderer { GLuint m_vao; + void setup_merc_vao(); + + std::vector m_mod_vtx_buffers; + u32 m_next_mod_vtx_buffer = 0; + + static constexpr int MAX_MOD_VTX = UINT16_MAX; + std::vector m_mod_vtx_temp; + + struct UnpackTempVtx { + float pos[4]; + float nrm[4]; + }; + std::vector m_mod_vtx_unpack_temp; + + ModBuffers alloc_mod_vtx_buffer(const LevelData* lev); + GLuint m_bones_buffer; struct Stats { @@ -125,8 +158,16 @@ class Merc2 : public BucketRenderer { int num_envmap_effects = 0; int num_envmap_tris = 0; + + int num_upload_bytes = 0; + int num_uploads = 0; } m_stats; + enum DrawFlags { + IGNORE_ALPHA = 1, + MOD_VTX = 2, + }; + struct Draw { u32 first_index; u32 index_count; @@ -135,7 +176,8 @@ class Merc2 : public BucketRenderer { u32 num_triangles; u16 first_bone; u16 light_idx; - u8 ignore_alpha; + u8 flags; + ModBuffers mod_vtx_buffer; u8 fade[4]; }; @@ -152,6 +194,18 @@ class Merc2 : public BucketRenderer { next_free_envmap_draw = 0; } }; + Draw* alloc_normal_draw(const tfrag3::MercDraw& mdraw, + bool ignore_alpha, + LevelDrawBucket* 
lev_bucket, + u32 first_bone, + u32 lights); + Draw* try_alloc_envmap_draw(const tfrag3::MercDraw& mdraw, + const DrawMode& envmap_mode, + u32 envmap_texture, + LevelDrawBucket* lev_bucket, + const u8* fade, + u32 first_bone, + u32 lights); void do_draws(const Draw* draw_array, const LevelData* lev, @@ -164,7 +218,6 @@ class Merc2 : public BucketRenderer { static constexpr int MAX_LIGHTS = 1024; VuLights m_lights_buffer[MAX_LIGHTS]; u32 m_next_free_light = 0; - VuLights m_current_lights; std::vector m_level_draw_buckets; u32 m_next_free_level_bucket = 0; diff --git a/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp b/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp index 1c57e239db..73a469d02c 100644 --- a/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp +++ b/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp @@ -195,10 +195,6 @@ void OceanMidAndFar::handle_ocean_mid(DmaFollower& dma, } } -void handle_ocean_89_jak2(DmaFollower& dma, - SharedRenderState* render_state, - ScopedProfilerNode& prof) {} +void handle_ocean_89_jak2(DmaFollower&, SharedRenderState*, ScopedProfilerNode&) {} -void handle_ocean_79_jak2(DmaFollower& dma, - SharedRenderState* render_state, - ScopedProfilerNode& prof) {} \ No newline at end of file +void handle_ocean_79_jak2(DmaFollower&, SharedRenderState*, ScopedProfilerNode&) {} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp b/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp index 6d9e248084..7e00b94a3d 100644 --- a/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp +++ b/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp @@ -518,20 +518,20 @@ void OceanTexture::run_L3_PC_jak2() { Vf res3; // vf23 Vf nrm0; // vf24 - Vf nrm1; // vf25 + // Vf nrm1; // vf25 Vf nrm2; // vf26 - Vf reflect; // vf27 + // Vf reflect; // vf27 Vf cout0; // vf28 Vf cout1; // vf29 Vf cout2; // vf30 Vf cout3; // vf31 - Accumulator acc; + // Accumulator acc; const Vf ones(1, 1, 1, 1); 
const Vf vf00(0, 0, 0, 1); - const u16 vi11 = 0x80; + // const u16 vi11 = 0x80; bool bc; // clang-format off diff --git a/game/mips2c/jak1_functions/bones.cpp b/game/mips2c/jak1_functions/bones.cpp index 72ade06777..9499d9be43 100644 --- a/game/mips2c/jak1_functions/bones.cpp +++ b/game/mips2c/jak1_functions/bones.cpp @@ -803,7 +803,6 @@ u64 execute(void* ctxt) { const MercBucketInfo* mbi = (const MercBucketInfo*)(g_ee_main_mem + c->sgpr64(a3)); u16 use_pc_merc_bits = 0; u16 ignore_alpha_bits = 0; - u32 fade = 0; for (int i = 0; i < 16; i++) { if (!mbi->effects[i].use_mercneric) { use_pc_merc_bits |= (1 << i); diff --git a/game/mips2c/jak2_functions/merc_blend_shape.cpp b/game/mips2c/jak2_functions/merc_blend_shape.cpp index 69f0310355..b58579e7e5 100644 --- a/game/mips2c/jak2_functions/merc_blend_shape.cpp +++ b/game/mips2c/jak2_functions/merc_blend_shape.cpp @@ -2,6 +2,11 @@ // clang-format off #include "game/mips2c/mips2c_private.h" #include "game/kernel/jak2/kscheme.h" +#include "common/global_profiler/GlobalProfiler.h" +#include + +extern std::mutex g_merc_data_mutex; + using ::jak2::intern_from_c; namespace Mips2C::jak2 { namespace blerc_execute { @@ -14,6 +19,8 @@ struct Cache { } cache; u64 execute(void* ctxt) { + auto pp = scoped_prof("blerc-exec"); + std::unique_lock lk(g_merc_data_mutex); auto* c = (ExecutionContext*)ctxt; bool bc = false; u32 call_addr = 0; @@ -251,6 +258,10 @@ block_24: if (bc) {goto block_23;} // branch non-likely // Unknown instr: pmfhl.uw t5 + c->gprs[t5].du32[0] = c->lo.du32[1]; + c->gprs[t5].du32[1] = c->hi.du32[1]; + c->gprs[t5].du32[2] = c->lo.du32[3]; + c->gprs[t5].du32[3] = c->hi.du32[3]; c->mfc1(r0, f31); // mfc1 r0, f31 c->psraw(t7, t7, 13); // psraw t7, t7, 13 c->mfc1(r0, f31); // mfc1 r0, f31 @@ -345,7 +356,10 @@ block_29: qwc = c->sgpr64(a0); // Unknown instr: sync.l // c->sw(a3, 0, a2); // sw a3, 0(a2) - spad_from_dma_no_sadr_off(cache.fake_scratchpad_data, madr, sadr, qwc); + // fmt::print("blerc download 0x{:x} <- 
0x{:x} ({} qwc)\n", madr, sadr, qwc); + { + spad_from_dma_no_sadr_off(cache.fake_scratchpad_data, madr, sadr, qwc); + } // Unknown instr: sync.l c->gprs[a0].du64[0] = 0; // or a0, r0, r0 c->addiu(a0, r0, 1); // addiu a0, r0, 1 @@ -406,7 +420,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->daddiu(sp, sp, -128); // daddiu sp, sp, -128 c->sd(ra, 0, sp); // sd ra, 0(sp) c->sq(s0, 16, sp); // sq s0, 16(sp) diff --git a/game/mips2c/jak2_functions/ripple.cpp b/game/mips2c/jak2_functions/ripple.cpp index ecb6b05ca9..34364a933e 100644 --- a/game/mips2c/jak2_functions/ripple.cpp +++ b/game/mips2c/jak2_functions/ripple.cpp @@ -25,7 +25,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->load_symbol2(v1, cache.cos_poly_vec); // lw v1, *cos-poly-vec*(s7) c->lqc2(vf7, 0, v1); // lqc2 vf7, 0(v1) c->lui(v1, 15561); // lui v1, 15561 @@ -450,7 +449,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; get_fake_spad_addr2(v1, cache.fake_scratchpad_data, 0, c);// lui v1, 28672 c->daddiu(v1, v1, 1024); // daddiu v1, v1, 1024 c->lwu(a1, 4, a0); // lwu a1, 4(a0) @@ -552,7 +550,6 @@ namespace ripple_matrix_scale { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->lhu(v1, 2, a0); // lhu v1, 2(a0) c->lw(a1, 4, a0); // lw a1, 4(a0) c->lw(a2, 28, a0); // lw a2, 28(a0) diff --git a/game/mips2c/mips2c_private.h b/game/mips2c/mips2c_private.h index 4412cebb86..96a9698ba5 100644 --- a/game/mips2c/mips2c_private.h +++ b/game/mips2c/mips2c_private.h @@ -1652,7 +1652,7 @@ inline void load_vfs_from_tf_regs(const void* tf_regs_sym, ExecutionContext* c) inline void spad_to_dma_blerc_chain(void* spad_sym_addr, u32 sadr, u32 tadr) { u32 spad_addr_goal; - memcpy(&spad_addr_goal, spad_sym_addr, 4); + memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 
4); void* spad_addr_c = g_ee_main_mem + spad_addr_goal; ASSERT(sadr < 0x4000); emulate_dma(g_ee_main_mem, spad_addr_c, tadr, sadr); diff --git a/goal_src/jak1/engine/gfx/foreground/bones.gc b/goal_src/jak1/engine/gfx/foreground/bones.gc index c64d7d2e1b..1a93a8fd2d 100644 --- a/goal_src/jak1/engine/gfx/foreground/bones.gc +++ b/goal_src/jak1/engine/gfx/foreground/bones.gc @@ -944,7 +944,7 @@ ;; flags (num-effects, effect-alpha-ignore, effect-disable) ;; fades (u32 x N), padding to qw aligned -(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer)) +(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (update-verts symbol)) (let ((start-packet (the-as dma-packet dma-buf)) (qwc-total 0)) ;; merc draw asm will check this. @@ -1028,6 +1028,7 @@ (set! (-> flags 0) (-> merc-ctrl header effect-count)) (set! (-> flags 1) ignore-alpha-mask) (set! (-> flags 2) enable-mask) + (set! (-> flags 3) (if update-verts 1 0)) ) (&+! dma-buf (* 16 1)) (+! qwc-total 1) @@ -1038,11 +1039,22 @@ (set! (-> fades i) (the-as uint (-> *merc-bucket-info* effect i color-fade))) ) ) - (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) (&+! dma-buf (* 16 num-fades)) (+! qwc-total num-fades) ) + + ;; merc ptrs + (let ((merc-ptrs (the (pointer object) dma-buf))) + (dotimes (i (-> merc-ctrl header effect-count)) + (set! (-> merc-ptrs i) (-> merc-ctrl effect i)) + ) + ) + (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) + (&+! dma-buf (* 16 num-fades)) + (+! qwc-total num-fades) + ) + ) ) ) @@ -1065,6 +1077,9 @@ ;; this is much faster, and does significantly speed up the game thread on finalboss. (define *emerc-hack* #t) +;; when set, use merc for blerc instead of generic. +(define *blerc-hack* #t) + (defun draw-bones ((arg0 draw-control) (dma-buf dma-buffer) (arg2 float)) "Main draw function for all bone-related renderers. Will set up merc, generic and shadow. 
and also add the bones to the calculation list." @@ -1277,6 +1292,8 @@ (let ((geom (-> arg0 lod-set lod (-> arg0 cur-lod) geo)) ;; merc2 can't handle all cases of the original merc, so we add this fallback on PC. (pc-force-mercneric #f) + ;; if pc rendering code needs to update merc vertices + (pc-merc-vtx-update #f) ) (when (logtest? (-> arg0 global-effect) (draw-effect title)) (set! pc-force-mercneric #t) @@ -1352,7 +1369,10 @@ (jc (-> pd skel))) (when (nonzero? jc) (when (logtest? (-> jc status) (janim-status blerc)) - (set! pc-force-mercneric #t) + (if *blerc-hack* + (set! pc-merc-vtx-update #t) + (set! pc-force-mercneric #t) + ) ) ) ) @@ -1518,7 +1538,7 @@ ) ) ) - (set! s2-0 (pc-merc-draw-request arg0 (the pointer s2-0) (the pointer matrix-data))) + (set! s2-0 (pc-merc-draw-request arg0 (the pointer s2-0) (the pointer matrix-data) pc-merc-vtx-update)) ; (if (nonzero? (-> *merc-bucket-info* need-mercprime-if-merc)) ; (set! (-> dma-buf base) (draw-bones-merc arg0 matrix-data s2-0 32 17)) ; (set! (-> dma-buf base) (draw-bones-merc arg0 matrix-data s2-0 35 20)) diff --git a/goal_src/jak2/engine/game/main.gc b/goal_src/jak2/engine/game/main.gc index 98c92761f6..f743e35c5e 100644 --- a/goal_src/jak2/engine/game/main.gc +++ b/goal_src/jak2/engine/game/main.gc @@ -1425,13 +1425,13 @@ ; ;; Run blerc to modify foreground models (with-profiler 'merc *profile-merc-color* - ; (blerc-execute) - ; (blerc-init) + (blerc-execute) + (blerc-init) ) ; ;; Run other merc effects that modify vertices ; (texscroll-execute) - ; (ripple-execute) + (ripple-execute) (region-execute) ;; final call to update joints before drawing. 
@@ -1641,7 +1641,7 @@ (free-nodes *touching-list*) (prepare *collide-rider-pool*) (update-actor-hash) - ; (blerc-init) + (blerc-init) ; (dma-send ; (the-as dma-bank #x10008000) ; (the-as uint (-> *collide-vif0-init* data)) diff --git a/goal_src/jak2/engine/gfx/foreground/foreground.gc b/goal_src/jak2/engine/gfx/foreground/foreground.gc index 5ebaeafe65..09bb4ab806 100644 --- a/goal_src/jak2/engine/gfx/foreground/foreground.gc +++ b/goal_src/jak2/engine/gfx/foreground/foreground.gc @@ -630,7 +630,7 @@ ) ) -(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (tex-idx int)) +(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (tex-idx int) (update-verts symbol)) "Send a request to PC Merc2 to draw the given object. Only draws the effects which match this texture index. Just places a single big dma packet, you have to patch the end yourself." @@ -724,6 +724,7 @@ (set! (-> flags 0) (-> merc-ctrl header effect-count)) (set! (-> flags 1) ignore-alpha-mask) (set! (-> flags 2) enable-mask) + (set! (-> flags 3) (if update-verts 1 0)) ) (&+! dma-buf (* 16 1)) (+! qwc-total 1) @@ -735,6 +736,17 @@ ) ) + (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) + (&+! dma-buf (* 16 num-fades)) + (+! qwc-total num-fades) + ) + + ;; merc ptrs + (let ((merc-ptrs (the (pointer object) dma-buf))) + (dotimes (i (-> merc-ctrl header effect-count)) + (set! (-> merc-ptrs i) (-> merc-ctrl effect i)) + ) + ) (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) (&+! dma-buf (* 16 num-fades)) (+! qwc-total num-fades) @@ -753,6 +765,7 @@ (let ((use-flags (new 'stack-no-clear 'array 'uint8 7)) (mctrl (-> dc mgeo)) (buckets (-> (scratchpad-object foreground-work) grid level-buckets (-> (scratchpad-object foreground-work) draw-index-map (-> dc level-index)))) + (has-ripple #f) ) ;; mark all as unused, until we see a use (dotimes (i 7) (set! 
(-> use-flags i) 0)) @@ -761,6 +774,9 @@ (dotimes (i (-> mctrl header effect-count)) ;;(format 0 "effect ~d, texture ~d~%" i (-> mctrl effect i texture-index)) (set! (-> use-flags (-> mctrl effect i texture-index)) 1) + (when (logtest? (-> (-> dc lod-set lod (-> dc cur-lod) geo) effect i effect-bits) (effect-bits ripple)) + (set! has-ripple #t) + ) ) ;; loop over texture groupe @@ -768,8 +784,21 @@ (when (nonzero? (-> use-flags i)) ;; this one is used, update the model for pc. ;; create dma-packet to send the name: - (let ((packet (the-as dma-packet dma-buf))) - (set! dma-buf (pc-merc-draw-request dc dma-buf matrix-buf i)) + (let ((packet (the-as dma-packet dma-buf)) + (vertex-update #f) + ) + (when has-ripple + (set! vertex-update #t) + ) + (let* ((pd (the process-drawable (-> dc process))) + (jc (-> pd skel))) + (when (nonzero? jc) + (when (logtest? (-> jc status) (joint-control-status blend-shape-valid)) + (set! vertex-update #t) + ) + ) + ) + (set! dma-buf (pc-merc-draw-request dc dma-buf matrix-buf i vertex-update)) ;; create a patch packet (let ((patch-packet (the-as dma-packet dma-buf))) @@ -995,7 +1024,7 @@ ) ) ) - (if (and (logtest? (-> geo effect effect-idx effect-bits) (effect-bits ripple)) (-> dc ripple)) + (when (and (logtest? (-> geo effect effect-idx effect-bits) (effect-bits ripple)) (-> dc ripple)) (set! dma-ptr (foreground-ripple dc geo dma-ptr effect-idx)) ) (nonzero? (-> dc death-timer))