diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp index 4ae886e973..36c65f0e46 100644 --- a/common/custom_data/TFrag3Data.cpp +++ b/common/custom_data/TFrag3Data.cpp @@ -256,19 +256,46 @@ void MercDraw::serialize(Serializer& ser) { ser.from_ptr(&num_triangles); } -void MercEffect::serialize(Serializer& ser) { +void MercModifiableDrawGroup::serialize(Serializer& ser) { if (ser.is_saving()) { - ser.save(draws.size()); + ser.save(mod_draw.size()); } else { - draws.resize(ser.load()); + mod_draw.resize(ser.load()); } - for (auto& draw : draws) { + for (auto& draw : mod_draw) { draw.serialize(ser); } + if (ser.is_saving()) { + ser.save(fix_draw.size()); + } else { + fix_draw.resize(ser.load()); + } + for (auto& draw : fix_draw) { + draw.serialize(ser); + } + ser.from_pod_vector(&vertices); + ser.from_pod_vector(&vertex_lump4_addr); + ser.from_pod_vector(&fragment_mask); + ser.from_ptr(&expect_vidx_end); +} + +void MercEffect::serialize(Serializer& ser) { + if (ser.is_saving()) { + ser.save(all_draws.size()); + } else { + all_draws.resize(ser.load()); + } + for (auto& draw : all_draws) { + draw.serialize(ser); + } + + mod.serialize(ser); + ser.from_ptr(&envmap_mode); ser.from_ptr(&envmap_texture); ser.from_ptr(&has_envmap); + ser.from_ptr(&has_mod_draw); } void MercModel::serialize(Serializer& ser) { @@ -283,6 +310,8 @@ void MercModel::serialize(Serializer& ser) { } ser.from_ptr(&max_draws); ser.from_ptr(&max_bones); + ser.from_ptr(&st_vif_add); + ser.from_ptr(&xyz_scale); } void MercModelGroup::serialize(Serializer& ser) { @@ -360,109 +389,163 @@ void Level::serialize(Serializer& ser) { } } -std::array Level::get_memory_usage() const { - std::array result; - result.fill(0); +void MercModifiableDrawGroup::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_MOD_VERT, sizeof(MercVertex) * vertices.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_DRAW_1, sizeof(MercDraw) * fix_draw.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_DRAW_2, sizeof(MercDraw) * mod_draw.size()); + tracker->add(MemoryUsageCategory::MERC_MOD_TABLE, sizeof(u16) * vertex_lump4_addr.size()); +} - // textures - for (const auto& tex : textures) { - result[TEXTURE] += tex.data.size() * sizeof(u32); +void MercEffect::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_DRAW, sizeof(MercDraw) * all_draws.size()); + mod.memory_usage(tracker); +} + +void MercModel::memory_usage(MemoryUsageTracker* tracker) const { + for (auto& effect : effects) { + effect.memory_usage(tracker); } +} - // tfrag - for (const auto& tfrag_tree_geoms : tfrag_trees) { - for (const auto& tfrag_tree : tfrag_tree_geoms) { - for (const auto& draw : tfrag_tree.draws) { - result[TFRAG_INDEX] += draw.runs.size() * sizeof(StripDraw::VertexRun); - result[TFRAG_INDEX] += draw.plain_indices.size() * sizeof(u32); - result[TFRAG_VIS] += draw.vis_groups.size() * sizeof(StripDraw::VisGroup); - } - result[TFRAG_VERTS] += - tfrag_tree.packed_vertices.vertices.size() * sizeof(PackedTfragVertices::Vertex); - result[TFRAG_CLUSTER] += - tfrag_tree.packed_vertices.cluster_origins.size() * sizeof(math::Vector); - result[TFRAG_TIME_OF_DAY] += tfrag_tree.colors.size() * sizeof(TimeOfDayColor); - result[TFRAG_BVH] += tfrag_tree.bvh.vis_nodes.size() * sizeof(VisNode); +void MercModelGroup::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::MERC_VERT, sizeof(MercVertex) * vertices.size()); + tracker->add(MemoryUsageCategory::MERC_INDEX, sizeof(u32) * indices.size()); + for (auto& model : models) { + model.memory_usage(tracker); + } +} + +void CollisionMesh::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::COLLISION, sizeof(Vertex) * vertices.size()); +} + +void PackedShrubVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::SHRUB_VERT, 64 * matrices.size()); + tracker->add(MemoryUsageCategory::SHRUB_VERT, sizeof(InstanceGroup) * instance_groups.size()); + tracker->add(MemoryUsageCategory::SHRUB_VERT, sizeof(Vertex) * vertices.size()); +} + +void ShrubTree::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::SHRUB_TIME_OF_DAY, + sizeof(TimeOfDayColor) * time_of_day_colors.size()); + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::SHRUB_DRAW, sizeof(ShrubDraw) * static_draws.size()); + tracker->add(MemoryUsageCategory::SHRUB_IND, sizeof(u32) * indices.size()); +} + +void InstancedStripDraw::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_INST_INDEX, sizeof(u32) * vertex_index_stream.size()); + tracker->add(MemoryUsageCategory::TIE_INST_VIS, sizeof(InstanceGroup) * instance_groups.size()); +} + +void PackedTieVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_CIDX, sizeof(u16) * color_indices.size()); + tracker->add(MemoryUsageCategory::TIE_MATRICES, 64 * matrices.size()); + tracker->add(MemoryUsageCategory::TIE_GRPS, sizeof(MatrixGroup) * matrix_groups.size()); + tracker->add(MemoryUsageCategory::TIE_VERTS, sizeof(Vertex) * vertices.size()); +} + +void TieTree::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TIE_BVH, sizeof(VisNode) * bvh.vis_nodes.size()); + for (auto& draw : static_draws) { + tracker->add(MemoryUsageCategory::TIE_DEINST_INDEX, + draw.runs.size() * sizeof(StripDraw::VertexRun)); + tracker->add(MemoryUsageCategory::TIE_DEINST_INDEX, draw.plain_indices.size() * sizeof(u32)); + tracker->add(MemoryUsageCategory::TIE_DEINST_VIS, + draw.vis_groups.size() * sizeof(StripDraw::VisGroup)); + } + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::TIE_TIME_OF_DAY, sizeof(TimeOfDayColor) * colors.size()); + + for (auto& draw : instanced_wind_draws) { + draw.memory_usage(tracker); + } + tracker->add(MemoryUsageCategory::TIE_WIND_INSTANCE_INFO, + sizeof(TieWindInstance) * wind_instance_info.size()); +} + +void PackedTfragVertices::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TFRAG_VERTS, + sizeof(PackedTfragVertices::Vertex) * vertices.size()); + tracker->add(MemoryUsageCategory::TFRAG_CLUSTER, + sizeof(math::Vector) * cluster_origins.size()); +} + +void TfragTree::memory_usage(MemoryUsageTracker* tracker) const { + for (auto& draw : draws) { + tracker->add(MemoryUsageCategory::TFRAG_INDEX, draw.runs.size() * sizeof(StripDraw::VertexRun)); + tracker->add(MemoryUsageCategory::TFRAG_INDEX, draw.plain_indices.size() * sizeof(u32)); + tracker->add(MemoryUsageCategory::TFRAG_VIS, + draw.vis_groups.size() * sizeof(StripDraw::VisGroup)); + } + packed_vertices.memory_usage(tracker); + tracker->add(MemoryUsageCategory::TFRAG_TIME_OF_DAY, sizeof(TimeOfDayColor) * colors.size()); + tracker->add(MemoryUsageCategory::TFRAG_BVH, sizeof(VisNode) * bvh.vis_nodes.size()); +} + +void Texture::memory_usage(MemoryUsageTracker* tracker) const { + tracker->add(MemoryUsageCategory::TEXTURE, data.size() * sizeof(u32)); +} + +void Level::memory_usage(MemoryUsageTracker* tracker) const { + for (const auto& texture : textures) { + texture.memory_usage(tracker); + } + for (const auto& tftk : tfrag_trees) { + for (const auto& tree : tftk) { + tree.memory_usage(tracker); } } - - // tie - for (const auto& tie_tree_geoms : tie_trees) { - for (const auto& tie_tree : tie_tree_geoms) { - result[TIE_BVH] += tie_tree.bvh.vis_nodes.size(); - for (const auto& draw : tie_tree.static_draws) { - result[TIE_DEINST_INDEX] += draw.runs.size() * sizeof(StripDraw::VertexRun); - result[TIE_DEINST_VIS] += draw.vis_groups.size() * sizeof(StripDraw::VisGroup); - } - result[TIE_VERTS] += - tie_tree.packed_vertices.vertices.size() * sizeof(PackedTieVertices::Vertex); - result[TIE_CIDX] += tie_tree.packed_vertices.color_indices.size() * sizeof(u16); - result[TIE_MATRICES] += tie_tree.packed_vertices.matrices.size() * 4 * 4 * 4; - result[TIE_GRPS] += - tie_tree.packed_vertices.matrix_groups.size() * sizeof(PackedTieVertices::MatrixGroup); - result[TIE_TIME_OF_DAY] += tie_tree.colors.size() * sizeof(TimeOfDayColor); - - for (const auto& draw : tie_tree.instanced_wind_draws) { - result[TIE_INST_INDEX] += draw.vertex_index_stream.size() * sizeof(u32); - result[TIE_INST_VIS] += - draw.instance_groups.size() * sizeof(InstancedStripDraw::InstanceGroup); - } - result[TIE_WIND_INSTANCE_INFO] += - tie_tree.wind_instance_info.size() * sizeof(TieWindInstance); + for (const auto& ttk : tie_trees) { + for (const auto& tree : ttk) { + tree.memory_usage(tracker); } } - - // shrub - for (const auto& shrub_tree : shrub_trees) { - result[SHRUB_TIME_OF_DAY] += shrub_tree.time_of_day_colors.size() * sizeof(TimeOfDayColor); - result[SHRUB_VERT] += shrub_tree.packed_vertices.matrices.size() * 4 * 4 * 4; - result[SHRUB_VERT] += - shrub_tree.packed_vertices.vertices.size() * sizeof(PackedShrubVertices::Vertex); - result[SHRUB_VERT] += shrub_tree.packed_vertices.instance_groups.size() * - sizeof(PackedShrubVertices::InstanceGroup); - result[SHRUB_IND] += sizeof(u32) * shrub_tree.indices.size(); + for (const auto& tree : shrub_trees) { + tree.memory_usage(tracker); } - - // merc - result[MERC_INDEX] += merc_data.indices.size() * sizeof(u32); - result[MERC_VERT] += merc_data.vertices.size() * sizeof(MercVertex); - - // collision - result[COLLISION] += sizeof(CollisionMesh::Vertex) * collision.vertices.size(); - - return result; + collision.memory_usage(tracker); + merc_data.memory_usage(tracker); } void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { int total_accounted = 0; - auto memory_use_by_category = lev.get_memory_usage(); + MemoryUsageTracker mem_use; + lev.memory_usage(&mem_use); std::vector> known_categories = { - {"texture", memory_use_by_category[tfrag3::MemoryUsageCategory::TEXTURE]}, - {"tie-deinst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]}, - {"tie-deinst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]}, - {"tie-inst-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_VIS]}, - {"tie-inst-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]}, - {"tie-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_BVH]}, - {"tie-verts", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_VERTS]}, - {"tie-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]}, - {"tie-wind-inst-info", - memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]}, - {"tie-cidx", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_CIDX]}, - {"tie-mats", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_MATRICES]}, - {"tie-grps", memory_use_by_category[tfrag3::MemoryUsageCategory::TIE_GRPS]}, - {"tfrag-vis", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VIS]}, - {"tfrag-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_INDEX]}, - {"tfrag-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_VERTS]}, - {"tfrag-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]}, - {"tfrag-cluster", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]}, - {"tfrag-bvh", memory_use_by_category[tfrag3::MemoryUsageCategory::TFRAG_BVH]}, - {"shrub-colors", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]}, - {"shrub-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_VERT]}, - {"shrub-ind", memory_use_by_category[tfrag3::MemoryUsageCategory::SHRUB_IND]}, - {"collision", memory_use_by_category[tfrag3::MemoryUsageCategory::COLLISION]}, - {"merc-vert", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_VERT]}, - {"merc-idx", memory_use_by_category[tfrag3::MemoryUsageCategory::MERC_INDEX]}}; + {"texture", mem_use.data[tfrag3::MemoryUsageCategory::TEXTURE]}, + {"tie-deinst-vis", mem_use.data[tfrag3::MemoryUsageCategory::TIE_DEINST_VIS]}, + {"tie-deinst-idx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_DEINST_INDEX]}, + {"tie-inst-vis", mem_use.data[tfrag3::MemoryUsageCategory::TIE_INST_VIS]}, + {"tie-inst-idx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_INST_INDEX]}, + {"tie-bvh", mem_use.data[tfrag3::MemoryUsageCategory::TIE_BVH]}, + {"tie-verts", mem_use.data[tfrag3::MemoryUsageCategory::TIE_VERTS]}, + {"tie-colors", mem_use.data[tfrag3::MemoryUsageCategory::TIE_TIME_OF_DAY]}, + {"tie-wind-inst-info", mem_use.data[tfrag3::MemoryUsageCategory::TIE_WIND_INSTANCE_INFO]}, + {"tie-cidx", mem_use.data[tfrag3::MemoryUsageCategory::TIE_CIDX]}, + {"tie-mats", mem_use.data[tfrag3::MemoryUsageCategory::TIE_MATRICES]}, + {"tie-grps", mem_use.data[tfrag3::MemoryUsageCategory::TIE_GRPS]}, + {"tfrag-vis", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_VIS]}, + {"tfrag-idx", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_INDEX]}, + {"tfrag-vert", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_VERTS]}, + {"tfrag-colors", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_TIME_OF_DAY]}, + {"tfrag-cluster", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_CLUSTER]}, + {"tfrag-bvh", mem_use.data[tfrag3::MemoryUsageCategory::TFRAG_BVH]}, + {"shrub-colors", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_TIME_OF_DAY]}, + {"shrub-vert", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_VERT]}, + {"shrub-ind", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_IND]}, + {"shrub-draw", mem_use.data[tfrag3::MemoryUsageCategory::SHRUB_DRAW]}, + {"collision", mem_use.data[tfrag3::MemoryUsageCategory::COLLISION]}, + {"merc-vert", mem_use.data[tfrag3::MemoryUsageCategory::MERC_VERT]}, + {"merc-idx", mem_use.data[tfrag3::MemoryUsageCategory::MERC_INDEX]}, + {"merc-draw", mem_use.data[tfrag3::MemoryUsageCategory::MERC_DRAW]}, + {"merc-mod-vert", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_VERT]}, + {"merc-mod-ind", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_IND]}, + {"merc-mod-table", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_TABLE]}, + {"merc-mod-draw-1", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_DRAW_1]}, + {"merc-mod-draw-2", mem_use.data[tfrag3::MemoryUsageCategory::MERC_MOD_DRAW_2]}, + }; for (auto& known : known_categories) { total_accounted += known.second; } @@ -473,8 +556,10 @@ void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size) { [](const auto& a, const auto& b) { return a.second > b.second; }); for (const auto& x : known_categories) { - fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024, - 100.f * (float)x.second / uncompressed_data_size); + if (x.second) { + fmt::print("{:30s} : {:6d} kB {:3.1f}%\n", x.first, x.second / 1024, + 100.f * (float)x.second / uncompressed_data_size); + } } } diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h index e4dec9ef00..83133d0596 100644 --- a/common/custom_data/Tfrag3Data.h +++ b/common/custom_data/Tfrag3Data.h @@ -44,16 +44,36 @@ enum MemoryUsageCategory { SHRUB_TIME_OF_DAY, SHRUB_VERT, SHRUB_IND, + SHRUB_DRAW, MERC_VERT, MERC_INDEX, + MERC_DRAW, + + MERC_MOD_DRAW_1, + MERC_MOD_DRAW_2, + MERC_MOD_VERT, + MERC_MOD_IND, + MERC_MOD_TABLE, COLLISION, NUM_CATEGORIES }; -constexpr int TFRAG3_VERSION = 22; +struct MemoryUsageTracker { + u32 data[MemoryUsageCategory::NUM_CATEGORIES]; + + MemoryUsageTracker() { + for (auto& x : data) { + x = 0; + } + } + + void add(MemoryUsageCategory category, u32 size_bytes) { data[category] += size_bytes; } +}; + +constexpr int TFRAG3_VERSION = 24; // These vertices should be uploaded to the GPU at load time and don't change struct PreloadedVertex { @@ -93,6 +113,7 @@ struct PackedTieVertices { std::vector matrix_groups; // todo pack std::vector vertices; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct PackedTfragVertices { @@ -102,7 +123,7 @@ struct PackedTfragVertices { s16 s, t; u16 color_index; }; - + void memory_usage(MemoryUsageTracker* tracker) const; std::vector vertices; std::vector> cluster_origins; }; @@ -135,7 +156,7 @@ struct PackedShrubVertices { std::vector instance_groups; // todo pack std::vector vertices; u32 total_vertex_count; - + void memory_usage(MemoryUsageTracker* tracker) const; void serialize(Serializer& ser); }; @@ -207,6 +228,7 @@ struct InstancedStripDraw { // for debug counting. u32 num_triangles = 0; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // node in the BVH. @@ -256,6 +278,7 @@ struct Texture { std::string debug_tpage_name; bool load_to_pool = false; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // Tfrag trees have several kinds: @@ -279,6 +302,7 @@ struct TfragTree { } unpacked; void unpack(); void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct TieWindInstance { @@ -305,6 +329,7 @@ struct TieTree { } unpacked; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; void unpack(); }; @@ -321,6 +346,7 @@ struct ShrubTree { } unpacked; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; void unpack(); }; @@ -336,6 +362,7 @@ struct CollisionMesh { static_assert(sizeof(Vertex) == 32); std::vector vertices; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // MERC @@ -368,12 +395,25 @@ struct MercDraw { void serialize(Serializer& ser); }; +struct MercModifiableDrawGroup { + std::vector vertices; + std::vector vertex_lump4_addr; + std::vector fix_draw, mod_draw; + std::vector fragment_mask; + u32 expect_vidx_end = 0; + void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; +}; + struct MercEffect { - std::vector draws; + std::vector all_draws; + MercModifiableDrawGroup mod; DrawMode envmap_mode; u32 envmap_texture; bool has_envmap = false; + bool has_mod_draw = false; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct MercModel { @@ -381,7 +421,10 @@ struct MercModel { std::vector effects; u32 max_draws; u32 max_bones; + u32 st_vif_add; + float xyz_scale; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; struct MercModelGroup { @@ -389,6 +432,7 @@ struct MercModelGroup { std::vector indices; std::vector models; void serialize(Serializer& ser); + void memory_usage(MemoryUsageTracker* tracker) const; }; // @@ -407,8 +451,7 @@ struct Level { MercModelGroup merc_data; u16 version2 = TFRAG3_VERSION; void serialize(Serializer& ser); - - std::array get_memory_usage() const; + void memory_usage(MemoryUsageTracker* tracker) const; }; void print_memory_usage(const tfrag3::Level& lev, int uncompressed_data_size); diff --git a/common/global_profiler/GlobalProfiler.h b/common/global_profiler/GlobalProfiler.h index a156517a04..e8fcb3407c 100644 --- a/common/global_profiler/GlobalProfiler.h +++ b/common/global_profiler/GlobalProfiler.h @@ -9,7 +9,7 @@ struct ProfNode { u64 ts; u64 tid; - char name[32]; + char name[128]; enum Kind : u8 { BEGIN, END, INSTANT, UNUSED } kind = UNUSED; }; diff --git a/common/type_system/TypeSystem.cpp b/common/type_system/TypeSystem.cpp index c44f8d0049..1c023ea876 100644 --- a/common/type_system/TypeSystem.cpp +++ b/common/type_system/TypeSystem.cpp @@ -516,7 +516,7 @@ int TypeSystem::get_load_size_allow_partial_def(const TypeSpec& ts) const { } MethodInfo TypeSystem::override_method(Type* type, - const std::string& type_name, + const std::string& /*type_name*/, const int method_id, const std::optional& docstring) { // Lookup the method from the parent type diff --git a/decompiler/level_extractor/MercData.cpp b/decompiler/level_extractor/MercData.cpp index 5d932ebdaf..267ae1d050 100644 --- a/decompiler/level_extractor/MercData.cpp +++ b/decompiler/level_extractor/MercData.cpp @@ -356,6 +356,10 @@ void MercEffect::from_ref(TypedRef tr, envmap_or_effect_usage = read_plain_data_field(tr, "effect-usage", dts); } + if (type->lookup_field("texture-index", &temp)) { + texture_index = read_plain_data_field(tr, "texture-index", dts); + } + // do frag-ctrls TypedRef fc(deref_label(get_field_ref(tr, "frag-ctrl", dts)), dts.ts.lookup_type("merc-fragment-control")); @@ -369,6 +373,15 @@ void MercEffect::from_ref(TypedRef tr, f = frag_geo.emplace_back().from_ref(f, dts, frag_ctrl.at(i), main_control); } + // do blend ctrls + if (blend_frag_count) { + TypedRef bc(deref_label(get_field_ref(tr, "blend-ctrl", dts)), + dts.ts.lookup_type("merc-blend-ctrl")); + for (u32 i = 0; i < blend_frag_count; i++) { + bc = blend_ctrl.emplace_back().from_ref(bc, dts, main_control.blend_target_count); + } + } + // do extra info auto fr = get_field_ref(tr, "extra-info", dts); const auto& word = fr.data->words_by_seg.at(fr.seg).at(fr.byte_offset / 4); @@ -414,6 +427,55 @@ void MercCtrl::from_ref(TypedRef tr, const DecompilerTypeSystem& dts) { effects.emplace_back().from_ref(eff_ref, dts, header); eff_ref.ref.byte_offset += 32; // } + // debug_print_blerc(); +} + +void MercCtrl::debug_print_blerc() { + int total_verts = 0; + int blerc_verts = 0; + int total_frags = 0; + int blerc_frags = 0; + int total_effects = effects.size(); + int blerc_effects = 0; + + for (auto& effect : effects) { + bool effect_has_blerc = false; + for (size_t frag_idx = 0; frag_idx < effect.frag_count; frag_idx++) { + total_frags++; + auto& fc = effect.frag_ctrl.at(frag_idx); + total_verts += fc.lump_four_count; + + if (frag_idx < effect.blend_ctrl.size()) { + auto& bfc = effect.blend_ctrl.at(frag_idx); + if (bfc.blend_vtx_count) { + effect_has_blerc = true; + blerc_frags++; + blerc_verts += fc.lump_four_count; + } + } + } + + if (effect_has_blerc) { + blerc_effects++; + } + } + if (blerc_effects) { + fmt::print("BLERC: {}, {}/{} e, {}/{} f, {}/{} v\n", name, blerc_effects, total_effects, + blerc_frags, total_frags, blerc_verts, total_verts); + } +} + +TypedRef MercBlendCtrl::from_ref(TypedRef tr, + const DecompilerTypeSystem& dts, + int blend_target_count) { + blend_vtx_count = read_plain_data_field(tr, "blend-vtx-count", dts); + nonzero_index_count = read_plain_data_field(tr, "nonzero-index-count", dts); + tr.ref.byte_offset += 2; + for (int i = 0; i < blend_target_count; i++) { + bt_index.push_back(deref_u8(tr.ref, 0)); + tr.ref.byte_offset += 1; + } + return tr; } std::string MercCtrl::print() { diff --git a/decompiler/level_extractor/MercData.h b/decompiler/level_extractor/MercData.h index 146fa16809..9536ea893c 100644 --- a/decompiler/level_extractor/MercData.h +++ b/decompiler/level_extractor/MercData.h @@ -162,16 +162,26 @@ struct MercFragment { std::string print() const; }; +struct MercBlendCtrl { + u8 blend_vtx_count; + u8 nonzero_index_count; + std::vector bt_index; + TypedRef from_ref(TypedRef tr, const DecompilerTypeSystem& dts, int blend_target_count); +}; + struct MercExtraInfo { std::optional shader; }; +constexpr int kRippleEffectBit = 4; // true in jak 1 and jak 2 + struct MercEffect { //((frag-geo merc-fragment :offset-assert 0) ;; ? std::vector frag_geo; // (frag-ctrl merc-fragment-control :offset-assert 4) std::vector frag_ctrl; // (blend-data merc-blend-data :offset-assert 8) ?? + std::vector blend_ctrl; // (blend-ctrl merc-blend-ctrl :offset-assert 12) ?? // (dummy0 uint8 :offset-assert 16) ?? u8 effect_bits; @@ -184,6 +194,8 @@ struct MercEffect { // (extra-info merc-extra-info :offset-assert 28) ?? MercExtraInfo extra_info; + u8 texture_index = -1; // jak 2 only + void from_ref(TypedRef tr, const DecompilerTypeSystem& dts, const MercCtrlHeader& main_control); std::string print(); }; @@ -195,6 +207,7 @@ struct MercCtrl { std::vector effects; void from_ref(TypedRef tr, const DecompilerTypeSystem& dts); + void debug_print_blerc(); std::string print(); }; } // namespace decompiler \ No newline at end of file diff --git a/decompiler/level_extractor/extract_merc.cpp b/decompiler/level_extractor/extract_merc.cpp index de998e195a..2869bc8e0e 100644 --- a/decompiler/level_extractor/extract_merc.cpp +++ b/decompiler/level_extractor/extract_merc.cpp @@ -94,6 +94,12 @@ struct MercUnpackedVtx { u16 dst0; u16 dst1; + + bool can_be_modified = false; + int idx_in_combined_lump4 = -1; // divided by 3 + + int flump4 = -1; + int frag = -1; }; /*! @@ -107,6 +113,7 @@ struct ConvertedMercEffect { // draws from all fragments. std::vector draws; std::vector vertices; + std::vector verts_per_frag; bool has_envmap = false; DrawMode envmap_mode; u32 envmap_texture; @@ -216,7 +223,9 @@ void update_mode_from_alpha1(GsAlpha reg, DrawMode& mode) { /*! * Convert merc shader to PC draw mode */ -DrawMode process_draw_mode(const MercShader& info, bool enable_alpha_test) { +DrawMode process_draw_mode(const MercShader& info, + bool enable_alpha_test, + bool enable_alpha_blend) { DrawMode mode; /* * (new 'static 'gs-test @@ -237,7 +246,7 @@ DrawMode process_draw_mode(const MercShader& info, bool enable_alpha_test) { mode.set_depth_test(GsTest::ZTest::GEQUAL); // check these - mode.disable_ab(); + mode.set_ab(enable_alpha_blend); mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST); mode.set_tcc(info.tex0.tcc()); mode.set_decal(info.tex0.tfx() == GsTex0::TextureFunction::DECAL); @@ -306,7 +315,10 @@ void handle_frag(const std::string& debug_name, const MercFragmentControl& frag_ctrl, const MercState& state, std::vector& effect_vertices, - MercMemory& memory) { + MercMemory& memory, + bool can_be_modified, + int base_lump4, + int frag_idx) { (void)frag_ctrl; (void)debug_name; // lg::print("handling frag: {}\n", debug_name); @@ -330,6 +342,10 @@ void handle_frag(const std::string& debug_name, for (size_t i = 0; i < mat123_cnt; i++) { u32 current_vtx_idx = effect_vertices.size(); // idx in effect vertex list. auto& vtx = effect_vertices.emplace_back(); + vtx.can_be_modified = can_be_modified; + vtx.idx_in_combined_lump4 = lump_ptr / 3 + base_lump4; + vtx.frag = frag_idx; + vtx.flump4 = lump_ptr / 3; if (i < mat1_cnt) { vtx.kind = 1; // 1 matrix @@ -712,7 +728,7 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, result.effect_idx = effect_idx; if (input_effect.extra_info.shader) { result.has_envmap = true; - result.envmap_mode = process_draw_mode(*input_effect.extra_info.shader, false); + result.envmap_mode = process_draw_mode(*input_effect.extra_info.shader, false, false); result.envmap_mode.set_ab(true); u32 new_tex = remap_texture(input_effect.extra_info.shader->original_tex, map); ASSERT(result.envmap_mode.get_tcc_enable()); @@ -763,6 +779,12 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, result.envmap_mode = mode; result.envmap_mode.set_ab(true); } + + bool use_alpha_blend = false; + if (version == GameVersion::Jak2) { + use_alpha_blend = input_effect.texture_index == 4; // water + } + // full reset of state per effect. // we have no idea what the previous effect draw will be - it might be given to // mercneric. @@ -771,6 +793,8 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, MercMemory merc_memories[2]; // double buffered output int memory_buffer_toggle = 0; // which output we're in + int combined_lump4_addr = 0; + for (size_t fi = 0; fi < input_effect.frag_ctrl.size(); fi++) { const auto& frag = input_effect.frag_geo[fi]; const auto& frag_ctrl = input_effect.frag_ctrl[fi]; @@ -792,9 +816,20 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, // run the frag. // this will add vertices to the per-effect vertex lists and also update the merc memory // to point to these. + bool can_be_modified = false; + if (fi < input_effect.blend_ctrl.size()) { + can_be_modified = input_effect.blend_ctrl.at(fi).blend_vtx_count > 0; + } + + if (input_effect.effect_bits & kRippleEffectBit) { + can_be_modified = true; + } handle_frag(debug_name, ctrl_header, frag, frag_ctrl, merc_state, result.vertices, - merc_memories[memory_buffer_toggle]); + merc_memories[memory_buffer_toggle], can_be_modified, combined_lump4_addr, fi); + u32 vert_count = frag.lump4_unpacked.size() / 3; + combined_lump4_addr += vert_count; + result.verts_per_frag.push_back(vert_count); // we'll add draws after this draw, but wait to actually populate the index lists until // we've processed all the vertices. @@ -819,7 +854,8 @@ ConvertedMercEffect convert_merc_effect(const MercEffect& input_effect, for (size_t i = 0; i < frag.fp_header.shader_cnt; i++) { const auto& shader = frag.shaders.at(i); // update merc state from shader (will hold over to next fragment, if needed) - merc_state.merc_draw_mode.mode = process_draw_mode(shader, result.has_envmap); + merc_state.merc_draw_mode.mode = + process_draw_mode(shader, result.has_envmap, use_alpha_blend); if (!merc_state.merc_draw_mode.mode.get_tcc_enable()) { ASSERT(false); } @@ -956,6 +992,193 @@ tfrag3::MercVertex convert_vertex(const MercUnpackedVtx& vtx, float xyz_scale) { return out; } +struct VertexSourceInfo { + int combined_lump4; + int frag; + int flump4; +}; + +void create_modifiable_vertex_data( + const std::vector& vtx_mod_flag, + const std::vector& vtx_srcs, + tfrag3::MercModelGroup& out, + size_t first_out_vertex, + size_t first_out_model, + const std::vector>& all_effects) { + ASSERT(vtx_mod_flag.size() + first_out_vertex == out.vertices.size()); + + // we need to be able to modify some vertices at runtime. + // this can be detected vertex-by-vertex + // the plan is to find MercEffects that contain modifiable vertices, and provide an alternate way + // to draw them. In the case where no vertices should be modified, we can fall back to the normal + // merc drawing path. + + // In this modifiable draw path, there will be a list of "fixed draws", which draw vertices that + // cannot be modified. This set is known at build-time. + // The "mod draws" will draw the modifiable vertices. These use the normal index buffer, but + // index into a per-effect modifiable vertex buffer. + + // std::vector fixed_draws, mod_draws; + + // some stats + int num_tris = 0; // all triangles + int mod_tris = 0; // triangles in mod draws + + // loop over models added from this art-group + for (size_t mi = first_out_model; mi < out.models.size(); mi++) { + auto& model = out.models.at(mi); + // loop over "effects" within this model. the pc format merges all fragments in an effect + // together. + + for (size_t ei = 0; ei < model.effects.size(); ei++) { + auto& effect = model.effects[ei]; + + std::vector> inds_per_mod_draw; + + for (const auto& draw : effect.all_draws) { + num_tris += draw.num_triangles; + + // first check to see what's in this draw + bool found_mod = false; + bool found_fixed = false; + for (int i = 0; i < (int)draw.index_count; i++) { + u32 idx = out.indices.at(draw.first_index + i); + if (idx == UINT32_MAX) { + continue; + } + ASSERT(idx >= first_out_vertex); + if (vtx_mod_flag.at(idx - first_out_vertex)) { + found_mod = true; + } else { + found_fixed = true; + } + } + + if (!found_fixed && !found_mod) { + // nothing found at all, bad + ASSERT_NOT_REACHED(); + } else if (found_fixed && !found_mod) { + // only fixed. can just copy the fixed draw + effect.mod.fix_draw.push_back(draw); + } else if (found_mod && !found_fixed) { + // only mod + effect.mod.mod_draw.push_back(draw); + auto& inds_out = inds_per_mod_draw.emplace_back(); + for (u32 i = 0; i < draw.index_count; i++) { + inds_out.push_back(out.indices.at(draw.first_index + i)); + } + mod_tris += draw.num_triangles; + } else { + // it's a mix... + std::vector> strips; + strips.emplace_back(); + for (u32 i = 0; i < draw.index_count; i++) { + u32 val = out.indices.at(draw.first_index + i); + if (val == UINT32_MAX) { + if (!strips.back().empty()) { + strips.emplace_back(); + } + } else { + strips.back().push_back(val); + } + } + + tfrag3::MercDraw mod = draw; + tfrag3::MercDraw fix = draw; + std::vector mod_ind, fix_ind; + for (auto& strip : strips) { + bool strip_has_mod = false; + for (auto ind : strip) { + if (vtx_mod_flag.at(ind - first_out_vertex)) { + strip_has_mod = true; + break; + } + } + if (strip_has_mod) { + mod_ind.insert(mod_ind.end(), strip.begin(), strip.end()); + mod_ind.push_back(UINT32_MAX); + } else { + fix_ind.insert(fix_ind.end(), strip.begin(), strip.end()); + fix_ind.push_back(UINT32_MAX); + } + } + + mod.index_count = mod_ind.size(); + inds_per_mod_draw.push_back(mod_ind); + fix.first_index = out.indices.size(); + fix.index_count = fix_ind.size(); + out.indices.insert(out.indices.end(), fix_ind.begin(), fix_ind.end()); + + effect.mod.mod_draw.push_back(mod); + effect.mod.fix_draw.push_back(fix); + } + } // for draw + + // if there are no modifiable draws, we can't possible modify anything, so not worth + // storing the fixed draws + if (effect.mod.mod_draw.empty()) { + effect.mod.fix_draw.clear(); + } else { + effect.has_mod_draw = true; + // need to set up the vertex buffer for the modifiable draws + // map of original vertex indices to mod buffer index + std::unordered_map vtx_to_mod_vtx; + for (size_t mdi = 0; mdi < effect.mod.mod_draw.size(); mdi++) { + auto& draw = effect.mod.mod_draw[mdi]; + auto& orig_inds = inds_per_mod_draw.at(mdi); + u32 new_first_index = out.indices.size(); + for (auto vidx : orig_inds) { + if (vidx == UINT32_MAX) { + out.indices.push_back(UINT32_MAX); + continue; // strip restart + } + const auto& existing = vtx_to_mod_vtx.find(vidx); + if (existing == vtx_to_mod_vtx.end()) { + // add vertex to mod buffer + auto idx = effect.mod.vertices.size(); + vtx_to_mod_vtx[vidx] = idx; + effect.mod.vertices.push_back(out.vertices.at(vidx)); + auto src = vtx_srcs.at(vidx - first_out_vertex); + ASSERT(src.combined_lump4 < UINT16_MAX); + effect.mod.vertex_lump4_addr.push_back(src.combined_lump4); + u32 frag_idx = src.frag; + if (frag_idx >= effect.mod.fragment_mask.size()) { + effect.mod.fragment_mask.resize(frag_idx + 1); + } + effect.mod.fragment_mask[frag_idx] = true; + out.indices.push_back(idx); + } else { + out.indices.push_back(existing->second); + } + } + draw.first_index = new_first_index; + } + + // splice out masked fragments, the renderer won't index them + const auto& frag_counts = all_effects.at(mi - first_out_model).at(ei).verts_per_frag; + std::unordered_map old_to_new; + u32 old_idx = 0; + u32 new_idx = 0; + for (size_t fi = 0; fi < effect.mod.fragment_mask.size(); fi++) { + if (effect.mod.fragment_mask[fi]) { + for (u32 vi = 0; vi < frag_counts.at(fi); vi++) { + old_to_new[old_idx] = new_idx; + old_idx++; + new_idx++; + } + } else { + old_idx += frag_counts.at(fi); + } + } + effect.mod.expect_vidx_end = new_idx; + for (auto& v : effect.mod.vertex_lump4_addr) { + v = old_to_new.at(v); + } + } + } + } +} + /*! * Top-level merc extraction */ @@ -990,9 +1213,12 @@ void extract_merc(const ObjectFileData& ag_data, } } + size_t first_out_vertex = out.merc_data.vertices.size(); // convert to PC format // first pass, before merging indices u32 first_model = out.merc_data.models.size(); + std::vector vertex_modify_flags; + std::vector vertex_srcs; std::vector>>> indices_temp; // ctrl, effect, draw, vtx for (size_t ci = 0; ci < ctrls.size(); ci++) { indices_temp.emplace_back(); @@ -1002,6 +1228,8 @@ void extract_merc(const ObjectFileData& ag_data, pc_ctrl.name = ctrl.name; pc_ctrl.max_draws = 0; pc_ctrl.max_bones = 0; + pc_ctrl.st_vif_add = ctrl.header.st_vif_add; + pc_ctrl.xyz_scale = ctrl.header.xyz_scale; for (size_t ei = 0; ei < ctrls[ci].effects.size(); ei++) { indices_temp[ci].emplace_back(); @@ -1013,6 +1241,8 @@ void extract_merc(const ObjectFileData& ag_data, u32 first_vertex = out.merc_data.vertices.size(); for (auto& vtx : effect.vertices) { auto cvtx = convert_vertex(vtx, ctrl.header.xyz_scale); + vertex_modify_flags.push_back(vtx.can_be_modified); + vertex_srcs.push_back({vtx.idx_in_combined_lump4, vtx.frag, vtx.flump4}); out.merc_data.vertices.push_back(cvtx); for (int i = 0; i < 3; i++) { pc_ctrl.max_bones = std::max(pc_ctrl.max_bones, (u32)cvtx.mats[i]); @@ -1023,22 +1253,21 @@ void extract_merc(const ObjectFileData& ag_data, std::map draw_mode_dedup; for (auto& draw : effect.draws) { - pc_ctrl.max_draws++; indices_temp[ci][ei].emplace_back(); // find draw to add to, or create a new one const auto& existing = draw_mode_dedup.find(draw.state.merc_draw_mode.as_u64()); tfrag3::MercDraw* pc_draw = nullptr; u64 pc_draw_idx = -1; if (existing == draw_mode_dedup.end()) { - pc_draw_idx = pc_effect.draws.size(); + pc_draw_idx = pc_effect.all_draws.size(); draw_mode_dedup[draw.state.merc_draw_mode.as_u64()] = pc_draw_idx; - pc_draw = &pc_effect.draws.emplace_back(); + pc_draw = &pc_effect.all_draws.emplace_back(); pc_draw->mode = draw.state.merc_draw_mode.mode; pc_draw->tree_tex_id = find_or_add_texture_to_level( out, tex_db, ctrl.name, draw.state.merc_draw_mode.pc_combo_tex_id); } else { pc_draw_idx = existing->second; - pc_draw = &pc_effect.draws.at(pc_draw_idx); + pc_draw = &pc_effect.all_draws.at(pc_draw_idx); } for (auto idx : draw.indices) { @@ -1057,8 +1286,8 @@ void extract_merc(const ObjectFileData& ag_data, auto& pc_ctrl = out.merc_data.models.at(ci + first_model); for (size_t ei = 0; ei < ctrls[ci].effects.size(); ei++) { auto& pc_effect = pc_ctrl.effects.at(ei); - for (size_t di = 0; di < pc_effect.draws.size(); di++) { - auto& pc_draw = pc_effect.draws.at(di); + for (size_t di = 0; di < pc_effect.all_draws.size(); di++) { + auto& pc_draw = pc_effect.all_draws.at(di); auto& inds = indices_temp[ci][ei][di]; pc_draw.num_triangles = clean_up_vertex_indices(inds); pc_draw.first_index = out.merc_data.indices.size(); @@ -1067,5 +1296,20 @@ void extract_merc(const ObjectFileData& ag_data, } } } + + create_modifiable_vertex_data(vertex_modify_flags, vertex_srcs, out.merc_data, first_out_vertex, + first_model, all_effects); + + // compute max draws + for (u32 mi = first_model; mi < out.merc_data.models.size(); mi++) { + auto& model = out.merc_data.models[mi]; + model.max_draws = 0; + for (auto& e : model.effects) { + model.max_draws += e.all_draws.size(); + if (e.has_mod_draw) { + model.max_draws += e.mod.mod_draw.size() + e.mod.fix_draw.size(); + } + } + } } } // namespace decompiler diff --git a/decompiler/level_extractor/fr3_to_gltf.cpp b/decompiler/level_extractor/fr3_to_gltf.cpp index 21f0f459a6..ffeb7cc0eb 100644 --- a/decompiler/level_extractor/fr3_to_gltf.cpp +++ b/decompiler/level_extractor/fr3_to_gltf.cpp @@ -64,7 +64,7 @@ void unstrip_merc_draws(const std::vector& stripped_indices, for (auto& effect : model.effects) { auto& effect_dts = model_dts.emplace_back(); auto& effect_dtc = model_dtc.emplace_back(); - for (auto& draw : effect.draws) { + for (auto& draw : effect.all_draws) { effect_dts.push_back(unstripped.size()); for (size_t i = 2; i < draw.index_count; i++) { @@ -690,8 +690,8 @@ void add_merc(const tfrag3::Level& level, for (size_t effect_idx = 0; effect_idx < mmodel.effects.size(); effect_idx++) { const auto& effect = mmodel.effects[effect_idx]; - for (size_t draw_idx = 0; draw_idx < effect.draws.size(); draw_idx++) { - const auto& draw = effect.draws[draw_idx]; + for (size_t draw_idx = 0; draw_idx < effect.all_draws.size(); draw_idx++) { + const auto& draw = effect.all_draws[draw_idx]; auto& prim = mesh.primitives.emplace_back(); prim.material = add_material_for_tex(level, model, draw.tree_tex_id, tex_image_map, draw.mode); diff --git a/game/graphics/opengl_renderer/foreground/Merc2.cpp b/game/graphics/opengl_renderer/foreground/Merc2.cpp index bd5801140d..87e8b1b61b 100644 --- a/game/graphics/opengl_renderer/foreground/Merc2.cpp +++ b/game/graphics/opengl_renderer/foreground/Merc2.cpp @@ -1,20 +1,69 @@ #include "Merc2.h" +#include "common/global_profiler/GlobalProfiler.h" + #include "game/graphics/opengl_renderer/background/background_common.h" #include "third-party/imgui/imgui.h" +/* Merc 2 renderer: + The merc2 renderer is the main "foreground" renderer, which draws characters, collectables, + and even some water. + + The PC format renderer does the usual tricks of buffering stuff head of time as much as possible. + The main trick here is to buffer up draws and upload "bones" (skinning matrix) for many draws all + at once. + + The other tricky part is "mod vertices", which may be modified by the game. + We know ahead of time which vertices could be modified, and have a way to upload only those + vertices. + + Each "merc model" corresponds to a merc-ctrl in game. There's one merc-ctrl per LOD of an + art-group. So generally, this will be something like "jak" or "orb" or "some enemy". + + Each model is made up of "effect"s. There are a number of per-effect settings, like environment + mapping. Generally, the purpose of an "effect" is to divide up a model into parts that should be + rendered with a different configuration. + + Within each model, there are fragments. These correspond to how much data can be uploaded to VU1 + memory. For the most part, fragments are not considered by the PC renderer. The only exception is + updating vertices - we must read the data from the game, which is stored in fragments. + + Per level, there is an FR3 file loaded by the loader. Each merc renderer can access multiple + levels. +*/ + +/*! + * Remaining ideas for optimization: + * - port blerc to C++, do it in the rendering thread and avoid the lock. + * - combine envmap draws per effect (might require some funky indexing stuff, or multidraw) + * - smaller vertex formats for mod-vertex + * - AVX version of vertex conversion math + * - eliminate the "copy" step of vertex modification + * - batch uploading the vertex modification data + */ + +std::mutex g_merc_data_mutex; + Merc2::Merc2(const std::string& name, int my_id) : BucketRenderer(name, my_id) { + // Set up main vertex array. This will point to the data stored in the .FR3 level file, and will + // be uploaded to the GPU by the Loader. glGenVertexArrays(1, &m_vao); glBindVertexArray(m_vao); + // Bone buffer to store skinning matrices for multiple draws glGenBuffers(1, &m_bones_buffer); glBindBuffer(GL_UNIFORM_BUFFER, m_bones_buffer); + + // zero initialize the bone buffer. std::vector temp(MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f)); glBufferData(GL_UNIFORM_BUFFER, MAX_SHADER_BONE_VECTORS * sizeof(math::Vector4f), temp.data(), GL_DYNAMIC_DRAW); glBindBuffer(GL_UNIFORM_BUFFER, 0); + // annoyingly, glBindBufferRange can have alignment restrictions that vary per platform. + // the GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT gives us the minimum alignment for views into the bone + // buffer. The bone buffer stores things per-16-byte "quadword". GLint val; glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &val); if (val <= 16) { @@ -28,82 +77,432 @@ Merc2::Merc2(const std::string& name, int my_id) : BucketRenderer(name, my_id) { } } + // initialize draw buffers, these will store lists of draws to flush. for (int i = 0; i < MAX_LEVELS; i++) { auto& draws = m_level_draw_buckets.emplace_back(); draws.draws.resize(MAX_DRAWS_PER_LEVEL); draws.envmap_draws.resize(MAX_ENVMAP_DRAWS_PER_LEVEL); } + + m_mod_vtx_temp.resize(MAX_MOD_VTX); + m_mod_vtx_unpack_temp.resize(MAX_MOD_VTX * 2); + + for (auto& x : m_effect_debug_mask) { + x = true; + } } +Merc2::~Merc2() { + for (auto& x : m_mod_vtx_buffers) { + glDeleteBuffers(1, &x.vertex); + glDeleteVertexArrays(1, &x.vao); + } + + glDeleteBuffers(1, &m_bones_buffer); + glDeleteVertexArrays(1, &m_vao); +} + +// We can run into a problem where adding a PC model would overflow the +// preallocated draw/bone buffers. +// So we break this part into two functions: +// - init_pc_model, which doesn't allocate bones/draws + /*! - * Handle the merc renderer switching to a different model. + * Setup draws for a model, given the DMA data generated by the GOAL code. */ -void Merc2::init_pc_model(const DmaTransfer& setup, SharedRenderState* render_state) { +void Merc2::handle_pc_model(const DmaTransfer& setup, + SharedRenderState* render_state, + ScopedProfilerNode& proff) { + auto p = scoped_prof("init-pc"); + + // the format of the data is: // ;; name (128 char, 8 qw) // ;; lights (7 qw x 1) // ;; matrix slot string (128 char, 8 qw) // ;; matrices (7 qw x N) // ;; flags (num-effects, effect-alpha-ignore, effect-disable) // ;; fades (u32 x N), padding to qw aligned + // ;; pointers (u32 x N), padding - // Part 1: name + // Get the name const u8* input_data = setup.data; + ASSERT(strlen((const char*)input_data) < 127); char name[128]; strcpy(name, (const char*)setup.data); - m_current_model = render_state->loader->get_merc_model(name); input_data += 128; - // Part 2: lights - memcpy(&m_current_lights, input_data, sizeof(VuLights)); + // Look up the model by name in the loader. + // This will return a reference to this model's data, plus a reference to the level's data + // for stuff shared between models of the same level + auto model_ref = render_state->loader->get_merc_model(name); + if (!model_ref) { + // it can fail, if the game is faster than the loader. In this case, we just don't draw. + m_stats.num_missing_models++; + return; + } + + // next, we need to check if we have enough room to draw this effect. + const LevelData* lev = model_ref->level; + const tfrag3::MercModel* model = model_ref->model; + + // each model uses only 1 light. + if (m_next_free_light >= MAX_LIGHTS) { + fmt::print("MERC2 out of lights, consider increasing MAX_LIGHTS\n"); + flush_draw_buckets(render_state, proff); + } + + // models use many bones. First check if we need to flush: + int bone_count = model->max_bones + 1; + if (m_next_free_bone_vector + m_opengl_buffer_alignment + bone_count * 8 > + MAX_SHADER_BONE_VECTORS) { + fmt::print("MERC2 out of bones, consider increasing MAX_SHADER_BONE_VECTORS\n"); + flush_draw_buckets(render_state, proff); + } + + // also sanity check that we have enough to draw the model + if (m_opengl_buffer_alignment + bone_count * 8 > MAX_SHADER_BONE_VECTORS) { + fmt::print( + "MERC2 doesn't have enough bones to draw a model, increase MAX_SHADER_BONE_VECTORS\n"); + ASSERT_NOT_REACHED(); + } + + // next, we need to find a bucket that holds draws for this level (will have the right buffers + // bound for drawing) + LevelDrawBucket* lev_bucket = nullptr; + for (u32 i = 0; i < m_next_free_level_bucket; i++) { + if (m_level_draw_buckets[i].level == lev) { + lev_bucket = &m_level_draw_buckets[i]; + break; + } + } + + if (!lev_bucket) { + // no existing bucket, allocate a new one. + if (m_next_free_level_bucket >= m_level_draw_buckets.size()) { + // out of room, flush + // fmt::print("MERC2 out of levels, consider increasing MAX_LEVELS\n"); + flush_draw_buckets(render_state, proff); + } + // alloc a new one + lev_bucket = &m_level_draw_buckets[m_next_free_level_bucket++]; + lev_bucket->reset(); + lev_bucket->level = lev; + } + + // next check draws: + if (lev_bucket->next_free_draw + model->max_draws >= lev_bucket->draws.size()) { + // out of room, flush + fmt::print("MERC2 out of draws, consider increasing MAX_DRAWS_PER_LEVEL\n"); + flush_draw_buckets(render_state, proff); + if (model->max_draws >= lev_bucket->draws.size()) { + ASSERT_NOT_REACHED_MSG("MERC2 draw buffer not big enough"); + } + } + + // same for envmap draws + if (lev_bucket->next_free_envmap_draw + model->max_draws >= lev_bucket->envmap_draws.size()) { + // out of room, flush + fmt::print("MERC2 out of envmap draws, consider increasing MAX_ENVMAP_DRAWS_PER_LEVEL\n"); + flush_draw_buckets(render_state, proff); + if (model->max_draws >= lev_bucket->envmap_draws.size()) { + ASSERT_NOT_REACHED_MSG("MERC2 envmap draw buffer not big enough"); + } + } + + // Next part of input data is the lights + VuLights current_lights; + memcpy(¤t_lights, input_data, sizeof(VuLights)); input_data += sizeof(VuLights); - // Part 3: matrix slot string + // Next part is the matrix slot string. The game sends us a bunch of bone matrices, + // but they may not be in order, or include all bones. The matrix slot string tells + // us which bones go where. (the game doesn't go in order because it follows the merc format) + ShaderMercMat skel_matrix_buffer[MAX_SKEL_BONES]; auto* matrix_array = (const u32*)(input_data + 128); int i; for (i = 0; i < 128; i++) { - if (input_data[i] == 0xff) { + if (input_data[i] == 0xff) { // indicates end of string. break; } + // read goal addr of matrix (matrix data isn't known at merc dma time, bones runs after) u32 addr; memcpy(&addr, &matrix_array[i * 4], 4); const u8* real_addr = setup.data - setup.data_offset + addr; - memcpy(&m_skel_matrix_buffer[input_data[i]], real_addr, sizeof(MercMat)); + ASSERT(input_data[i] < MAX_SKEL_BONES); + // get the matrix data + memcpy(&skel_matrix_buffer[input_data[i]], real_addr, sizeof(MercMat)); } input_data += 128 + 16 * i; - // Part 4: flags + // Next part is some flags auto* flags = (const u32*)input_data; - int num_effects = flags[0]; - m_current_ignore_alpha_bits = flags[1]; - m_current_effect_enable_bits = flags[2]; + int num_effects = flags[0]; // mostly just a sanity check + ASSERT(num_effects < kMaxEffect); + u32 current_ignore_alpha_bits = flags[1]; // shader settings + u32 current_effect_enable_bits = flags[2]; // mask for game to disable an effect + bool model_uses_mod = flags[3]; // if we should update vertices from game. input_data += 16; - // Part 5: fades + // Next is "fade data", indicating the color/intensity of envmap effect + u8 fade_buffer[4 * kMaxEffect]; for (int ei = 0; ei < num_effects; ei++) { for (int j = 0; j < 4; j++) { - m_fade_buffer[ei * 4 + j] = input_data[ei * 4 + j]; + fade_buffer[ei * 4 + j] = input_data[ei * 4 + j]; + } + } + input_data += (((num_effects * 4) + 15) / 16) * 16; + + // Next is pointers to merc data, needed so we can update vertices + + // will hold opengl buffers for the updated vertices + ModBuffers mod_opengl_buffers[kMaxEffect]; + if (model_uses_mod) { // only if we've enabled, this path is slow. + auto p = scoped_prof("update-verts"); + + // loop over effects. Mod vertices are done per effect (possibly a bad idea?) + for (int ei = 0; ei < num_effects; ei++) { + const auto& effect = model_ref->model->effects[ei]; + // some effects might have no mod draw info, and no modifiable vertices + if (effect.mod.mod_draw.empty()) { + continue; + } + + prof().begin_event("start1"); + // grab opengl buffer + auto opengl_buffers = alloc_mod_vtx_buffer(model_ref->level); + mod_opengl_buffers[ei] = opengl_buffers; + + // check that we have enough room for the finished thing. + if (effect.mod.vertices.size() > MAX_MOD_VTX) { + fmt::print("More mod vertices than MAX_MOD_VTX. {} > {}\n", effect.mod.vertices.size(), + MAX_MOD_VTX); + ASSERT_NOT_REACHED(); + } + + // check that we have enough room for unpack + if (effect.mod.expect_vidx_end > MAX_MOD_VTX) { + fmt::print("More mod vertices (temp) than MAX_MOD_VTX. {} > {}\n", + effect.mod.expect_vidx_end, MAX_MOD_VTX); + ASSERT_NOT_REACHED(); + } + + // start with the "correct" vertices from the model data: + memcpy(m_mod_vtx_temp.data(), effect.mod.vertices.data(), + sizeof(tfrag3::MercVertex) * effect.mod.vertices.size()); + + // get pointers to the fragment and fragment control data + u32 goal_addr; + memcpy(&goal_addr, input_data + 4 * ei, 4); + const u8* ee0 = setup.data - setup.data_offset; + const u8* merc_effect = ee0 + goal_addr; + u16 frag_cnt; + memcpy(&frag_cnt, merc_effect + 18, 2); + ASSERT(frag_cnt >= effect.mod.fragment_mask.size()); + u32 frag_goal; + memcpy(&frag_goal, merc_effect, 4); + u32 frag_ctrl_goal; + memcpy(&frag_ctrl_goal, merc_effect + 4, 4); + const u8* frag = ee0 + frag_goal; + const u8* frag_ctrl = ee0 + frag_ctrl_goal; + + // loop over frags + u32 vidx = 0; + // u32 st_vif_add = model->st_vif_add; + float xyz_scale = model->xyz_scale; + prof().end_event(); + { + // we're going to look at data that the game may be modifying. + // in the original game, they didn't have any lock, but I think that the + // scratchpad access from the EE would effectively block the VIF1 DMA, so you'd + // hopefully never get a partially updated model (which causes obvious holes). + // this lock is not ideal, and can block the rendering thread while blerc_execute runs, + // which can take up to 2ms on really blerc-heavy scenes + std::unique_lock lk(g_merc_data_mutex); + int frags_done = 0; + auto p = scoped_prof("vert-math"); + + // loop over fragments + for (u32 fi = 0; fi < effect.mod.fragment_mask.size(); fi++) { + frags_done++; + u8 mat_xfer_count = frag_ctrl[3]; + + // we create a mask of fragments to skip because they have no vertices. + // the indexing data assumes that we skip the other fragments. + if (effect.mod.fragment_mask[fi]) { + // read fragment metadata + u8 unsigned_four_count = frag_ctrl[0]; + u8 lump_four_count = frag_ctrl[1]; + u32 mm_qwc_off = frag[10]; + float float_offsets[3]; + memcpy(float_offsets, &frag[mm_qwc_off * 16], 12); + u32 my_u4_count = ((unsigned_four_count + 3) / 4) * 16; + u32 my_l4_count = my_u4_count + ((lump_four_count + 3) / 4) * 16; + + // loop over vertices in the fragment and unpack + for (u32 w = my_u4_count / 4; w < (my_l4_count / 4) - 2; w += 3) { + // just want positions for now. + u32 q0w = 0x4b010000 + frag[w * 4 + (0 * 4) + 3]; + u32 q1w = 0x4b010000 + frag[w * 4 + (1 * 4) + 3]; + u32 q2w = 0x4b010000 + frag[w * 4 + (2 * 4) + 3]; + + // and maybe normals + u32 q0z = 0x47800000 + frag[w * 4 + (0 * 4) + 2]; + u32 q1z = 0x47800000 + frag[w * 4 + (1 * 4) + 2]; + u32 q2z = 0x47800000 + frag[w * 4 + (2 * 4) + 2]; + + auto* pos_array = m_mod_vtx_unpack_temp[vidx].pos; + memcpy(&pos_array[0], &q0w, 4); + memcpy(&pos_array[1], &q1w, 4); + memcpy(&pos_array[2], &q2w, 4); + pos_array[0] += float_offsets[0]; + pos_array[1] += float_offsets[1]; + pos_array[2] += float_offsets[2]; + pos_array[0] *= xyz_scale; + pos_array[1] *= xyz_scale; + pos_array[2] *= xyz_scale; + + auto* nrm_array = m_mod_vtx_unpack_temp[vidx].nrm; + memcpy(&nrm_array[0], &q0z, 4); + memcpy(&nrm_array[1], &q1z, 4); + memcpy(&nrm_array[2], &q2z, 4); + nrm_array[0] += -65537; + nrm_array[1] += -65537; + nrm_array[2] += -65537; + vidx++; + } + } + + // next control + frag_ctrl += 4 + 2 * mat_xfer_count; + + // next frag + u32 mm_qwc_count = frag[11]; + frag += mm_qwc_count * 16; + } + + // sanity check + if (effect.mod.expect_vidx_end != vidx) { + fmt::print("---------- BAD {}/{}\n", effect.mod.expect_vidx_end, vidx); + ASSERT(false); + } + } + + { + auto pp = scoped_prof("copy"); + // now copy the data in merc original vertex order to the output. + for (u32 vi = 0; vi < effect.mod.vertices.size(); vi++) { + u32 addr = effect.mod.vertex_lump4_addr[vi]; + if (addr < vidx) { + memcpy(&m_mod_vtx_temp[vi], &m_mod_vtx_unpack_temp[addr], 32); + } + } + } + + // and upload to GPU + m_stats.num_uploads++; + m_stats.num_upload_bytes += effect.mod.vertices.size() * sizeof(tfrag3::MercVertex); + { + auto pp = scoped_prof("update-verts-upload"); + glBindBuffer(GL_ARRAY_BUFFER, opengl_buffers.vertex); + glBufferData(GL_ARRAY_BUFFER, effect.mod.vertices.size() * sizeof(tfrag3::MercVertex), + m_mod_vtx_temp.data(), GL_DYNAMIC_DRAW); + } } } - if (m_current_model) { - m_stats.num_models++; - for (const auto& effect : m_current_model->model->effects) { - bool envmap = effect.has_envmap; - m_stats.num_effects++; - m_stats.num_predicted_draws += effect.draws.size(); + // stats + m_stats.num_models++; + for (const auto& effect : model_ref->model->effects) { + bool envmap = effect.has_envmap; + m_stats.num_effects++; + m_stats.num_predicted_draws += effect.all_draws.size(); + if (envmap) { + m_stats.num_envmap_effects++; + m_stats.num_predicted_draws += effect.all_draws.size(); + } + for (const auto& draw : effect.all_draws) { + m_stats.num_predicted_tris += draw.num_triangles; if (envmap) { - m_stats.num_envmap_effects++; - m_stats.num_predicted_draws += effect.draws.size(); - } - for (const auto& draw : effect.draws) { m_stats.num_predicted_tris += draw.num_triangles; - if (envmap) { - m_stats.num_predicted_tris += draw.num_triangles; - } } } - } else { - m_stats.num_missing_models++; + } + + if (m_debug_mode) { + auto& d = m_debug.model_list.emplace_back(); + d.name = model->name; + d.level = model_ref->level->level->level_name; + for (auto& e : model->effects) { + auto& de = d.effects.emplace_back(); + de.envmap = e.has_envmap; + de.envmap_mode = e.envmap_mode; + for (auto& draw : e.all_draws) { + auto& dd = de.draws.emplace_back(); + dd.mode = draw.mode; + dd.num_tris = draw.num_triangles; + } + } + } + + // allocate bones in shared bone buffer to be sent to GPU at flush-time + u32 first_bone = alloc_bones(bone_count, skel_matrix_buffer); + + // allocate lights + u32 lights = alloc_lights(current_lights); + + // loop over effects, creating draws for each + for (size_t ei = 0; ei < model->effects.size(); ei++) { + // game has disabled it? + if (!(current_effect_enable_bits & (1 << ei))) { + continue; + } + + // imgui menu disabled it? + if (!m_effect_debug_mask[ei]) { + continue; + } + + u8 ignore_alpha = (current_ignore_alpha_bits & (1 << ei)); + auto& effect = model->effects[ei]; + + bool should_envmap = effect.has_envmap; + bool should_mod = model_uses_mod && effect.has_mod_draw; + + if (should_mod) { + // draw as two parts, fixed and mod + + // do fixed draws: + for (auto& fdraw : effect.mod.fix_draw) { + alloc_normal_draw(fdraw, ignore_alpha, lev_bucket, first_bone, lights); + if (should_envmap) { + try_alloc_envmap_draw(fdraw, effect.envmap_mode, effect.envmap_texture, lev_bucket, + fade_buffer + 4 * ei, first_bone, lights); + } + } + + // do mod draws + for (auto& mdraw : effect.mod.mod_draw) { + auto n = alloc_normal_draw(mdraw, ignore_alpha, lev_bucket, first_bone, lights); + // modify the draw, set the mod flag and point it to the opengl buffer + n->flags |= MOD_VTX; + n->mod_vtx_buffer = mod_opengl_buffers[ei]; + if (should_envmap) { + auto e = try_alloc_envmap_draw(mdraw, effect.envmap_mode, effect.envmap_texture, + lev_bucket, fade_buffer + 4 * ei, first_bone, lights); + e->flags |= MOD_VTX; + e->mod_vtx_buffer = mod_opengl_buffers[ei]; + } + } + } else { + // no mod, just do all_draws + for (auto& draw : effect.all_draws) { + if (should_envmap) { + try_alloc_envmap_draw(draw, effect.envmap_mode, effect.envmap_texture, lev_bucket, + fade_buffer + 4 * ei, first_bone, lights); + } + alloc_normal_draw(draw, ignore_alpha, lev_bucket, first_bone, lights); + } + } } } @@ -118,6 +517,30 @@ void Merc2::draw_debug_window() { ImGui::Text("EEffects : %d", m_stats.num_envmap_effects); ImGui::Text("ETris : %d", m_stats.num_envmap_tris); + + ImGui::Text("Uploads : %d", m_stats.num_uploads); + ImGui::Text("Upload kB: %d", m_stats.num_upload_bytes / 1024); + + ImGui::Checkbox("Debug", &m_debug_mode); + + if (m_debug_mode) { + for (int i = 0; i < kMaxEffect; i++) { + ImGui::Checkbox(fmt::format("e{:02d}", i).c_str(), &m_effect_debug_mask[i]); + } + + for (const auto& model : m_debug.model_list) { + if (ImGui::TreeNode(model.name.c_str())) { + ImGui::Text("Level: %s\n", model.level.c_str()); + for (const auto& e : model.effects) { + for (const auto& d : e.draws) { + ImGui::Text("%s", d.mode.to_string().c_str()); + } + ImGui::Separator(); + } + ImGui::TreePop(); + } + } + } } void Merc2::init_shaders(ShaderLibrary& shaders) { @@ -175,6 +598,9 @@ void Merc2::switch_to_emerc(SharedRenderState* render_state) { */ void Merc2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { m_stats = {}; + if (m_debug_mode) { + m_debug = {}; + } // skip if disabled if (!m_enabled) { @@ -183,16 +609,19 @@ void Merc2::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProf } return; } - m_current_model = std::nullopt; switch_to_merc2(render_state); - // iterate through the dma chain, filling buckets - handle_all_dma(dma, render_state, prof); + { + auto pp = scoped_prof("handle-all-dma"); + // iterate through the dma chain, filling buckets + handle_all_dma(dma, render_state, prof); + } - // flush model data to buckets - flush_pending_model(render_state, prof); - // flush buckets to draws - flush_draw_buckets(render_state, prof); + { + auto pp = scoped_prof("flush-buckets"); + // flush buckets to draws + flush_draw_buckets(render_state, prof); + } } u32 Merc2::alloc_lights(const VuLights& lights) { @@ -367,8 +796,8 @@ void Merc2::handle_merc_chain(DmaFollower& dma, } while (init.vifcode1().kind == VifCode::Kind::PC_PORT) { - flush_pending_model(render_state, prof); - init_pc_model(init, render_state); + // flush_pending_model(render_state, prof); + handle_pc_model(init, render_state, prof); for (int i = 0; i < skip_count; i++) { auto link = dma.read_and_advance(); ASSERT(link.vifcode0().kind == VifCode::Kind::NOP); @@ -393,7 +822,7 @@ void Merc2::handle_merc_chain(DmaFollower& dma, * Queue up some bones to be included in the bone buffer. * Returns the index of the first bone vector. */ -u32 Merc2::alloc_bones(int count) { +u32 Merc2::alloc_bones(int count, ShaderMercMat* data) { u32 first_bone_vector = m_next_free_bone_vector; ASSERT(count * 8 + first_bone_vector <= MAX_SHADER_BONE_VECTORS); @@ -402,7 +831,7 @@ u32 Merc2::alloc_bones(int count) { // iterate over each bone we need for (int i = 0; i < count; i++) { - auto& skel_mat = m_skel_matrix_buffer[i]; + auto& skel_mat = data[i]; auto* shader_mat = &m_shader_bone_vector_buffer[m_next_free_bone_vector]; int bv = 0; @@ -426,198 +855,147 @@ u32 Merc2::alloc_bones(int count) { ASSERT(first_bone_vector + count * 8 <= m_next_free_bone_vector); return first_bone_vector; } -/*! - * Flush a model to draw buckets - */ -void Merc2::flush_pending_model(SharedRenderState* render_state, ScopedProfilerNode& prof) { - if (!m_current_model) { - return; + +Merc2::ModBuffers Merc2::alloc_mod_vtx_buffer(const LevelData* lev) { + if (m_next_mod_vtx_buffer >= m_mod_vtx_buffers.size()) { + GLuint b; + glGenBuffers(1, &b); + GLuint vao; + glGenVertexArrays(1, &vao); + glBindVertexArray(vao); + glBindBuffer(GL_ARRAY_BUFFER, b); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + setup_merc_vao(); + m_mod_vtx_buffers.push_back({vao, b}); } + return m_mod_vtx_buffers[m_next_mod_vtx_buffer++]; +} - const LevelData* lev = m_current_model->level; - const tfrag3::MercModel* model = m_current_model->model; - - int bone_count = model->max_bones + 1; - - if (m_next_free_light >= MAX_LIGHTS) { - fmt::print("MERC2 out of lights, consider increasing MAX_LIGHTS\n"); - flush_draw_buckets(render_state, prof); - } - - if (m_next_free_bone_vector + m_opengl_buffer_alignment + bone_count * 8 > - MAX_SHADER_BONE_VECTORS) { - fmt::print("MERC2 out of bones, consider increasing MAX_SHADER_BONE_VECTORS\n"); - flush_draw_buckets(render_state, prof); - } - - // find a level bucket - LevelDrawBucket* lev_bucket = nullptr; - for (u32 i = 0; i < m_next_free_level_bucket; i++) { - if (m_level_draw_buckets[i].level == lev) { - lev_bucket = &m_level_draw_buckets[i]; +Merc2::Draw* Merc2::try_alloc_envmap_draw(const tfrag3::MercDraw& mdraw, + const DrawMode& envmap_mode, + u32 envmap_texture, + LevelDrawBucket* lev_bucket, + const u8* fade, + u32 first_bone, + u32 lights) { + bool nonzero_fade = false; + for (int i = 0; i < 4; i++) { + if (fade[i]) { + nonzero_fade = true; break; } } - - if (!lev_bucket) { - // no existing bucket - if (m_next_free_level_bucket >= m_level_draw_buckets.size()) { - // out of room, flush - // fmt::print("MERC2 out of levels, consider increasing MAX_LEVELS\n"); - flush_draw_buckets(render_state, prof); - // and retry the whole thing. - flush_pending_model(render_state, prof); - return; - } - // alloc a new one - lev_bucket = &m_level_draw_buckets[m_next_free_level_bucket++]; - lev_bucket->reset(); - lev_bucket->level = lev; + if (!nonzero_fade) { + return nullptr; } - if (lev_bucket->next_free_draw + model->max_draws >= lev_bucket->draws.size()) { - // out of room, flush - fmt::print("MERC2 out of draws, consider increasing MAX_DRAWS_PER_LEVEL\n"); - flush_draw_buckets(render_state, prof); - // and retry the whole thing. - flush_pending_model(render_state, prof); - return; + Draw* draw = &lev_bucket->envmap_draws[lev_bucket->next_free_envmap_draw++]; + draw->flags = 0; + draw->first_index = mdraw.first_index; + draw->index_count = mdraw.index_count; + draw->mode = envmap_mode; + draw->texture = envmap_texture; + draw->first_bone = first_bone; + draw->light_idx = lights; + draw->num_triangles = mdraw.num_triangles; + for (int i = 0; i < 4; i++) { + draw->fade[i] = fade[i]; } + return draw; +} - if (lev_bucket->next_free_envmap_draw + model->max_draws >= lev_bucket->envmap_draws.size()) { - // out of room, flush - fmt::print("MERC2 out of envmap draws, consider increasing MAX_ENVMAP_DRAWS_PER_LEVEL\n"); - // or, use a more accurate max_draws for envmap. - flush_draw_buckets(render_state, prof); - // and retry the whole thing. - flush_pending_model(render_state, prof); - return; +Merc2::Draw* Merc2::alloc_normal_draw(const tfrag3::MercDraw& mdraw, + bool ignore_alpha, + LevelDrawBucket* lev_bucket, + u32 first_bone, + u32 lights) { + Draw* draw = &lev_bucket->draws[lev_bucket->next_free_draw++]; + draw->flags = 0; + draw->first_index = mdraw.first_index; + draw->index_count = mdraw.index_count; + draw->mode = mdraw.mode; + draw->texture = mdraw.tree_tex_id; + draw->first_bone = first_bone; + draw->light_idx = lights; + draw->num_triangles = mdraw.num_triangles; + if (ignore_alpha) { + draw->flags |= IGNORE_ALPHA; } - - u32 first_bone = alloc_bones(bone_count); - - // allocate lights - u32 lights = alloc_lights(m_current_lights); - // - for (size_t ei = 0; ei < model->effects.size(); ei++) { - if (!(m_current_effect_enable_bits & (1 << ei))) { - continue; - } - - u8 ignore_alpha = (m_current_ignore_alpha_bits & (1 << ei)); - auto& effect = model->effects[ei]; - if (effect.has_envmap) { - bool nonzero_fade = false; - for (int i = 0; i < 4; i++) { - if (m_fade_buffer[4 * ei + i]) { - nonzero_fade = true; - break; - } - } - if (nonzero_fade) { - for (auto& mdraw : effect.draws) { - Draw* draw = &lev_bucket->envmap_draws[lev_bucket->next_free_envmap_draw++]; - draw->first_index = mdraw.first_index; - draw->index_count = mdraw.index_count; - draw->mode = effect.envmap_mode; - draw->texture = effect.envmap_texture; - draw->first_bone = first_bone; - draw->light_idx = lights; - draw->num_triangles = mdraw.num_triangles; - draw->ignore_alpha = false; - for (int i = 0; i < 4; i++) { - draw->fade[i] = m_fade_buffer[4 * ei + i]; - } - } - } - } - for (auto& mdraw : effect.draws) { - Draw* draw = &lev_bucket->draws[lev_bucket->next_free_draw++]; - draw->first_index = mdraw.first_index; - draw->index_count = mdraw.index_count; - draw->mode = mdraw.mode; - draw->texture = mdraw.tree_tex_id; - draw->first_bone = first_bone; - draw->light_idx = lights; - draw->num_triangles = mdraw.num_triangles; - draw->ignore_alpha = ignore_alpha; - for (int i = 0; i < 4; i++) { - draw->fade[i] = 0; - } - } + for (int i = 0; i < 4; i++) { + draw->fade[i] = 0; } + return draw; +} - m_current_model = std::nullopt; +void Merc2::setup_merc_vao() { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(UINT32_MAX); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + glEnableVertexAttribArray(3); + glEnableVertexAttribArray(4); + glEnableVertexAttribArray(5); + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_GEQUAL); + + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, pos) // offset (0) + ); + + glVertexAttribPointer(1, // location 1 in the + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, normal[0]) // offset (0) + ); + + glVertexAttribPointer(2, // location 1 in the + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, weights[0]) // offset (0) + ); + + glVertexAttribPointer(3, // location 1 in the shader + 2, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, st[0]) // offset (0) + ); + + glVertexAttribPointer(4, // location 1 in the shader + 4, // 3 values per vert + GL_UNSIGNED_BYTE, // floats + GL_TRUE, // normalized + sizeof(tfrag3::MercVertex), // stride + (void*)offsetof(tfrag3::MercVertex, rgba[0]) // offset (0) + ); + + glVertexAttribIPointer(5, // location 0 in the + 4, // 3 floats per vert + GL_UNSIGNED_BYTE, // u8's + sizeof(tfrag3::MercVertex), // + (void*)offsetof(tfrag3::MercVertex, mats[0]) // offset in array + ); } void Merc2::flush_draw_buckets(SharedRenderState* render_state, ScopedProfilerNode& prof) { m_stats.num_draw_flush++; - for (u32 li = 0; li < m_next_free_level_bucket; li++) { const auto& lev_bucket = m_level_draw_buckets[li]; const auto* lev = lev_bucket.level; glBindVertexArray(m_vao); glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); - - glEnable(GL_PRIMITIVE_RESTART); - glPrimitiveRestartIndex(UINT32_MAX); - glEnableVertexAttribArray(0); - glEnableVertexAttribArray(1); - glEnableVertexAttribArray(2); - glEnableVertexAttribArray(3); - glEnableVertexAttribArray(4); - glEnableVertexAttribArray(5); - glEnable(GL_DEPTH_TEST); - glDepthFunc(GL_GEQUAL); - - glVertexAttribPointer(0, // location 0 in the shader - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, pos) // offset (0) - ); - - glVertexAttribPointer(1, // location 1 in the - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, normal[0]) // offset (0) - ); - - glVertexAttribPointer(2, // location 1 in the - 3, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, weights[0]) // offset (0) - ); - - glVertexAttribPointer(3, // location 1 in the shader - 2, // 3 values per vert - GL_FLOAT, // floats - GL_FALSE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, st[0]) // offset (0) - ); - - glVertexAttribPointer(4, // location 1 in the shader - 4, // 3 values per vert - GL_UNSIGNED_BYTE, // floats - GL_TRUE, // normalized - sizeof(tfrag3::MercVertex), // stride - (void*)offsetof(tfrag3::MercVertex, rgba[0]) // offset (0) - ); - - glVertexAttribIPointer(5, // location 0 in the - 4, // 3 floats per vert - GL_UNSIGNED_BYTE, // u8's - sizeof(tfrag3::MercVertex), // - (void*)offsetof(tfrag3::MercVertex, mats[0]) // offset in array - ); - + setup_merc_vao(); m_stats.num_bones_uploaded += m_next_free_bone_vector; glBindBuffer(GL_UNIFORM_BUFFER, m_bones_buffer); @@ -638,6 +1016,7 @@ void Merc2::flush_draw_buckets(SharedRenderState* render_state, ScopedProfilerNo m_next_free_light = 0; m_next_free_bone_vector = 0; m_next_free_level_bucket = 0; + m_next_mod_vtx_buffer = 0; } void Merc2::do_draws(const Draw* draw_array, @@ -646,12 +1025,27 @@ void Merc2::do_draws(const Draw* draw_array, const Uniforms& uniforms, ScopedProfilerNode& prof, bool set_fade, - SharedRenderState* render_state) { + SharedRenderState*) { + glBindVertexArray(m_vao); int last_tex = -1; int last_light = -1; + bool normal_vtx_buffer_bound = true; for (u32 di = 0; di < num_draws; di++) { auto& draw = draw_array[di]; - glUniform1i(uniforms.ignore_alpha, draw.ignore_alpha); + if (draw.flags & MOD_VTX) { + glBindVertexArray(draw.mod_vtx_buffer.vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + normal_vtx_buffer_bound = false; + } else { + if (!normal_vtx_buffer_bound) { + glBindVertexArray(m_vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + normal_vtx_buffer_bound = true; + } + } + glUniform1i(uniforms.ignore_alpha, draw.flags & DrawFlags::IGNORE_ALPHA); if ((int)draw.texture != last_tex) { if (draw.texture < lev->textures.size()) { glBindTexture(GL_TEXTURE_2D, lev->textures.at(draw.texture)); @@ -690,4 +1084,10 @@ void Merc2::do_draws(const Draw* draw_array, glDrawElements(GL_TRIANGLE_STRIP, draw.index_count, GL_UNSIGNED_INT, (void*)(sizeof(u32) * draw.first_index)); } + + if (!normal_vtx_buffer_bound) { + glBindVertexArray(m_vao); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, lev->merc_indices); + glBindBuffer(GL_ARRAY_BUFFER, lev->merc_vertices); + } } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/foreground/Merc2.h b/game/graphics/opengl_renderer/foreground/Merc2.h index 4b79aa9460..d57b8de3b1 100644 --- a/game/graphics/opengl_renderer/foreground/Merc2.h +++ b/game/graphics/opengl_renderer/foreground/Merc2.h @@ -4,11 +4,30 @@ class Merc2 : public BucketRenderer { public: Merc2(const std::string& name, int my_id); + ~Merc2(); void draw_debug_window() override; void init_shaders(ShaderLibrary& shaders) override; void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; private: + bool m_debug_mode = false; + struct DrawDebug { + DrawMode mode; + int num_tris; + }; + struct EffectDebug { + bool envmap = false; + DrawMode envmap_mode; + std::vector draws; + }; + struct ModelDebug { + std::string name; + std::string level; + std::vector effects; + }; + struct { + std::vector model_list; + } m_debug; enum MercDataMemory { LOW_MEMORY = 0, BUFFER_BASE = 442, @@ -38,16 +57,17 @@ class Merc2 : public BucketRenderer { math::Vector4f ambient; }; - void init_pc_model(const DmaTransfer& setup, SharedRenderState* render_state); + void handle_pc_model(const DmaTransfer& setup, + SharedRenderState* render_state, + ScopedProfilerNode& prof); u32 alloc_lights(const VuLights& lights); - u32 alloc_bones(int count); + struct ModBuffers { + GLuint vao, vertex; + }; - std::optional m_current_model = std::nullopt; - u16 m_current_effect_enable_bits = 0; - u16 m_current_ignore_alpha_bits = 0; - static constexpr int kMaxEffect = 16; - u8 m_fade_buffer[4 * kMaxEffect]; + static constexpr int kMaxEffect = 32; + bool m_effect_debug_mask[kMaxEffect]; struct MercMat { math::Vector4f tmat[4]; @@ -60,7 +80,7 @@ class Merc2 : public BucketRenderer { math::Vector4f pad; std::string to_string() const; }; - + u32 alloc_bones(int count, ShaderMercMat* data); static constexpr int MAX_SKEL_BONES = 128; static constexpr int BONE_VECTORS_PER_BONE = 7; static constexpr int MAX_SHADER_BONE_VECTORS = 1024 * 32; // ?? @@ -70,7 +90,6 @@ class Merc2 : public BucketRenderer { static constexpr int MAX_ENVMAP_DRAWS_PER_LEVEL = 1024; math::Vector4f m_shader_bone_vector_buffer[MAX_SHADER_BONE_VECTORS]; - ShaderMercMat m_skel_matrix_buffer[MAX_SKEL_BONES]; struct Uniforms { GLuint light_direction[3]; @@ -97,10 +116,8 @@ class Merc2 : public BucketRenderer { Uniforms m_merc_uniforms, m_emerc_uniforms; void init_shader_common(Shader& shader, Uniforms* uniforms, bool include_lights); - void init_for_frame(SharedRenderState* render_state, ShaderId shader); void handle_setup_dma(DmaFollower& dma, SharedRenderState* render_state); void handle_all_dma(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); - void flush_pending_model(SharedRenderState* render_state, ScopedProfilerNode& prof); void handle_merc_chain(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); @@ -110,6 +127,22 @@ class Merc2 : public BucketRenderer { GLuint m_vao; + void setup_merc_vao(); + + std::vector m_mod_vtx_buffers; + u32 m_next_mod_vtx_buffer = 0; + + static constexpr int MAX_MOD_VTX = UINT16_MAX; + std::vector m_mod_vtx_temp; + + struct UnpackTempVtx { + float pos[4]; + float nrm[4]; + }; + std::vector m_mod_vtx_unpack_temp; + + ModBuffers alloc_mod_vtx_buffer(const LevelData* lev); + GLuint m_bones_buffer; struct Stats { @@ -125,8 +158,16 @@ class Merc2 : public BucketRenderer { int num_envmap_effects = 0; int num_envmap_tris = 0; + + int num_upload_bytes = 0; + int num_uploads = 0; } m_stats; + enum DrawFlags { + IGNORE_ALPHA = 1, + MOD_VTX = 2, + }; + struct Draw { u32 first_index; u32 index_count; @@ -135,7 +176,8 @@ class Merc2 : public BucketRenderer { u32 num_triangles; u16 first_bone; u16 light_idx; - u8 ignore_alpha; + u8 flags; + ModBuffers mod_vtx_buffer; u8 fade[4]; }; @@ -152,6 +194,18 @@ class Merc2 : public BucketRenderer { next_free_envmap_draw = 0; } }; + Draw* alloc_normal_draw(const tfrag3::MercDraw& mdraw, + bool ignore_alpha, + LevelDrawBucket* lev_bucket, + u32 first_bone, + u32 lights); + Draw* try_alloc_envmap_draw(const tfrag3::MercDraw& mdraw, + const DrawMode& envmap_mode, + u32 envmap_texture, + LevelDrawBucket* lev_bucket, + const u8* fade, + u32 first_bone, + u32 lights); void do_draws(const Draw* draw_array, const LevelData* lev, @@ -164,7 +218,6 @@ class Merc2 : public BucketRenderer { static constexpr int MAX_LIGHTS = 1024; VuLights m_lights_buffer[MAX_LIGHTS]; u32 m_next_free_light = 0; - VuLights m_current_lights; std::vector m_level_draw_buckets; u32 m_next_free_level_bucket = 0; diff --git a/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp b/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp index 1c57e239db..73a469d02c 100644 --- a/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp +++ b/game/graphics/opengl_renderer/ocean/OceanMidAndFar.cpp @@ -195,10 +195,6 @@ void OceanMidAndFar::handle_ocean_mid(DmaFollower& dma, } } -void handle_ocean_89_jak2(DmaFollower& dma, - SharedRenderState* render_state, - ScopedProfilerNode& prof) {} +void handle_ocean_89_jak2(DmaFollower&, SharedRenderState*, ScopedProfilerNode&) {} -void handle_ocean_79_jak2(DmaFollower& dma, - SharedRenderState* render_state, - ScopedProfilerNode& prof) {} \ No newline at end of file +void handle_ocean_79_jak2(DmaFollower&, SharedRenderState*, ScopedProfilerNode&) {} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp b/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp index 6d9e248084..7e00b94a3d 100644 --- a/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp +++ b/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp @@ -518,20 +518,20 @@ void OceanTexture::run_L3_PC_jak2() { Vf res3; // vf23 Vf nrm0; // vf24 - Vf nrm1; // vf25 + // Vf nrm1; // vf25 Vf nrm2; // vf26 - Vf reflect; // vf27 + // Vf reflect; // vf27 Vf cout0; // vf28 Vf cout1; // vf29 Vf cout2; // vf30 Vf cout3; // vf31 - Accumulator acc; + // Accumulator acc; const Vf ones(1, 1, 1, 1); const Vf vf00(0, 0, 0, 1); - const u16 vi11 = 0x80; + // const u16 vi11 = 0x80; bool bc; // clang-format off diff --git a/game/mips2c/jak1_functions/bones.cpp b/game/mips2c/jak1_functions/bones.cpp index 72ade06777..9499d9be43 100644 --- a/game/mips2c/jak1_functions/bones.cpp +++ b/game/mips2c/jak1_functions/bones.cpp @@ -803,7 +803,6 @@ u64 execute(void* ctxt) { const MercBucketInfo* mbi = (const MercBucketInfo*)(g_ee_main_mem + c->sgpr64(a3)); u16 use_pc_merc_bits = 0; u16 ignore_alpha_bits = 0; - u32 fade = 0; for (int i = 0; i < 16; i++) { if (!mbi->effects[i].use_mercneric) { use_pc_merc_bits |= (1 << i); diff --git a/game/mips2c/jak2_functions/merc_blend_shape.cpp b/game/mips2c/jak2_functions/merc_blend_shape.cpp index 69f0310355..b58579e7e5 100644 --- a/game/mips2c/jak2_functions/merc_blend_shape.cpp +++ b/game/mips2c/jak2_functions/merc_blend_shape.cpp @@ -2,6 +2,11 @@ // clang-format off #include "game/mips2c/mips2c_private.h" #include "game/kernel/jak2/kscheme.h" +#include "common/global_profiler/GlobalProfiler.h" +#include + +extern std::mutex g_merc_data_mutex; + using ::jak2::intern_from_c; namespace Mips2C::jak2 { namespace blerc_execute { @@ -14,6 +19,8 @@ struct Cache { } cache; u64 execute(void* ctxt) { + auto pp = scoped_prof("blerc-exec"); + std::unique_lock lk(g_merc_data_mutex); auto* c = (ExecutionContext*)ctxt; bool bc = false; u32 call_addr = 0; @@ -251,6 +258,10 @@ block_24: if (bc) {goto block_23;} // branch non-likely // Unknown instr: pmfhl.uw t5 + c->gprs[t5].du32[0] = c->lo.du32[1]; + c->gprs[t5].du32[1] = c->hi.du32[1]; + c->gprs[t5].du32[2] = c->lo.du32[3]; + c->gprs[t5].du32[3] = c->hi.du32[3]; c->mfc1(r0, f31); // mfc1 r0, f31 c->psraw(t7, t7, 13); // psraw t7, t7, 13 c->mfc1(r0, f31); // mfc1 r0, f31 @@ -345,7 +356,10 @@ block_29: qwc = c->sgpr64(a0); // Unknown instr: sync.l // c->sw(a3, 0, a2); // sw a3, 0(a2) - spad_from_dma_no_sadr_off(cache.fake_scratchpad_data, madr, sadr, qwc); + // fmt::print("blerc download 0x{:x} <- 0x{:x} ({} qwc)\n", madr, sadr, qwc); + { + spad_from_dma_no_sadr_off(cache.fake_scratchpad_data, madr, sadr, qwc); + } // Unknown instr: sync.l c->gprs[a0].du64[0] = 0; // or a0, r0, r0 c->addiu(a0, r0, 1); // addiu a0, r0, 1 @@ -406,7 +420,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->daddiu(sp, sp, -128); // daddiu sp, sp, -128 c->sd(ra, 0, sp); // sd ra, 0(sp) c->sq(s0, 16, sp); // sq s0, 16(sp) diff --git a/game/mips2c/jak2_functions/ripple.cpp b/game/mips2c/jak2_functions/ripple.cpp index ecb6b05ca9..34364a933e 100644 --- a/game/mips2c/jak2_functions/ripple.cpp +++ b/game/mips2c/jak2_functions/ripple.cpp @@ -25,7 +25,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->load_symbol2(v1, cache.cos_poly_vec); // lw v1, *cos-poly-vec*(s7) c->lqc2(vf7, 0, v1); // lqc2 vf7, 0(v1) c->lui(v1, 15561); // lui v1, 15561 @@ -450,7 +449,6 @@ struct Cache { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; get_fake_spad_addr2(v1, cache.fake_scratchpad_data, 0, c);// lui v1, 28672 c->daddiu(v1, v1, 1024); // daddiu v1, v1, 1024 c->lwu(a1, 4, a0); // lwu a1, 4(a0) @@ -552,7 +550,6 @@ namespace ripple_matrix_scale { u64 execute(void* ctxt) { auto* c = (ExecutionContext*)ctxt; bool bc = false; - u32 call_addr = 0; c->lhu(v1, 2, a0); // lhu v1, 2(a0) c->lw(a1, 4, a0); // lw a1, 4(a0) c->lw(a2, 28, a0); // lw a2, 28(a0) diff --git a/game/mips2c/mips2c_private.h b/game/mips2c/mips2c_private.h index 4412cebb86..96a9698ba5 100644 --- a/game/mips2c/mips2c_private.h +++ b/game/mips2c/mips2c_private.h @@ -1652,7 +1652,7 @@ inline void load_vfs_from_tf_regs(const void* tf_regs_sym, ExecutionContext* c) inline void spad_to_dma_blerc_chain(void* spad_sym_addr, u32 sadr, u32 tadr) { u32 spad_addr_goal; - memcpy(&spad_addr_goal, spad_sym_addr, 4); + memcpy(&spad_addr_goal, align4_ptr(spad_sym_addr), 4); void* spad_addr_c = g_ee_main_mem + spad_addr_goal; ASSERT(sadr < 0x4000); emulate_dma(g_ee_main_mem, spad_addr_c, tadr, sadr); diff --git a/goal_src/jak1/engine/gfx/foreground/bones.gc b/goal_src/jak1/engine/gfx/foreground/bones.gc index c64d7d2e1b..1a93a8fd2d 100644 --- a/goal_src/jak1/engine/gfx/foreground/bones.gc +++ b/goal_src/jak1/engine/gfx/foreground/bones.gc @@ -944,7 +944,7 @@ ;; flags (num-effects, effect-alpha-ignore, effect-disable) ;; fades (u32 x N), padding to qw aligned -(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer)) +(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (update-verts symbol)) (let ((start-packet (the-as dma-packet dma-buf)) (qwc-total 0)) ;; merc draw asm will check this. @@ -1028,6 +1028,7 @@ (set! (-> flags 0) (-> merc-ctrl header effect-count)) (set! (-> flags 1) ignore-alpha-mask) (set! (-> flags 2) enable-mask) + (set! (-> flags 3) (if update-verts 1 0)) ) (&+! dma-buf (* 16 1)) (+! qwc-total 1) @@ -1038,11 +1039,22 @@ (set! (-> fades i) (the-as uint (-> *merc-bucket-info* effect i color-fade))) ) ) - (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) (&+! dma-buf (* 16 num-fades)) (+! qwc-total num-fades) ) + + ;; merc ptrs + (let ((merc-ptrs (the (pointer object) dma-buf))) + (dotimes (i (-> merc-ctrl header effect-count)) + (set! (-> merc-ptrs i) (-> merc-ctrl effect i)) + ) + ) + (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) + (&+! dma-buf (* 16 num-fades)) + (+! qwc-total num-fades) + ) + ) ) ) @@ -1065,6 +1077,9 @@ ;; this is much faster, and does significantly speed up the game thread on finalboss. (define *emerc-hack* #t) +;; when set, use merc for blerc instead of generic. +(define *blerc-hack* #t) + (defun draw-bones ((arg0 draw-control) (dma-buf dma-buffer) (arg2 float)) "Main draw function for all bone-related renderers. Will set up merc, generic and shadow. and also add the bones to the calculation list." @@ -1277,6 +1292,8 @@ (let ((geom (-> arg0 lod-set lod (-> arg0 cur-lod) geo)) ;; merc2 can't handle all cases of the original merc, so we add this fallback on PC. (pc-force-mercneric #f) + ;; if pc rendering code needs to update merc vertices + (pc-merc-vtx-update #f) ) (when (logtest? (-> arg0 global-effect) (draw-effect title)) (set! pc-force-mercneric #t) @@ -1352,7 +1369,10 @@ (jc (-> pd skel))) (when (nonzero? jc) (when (logtest? (-> jc status) (janim-status blerc)) - (set! pc-force-mercneric #t) + (if *blerc-hack* + (set! pc-merc-vtx-update #t) + (set! pc-force-mercneric #t) + ) ) ) ) @@ -1518,7 +1538,7 @@ ) ) ) - (set! s2-0 (pc-merc-draw-request arg0 (the pointer s2-0) (the pointer matrix-data))) + (set! s2-0 (pc-merc-draw-request arg0 (the pointer s2-0) (the pointer matrix-data) pc-merc-vtx-update)) ; (if (nonzero? (-> *merc-bucket-info* need-mercprime-if-merc)) ; (set! (-> dma-buf base) (draw-bones-merc arg0 matrix-data s2-0 32 17)) ; (set! (-> dma-buf base) (draw-bones-merc arg0 matrix-data s2-0 35 20)) diff --git a/goal_src/jak2/engine/game/main.gc b/goal_src/jak2/engine/game/main.gc index 98c92761f6..f743e35c5e 100644 --- a/goal_src/jak2/engine/game/main.gc +++ b/goal_src/jak2/engine/game/main.gc @@ -1425,13 +1425,13 @@ ; ;; Run blerc to modify foreground models (with-profiler 'merc *profile-merc-color* - ; (blerc-execute) - ; (blerc-init) + (blerc-execute) + (blerc-init) ) ; ;; Run other merc effects that modify vertices ; (texscroll-execute) - ; (ripple-execute) + (ripple-execute) (region-execute) ;; final call to update joints before drawing. @@ -1641,7 +1641,7 @@ (free-nodes *touching-list*) (prepare *collide-rider-pool*) (update-actor-hash) - ; (blerc-init) + (blerc-init) ; (dma-send ; (the-as dma-bank #x10008000) ; (the-as uint (-> *collide-vif0-init* data)) diff --git a/goal_src/jak2/engine/gfx/foreground/foreground.gc b/goal_src/jak2/engine/gfx/foreground/foreground.gc index 5ebaeafe65..09bb4ab806 100644 --- a/goal_src/jak2/engine/gfx/foreground/foreground.gc +++ b/goal_src/jak2/engine/gfx/foreground/foreground.gc @@ -630,7 +630,7 @@ ) ) -(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (tex-idx int)) +(defun pc-merc-draw-request ((dc draw-control) (dma-buf pointer) (matrix-buf pointer) (tex-idx int) (update-verts symbol)) "Send a request to PC Merc2 to draw the given object. Only draws the effects which match this texture index. Just places a single big dma packet, you have to patch the end yourself." @@ -724,6 +724,7 @@ (set! (-> flags 0) (-> merc-ctrl header effect-count)) (set! (-> flags 1) ignore-alpha-mask) (set! (-> flags 2) enable-mask) + (set! (-> flags 3) (if update-verts 1 0)) ) (&+! dma-buf (* 16 1)) (+! qwc-total 1) @@ -735,6 +736,17 @@ ) ) + (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) + (&+! dma-buf (* 16 num-fades)) + (+! qwc-total num-fades) + ) + + ;; merc ptrs + (let ((merc-ptrs (the (pointer object) dma-buf))) + (dotimes (i (-> merc-ctrl header effect-count)) + (set! (-> merc-ptrs i) (-> merc-ctrl effect i)) + ) + ) (let ((num-fades (/ (+ (-> merc-ctrl header effect-count) 3) 4))) (&+! dma-buf (* 16 num-fades)) (+! qwc-total num-fades) @@ -753,6 +765,7 @@ (let ((use-flags (new 'stack-no-clear 'array 'uint8 7)) (mctrl (-> dc mgeo)) (buckets (-> (scratchpad-object foreground-work) grid level-buckets (-> (scratchpad-object foreground-work) draw-index-map (-> dc level-index)))) + (has-ripple #f) ) ;; mark all as unused, until we see a use (dotimes (i 7) (set! (-> use-flags i) 0)) @@ -761,6 +774,9 @@ (dotimes (i (-> mctrl header effect-count)) ;;(format 0 "effect ~d, texture ~d~%" i (-> mctrl effect i texture-index)) (set! (-> use-flags (-> mctrl effect i texture-index)) 1) + (when (logtest? (-> (-> dc lod-set lod (-> dc cur-lod) geo) effect i effect-bits) (effect-bits ripple)) + (set! has-ripple #t) + ) ) ;; loop over texture groupe @@ -768,8 +784,21 @@ (when (nonzero? (-> use-flags i)) ;; this one is used, update the model for pc. ;; create dma-packet to send the name: - (let ((packet (the-as dma-packet dma-buf))) - (set! dma-buf (pc-merc-draw-request dc dma-buf matrix-buf i)) + (let ((packet (the-as dma-packet dma-buf)) + (vertex-update #f) + ) + (when has-ripple + (set! vertex-update #t) + ) + (let* ((pd (the process-drawable (-> dc process))) + (jc (-> pd skel))) + (when (nonzero? jc) + (when (logtest? (-> jc status) (joint-control-status blend-shape-valid)) + (set! vertex-update #t) + ) + ) + ) + (set! dma-buf (pc-merc-draw-request dc dma-buf matrix-buf i vertex-update)) ;; create a patch packet (let ((patch-packet (the-as dma-packet dma-buf))) @@ -995,7 +1024,7 @@ ) ) ) - (if (and (logtest? (-> geo effect effect-idx effect-bits) (effect-bits ripple)) (-> dc ripple)) + (when (and (logtest? (-> geo effect effect-idx effect-bits) (effect-bits ripple)) (-> dc ripple)) (set! dma-ptr (foreground-ripple dc geo dma-ptr effect-idx)) ) (nonzero? (-> dc death-timer))