From b999422305b2315e7bb927bb3cb0fb8ca4d1506e Mon Sep 17 00:00:00 2001 From: water111 <48171810+water111@users.noreply.github.com> Date: Sun, 2 Jan 2022 19:02:28 -0500 Subject: [PATCH] [tie] add wind effect (#1046) * wip tie wind stuff * wind * clang --- common/custom_data/TFrag3Data.cpp | 32 +++ common/custom_data/Tfrag3Data.h | 34 ++- decompiler/level_extractor/BspHeader.cpp | 2 +- decompiler/level_extractor/BspHeader.h | 2 +- decompiler/level_extractor/extract_tfrag.cpp | 14 + decompiler/level_extractor/extract_tie.cpp | 241 ++++++++++++---- docs/scratch/tie_format.txt | 63 +++++ game/graphics/opengl_renderer/tfrag/Tie3.cpp | 273 +++++++++++++++++++ game/graphics/opengl_renderer/tfrag/Tie3.h | 31 +++ goal_src/engine/gfx/tie/tie-methods.gc | 16 ++ 10 files changed, 648 insertions(+), 60 deletions(-) diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp index 78fcfafa8b..b7f004021f 100644 --- a/common/custom_data/TFrag3Data.cpp +++ b/common/custom_data/TFrag3Data.cpp @@ -11,6 +11,20 @@ void StripDraw::serialize(Serializer& ser) { ser.from_ptr(&num_triangles); } +void InstancedStripDraw::serialize(Serializer& ser) { + ser.from_ptr(&mode); + ser.from_ptr(&tree_tex_id); + ser.from_pod_vector(&vertex_index_stream); + ser.from_pod_vector(&instance_groups); + ser.from_ptr(&num_triangles); +} + +void TieWindInstance::serialize(Serializer& ser) { + ser.from_ptr(&matrix); + ser.from_ptr(&wind_idx); + ser.from_ptr(&stiffness); +} + void TfragTree::serialize(Serializer& ser) { ser.from_ptr(&kind); @@ -38,6 +52,24 @@ void TieTree::serialize(Serializer& ser) { draw.serialize(ser); } + if (ser.is_saving()) { + ser.save(instanced_wind_draws.size()); + } else { + instanced_wind_draws.resize(ser.load()); + } + for (auto& draw : instanced_wind_draws) { + draw.serialize(ser); + } + + if (ser.is_saving()) { + ser.save(instance_info.size()); + } else { + instance_info.resize(ser.load()); + } + for (auto& inst : instance_info) { + inst.serialize(ser); + } + ser.from_pod_vector(&vertices); ser.from_pod_vector(&colors); bvh.serialize(ser); diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h index c523556d01..463e27f9e5 100644 --- a/common/custom_data/Tfrag3Data.h +++ b/common/custom_data/Tfrag3Data.h @@ -1,6 +1,7 @@ #pragma once // Data format for the tfrag3 renderer. +#include #include "common/common_types.h" #include "common/dma/gs.h" @@ -10,7 +11,7 @@ namespace tfrag3 { -constexpr int TFRAG3_VERSION = 7; +constexpr int TFRAG3_VERSION = 8; // These vertices should be uploaded to the GPU at load time and don't change struct PreloadedVertex { @@ -51,6 +52,27 @@ struct StripDraw { void serialize(Serializer& ser); }; +struct InstancedStripDraw { + DrawMode mode; // the OpenGL draw settings. + u32 tree_tex_id = 0; // the texture that should be bound for the draw + + // the list of vertices in the draw. This includes the restart code of UINT32_MAX that OpenGL + // will use to start a new strip. + std::vector vertex_index_stream; + + // the vertex stream above is segmented by instance. + struct InstanceGroup { + u32 num = 0; // number of vertex indices in this group + u32 instance_idx = 0; // the instance they belong to + u32 vis_idx = 0; + }; + std::vector instance_groups; + + // for debug counting. + u32 num_triangles = 0; + void serialize(Serializer& ser); +}; + // node in the BVH. struct VisNode { math::Vector bsphere; // the bounding sphere, in meters (4096 = 1 game meter). w = rad @@ -114,6 +136,13 @@ struct TfragTree { void serialize(Serializer& ser); }; +struct TieWindInstance { + std::array matrix; + u16 wind_idx; + float stiffness; + void serialize(Serializer& ser); +}; + // A tie model struct TieTree { BVH bvh; @@ -121,7 +150,8 @@ struct TieTree { std::vector vertices; // mesh vertices std::vector colors; // vertex colors (pre-interpolation) - // TODO wind stuff + std::vector instanced_wind_draws; + std::vector instance_info; void serialize(Serializer& ser); }; diff --git a/decompiler/level_extractor/BspHeader.cpp b/decompiler/level_extractor/BspHeader.cpp index f11c326e35..56704afde0 100644 --- a/decompiler/level_extractor/BspHeader.cpp +++ b/decompiler/level_extractor/BspHeader.cpp @@ -1041,7 +1041,7 @@ void ProxyPrototypeArrayTie::read_from_file(TypedRef ref, prototype_array_tie.read_from_file( get_and_check_ref_to_basic(ref, "prototype-array-tie", "prototype-array-tie", dts), dts, stats); - // TODO wind + wind_vectors = deref_label(get_field_ref(ref, "wind-vectors", dts)); } std::string ProxyPrototypeArrayTie::print(const PrintSettings& settings, int indent) const { diff --git a/decompiler/level_extractor/BspHeader.h b/decompiler/level_extractor/BspHeader.h index 6d40bf572e..6ee2de7e7b 100644 --- a/decompiler/level_extractor/BspHeader.h +++ b/decompiler/level_extractor/BspHeader.h @@ -352,7 +352,7 @@ struct ProxyPrototypeArrayTie { std::string print(const PrintSettings& settings, int indent) const; PrototypeArrayTie prototype_array_tie; - // todo wind vectors. + Ref wind_vectors; }; struct DrawableTreeInstanceTie : public DrawableTree { diff --git a/decompiler/level_extractor/extract_tfrag.cpp b/decompiler/level_extractor/extract_tfrag.cpp index fbe1954728..7bb0fe7c19 100644 --- a/decompiler/level_extractor/extract_tfrag.cpp +++ b/decompiler/level_extractor/extract_tfrag.cpp @@ -2114,6 +2114,19 @@ void extract_time_of_day(const level_tools::DrawableTreeTfrag* tree, tfrag3::Tfr } } +void merge_groups(std::vector& grps) { + std::vector result; + result.push_back(grps.at(0)); + for (size_t i = 1; i < grps.size(); i++) { + if (grps[i].vis_idx == result.back().vis_idx) { + result.back().num += grps[i].num; + } else { + result.push_back(grps[i]); + } + } + std::swap(result, grps); +} + } // namespace void extract_tfrag(const level_tools::DrawableTreeTfrag* tree, @@ -2190,6 +2203,7 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree, str.vis_idx = it->second; } } + merge_groups(draw.vis_groups); } out.tfrag_trees.push_back(this_tree); } diff --git a/decompiler/level_extractor/extract_tie.cpp b/decompiler/level_extractor/extract_tie.cpp index 104173cbbb..a09ad46931 100644 --- a/decompiler/level_extractor/extract_tie.cpp +++ b/decompiler/level_extractor/extract_tie.cpp @@ -165,6 +165,8 @@ struct TieInstanceFragInfo { // this contains indices into the shared palette. std::vector color_indices; + // in the PC port format, we upload a single giant time of day color. this points to the offset + // of the colors from this frag instance. u16 color_index_offset_in_big_palette = -1; math::Vector lq_colors_ui(u32 qw) const { @@ -336,6 +338,25 @@ std::array extract_tie_matrix(const u16* data) { constexpr int GEOM_IDX = 1; // todo 0 or 1?? +/*! + * Confirm that the initial value of all wind vectors is 0. + */ +void check_wind_vectors_zero(const std::vector& protos, Ref wind_ref) { + u16 max_wind = 0; + for (auto& proto : protos) { + for (auto& inst : proto.instances) { + max_wind = std::max(inst.wind_index, max_wind); + } + } + u32 wind_words = max_wind; + wind_words *= 4; + for (size_t i = 0; i < wind_words; i++) { + auto& word = wind_ref.data->words_by_seg.at(wind_ref.seg).at(wind_ref.byte_offset / 4 + i); + assert(word.kind() == LinkedWord::PLAIN_DATA); + assert(word.data == 0); + } +} + std::vector collect_instance_info( const level_tools::DrawableInlineArrayInstanceTie* instances, const std::vector* protos) { @@ -416,6 +437,9 @@ void update_proto_info(std::vector* out, info.uses_generic = (proto.flags == 2); info.name = proto.name; info.stiffness = proto.stiffness; + if (info.stiffness != 0) { + fmt::print("--------------------proto {} wind {}\n", info.name, info.stiffness); + } info.generic_flag = proto.flags & 2; info.time_of_day_colors.resize(proto.time_of_day.height); @@ -1674,13 +1698,6 @@ std::string debug_dump_proto_to_obj(const TieProtoInfo& proto) { math::Vector transform_tie(const std::array mat, const math::Vector3f& pt) { auto temp = mat[0] * pt.x() + mat[1] * pt.y() + mat[2] * pt.z() + mat[3]; - - // math::Vector4f temp; - // temp.x() = pt.x(); - // temp.y() = pt.y(); - // temp.z() = pt.z(); - // temp += mat[3]; - math::Vector3f result; result.x() = temp.x(); result.y() = temp.y(); @@ -1872,17 +1889,33 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree, tfrag3::Level& lev, const TextureDB& tdb, const std::vector& protos) { - // our current approach for static draws is just to flatten to giant mesh. + // our current approach for static draws is just to flatten to giant mesh, except for wind stuff. + std::unordered_map> static_draws_by_tex; + std::unordered_map> wind_draws_by_tex; - std::unordered_map> draws_by_tex; - - std::unordered_map interp_hack_colors; + // renumbering instances. + // loop over all prototypes for (auto& proto : protos) { + // bool using_wind = true; // hack, for testing + bool using_wind = proto.stiffness != 0.f; + + // loop over instances of the prototypes for (auto& inst : proto.instances) { + u32 wind_instance_idx = tree.instance_info.size(); + if (using_wind) { + tfrag3::TieWindInstance wind_instance_info; + wind_instance_info.wind_idx = inst.wind_index; + wind_instance_info.stiffness = proto.stiffness; + wind_instance_info.matrix = inst.mat; + tree.instance_info.push_back(wind_instance_info); + } + + // loop over fragments of the prototype for (size_t frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) { auto& frag = proto.frags[frag_idx]; auto& ifrag = inst.frags.at(frag_idx); + // loop over triangle strips within the fragment for (auto& strip : frag.strips) { // what texture are we using? u32 combo_tex = strip.adgif.combo_tex; @@ -1930,57 +1963,111 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree, DrawMode mode = process_draw_mode(strip.adgif, frag.prog_info.misc_x == 0, frag.has_magic_tex0_bit); - // okay, we now have a texture and draw mode, let's see if we can add to an existing... - auto existing_draws_in_tex = draws_by_tex.find(idx_in_lev_data); - tfrag3::StripDraw* draw_to_add_to = nullptr; - if (existing_draws_in_tex != draws_by_tex.end()) { - for (auto idx : existing_draws_in_tex->second) { - if (tree.static_draws.at(idx).mode == mode) { - draw_to_add_to = &tree.static_draws[idx]; + if (using_wind) { + // okay, we now have a texture and draw mode, let's see if we can add to an existing... + auto existing_draws_in_tex = wind_draws_by_tex.find(idx_in_lev_data); + tfrag3::InstancedStripDraw* draw_to_add_to = nullptr; + if (existing_draws_in_tex != wind_draws_by_tex.end()) { + for (auto idx : existing_draws_in_tex->second) { + if (tree.instanced_wind_draws.at(idx).mode == mode) { + draw_to_add_to = &tree.instanced_wind_draws[idx]; + } } } - } - if (!draw_to_add_to) { - // nope, need to create a new draw - tree.static_draws.emplace_back(); - draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1); - draw_to_add_to = &tree.static_draws.back(); - draw_to_add_to->mode = mode; - draw_to_add_to->tree_tex_id = idx_in_lev_data; - } - - // now we have a draw, time to add vertices - tfrag3::StripDraw::VisGroup vgroup; - vgroup.vis_idx = inst.vis_id; // associate with the tfrag for culling - vgroup.num = strip.verts.size() + 1; // one for the primitive restart! - draw_to_add_to->num_triangles += strip.verts.size() - 2; - for (auto& vert : strip.verts) { - tfrag3::PreloadedVertex vtx; - // todo fields - auto tf = transform_tie(inst.mat, vert.pos); - vtx.x = tf.x(); - vtx.y = tf.y(); - vtx.z = tf.z(); - vtx.s = vert.tex.x(); - vtx.t = vert.tex.y(); - vtx.q = vert.tex.z(); - // if this is true, we can remove a divide in the shader - assert(vtx.q == 1.f); - if (vert.color_index_index == UINT32_MAX) { - vtx.color_index = 0; - } else { - vtx.color_index = ifrag.color_indices.at(vert.color_index_index); - assert(vert.color_index_index < ifrag.color_indices.size()); - vtx.color_index += ifrag.color_index_offset_in_big_palette; + if (!draw_to_add_to) { + // nope, need to create a new draw + tree.instanced_wind_draws.emplace_back(); + wind_draws_by_tex[idx_in_lev_data].push_back(tree.instanced_wind_draws.size() - 1); + draw_to_add_to = &tree.instanced_wind_draws.back(); + draw_to_add_to->mode = mode; + draw_to_add_to->tree_tex_id = idx_in_lev_data; } - size_t vert_idx = tree.vertices.size(); - tree.vertices.push_back(vtx); - draw_to_add_to->vertex_index_stream.push_back(vert_idx); + // now we have a draw, time to add vertices + tfrag3::InstancedStripDraw::InstanceGroup igroup; + igroup.vis_idx = inst.vis_id; // associate with the tfrag for culling + igroup.num = strip.verts.size() + 1; // one for the primitive restart! + igroup.instance_idx = wind_instance_idx; + draw_to_add_to->num_triangles += strip.verts.size() - 2; + // note: this is a bit wasteful to duplicate the xyz/stq. + for (auto& vert : strip.verts) { + tfrag3::PreloadedVertex vtx; + vtx.x = vert.pos.x(); + vtx.y = vert.pos.y(); + vtx.z = vert.pos.z(); + vtx.s = vert.tex.x(); + vtx.t = vert.tex.y(); + vtx.q = vert.tex.z(); + // if this is true, we can remove a divide in the shader + assert(vtx.q == 1.f); + if (vert.color_index_index == UINT32_MAX) { + vtx.color_index = 0; + } else { + vtx.color_index = ifrag.color_indices.at(vert.color_index_index); + assert(vert.color_index_index < ifrag.color_indices.size()); + vtx.color_index += ifrag.color_index_offset_in_big_palette; + } + + size_t vert_idx = tree.vertices.size(); + tree.vertices.push_back(vtx); + draw_to_add_to->vertex_index_stream.push_back(vert_idx); + } + draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX); + draw_to_add_to->instance_groups.push_back(igroup); + } else { + // okay, we now have a texture and draw mode, let's see if we can add to an existing... + auto existing_draws_in_tex = static_draws_by_tex.find(idx_in_lev_data); + tfrag3::StripDraw* draw_to_add_to = nullptr; + if (existing_draws_in_tex != static_draws_by_tex.end()) { + for (auto idx : existing_draws_in_tex->second) { + if (tree.static_draws.at(idx).mode == mode) { + draw_to_add_to = &tree.static_draws[idx]; + } + } + } + + if (!draw_to_add_to) { + // nope, need to create a new draw + tree.static_draws.emplace_back(); + static_draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1); + draw_to_add_to = &tree.static_draws.back(); + draw_to_add_to->mode = mode; + draw_to_add_to->tree_tex_id = idx_in_lev_data; + } + + // now we have a draw, time to add vertices + tfrag3::StripDraw::VisGroup vgroup; + vgroup.vis_idx = inst.vis_id; // associate with the tfrag for culling + vgroup.num = strip.verts.size() + 1; // one for the primitive restart! + draw_to_add_to->num_triangles += strip.verts.size() - 2; + for (auto& vert : strip.verts) { + tfrag3::PreloadedVertex vtx; + // todo fields + auto tf = transform_tie(inst.mat, vert.pos); + vtx.x = tf.x(); + vtx.y = tf.y(); + vtx.z = tf.z(); + vtx.s = vert.tex.x(); + vtx.t = vert.tex.y(); + vtx.q = vert.tex.z(); + // if this is true, we can remove a divide in the shader + assert(vtx.q == 1.f); + if (vert.color_index_index == UINT32_MAX) { + vtx.color_index = 0; + } else { + vtx.color_index = ifrag.color_indices.at(vert.color_index_index); + assert(vert.color_index_index < ifrag.color_indices.size()); + vtx.color_index += ifrag.color_index_offset_in_big_palette; + } + + size_t vert_idx = tree.vertices.size(); + tree.vertices.push_back(vtx); + draw_to_add_to->vertex_index_stream.push_back(vert_idx); + } + draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX); + draw_to_add_to->vis_groups.push_back(vgroup); } - draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX); - draw_to_add_to->vis_groups.push_back(vgroup); } } } @@ -1992,6 +2079,33 @@ void add_vertices_and_static_draw(tfrag3::TieTree& tree, }); } +void merge_groups(std::vector& grps) { + std::vector result; + result.push_back(grps.at(0)); + for (size_t i = 1; i < grps.size(); i++) { + if (grps[i].vis_idx == result.back().vis_idx && + grps[i].instance_idx == result.back().instance_idx) { + result.back().num += grps[i].num; + } else { + result.push_back(grps[i]); + } + } + std::swap(result, grps); +} + +void merge_groups(std::vector& grps) { + std::vector result; + result.push_back(grps.at(0)); + for (size_t i = 1; i < grps.size(); i++) { + if (grps[i].vis_idx == result.back().vis_idx) { + result.back().num += grps[i].num; + } else { + result.push_back(grps[i]); + } + } + std::swap(result, grps); +} + void extract_tie(const level_tools::DrawableTreeInstanceTie* tree, const std::string& debug_name, const std::vector& tex_map, @@ -2034,6 +2148,7 @@ void extract_tie(const level_tools::DrawableTreeInstanceTie* tree, auto info = collect_instance_info(as_instance_array, &tree->prototypes.prototype_array_tie.data); update_proto_info(&info, tex_map, tex_db, tree->prototypes.prototype_array_tie.data); + check_wind_vectors_zero(info, tree->prototypes.wind_vectors); // debug_print_info(info); emulate_tie_prototype_program(info); emulate_tie_instance_program(info); @@ -2064,6 +2179,20 @@ void extract_tie(const level_tools::DrawableTreeInstanceTie* tree, str.vis_idx = it->second; } } + merge_groups(draw.vis_groups); + } + + for (auto& draw : this_tree.instanced_wind_draws) { + for (auto& str : draw.instance_groups) { + auto it = instance_parents.find(str.vis_idx); + if (it == instance_parents.end()) { + str.vis_idx = UINT32_MAX; + } else { + str.vis_idx = it->second; + } + } + + merge_groups(draw.instance_groups); } this_tree.colors = full_palette.colors; diff --git a/docs/scratch/tie_format.txt b/docs/scratch/tie_format.txt index 2cf9f48584..ce7bfa195a 100644 --- a/docs/scratch/tie_format.txt +++ b/docs/scratch/tie_format.txt @@ -39,12 +39,15 @@ vf10+ is origin, vf20+ is the SHRUB MATRIX!!! vmulax.xyzw acc, vf20, vf10 vmadday.xyzw acc, vf21, vf10 vmaddz.xyzw vf10, vf22, vf10 + vmulax.xyzw acc, vf20, vf11 vmadday.xyzw acc, vf21, vf11 vmaddz.xyzw vf11, vf22, vf11 + vmulax.xyzw acc, vf20, vf12 vmadday.xyzw acc, vf21, vf12 vmaddz.xyzw vf12, vf22, vf12 + vmulax.xyzw acc, vf20, vf13 vmadday.xyzw acc, vf21, vf13 vmaddaz.xyzw acc, vf22, vf13 @@ -61,3 +64,63 @@ For the final instance in the bucket, it will be a ret. 96: color0 + + +# Wind +Wind is only applied if "stiffness" is nonzero. + +The first wind data is from the prototype wind-vectors: +s5 = wind_idx * 16 + wind_vectors + +The second wind data is +s3 = wind_work + ((wind_idx + wind_work.wind_time) & 63) * 16 + = wind_work.wind_array[(wind_idx + wind_work.wind_time) & 63] + +```asm +ld s1, 8(s5) # load wind vector 1 +pextlw s1, r0, s1 # convert to 2x 64 bits, by shifting left +qmtc2.i vf18, s1 # put in vf + +ld s2, 0(s5) # load wind vector 0 +pextlw s3, r0, s2 # convert to 2x 64 bits, by shifting left +qmtc2.i vf17, s3 # put in vf + +lqc2 vf16, 12(s3) # load wind vector + +vmula.xyzw acc, vf16, vf1 # acc = vf16 +vmsubax.xyzw acc, vf18, vf19 # acc = vf16 - vf18 * wind_const.x +vmsuby.xyzw vf16, vf17, vf19 +# vf16 -= (vf18 * wind_const.x) + (vf17 * wind_const.y) + +vmulaz.xyzw acc, vf16, vf19 # acc = vf16 * wind_const.z +vmadd.xyzw vf18, vf1, vf18 +# vf18 += vf16 * wind_const.z + +vmulaz.xyzw acc, vf18, vf19 # acc = vf18 * wind_const.z +vmadd.xyzw vf17, vf17, vf1 +# vf17 += vf18 * wind_const.z + +vitof12.xyzw vf11, vf11 # normal convert +vitof12.xyzw vf12, vf12 # normal convert + +vminiw.xyzw vf17, vf17, vf0 +qmfc2.i s3, vf18 +vmaxw.xyzw vf27, vf17, vf19 +ppacw s3, r0, s3 +vmulw.xyzw vf27, vf27, vf15 +vmulax.yw acc, vf0, vf0 +vmulay.xz acc, vf27, vf10 +vmadd.xyzw vf10, vf1, vf10 +qmfc2.i s2, vf27 +vmulax.yw acc, vf0, vf0 +vmulay.xz acc, vf27, vf11 +vmadd.xyzw vf11, vf1, vf11 +ppacw s2, r0, s2 +vmulax.yw acc, vf0, vf0 +vmulay.xz acc, vf27, vf12 +vmadd.xyzw vf12, vf1, vf12 + +if not paused +sd s3, 8(s5) +sd s2, 0(s5) +``` \ No newline at end of file diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.cpp b/game/graphics/opengl_renderer/tfrag/Tie3.cpp index 4b40d96210..5060adc9ba 100644 --- a/game/graphics/opengl_renderer/tfrag/Tie3.cpp +++ b/game/graphics/opengl_renderer/tfrag/Tie3.cpp @@ -24,6 +24,7 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s if (m_level_name != level) { Timer tie_setup_timer; + m_wind_vectors.clear(); // We changed level! fmt::print("TIE3 level change! {} -> {}\n", m_level_name, level); fmt::print(" Removing old level...\n"); @@ -35,16 +36,25 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s size_t vis_temp_len = 0; size_t max_draw = 0; size_t max_idx_per_draw = 0; + u16 max_wind_idx = 0; // set up each tree for (size_t tree_idx = 0; tree_idx < lev_data->tie_trees.size(); tree_idx++) { size_t idx_buffer_len = 0; + size_t wind_idx_buffer_len = 0; const auto& tree = lev_data->tie_trees[tree_idx]; max_draw = std::max(tree.static_draws.size(), max_draw); for (auto& draw : tree.static_draws) { idx_buffer_len += draw.vertex_index_stream.size(); max_idx_per_draw = std::max(max_idx_per_draw, draw.vertex_index_stream.size()); } + for (auto& draw : tree.instanced_wind_draws) { + wind_idx_buffer_len += draw.vertex_index_stream.size(); + max_idx_per_draw = std::max(max_idx_per_draw, draw.vertex_index_stream.size()); + } + for (auto& inst : tree.instance_info) { + max_wind_idx = std::max(max_wind_idx, inst.wind_idx); + } time_of_day_count = std::max(tree.colors.size(), time_of_day_count); u32 verts = tree.vertices.size(); fmt::print(" tree {} has {} verts ({} kB) and {} draws\n", tree_idx, verts, @@ -56,6 +66,8 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s m_trees[tree_idx].draws = &tree.static_draws; // todo - should we just copy this? m_trees[tree_idx].colors = &tree.colors; m_trees[tree_idx].vis = &tree.bvh; + m_trees[tree_idx].instance_info = &tree.instance_info; + m_trees[tree_idx].wind_draws = &tree.instanced_wind_draws; vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size()); m_trees[tree_idx].tod_cache = swizzle_time_of_day(tree.colors); glBindBuffer(GL_ARRAY_BUFFER, m_trees[tree_idx].vertex_buffer); @@ -96,6 +108,25 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW); m_trees[tree_idx].index_list.resize(idx_buffer_len); + if (wind_idx_buffer_len > 0) { + m_trees[tree_idx].wind_matrix_cache.resize(tree.instance_info.size()); + m_trees[tree_idx].has_wind = true; + glGenBuffers(1, &m_trees[tree_idx].wind_vertex_index_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_trees[tree_idx].wind_vertex_index_buffer); + std::vector temp; + temp.resize(wind_idx_buffer_len); + u32 off = 0; + for (auto& draw : tree.instanced_wind_draws) { + m_trees[tree_idx].wind_vertex_index_offsets.push_back(off); + memcpy(temp.data() + off, draw.vertex_index_stream.data(), + draw.vertex_index_stream.size() * sizeof(u32)); + off += draw.vertex_index_stream.size(); + } + + glBufferData(GL_ELEMENT_ARRAY_BUFFER, wind_idx_buffer_len * sizeof(u32), temp.data(), + GL_STATIC_DRAW); + } + glActiveTexture(GL_TEXTURE1); glGenTextures(1, &m_trees[tree_idx].time_of_day_texture); glBindTexture(GL_TEXTURE_1D, m_trees[tree_idx].time_of_day_texture); @@ -136,11 +167,124 @@ void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_s fmt::print("level max time of day: {}\n", time_of_day_count); assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT); + fmt::print("wind: {}\n", max_wind_idx); + m_wind_vectors.resize(4 * max_wind_idx + 4); // 4x u32's per wind. + m_level_name = level; fmt::print("TIE setup: {:.3f}\n", tie_setup_timer.getSeconds()); } } +void vector_min_in_place(math::Vector4f& v, float val) { + for (int i = 0; i < 4; i++) { + if (v[i] > val) { + v[i] = val; + } + } +} + +math::Vector4f vector_max(const math::Vector4f& v, float val) { + math::Vector4f result; + for (int i = 0; i < 4; i++) { + result[i] = std::max(val, v[i]); + } + return result; +} + +void do_wind_math(u16 wind_idx, + float* wind_vector_data, + const Tie3::WindWork& wind_work, + float stiffness, + std::array& mat) { + float* my_vector = wind_vector_data + (4 * wind_idx); + const auto& work_vector = wind_work.wind_array[(wind_work.wind_time + wind_idx) & 63]; + constexpr float cx = 0.5; + constexpr float cy = 100.0; + constexpr float cz = 0.0166; + constexpr float cw = -1.0; + + // ld s1, 8(s5) # load wind vector 1 + // pextlw s1, r0, s1 # convert to 2x 64 bits, by shifting left + // qmtc2.i vf18, s1 # put in vf + float vf18_x = my_vector[2]; + float vf18_z = my_vector[3]; + + // ld s2, 0(s5) # load wind vector 0 + // pextlw s3, r0, s2 # convert to 2x 64 bits, by shifting left + // qmtc2.i vf17, s3 # put in vf + float vf17_x = my_vector[0]; + float vf17_z = my_vector[1]; + + // lqc2 vf16, 12(s3) # load wind vector + math::Vector4f vf16 = work_vector; + + // vmula.xyzw acc, vf16, vf1 # acc = vf16 + // vmsubax.xyzw acc, vf18, vf19 # acc = vf16 - vf18 * wind_const.x + // vmsuby.xyzw vf16, vf17, vf19 + //# vf16 -= (vf18 * wind_const.x) + (vf17 * wind_const.y) + vf16.x() -= cx * vf18_x + cy * vf17_x; + vf16.z() -= cx * vf18_z + cy * vf17_z; + + // vmulaz.xyzw acc, vf16, vf19 # acc = vf16 * wind_const.z + // vmadd.xyzw vf18, vf1, vf18 + //# vf18 += vf16 * wind_const.z + math::Vector4f vf18(vf18_x, 0.f, vf18_z, 0.f); + vf18 += vf16 * cz; + + // vmulaz.xyzw acc, vf18, vf19 # acc = vf18 * wind_const.z + // vmadd.xyzw vf17, vf17, vf1 + //# vf17 += vf18 * wind_const.z + math::Vector4f vf17(vf17_x, 0.f, vf17_z, 0.f); + vf17 += vf18 * cz; + + // vitof12.xyzw vf11, vf11 # normal convert + // vitof12.xyzw vf12, vf12 # normal convert + + // vminiw.xyzw vf17, vf17, vf0 + vector_min_in_place(vf17, 1.f); + + // qmfc2.i s3, vf18 + // ppacw s3, r0, s3 + + // vmaxw.xyzw vf27, vf17, vf19 + auto vf27 = vector_max(vf17, cw); + + // vmulw.xyzw vf27, vf27, vf15 + vf27 *= stiffness; + + // vmulax.yw acc, vf0, vf0 + // vmulay.xz acc, vf27, vf10 + // vmadd.xyzw vf10, vf1, vf10 + mat[0].x() += vf27.x() * mat[0].y(); + mat[0].z() += vf27.z() * mat[0].y(); + + // qmfc2.i s2, vf27 + if (!wind_work.paused) { + my_vector[0] = vf27.x(); + my_vector[1] = vf27.z(); + my_vector[2] = vf18.x(); + my_vector[3] = vf18.z(); + } + + // vmulax.yw acc, vf0, vf0 + // vmulay.xz acc, vf27, vf11 + // vmadd.xyzw vf11, vf1, vf11 + mat[1].x() += vf27.x() * mat[1].y(); + mat[1].z() += vf27.z() * mat[1].y(); + + // ppacw s2, r0, s2 + // vmulax.yw acc, vf0, vf0 + // vmulay.xz acc, vf27, vf12 + // vmadd.xyzw vf12, vf1, vf12 + mat[2].x() += vf27.x() * mat[2].y(); + mat[2].z() += vf27.z() * mat[2].y(); + + // + // if not paused + // sd s3, 8(s5) + // sd s2, 0(s5) +} + void Tie3::discard_tree_cache() { for (auto tex : m_textures) { glBindTexture(GL_TEXTURE_2D, tex); @@ -154,6 +298,9 @@ void Tie3::discard_tree_cache() { glDeleteBuffers(1, &tree.vertex_buffer); glDeleteBuffers(1, &tree.index_buffer); glDeleteVertexArrays(1, &tree.vao); + if (tree.has_wind) { + glDeleteBuffers(1, &tree.wind_vertex_index_buffer); + } } m_trees.clear(); @@ -206,6 +353,10 @@ void Tie3::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfi memcpy(&m_pc_port_data, pc_port_data.data, sizeof(TfragPcPortData)); m_pc_port_data.level_name[11] = '\0'; + auto wind_data = dma.read_and_advance(); + assert(wind_data.size_bytes == sizeof(WindWork)); + memcpy(&m_wind_data, wind_data.data, sizeof(WindWork)); + while (dma.current_tag_offset() != render_state->next_bucket) { dma.read_and_advance(); } @@ -252,6 +403,118 @@ void Tie3::render_all_trees(const TfragRenderSettings& settings, m_all_tree_time.add(all_tree_timer.getSeconds()); } +void Tie3::render_tree_wind(int idx, + const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + auto& tree = m_trees.at(idx); + if (tree.wind_draws->empty()) { + return; + } + + // note: this isn't the most efficient because we might compute wind matrices for invisible + // instances. TODO: add vis ids to the instance info to avoid this + memset(tree.wind_matrix_cache.data(), 0, sizeof(float) * 16 * tree.wind_matrix_cache.size()); + auto& cam_bad = settings.math_camera; + std::array cam; + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + cam[i][j] = cam_bad.data()[i * 4 + j]; + } + } + + for (size_t inst_id = 0; inst_id < tree.instance_info->size(); inst_id++) { + auto& info = tree.instance_info->operator[](inst_id); + auto& out = tree.wind_matrix_cache[inst_id]; + // auto& mat = tree.instance_info->operator[](inst_id).matrix; + auto mat = info.matrix; + + assert(info.wind_idx * 4 <= m_wind_vectors.size()); + do_wind_math(info.wind_idx, m_wind_vectors.data(), m_wind_data, + info.stiffness * m_wind_multiplier, mat); + + // vmulax.xyzw acc, vf20, vf10 + // vmadday.xyzw acc, vf21, vf10 + // vmaddz.xyzw vf10, vf22, vf10 + out[0] = cam[0] * mat[0].x() + cam[1] * mat[0].y() + cam[2] * mat[0].z(); + + // vmulax.xyzw acc, vf20, vf11 + // vmadday.xyzw acc, vf21, vf11 + // vmaddz.xyzw vf11, vf22, vf11 + out[1] = cam[0] * mat[1].x() + cam[1] * mat[1].y() + cam[2] * mat[1].z(); + + // vmulax.xyzw acc, vf20, vf12 + // vmadday.xyzw acc, vf21, vf12 + // vmaddz.xyzw vf12, vf22, vf12 + out[2] = cam[0] * mat[2].x() + cam[1] * mat[2].y() + cam[2] * mat[2].z(); + + // vmulax.xyzw acc, vf20, vf13 + // vmadday.xyzw acc, vf21, vf13 + // vmaddaz.xyzw acc, vf22, vf13 + // vmaddw.xyzw vf13, vf23, vf0 + out[3] = cam[0] * mat[3].x() + cam[1] * mat[3].y() + cam[2] * mat[3].z() + cam[3]; + } + + int last_texture = -1; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, tree.wind_vertex_index_buffer); + + for (size_t draw_idx = 0; draw_idx < tree.wind_draws->size(); draw_idx++) { + const auto& draw = tree.wind_draws->operator[](draw_idx); + + if ((int)draw.tree_tex_id != last_texture) { + glBindTexture(GL_TEXTURE_2D, m_textures.at(draw.tree_tex_id)); + last_texture = draw.tree_tex_id; + } + auto double_draw = setup_tfrag_shader(settings, render_state, draw.mode); + + int off = 0; + for (auto& grp : draw.instance_groups) { + if (!m_debug_all_visible && !m_cache.vis_temp.at(grp.vis_idx)) { + off += grp.num; + continue; // invisible, skip. + } + + glUniformMatrix4fv( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "camera"), 1, GL_FALSE, + tree.wind_matrix_cache.at(grp.instance_idx)[0].data()); + + prof.add_draw_call(); + prof.add_tri(grp.num); + + tree.perf.draws++; + tree.perf.wind_draws++; + tree.perf.verts += grp.num; + + glDrawElements(GL_TRIANGLE_STRIP, grp.num, GL_UNSIGNED_INT, + (void*)((off + tree.wind_vertex_index_offsets.at(draw_idx)) * sizeof(u32))); + off += grp.num; + + switch (double_draw.kind) { + case DoubleDrawKind::NONE: + break; + case DoubleDrawKind::AFAIL_NO_DEPTH_WRITE: + tree.perf.draws++; + tree.perf.wind_draws++; + tree.perf.verts += grp.num; + prof.add_draw_call(); + prof.add_tri(grp.num); + glUniform1f( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"), + -10.f); + glUniform1f( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"), + double_draw.aref); + glDepthMask(GL_FALSE); + glDrawElements(GL_TRIANGLE_STRIP, draw.vertex_index_stream.size(), GL_UNSIGNED_INT, + (void*)0); + break; + default: + assert(false); + } + } + } +} + void Tie3::render_tree(int idx, const TfragRenderSettings& settings, SharedRenderState* render_state, @@ -261,6 +524,7 @@ void Tie3::render_tree(int idx, tree.perf.draws = 0; tree.perf.verts = 0; tree.perf.full_draws = 0; + tree.perf.wind_draws = 0; if (m_color_result.size() < tree.colors->size()) { m_color_result.resize(tree.colors->size()); @@ -388,6 +652,12 @@ void Tie3::render_tree(int idx, render_state->shaders[ShaderId::TFRAG3].activate(); } } + + if (!m_hide_wind) { + auto wind_prof = prof.make_scoped_child("wind"); + render_tree_wind(idx, settings, render_state, wind_prof); + } + glBindVertexArray(0); tree.perf.draw_time.add(draw_timer.getSeconds()); tree.perf.tree_time.add(tree_timer.getSeconds()); @@ -403,6 +673,8 @@ void Tie3::draw_debug_window() { ImGui::Checkbox("Wireframe", &m_debug_wireframe); ImGui::SameLine(); ImGui::Checkbox("All Visible", &m_debug_all_visible); + ImGui::Checkbox("Hide Wind", &m_hide_wind); + ImGui::SliderFloat("Wind Multiplier", &m_wind_multiplier, 0., 40.f); ImGui::Separator(); for (u32 i = 0; i < m_trees.size(); i++) { auto& perf = m_trees[i].perf; @@ -410,6 +682,7 @@ void Tie3::draw_debug_window() { ImGui::Text("index data bytes: %d", perf.index_upload); ImGui::Text("time of days: %d", (int)m_trees[i].colors->size()); ImGui::Text("draw: %d, full: %d, verts: %d", perf.draws, perf.full_draws, perf.verts); + ImGui::Text("wind draw: %d", perf.wind_draws); ImGui::Text("total: %.2f", perf.tree_time.get()); ImGui::Text("cull: %.2f index: %.2f tod: %.2f setup: %.2f draw: %.2f", perf.cull_time.get() * 1000.f, perf.index_time.get() * 1000.f, diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.h b/game/graphics/opengl_renderer/tfrag/Tie3.h index a3cd07abc8..c29863dc9c 100644 --- a/game/graphics/opengl_renderer/tfrag/Tie3.h +++ b/game/graphics/opengl_renderer/tfrag/Tie3.h @@ -23,8 +23,23 @@ class Tie3 : public BucketRenderer { ScopedProfilerNode& prof); void setup_for_level(const std::string& str, SharedRenderState* render_state); + struct WindWork { + u32 paused; + u32 pad[3]; + math::Vector4f wind_array[64]; + math::Vector4f wind_normal; + math::Vector4f wind_temp; + float wind_force[64]; + u32 wind_time; + u32 pad2[3]; + } m_wind_data; + private: void discard_tree_cache(); + void render_tree_wind(int idx, + const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof); struct Tree { GLuint vertex_buffer; GLuint index_buffer; @@ -33,15 +48,24 @@ class Tie3 : public BucketRenderer { GLuint vao; u32 vert_count; const std::vector* draws = nullptr; + const std::vector* wind_draws = nullptr; + const std::vector* instance_info = nullptr; const std::vector* colors = nullptr; const tfrag3::BVH* vis = nullptr; SwizzledTimeOfDay tod_cache; + std::vector> wind_matrix_cache; + + bool has_wind = false; + GLuint wind_vertex_index_buffer; + std::vector wind_vertex_index_offsets; + struct { u32 index_upload = 0; u32 verts = 0; u32 draws = 0; u32 full_draws = 0; // ones that have all visible + u32 wind_draws = 0; Filtered cull_time; Filtered index_time; Filtered tod_time; @@ -70,7 +94,14 @@ class Tie3 : public BucketRenderer { bool m_use_fast_time_of_day = true; bool m_debug_wireframe = false; bool m_debug_all_visible = false; + bool m_hide_wind = false; Filtered m_all_tree_time; TfragPcPortData m_pc_port_data; + + std::vector m_wind_vectors; // note: I suspect these are shared with shrub. + + float m_wind_multiplier = 1.f; + + static_assert(sizeof(WindWork) == 84 * 16); }; diff --git a/goal_src/engine/gfx/tie/tie-methods.gc b/goal_src/engine/gfx/tie/tie-methods.gc index ff24173520..cb9cf0fe55 100644 --- a/goal_src/engine/gfx/tie/tie-methods.gc +++ b/goal_src/engine/gfx/tie/tie-methods.gc @@ -254,6 +254,21 @@ obj ) +(defun add-pc-wind-data ((dma-buf dma-buffer)) + ;; packet to send 84 qw's + (let ((packet (the-as dma-packet (-> dma-buf base)))) + (set! (-> packet dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc 84)) + (set! (-> packet vif0) (new 'static 'vif-tag)) + (set! (-> packet vif1) (new 'static 'vif-tag :cmd (vif-cmd pc-port))) + (set! (-> dma-buf base) (the pointer (&+ packet 16))) + ) + (quad-copy! (-> dma-buf base) (the pointer (&- (the pointer *wind-work*) 4)) 84) + (set! (-> (the (pointer uint32) (-> dma-buf base)) 0) + (if (paused?) 1 0) + ) + (&+! (-> dma-buf base) (* 16 84)) + ) + (defun draw-drawable-tree-instance-tie ((arg0 drawable-tree-instance-tie) (arg1 level)) "Actually draw TIE instances. Will draw TIE, TIE-NEAR, and GENERIC" @@ -418,6 +433,7 @@ (reset! (-> *perf-stats* data 11)) ;;(draw-inline-array-prototype-tie-asm s1-1 s5-1 s4-1) (add-pc-tfrag3-data s1-1 (-> *level* data (-> (scratchpad-object terrain-context) bsp lev-index))) + (add-pc-wind-data s1-1) (read! (-> *perf-stats* data 11)) (update-wait-stats (-> *perf-stats* data 11) (the-as uint 0) (-> *prototype-tie-work* wait-to-spr)