From 67bb1193a83ee6f5c0bd6a088ea139e2cd4fe8ec Mon Sep 17 00:00:00 2001 From: ManDude <7569514+ManDude@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:31:38 +0000 Subject: [PATCH] [runtime] GPU sprite renderer (#1075) * sprite_3d first attempt * fixs * fix Q usage * ?? * attempt 2 * Update sprite_3d.vert * works-ish * works properly! * scissor fix * simplify shader * texture support except everything is broken for some reason * stuff * Update SpriteRenderer.cpp * meltdown * Revert "simplify shader" This reverts commit 97bd9b77bec2b61ba66360f499ab6d16489e5513. * Revert "Revert "simplify shader"" This reverts commit 32fb46ce908ef16ea827be1f8e499de6e4dda3e4. * fix blend * fix blend and prim * fix depth write and texture rendering * fix bugs * remove noperspective * fix and finalize sprite 3D vert shader * optimize 3D program * 2D sprites! almost first try * fixes + merge shader code * HUD sprite support and clean up code * oopsie * fix 3d sprites * minor cleanup and increase sprite buffer to 8k sprites * clang * replace some uses of `glBufferSubData` with `glBufferData` --- common/dma/gs.h | 2 +- docs/scratch/sprite_code_old.txt | 902 +++++++++++ .../opengl_renderer/DirectRenderer.cpp | 3 - game/graphics/opengl_renderer/Shader.cpp | 2 +- game/graphics/opengl_renderer/Shader.h | 1 + game/graphics/opengl_renderer/SkyBlendGPU.cpp | 3 +- .../opengl_renderer/SpriteRenderer.cpp | 1395 +++++------------ .../graphics/opengl_renderer/SpriteRenderer.h | 117 +- .../opengl_renderer/shaders/sky_blend.frag | 2 +- .../{sprite_cpu.frag => sprite_3d.frag} | 7 +- .../opengl_renderer/shaders/sprite_3d.vert | 185 +++ .../opengl_renderer/shaders/sprite_cpu.vert | 21 - .../opengl_renderer/shaders/tfrag3.vert | 3 +- .../graphics/opengl_renderer/tfrag/Tfrag3.cpp | 3 +- game/graphics/opengl_renderer/tfrag/Tie3.cpp | 3 +- game/graphics/texture/TexturePool.cpp | 2 +- goal_src/engine/draw/drawable.gc | 2 +- 17 files changed, 1599 insertions(+), 1054 deletions(-) create mode 100644 
docs/scratch/sprite_code_old.txt rename game/graphics/opengl_renderer/shaders/{sprite_cpu.frag => sprite_3d.frag} (92%) create mode 100644 game/graphics/opengl_renderer/shaders/sprite_3d.vert delete mode 100644 game/graphics/opengl_renderer/shaders/sprite_cpu.vert diff --git a/common/dma/gs.h b/common/dma/gs.h index 03a84136d5..acc464e31b 100644 --- a/common/dma/gs.h +++ b/common/dma/gs.h @@ -339,7 +339,7 @@ struct AdGifData { u64 tex1_addr; u64 mip_data; u64 mip_addr; - u64 clamp_data; + u64 clamp_data; // can also be zbuf!! u64 clamp_addr; u64 alpha_data; u64 alpha_addr; diff --git a/docs/scratch/sprite_code_old.txt b/docs/scratch/sprite_code_old.txt new file mode 100644 index 0000000000..e486a8f779 --- /dev/null +++ b/docs/scratch/sprite_code_old.txt @@ -0,0 +1,902 @@ +3D: + + + /* + + SpriteHud2DPacket packet; + memset(&packet, 0, sizeof(packet)); + // ilw.y vi08, 1(vi02) | nop vi08 = matrix + u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); + // assert(offset_selector == 0 || offset_selector == 1); + // moved this out of the loop. + // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat + // lq.xyzw vf26, 901(vi00) | nop + // lq.xyzw vf27, 902(vi00) | nop + // lq.xyzw vf28, 903(vi00) | nop + // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset + // vf30 + + // lqi.xyzw vf01, vi02 | nop + Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; + // lqi.xyzw vf05, vi02 | nop + Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; + // lqi.xyzw vf11, vi02 | nop + Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; + + // multiplications from the right column + transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); + + scales_vf01 = pos_vf01; // now used for something else. 
+ // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 + // vf12 is fog consts + fog_consts_vf12 = Vector4f(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, + m_frame_data.bonus); + // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 + // load offset selector for the next round. + // nop | madday.xyzw ACC, vf26, vf01 + // nop | maddz.xyzw vf02, vf27, vf01 + + // move.w vf05, vf00 | addw.z vf01, vf00, vf05 + // scales_vf01.z = sy + scales_vf01.z() = flags_vf05.w(); // start building the scale vector + flags_vf05.w() = 1.f; // what are we building in flags right now?? + + // nop | nop + // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 + Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); + flags_vf05.z() *= m_frame_data.deg_to_rad; + // nop | mul.xyzw vf03, vf02, vf29 + Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); + // nop | nop + // nop | nop + // nop | mulz.z vf04, vf05, vf05 (ts) + // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); + + // the load is for rotation stuff, + // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) + // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) + + // upcoming fcand with 0x3f, that checks all of them. + bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); + bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) 
+ + // L8: + // xgkick double buffer setup + // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q + scales_vf01.z() *= Q; // sy + scales_vf01.w() *= Q; // sx + + // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) + auto adgif_vf06 = m_frame_data.adgif_giftag; + + // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) + + // fmand vi01, vi06 | mul.xyz vf02, vf02, Q + transformed_pos_vf02.x() *= Q; + transformed_pos_vf02.y() *= Q; + transformed_pos_vf02.z() *= Q; + + // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 + scales_vf01.x() = scales_vf01.z(); // = sy + if (fmand_result) { + continue; // reject! + } + + // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) + // vf07 is first use adgif + + // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) + + // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 + // vf08 is second user adgif + offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; + // if (m_extra_debug) { + // ImGui::Text("sel %d", offset_selector); + // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); + // imgui_vec(hvdf_offset, "hvdf"); + // imgui_vec(transformed_pos_vf02, "tf'd"); + // } + + // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 + // vf09 is third user adgif + scales_vf01.x() *= scales_vf01.w(); // x = sx * sy + + // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) + // FIRST ADGIF IS adgif_vf06 + packet.adgif_giftag = adgif_vf06; + + // just do all 5 now. 
+ packet.user_adgif = m_adgif[sprite_idx]; + + offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); + + scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); + scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); + + scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) + + offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); + + scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); + scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); + bool use_first_giftag = offset_selector == 0; + + auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); + + scales_vf01.x() = std::min(scales_vf01.x(), 1.f); + + transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); + + color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? + + // ibne vi00, vi09, L6 | nop + if (transformed_pos_vf02.w() != 0) { + use_first_giftag = false; + } + + flag_vi07 = 0; // todo hack + Vector4f* xy_array = m_frame_data.xyz_array + flag_vi07; + math::Vector color_integer_vf11 = color_vf11.cast(); + + packet.color = color_integer_vf11; + + if (fcand_result) { + continue; // reject (could move earlier) + } + + Vector4f transformed[4]; + + flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; + // do rot + rot = sprite_quat_to_rot(flags_vf05.x(), flags_vf05.y(), flags_vf05.z()); + // fmt::print("root: {}\n", offset_pos_vf10.to_string_aligned()); + + // for (int i = 0; i < 3; i++) { + // fmt::print("M{}: {}\n", i, rot[i].to_string_aligned()); + // } + for (int i = 0; i < 4; i++) { + transformed[i] = + sprite_transform2(m_vec_data_2d[sprite_idx].xyz_sx, xy_array[i], camera_matrix, rot, + m_vec_data_2d[sprite_idx].sx(), m_vec_data_2d[sprite_idx].sy(), + m_3d_matrix_data.hvdf_offset, m_frame_data.pfog0, m_frame_data.fog_min, + m_frame_data.fog_max); + } + Vector4f xy0_vf19 = transformed[0]; + Vector4f xy1_vf20 = transformed[1]; + Vector4f xy2_vf21 = 
transformed[2]; + Vector4f xy3_vf22 = transformed[3]; + + packet.sprite_giftag = + use_first_giftag ? m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; + + Vector4f st0_vf06 = m_frame_data.st_array[0]; + Vector4f st1_vf07 = m_frame_data.st_array[1]; + Vector4f st2_vf08 = m_frame_data.st_array[2]; + Vector4f st3_vf09 = m_frame_data.st_array[3]; + + packet.st0 = st0_vf06; + packet.st1 = st1_vf07; + packet.st2 = st2_vf08; + packet.st3 = st3_vf09; + + auto xy0_vf19_int = (xy0_vf19).cast(); + auto xy1_vf20_int = (xy1_vf20).cast(); + auto xy2_vf21_int = (xy2_vf21).cast(); + auto xy3_vf22_int = (xy3_vf22).cast(); + + packet.xy0 = xy0_vf19_int; + packet.xy1 = xy1_vf20_int; + packet.xy2 = xy2_vf21_int; + packet.xy3 = xy3_vf22_int; + + // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); + + */ + +2d: + +/* + + if (m_extra_debug) { + ImGui::Text("Sprite: %d", sprite_idx); + } + SpriteHud2DPacket packet; + memset(&packet, 0, sizeof(packet)); + // ilw.y vi08, 1(vi02) | nop vi08 = matrix + u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); + // assert(offset_selector == 0 || offset_selector == 1); + // moved this out of the loop. 
+ // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat + // lq.xyzw vf26, 901(vi00) | nop + // lq.xyzw vf27, 902(vi00) | nop + // lq.xyzw vf28, 903(vi00) | nop + // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset + // vf30 + Vector4f hvdf_offset = m_3d_matrix_data.hvdf_offset; + glUniform4f( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hvdf_offset"), + hvdf_offset[0], hvdf_offset[1], hvdf_offset[2], hvdf_offset[3]); + glUniform1f( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_constant"), + m_frame_data.pfog0); + + // lqi.xyzw vf01, vi02 | nop + Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; + if (m_extra_debug) { + imgui_vec(pos_vf01, "POS", 2); + } + // lqi.xyzw vf05, vi02 | nop + Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; + // lqi.xyzw vf11, vi02 | nop + Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; + + // multiplications from the right column + Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); + + Vector4f scales_vf01 = pos_vf01; // now used for something else. + // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 + // vf12 is fog consts + Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, + m_frame_data.bonus); + // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 + // load offset selector for the next round. + // nop | madday.xyzw ACC, vf26, vf01 + // nop | maddz.xyzw vf02, vf27, vf01 + + // move.w vf05, vf00 | addw.z vf01, vf00, vf05 + // scales_vf01.z = sy + scales_vf01.z() = flags_vf05.w(); // start building the scale vector + flags_vf05.w() = 1.f; // what are we building in flags right now?? 
+ + // nop | nop + // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 + float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); + flags_vf05.z() *= m_frame_data.deg_to_rad; + // nop | mul.xyzw vf03, vf02, vf29 + Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); + // nop | nop + // nop | nop + // nop | mulz.z vf04, vf05, vf05 (ts) + // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); + + // the load is for rotation stuff, + // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) + // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) + + // upcoming fcand with 0x3f, that checks all of them. + bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); + bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) + + // L8: + // xgkick double buffer setup + // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q + scales_vf01.z() *= Q; // sy + scales_vf01.w() *= Q; // sx + + // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) + auto adgif_vf06 = m_frame_data.adgif_giftag; + + // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) + + // fmand vi01, vi06 | mul.xyz vf02, vf02, Q + transformed_pos_vf02.x() *= Q; + transformed_pos_vf02.y() *= Q; + transformed_pos_vf02.z() *= Q; + + // if (m_extra_debug) { + // imgui_vec(transformed_pos_vf02, "scaled xf"); + // } + + // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 + scales_vf01.x() = scales_vf01.z(); // = sy + if (fmand_result) { + if (m_extra_debug) { + ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (1) reject"); + ImGui::Separator(); + } + continue; // reject! 
+ } + + // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) + // vf07 is first use adgif + + // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) + + // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 + // vf08 is second user adgif + Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; + // if (m_extra_debug) { + // ImGui::Text("sel %d", offset_selector); + // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); + // imgui_vec(hvdf_offset, "hvdf"); + // imgui_vec(transformed_pos_vf02, "tf'd"); + // } + + // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 + // vf09 is third user adgif + scales_vf01.x() *= scales_vf01.w(); // x = sx * sy + + // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) + // FIRST ADGIF IS adgif_vf06 + packet.adgif_giftag = adgif_vf06; + + // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) + + // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 + // SECOND ADGIF is first user + // just do all 5 now. + packet.user_adgif = m_adgif[sprite_idx]; + + offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); + + // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 + // THIRD ADGIF is second user + scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); + scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); + + // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) + // FOURTH ADGIF is third user + + // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) + + // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 + // vf06 is fourth user adgif + scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) + + // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 + // vf07 is fifth user adgif + offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); + + // lq.xyzw vf08, 999(vi00) | miniz.zw vf01, vf01, vf12 + // vf08 is 2d giftag 1 (NOTE THIS IS DIFFERENT FROM 2d 1)!!!!! 
+ scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); + scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); + bool use_first_giftag = offset_selector == 0; + + // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 + auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); + Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); + + // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 + // pipeline + scales_vf01.x() = std::min(scales_vf01.x(), 1.f); + + // nop | suby.w vf02, vf10, vf12 (unique) + transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); + + // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 + // pipeline + color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? + + // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 + // already computed pipeline + + // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) + Vector4f basis_x_vf17 = m_frame_data.basis_x; + + // fmand vi09, vi06 | nop + // ibne vi00, vi09, L6 | nop + if (transformed_pos_vf02.w() != 0) { + if (m_extra_debug) { + ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (2) trick"); + } + use_first_giftag = false; + } + + // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) + Vector4f basis_y_vf18 = m_frame_data.basis_y; + + // assert(flag_vi07 == 0); + Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; + // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 + Vector4f xy0_vf19 = xy_array[0]; + math::Vector color_integer_vf11 = color_vf11.cast(); + + // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) + Vector4f xy1_vf20 = xy_array[1]; + + // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 + Vector4f xy2_vf21 = xy_array[2]; + Vector4f acc = basis_x_vf17 * vf05_sincos.w(); + + // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 + Vector4f xy3_vf22 = xy_array[3]; + Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); + // sq.xyzw vf11, 3(vi05) | mulaz.xyzw 
ACC, vf17, vf05 + // EIGHTH is color integer + packet.color = color_integer_vf11; + + acc = basis_x_vf17 * vf05_sincos.z(); + + // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 + // (pipeline) + Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); + + // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) + + // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 + // (pipeline) + vf12_rotated *= scales_vf01.w(); + + // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) + if (fcand_result) { + if (m_extra_debug) { + ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); + ImGui::Separator(); + } + continue; // reject (could move earlier) + } + + // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 + // (pipeline) + vf13_rotated_trans *= scales_vf01.z(); + + // LEFT OFF HERE! + + // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 + // FIFTH is fourth user + + // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 + // SIXTH is fifth user + acc = offset_pos_vf10; + + // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 + // SEVENTH is giftag2 + packet.sprite_giftag = + use_first_giftag ? 
m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; + acc += vf12_rotated * xy0_vf19.x(); + + // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 + Vector4f st0_vf06 = m_frame_data.st_array[0]; + xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); + + // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 + Vector4f st1_vf07 = m_frame_data.st_array[1]; + acc = offset_pos_vf10; + + // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 + Vector4f st2_vf08 = m_frame_data.st_array[2]; + acc += vf12_rotated * xy1_vf20.x(); + + // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 + Vector4f st3_vf09 = m_frame_data.st_array[3]; + xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); + + // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 + // NINTH is st0 + packet.st0 = st0_vf06; + acc = offset_pos_vf10; + + // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 + // ELEVEN is st1 + packet.st1 = st1_vf07; + acc += vf12_rotated * xy2_vf21.x(); + + // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 + // THIRTEEN is st2 + packet.st2 = st2_vf08; + xy2_vf21 = acc + vf13_rotated_trans * xy2_vf21.y(); + + // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 + // FIFTEEN is st3 + packet.st3 = st3_vf09; + acc = offset_pos_vf10; + + // nop | maddax.xyzw ACC, vf12, vf22 + acc += vf12_rotated * xy3_vf22.x(); + + // nop | maddy.xyzw vf22, vf13, vf22 + xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); + + // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 + // (pipeline) + auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); + + // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 + // (pipeline) + auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); + + // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 + // (pipeline) + auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); + + // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 + // (pipeline) + auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); + + if (m_extra_debug) { + u32 zi = xy3_vf22_int.z() >> 4; + ImGui::Text("z (int): 0x%08x 
%s", zi, zi >= (1 << 24) ? "bad" : ""); + ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); + } + + // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) + // TENTH is xy0int + packet.xy0 = xy0_vf19_int; + // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) + // TWELVE is xy1int + packet.xy1 = xy1_vf20_int; + // sq.xyzw vf21, 6(vi05) | nop + // FOURTEEN is xy2int + packet.xy2 = xy2_vf21_int; + // sq.xyzw vf22, 8(vi05) | nop + // SIXTEEN is xy3int + packet.xy3 = xy3_vf22_int; + + // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); + if (m_extra_debug) { + imgui_vec(vf12_rotated, "vf12", 2); + imgui_vec(vf13_rotated_trans, "vf13", 2); + ImGui::Separator(); + } + + // xgkick vi15 | nop + // iaddi vi04, vi04, -0x1 | nop + // iaddiu vi01, vi00, 0x672 | nop + // ibne vi00, vi04, L8 | nop + // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 + // nop | nop :e + // nop | nop + // L9: + // iaddi vi04, vi04, -0x1 | nop + // iaddi vi02, vi02, -0x3 | nop + // ibne vi00, vi04, L7 | nop + // nop | nop + // nop | nop :e + // nop | nop + // L10: + // iaddi vi04, vi04, -0x1 | nop + // iaddi vi03, vi03, 0x4 | nop + // ibne vi00, vi04, L7 | nop + // nop | nop + // nop | nop :e + // nop | nop + + */ + +hud: +/* + SpriteHud2DPacket packet; + memset(&packet, 0, sizeof(packet)); + // L7 (prologue, and early abort) + // ilw.y vi08, 1(vi02) | nop vi08 = matrix + u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); + + // moved this out of the loop. + // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat + // lq.xyzw vf26, 901(vi00) | nop + // lq.xyzw vf27, 902(vi00) | nop + // lq.xyzw vf28, 903(vi00) | nop + // lq.xyzw vf30, 904(vi08) | nop vf30 = hvdf_offset + // vf30 + Vector4f hvdf_offset = offset_selector == 0 ? 
m_hud_matrix_data.hvdf_offset + : m_hud_matrix_data.user_hvdf[offset_selector - 1]; + glUniform4f( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hvdf_offset"), + hvdf_offset[0], hvdf_offset[1], hvdf_offset[2], hvdf_offset[3]); + glUniform1f( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_constant"), + m_frame_data.pfog0); + + // lqi.xyzw vf01, vi02 | nop + Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; + if (m_extra_debug) { + imgui_vec(pos_vf01, "POS", 2); + } + // lqi.xyzw vf05, vi02 | nop + Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; + // lqi.xyzw vf11, vi02 | nop + Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; + + // multiplications from the right column + Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); + + Vector4f scales_vf01 = pos_vf01; // now used for something else. + // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 + // vf12 is fog consts + Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, + m_frame_data.bonus); + // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 + // load offset selector for the next round. + // nop | madday.xyzw ACC, vf26, vf01 + // nop | maddz.xyzw vf02, vf27, vf01 + + // move.w vf05, vf00 | addw.z vf01, vf00, vf05 + // scales_vf01.z = sy + scales_vf01.z() = flags_vf05.w(); // start building the scale vector + flags_vf05.w() = 1.f; // what are we building in flags right now?? 
+ + // nop | nop + // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 + float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); + flags_vf05.z() *= m_frame_data.deg_to_rad; + // nop | mul.xyzw vf03, vf02, vf29 + Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); + // nop | nop + // nop | nop + // nop | mulz.z vf04, vf05, vf05 (ts) + // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); + + // the load is for rotation stuff, + // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) + // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) + + // upcoming fcand with 0x3f, that checks all of them. + bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); + bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) + + // L8: + // xgkick double buffer setup + // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q + scales_vf01.z() *= Q; // sy + scales_vf01.w() *= Q; // sx + + // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) + auto adgif_vf06 = m_frame_data.adgif_giftag; + + // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) + + // fmand vi01, vi06 | mul.xyz vf02, vf02, Q + transformed_pos_vf02.x() *= Q; + transformed_pos_vf02.y() *= Q; + transformed_pos_vf02.z() *= Q; + + // if (m_extra_debug) { + // imgui_vec(transformed_pos_vf02, "scaled xf"); + // } + + // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 + scales_vf01.x() = scales_vf01.z(); // = sy + if (fmand_result) { + if (m_extra_debug) { + ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand reject"); + ImGui::Separator(); + } + continue; // reject! 
+ } + + // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) + // vf07 is first use adgif + + // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) + + // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 + // vf08 is second user adgif + Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; + // if (m_extra_debug) { + // ImGui::Text("sel %d", offset_selector); + // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); + // imgui_vec(hvdf_offset, "hvdf"); + // imgui_vec(transformed_pos_vf02, "tf'd"); + // } + + // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 + // vf09 is third user adgif + scales_vf01.x() *= scales_vf01.w(); // x = sx * sy + + // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) + // FIRST ADGIF IS adgif_vf06 + packet.adgif_giftag = adgif_vf06; + + // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) + + // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 + // SECOND ADGIF is first user + // just do all 5 now. 
+ packet.user_adgif = m_adgif[sprite_idx]; + + offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); + + // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 + // THIRD ADGIF is second user + scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); + scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); + + // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) + // FOURTH ADGIF is third user + + // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) + + // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 + // vf06 is fourth user adgif + scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) + + // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 + // vf07 is fifth user adgif + offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); + + // lq.xyzw vf08, 1000(vi00) | nop + // vf08 is 2d giftag 2 + + // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 + auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); + Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); + + // lq.xyzw vf30, 904(vi08) | nop + // pipeline + + // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 + // pipeline + scales_vf01.x() = std::min(scales_vf01.x(), 1.f); + + // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 + // pipeline + color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? 
+ + // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 + // already computed pipeline + + // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) + Vector4f basis_x_vf17 = m_frame_data.basis_x; + + // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) + Vector4f basis_y_vf18 = m_frame_data.basis_y; + + assert(flag_vi07 == 0); + Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; + // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 + Vector4f xy0_vf19 = xy_array[0]; + math::Vector color_integer_vf11 = color_vf11.cast(); + + // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) + Vector4f xy1_vf20 = xy_array[1]; + + // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 + Vector4f xy2_vf21 = xy_array[2]; + Vector4f acc = basis_x_vf17 * vf05_sincos.w(); + + // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 + Vector4f xy3_vf22 = xy_array[3]; + Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); + // sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 + // EIGHTH is color integer + packet.color = color_integer_vf11; + + acc = basis_x_vf17 * vf05_sincos.z(); + + // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 + // (pipeline) + Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); + + // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) + + // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 + // (pipeline) + vf12_rotated *= scales_vf01.w(); + + // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) + if (fcand_result) { + if (m_extra_debug) { + ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); + ImGui::Separator(); + } + continue; // reject (could move earlier) + } + + // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 + // (pipeline) + vf13_rotated_trans *= scales_vf01.z(); + + // LEFT OFF HERE! 
+ + // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 + // FIFTH is fourth user + + // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 + // SIXTH is fifth user + acc = offset_pos_vf10; + + // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 + // SEVENTH is giftag2 + packet.sprite_giftag = m_frame_data.sprite_2d_giftag2; + acc += vf12_rotated * xy0_vf19.x(); + + // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 + Vector4f st0_vf06 = m_frame_data.st_array[0]; + xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); + + // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 + Vector4f st1_vf07 = m_frame_data.st_array[1]; + acc = offset_pos_vf10; + + // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 + Vector4f st2_vf08 = m_frame_data.st_array[2]; + acc += vf12_rotated * xy1_vf20.x(); + + // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 + Vector4f st3_vf09 = m_frame_data.st_array[3]; + xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); + + // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 + // NINTH is st0 + packet.st0 = st0_vf06; + acc = offset_pos_vf10; + + // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 + // ELEVEN is st1 + packet.st1 = st1_vf07; + acc += vf12_rotated * xy2_vf21.x(); + + // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 + // THIRTEEN is st2 + packet.st2 = st2_vf08; + xy2_vf21 = acc + vf13_rotated_trans * xy2_vf21.y(); + + // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 + // FIFTEEN is st3 + packet.st3 = st3_vf09; + acc = offset_pos_vf10; + + // nop | maddax.xyzw ACC, vf12, vf22 + acc += vf12_rotated * xy3_vf22.x(); + + // nop | maddy.xyzw vf22, vf13, vf22 + xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); + + // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 + // (pipeline) + auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); + + // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 + // (pipeline) + auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); + + // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 + // 
(pipeline) + auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); + + // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 + // (pipeline) + auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); + + if (m_extra_debug) { + u32 zi = xy3_vf22_int.z() >> 4; + ImGui::Text("z (int): 0x%08x %s", zi, zi >= (1 << 24) ? "bad" : ""); + ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); + } + + // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) + // TENTH is xy0int + packet.xy0 = xy0_vf19_int; + // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) + // TWELVE is xy1int + packet.xy1 = xy1_vf20_int; + // sq.xyzw vf21, 6(vi05) | nop + // FOURTEEN is xy2int + packet.xy2 = xy2_vf21_int; + // sq.xyzw vf22, 8(vi05) | nop + // SIXTEEN is xy3int + packet.xy3 = xy3_vf22_int; + + // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); + if (m_extra_debug) { + imgui_vec(vf12_rotated, "vf12", 2); + imgui_vec(vf13_rotated_trans, "vf13", 2); + ImGui::Separator(); + } + + // xgkick vi15 | nop + // iaddi vi04, vi04, -0x1 | nop + // iaddiu vi01, vi00, 0x672 | nop + // ibne vi00, vi04, L8 | nop + // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 + // nop | nop :e + // nop | nop + // L9: + // iaddi vi04, vi04, -0x1 | nop + // iaddi vi02, vi02, -0x3 | nop + // ibne vi00, vi04, L7 | nop + // nop | nop + // nop | nop :e + // nop | nop + // L10: + // iaddi vi04, vi04, -0x1 | nop + // iaddi vi03, vi03, 0x4 | nop + // ibne vi00, vi04, L7 | nop + // nop | nop + // nop | nop :e + // nop | nop + + */ + + diff --git a/game/graphics/opengl_renderer/DirectRenderer.cpp b/game/graphics/opengl_renderer/DirectRenderer.cpp index cba7407178..4ff6df551d 100644 --- a/game/graphics/opengl_renderer/DirectRenderer.cpp +++ b/game/graphics/opengl_renderer/DirectRenderer.cpp @@ -346,9 +346,6 @@ void DirectRenderer::update_gl_texture(SharedRenderState* render_state, int unit glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, 
GL_TEXTURE_MAG_FILTER, GL_NEAREST); } - - glUniform1i( - glGetUniformLocation(render_state->shaders[ShaderId::DIRECT_BASIC_TEXTURED].id(), "T0"), 0); } void DirectRenderer::update_gl_blend() { diff --git a/game/graphics/opengl_renderer/Shader.cpp b/game/graphics/opengl_renderer/Shader.cpp index be6b42dbce..6e14cf26cb 100644 --- a/game/graphics/opengl_renderer/Shader.cpp +++ b/game/graphics/opengl_renderer/Shader.cpp @@ -69,7 +69,7 @@ ShaderLibrary::ShaderLibrary() { at(ShaderId::DIRECT_BASIC) = {"direct_basic"}; at(ShaderId::DIRECT_BASIC_TEXTURED) = {"direct_basic_textured"}; at(ShaderId::DEBUG_RED) = {"debug_red"}; - at(ShaderId::SPRITE_CPU) = {"sprite_cpu"}; + at(ShaderId::SPRITE) = {"sprite_3d"}; at(ShaderId::SKY) = {"sky"}; at(ShaderId::SKY_BLEND) = {"sky_blend"}; at(ShaderId::DEBUG_BUFFERED) = {"debug_buffered"}; diff --git a/game/graphics/opengl_renderer/Shader.h b/game/graphics/opengl_renderer/Shader.h index cf60e0e20c..e986883d9e 100644 --- a/game/graphics/opengl_renderer/Shader.h +++ b/game/graphics/opengl_renderer/Shader.h @@ -36,6 +36,7 @@ enum class ShaderId { BUFFERED_TCC1 = 11, TFRAG3 = 12, TFRAG3_NO_TEX = 13, + SPRITE = 14, MAX_SHADERS }; diff --git a/game/graphics/opengl_renderer/SkyBlendGPU.cpp b/game/graphics/opengl_renderer/SkyBlendGPU.cpp index 890f2d448a..7a1ffed2b3 100644 --- a/game/graphics/opengl_renderer/SkyBlendGPU.cpp +++ b/game/graphics/opengl_renderer/SkyBlendGPU.cpp @@ -151,7 +151,7 @@ SkyBlendStats SkyBlendGPU::do_sky_blends(DmaFollower& dma, // setup draw data glBindBuffer(GL_ARRAY_BUFFER, m_gl_vertex_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(Vertex) * 6, m_vertex_data); + glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * 6, m_vertex_data, GL_STREAM_DRAW); glEnableVertexAttribArray(0); glVertexAttribPointer(0, // location 0 in the shader 3, // 3 floats per vert @@ -167,7 +167,6 @@ SkyBlendStats SkyBlendGPU::do_sky_blends(DmaFollower& dma, glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::SKY_BLEND].id(), "T0"), 0); // Draw a sqaure glDrawArrays(GL_TRIANGLES, 0, 6); diff --git a/game/graphics/opengl_renderer/SpriteRenderer.cpp b/game/graphics/opengl_renderer/SpriteRenderer.cpp index 90ca876f6d..2ff4cd71a9 100644 --- a/game/graphics/opengl_renderer/SpriteRenderer.cpp +++ b/game/graphics/opengl_renderer/SpriteRenderer.cpp @@ -30,13 +30,69 @@ u32 process_sprite_chunk_header(DmaFollower& dma) { } } // namespace +constexpr int SPRITE_RENDERER_MAX_SPRITES = 8000; + SpriteRenderer::SpriteRenderer(const std::string& name, BucketId my_id) - : BucketRenderer(name, my_id), - m_sprite_renderer(fmt::format("{}.sprites", name), - my_id, - 16384, - DirectRenderer::Mode::SPRITE_CPU), - m_direct_renderer(fmt::format("{}.direct", name), my_id, 100, DirectRenderer::Mode::NORMAL) {} + : BucketRenderer(name, my_id) { + glGenBuffers(1, &m_ogl.vertex_buffer); + glGenVertexArrays(1, &m_ogl.vao); + glBindVertexArray(m_ogl.vao); + glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); + auto verts = SPRITE_RENDERER_MAX_SPRITES * 3 * 2; + auto bytes = verts * sizeof(SpriteVertex3D); + glBufferData(GL_ARRAY_BUFFER, bytes, nullptr, GL_STREAM_DRAW); + glEnableVertexAttribArray(0); + glVertexAttribPointer( + 0, // location 0 in the shader + 4, // 4 floats per vert (w unused) + GL_FLOAT, // floats + GL_TRUE, // normalized, ignored, + sizeof(SpriteVertex3D), // + (void*)offsetof(SpriteVertex3D, xyz_sx) // offset in array (why is this a pointer...) + ); + + glEnableVertexAttribArray(1); + glVertexAttribPointer( + 1, // location 0 in the shader + 4, // 4 color components + GL_FLOAT, // floats + GL_TRUE, // normalized, ignored, + sizeof(SpriteVertex3D), // + (void*)offsetof(SpriteVertex3D, quat_sy) // offset in array (why is this a pointer...) 
+ ); + + glEnableVertexAttribArray(2); + glVertexAttribPointer( + 2, // location 0 in the shader + 4, // 4 color components + GL_FLOAT, // floats + GL_TRUE, // normalized, ignored, + sizeof(SpriteVertex3D), // + (void*)offsetof(SpriteVertex3D, rgba) // offset in array (why is this a pointer...) + ); + + glEnableVertexAttribArray(3); + glVertexAttribIPointer( + 3, // location 0 in the shader + 2, // 4 color components + GL_UNSIGNED_SHORT, // floats + sizeof(SpriteVertex3D), // + (void*)offsetof(SpriteVertex3D, flags_matrix) // offset in array (why is this a pointer...) + ); + + glEnableVertexAttribArray(4); + glVertexAttribIPointer( + 4, // location 0 in the shader + 4, // 3 floats per vert + GL_UNSIGNED_SHORT, // floats + sizeof(SpriteVertex3D), // + (void*)offsetof(SpriteVertex3D, info) // offset in array (why is this a pointer...) + ); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindVertexArray(0); + + m_vertices_3d.resize(verts); +} /*! * Run the sprite distorter. Currently nothing uses sprite-distorter so this just skips through @@ -46,13 +102,13 @@ void SpriteRenderer::render_distorter(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { // Next thing should be the sprite-distorter setup - m_direct_renderer.reset_state(); + // m_direct_renderer.reset_state(); while (dma.current_tag().qwc != 7) { auto direct_data = dma.read_and_advance(); - m_direct_renderer.render_vif(direct_data.vif0(), direct_data.vif1(), direct_data.data, - direct_data.size_bytes, render_state, prof); + // m_direct_renderer.render_vif(direct_data.vif0(), direct_data.vif1(), direct_data.data, + // direct_data.size_bytes, render_state, prof); } - m_direct_renderer.flush_pending(render_state, prof); + // m_direct_renderer.flush_pending(render_state, prof); auto sprite_distorter_direct_setup = dma.read_and_advance(); assert(sprite_distorter_direct_setup.vifcode0().kind == VifCode::Kind::NOP); assert(sprite_distorter_direct_setup.vifcode1().kind == VifCode::Kind::DIRECT); 
@@ -133,6 +189,42 @@ void SpriteRenderer::render_3d(DmaFollower& dma) { void SpriteRenderer::render_2d_group0(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { + // opengl sprite frame setup + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hvdf_offset"), 1, + m_3d_matrix_data.hvdf_offset.data()); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "pfog0"), + m_frame_data.pfog0); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_min"), + m_frame_data.fog_min); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_max"), + m_frame_data.fog_max); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "min_scale"), + m_frame_data.min_scale); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "max_scale"), + m_frame_data.max_scale); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "bonus"), + m_frame_data.bonus); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hmge_scale"), 1, + m_frame_data.hmge_scale.data()); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "deg_to_rad"), + m_frame_data.deg_to_rad); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "inv_area"), + m_frame_data.inv_area); + glUniformMatrix4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "camera"), + 1, GL_FALSE, m_3d_matrix_data.camera.data()); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "xy_array"), 8, + m_frame_data.xy_array[0].data()); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "xyz_array"), 4, + m_frame_data.xyz_array[0].data()); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "st_array"), 4, + m_frame_data.st_array[0].data()); 
+ glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "basis_x"), 1, + m_frame_data.basis_x.data()); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "basis_y"), 1, + m_frame_data.basis_y.data()); + + u16 last_prog = -1; + while (looks_like_2d_chunk_start(dma)) { m_debug_stats.blocks_2d_grp0++; // 4 packets per chunk @@ -160,18 +252,28 @@ void SpriteRenderer::render_2d_group0(DmaFollower& dma, assert(run.vifcode0().kind == VifCode::Kind::NOP); assert(run.vifcode1().kind == VifCode::Kind::MSCAL); - // HACK: this renderers 3D sprites with the 2D renderer. amazingly, it almost works. - // assert(run.vifcode1().immediate == SpriteProgMem::Sprites2dGrp0); if (m_enabled) { + if (run.vifcode1().immediate != last_prog) { + // one-time setups and flushing + flush_sprites(render_state, prof); + if (run.vifcode1().immediate == SpriteProgMem::Sprites2dGrp0 && + m_prim_gl_state.current_register != m_frame_data.sprite_2d_giftag.prim()) { + m_prim_gl_state.from_register(m_frame_data.sprite_2d_giftag.prim()); + } else if (m_prim_gl_state.current_register != m_frame_data.sprite_3d_giftag.prim()) { + m_prim_gl_state.from_register(m_frame_data.sprite_3d_giftag.prim()); + } + } + if (run.vifcode1().immediate == SpriteProgMem::Sprites2dGrp0) { if (m_2d_enable) { - do_2d_group0_block_cpu(sprite_count, render_state, prof); + do_block_common(SpriteMode::Mode2D, sprite_count, render_state, prof); } } else { if (m_3d_enable) { - do_3d_block_cpu(sprite_count, render_state, prof); + do_block_common(SpriteMode::Mode3D, sprite_count, render_state, prof); } } + last_prog = run.vifcode1().immediate; } } } @@ -198,6 +300,18 @@ void SpriteRenderer::render_2d_group1(DmaFollower& dma, assert(mat_upload.size_bytes == sizeof(m_hud_matrix_data)); memcpy(&m_hud_matrix_data, mat_upload.data, sizeof(m_hud_matrix_data)); + // opengl sprite frame setup + glUniform4fv( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), 
"hud_hvdf_offset"), 1, + m_hud_matrix_data.hvdf_offset.data()); + glUniform4fv(glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hud_hvdf_user"), + 75, m_hud_matrix_data.user_hvdf[0].data()); + glUniformMatrix4fv( + glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hud_matrix"), 1, GL_FALSE, + m_hud_matrix_data.matrix.data()); + + m_prim_gl_state.from_register(m_frame_data.sprite_2d_giftag2.prim()); + // loop through chunks. while (looks_like_2d_chunk_start(dma)) { m_debug_stats.blocks_2d_grp1++; @@ -227,7 +341,7 @@ void SpriteRenderer::render_2d_group1(DmaFollower& dma, assert(run.vifcode1().kind == VifCode::Kind::MSCAL); assert(run.vifcode1().immediate == SpriteProgMem::Sprites2dHud); if (m_enabled && m_2d_enable) { - do_2d_group1_block_cpu(sprite_count, render_state, prof); + do_block_common(SpriteMode::ModeHUD, sprite_count, render_state, prof); } } } @@ -251,6 +365,8 @@ void SpriteRenderer::render(DmaFollower& dma, return; } + render_state->shaders[ShaderId::SPRITE].activate(); + // First is the distorter { auto child = prof.make_scoped_child("distorter"); @@ -264,10 +380,11 @@ void SpriteRenderer::render(DmaFollower& dma, render_3d(dma); // 2d draw - m_sprite_renderer.reset_state(); + // m_sprite_renderer.reset_state(); { auto child = prof.make_scoped_child("2d-group0"); render_2d_group0(dma, render_state, child); + flush_sprites(render_state, prof); } // shadow draw @@ -277,7 +394,7 @@ void SpriteRenderer::render(DmaFollower& dma, { auto child = prof.make_scoped_child("2d-group1"); render_2d_group1(dma, render_state, child); - m_sprite_renderer.flush_pending(render_state, child); + flush_sprites(render_state, prof); } // TODO finish this up. 
@@ -307,7 +424,7 @@ void SpriteRenderer::draw_debug_window() { ImGui::SameLine(); ImGui::Checkbox("3d-debug", &m_3d_debug); if (ImGui::TreeNode("direct")) { - m_sprite_renderer.draw_debug_window(); + // m_sprite_renderer.draw_debug_window(); ImGui::TreePop(); } } @@ -315,1027 +432,285 @@ void SpriteRenderer::draw_debug_window() { /////////////////////////////////////////////////////////////////////////////////////////////////// // Render (for real) -namespace { -Vector4f matrix_transform(const Matrix4f& mat, const Vector4f& pt) { - // mulaw.xyzw ACC, vf28, vf00 - // maddax.xyzw ACC, vf25, vf01 - // madday.xyzw ACC, vf26, vf01 - // maddz.xyzw vf02, vf27, vf01 - return mat.col(3) + (mat.col(0) * pt[0]) + (mat.col(1) * pt[1]) + (mat.col(2) * pt[2]); -} - -bool clip_xyz_plus_minus(const Vector4f& pt) { - float pw = std::abs(pt.w()); - float mw = -pw; - for (int i = 0; i < 3; i++) { - if (pt[i] > pw) { - return true; - } - if (pt[i] < mw) { - return true; - } +void SpriteRenderer::flush_sprites(SharedRenderState* render_state, ScopedProfilerNode& prof) { + for (int i = 0; i <= m_adgif_index; ++i) { + update_gl_texture(render_state, i); } - return false; -} -void imgui_vec(const Vector4f& vec, const char* name = nullptr, int indent = 0) { - std::string spacing(indent, ' '); - if (name) { - ImGui::Text("%s%s: %f, %f, %f, %f", spacing.c_str(), name, vec.x(), vec.y(), vec.z(), vec.w()); + if (m_sprite_offset == 0) { + // nothing to render + m_adgif_index = 0; + return; + } + + update_gl_blend(m_adgif_state_stack[m_adgif_index]); + + if (m_adgif_state_stack[m_adgif_index].z_write) { + glDepthMask(GL_TRUE); } else { - ImGui::Text("%s%f, %f, %f, %f", spacing.c_str(), vec.x(), vec.y(), vec.z(), vec.w()); + glDepthMask(GL_FALSE); } + + glBindVertexArray(m_ogl.vao); + + // render! 
+ // fmt::print("drawing {} sprites\n", m_sprite_offset); + glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, m_sprite_offset * sizeof(SpriteVertex3D) * 6, m_vertices_3d.data(), + GL_STREAM_DRAW); + + glDrawArrays(GL_TRIANGLES, 0, m_sprite_offset * 6); + + glBindVertexArray(0); + int n_tris = m_sprite_offset * 6 / 3; + prof.add_tri(n_tris); + prof.add_draw_call(1); + + m_sprite_offset = 0; + m_adgif_index = 0; } -} // namespace -/*! - * Render the sprites! - * This is a somewhat inefficient way to do it: - * The VU program is (poorly) translated to C, then the gs packet is sent to a DirectRenderer. - * In the future we should make a sprite-specific renderer which would have some benefits: - * - do this math on the GPU - * - special case the primitive buffer stuff - */ -void SpriteRenderer::do_2d_group1_block_cpu(u32 count, - SharedRenderState* render_state, - ScopedProfilerNode& prof) { - if (m_extra_debug) { - ImGui::Begin("Sprite Extra Debug 2d_1"); +void SpriteRenderer::handle_tex0(u64 val, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + GsTex0 reg(val); + + // update tbp + + m_adgif_state.reg_tex0 = reg; + m_adgif_state.texture_base_ptr = reg.tbp0(); + m_adgif_state.using_mt4hh = reg.psm() == GsTex0::PSM::PSMT4HH; + m_adgif_state.tcc = reg.tcc(); + + // tbw: assume they got it right + // psm: assume they got it right + // tw: assume they got it right + // th: assume they got it right + + assert(reg.tfx() == GsTex0::TextureFunction::MODULATE); + + // cbp: assume they got it right + // cpsm: assume they got it right + // csm: assume they got it right +} + +void SpriteRenderer::handle_tex1(u64 val, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + GsTex1 reg(val); + // for now, we aren't going to handle mipmapping. I don't think it's used with direct. 
+ // assert(reg.mxl() == 0); + // if that's true, we can ignore LCM, MTBA, L, K + + m_adgif_state.enable_tex_filt = reg.mmag(); + + // MMAG/MMIN specify texture filtering. For now, assume always linear + // assert(reg.mmag() == true); + // if (!(reg.mmin() == 1 || reg.mmin() == 4)) { // with mipmap off, both of these are linear + // // lg::error("unsupported mmin"); + // } +} + +void SpriteRenderer::handle_zbuf(u64 val, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // note: we can basically ignore this. There's a single z buffer that's always configured the same + // way - 24-bit, at offset 448. + GsZbuf x(val); + assert(x.psm() == TextureFormat::PSMZ24); + assert(x.zbp() == 448); + + m_adgif_state.z_write = !x.zmsk(); +} + +void SpriteRenderer::handle_clamp(u64 val, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + if (!(val == 0b101 || val == 0 || val == 1 || val == 0b100)) { + fmt::print("clamp: 0x{:x}\n", val); + assert(false); } - // set up double buffering - // xtop vi02 | nop - // nop | nop - // load sprite count from header - // vi04 = count - // ilwr.x vi04, vi02 | nop - // vi02 = m_vec_data_2d - // iaddi vi02, vi02, 0x1 | nop - // vi03 = m_adgif - // iaddiu vi03, vi02, 0x90 | nop + m_adgif_state.reg_clamp = val; + m_adgif_state.clamp_s = val & 0b001; + m_adgif_state.clamp_t = val & 0b100; +} - // this VU program uses "software pipelining" - // it's a little bit tricky to use software pipelining in a case like - // this where sometimes you want to reject a sprite entirely and jump ahead - // so sometimes they reset back to L7 on rejection. - - // The approach in this translation is to assume we loop back to L7 every time - // and not worry about the pipeline stuff that shows up in L8 and on. - // you can enter from L7 at anytime, they are not assumed to only run on the first go. - // (though if their implementation has bugs we will not replicate them correctly...) 
- - Matrix4f camera_matrix = m_hud_matrix_data.matrix; // vf25, vf26, vf27, vf28 - - for (u32 sprite_idx = 0; sprite_idx < count; sprite_idx++) { - if (m_extra_debug) { - ImGui::Text("Sprite: %d", sprite_idx); +void SpriteRenderer::update_gl_blend(AdGifState& state) { + if (!m_prim_gl_state.alpha_blend_enable) { + glDisable(GL_BLEND); + } else { + if (state.a == GsAlpha::BlendMode::SOURCE && state.b == GsAlpha::BlendMode::DEST && + state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { + // (Cs - Cd) * As + Cd + // Cs * As + (1 - As) * Cd + glEnable(GL_BLEND); + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } else if (state.a == GsAlpha::BlendMode::SOURCE && + state.b == GsAlpha::BlendMode::ZERO_OR_FIXED && + state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { + // (Cs - 0) * As + Cd + // Cs * As + (1) * CD + glEnable(GL_BLEND); + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + } else { + // unsupported blend: a 0 b 2 c 2 d 1 + lg::error("unsupported blend: a {} b {} c {} d {}\n", (int)state.a, (int)state.b, + (int)state.c, (int)state.d); + assert(false); } - SpriteHud2DPacket packet; - memset(&packet, 0, sizeof(packet)); - // L7 (prologue, and early abort) - // ilw.y vi08, 1(vi02) | nop vi08 = matrix - u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); - - // moved this out of the loop. - // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat - // lq.xyzw vf26, 901(vi00) | nop - // lq.xyzw vf27, 902(vi00) | nop - // lq.xyzw vf28, 903(vi00) | nop - // lq.xyzw vf30, 904(vi08) | nop vf30 = hvdf_offset - // vf30 - Vector4f hvdf_offset = offset_selector == 0 ? 
m_hud_matrix_data.hvdf_offset - : m_hud_matrix_data.user_hvdf[offset_selector - 1]; - - // lqi.xyzw vf01, vi02 | nop - Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; - if (m_extra_debug) { - imgui_vec(pos_vf01, "POS", 2); - } - // lqi.xyzw vf05, vi02 | nop - Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; - // lqi.xyzw vf11, vi02 | nop - Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; - - // multiplications from the right column - Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); - - Vector4f scales_vf01 = pos_vf01; // now used for something else. - // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 - // vf12 is fog consts - Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, - m_frame_data.bonus); - // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 - // load offset selector for the next round. - // nop | madday.xyzw ACC, vf26, vf01 - // nop | maddz.xyzw vf02, vf27, vf01 - - // move.w vf05, vf00 | addw.z vf01, vf00, vf05 - // scales_vf01.z = sy - scales_vf01.z() = flags_vf05.w(); // start building the scale vector - flags_vf05.w() = 1.f; // what are we building in flags right now?? - - // nop | nop - // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 - float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); - flags_vf05.z() *= m_frame_data.deg_to_rad; - // nop | mul.xyzw vf03, vf02, vf29 - Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); - // nop | nop - // nop | nop - // nop | mulz.z vf04, vf05, vf05 (ts) - // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); - - // the load is for rotation stuff, - // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) - // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) - - // upcoming fcand with 0x3f, that checks all of them. 
- bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); - bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) - - // L8: - // xgkick double buffer setup - // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q - scales_vf01.z() *= Q; // sy - scales_vf01.w() *= Q; // sx - - // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) - auto adgif_vf06 = m_frame_data.adgif_giftag; - - // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) - - // fmand vi01, vi06 | mul.xyz vf02, vf02, Q - transformed_pos_vf02.x() *= Q; - transformed_pos_vf02.y() *= Q; - transformed_pos_vf02.z() *= Q; - - // if (m_extra_debug) { - // imgui_vec(transformed_pos_vf02, "scaled xf"); - // } - - // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 - scales_vf01.x() = scales_vf01.z(); // = sy - if (fmand_result) { - if (m_extra_debug) { - ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand reject"); - ImGui::Separator(); - } - continue; // reject! - } - - // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) - // vf07 is first use adgif - - // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) - - // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 - // vf08 is second user adgif - Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; - // if (m_extra_debug) { - // ImGui::Text("sel %d", offset_selector); - // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); - // imgui_vec(hvdf_offset, "hvdf"); - // imgui_vec(transformed_pos_vf02, "tf'd"); - // } - - // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 - // vf09 is third user adgif - scales_vf01.x() *= scales_vf01.w(); // x = sx * sy - - // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) - // FIRST ADGIF IS adgif_vf06 - packet.adgif_giftag = adgif_vf06; - - // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) - - // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 - // SECOND ADGIF is first user - // just do all 5 now. 
- packet.user_adgif = m_adgif[sprite_idx]; - - offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); - - // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 - // THIRD ADGIF is second user - scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); - scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); - - // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) - // FOURTH ADGIF is third user - - // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) - - // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 - // vf06 is fourth user adgif - scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) - - // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 - // vf07 is fifth user adgif - offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); - - // lq.xyzw vf08, 1000(vi00) | nop - // vf08 is 2d giftag 2 - - // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 - auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); - Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); - - // lq.xyzw vf30, 904(vi08) | nop - // pipline - - // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 - // pipeline - scales_vf01.x() = std::min(scales_vf01.x(), 1.f); - - // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 - // pipeline - color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? 
- - // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 - // already computed pipeline - - // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) - Vector4f basis_x_vf17 = m_frame_data.basis_x; - - // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) - Vector4f basis_y_vf18 = m_frame_data.basis_y; - - assert(flag_vi07 == 0); - Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; - // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 - Vector4f xy0_vf19 = xy_array[0]; - math::Vector color_integer_vf11 = color_vf11.cast(); - - // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) - Vector4f xy1_vf20 = xy_array[1]; - - // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 - Vector4f xy2_vf21 = xy_array[2]; - Vector4f acc = basis_x_vf17 * vf05_sincos.w(); - - // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 - Vector4f xy3_vf22 = xy_array[3]; - Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); - // sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 - // EIGHTH is color integer - packet.color = color_integer_vf11; - - acc = basis_x_vf17 * vf05_sincos.z(); - - // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 - // (pipeline) - Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); - - // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) - - // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 - // (pipeline) - vf12_rotated *= scales_vf01.w(); - - // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) - if (fcand_result) { - if (m_extra_debug) { - ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); - ImGui::Separator(); - } - continue; // reject (could move earlier) - } - - // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 - // (pipeline) - vf13_rotated_trans *= scales_vf01.z(); - - // LEFT OFF HERE! 
- - // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 - // FIFTH is fourth user - - // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 - // SIXTH is fifth user - acc = offset_pos_vf10; - - // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 - // SEVENTH is giftag2 - packet.sprite_giftag = m_frame_data.sprite_2d_giftag2; - acc += vf12_rotated * xy0_vf19.x(); - - // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 - Vector4f st0_vf06 = m_frame_data.st_array[0]; - xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); - - // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 - Vector4f st1_vf07 = m_frame_data.st_array[1]; - acc = offset_pos_vf10; - - // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 - Vector4f st2_vf08 = m_frame_data.st_array[2]; - acc += vf12_rotated * xy1_vf20.x(); - - // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 - Vector4f st3_vf09 = m_frame_data.st_array[3]; - xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); - - // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 - // NINTH is st0 - packet.st0 = st0_vf06; - acc = offset_pos_vf10; - - // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 - // ELEVEN is st1 - packet.st1 = st1_vf07; - acc += vf12_rotated * xy2_vf21.x(); - - // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 - // THIRTEEN is st2 - packet.st2 = st2_vf08; - xy2_vf21 = acc + vf13_rotated_trans * xy2_vf21.y(); - - // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 - // FIFTEEN is st3 - packet.st3 = st3_vf09; - acc = offset_pos_vf10; - - // nop | maddax.xyzw ACC, vf12, vf22 - acc += vf12_rotated * xy3_vf22.x(); - - // nop | maddy.xyzw vf22, vf13, vf22 - xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); - - // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 - // (pipeline) - auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); - - // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 - // (pipeline) - auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); - - // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 - // 
(pipeline) - auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); - - // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 - // (pipeline) - auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); - - if (m_extra_debug) { - u32 zi = xy3_vf22_int.z() >> 4; - ImGui::Text("z (int): 0x%08x %s", zi, zi >= (1 << 24) ? "bad" : ""); - ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); - } - - // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) - // TENTH is xy0int - packet.xy0 = xy0_vf19_int; - // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) - // TWELVE is xy1int - packet.xy1 = xy1_vf20_int; - // sq.xyzw vf21, 6(vi05) | nop - // FOURTEEN is xy2int - packet.xy2 = xy2_vf21_int; - // sq.xyzw vf22, 8(vi05) | nop - // SIXTEEN is xy3int - packet.xy3 = xy3_vf22_int; - - m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); - if (m_extra_debug) { - imgui_vec(vf12_rotated, "vf12", 2); - imgui_vec(vf13_rotated_trans, "vf13", 2); - ImGui::Separator(); - } - - // xgkick vi15 | nop - // iaddi vi04, vi04, -0x1 | nop - // iaddiu vi01, vi00, 0x672 | nop - // ibne vi00, vi04, L8 | nop - // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 - // nop | nop :e - // nop | nop - // L9: - // iaddi vi04, vi04, -0x1 | nop - // iaddi vi02, vi02, -0x3 | nop - // ibne vi00, vi04, L7 | nop - // nop | nop - // nop | nop :e - // nop | nop - // L10: - // iaddi vi04, vi04, -0x1 | nop - // iaddi vi03, vi03, 0x4 | nop - // ibne vi00, vi04, L7 | nop - // nop | nop - // nop | nop :e - // nop | nop - } - - if (m_extra_debug) { - ImGui::End(); } } -std::array sprite_quat_to_rot(float qi, float qj, float qk) { - std::array result; - float qr = std::sqrt(std::abs(1.f - (qi * qi + qj * qj + qk * qk))); - // fmt::print("q: {} {} {} {}\n", qi, qj, qk, qr); - result[0][0] = 1.f - 2.f * (qj * qj + qk * qk); - result[1][0] = 2.f * (qi * qj - qk * qr); - result[2][0] = 2.f * (qi * qk + qj * qr); - result[0][1] = 2.f * (qi * qj + qk * qr); - result[1][1] = 1.f - 2.f * (qi * 
qi + qk * qk); - result[2][1] = 2.f * (qj * qk - qi * qr); - result[0][2] = 2.f * (qi * qk - qj * qr); - result[1][2] = 2.f * (qj * qk + qi * qr); - result[2][2] = 1.f - 2.f * (qi * qi + qj * qj); - return result; +void SpriteRenderer::handle_alpha(u64 val, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + GsAlpha reg(val); + + m_adgif_state.from_register(reg); } -Vector4f sprite_transform2(const Vector4f& root, - const Vector4f& off, - const Matrix4f& cam, - const std::array& sprite_rot, - float sx, - float sy, - const Vector4f& hvdf_off, - float pfog0, - float fog_min, - float fog_max) { - Vector4f pos = root; - // fmt::print("root : {}\n", root.to_string_aligned()); - // fmt::print("off : {} s {} {}\n", off.to_string_aligned(), sx, sy); - - math::Vector3f offset = - sprite_rot[0] * off.x() * sx + sprite_rot[1] * off.y() + sprite_rot[2] * off.z() * sy; - // fmt::print("off (r): {}\n", offset.to_string_aligned()); - - pos.x() += offset.x(); - pos.y() += offset.y(); - pos.z() += offset.z(); - Vector4f transformed_pos = matrix_transform(cam, pos); - float Q = pfog0 / transformed_pos.w(); - transformed_pos.x() *= Q; - transformed_pos.y() *= Q; - transformed_pos.z() *= Q; - Vector4f offset_pos = transformed_pos + hvdf_off; - offset_pos.w() = std::max(offset_pos.w(), fog_max); - offset_pos.w() = std::min(offset_pos.w(), fog_min); - - return offset_pos; +void SpriteRenderer::update_gl_prim(SharedRenderState* render_state) { + // currently gouraud is handled in setup. 
+ const auto& state = m_prim_gl_state; + if (state.fogging_enable) { + // assert(false); + } + if (state.aa_enable) { + assert(false); + } + if (state.use_uv) { + assert(false); + } + if (state.ctxt) { + assert(false); + } + if (state.fix) { + assert(false); + } } -void SpriteRenderer::do_3d_block_cpu(u32 count, +void SpriteRenderer::update_gl_texture(SharedRenderState* render_state, int unit) { + TextureRecord* tex = nullptr; + auto& state = m_adgif_state_stack[unit]; + if (!state.used) { + // nothing used this state, don't bother binding the texture. + return; + } + if (state.using_mt4hh) { + tex = render_state->texture_pool->lookup_mt4hh(state.texture_base_ptr); + } else { + tex = render_state->texture_pool->lookup(state.texture_base_ptr); + } + + if (!tex) { + // TODO Add back + fmt::print("Failed to find texture at {}, using random\n", state.texture_base_ptr); + tex = render_state->texture_pool->get_random_texture(); + if (tex) { + // fmt::print("Successful texture lookup! {} {}\n", tex->page_name, tex->name); + } + } + assert(tex); + + // first: do we need to load the texture? + if (!tex->on_gpu) { + render_state->texture_pool->upload_to_gpu(tex); + } + + glActiveTexture(GL_TEXTURE20 + unit); + glBindTexture(GL_TEXTURE_2D, tex->gpu_texture); + // Note: CLAMP and CLAMP_TO_EDGE are different... 
+ if (state.clamp_s) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + } + + if (state.clamp_t) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + } + + if (state.enable_tex_filt) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + + state.used = false; +} + +void SpriteRenderer::do_block_common(SpriteMode mode, + u32 count, SharedRenderState* render_state, ScopedProfilerNode& prof) { - Matrix4f camera_matrix = m_3d_matrix_data.camera; // vf25, vf26, vf27, vf28 for (u32 sprite_idx = 0; sprite_idx < count; sprite_idx++) { - SpriteHud2DPacket packet; - memset(&packet, 0, sizeof(packet)); - // ilw.y vi08, 1(vi02) | nop vi08 = matrix - u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); - // assert(offset_selector == 0 || offset_selector == 1); - // moved this out of the loop. - // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat - // lq.xyzw vf26, 901(vi00) | nop - // lq.xyzw vf27, 902(vi00) | nop - // lq.xyzw vf28, 903(vi00) | nop - // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset - // vf30 - Vector4f hvdf_offset = m_3d_matrix_data.hvdf_offset; - - // lqi.xyzw vf01, vi02 | nop - Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; - // lqi.xyzw vf05, vi02 | nop - Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; - // lqi.xyzw vf11, vi02 | nop - Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; - - // multiplications from the right column - Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); - - Vector4f scales_vf01 = pos_vf01; // now used for something else. 
- // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 - // vf12 is fog consts - Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, - m_frame_data.bonus); - // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 - // load offset selector for the next round. - // nop | madday.xyzw ACC, vf26, vf01 - // nop | maddz.xyzw vf02, vf27, vf01 - - // move.w vf05, vf00 | addw.z vf01, vf00, vf05 - // scales_vf01.z = sy - scales_vf01.z() = flags_vf05.w(); // start building the scale vector - flags_vf05.w() = 1.f; // what are we building in flags right now?? - - // nop | nop - // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 - float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); - flags_vf05.z() *= m_frame_data.deg_to_rad; - // nop | mul.xyzw vf03, vf02, vf29 - Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); - // nop | nop - // nop | nop - // nop | mulz.z vf04, vf05, vf05 (ts) - // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); - - // the load is for rotation stuff, - // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) - // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) - - // upcoming fcand with 0x3f, that checks all of them. - bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); - bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) 
- - // L8: - // xgkick double buffer setup - // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q - scales_vf01.z() *= Q; // sy - scales_vf01.w() *= Q; // sx - - // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) - auto adgif_vf06 = m_frame_data.adgif_giftag; - - // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) - - // fmand vi01, vi06 | mul.xyz vf02, vf02, Q - transformed_pos_vf02.x() *= Q; - transformed_pos_vf02.y() *= Q; - transformed_pos_vf02.z() *= Q; - - // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 - scales_vf01.x() = scales_vf01.z(); // = sy - if (fmand_result) { - continue; // reject! + if (m_sprite_offset == SPRITE_RENDERER_MAX_SPRITES) { + flush_sprites(render_state, prof); } - // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) - // vf07 is first use adgif + auto& adgif = m_adgif[sprite_idx]; + // fmt::print("adgif: {:X} {:X} {:X} {:X}\n", adgif.tex0_data, adgif.tex1_data, + // adgif.clamp_data, adgif.alpha_data); fmt::print("adgif regs: {} {} {} {} {}\n", + // register_address_name(adgif.tex0_addr), register_address_name(adgif.tex1_addr), + // register_address_name(adgif.mip_addr), register_address_name(adgif.clamp_addr), + // register_address_name(adgif.alpha_addr)); + handle_tex0(adgif.tex0_data, render_state, prof); + handle_tex1(adgif.tex1_data, render_state, prof); + // handle_mip(adgif.mip_data, render_state, prof); + if (GsRegisterAddress(adgif.clamp_addr) == GsRegisterAddress::ZBUF_1) { + handle_zbuf(adgif.clamp_data, render_state, prof); + } else { + handle_clamp(adgif.clamp_data, render_state, prof); + } + handle_alpha(adgif.alpha_data, render_state, prof); - // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) - - // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 - // vf08 is second user adgif - Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; - // if (m_extra_debug) { - // ImGui::Text("sel %d", offset_selector); - // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), 
transformed_pos_vf02.z()); - // imgui_vec(hvdf_offset, "hvdf"); - // imgui_vec(transformed_pos_vf02, "tf'd"); - // } - - // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 - // vf09 is third user adgif - scales_vf01.x() *= scales_vf01.w(); // x = sx * sy - - // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) - // FIRST ADGIF IS adgif_vf06 - packet.adgif_giftag = adgif_vf06; - - // just do all 5 now. - packet.user_adgif = m_adgif[sprite_idx]; - - offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); - - scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); - scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); - - scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) - - offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); - - scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); - scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); - bool use_first_giftag = offset_selector == 0; - - auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); - - scales_vf01.x() = std::min(scales_vf01.x(), 1.f); - - transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); - - color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? 
- - // ibne vi00, vi09, L6 | nop - if (transformed_pos_vf02.w() != 0) { - use_first_giftag = false; + if (!m_adgif_state_stack[m_adgif_index].used) { + m_adgif_state_stack[m_adgif_index] = m_adgif_state; + m_adgif_state_stack[m_adgif_index].used = true; + } else if (m_adgif_state != m_adgif_state_stack[m_adgif_index]) { + if (m_adgif_index + 1 == ADGIF_STATE_COUNT || + !m_adgif_state.nontexture_equal(m_adgif_state_stack[m_adgif_index])) { + flush_sprites(render_state, prof); + } else { + m_adgif_index++; + } + m_adgif_state_stack[m_adgif_index] = m_adgif_state; + m_adgif_state_stack[m_adgif_index].used = true; } - flag_vi07 = 0; // todo hack - Vector4f* xy_array = m_frame_data.xyz_array + flag_vi07; - math::Vector color_integer_vf11 = color_vf11.cast(); + int vert_idx = 6 * m_sprite_offset; - packet.color = color_integer_vf11; + auto& vert1 = m_vertices_3d.at(vert_idx + 0); - if (fcand_result) { - continue; // reject (could move earlier) - } + vert1.xyz_sx = m_vec_data_2d[sprite_idx].xyz_sx; + vert1.quat_sy = m_vec_data_2d[sprite_idx].flag_rot_sy; + vert1.rgba = m_vec_data_2d[sprite_idx].rgba / 255; + vert1.flags_matrix[0] = m_vec_data_2d[sprite_idx].flag(); + vert1.flags_matrix[1] = m_vec_data_2d[sprite_idx].matrix(); + vert1.info[0] = m_adgif_index; + vert1.info[1] = m_adgif_state_stack[m_adgif_index].tcc; + vert1.info[2] = 0; + vert1.info[3] = mode; - Vector4f transformed[4]; + m_vertices_3d.at(vert_idx + 1) = vert1; + m_vertices_3d.at(vert_idx + 2) = vert1; + m_vertices_3d.at(vert_idx + 3) = vert1; + m_vertices_3d.at(vert_idx + 4) = vert1; + m_vertices_3d.at(vert_idx + 5) = vert1; - flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; - // do rot - auto rot = sprite_quat_to_rot(flags_vf05.x(), flags_vf05.y(), flags_vf05.z()); - // fmt::print("root: {}\n", offset_pos_vf10.to_string_aligned()); + m_vertices_3d.at(vert_idx + 1).info[2] = 1; + m_vertices_3d.at(vert_idx + 2).info[2] = 2; + m_vertices_3d.at(vert_idx + 3).info[2] = 2; + m_vertices_3d.at(vert_idx + 
4).info[2] = 3; + m_vertices_3d.at(vert_idx + 5).info[2] = 0; - // for (int i = 0; i < 3; i++) { - // fmt::print("M{}: {}\n", i, rot[i].to_string_aligned()); - // } - for (int i = 0; i < 4; i++) { - transformed[i] = - sprite_transform2(m_vec_data_2d[sprite_idx].xyz_sx, xy_array[i], camera_matrix, rot, - m_vec_data_2d[sprite_idx].sx(), m_vec_data_2d[sprite_idx].sy(), - m_3d_matrix_data.hvdf_offset, m_frame_data.pfog0, m_frame_data.fog_min, - m_frame_data.fog_max); - } - Vector4f xy0_vf19 = transformed[0]; - Vector4f xy1_vf20 = transformed[1]; - Vector4f xy2_vf21 = transformed[2]; - Vector4f xy3_vf22 = transformed[3]; - - packet.sprite_giftag = - use_first_giftag ? m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; - - Vector4f st0_vf06 = m_frame_data.st_array[0]; - Vector4f st1_vf07 = m_frame_data.st_array[1]; - Vector4f st2_vf08 = m_frame_data.st_array[2]; - Vector4f st3_vf09 = m_frame_data.st_array[3]; - - packet.st0 = st0_vf06; - packet.st1 = st1_vf07; - packet.st2 = st2_vf08; - packet.st3 = st3_vf09; - - auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); - auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); - auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); - auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); - - packet.xy0 = xy0_vf19_int; - packet.xy1 = xy1_vf20_int; - packet.xy2 = xy2_vf21_int; - packet.xy3 = xy3_vf22_int; - - m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); - } -} - -void SpriteRenderer::do_2d_group0_block_cpu(u32 count, - SharedRenderState* render_state, - ScopedProfilerNode& prof) { - if (m_extra_debug) { - ImGui::Begin("Sprite Extra Debug 2d_0"); - } - - Matrix4f camera_matrix = m_3d_matrix_data.camera; // vf25, vf26, vf27, vf28 - for (u32 sprite_idx = 0; sprite_idx < count; sprite_idx++) { - if (m_extra_debug) { - ImGui::Text("Sprite: %d", sprite_idx); - } - SpriteHud2DPacket packet; - memset(&packet, 0, sizeof(packet)); - // ilw.y vi08, 1(vi02) | nop vi08 = matrix - u32 offset_selector = 
m_vec_data_2d[sprite_idx].matrix(); - // assert(offset_selector == 0 || offset_selector == 1); - // moved this out of the loop. - // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat - // lq.xyzw vf26, 901(vi00) | nop - // lq.xyzw vf27, 902(vi00) | nop - // lq.xyzw vf28, 903(vi00) | nop - // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset - // vf30 - Vector4f hvdf_offset = m_3d_matrix_data.hvdf_offset; - - // lqi.xyzw vf01, vi02 | nop - Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; - if (m_extra_debug) { - imgui_vec(pos_vf01, "POS", 2); - } - // lqi.xyzw vf05, vi02 | nop - Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; - // lqi.xyzw vf11, vi02 | nop - Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; - - // multiplications from the right column - Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); - - Vector4f scales_vf01 = pos_vf01; // now used for something else. - // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 - // vf12 is fog consts - Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, - m_frame_data.bonus); - // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 - // load offset selector for the next round. - // nop | madday.xyzw ACC, vf26, vf01 - // nop | maddz.xyzw vf02, vf27, vf01 - - // move.w vf05, vf00 | addw.z vf01, vf00, vf05 - // scales_vf01.z = sy - scales_vf01.z() = flags_vf05.w(); // start building the scale vector - flags_vf05.w() = 1.f; // what are we building in flags right now?? 
- - // nop | nop - // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 - float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); - flags_vf05.z() *= m_frame_data.deg_to_rad; - // nop | mul.xyzw vf03, vf02, vf29 - Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); - // nop | nop - // nop | nop - // nop | mulz.z vf04, vf05, vf05 (ts) - // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); - - // the load is for rotation stuff, - // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) - // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) - - // upcoming fcand with 0x3f, that checks all of them. - bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); - bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) - - // L8: - // xgkick double buffer setup - // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q - scales_vf01.z() *= Q; // sy - scales_vf01.w() *= Q; // sx - - // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) - auto adgif_vf06 = m_frame_data.adgif_giftag; - - // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) - - // fmand vi01, vi06 | mul.xyz vf02, vf02, Q - transformed_pos_vf02.x() *= Q; - transformed_pos_vf02.y() *= Q; - transformed_pos_vf02.z() *= Q; - - // if (m_extra_debug) { - // imgui_vec(transformed_pos_vf02, "scaled xf"); - // } - - // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 - scales_vf01.x() = scales_vf01.z(); // = sy - if (fmand_result) { - if (m_extra_debug) { - ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (1) reject"); - ImGui::Separator(); - } - continue; // reject! 
- } - - // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) - // vf07 is first use adgif - - // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) - - // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 - // vf08 is second user adgif - Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; - // if (m_extra_debug) { - // ImGui::Text("sel %d", offset_selector); - // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); - // imgui_vec(hvdf_offset, "hvdf"); - // imgui_vec(transformed_pos_vf02, "tf'd"); - // } - - // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 - // vf09 is third user adgif - scales_vf01.x() *= scales_vf01.w(); // x = sx * sy - - // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) - // FIRST ADGIF IS adgif_vf06 - packet.adgif_giftag = adgif_vf06; - - // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) - - // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 - // SECOND ADGIF is first user - // just do all 5 now. - packet.user_adgif = m_adgif[sprite_idx]; - - offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); - - // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 - // THIRD ADGIF is second user - scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); - scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); - - // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) - // FOURTH ADGIF is third user - - // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) - - // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 - // vf06 is fourth user adgif - scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) - - // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 - // vf07 is fifth user adgif - offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); - - // lq.xyzw vf08, 999(vi00) | miniz.zw vf01, vf01, vf12 - // vf08 is 2d giftag 1 (NOTE THIS IS DIFFERENT FROM 2d 1)!!!!! 
- scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); - scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); - bool use_first_giftag = offset_selector == 0; - - // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 - auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); - Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); - - // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 - // pipeline - scales_vf01.x() = std::min(scales_vf01.x(), 1.f); - - // nop | suby.w vf02, vf10, vf12 (unique) - transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); - - // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 - // pipeline - color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? - - // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 - // already computed pipeline - - // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) - Vector4f basis_x_vf17 = m_frame_data.basis_x; - - // fmand vi09, vi06 | nop - // ibne vi00, vi09, L6 | nop - if (transformed_pos_vf02.w() != 0) { - if (m_extra_debug) { - ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (2) trick"); - } - use_first_giftag = false; - } - - // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) - Vector4f basis_y_vf18 = m_frame_data.basis_y; - - // assert(flag_vi07 == 0); - Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; - // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 - Vector4f xy0_vf19 = xy_array[0]; - math::Vector color_integer_vf11 = color_vf11.cast(); - - // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) - Vector4f xy1_vf20 = xy_array[1]; - - // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 - Vector4f xy2_vf21 = xy_array[2]; - Vector4f acc = basis_x_vf17 * vf05_sincos.w(); - - // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 - Vector4f xy3_vf22 = xy_array[3]; - Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); - // sq.xyzw vf11, 3(vi05) | mulaz.xyzw 
ACC, vf17, vf05 - // EIGHTH is color integer - packet.color = color_integer_vf11; - - acc = basis_x_vf17 * vf05_sincos.z(); - - // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 - // (pipeline) - Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); - - // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) - - // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 - // (pipeline) - vf12_rotated *= scales_vf01.w(); - - // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) - if (fcand_result) { - if (m_extra_debug) { - ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); - ImGui::Separator(); - } - continue; // reject (could move earlier) - } - - // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 - // (pipeline) - vf13_rotated_trans *= scales_vf01.z(); - - // LEFT OFF HERE! - - // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 - // FIFTH is fourth user - - // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 - // SIXTH is fifth user - acc = offset_pos_vf10; - - // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 - // SEVENTH is giftag2 - packet.sprite_giftag = - use_first_giftag ? 
m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; - acc += vf12_rotated * xy0_vf19.x(); - - // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 - Vector4f st0_vf06 = m_frame_data.st_array[0]; - xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); - - // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 - Vector4f st1_vf07 = m_frame_data.st_array[1]; - acc = offset_pos_vf10; - - // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 - Vector4f st2_vf08 = m_frame_data.st_array[2]; - acc += vf12_rotated * xy1_vf20.x(); - - // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 - Vector4f st3_vf09 = m_frame_data.st_array[3]; - xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); - - // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 - // NINTH is st0 - packet.st0 = st0_vf06; - acc = offset_pos_vf10; - - // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 - // ELEVEN is st1 - packet.st1 = st1_vf07; - acc += vf12_rotated * xy2_vf21.x(); - - // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 - // THIRTEEN is st2 - packet.st2 = st2_vf08; - xy2_vf21 = acc + vf13_rotated_trans * xy2_vf21.y(); - - // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 - // FIFTEEN is st3 - packet.st3 = st3_vf09; - acc = offset_pos_vf10; - - // nop | maddax.xyzw ACC, vf12, vf22 - acc += vf12_rotated * xy3_vf22.x(); - - // nop | maddy.xyzw vf22, vf13, vf22 - xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); - - // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 - // (pipeline) - auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); - - // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 - // (pipeline) - auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); - - // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 - // (pipeline) - auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); - - // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 - // (pipeline) - auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); - - if (m_extra_debug) { - u32 zi = xy3_vf22_int.z() >> 4; - ImGui::Text("z (int): 0x%08x 
%s", zi, zi >= (1 << 24) ? "bad" : ""); - ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); - } - - // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) - // TENTH is xy0int - packet.xy0 = xy0_vf19_int; - // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) - // TWELVE is xy1int - packet.xy1 = xy1_vf20_int; - // sq.xyzw vf21, 6(vi05) | nop - // FOURTEEN is xy2int - packet.xy2 = xy2_vf21_int; - // sq.xyzw vf22, 8(vi05) | nop - // SIXTEEN is xy3int - packet.xy3 = xy3_vf22_int; - - m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); - if (m_extra_debug) { - imgui_vec(vf12_rotated, "vf12", 2); - imgui_vec(vf13_rotated_trans, "vf13", 2); - ImGui::Separator(); - } - - // xgkick vi15 | nop - // iaddi vi04, vi04, -0x1 | nop - // iaddiu vi01, vi00, 0x672 | nop - // ibne vi00, vi04, L8 | nop - // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 - // nop | nop :e - // nop | nop - // L9: - // iaddi vi04, vi04, -0x1 | nop - // iaddi vi02, vi02, -0x3 | nop - // ibne vi00, vi04, L7 | nop - // nop | nop - // nop | nop :e - // nop | nop - // L10: - // iaddi vi04, vi04, -0x1 | nop - // iaddi vi03, vi03, 0x4 | nop - // ibne vi00, vi04, L7 | nop - // nop | nop - // nop | nop :e - // nop | nop - } - - if (m_extra_debug) { - ImGui::End(); + ++m_sprite_offset; } } diff --git a/game/graphics/opengl_renderer/SpriteRenderer.h b/game/graphics/opengl_renderer/SpriteRenderer.h index ac221b1b3e..4362db66e7 100644 --- a/game/graphics/opengl_renderer/SpriteRenderer.h +++ b/game/graphics/opengl_renderer/SpriteRenderer.h @@ -89,6 +89,7 @@ struct SpriteVecData2d { // scale y. float sy() const { return flag_rot_sy.w(); } }; +static_assert(sizeof(SpriteVecData2d) == 48); /*! 
* The layout of VU1 data memory, in quadword addresses @@ -163,9 +164,22 @@ class SpriteRenderer : public BucketRenderer { void render_2d_group1(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof); - void do_2d_group1_block_cpu(u32 count, SharedRenderState* render_state, ScopedProfilerNode& prof); - void do_2d_group0_block_cpu(u32 count, SharedRenderState* render_state, ScopedProfilerNode& prof); - void do_3d_block_cpu(u32 count, SharedRenderState* render_state, ScopedProfilerNode& prof); + enum SpriteMode { Mode2D = 1, ModeHUD = 2, Mode3D = 3 }; + void do_block_common(SpriteMode mode, + u32 count, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + void handle_tex0(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + void handle_tex1(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + // void handle_mip(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + void handle_zbuf(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + void handle_clamp(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + void handle_alpha(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof); + + void update_gl_prim(SharedRenderState* render_state); + void update_gl_texture(SharedRenderState* render_state, int unit); + void flush_sprites(SharedRenderState* render_state, ScopedProfilerNode& prof); u8 m_sprite_distorter_setup[7 * 16]; // direct data u8 m_sprite_direct_setup[3 * 16]; @@ -189,6 +203,99 @@ class SpriteRenderer : public BucketRenderer { bool m_2d_enable = true; bool m_3d_enable = true; - DirectRenderer m_sprite_renderer; - DirectRenderer m_direct_renderer; + struct SpriteVertex3D { + math::Vector4f xyz_sx; // position + x scale + math::Vector4f quat_sy; // quaternion + y scale + math::Vector4f rgba; // color + math::Vector flags_matrix; // flags + matrix... 
split + math::Vector info; + math::Vector pad; + }; + static_assert(sizeof(SpriteVertex3D) == 64); + + std::vector m_vertices_3d; + + struct { + GLuint vertex_buffer; + GLuint vao; + } m_ogl; + + int m_sprite_offset = 0; + + // state set through the prim register that requires changing GL stuff. + struct PrimGlState { + void from_register(GsPrim reg) { + current_register = reg; + gouraud_enable = reg.gouraud(); + texture_enable = reg.tme(); + fogging_enable = reg.fge(); + aa_enable = reg.aa1(); + use_uv = reg.fst(); + ctxt = reg.ctxt(); + fix = reg.fix(); + alpha_blend_enable = reg.abe(); + } + + GsPrim current_register; + bool gouraud_enable = false; + bool texture_enable = false; + bool fogging_enable = false; + bool alpha_blend_enable = false; + + bool aa_enable = false; + bool use_uv = false; // todo: might not require a gl state change + bool ctxt = false; // do they ever use ctxt2? + bool fix = false; // what does this even do? + } m_prim_gl_state; + + static constexpr int ADGIF_STATE_COUNT = 10; + + struct AdGifState { + GsTex0 reg_tex0; + u32 texture_base_ptr = 0; + bool using_mt4hh = false; + bool tcc = false; + + bool enable_tex_filt = false; + + u64 reg_clamp = 0b101; + bool clamp_s = true; + bool clamp_t = true; + + GsAlpha reg_alpha; + GsAlpha::BlendMode a = GsAlpha::BlendMode::SOURCE; + GsAlpha::BlendMode b = GsAlpha::BlendMode::DEST; + GsAlpha::BlendMode c = GsAlpha::BlendMode::SOURCE; + GsAlpha::BlendMode d = GsAlpha::BlendMode::DEST; + u8 fix = 0; + void from_register(GsAlpha reg) { + reg_alpha = reg; + a = reg.a_mode(); + b = reg.b_mode(); + c = reg.c_mode(); + d = reg.d_mode(); + fix = reg.fix(); + + assert(fix == 0); + } + bool z_write = false; + + bool used = false; + + bool nontexture_equal(const AdGifState& other) const { + return reg_alpha == other.reg_alpha && z_write == other.z_write; + } + + bool operator==(const AdGifState& other) const { + return reg_tex0 == other.reg_tex0 && enable_tex_filt == other.enable_tex_filt && + reg_clamp == 
other.reg_clamp && nontexture_equal(other); + } + bool operator!=(const AdGifState& other) const { return !operator==(other); } + } m_adgif_state_stack[ADGIF_STATE_COUNT]; + + AdGifState m_adgif_state; // temp state + + int m_adgif_index = 0; + + void update_gl_blend(AdGifState& state); }; diff --git a/game/graphics/opengl_renderer/shaders/sky_blend.frag b/game/graphics/opengl_renderer/shaders/sky_blend.frag index 052f234441..9254ebaa90 100644 --- a/game/graphics/opengl_renderer/shaders/sky_blend.frag +++ b/game/graphics/opengl_renderer/shaders/sky_blend.frag @@ -3,7 +3,7 @@ layout(location = 0) out vec4 color; in vec3 tex_coord; -uniform sampler2D tex_T0; +layout (binding = 0) uniform sampler2D tex_T0; void main() { vec4 T0 = texture(tex_T0, tex_coord.xy); diff --git a/game/graphics/opengl_renderer/shaders/sprite_cpu.frag b/game/graphics/opengl_renderer/shaders/sprite_3d.frag similarity index 92% rename from game/graphics/opengl_renderer/shaders/sprite_cpu.frag rename to game/graphics/opengl_renderer/shaders/sprite_3d.frag index 18154c039c..e94244a1c7 100644 --- a/game/graphics/opengl_renderer/shaders/sprite_cpu.frag +++ b/game/graphics/opengl_renderer/shaders/sprite_3d.frag @@ -2,9 +2,8 @@ out vec4 color; -in vec4 fragment_color; -in vec2 tex_coord; - +in flat vec4 fragment_color; +in vec3 tex_coord; in flat uvec2 tex_info; layout (binding = 20) uniform sampler2D tex_T0; @@ -35,7 +34,7 @@ vec4 sample_tex(vec2 coord, uint unit) { } void main() { - vec4 T0 = sample_tex(tex_coord, tex_info.x); + vec4 T0 = sample_tex(tex_coord.xy, tex_info.x); if (tex_info.y == 0) { T0.w = 1.0; } diff --git a/game/graphics/opengl_renderer/shaders/sprite_3d.vert b/game/graphics/opengl_renderer/shaders/sprite_3d.vert new file mode 100644 index 0000000000..9b4a561747 --- /dev/null +++ b/game/graphics/opengl_renderer/shaders/sprite_3d.vert @@ -0,0 +1,185 @@ +#version 430 core + +layout (location = 0) in vec4 xyz_sx; +layout (location = 1) in vec4 quat_sy; +layout (location = 2) in vec4 
rgba; +layout (location = 3) in uvec2 flags_matrix; +layout (location = 4) in uvec4 tex_info_in; + +uniform vec4 hvdf_offset; +uniform mat4 camera; +uniform mat4 hud_matrix; +uniform vec4 hud_hvdf_offset; +uniform vec4 hud_hvdf_user[75]; +uniform float pfog0; +uniform float fog_min; +uniform float fog_max; +uniform float min_scale; +uniform float max_scale; +uniform float bonus; +uniform float deg_to_rad; +uniform float inv_area; +uniform vec4 basis_x; +uniform vec4 basis_y; +uniform vec4 hmge_scale; +uniform vec4 xy_array[8]; +uniform vec4 xyz_array[4]; +uniform vec4 st_array[4]; + +out flat vec4 fragment_color; +out vec3 tex_coord; +out flat uvec2 tex_info; + +vec4 matrix_transform(mat4 mtx, vec3 pt) { + return mtx[3] + + mtx[0] * pt.x + + mtx[1] * pt.y + + mtx[2] * pt.z; +} + +mat3 sprite_quat_to_rot(vec3 quat) { + mat3 result; + float qr = sqrt(abs(1.0 - (quat.x * quat.x + quat.y * quat.y + quat.z * quat.z))); + result[0][0] = 1.0 - 2.0 * (quat.y * quat.y + quat.z * quat.z); + result[1][0] = 2.0 * (quat.x * quat.y - quat.z * qr); + result[2][0] = 2.0 * (quat.x * quat.z + quat.y * qr); + result[0][1] = 2.0 * (quat.x * quat.y + quat.z * qr); + result[1][1] = 1.0 - 2.0 * (quat.x * quat.x + quat.z * quat.z); + result[2][1] = 2.0 * (quat.y * quat.z - quat.x * qr); + result[0][2] = 2.0 * (quat.x * quat.z - quat.y * qr); + result[1][2] = 2.0 * (quat.y * quat.z + quat.x * qr); + result[2][2] = 1.0 - 2.0 * (quat.x * quat.x + quat.y * quat.y); + return result; +} + +vec4 sprite_transform2(vec3 root, vec4 off, mat3 sprite_rot, float sx, float sy) { + vec3 pos = root; + + vec3 offset = sprite_rot[0] * off.x * sx + sprite_rot[1] * off.y + sprite_rot[2] * off.z * sy; + + pos += offset; + vec4 transformed_pos = -matrix_transform(camera, pos); + float Q = pfog0 / transformed_pos.w; + transformed_pos.xyz *= Q; + transformed_pos.xyz += hvdf_offset.xyz; + // transformed_pos.w = max(transformed_pos.w, fog_max); + // transformed_pos.w = min(transformed_pos.w, fog_min); + + return 
transformed_pos; +} + +void main() { + +// STEP 1: UNPACK DATA AND CREATE READABLE VARIABLES + + vec3 position = xyz_sx.xyz; + float sx = xyz_sx.w; + float sy = quat_sy.w; + fragment_color = rgba; + uint vert_id = tex_info_in.z; + uint rendermode = tex_info_in.w; // 2D, HUD, 3D + vec3 quat = quat_sy.xyz; + uint matrix = flags_matrix.y; + + vec4 transformed; + +// STEP 2: perspective transform for distance + vec4 transformed_pos_vf02 = matrix_transform(rendermode == 2 ? hud_matrix : camera, position); + float Q = pfog0 / transformed_pos_vf02.w; + + +// STEP 3: fade out sprite! + vec4 scales_vf01 = xyz_sx; // now used for something else. + scales_vf01.z = sy; // start building the scale vector + scales_vf01.zw *= Q; // sy sx + scales_vf01.x = scales_vf01.z; // = sy + scales_vf01.x *= scales_vf01.w; // x = sx * sy + scales_vf01.x *= inv_area; // x = sx * sy * inv_area (area ratio) + fragment_color.w *= min(scales_vf01.x, 1.0); // is this right? doesn't this stall?? + + + // STEP 4: actual vertex transformation + if (rendermode == 3) { // 3D sprites + + mat3 rot = sprite_quat_to_rot(quat); + transformed = sprite_transform2(position, xyz_array[vert_id], rot, sx, sy); + + } else if (rendermode == 1) { // 2D sprites + + transformed_pos_vf02.xyz *= Q; + vec4 offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; + offset_pos_vf10.w = max(offset_pos_vf10.w, fog_max); + offset_pos_vf10.w = min(offset_pos_vf10.w, fog_min); + /* transformed_pos_vf02.w = offset_pos_vf10.w - fog_max; + int fge = matrix == 0; + if (transformed_pos_vf02.w != 0) { + fge = false; + } */ + + scales_vf01.z = min(max(scales_vf01.z, min_scale), max_scale); + scales_vf01.w = min(max(scales_vf01.w, min_scale), max_scale); + + quat.z *= deg_to_rad; + float sp_sin = sin(quat.z); + float sp_cos = cos(quat.z); + + vec4 xy0_vf19 = xy_array[vert_id + flags_matrix.x]; + vec4 vf12_rotated = (basis_x * sp_cos) - (basis_y * sp_sin); + vec4 vf13_rotated_trans = (basis_x * sp_sin) + (basis_y * sp_cos); + + 
vf12_rotated *= scales_vf01.w; + vf13_rotated_trans *= scales_vf01.z; + + transformed = offset_pos_vf10 + vf12_rotated * xy0_vf19.x + vf13_rotated_trans * xy0_vf19.y; + + } else if (rendermode == 2) { // hud sprites + + transformed_pos_vf02.xyz *= Q; + vec4 offset_pos_vf10 = transformed_pos_vf02 + (matrix == 0 ? hud_hvdf_offset : hud_hvdf_user[matrix - 1]); + offset_pos_vf10.w = max(offset_pos_vf10.w, fog_max); + offset_pos_vf10.w = min(offset_pos_vf10.w, fog_min); + + scales_vf01.z = min(max(scales_vf01.z, min_scale), max_scale); + scales_vf01.w = min(max(scales_vf01.w, min_scale), max_scale); + + quat.z *= deg_to_rad; + float sp_sin = sin(quat.z); + float sp_cos = cos(quat.z); + + vec4 xy0_vf19 = xy_array[vert_id]; + vec4 vf12_rotated = (basis_x * sp_cos) - (basis_y * sp_sin); + vec4 vf13_rotated_trans = (basis_x * sp_sin) + (basis_y * sp_cos); + + vf12_rotated *= scales_vf01.w; + vf13_rotated_trans *= scales_vf01.z; + + transformed = offset_pos_vf10 + vf12_rotated * xy0_vf19.x + vf13_rotated_trans * xy0_vf19.y; + + } + + tex_coord = st_array[vert_id].xyz; + + +// STEP 5: final adjustments + // correct xy offset + transformed.xy -= (2048.); + + // correct z scale + transformed.z /= (8388608); + transformed.z -= 1; + + // correct xy scale + transformed.x /= (256); + transformed.y /= -(128); + + // hack + transformed.xyz *= transformed.w; + + gl_Position = transformed; + // scissoring area adjust + gl_Position.y *= 512.0/448.0; + + fragment_color.w *= 2; + + tex_info = tex_info_in.xy; +} diff --git a/game/graphics/opengl_renderer/shaders/sprite_cpu.vert b/game/graphics/opengl_renderer/shaders/sprite_cpu.vert deleted file mode 100644 index b5d4de95c4..0000000000 --- a/game/graphics/opengl_renderer/shaders/sprite_cpu.vert +++ /dev/null @@ -1,21 +0,0 @@ -#version 430 core - -layout (location = 0) in vec3 position_in; -layout (location = 1) in vec4 rgba_in; -layout (location = 2) in vec2 tex_coord_in; - -out vec4 fragment_color; -out vec2 tex_coord; - -// putting all 
texture info stuff here so it's easier to copy-paste -layout (location = 3) in uvec2 tex_info_in; -out flat uvec2 tex_info; - -void main() { - gl_Position = vec4((position_in.x - 0.5) * 16., -(position_in.y - 0.5) * 32, position_in.z * 2 - 1., 1.0); - // scissoring area adjust - gl_Position.y *= 512.0/448.0; - fragment_color = vec4(rgba_in.x, rgba_in.y, rgba_in.z, rgba_in.w * 2.); - tex_coord = tex_coord_in; - tex_info = tex_info_in; -} diff --git a/game/graphics/opengl_renderer/shaders/tfrag3.vert b/game/graphics/opengl_renderer/shaders/tfrag3.vert index 33c4e88dd9..05543b5b1d 100644 --- a/game/graphics/opengl_renderer/shaders/tfrag3.vert +++ b/game/graphics/opengl_renderer/shaders/tfrag3.vert @@ -51,8 +51,7 @@ void main() { transformed.xy -= (2048.); // correct z scale - transformed.z /= (16777216); - transformed.z *= 2; + transformed.z /= (8388608); transformed.z -= 1; // correct xy scale diff --git a/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp b/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp index 8a1dccf539..2c8465191f 100644 --- a/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp +++ b/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp @@ -265,7 +265,8 @@ void Tfrag3::render_tree(const TfragRenderSettings& settings, int idx_buffer_ptr = make_index_list_from_vis_string( m_cache.draw_idx_temp.data(), tree.index_list.data(), *tree.draws, m_cache.vis_temp); - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data()); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_ptr * sizeof(u32), tree.index_list.data(), + GL_STREAM_DRAW); for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) { const auto& draw = tree.draws->operator[](draw_idx); diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.cpp b/game/graphics/opengl_renderer/tfrag/Tie3.cpp index 4e49b99c2b..0d42d88a04 100644 --- a/game/graphics/opengl_renderer/tfrag/Tie3.cpp +++ b/game/graphics/opengl_renderer/tfrag/Tie3.cpp @@ -639,7 +639,8 @@ void 
Tie3::render_tree(int idx, } Timer draw_timer; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), tree.index_list.data()); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_ptr * sizeof(u32), tree.index_list.data(), + GL_STREAM_DRAW); for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) { const auto& draw = tree.draws->operator[](draw_idx); diff --git a/game/graphics/texture/TexturePool.cpp b/game/graphics/texture/TexturePool.cpp index 613b87a945..9672b3be88 100644 --- a/game/graphics/texture/TexturePool.cpp +++ b/game/graphics/texture/TexturePool.cpp @@ -298,7 +298,7 @@ void TexturePool::set_texture(u32 location, std::shared_ptr recor if (m_textures.at(location).normal_texture) { if (record->do_gc && m_textures.at(location).normal_texture != record) { m_garbage_textures.push_back(std::move(m_textures[location].normal_texture)); - fmt::print("replace add to garbage list {}\n", m_garbage_textures.back()->name); + // fmt::print("replace add to garbage list {}\n", m_garbage_textures.back()->name); } } m_textures[location].normal_texture = std::move(record); diff --git a/goal_src/engine/draw/drawable.gc b/goal_src/engine/draw/drawable.gc index 949bb0e992..8b443ccebc 100644 --- a/goal_src/engine/draw/drawable.gc +++ b/goal_src/engine/draw/drawable.gc @@ -123,7 +123,7 @@ (defun real-main-draw-hook () (when *slow-frame-rate* - (dotimes (v1-2 #xc35000) ;; changed from ps2 for... obvious reasons. + (dotimes (v1-2 #xc350000) ;; changed from ps2 for... obvious reasons. (nop!) (nop!) (nop!)