3D: /* SpriteHud2DPacket packet; memset(&packet, 0, sizeof(packet)); // ilw.y vi08, 1(vi02) | nop vi08 = matrix u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); // assert(offset_selector == 0 || offset_selector == 1); // moved this out of the loop. // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat // lq.xyzw vf26, 901(vi00) | nop // lq.xyzw vf27, 902(vi00) | nop // lq.xyzw vf28, 903(vi00) | nop // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset // vf30 // lqi.xyzw vf01, vi02 | nop Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; // lqi.xyzw vf05, vi02 | nop Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; // lqi.xyzw vf11, vi02 | nop Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; // multiplications from the right column transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); scales_vf01 = pos_vf01; // now used for something else. // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 // vf12 is fog consts fog_consts_vf12 = Vector4f(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, m_frame_data.bonus); // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 // load offset selector for the next round. // nop | madday.xyzw ACC, vf26, vf01 // nop | maddz.xyzw vf02, vf27, vf01 // move.w vf05, vf00 | addw.z vf01, vf00, vf05 // scales_vf01.z = sy scales_vf01.z() = flags_vf05.w(); // start building the scale vector flags_vf05.w() = 1.f; // what are we building in flags right now?? 
// nop | nop // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); flags_vf05.z() *= m_frame_data.deg_to_rad; // nop | mul.xyzw vf03, vf02, vf29 Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); // nop | nop // nop | nop // nop | mulz.z vf04, vf05, vf05 (ts) // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); // the load is for rotation stuff, // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) // upcoming fcand with 0x3f, that checks all of them. bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) // L8: // xgkick double buffer setup // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q scales_vf01.z() *= Q; // sy scales_vf01.w() *= Q; // sx // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) auto adgif_vf06 = m_frame_data.adgif_giftag; // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) // fmand vi01, vi06 | mul.xyz vf02, vf02, Q transformed_pos_vf02.x() *= Q; transformed_pos_vf02.y() *= Q; transformed_pos_vf02.z() *= Q; // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 scales_vf01.x() = scales_vf01.z(); // = sy if (fmand_result) { continue; // reject! 
} // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) // vf07 is first user adgif // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 // vf08 is second user adgif offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; // if (m_extra_debug) { // ImGui::Text("sel %d", offset_selector); // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); // imgui_vec(hvdf_offset, "hvdf"); // imgui_vec(transformed_pos_vf02, "tf'd"); // } // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 // vf09 is third user adgif scales_vf01.x() *= scales_vf01.w(); // x = sx * sy // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) // FIRST ADGIF IS adgif_vf06 packet.adgif_giftag = adgif_vf06; // just do all 5 now. packet.user_adgif = m_adgif[sprite_idx]; offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); bool use_first_giftag = offset_selector == 0; auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); scales_vf01.x() = std::min(scales_vf01.x(), 1.f); transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? 
// ibne vi00, vi09, L6 | nop if (transformed_pos_vf02.w() != 0) { use_first_giftag = false; } flag_vi07 = 0; // todo hack Vector4f* xy_array = m_frame_data.xyz_array + flag_vi07; math::Vector color_integer_vf11 = color_vf11.cast(); packet.color = color_integer_vf11; if (fcand_result) { continue; // reject (could move earlier) } Vector4f transformed[4]; flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; // do rot rot = sprite_quat_to_rot(flags_vf05.x(), flags_vf05.y(), flags_vf05.z()); // fmt::print("root: {}\n", offset_pos_vf10.to_string_aligned()); // for (int i = 0; i < 3; i++) { // fmt::print("M{}: {}\n", i, rot[i].to_string_aligned()); // } for (int i = 0; i < 4; i++) { transformed[i] = sprite_transform2(m_vec_data_2d[sprite_idx].xyz_sx, xy_array[i], camera_matrix, rot, m_vec_data_2d[sprite_idx].sx(), m_vec_data_2d[sprite_idx].sy(), m_3d_matrix_data.hvdf_offset, m_frame_data.pfog0, m_frame_data.fog_min, m_frame_data.fog_max); } Vector4f xy0_vf19 = transformed[0]; Vector4f xy1_vf20 = transformed[1]; Vector4f xy2_vf21 = transformed[2]; Vector4f xy3_vf22 = transformed[3]; packet.sprite_giftag = use_first_giftag ? 
m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; Vector4f st0_vf06 = m_frame_data.st_array[0]; Vector4f st1_vf07 = m_frame_data.st_array[1]; Vector4f st2_vf08 = m_frame_data.st_array[2]; Vector4f st3_vf09 = m_frame_data.st_array[3]; packet.st0 = st0_vf06; packet.st1 = st1_vf07; packet.st2 = st2_vf08; packet.st3 = st3_vf09; auto xy0_vf19_int = (xy0_vf19).cast(); auto xy1_vf20_int = (xy1_vf20).cast(); auto xy2_vf21_int = (xy2_vf21).cast(); auto xy3_vf22_int = (xy3_vf22).cast(); packet.xy0 = xy0_vf19_int; packet.xy1 = xy1_vf20_int; packet.xy2 = xy2_vf21_int; packet.xy3 = xy3_vf22_int; // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); */ 2d: /* if (m_extra_debug) { ImGui::Text("Sprite: %d", sprite_idx); } SpriteHud2DPacket packet; memset(&packet, 0, sizeof(packet)); // ilw.y vi08, 1(vi02) | nop vi08 = matrix u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); // assert(offset_selector == 0 || offset_selector == 1); // moved this out of the loop. 
// lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat // lq.xyzw vf26, 901(vi00) | nop // lq.xyzw vf27, 902(vi00) | nop // lq.xyzw vf28, 903(vi00) | nop // lq.xyzw vf30, 904(vi00) | nop vf30 = hvdf_offset // vf30 Vector4f hvdf_offset = m_3d_matrix_data.hvdf_offset; glUniform4f( glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hvdf_offset"), hvdf_offset[0], hvdf_offset[1], hvdf_offset[2], hvdf_offset[3]); glUniform1f( glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_constant"), m_frame_data.pfog0); // lqi.xyzw vf01, vi02 | nop Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; if (m_extra_debug) { imgui_vec(pos_vf01, "POS", 2); } // lqi.xyzw vf05, vi02 | nop Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; // lqi.xyzw vf11, vi02 | nop Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; // multiplications from the right column Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); Vector4f scales_vf01 = pos_vf01; // now used for something else. // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 // vf12 is fog consts Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, m_frame_data.bonus); // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 // load offset selector for the next round. // nop | madday.xyzw ACC, vf26, vf01 // nop | maddz.xyzw vf02, vf27, vf01 // move.w vf05, vf00 | addw.z vf01, vf00, vf05 // scales_vf01.z = sy scales_vf01.z() = flags_vf05.w(); // start building the scale vector flags_vf05.w() = 1.f; // what are we building in flags right now?? 
// nop | nop // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); flags_vf05.z() *= m_frame_data.deg_to_rad; // nop | mul.xyzw vf03, vf02, vf29 Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); // nop | nop // nop | nop // nop | mulz.z vf04, vf05, vf05 (ts) // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); // the load is for rotation stuff, // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) // upcoming fcand with 0x3f, that checks all of them. bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) // L8: // xgkick double buffer setup // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q scales_vf01.z() *= Q; // sy scales_vf01.w() *= Q; // sx // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) auto adgif_vf06 = m_frame_data.adgif_giftag; // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) // fmand vi01, vi06 | mul.xyz vf02, vf02, Q transformed_pos_vf02.x() *= Q; transformed_pos_vf02.y() *= Q; transformed_pos_vf02.z() *= Q; // if (m_extra_debug) { // imgui_vec(transformed_pos_vf02, "scaled xf"); // } // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 scales_vf01.x() = scales_vf01.z(); // = sy if (fmand_result) { if (m_extra_debug) { ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (1) reject"); ImGui::Separator(); } continue; // reject! 
} // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) // vf07 is first user adgif // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 // vf08 is second user adgif Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; // if (m_extra_debug) { // ImGui::Text("sel %d", offset_selector); // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); // imgui_vec(hvdf_offset, "hvdf"); // imgui_vec(transformed_pos_vf02, "tf'd"); // } // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 // vf09 is third user adgif scales_vf01.x() *= scales_vf01.w(); // x = sx * sy // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) // FIRST ADGIF IS adgif_vf06 packet.adgif_giftag = adgif_vf06; // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 // SECOND ADGIF is first user // just do all 5 now. packet.user_adgif = m_adgif[sprite_idx]; offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 // THIRD ADGIF is second user scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) // FOURTH ADGIF is third user // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 // vf06 is fourth user adgif scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 // vf07 is fifth user adgif offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); // lq.xyzw vf08, 999(vi00) | miniz.zw vf01, vf01, vf12 // vf08 is 2d giftag 1 (NOTE THIS IS DIFFERENT FROM 2d 1)!!!!! 
scales_vf01.z() = std::min(scales_vf01.z(), fog_consts_vf12.z()); scales_vf01.w() = std::min(scales_vf01.w(), fog_consts_vf12.z()); bool use_first_giftag = offset_selector == 0; // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 // pipeline scales_vf01.x() = std::min(scales_vf01.x(), 1.f); // nop | suby.w vf02, vf10, vf12 (unique) transformed_pos_vf02.w() = offset_pos_vf10.w() - fog_consts_vf12.y(); // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 // pipeline color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 // already computed pipeline // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) Vector4f basis_x_vf17 = m_frame_data.basis_x; // fmand vi09, vi06 | nop // ibne vi00, vi09, L6 | nop if (transformed_pos_vf02.w() != 0) { if (m_extra_debug) { ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand (2) trick"); } use_first_giftag = false; } // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) Vector4f basis_y_vf18 = m_frame_data.basis_y; // assert(flag_vi07 == 0); Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 Vector4f xy0_vf19 = xy_array[0]; math::Vector color_integer_vf11 = color_vf11.cast(); // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) Vector4f xy1_vf20 = xy_array[1]; // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 Vector4f xy2_vf21 = xy_array[2]; Vector4f acc = basis_x_vf17 * vf05_sincos.w(); // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 Vector4f xy3_vf22 = xy_array[3]; Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); // sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 // EIGHTH is color integer packet.color = color_integer_vf11; acc = basis_x_vf17 * 
vf05_sincos.z(); // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 // (pipeline) Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 // (pipeline) vf12_rotated *= scales_vf01.w(); // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) if (fcand_result) { if (m_extra_debug) { ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); ImGui::Separator(); } continue; // reject (could move earlier) } // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 // (pipeline) vf13_rotated_trans *= scales_vf01.z(); // LEFT OFF HERE! // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 // FIFTH is fourth user // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 // SIXTH is fifth user acc = offset_pos_vf10; // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 // SEVENTH is giftag2 packet.sprite_giftag = use_first_giftag ? m_frame_data.sprite_2d_giftag : m_frame_data.sprite_2d_giftag2; acc += vf12_rotated * xy0_vf19.x(); // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 Vector4f st0_vf06 = m_frame_data.st_array[0]; xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 Vector4f st1_vf07 = m_frame_data.st_array[1]; acc = offset_pos_vf10; // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 Vector4f st2_vf08 = m_frame_data.st_array[2]; acc += vf12_rotated * xy1_vf20.x(); // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 Vector4f st3_vf09 = m_frame_data.st_array[3]; xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 // NINTH is st0 packet.st0 = st0_vf06; acc = offset_pos_vf10; // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 // ELEVEN is st1 packet.st1 = st1_vf07; acc += vf12_rotated * xy2_vf21.x(); // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 // THIRTEEN is st2 packet.st2 = st2_vf08; xy2_vf21 = acc + 
vf13_rotated_trans * xy2_vf21.y(); // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 // FIFTEEN is st3 packet.st3 = st3_vf09; acc = offset_pos_vf10; // nop | maddax.xyzw ACC, vf12, vf22 acc += vf12_rotated * xy3_vf22.x(); // nop | maddy.xyzw vf22, vf13, vf22 xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 // (pipeline) auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 // (pipeline) auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 // (pipeline) auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 // (pipeline) auto xy3_vf22_int = (xy3_vf22 * 16.f).cast(); if (m_extra_debug) { u32 zi = xy3_vf22_int.z() >> 4; ImGui::Text("z (int): 0x%08x %s", zi, zi >= (1 << 24) ? "bad" : ""); ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); } // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) // TENTH is xy0int packet.xy0 = xy0_vf19_int; // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) // TWELVE is xy1int packet.xy1 = xy1_vf20_int; // sq.xyzw vf21, 6(vi05) | nop // FOURTEEN is xy2int packet.xy2 = xy2_vf21_int; // sq.xyzw vf22, 8(vi05) | nop // SIXTEEN is xy3int packet.xy3 = xy3_vf22_int; // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); if (m_extra_debug) { imgui_vec(vf12_rotated, "vf12", 2); imgui_vec(vf13_rotated_trans, "vf13", 2); ImGui::Separator(); } // xgkick vi15 | nop // iaddi vi04, vi04, -0x1 | nop // iaddiu vi01, vi00, 0x672 | nop // ibne vi00, vi04, L8 | nop // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 // nop | nop :e // nop | nop // L9: // iaddi vi04, vi04, -0x1 | nop // iaddi vi02, vi02, -0x3 | nop // ibne vi00, vi04, L7 | nop // nop | nop // nop | nop :e // nop | nop // L10: // iaddi vi04, vi04, -0x1 | nop // iaddi vi03, vi03, 0x4 | nop // ibne vi00, vi04, L7 | nop // nop | nop // nop | nop :e 
// nop | nop */ hud: /* SpriteHud2DPacket packet; memset(&packet, 0, sizeof(packet)); // L7 (prologue, and early abort) // ilw.y vi08, 1(vi02) | nop vi08 = matrix u32 offset_selector = m_vec_data_2d[sprite_idx].matrix(); // moved this out of the loop. // lq.xyzw vf25, 900(vi00) | nop vf25 = cam_mat // lq.xyzw vf26, 901(vi00) | nop // lq.xyzw vf27, 902(vi00) | nop // lq.xyzw vf28, 903(vi00) | nop // lq.xyzw vf30, 904(vi08) | nop vf30 = hvdf_offset // vf30 Vector4f hvdf_offset = offset_selector == 0 ? m_hud_matrix_data.hvdf_offset : m_hud_matrix_data.user_hvdf[offset_selector - 1]; glUniform4f( glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "hvdf_offset"), hvdf_offset[0], hvdf_offset[1], hvdf_offset[2], hvdf_offset[3]); glUniform1f( glGetUniformLocation(render_state->shaders[ShaderId::SPRITE].id(), "fog_constant"), m_frame_data.pfog0); // lqi.xyzw vf01, vi02 | nop Vector4f pos_vf01 = m_vec_data_2d[sprite_idx].xyz_sx; if (m_extra_debug) { imgui_vec(pos_vf01, "POS", 2); } // lqi.xyzw vf05, vi02 | nop Vector4f flags_vf05 = m_vec_data_2d[sprite_idx].flag_rot_sy; // lqi.xyzw vf11, vi02 | nop Vector4f color_vf11 = m_vec_data_2d[sprite_idx].rgba; // multiplications from the right column Vector4f transformed_pos_vf02 = matrix_transform(camera_matrix, pos_vf01); Vector4f scales_vf01 = pos_vf01; // now used for something else. // lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 // vf12 is fog consts Vector4f fog_consts_vf12(m_frame_data.fog_min, m_frame_data.fog_max, m_frame_data.max_scale, m_frame_data.bonus); // ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 // load offset selector for the next round. // nop | madday.xyzw ACC, vf26, vf01 // nop | maddz.xyzw vf02, vf27, vf01 // move.w vf05, vf00 | addw.z vf01, vf00, vf05 // scales_vf01.z = sy scales_vf01.z() = flags_vf05.w(); // start building the scale vector flags_vf05.w() = 1.f; // what are we building in flags right now?? 
// nop | nop // div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 float Q = m_frame_data.pfog0 / transformed_pos_vf02.w(); flags_vf05.z() *= m_frame_data.deg_to_rad; // nop | mul.xyzw vf03, vf02, vf29 Vector4f scaled_pos_vf03 = transformed_pos_vf02.elementwise_multiply(m_frame_data.hmge_scale); // nop | nop // nop | nop // nop | mulz.z vf04, vf05, vf05 (ts) // fmt::print("rot is {} degrees\n", flags_vf05.z() * 360.0 / (2.0 * M_PI)); // the load is for rotation stuff, // lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 (used for fcand) // iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 (used for fmand) // upcoming fcand with 0x3f, that checks all of them. bool fcand_result = clip_xyz_plus_minus(scaled_pos_vf03); bool fmand_result = color_vf11.w() == 0; // (really w+w, but I don't think it matters?) // L8: // xgkick double buffer setup // ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q scales_vf01.z() *= Q; // sy scales_vf01.w() *= Q; // sx // lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 (ts) auto adgif_vf06 = m_frame_data.adgif_giftag; // lq.xyzw vf14, 1002(vi00) ts| mula.xyzw ACC, vf05, vf14 (ts) // fmand vi01, vi06 | mul.xyz vf02, vf02, Q transformed_pos_vf02.x() *= Q; transformed_pos_vf02.y() *= Q; transformed_pos_vf02.z() *= Q; // if (m_extra_debug) { // imgui_vec(transformed_pos_vf02, "scaled xf"); // } // ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 scales_vf01.x() = scales_vf01.z(); // = sy if (fmand_result) { if (m_extra_debug) { ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fmand reject"); ImGui::Separator(); } continue; // reject! 
} // lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 (ts) // vf07 is first user adgif // lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) // lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 // vf08 is second user adgif Vector4f offset_pos_vf10 = transformed_pos_vf02 + hvdf_offset; // if (m_extra_debug) { // ImGui::Text("sel %d", offset_selector); // //ImGui::Text("hvdf off z: %f tf/w z: %f", hvdf_offset.z(), transformed_pos_vf02.z()); // imgui_vec(hvdf_offset, "hvdf"); // imgui_vec(transformed_pos_vf02, "tf'd"); // } // lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 // vf09 is third user adgif scales_vf01.x() *= scales_vf01.w(); // x = sx * sy // sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 (ts) // FIRST ADGIF IS adgif_vf06 packet.adgif_giftag = adgif_vf06; // lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 (ts both) // sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 // SECOND ADGIF is first user // just do all 5 now. packet.user_adgif = m_adgif[sprite_idx]; offset_pos_vf10.w() = std::max(offset_pos_vf10.w(), m_frame_data.fog_max); // sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 // THIRD ADGIF is second user scales_vf01.z() = std::max(scales_vf01.z(), m_frame_data.min_scale); scales_vf01.w() = std::max(scales_vf01.w(), m_frame_data.min_scale); // sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 (ts) // FOURTH ADGIF is third user // lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 (ts both) // lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 // vf06 is fourth user adgif scales_vf01.x() *= m_frame_data.inv_area; // x = sx * sy * inv_area (area ratio) // lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 // vf07 is fifth user adgif offset_pos_vf10.w() = std::min(offset_pos_vf10.w(), m_frame_data.fog_min); // lq.xyzw vf08, 1000(vi00) | nop // vf08 is 2d giftag 2 // ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 auto flag_vi07 = m_vec_data_2d[sprite_idx].flag(); Vector4f vf05_sincos(0, 0, std::sin(flags_vf05.z()), std::cos(flags_vf05.z())); // 
lq.xyzw vf30, 904(vi08) | nop // pipeline // lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 // pipeline scales_vf01.x() = std::min(scales_vf01.x(), 1.f); // lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 // pipeline color_vf11.w() *= scales_vf01.x(); // is this right? doesn't this stall?? // fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 // already computed pipeline // lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 (pipeline) Vector4f basis_x_vf17 = m_frame_data.basis_x; // lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 (pipeline) Vector4f basis_y_vf18 = m_frame_data.basis_y; assert(flag_vi07 == 0); Vector4f* xy_array = m_frame_data.xy_array + flag_vi07; // lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 Vector4f xy0_vf19 = xy_array[0]; math::Vector color_integer_vf11 = color_vf11.cast(); // lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 (pipeline) Vector4f xy1_vf20 = xy_array[1]; // lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 Vector4f xy2_vf21 = xy_array[2]; Vector4f acc = basis_x_vf17 * vf05_sincos.w(); // lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 Vector4f xy3_vf22 = xy_array[3]; Vector4f vf12_rotated = acc - (basis_y_vf18 * vf05_sincos.z()); // sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 // EIGHTH is color integer packet.color = color_integer_vf11; acc = basis_x_vf17 * vf05_sincos.z(); // lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 // (pipeline) Vector4f vf13_rotated_trans = acc + basis_y_vf18 * vf05_sincos.w(); // move.w vf24, vf00 | addw.z vf23, vf00, vf24 (pipeline both) // div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 // (pipeline) vf12_rotated *= scales_vf01.w(); // ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 (pipeline) if (fcand_result) { if (m_extra_debug) { ImGui::TextColored(ImVec4(0.8, 0.2, 0.2, 1.0), "fcand reject"); ImGui::Separator(); } continue; // reject (could move earlier) } // ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 // (pipeline) vf13_rotated_trans *= scales_vf01.z(); // LEFT 
OFF HERE! // sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 // FIFTH is fourth user // sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 // SIXTH is fifth user acc = offset_pos_vf10; // sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 // SEVENTH is giftag2 packet.sprite_giftag = m_frame_data.sprite_2d_giftag2; acc += vf12_rotated * xy0_vf19.x(); // lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 Vector4f st0_vf06 = m_frame_data.st_array[0]; xy0_vf19 = acc + vf13_rotated_trans * xy0_vf19.y(); // lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 Vector4f st1_vf07 = m_frame_data.st_array[1]; acc = offset_pos_vf10; // lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 Vector4f st2_vf08 = m_frame_data.st_array[2]; acc += vf12_rotated * xy1_vf20.x(); // lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 Vector4f st3_vf09 = m_frame_data.st_array[3]; xy1_vf20 = acc + vf13_rotated_trans * xy1_vf20.y(); // sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 // NINTH is st0 packet.st0 = st0_vf06; acc = offset_pos_vf10; // sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 // ELEVEN is st1 packet.st1 = st1_vf07; acc += vf12_rotated * xy2_vf21.x(); // sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 // THIRTEEN is st2 packet.st2 = st2_vf08; xy2_vf21 = acc + vf13_rotated_trans * xy2_vf21.y(); // sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 // FIFTEEN is st3 packet.st3 = st3_vf09; acc = offset_pos_vf10; // nop | maddax.xyzw ACC, vf12, vf22 acc += vf12_rotated * xy3_vf22.x(); // nop | maddy.xyzw vf22, vf13, vf22 xy3_vf22 = acc + vf13_rotated_trans * xy3_vf22.y(); // lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 // (pipeline) auto xy0_vf19_int = (xy0_vf19 * 16.f).cast(); // lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 // (pipeline) auto xy1_vf20_int = (xy1_vf20 * 16.f).cast(); // move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 // (pipeline) auto xy2_vf21_int = (xy2_vf21 * 16.f).cast(); // move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 // (pipeline) auto 
xy3_vf22_int = (xy3_vf22 * 16.f).cast(); if (m_extra_debug) { u32 zi = xy3_vf22_int.z() >> 4; ImGui::Text("z (int): 0x%08x %s", zi, zi >= (1 << 24) ? "bad" : ""); ImGui::Text("z (flt): %f", (double)(((u32)zi) << 8) / UINT32_MAX); } // sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 (pipeline) // TENTH is xy0int packet.xy0 = xy0_vf19_int; // sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 (pipeline) // TWELVE is xy1int packet.xy1 = xy1_vf20_int; // sq.xyzw vf21, 6(vi05) | nop // FOURTEEN is xy2int packet.xy2 = xy2_vf21_int; // sq.xyzw vf22, 8(vi05) | nop // SIXTEEN is xy3int packet.xy3 = xy3_vf22_int; // m_sprite_renderer.render_gif((const u8*)&packet, sizeof(packet), render_state, prof); if (m_extra_debug) { imgui_vec(vf12_rotated, "vf12", 2); imgui_vec(vf13_rotated_trans, "vf13", 2); ImGui::Separator(); } // xgkick vi15 | nop // iaddi vi04, vi04, -0x1 | nop // iaddiu vi01, vi00, 0x672 | nop // ibne vi00, vi04, L8 | nop // isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 // nop | nop :e // nop | nop // L9: // iaddi vi04, vi04, -0x1 | nop // iaddi vi02, vi02, -0x3 | nop // ibne vi00, vi04, L7 | nop // nop | nop // nop | nop :e // nop | nop // L10: // iaddi vi04, vi04, -0x1 | nop // iaddi vi03, vi03, 0x4 | nop // ibne vi00, vi04, L7 | nop // nop | nop // nop | nop :e // nop | nop */