#include "game/graphics/opengl_renderer/sprite/Sprite3.h"

/*!
 * Per-sprite input record consumed by glow_math. The layout must stay exactly 4 quadwords
 * (64 bytes), which is also the size of the vector data transfer checked by the caller.
 */
struct SpriteGlowData {
  float pos[3];      // sprite origin, before the camera transform
  float size_x;      // sprite size in x (multiplied by perspective q before use)
  float size_probe;  // probe size (multiplied by perspective q, then clamped)
  float z_offset;    // offset that moves the point toward the camera
  float rot_angle;   // rotation, multiplied by consts->deg_to_rad before sin/cos
  float size_y;      // sprite size in y (multiplied by perspective q before use)
  float color[4];    // rgba; rgb gets scaled by alpha * fade / 128
  float fade_a;      // fade = fade_a * depth + fade_b, clamped to [0, 1]
  float fade_b;
  u32 tex_id;  // not read by glow_math
  u32 dummy;   // not read by glow_math
};
static_assert(sizeof(SpriteGlowData) == 16 * 4);

/*!
 * Transformation math from the sprite-glow vu1 program.
 * Fills the SpriteGlowOutput struct with the same data that would be written into the output
 * template on VU1, except that float to int conversions are not performed.
 *
 * This is not written for speed: the total number of glow sprites is expected to be small.
 *
 * @param consts         matrices, clamp limits and scale factors for the transformation
 * @param skip_uv_clamp  if true, the probe center is the projected point, with no clamping
 * @param vec_data       pointer to one SpriteGlowData record
 * @param adgif_data     pointer to 5 quadwords of shader data, copied to out->adgif unchanged
 * @param out            output record to populate
 * @return false if the sprite origin is rejected by the clipping test, true otherwise. On
 *         rejection, out->adgif and out->flare_draw_color have already been written.
 */
bool glow_math(const SpriteGlowConsts* consts,
               bool skip_uv_clamp,
               const void* vec_data,
               const void* adgif_data,
               SpriteGlowOutput* out) {
  const auto* sprite = static_cast<const SpriteGlowData*>(vec_data);

  // The shader data is passed through as-is.
  static_assert(sizeof(out->adgif) == 5 * 16);
  memcpy(&out->adgif, adgif_data, 5 * 16);

  // The transformation is a bit unusual: there are two separate matrix multiplies, one for the
  // camera and one for perspective. Usually there is only one, or, when both are really needed
  // (for stuff like emerc), it is optimized knowing which entries of perspective are always 0.
  // Not here. Presumably the VU program time is tiny compared to the actual drawing.

  // Transform the point to the camera frame.
  Vector4f point = consts->camera[3] + consts->camera[0] * sprite->pos[0] +
                   consts->camera[1] * sprite->pos[1] + consts->camera[2] * sprite->pos[2];

  // Compute the fade. Note that it depends on depth, not on distance from the camera, which
  // leads to some odd fadeout behavior. (fade_a is negative.)
  const float fade = std::min(std::max(sprite->fade_a * point.z() + sprite->fade_b, 0.f), 1.f);

  // Scale rgb by alpha and fade. Alpha itself is left alone.
  Vector4f draw_color(sprite->color[0], sprite->color[1], sprite->color[2], sprite->color[3]);
  const float color_scale = draw_color.w() * fade / 128.f;
  draw_color.x() *= color_scale;
  draw_color.y() *= color_scale;
  draw_color.z() *= color_scale;
  out->flare_draw_color = draw_color;

  // Slide the point along the line between its original position and the camera. This does not
  // move it up/down/left/right on screen, only "toward" the camera.
  const float toward_camera_scale = 1.f - (sprite->z_offset / point.z());
  point.x() *= toward_camera_scale;
  point.y() *= toward_camera_scale;
  point.z() *= toward_camera_scale;

  // Perspective transformation (the divide comes later).
  point = consts->perspective[3] + consts->perspective[0] * point.x() +
          consts->perspective[1] * point.y() + consts->perspective[2] * point.z();

  // The meaning of HMGE is unknown, but it holds the scaling factors used for clipping.
  // Apply them and reject the sprite if its origin is off-screen.
  const Vector4f clip_pos = point.elementwise_multiply(consts->hmge);
  const float clip_limit = std::abs(clip_pos.w());
  const auto is_clipped = [clip_limit](float v) { return v > clip_limit || v < -clip_limit; };
  if (is_clipped(clip_pos.x()) || is_clipped(clip_pos.y()) || is_clipped(clip_pos.z())) {
    return false;
  }

  // Perspective divide. Interestingly, this uses the w from the hmge-scaled position.
  const float perspective_q = 1.f / clip_pos.w();
  point.x() *= perspective_q;
  point.y() *= perspective_q;
  point.z() *= perspective_q;
  out->perspective_q = perspective_q;

  // These offsets are applied after the perspective divide and are required for the PS2 screen
  // coordinates (centered at 2048, 2048).
  point += consts->hvdf;

  // Everything below is in screen coordinates, so the sizes (which are not) are scaled by q.
  Vector4f screen_sizes(sprite->size_probe, sprite->z_offset, sprite->size_x, sprite->size_y);
  screen_sizes *= perspective_q;

  // Probe size: raised to at least 1 first, then capped at clamp_max.w.
  const float probe_size = std::min(std::max(screen_sizes.x(), 1.f), consts->clamp_max.w());
  // Sprite size in x/y: capped at clamp_max.z.
  const float sprite_size_x = std::min(screen_sizes.z(), consts->clamp_max.z());
  const float sprite_size_y = std::min(screen_sizes.w(), consts->clamp_max.z());

  // Allowed range for the probe center, chosen so the probe's corners end up at min/max.
  math::Vector2f min_probe_center(consts->clamp_min.x() + probe_size,
                                  consts->clamp_min.y() + probe_size);
  math::Vector2f max_probe_center(consts->clamp_max.x() - probe_size,
                                  consts->clamp_max.y() - probe_size);

  // Corners of the second clear. No rotation is applied to these.
  out->second_clear_pos[0] =
      Vector4f(point.x() - probe_size, point.y() - probe_size, point.z(), point.w());
  out->second_clear_pos[1] =
      Vector4f(point.x() + probe_size, point.y() + probe_size, point.z(), point.w());

  // Offsets from the center of the sprite to its corners, including rotation.
  math::Vector2f basis_x(consts->basis_x[0], 0);  // x scaling factor
  math::Vector2f basis_y(0, consts->basis_y[1]);  // y scaling factor

  float rot_rad = sprite->rot_angle * consts->deg_to_rad;
  float rot_sin = std::sin(rot_rad);
  float rot_cos = std::cos(rot_rad);
  math::Vector2f corner_axis_x = basis_x * rot_sin - basis_y * rot_cos;
  math::Vector2f corner_axis_y = basis_x * rot_cos + basis_y * rot_sin;
  corner_axis_x *= sprite_size_x;
  corner_axis_y *= sprite_size_y;

  // Limit the probe center so the clear doesn't go out of bounds.
  // The VU program's "max.xy vf20, vf01, vf09" looks bugged: the x broadcast seems wrong, and it
  // breaks fadeout as the sprite moves off the top of the screen. Each axis is clamped
  // separately here, on the assumption that the original was simply a mistake.
  math::Vector2f probe_center;
  if (!skip_uv_clamp) {
    probe_center = math::Vector2f(std::max(point.x(), min_probe_center.x()),
                                  std::max(point.y(), min_probe_center.y()));
    probe_center.min_in_place(max_probe_center);
  } else {
    probe_center = point.xy();
  }

  // Offscreen uv corners: probe center relative to clamp_min -/+ 1, grown by the probe size.
  math::Vector2f uv_lo(consts->clamp_min.x() - 1, consts->clamp_min.y() - 1);
  math::Vector2f uv_hi(consts->clamp_min.x() + 1, consts->clamp_min.y() + 1);
  uv_lo = probe_center - uv_lo;
  uv_lo -= probe_size;
  uv_hi = probe_center - uv_hi;
  uv_hi += probe_size;
  out->offscreen_uv[0] = uv_lo;
  out->offscreen_uv[1] = uv_hi;

  // Corners of the first clear: the probe square grown by one on every side.
  out->first_clear_pos[0] = Vector4f(probe_center.x() - probe_size - 1,
                                     probe_center.y() - probe_size - 1, 0xffffff, point.w());
  out->first_clear_pos[1] = Vector4f(probe_center.x() + probe_size + 1,
                                     probe_center.y() + probe_size + 1, 0xffffff, point.w());

  // Four flare corners: the center plus a combination of the rotated, scaled axes.
  // mulaw.xyzw ACC, vf01, vf00
  // maddax.xyzw ACC, vf15, vf11
  // maddy.xyzw vf11, vf16, vf11
  for (int i = 0; i < 4; i++) {
    const math::Vector2f corner_offset = (corner_axis_x * consts->xy_array[i].x()) +
                                         (corner_axis_y * consts->xy_array[i].y());
    out->flare_xyzw[i] = point;
    out->flare_xyzw[i].x() += corner_offset.x();
    out->flare_xyzw[i].y() += corner_offset.y();
  }

  return true;
}

/*!
* Handle glow dma and draw glow sprites using GlowRenderer */ void Sprite3::glow_dma_and_draw(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { auto maybe_consts_setup = dma.read_and_advance(); if (maybe_consts_setup.size_bytes != sizeof(SpriteGlowConsts)) { return; } SpriteGlowConsts consts; memcpy(&consts, maybe_consts_setup.data, sizeof(SpriteGlowConsts)); auto templ_1 = dma.read_and_advance(); ASSERT(templ_1.size_bytes == 16 * 0x54); auto templ_2 = dma.read_and_advance(); ASSERT(templ_2.size_bytes == 16 * 0x54); auto bo = dma.read_and_advance(); ASSERT(bo.size_bytes == 0); auto flushe = dma.read_and_advance(); ASSERT(flushe.size_bytes == 0); auto control_xfer = dma.read_and_advance(); while (control_xfer.size_bytes == 0 && control_xfer.vifcode0().kind == VifCode::Kind::NOP && control_xfer.vifcode1().kind == VifCode::Kind::NOP) { control_xfer = dma.read_and_advance(); } while (control_xfer.size_bytes == 16) { auto vecdata_xfer = dma.read_and_advance(); auto shader_xfer = dma.read_and_advance(); auto call = dma.read_and_advance(); (void)call; u32 num_sprites; memcpy(&num_sprites, control_xfer.data, 4); ASSERT(num_sprites == 1); // always, for whatever reason. ASSERT(vecdata_xfer.size_bytes == 4 * 16); ASSERT(shader_xfer.size_bytes == 5 * 16); if (m_enable_glow) { if (m_glow_renderer.at_max_capacity()) { m_glow_renderer.flush(render_state, prof); } auto* out = m_glow_renderer.alloc_sprite(); if (!glow_math(&consts, m_glow_renderer.new_mode, vecdata_xfer.data, shader_xfer.data, out)) { m_glow_renderer.cancel_sprite(); } } control_xfer = dma.read_and_advance(); while (control_xfer.size_bytes == 0 && control_xfer.vifcode0().kind == VifCode::Kind::NOP && control_xfer.vifcode1().kind == VifCode::Kind::NOP) { control_xfer = dma.read_and_advance(); } } m_glow_renderer.flush(render_state, prof); }