#include "Generic2.h"

#include "game/graphics/opengl_renderer/AdgifHandler.h"

/*!
 * Advance through DMA data that has no effect on rendering (NOP codes) and see if this is the
 * end of the data.
 * The DmaFollower will either point to the start of the next bucket (and the function will return
 * true), or to the beginning of the next non-NOP DMA for this bucket.
 *
 * Every tag that is skipped is counted in m_stats.dma_tags.
 */
bool Generic2::check_for_end_of_generic_data(DmaFollower& dma, u32 next_bucket) {
  while (dma.current_tag().qwc == 0 && dma.current_tag_vifcode0().kind == VifCode::Kind::NOP &&
         dma.current_tag_vifcode1().kind == VifCode::Kind::NOP) {
    // this "CALL" tag is inserted by the engine to reset the GS. It's always inserted at the end of
    // the bucket. if we see it here, we should be able to skip over this resetting stuff (always 4
    // tags) and then see the start of the next bucket.
    if (dma.current_tag().kind == DmaTag::Kind::CALL) {
      for (int i = 0; i < 4; i++) {
        dma.read_and_advance();
        m_stats.dma_tags++;
      }
      ASSERT(dma.current_tag_offset() == next_bucket);
      return true;
    }
    m_stats.dma_tags++;
    dma.read_and_advance();
  }
  return false;
}

/*!
 * Process the first DMA of a generic bucket.
 * Return true if the generic bucket is empty and there is nothing to do.
 *
 * Otherwise, populates m_drawing_config which contains the common draw settings for all data being
 * rendered in this bucket.
 */
bool Generic2::handle_bucket_setup_dma(DmaFollower& dma, u32 next_bucket) {
  // if the engine didn't run the generic renderer setup function, this bucket will end here.
  if (check_for_end_of_generic_data(dma, next_bucket)) {
    return true;
  }

  // next, the generic setup. This reads the data generated by generic-init-buf.

  // setup packet 1 is GS settings
  auto test_and_zbuf = dma.read_and_advance();
  ASSERT(test_and_zbuf.size_bytes == 48);
  // first qw is the gif tag. Can ignore.
  // second qw is test, this is always the same, so can ignore it too.
  //  (new 'static 'gs-test
  //       :ate #x1
  //       :atst (gs-atest greater-equal)
  //       :aref #x26
  //       :afail #x1
  //       :zte #x1
  //       :ztst (gs-ztest greater-equal)
  //       )
  // third qw is zbuf:
  // the only thing that changes is zmsk, we need to store this value for later.
  u64 zbuf_val;
  memcpy(&zbuf_val, test_and_zbuf.data + 32, 8);
  m_drawing_config.zmsk = GsZbuf(zbuf_val).zmsk();

  // setup packet 2 is constants that normally go to VU1 data memory.
  // we're not going to be super strict checking the exact details of the unpack command, it's
  // a waste of time since we're the ones generating it anyway.
  auto constants = dma.read_and_advance();
  ASSERT(constants.size_bytes == 160);
  ASSERT(constants.vifcode0().kind == VifCode::Kind::STCYCL);
  ASSERT(constants.vifcode1().kind == VifCode::Kind::UNPACK_V4_32);

  // (fog        vector     :inline :offset-assert 0)
  memcpy(&m_drawing_config.pfog0, constants.data + 0, 4);
  memcpy(&m_drawing_config.fog_min, constants.data + 4, 4);
  memcpy(&m_drawing_config.fog_max, constants.data + 8, 4);
  // (adgif       gs-gif-tag :inline :offset-assert 16) ;; was qword
  // (giftag      gs-gif-tag :inline :offset-assert 32) ;; was qword
  // (hvdf-offset vector     :inline :offset-assert 48)
  memcpy(m_drawing_config.hvdf_offset.data(), constants.data + 48, 16);
  // (hmge-scale  vector     :inline :offset-assert 64)
  // (invh-scale  vector     :inline :offset-assert 80)
  // (guard       vector     :inline :offset-assert 96)
  // (adnop       qword      :inline :offset-assert 112)
  // (flush       qword      :inline :offset-assert 128)
  // (stores      qword      :inline :offset-assert 144)

  auto vu_setup = dma.read_and_advance();
  ASSERT(vu_setup.size_bytes == 32);
  // this sets offset/base to 0, sets row to 0 and runs program 0 to set up VU regs
  // todo: any setup required from running this program.

  // if there was nothing rendered by generic on this frame in this bucket, the bucket will end
  // here.
  return check_for_end_of_generic_data(dma, next_bucket);
}

/*!
 * Reset all of the "next free" allocation indices to 0, so the buffers are reused from the start.
 */
void Generic2::reset_buffers() {
  m_next_free_frag = 0;
  m_next_free_vert = 0;
  m_next_free_adgif = 0;
  m_next_free_bucket = 0;
  m_next_free_idx = 0;
}

/*!
 * Decode the 4 bytes at data as a VIF code and return true if it is a NOP.
 * The caller must guarantee that 4 bytes are readable at data.
 */
bool is_nop_vif(const u8* data) {
  u32 tag0_data;
  memcpy(&tag0_data, data, 4);
  return VifCode(tag0_data).kind == VifCode::Kind::NOP;
}

/*!
 * Decode the 4 bytes at data as a VIF code and return true if it is a NOP or a FLUSHE.
 * The caller must guarantee that 4 bytes are readable at data.
 */
bool is_nop_or_flushe_vif(const u8* data) {
  u32 tag0_data;
  memcpy(&tag0_data, data, 4);
  auto k = VifCode(tag0_data).kind;
  return k == VifCode::Kind::NOP || k == VifCode::Kind::FLUSHE;
}

namespace {
/*!
 * Starting at byte offset off, skip over consecutive NOP VIF codes (4 bytes each).
 * Returns the offset of the first non-NOP code, or an offset >= end_of_vif if the data ran out.
 *
 * The bound is tested before the code is read, so nothing at or past end_of_vif is touched.
 */
u32 skip_nop_vifs(const u8* data, u32 off, u32 end_of_vif) {
  while (off < end_of_vif && is_nop_vif(data + off)) {
    off += 4;
  }
  return off;
}
}  // namespace

/*!
 * Copy vtx_count positions (12 bytes each, tightly packed) from data into vtx[i].xyz.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vtx_positions(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  for (int i = 0; i < vtx_count; i++) {
    memcpy(vtx[i].xyz.data(), data + (i * 12), 12);
  }
  return vtx_count * 12;
}

/*!
 * Copy vtx_count colors (4 bytes each, tightly packed) from data into vtx[i].rgba.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vertex_colors(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  for (int i = 0; i < vtx_count; i++) {
    memcpy(vtx[i].rgba.data(), data + (i * 4), 4);
  }
  return vtx_count * 4;
}

/*!
 * Unpack vtx_count texture coordinates (two s16's per vertex) from data.
 * The lowest bit of s is not part of the coordinate: it is masked off before storing, and
 * vtx[i].adc is set to true only when that bit was clear.
 * Returns the number of bytes consumed from data.
 */
u32 unpack_vtx_tcs(Generic2::Vertex* vtx, const u8* data, int vtx_count) {
  for (int i = 0; i < vtx_count; i++) {
    s16 s, t;
    memcpy(&s, data + (i * 4), 2);
    memcpy(&t, data + (i * 4) + 2, 2);
    s16 s_masked = s & (s16)0xfffe;
    // note: int to float happening here.
    // if this is a bottleneck, we can possibly keep integers and do this in the shader.
    // I've avoided this for now because some integer formats are inefficient on the GPU
    // and it's hard to know what's supported well on all drivers/GPUs
    vtx[i].st[0] = s_masked;
    vtx[i].st[1] = t;
    vtx[i].adc = s_masked == s;
  }
  return vtx_count * 4;
}

/*!
 * Parse the remainder of a fragment's VIF data, after the UNPACK_V4_32 that uploads the fragment
 * header and adgifs.
 *
 * @param data               start of the VIF transfer data
 * @param off                byte offset in data to start at
 * @param first_unpack_bytes size of the header + adgif upload, in bytes
 * @param end_of_vif         size of the VIF transfer data, in bytes
 * @param frag               fragment to populate (header, adgifs, vertices, mscal_addr)
 * @param loop               if true, the vertex positions are not part of this transfer (the
 *                           colors come right after the STCYCL) and the vertices are allocated
 *                           from the color unpack instead.
 * @return the offset of the first byte that was not consumed.
 */
u32 Generic2::handle_fragments_after_unpack_v4_32(const u8* data,
                                                  u32 off,
                                                  u32 first_unpack_bytes,
                                                  u32 end_of_vif,
                                                  Fragment* frag,
                                                  bool loop) {
  // note: they rely on _something_ aligning this?
  u32 off_aligned = (off + 15) & ~15;

  // each header should have 7 qw header + at least 5 qw for a single adgif.
  ASSERT(first_unpack_bytes >= FRAG_HEADER_SIZE + sizeof(AdGifData));

  // grab the 7 qw header
  memcpy(frag->header, data + off_aligned, FRAG_HEADER_SIZE);

  // figure out how many adgifs and grab those.
  u32 adgif_bytes = (first_unpack_bytes - FRAG_HEADER_SIZE);
  u32 adgifs = adgif_bytes / sizeof(AdGifData);
  frag->adgif_idx = m_next_free_adgif;
  frag->adgif_count = adgifs;
  ASSERT(frag->adgif_count > 0);
  ASSERT(adgif_bytes == adgifs * sizeof(AdGifData));
  for (u32 i = 0; i < adgifs; i++) {
    auto& add = next_adgif();
    memcpy(&add.data, data + off_aligned + FRAG_HEADER_SIZE + (i * sizeof(AdGifData)),
           sizeof(AdGifData));
  }

  // continue in this transfer
  off += first_unpack_bytes;
  if (off == end_of_vif) {
    ASSERT_MSG(false, "nothing after header upload");
  }

  // the next thing is the vertex positions.
  off = skip_nop_vifs(data, off, end_of_vif);

  u32 stcycl_tag_data;
  memcpy(&stcycl_tag_data, data + off, 4);
  off += 4;
  VifCode stcycl_tag(stcycl_tag_data);
  ASSERT(stcycl_tag.kind == VifCode::Kind::STCYCL);
  ASSERT(stcycl_tag.immediate == 0x103);

  // peek at the next code (off is only advanced past it if it is the position unpack)
  u32 vtx_pos_unpack_tag_data;
  memcpy(&vtx_pos_unpack_tag_data, data + off, 4);
  VifCode vtx_pos_unpack_tag(vtx_pos_unpack_tag_data);
  if (vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V4_8) {
    // no positions here, this is already the color unpack.
    ASSERT(loop);
  } else {
    ASSERT(!loop);
    ASSERT(vtx_pos_unpack_tag.kind == VifCode::Kind::UNPACK_V3_32);
    off += 4;
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = vtx_pos_unpack_tag.num;
    alloc_vtx(frag->vtx_count);
    off += unpack_vtx_positions(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);
    ASSERT(off < end_of_vif);
    off = skip_nop_vifs(data, off, end_of_vif);
    ASSERT(off < end_of_vif);
  }

  // next, vertex colors
  u32 unpack_vtx_color_tag_data;
  memcpy(&unpack_vtx_color_tag_data, data + off, 4);
  off += 4;
  VifCode unpack_vtx_color_tag(unpack_vtx_color_tag_data);
  ASSERT(unpack_vtx_color_tag.kind == VifCode::Kind::UNPACK_V4_8);
  if (loop) {
    // the vertices weren't allocated by a position unpack, so do it now.
    frag->vtx_idx = m_next_free_vert;
    frag->vtx_count = unpack_vtx_color_tag.num;
    alloc_vtx(frag->vtx_count);
  } else {
    ASSERT(unpack_vtx_color_tag.num == frag->vtx_count);
  }
  off += unpack_vertex_colors(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

  ASSERT(off < end_of_vif);
  off = skip_nop_vifs(data, off, end_of_vif);
  ASSERT(off < end_of_vif);

  // next, vertex tcs
  u32 unpack_vtx_tc_tag_data;
  memcpy(&unpack_vtx_tc_tag_data, data + off, 4);
  off += 4;
  VifCode unpack_vtx_tc_tag(unpack_vtx_tc_tag_data);
  ASSERT(unpack_vtx_tc_tag.kind == VifCode::Kind::UNPACK_V2_16);
  ASSERT(unpack_vtx_tc_tag.num == frag->vtx_count);
  off += unpack_vtx_tcs(&m_verts[frag->vtx_idx], data + off, frag->vtx_count);

  if (off == end_of_vif) {
    return off;
  }

  ASSERT(off < end_of_vif);
  off = skip_nop_vifs(data, off, end_of_vif);
  ASSERT(off < end_of_vif);

  // the STCYCL and the MSCAL can come in either order.
  u32 stcycl_reset_data;
  memcpy(&stcycl_reset_data, data + off, 4);
  off += 4;
  VifCode stcycl_reset(stcycl_reset_data);
  if (stcycl_reset.kind == VifCode::Kind::STCYCL) {
    ASSERT(off < end_of_vif);
    off = skip_nop_vifs(data, off, end_of_vif);
    ASSERT(off < end_of_vif);

    u32 mscal_data;
    memcpy(&mscal_data, data + off, 4);
    off += 4;
    VifCode mscal(mscal_data);
    ASSERT(mscal.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = mscal.immediate;
  } else {
    ASSERT(stcycl_reset.kind == VifCode::Kind::MSCAL);
    frag->mscal_addr = stcycl_reset.immediate;
    ASSERT(off < end_of_vif);
    off = skip_nop_vifs(data, off, end_of_vif);
    ASSERT(off < end_of_vif);

    u32 stcycl_data;
    memcpy(&stcycl_data, data + off, 4);
    off += 4;
    VifCode stcycl(stcycl_data);
    ASSERT(stcycl.kind == VifCode::Kind::STCYCL);
  }

  ASSERT(off < end_of_vif);
  // skip trailing NOP/FLUSHE. These can run all the way to the end of the transfer, so the bound
  // must be checked before reading the code.
  while (off < end_of_vif && is_nop_or_flushe_vif(data + off)) {
    off += 4;
  }

  return off;
}

/*!
 * Read all of the DMA data for a generic bucket, up to next_bucket, and unpack it into the
 * fragment/adgif/vertex buffers. The buffers are reset first.
 */
void Generic2::process_dma(DmaFollower& dma, u32 next_bucket) {
  reset_buffers();

  // handle the stuff at the beginning.
  if (handle_bucket_setup_dma(dma, next_bucket)) {
    return;
  }

  // loop over "fragments"
  // each "fragment" consists of a series of uploads, followed by a MSCAL VIFCODE that runs
  // VU program that does vertex transformation and sends to the GS.
  // if the previous transfer contained a second fragment without positions, its positions are
  // in the next transfer and continued_fragment points to it.
  Fragment* continued_fragment = nullptr;

  while (dma.current_tag_offset() != next_bucket) {
    if (continued_fragment) {
      // this transfer only has the vertex positions of the fragment started in the last one.
      auto continue_vif_transfer = dma.read_and_advance();
      ASSERT(continue_vif_transfer.vifcode0().kind == VifCode::Kind::NOP);
      auto up = continue_vif_transfer.vifcode1();
      ASSERT(up.kind == VifCode::Kind::UNPACK_V3_32);
      ASSERT(continue_vif_transfer.size_bytes * 4 / 48 == up.num);
      ASSERT(up.num == continued_fragment->vtx_count);
      unpack_vtx_positions(&m_verts[continued_fragment->vtx_idx], continue_vif_transfer.data,
                           continued_fragment->vtx_count);
      continued_fragment = nullptr;

      auto call = dma.read_and_advance();
      ASSERT(call.size_bytes == 0);
      ASSERT(call.vifcode1().kind == VifCode::Kind::MSCAL);
      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }
    } else {
      auto vif_transfer = dma.read_and_advance();
      auto v1 = vif_transfer.vifcode1();
      //      if (vif_transfer.vifcode0().kind != VifCode::Kind::STCYCL ||
      //          vif_transfer.vifcode1().kind != VifCode::Kind::UNPACK_V4_32) {
      //        fmt::print("failing tag: {} {} {}\n", vif_transfer.vifcode0().print(),
      //                   vif_transfer.vifcode1().print(), vif_transfer.size_bytes);
      //      }
      ASSERT(vif_transfer.vifcode0().kind == VifCode::Kind::STCYCL);
      ASSERT(v1.kind == VifCode::Kind::UNPACK_V4_32);
      u32 unpack_bytes = v1.num * 16;
      auto& frag = next_frag();
      u32 off = handle_fragments_after_unpack_v4_32(vif_transfer.data, 0, unpack_bytes,
                                                    vif_transfer.size_bytes, &frag, false);

      if (check_for_end_of_generic_data(dma, next_bucket)) {
        return;
      }

      if (off < vif_transfer.size_bytes) {
        // there's more in this transfer: the start of a second fragment.
        u32 stcycl_reset;
        memcpy(&stcycl_reset, vif_transfer.data + off, 4);
        ASSERT(VifCode(stcycl_reset).kind == VifCode::Kind::STCYCL);
        off += 4;

        // while (off < vif_transfer.size_bytes) {
        // note: off is left pointing at the unpack code, it is not advanced past it here.
        u32 next;
        memcpy(&next, vif_transfer.data + off, 4);
        VifCode next_unpack(next);
        ASSERT(next_unpack.kind == VifCode::Kind::UNPACK_V4_32);
        auto& continue_frag = next_frag();
        off = handle_fragments_after_unpack_v4_32(vif_transfer.data, off, next_unpack.num * 16,
                                                  vif_transfer.size_bytes, &continue_frag, true);
        continued_fragment = &continue_frag;
        ASSERT(off == vif_transfer.size_bytes);
        // }
      }
    }
  }
}