diff --git a/common/dma/dma.cpp b/common/dma/dma.cpp index 18bae1a10d..48347dc573 100644 --- a/common/dma/dma.cpp +++ b/common/dma/dma.cpp @@ -108,7 +108,9 @@ std::string VifCode::print() { } default: - fmt::print("Unhandled vif code {}", (int)kind); + fmt::print("Unhandled vif code {}\n", (int)kind); + assert(false); + result = "???"; // assert(false); break; diff --git a/common/dma/dma.h b/common/dma/dma.h index 021c955920..a4ab7ba959 100644 --- a/common/dma/dma.h +++ b/common/dma/dma.h @@ -43,6 +43,12 @@ struct DmaTag { bool spr = false; Kind kind; + bool operator==(const DmaTag& other) const { + return qwc == other.qwc && addr == other.addr && spr == other.spr && kind == other.kind; + } + + bool operator!=(const DmaTag& other) const { return !((*this) == other); } + std::string print(); }; diff --git a/common/dma/dma_chain_read.h b/common/dma/dma_chain_read.h index 8dd8a6ec27..7b6856fd89 100644 --- a/common/dma/dma_chain_read.h +++ b/common/dma/dma_chain_read.h @@ -31,6 +31,13 @@ struct DmaTransfer { VifCode vifcode0() const { return VifCode(vif0()); } VifCode vifcode1() const { return VifCode(vif1()); } + + template + T read_val(u32 offset) const { + T result; + memcpy(&result, (const u8*)data + offset, sizeof(T)); + return result; + } }; class DmaFollower { @@ -99,6 +106,10 @@ class DmaFollower { } DmaTag current_tag() const { return DmaTag(read_val(m_tag_offset)); } + u32 current_tag_vif0() const { return read_val(m_tag_offset + 8); } + u32 current_tag_vif1() const { return read_val(m_tag_offset + 12); } + VifCode current_tag_vifcode0() const { return VifCode(current_tag_vif0()); } + VifCode current_tag_vifcode1() const { return VifCode(current_tag_vif1()); } u32 current_tag_offset() const { return m_tag_offset; } bool ended() const { return m_ended; } diff --git a/common/dma/dma_copy.cpp b/common/dma/dma_copy.cpp index 0636c5dc00..55f2cf6542 100644 --- a/common/dma/dma_copy.cpp +++ b/common/dma/dma_copy.cpp @@ -29,6 +29,39 @@ std::vector flatten_dma(const 
DmaFollower& in) { return result; } +void diff_dma_chains(DmaFollower ref, DmaFollower dma) { /* debug: print where two DMA chains differ */ + while (!ref.ended() && !dma.ended()) { + auto ref_tag = ref.current_tag(); + auto dma_tag = dma.current_tag(); + if (ref_tag.kind != dma_tag.kind) { + fmt::print("Bad dma tag kinds\n"); + } + + if (ref_tag.qwc != dma_tag.qwc) { + fmt::print("Bad dma tag qwc: {} {}\n", ref_tag.qwc, dma_tag.qwc); + } + + auto ref_result = ref.read_and_advance(); + auto dma_result = dma.read_and_advance(); + + for (int i = 0; i < std::min(ref_result.size_bytes, dma_result.size_bytes); i++) { /* sizes can differ (reported above): stay inside both transfers */ + if (ref_result.data[i] != dma_result.data[i]) { + fmt::print("Bad data ({} vs {}) at {} into transfer: {} {}\n", ref_result.data[i], + dma_result.data[i], i, ref_tag.print(), dma_tag.print()); + return; + } + } + } + + if (!ref.ended()) { + fmt::print("dma ended early\n"); + } + + if (!dma.ended()) { + fmt::print("dma had extra data\n"); + } +} + void FixedChunkDmaCopier::serialize_last_result(Serializer& serializer) { serializer.from_ptr(&m_result.start_offset); serializer.from_pod_vector(&m_result.data); @@ -81,13 +114,13 @@ const DmaData& FixedChunkDmaCopier::run(const void* memory, u32 offset, bool ver if (transfer.size_bytes) { m_result.stats.num_data_bytes += transfer.size_bytes; u32 initial_chunk = transfer.data_offset / chunk_size; + u32 end_addr = transfer.data_offset + transfer.size_bytes; m_chunk_mask.at(initial_chunk) = true; - s32 bytes_remaining = transfer.size_bytes; - bytes_remaining -= chunk_size - (transfer.size_bytes % chunk_size); u32 chunk = initial_chunk + 1; - while (bytes_remaining >= 0) { - bytes_remaining -= transfer.size_bytes; - m_chunk_mask.at(chunk) = true; + u32 current_address = chunk_size * chunk; + while (current_address < end_addr) { + current_address += chunk_size; + m_chunk_mask.at(chunk++) = true; } } } @@ -129,8 +162,20 @@ const DmaData& FixedChunkDmaCopier::run(const void* memory, u32 offset, bool ver if (verify) { auto ref = flatten_dma(DmaFollower(memory, offset)); auto v2 = 
flatten_dma(DmaFollower(m_result.data.data(), m_result.start_offset)); + if (ref != v2) { fmt::print("Verification has failed.\n"); + fmt::print("size diff: {} {}\n", ref.size(), v2.size()); + + for (size_t i = 0; i < std::min(ref.size(), v2.size()); i++) { + if (ref[i] != v2[i]) { + fmt::print("first diff at {}\n", i); + break; + } + } + diff_dma_chains(DmaFollower(memory, offset), + DmaFollower(m_result.data.data(), m_result.start_offset)); + assert(false); } else { fmt::print("verification ok: {} bytes\n", ref.size()); } diff --git a/common/math/Vector.h b/common/math/Vector.h index f8ebca13b6..d1e80f042f 100644 --- a/common/math/Vector.h +++ b/common/math/Vector.h @@ -19,7 +19,7 @@ class Vector { } template - Vector(Args... args) : m_data{T(args)...} {} + explicit Vector(Args... args) : m_data{T(args)...} {} T* begin() { return &m_data[0]; } T* end() { return &m_data[Size]; } @@ -101,6 +101,14 @@ class Vector { return result; } + Vector operator-(const T& other) const { + Vector result; + for (int i = 0; i < Size; i++) { + result[i] = m_data[i] - other; + } + return result; + } + T dot(const Vector& other) const { T result(0); for (int i = 0; i < Size; i++) { @@ -145,8 +153,8 @@ class Vector { Vector cross(const Vector& other) const { static_assert(Size == 3, "Size for cross"); - Vector result = {y() * other.z() - z() * other.y(), z() * other.x() - x() * other.z(), - x() * other.y() - y() * other.x()}; + Vector result{y() * other.z() - z() * other.y(), z() * other.x() - x() * other.z(), + x() * other.y() - y() * other.x()}; return result; } @@ -157,7 +165,7 @@ class Vector { std::string to_string_aligned() const { std::string result = "["; for (auto x : m_data) { - result.append(fmt::format("{: 6.3f} ", x)); + result.append(fmt::format("{:6.3f} ", x)); } result.pop_back(); return result + "]"; @@ -203,6 +211,20 @@ struct Matrix { return result; } + std::string to_string_aligned() const { + std::string result; + for (int row = 0; row < Rows; row++) { + result += 
"["; + for (int col = 0; col < Cols; col++) { + result.append(fmt::format("{:6.3f} ", m_data[row + col * Rows])); + } + result.pop_back(); + result += "]\n"; + } + + return result; + } + private: T m_data[Rows * Cols]; }; diff --git a/decompiler/IR2/bitfields.cpp b/decompiler/IR2/bitfields.cpp index 5dafb47b74..795f9c795e 100644 --- a/decompiler/IR2/bitfields.cpp +++ b/decompiler/IR2/bitfields.cpp @@ -826,6 +826,13 @@ std::optional> get_field_defs_from_expr(const BitFieldT } } + auto field_type_as_bitfield = + dynamic_cast(env.dts->ts.lookup_type(field_info.type())); + if (field_type_as_bitfield) { + maybe_field->value = cast_to_bitfield(field_type_as_bitfield, field_info.type(), pool, env, + maybe_field->value); + } + field_defs.push_back(*maybe_field); } return field_defs; @@ -845,6 +852,7 @@ Form* cast_to_bitfield(const BitFieldType* type_info, Form* in) { in = strip_int_or_uint_cast(in); + // special case for sound-name bitfield to string if (type_info->get_name() == "sound-name") { auto as_sound_name = cast_sound_name(pool, env, in); if (as_sound_name) { @@ -853,6 +861,7 @@ Form* cast_to_bitfield(const BitFieldType* type_info, // just do a normal cast if that failed. 
return pool.alloc_single_element_form(nullptr, typespec, in); } + // check if it's just a constant: auto in_as_atom = form_as_atom(in); if (in_as_atom && in_as_atom->is_int()) { @@ -887,6 +896,7 @@ Form* cast_to_bitfield(const BitFieldType* type_info, } return pool.alloc_single_element_form(nullptr, typespec, in); } else { + // dynamic bitfield def auto field_defs = get_field_defs_from_expr(type_info, in, typespec, pool, env, {}); if (field_defs) { return pool.alloc_single_element_form(nullptr, typespec, diff --git a/decompiler/ObjectFile/ObjectFileDB.cpp b/decompiler/ObjectFile/ObjectFileDB.cpp index d5ce325334..a86ef40600 100644 --- a/decompiler/ObjectFile/ObjectFileDB.cpp +++ b/decompiler/ObjectFile/ObjectFileDB.cpp @@ -584,6 +584,7 @@ std::string ObjectFileDB::process_tpages() { std::string tpage_string = "tpage-"; int total = 0, success = 0; int tpage_dir_count = 0; + u64 total_px = 0; Timer timer; std::string result; @@ -592,6 +593,7 @@ std::string ObjectFileDB::process_tpages() { auto statistics = process_tpage(data); total += statistics.total_textures; success += statistics.successful_textures; + total_px += statistics.num_px; } else if (data.name_in_dgo == "dir-tpages") { result = process_dir_tpages(data).to_source(); tpage_dir_count++; @@ -605,7 +607,7 @@ std::string ObjectFileDB::process_tpages() { return {}; } - lg::info("Processed {} / {} textures {:.2f}% in {:.2f} ms", success, total, + lg::info("Processed {} / {} textures ({} px) {:.2f}% in {:.2f} ms", success, total, total_px, 100.f * float(success) / float(total), timer.getMs()); return result; } diff --git a/decompiler/VuDisasm/VuDisassembler.cpp b/decompiler/VuDisasm/VuDisassembler.cpp index eee9270792..65e08df739 100644 --- a/decompiler/VuDisasm/VuDisassembler.cpp +++ b/decompiler/VuDisasm/VuDisassembler.cpp @@ -48,7 +48,9 @@ int upper_imm15_unsigned(u32 in) { } } // namespace -VuDisassembler::VuDisassembler() { + +VuDisassembler::VuDisassembler(VuKind kind) : m_kind(kind) { + // build the 
decode tables m_upper_op6_table[0b000000].set(VuInstrK::ADDbc); // 0 m_upper_op6_table[0b000001].set(VuInstrK::ADDbc); // 1 m_upper_op6_table[0b000010].set(VuInstrK::ADDbc); // 2 @@ -220,6 +222,9 @@ VuDisassembler::VuDisassembler() { add_op(VuInstrK::MFP, "mfp").dst_mask().dst_vft().src_p(); } +/*! + * Add a VU operation to the decode table + */ VuDisassembler::OpInfo& VuDisassembler::add_op(VuInstrK kind, const std::string& name) { assert((int)kind < (int)VuInstrK::INVALID); auto& elt = m_op_info[(int)kind]; @@ -228,6 +233,9 @@ VuDisassembler::OpInfo& VuDisassembler::add_op(VuInstrK kind, const std::string& return elt; } +/*! + * Decode a lower instruction kind + */ VuInstrK VuDisassembler::lower_kind(u32 in) { auto op = lower_op(in); if (in == 0b10000000000000000000000000110000) { @@ -296,6 +304,9 @@ VuInstrK VuDisassembler::lower_kind(u32 in) { } } +/*! + * Decode an upper instruction kind + */ VuInstrK VuDisassembler::upper_kind(u32 in) { auto& upper_info = m_upper_op6_table[upper_op6(in)]; if (upper_info.goto_11) { @@ -361,6 +372,20 @@ VuInstrK VuDisassembler::upper_kind(u32 in) { return upper_info.kind; } +/*! + * Get the mask applied to instruction offsets: (size in bytes / 8) - 1 for the selected VuKind. + */ +s32 VuDisassembler::get_instruction_index_mask() { + switch (m_kind) { + case VU0: + return (4096 / 8) - 1; + case VU1: + return (16384 / 8) - 1; + } + assert(false); /* m_kind is not a known VuKind */ + return 0; /* keeps the result defined when assert is compiled out */ +} + VuProgram VuDisassembler::disassemble(void* data, int size_bytes, bool debug_print) { auto bytes = (u8*)data; // should be 8 byte aligned size. 
@@ -430,7 +455,9 @@ VuInstruction VuDisassembler::decode(VuInstrK kind, u32 data, int instr_idx) { case VuDecodeStep::FieldK::IMM11_BRANCH: { s32 signed_11 = upper_op11(data) << 21; signed_11 >>= 21; - value = add_label(signed_11 + instr_idx + 1); + s32 offset = signed_11 + instr_idx + 1; + offset &= get_instruction_index_mask(); /* wrap the branch target using the mask for m_kind (VU0 and VU1 differ) */ + value = add_label(offset); } break; case VuDecodeStep::FieldK::IMM11_SIGNED: { s32 signed_value = (data << 21); @@ -674,6 +701,7 @@ std::string VuDisassembler::to_string(const VuProgram& prog) const { result += ':'; result += '\n'; } + // result += fmt::format("{} ;; 0x{:x}", to_string(prog.instructions().at(i)), i); result += to_string(prog.instructions().at(i)); result += '\n'; } diff --git a/decompiler/VuDisasm/VuDisassembler.h b/decompiler/VuDisasm/VuDisassembler.h index 011a74ad37..f283a0d36f 100644 --- a/decompiler/VuDisasm/VuDisassembler.h +++ b/decompiler/VuDisasm/VuDisassembler.h @@ -50,16 +50,22 @@ struct VuDecodeStep { class VuDisassembler { public: - VuDisassembler(); + enum VuKind { + VU0, + VU1, + }; + VuDisassembler(VuKind kind); VuProgram disassemble(void* data, int size_bytes, bool debug_print = false); std::string to_string(const VuInstruction& instr) const; std::string to_string(const VuInstructionPair& pair) const; std::string to_string(const VuProgram& prog) const; private: + VuKind m_kind; VuInstrK upper_kind(u32 in); VuInstrK lower_kind(u32 in); VuInstruction decode(VuInstrK kind, u32 data, int instr_idx); + s32 get_instruction_index_mask(); struct VuUpperOp6 { bool goto_11 = false; diff --git a/decompiler/analysis/find_skelgroups.cpp b/decompiler/analysis/find_skelgroups.cpp index 3795df3af8..ad2546d2c8 100644 --- a/decompiler/analysis/find_skelgroups.cpp +++ b/decompiler/analysis/find_skelgroups.cpp @@ -108,7 +108,7 @@ DefskelgroupElement::StaticInfo inspect_skel_group_data(DecompiledDataElement* s DefskelgroupElement::Info get_defskelgroup_entries(Form* body, const Env& env, - const DefskelgroupElement::StaticInfo& info, + const 
DefskelgroupElement::StaticInfo& /*info*/, const RegisterAccess& let_dest_var) { DefskelgroupElement::Info out_info; diff --git a/decompiler/analysis/mips2c.cpp b/decompiler/analysis/mips2c.cpp index 1321bba89e..1841d02df3 100644 --- a/decompiler/analysis/mips2c.cpp +++ b/decompiler/analysis/mips2c.cpp @@ -501,6 +501,10 @@ Mips2C_Line handle_unknown(const std::string& instr_str) { } Mips2C_Line handle_generic_load(const Instruction& i0, const std::string& instr_str) { + if (!i0.get_src(0).is_imm()) { + // might be a load relative to a label + return handle_unknown(instr_str); + } return {fmt::format("c->{}({}, {}, {});", i0.op_name_to_string(), reg_to_name(i0.get_dst(0)), i0.get_src(0).get_imm(), reg_to_name(i0.get_src(1))), instr_str}; @@ -678,6 +682,12 @@ Mips2C_Line handle_vmadda_bc(const Instruction& i0, const std::string& instr_str instr_str}; } +Mips2C_Line handle_vmsuba_bc(const Instruction& i0, const std::string& instr_str) { + return {fmt::format("c->vmsuba_bc(DEST::{}, BC::{}, {}, {});", dest_to_char(i0.cop2_dest), + i0.cop2_bc_to_char(), reg_to_name(i0.get_src(0)), reg_to_name(i0.get_src(1))), + instr_str}; +} + std::string reg64_or_zero(const InstructionAtom& atom) { if (atom.is_reg(Register(Reg::GPR, Reg::R0))) { return "0"; @@ -787,6 +797,10 @@ Mips2C_Line handle_clts(const Instruction& i0, const std::string& instr_string) instr_string}; } +Mips2C_Line handle_pmfhl_lh(const Instruction& i0, const std::string& instr_string) { + return {fmt::format("c->pmfhl_lh({});", reg_to_name(i0.get_dst(0))), instr_string}; +} + Mips2C_Line handle_normal_instr(Mips2C_Output& output, const Instruction& i0, const std::string& instr_str, @@ -795,6 +809,7 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, switch (i0.kind) { case InstructionKind::LW: return handle_lw(output, i0, instr_str); + case InstructionKind::LB: case InstructionKind::LBU: case InstructionKind::LWU: case InstructionKind::LQ: @@ -809,6 +824,7 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& 
output, case InstructionKind::SQC2: case InstructionKind::SH: case InstructionKind::SD: + case InstructionKind::SB: case InstructionKind::SWC1: return handle_generic_store(output, i0, instr_str); case InstructionKind::VADD_BC: @@ -827,6 +843,8 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, return handle_generic_op3_mask(i0, instr_str, "vadd"); case InstructionKind::VSUB: return handle_generic_op3_mask(i0, instr_str, "vsub"); + case InstructionKind::VMINI: + return handle_generic_op3_mask(i0, instr_str, "vmini"); case InstructionKind::OR: return handle_or(i0, instr_str); case InstructionKind::SW: @@ -839,15 +857,23 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, return handle_generic_op2_mask(i0, instr_str, "vftoi0"); case InstructionKind::VFTOI4: return handle_generic_op2_mask(i0, instr_str, "vftoi4"); + case InstructionKind::VFTOI12: + return handle_generic_op2_mask(i0, instr_str, "vftoi12"); case InstructionKind::VADDQ: return handle_generic_op2_mask(i0, instr_str, "vaddq"); case InstructionKind::ANDI: case InstructionKind::ORI: + case InstructionKind::XORI: case InstructionKind::SRA: case InstructionKind::DSLL: case InstructionKind::DSLL32: case InstructionKind::DSRA: case InstructionKind::DSRA32: + case InstructionKind::DSRL32: + case InstructionKind::DSRL: + case InstructionKind::SRL: + case InstructionKind::PSRAW: + case InstructionKind::PSRLH: return handle_generic_op2_u16(i0, instr_str); case InstructionKind::SLL: return handle_sll(i0, instr_str); @@ -859,6 +885,16 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, case InstructionKind::MOVN: case InstructionKind::PEXTUW: case InstructionKind::PCPYUD: + case InstructionKind::PPACH: + case InstructionKind::PINTEH: + case InstructionKind::PCGTW: + case InstructionKind::PPACB: + case InstructionKind::PADDW: + case InstructionKind::PEXTUB: + case InstructionKind::PMULTH: + case InstructionKind::PMADDH: + case InstructionKind::PADDH: + case InstructionKind::PMINH: case 
InstructionKind::MOVZ: case InstructionKind::MULT3: case InstructionKind::PMINW: @@ -890,8 +926,12 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, return handle_vmula_bc(i0, instr_str); case InstructionKind::VMADDA_BC: return handle_vmadda_bc(i0, instr_str); + case InstructionKind::VMSUBA_BC: + return handle_vmsuba_bc(i0, instr_str); case InstructionKind::VMADD_BC: return handle_generic_op3_bc_mask(i0, instr_str, "vmadd_bc"); + case InstructionKind::VMSUB_BC: + return handle_generic_op3_bc_mask(i0, instr_str, "vmsub_bc"); case InstructionKind::VDIV: return handle_vdiv(i0, instr_str); case InstructionKind::VSQRT: @@ -933,6 +973,8 @@ Mips2C_Line handle_normal_instr(Mips2C_Output& output, return handle_plain_op(i0, instr_str, "vwaitq"); case InstructionKind::VOPMULA: return handle_vopmula(i0, instr_str); + case InstructionKind::PMFHL_LH: + return handle_pmfhl_lh(i0, instr_str); default: unknown_count++; return handle_unknown(instr_str); diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index 67402f90b3..62173dbb59 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -502,12 +502,16 @@ (sky-draw 3) (bucket-4 4) ;; ocean (tfrag-tex0 5) + (tfrag-0 6) + (tfrag-near-0 7) ;; merc0 10 ;; generic0 11 (bucket-10 10) (bucket-11 11) (tfrag-tex1 12) + (tfrag-1 13) + (tfrag-near-1 14) ;; merc1 17 ;; generic1 18 (bucket-17 17) @@ -519,10 +523,20 @@ (generic-foreground 30) ;; ? 
(alpha-tex0 31) - (sky-tex0 32) + (tfrag-trans-0 32) ;; also sky blend + (tfrag-trans-near-0 33) + (tfrag-dirt-0 34) + (tfrag-dirt-near-0 35) + (tfrag-ice-0 36) + (tfrag-ice-near-0 37) (alpha-tex1 38) - (sky-tex1 39) + (tfrag-trans-1 39) ;; also sky blend + (tfrag-trans-near-1 40) + (tfrag-dirt-1 41) + (tfrag-dirt-near-1 42) + (tfrag-ice-1 43) + (tfrag-ice-near-1 44) (bucket-45 45) (bucket-46 46) @@ -3565,8 +3579,30 @@ ;; - Symbols -(define-extern *vu1-enable-user-menu* int) -(define-extern *vu1-enable-user* int) +(defenum vu1-renderer-mask + :bitfield #t + (sky 3) + (ocean 4) + (ocean-wave 5) + (tfrag 6) + (tie-near 7) + (tie 8) + (generic 9) + (merc 10) + (shrubbery 11) + (shrub-near 12) + (billboard 13) + (trans-shrubbery 14) + (trans-frag 15) + (sprite 16) + (shadow 17) + (depth-cue 18) + (nineteen 19) + (twenty 20) + ) + +(define-extern *vu1-enable-user-menu* vu1-renderer-mask) +(define-extern *vu1-enable-user* vu1-renderer-mask) ;; ---------------------- @@ -6224,8 +6260,8 @@ (collide-with-box (_type_ int collide-list) none 11) (collide-y-probe (_type_ int collide-list) none 12) (collide-ray (_type_ int collide-list) none 13) - (dummy-14 (_type_) none 14) - (debug-draw (_type_ drawable object) none 15) + (collect-stats (_type_) none 14) + (debug-draw (_type_ drawable display-frame) none 15) (dummy-16 (_type_ object object) object 16) (collect-ambients (_type_ sphere int ambient-list) none 17) ) @@ -12767,7 +12803,7 @@ (deftype tfragment-debug-data (structure) ((stats tfragment-stats :inline :offset-assert 0) - (debug-lines basic :offset-assert 16) + (debug-lines (array vector-array) :offset-assert 16) ) :method-count-assert 9 :size-assert #x14 @@ -12793,7 +12829,7 @@ (dma-level-0 uint32 :offset 32) (dma-base uint32 :offset 36) (dma-level-1 uint32 :offset 40) - (dma-qwc uint32 4 :offset 44) + (dma-qwc uint8 4 :offset 44) (shader (inline-array adgif-shader) :offset 48) (num-shaders uint8 :offset 52) (num-base-colors uint8 :offset 53) @@ -12828,7 +12864,9 @@ ) 
(deftype drawable-tree-tfrag (drawable-tree) - ((time-of-day-pal time-of-day-palette :offset 12)) + ((time-of-day-pal time-of-day-palette :offset 12) + (arrays drawable-inline-array 1 :offset 32 :score 100) ;; either drawable-inline-array-node or drawable-inline-array-tfrag + ) :method-count-assert #x12 :size-assert #x24 :flag-assert #x1200000024 @@ -12870,10 +12908,10 @@ ) (deftype tfrag-dists (structure) - ((data uint32 16 :offset-assert 0) - (vector vector 4 :inline :offset 0) - (k0s uint128 2 :offset 0) - (k1s uint128 2 :offset 32) + ((data uint32 16 :offset-assert 0 :score -1) + (vector vector 4 :inline :offset 0 :score -1) + (k0s vector 2 :inline :offset 0) + (k1s vector 2 :inline :offset 32) ) :method-count-assert 9 :size-assert #x40 @@ -12881,13 +12919,14 @@ ) (deftype tfrag-data (structure) - ((data uint32 56 :offset 0) - (vector vector 14 :inline :offset 0) + ((data uint32 56 :offset 0 :score -1) + (vector vector 14 :inline :offset 0 :score -1) (fog vector :inline :offset 0) (val vector :inline :offset 16) - (strgif qword :inline :offset 32) - (fangif qword :inline :offset 48) - (adgif qword :inline :offset 64) + + (strgif gs-gif-tag :inline :offset 32) ;; was qword + (fangif gs-gif-tag :inline :offset 48) ;; was qword + (adgif gs-gif-tag :inline :offset 64) ;; was qword (hvdf-offset vector :inline :offset 80) (hmge-scale vector :inline :offset 96) (invh-scale vector :inline :offset 112) @@ -15552,7 +15591,7 @@ (define-extern clear-tr-stat (function tr-stat none)) (define-extern print-terrain-stats (function none)) (define-extern update-subdivide-settings! (function subdivide-settings math-camera int none)) -(define-extern set-tfrag-dists! (function (pointer float) none)) +(define-extern set-tfrag-dists! 
(function tfrag-dists none)) (define-extern start-perf-stat-collection (function none)) (define-extern end-perf-stat-collection (function none)) (define-extern print-perf-stats (function none)) @@ -16498,7 +16537,7 @@ (define-extern collide-cache-using-line-sphere-test (function vector symbol)) (define-extern collide-cache-using-y-probe-test (function vector symbol)) (define-extern collide-cache-using-box-test (function vector symbol)) -(define-extern draw-node-cull function) +(define-extern draw-node-cull (function pointer pointer (inline-array draw-node) int none)) ;; ---------------------- @@ -16562,24 +16601,24 @@ ;; - Functions -(define-extern add-tfrag-mtx-0 function) -(define-extern add-tfrag-mtx-1 function) -(define-extern add-tfrag-data function) -(define-extern tfrag-data-setup function) -(define-extern tfrag-print-stats function) -(define-extern tfrag-init-buffer function) -(define-extern tfrag-end-buffer function) -(define-extern draw-inline-array-tfrag function) -(define-extern tfrag-near-init-buffer function) -(define-extern tfrag-near-end-buffer function) -(define-extern draw-inline-array-tfrag-near function) -(define-extern stats-tfrag-asm function) +(define-extern add-tfrag-mtx-0 (function dma-buffer none)) +(define-extern add-tfrag-mtx-1 (function dma-buffer none)) +(define-extern add-tfrag-data (function dma-buffer int none)) +(define-extern tfrag-data-setup (function tfrag-data int none)) +(define-extern tfrag-print-stats (function symbol none)) +(define-extern tfrag-init-buffer (function dma-buffer gs-test int none)) +(define-extern tfrag-end-buffer (function dma-buffer none)) +(define-extern draw-inline-array-tfrag (function pointer drawable-inline-array int dma-buffer none)) +(define-extern tfrag-near-init-buffer (function dma-buffer gs-test int none)) +(define-extern tfrag-near-end-buffer (function dma-buffer none)) +(define-extern draw-inline-array-tfrag-near (function pointer drawable-inline-array int dma-buffer none)) +(define-extern 
stats-tfrag-asm (function tfragment none)) ;; - Unknowns (define-extern tfrag-vu1-block vu-function) -;;(define-extern t-stat object) ;; unknown type -;;(define-extern *tfrag-display-stats* object) ;; unknown type +(define-extern t-stat tfrag-stats) +(define-extern *tfrag-display-stats* symbol) ;; ---------------------- @@ -16590,7 +16629,7 @@ ;; - Functions -(define-extern edge-debug-lines function) +(define-extern edge-debug-lines (function (array vector-array) none)) (define-extern vis-cull (function int symbol)) diff --git a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc index d0ddaaf1ed..02e4489259 100644 --- a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc @@ -502,7 +502,12 @@ 57, 58, 59, 60, 61, 62, 63, 64, 65, 66 ], "(method 11 sparticle-launch-control)": [27, 28, 35, 46, 48, 49, 77], - "upload-vis-bits": [0, 1, 2, 3, 4, 5, 6] + "upload-vis-bits": [0, 1, 2, 3, 4, 5, 6], + + "draw-drawable-tree-tfrag": [6, 8, 13, 15], + "draw-drawable-tree-trans-tfrag": [6, 8, 13, 15], + "draw-drawable-tree-dirt-tfrag": [6, 8, 13, 15], + "draw-drawable-tree-ice-tfrag": [6, 8, 13, 15] }, // Sometimes the game might use format strings that are fetched dynamically, @@ -545,7 +550,10 @@ "init-boundary-regs", "draw-boundary-polygon", "render-boundary-quad", - "render-boundary-tri" + "render-boundary-tri", + "draw-inline-array-tfrag", + "stats-tfrag-asm", + "time-of-day-interp-colors-scratch" ] } diff --git a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc index 04154c37e7..ecef73b2a1 100644 --- a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc @@ -1240,6 +1240,12 @@ ["L194", "attack-info"] ], + "tfrag": [ + ["L107", "vu-function"], + ["L106", "vector"], + ["L105", "vector"] + ], + // please do not add things after this 
entry! git is dumb. "object-file-that-doesnt-actually-exist-and-i-just-put-this-here-to-prevent-merge-conflicts-with-this-file": [] } diff --git a/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc b/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc index 544ca2deed..1fffc5cde6 100644 --- a/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc @@ -4877,5 +4877,123 @@ [9, "v1", "(pointer res-tag)"] ], + + "draw-drawable-tree-tfrag": [ + [[16,32], "v1", "drawable-inline-array-node"], + [18, "a0", "drawable-inline-array-node"], + [142, "v1", "terrain-context"], + [238, "v1", "terrain-context"], + [[38, 40], "v1", "drawable-inline-array-tfrag"], + [22, "a2", "terrain-context"], + [25, "a2", "terrain-context"], + [44, "a0", "terrain-context"], + [[128, 131], "v1", "dma-packet"], + [[224, 227], "v1", "dma-packet"] + ], + + "draw-drawable-tree-trans-tfrag": [ + [[16,32], "v1", "drawable-inline-array-node"], + [22, "a2", "terrain-context"], + [25, "a2", "terrain-context"], + [44, "a0", "terrain-context"], + [18, "a0", "drawable-inline-array-node"], + [134, "v1", "terrain-context"], + [230, "v1", "terrain-context"], + [[120, 123], "v1", "dma-packet"], + [[216, 219], "v1", "dma-packet"] + ], + + "draw-drawable-tree-dirt-tfrag": [ + [[16,32], "v1", "drawable-inline-array-node"], + [22, "a2", "terrain-context"], + [25, "a2", "terrain-context"], + [44, "a0", "terrain-context"], + [18, "a0", "drawable-inline-array-node"], + [134, "v1", "terrain-context"], + [230, "v1", "terrain-context"], + [[120, 123], "v1", "dma-packet"], + [[216, 219], "v1", "dma-packet"] + ], + + "draw-drawable-tree-ice-tfrag": [ + [[16,32], "v1", "drawable-inline-array-node"], + [22, "a2", "terrain-context"], + [25, "a2", "terrain-context"], + [44, "a0", "terrain-context"], + [18, "a0", "drawable-inline-array-node"], + [134, "v1", "terrain-context"], + [230, "v1", "terrain-context"], + [[120, 123], "v1", "dma-packet"], + [[216, 
219], "v1", "dma-packet"] + ], + + "(method 10 drawable-tree-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 10 drawable-tree-trans-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 10 drawable-tree-dirt-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 10 drawable-tree-ice-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 10 drawable-tree-lowres-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 10 drawable-tree-lowres-trans-tfrag)": [ + [3, "a1", "terrain-context"] + ], + + "(method 15 drawable-tree-array)": [ + [11, "s5", "drawable-tree-array"] + ], + + "tfrag-near-end-buffer": [ + [[3, 7], "a1", "dma-packet"], + [[15,20], "a0", "(pointer vif-tag)"], + [[20, 24], "a0", "(pointer uint32)"], + [[25, 31], "a0", "(pointer vif-tag)"] + ], + + "tfrag-near-init-buffer": [ + [[11, 15], "a0", "dma-packet"], + [[21, 24], "a0", "gs-gif-tag"], + [30, "a0", "(pointer gs-reg64)"], + [[45, 50], "v1", "dma-packet"] + ], + + "tfrag-end-buffer": [ + [[3, 7], "a1", "dma-packet"], + [[13,20], "a0", "(pointer vif-tag)"], + [[20, 24], "a0", "(pointer uint32)"], + [[25, 31], "a0", "(pointer vif-tag)"] + ], + + "tfrag-init-buffer": [ + [[11, 15], "a0", "dma-packet"], + [[21, 24], "a0", "gs-gif-tag"], + [30, "a0", "(pointer gs-reg64)"], + [[45, 50], "v1", "dma-packet"] + ], + + "add-tfrag-data": [ + [[8, 16], "a0", "dma-packet"], + [[26, 30], "v1", "dma-packet"] + ], + + "add-tfrag-mtx-1": [ + [[8, 16], "a0", "dma-packet"] + ], + "add-tfrag-mtx-0": [ + [[8, 16], "a0", "dma-packet"] + ], + "placeholder-do-not-add-below": [] } diff --git a/decompiler/data/tpage.cpp b/decompiler/data/tpage.cpp index 0d69e3d83a..3030c63236 100644 --- a/decompiler/data/tpage.cpp +++ b/decompiler/data/tpage.cpp @@ -471,6 +471,7 @@ TPageResultStats process_tpage(ObjectFileData& data) { } stats.total_textures++; + stats.num_px += tex.w * tex.h; if (tex.psm == int(PSM::PSMT8) && tex.clutpsm == int(CPSM::PSMCT32)) { // this is the only supported texture format 
for now. diff --git a/decompiler/data/tpage.h b/decompiler/data/tpage.h index aeacb9b68d..aa657501d9 100644 --- a/decompiler/data/tpage.h +++ b/decompiler/data/tpage.h @@ -6,6 +6,7 @@ struct ObjectFileData; struct TPageResultStats { int total_textures = 0; int successful_textures = 0; + int num_px = 0; }; TPageResultStats process_tpage(ObjectFileData& data); diff --git a/docs/markdown/tfrag.md b/docs/markdown/tfrag.md new file mode 100644 index 0000000000..d08a470619 --- /dev/null +++ b/docs/markdown/tfrag.md @@ -0,0 +1,85 @@ +# Basic Process for Drawing + +The first main part just gets the "drawable trees" added to the list. +When the level loads, it gets added to the background engine in `level-status-set!`. When the background system runs, it executes this engine, which calls `draw` on the `bsp-header`. Eventually `draw` is called on the tfrag tree, and it gets added to the `*background-work*` list. + +The second main part builds DMA. +This happens from `finish-background`, called from `real-main-draw-hook`, called from the display loop in `main.gc`. +For each tree, it uploads vis data, interpolates time-of-day colors, and runs `draw-drawable-tree-tfrag`. This sets up DMA buffers and eventually calls `draw-inline-array-tfrag`, a crazy asm function that builds the DMA lists and likely makes decisions about which LOD to draw. Within `draw-drawable-tree-tfrag`, there's also a call to `draw-inline-array-tfrag-near` that sets up DMA for the separate `near` renderer. + +The third main part runs on VU1 and actually draws. +The DMA chain is a bunch of UNPACKs, which load data to VU memory, with some MSCALs that start VU1 programs. There are a number of different routines. + +# Visibility +The visibility information is not computed yet, but I believe this is properly handled in `draw-inline-array-tfrag`. By modifying `upload-vis-bits` in `background.gc`, I can change what is drawn. One mystery is that the tfrag "stats" change, but have totally wrong numbers. 
It almost seems like they are the correct stats for what is _behind_ the camera. + +As far as I can tell there are 2 or 3 types of culling: +- precomputed visibility from the VIS files. I believe this uses the bsp to figure out which string to load. This is done as part of the camera update and modifies the visibility strings. The vis strings aren't loaded, the bsp update isn't running, and the camera update doesn't run, so this doesn't work at all. +- frustum culling in `draw-node-cull`. The camera update sets the view frustum planes, and draw-node-cull iterates through the draw node tree (up to 8 at a time, using the fancy MMI instructions). The camera update doesn't run, so those planes aren't set. Also, the draw-node-cull function isn't ported yet. +- possibly additional culling in the `draw-inline-array-tfrag` function that builds the DMA list. It at least looks at the bspheres. +- clipping in the VU1 program. This _appears_ to work correctly in my port, but doesn't fully work in the PS2 version - if you turn on "fix frustum", you get garbage. But this might be a part of tfrag I don't have yet. + +# Time of Day +There's a function `time-of-day-interp-colors-scratch` that computes the time-of-day lighting coloring. The 8 w components of times are the multipliers for the 8 precomputed lighting maps. + +# Different TFrag Renderers? Not really +There are different types of drawable trees that are all tfrag: +- `drawable-tree-tfrag` +- `drawable-tree-trans-tfrag` +- `drawable-tree-dirt-tfrag` +- `drawable-tree-ice-tfrag` +- `drawable-tree-lowres-tfrag` +- `drawable-tree-lowres-trans-tfrag` +As far as I can tell, they are all drawn in almost exactly the same way. The different trees are just used: +- to give you an ordering. For example, everything in `drawable-tree-tfrag` is drawn before `drawable-tree-ice-tfrag`. This probably matters for transparent things. +- to have slightly different settings passed to the GS. 
As far as I can see the only differences are with ztest and alpha GS registers. +- to have different textures in VRAM. The `alpha` textures are uploaded after `drawable-tree-tfrag`. + +# Near vs Far +There are two separate tfrag renderers: near and far. They are separate VU1 programs. The tfragments themselves are not specialized per renderer, but the DMA lists might be. + +I am not sure, but I believe that both near and far versions are capable of rendering the full-detail mesh, and the main purpose of "near" is to perform scissoring on the VUs. I believe it can detect triangles that intersect the edge of the screen and divide them into multiple triangles, throwing away the parts that are off screen. The "far" version can't do this. + + +# Different VU1 subroutines +The "far" renderer has many subroutines. Currently I have only ported "program 6" and with a modification to remove about half the code. Internally, on VU1, tfrag double buffers GIF data. One packet will be getting "XGKICK"ed while another is being built. They have a pretty complicated system for switching this buffer (it's not synced to the XTOP UNPACK double buffering stuff, like it was on sprite), but I removed this feature and could get rid of half the packet building code, which is mostly a duplicate. + +This "program 6" is not capable of interpolating the mesh and just draws at one level of detail. But it is capable of drawing at _different_ levels of detail (at least 2). I suspect there are other programs for interpolating the mesh and doing color computations. + +Annoyingly, even tfragments that are at a single LOD, but border tfragments of a different LOD, must have a special interpolation applied to them. + +There is at least one other "drawing" program. I tried using program 6 when the game wanted other programs, and surprisingly it drew more stuff. But every now and then, things go wrong and it generates garbage data when running like this.
+ +Example sequence (from start of frame): `[8, 10, 10, 10, 6, 6, 6...]`. + +Program list: + +- Program 0: init globals (TFrag.cpp handles this) +- Program 2: reset the value of VF04 (nothing uses this?), but only 1 instruction long. +- Program 4: unported drawing program (L112 transform, L79 kick) (unused?) +- Program 6: ported drawing program (L127 transform, L122 kick) (most common) +- Program 8: runs sub L12, L26, L48, L102 (2nd most common) +- Program 10: runs sub L12, L18, L102 +- Program 12: same as program 6 (ported) +- Program 14: runs sub L12, L18, L25, L47, L102 +- Program 16: runs sub L13, L17, L102 +- Program 18: runs sub L13, L17, L84 +- Program 20: same as program 6 (ported) +- Program 22: same as program 2 (unused?) +- Program 24: same as program 2 (unused?) + +Sub List: +- L12/L13 (jal) + - very short, no drawing. looks like transformation + int to float. Has the camera matrix. + + +- L17/L18 (jal) + - also transformations with cam matrix. Looks at the subdivide. Likely part of mesh interp. Has two versions for buffer (2nd starts at L22) + +- L25/L26 (jal) + - also transformations. 
L37 is 2nd buffer version + +- L47 +- L48 +- L84 +- L102 diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 3cc90b919e..5506a959af 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -66,6 +66,8 @@ set(RUNTIME_SOURCE mips2c/functions/sparticle_launcher.cpp mips2c/functions/test_func.cpp mips2c/functions/texture.cpp + mips2c/functions/tfrag.cpp + mips2c/functions/time_of_day.cpp overlord/dma.cpp overlord/fake_iso.cpp overlord/iso.cpp @@ -86,12 +88,16 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/BucketRenderer.cpp graphics/opengl_renderer/debug_gui.cpp graphics/opengl_renderer/DirectRenderer.cpp + graphics/opengl_renderer/dma_helpers.cpp graphics/opengl_renderer/OpenGLRenderer.cpp graphics/opengl_renderer/Profiler.cpp graphics/opengl_renderer/Shader.cpp graphics/opengl_renderer/SkyRenderer.cpp graphics/opengl_renderer/SpriteRenderer.cpp graphics/opengl_renderer/TextureUploadHandler.cpp + graphics/opengl_renderer/tfrag/program6_cpu.cpp + graphics/opengl_renderer/tfrag/tfrag_unpack.cpp + graphics/opengl_renderer/tfrag/TFragment.cpp graphics/texture/TextureConverter.cpp graphics/texture/TexturePool.cpp graphics/pipelines/opengl.cpp diff --git a/game/fake_iso.txt b/game/fake_iso.txt index c7f453e868..6320074560 100644 --- a/game/fake_iso.txt +++ b/game/fake_iso.txt @@ -23,4 +23,5 @@ VI3.DGO out/iso/VI3.DGO TRA.DGO out/iso/TRA.DGO FIN.DGO out/iso/FIN.DGO FIC.DGO out/iso/FIC.DGO -JUN.DGO out/iso/JUN.DGO \ No newline at end of file +JUN.DGO out/iso/JUN.DGO +MAI.DGO out/iso/MAI.DGO \ No newline at end of file diff --git a/game/graphics/opengl_renderer/BucketRenderer.cpp b/game/graphics/opengl_renderer/BucketRenderer.cpp index 7095785234..b065fb6d89 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.cpp +++ b/game/graphics/opengl_renderer/BucketRenderer.cpp @@ -45,4 +45,14 @@ void EmptyBucketRenderer::render(DmaFollower& dma, // and we should now be in the next bucket! 
assert(dma.current_tag_offset() == render_state->next_bucket); +} + +SkipRenderer::SkipRenderer(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id) {} + +void SkipRenderer::render(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& /*prof*/) { + while (dma.current_tag_offset() != render_state->next_bucket) { + dma.read_and_advance(); + } } \ No newline at end of file diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 6af79c333d..1cebb4df00 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -15,13 +15,17 @@ enum class BucketId { BUCKET1 = 1, SKY_DRAW = 3, TFRAG_TEX_LEVEL0 = 5, + TFRAG_LEVEL0 = 6, TFRAG_TEX_LEVEL1 = 12, + TFRAG_LEVEL1 = 13, SHRUB_TEX_LEVEL0 = 19, SHRUB_TEX_LEVEL1 = 25, ALPHA_TEX_LEVEL0 = 31, - SKY_BLEND_LEVEL0 = 32, + TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0 = 32, + TFRAG_DIRT_LEVEL0 = 34, ALPHA_TEX_LEVEL1 = 38, - SKY_BLEND_LEVEL1 = 39, + TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1 = 39, + TFRAG_DIRT_LEVEL1 = 41, PRIS_TEX_LEVEL0 = 48, PRIS_TEX_LEVEL1 = 51, WATER_TEX_LEVEL0 = 57, @@ -83,4 +87,12 @@ class EmptyBucketRenderer : public BucketRenderer { void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; bool empty() const override { return true; } void draw_debug_window() override {} +}; + +class SkipRenderer : public BucketRenderer { + public: + SkipRenderer(const std::string& name, BucketId my_id); + void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; + bool empty() const override { return true; } + void draw_debug_window() override {} }; \ No newline at end of file diff --git a/game/graphics/opengl_renderer/DirectRenderer.cpp b/game/graphics/opengl_renderer/DirectRenderer.cpp index d6cb0019d7..cdc08f3476 100644 --- a/game/graphics/opengl_renderer/DirectRenderer.cpp +++ 
b/game/graphics/opengl_renderer/DirectRenderer.cpp @@ -78,9 +78,23 @@ void DirectRenderer::draw_debug_window() { ImGui::Checkbox("draw2", &m_sprite_mode.do_second_draw); } - ImGui::Text("Triangles: %d", m_triangles); + ImGui::Text("Triangles: %d", m_stats.triangles); ImGui::SameLine(); - ImGui::Text("Draws: %d", m_draw_calls); + ImGui::Text("Draws: %d", m_stats.draw_calls); + + ImGui::Text("Flush from state change:"); + ImGui::Text(" tex0: %d", m_stats.flush_from_tex_0); + ImGui::Text(" tex1: %d", m_stats.flush_from_tex_1); + ImGui::Text(" zbuf: %d", m_stats.flush_from_zbuf); + ImGui::Text(" test: %d", m_stats.flush_from_test); + ImGui::Text(" alph: %d", m_stats.flush_from_alpha); + ImGui::Text(" clmp: %d", m_stats.flush_from_clamp); + ImGui::Text(" prim: %d", m_stats.flush_from_prim); + ImGui::Text(" Total: %d/%d", + m_stats.flush_from_prim + m_stats.flush_from_clamp + m_stats.flush_from_alpha + + m_stats.flush_from_test + m_stats.flush_from_zbuf + m_stats.flush_from_tex_1 + + m_stats.flush_from_tex_0, + m_stats.draw_calls); } float u32_to_float(u32 in) { @@ -142,14 +156,14 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil // render! 
// update buffers: glBindBuffer(GL_ARRAY_BUFFER, m_ogl.vertex_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.verts.size() * sizeof(math::Vector), + glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector), m_prim_buffer.verts.data()); glBindBuffer(GL_ARRAY_BUFFER, m_ogl.color_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.rgba_u8.size() * sizeof(math::Vector), + glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector), m_prim_buffer.rgba_u8.data()); if (m_prim_gl_state.texture_enable) { glBindBuffer(GL_ARRAY_BUFFER, m_ogl.st_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.stqs.size() * sizeof(math::Vector), + glBufferSubData(GL_ARRAY_BUFFER, 0, m_prim_buffer.vert_count * sizeof(math::Vector), m_prim_buffer.stqs.data()); } @@ -227,8 +241,8 @@ void DirectRenderer::flush_pending(SharedRenderState* render_state, ScopedProfil int n_tris = draw_count * (m_prim_buffer.vert_count / 3); prof.add_tri(n_tris); prof.add_draw_call(draw_count); - m_triangles += n_tris; - m_draw_calls += draw_count; + m_stats.triangles += n_tris; + m_stats.draw_calls += draw_count; m_prim_buffer.vert_count = 0; } @@ -242,9 +256,12 @@ void DirectRenderer::update_gl_prim(SharedRenderState* render_state) { case GsTest::AlphaTest::ALWAYS: break; case GsTest::AlphaTest::GEQUAL: - alpha_reject = m_test_state.aref / 128.f; + alpha_reject = m_test_state.aref / 127.f; + break; + case GsTest::AlphaTest::NEVER: break; default: + fmt::print("unknown alpha test: {}\n", (int)m_test_state.alpha_test); assert(false); } } @@ -267,7 +284,8 @@ void DirectRenderer::update_gl_prim(SharedRenderState* render_state) { "alpha_reject"), alpha_reject); } - update_gl_texture(render_state); + // update_gl_texture(render_state); + m_texture_state.needs_gl_update = true; } else { if (m_mode == Mode::SKY) { render_state->shaders[ShaderId::SKY].activate(); @@ -276,7 +294,7 @@ void DirectRenderer::update_gl_prim(SharedRenderState* 
render_state) { } } if (state.fogging_enable) { - assert(false); + // assert(false); } if (state.aa_enable) { assert(false); @@ -318,11 +336,15 @@ void DirectRenderer::update_gl_texture(SharedRenderState* render_state) { glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, tex->gpu_texture); // Note: CLAMP and CLAMP_TO_EDGE are different... - if (m_clamp_state.clamp) { + if (m_clamp_state.clamp_s) { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); } else { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + } + + if (m_clamp_state.clamp_t) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } else { glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); } @@ -340,25 +362,30 @@ void DirectRenderer::update_gl_texture(SharedRenderState* render_state) { void DirectRenderer::update_gl_blend() { const auto& state = m_blend_state; - if (state.a == GsAlpha::BlendMode::SOURCE && state.b == GsAlpha::BlendMode::DEST && - state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { - // (Cs - Cd) * As + Cd - // Cs * As + (1 - As) * Cd - glEnable(GL_BLEND); - // s, d - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - } else if (state.a == GsAlpha::BlendMode::SOURCE && - state.b == GsAlpha::BlendMode::ZERO_OR_FIXED && - state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { - // (Cs - 0) * As + Cd - // Cs * As + (1) * CD - glEnable(GL_BLEND); - // s, d - glBlendFunc(GL_SRC_ALPHA, GL_ONE); + if (!state.alpha_blend_enable) { + glDisable(GL_BLEND); } else { - lg::error("unsupported blend: a {} b {} c {} d {}\n", (int)state.a, (int)state.b, (int)state.c, - (int)state.d); - assert(false); + if (state.a == GsAlpha::BlendMode::SOURCE && state.b == GsAlpha::BlendMode::DEST && + state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { + // (Cs - Cd) * As + Cd + // Cs * As + (1 - 
As) * Cd + glEnable(GL_BLEND); + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + } else if (state.a == GsAlpha::BlendMode::SOURCE && + state.b == GsAlpha::BlendMode::ZERO_OR_FIXED && + state.c == GsAlpha::BlendMode::SOURCE && state.d == GsAlpha::BlendMode::DEST) { + // (Cs - 0) * As + Cd + // Cs * As + (1) * CD + glEnable(GL_BLEND); + // s, d + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + } else { + // unsupported blend: a 0 b 2 c 2 d 1 + lg::error("unsupported blend: a {} b {} c {} d {}\n", (int)state.a, (int)state.b, + (int)state.c, (int)state.d); + assert(false); + } } } @@ -476,15 +503,19 @@ void DirectRenderer::render_gif(const u8* data, u32 size, SharedRenderState* render_state, ScopedProfilerNode& prof) { - assert(size >= 16); + if (size != UINT32_MAX) { + assert(size >= 16); + } + bool eop = false; u32 offset = 0; while (!eop) { - assert(offset < size); + if (size != UINT32_MAX) { + assert(offset < size); + } GifTag tag(data + offset); offset += 16; - // fmt::print("Tag at offset {}: {}\n", offset, tag.print()); // unpack registers. // faster to do it once outside of the nloop loop. @@ -560,7 +591,9 @@ void DirectRenderer::render_gif(const u8* data, eop = tag.eop(); } - assert((offset + 15) / 16 == size / 16); + if (size != UINT32_MAX) { + assert((offset + 15) / 16 == size / 16); + } // fmt::print("{}\n", GifTag(data).print()); } @@ -612,6 +645,7 @@ void DirectRenderer::handle_ad(const u8* data, handle_tex0_1(value, render_state, prof); break; case GsRegisterAddress::MIPTBP1_1: + case GsRegisterAddress::MIPTBP2_1: // TODO this has the address of different mip levels. 
break; case GsRegisterAddress::TEXFLUSH: @@ -634,6 +668,7 @@ void DirectRenderer::handle_tex1_1(u64 val, if (want_tex_filt != m_texture_state.enable_tex_filt) { flush_pending(render_state, prof); + m_stats.flush_from_tex_1++; m_texture_state.enable_tex_filt = want_tex_filt; } @@ -659,12 +694,13 @@ void DirectRenderer::handle_tex0_1(u64 val, // update tbp if (m_texture_state.current_register != reg) { - // fmt::print("flush due to tex0\n"); flush_pending(render_state, prof); + m_stats.flush_from_tex_0++; m_texture_state.texture_base_ptr = reg.tbp0(); m_texture_state.using_mt4hh = reg.psm() == GsTex0::PSM::PSMT4HH; m_prim_gl_state_needs_gl_update = true; m_texture_state.current_register = reg; + if (m_texture_state.tcc != reg.tcc()) { m_texture_state.needs_gl_update = true; } @@ -721,9 +757,7 @@ void DirectRenderer::handle_xyzf2_packed(const u8* data, u8 f = (upper >> 36); bool adc = upper & (1ull << 47); - assert(!adc); - // assert(!f); - handle_xyzf2_common(x, y, z, f, render_state, prof); + handle_xyzf2_common(x, y, z, f, render_state, prof, !adc); } void DirectRenderer::handle_zbuf1(u64 val, @@ -739,7 +773,7 @@ void DirectRenderer::handle_zbuf1(u64 val, // assert(write); if (write != m_test_state.depth_writes) { - // fmt::print("flush due to depth write\n"); + m_stats.flush_from_zbuf++; flush_pending(render_state, prof); m_test_state_needs_gl_update = true; m_test_state.depth_writes = write; @@ -755,7 +789,7 @@ void DirectRenderer::handle_test1(u64 val, } assert(!reg.date()); if (m_test_state.current_register != reg) { - // fmt::print("flush due to test\n"); + m_stats.flush_from_test++; flush_pending(render_state, prof); m_test_state.from_register(reg); m_test_state_needs_gl_update = true; @@ -768,7 +802,7 @@ void DirectRenderer::handle_alpha1(u64 val, ScopedProfilerNode& prof) { GsAlpha reg(val); if (m_blend_state.current_register != reg) { - // fmt::print("flush due to alpha1\n"); + m_stats.flush_from_alpha++; flush_pending(render_state, prof); 
m_blend_state.from_register(reg); m_blend_state_needs_gl_update = true; @@ -782,15 +816,17 @@ void DirectRenderer::handle_pabe(u64 val) { void DirectRenderer::handle_clamp1(u64 val, SharedRenderState* render_state, ScopedProfilerNode& prof) { - assert(val == 0b101 || val == 0); + if (!(val == 0b101 || val == 0 || val == 1 || val == 0b100)) { + fmt::print("clamp: 0x{:x}\n", val); + assert(false); + } + if (m_clamp_state.current_register != val) { + m_stats.flush_from_clamp++; flush_pending(render_state, prof); m_clamp_state.current_register = val; - if (val == 0b101) { - m_clamp_state.clamp = true; - } else { - m_clamp_state.clamp = false; - } + m_clamp_state.clamp_s = val & 0b001; + m_clamp_state.clamp_t = val & 0b100; m_texture_state.needs_gl_update = true; } } @@ -818,7 +854,7 @@ void DirectRenderer::handle_prim(u64 val, GsPrim prim(val); if (m_prim_gl_state.current_register != prim || m_blend_state.alpha_blend_enable != prim.abe()) { - // fmt::print("flush due to prim\n"); + m_stats.flush_from_prim++; flush_pending(render_state, prof); m_prim_gl_state.from_register(prim); m_blend_state.alpha_blend_enable = prim.abe(); @@ -839,7 +875,8 @@ void DirectRenderer::handle_xyzf2_common(u32 x, u32 z, u8 f, SharedRenderState* render_state, - ScopedProfilerNode& prof) { + ScopedProfilerNode& prof, + bool advance) { assert(z < (1 << 24)); (void)f; // TODO: do something with this. 
if (m_prim_buffer.is_full()) { @@ -850,7 +887,9 @@ void DirectRenderer::handle_xyzf2_common(u32 x, m_prim_building.building_stq.at(m_prim_building.building_idx) = math::Vector( m_prim_building.st_reg.x(), m_prim_building.st_reg.y(), m_prim_building.Q); m_prim_building.building_rgba.at(m_prim_building.building_idx) = m_prim_building.rgba_reg; - m_prim_building.building_vert.at(m_prim_building.building_idx) = {x << 16, y << 16, z << 8}; + m_prim_building.building_vert.at(m_prim_building.building_idx) = + math::Vector{x << 16, y << 16, z << 8}; + m_prim_building.building_idx++; switch (m_prim_building.kind) { @@ -862,8 +901,8 @@ void DirectRenderer::handle_xyzf2_common(u32 x, auto& corner2_vert = m_prim_building.building_vert[1]; auto& corner2_rgba = m_prim_building.building_rgba[1]; // should use most recent vertex z. - math::Vector corner3_vert = {corner1_vert[0], corner2_vert[1], corner2_vert[2]}; - math::Vector corner4_vert = {corner2_vert[0], corner1_vert[1], corner2_vert[2]}; + math::Vector corner3_vert{corner1_vert[0], corner2_vert[1], corner2_vert[2]}; + math::Vector corner4_vert{corner2_vert[0], corner1_vert[1], corner2_vert[2]}; if (m_prim_gl_state.gouraud_enable) { // I'm not really sure what the GS does here. 
@@ -890,9 +929,11 @@ void DirectRenderer::handle_xyzf2_common(u32 x, m_prim_building.tri_strip_startup++; } if (m_prim_building.tri_strip_startup >= 3) { - for (int i = 0; i < 3; i++) { - m_prim_buffer.push(m_prim_building.building_rgba[i], m_prim_building.building_vert[i], - m_prim_building.building_stq[i]); + if (advance) { + for (int i = 0; i < 3; i++) { + m_prim_buffer.push(m_prim_building.building_rgba[i], m_prim_building.building_vert[i], + m_prim_building.building_stq[i]); + } } } @@ -928,7 +969,7 @@ void DirectRenderer::handle_xyzf2_common(u32 x, if (m_prim_building.building_idx == 2) { math::Vector pt0 = m_prim_building.building_vert[0].cast(); math::Vector pt1 = m_prim_building.building_vert[1].cast(); - auto normal = (pt1 - pt0).normalized().cross({0, 0, 1}); + auto normal = (pt1 - pt0).normalized().cross(math::Vector{0, 0, 1}); double line_width = (1 << 19); // debug_print_vtx(m_prim_building.building_vert[0]); @@ -966,7 +1007,7 @@ void DirectRenderer::handle_xyzf2(u64 val, u32 z = (val >> 32) & 0xffffff; u32 f = (val >> 56) & 0xff; - handle_xyzf2_common(x, y, z, f, render_state, prof); + handle_xyzf2_common(x, y, z, f, render_state, prof, true); } void DirectRenderer::reset_state() { @@ -983,8 +1024,7 @@ void DirectRenderer::reset_state() { m_prim_building = PrimBuildState(); - m_triangles = 0; - m_draw_calls = 0; + m_stats = {}; } void DirectRenderer::TestState::from_register(GsTest reg) { diff --git a/game/graphics/opengl_renderer/DirectRenderer.h b/game/graphics/opengl_renderer/DirectRenderer.h index a1a0a6d412..d1bb6d6750 100644 --- a/game/graphics/opengl_renderer/DirectRenderer.h +++ b/game/graphics/opengl_renderer/DirectRenderer.h @@ -92,7 +92,8 @@ class DirectRenderer : public BucketRenderer { u32 z, u8 f, SharedRenderState* render_state, - ScopedProfilerNode& prof); + ScopedProfilerNode& prof, + bool advance); void update_gl_prim(SharedRenderState* render_state); void update_gl_blend(); @@ -133,7 +134,8 @@ class DirectRenderer : public 
BucketRenderer { struct ClampState { void from_register(u64 value); u64 current_register = 0b101; - bool clamp = true; + bool clamp_s = true; + bool clamp_t = true; } m_clamp_state; // state set through the prim register that requires changing GL stuff. @@ -164,7 +166,7 @@ class DirectRenderer : public BucketRenderer { // state set through the prim/rgbaq register that doesn't require changing GL stuff struct PrimBuildState { GsPrim::Kind kind = GsPrim::Kind::PRIM_7; - math::Vector rgba_reg = {0, 0, 0, 0}; + math::Vector rgba_reg = math::Vector{0, 0, 0, 0}; math::Vector st_reg; std::array, 3> building_rgba; @@ -207,8 +209,18 @@ class DirectRenderer : public BucketRenderer { bool always_draw = false; } m_debug_state; - int m_triangles = 0; - int m_draw_calls = 0; + struct { + int triangles = 0; + int draw_calls = 0; + + int flush_from_tex_0 = 0; + int flush_from_tex_1 = 0; + int flush_from_zbuf = 0; + int flush_from_test = 0; + int flush_from_alpha = 0; + int flush_from_clamp = 0; + int flush_from_prim = 0; + } m_stats; bool m_prim_gl_state_needs_gl_update = true; bool m_test_state_needs_gl_update = true; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index cbe9e036a2..571dcc4d48 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -8,6 +8,7 @@ #include "third-party/imgui/imgui.h" #include "common/util/FileUtil.h" #include "game/graphics/opengl_renderer/SkyRenderer.h" +#include "game/graphics/opengl_renderer/tfrag/TFragment.h" // for the vif callback #include "game/kernel/kmachine.h" @@ -58,26 +59,35 @@ OpenGLRenderer::OpenGLRenderer(std::shared_ptr texture_pool) * Construct bucket renderers. 
We can specify different renderers for different buckets */ void OpenGLRenderer::init_bucket_renderers() { + // temp + init_bucket_renderer("bucket0", BucketId::BUCKET0); init_bucket_renderer("sky", BucketId::SKY_DRAW); + init_bucket_renderer("tfrag-tex-0", BucketId::TFRAG_TEX_LEVEL0); + init_bucket_renderer("tfrag-0", BucketId::TFRAG_LEVEL0, false); init_bucket_renderer("tfrag-tex-1", BucketId::TFRAG_TEX_LEVEL1); + init_bucket_renderer("tfrag-1", BucketId::TFRAG_LEVEL1, false); init_bucket_renderer("shrub-tex-0", BucketId::SHRUB_TEX_LEVEL0); init_bucket_renderer("shrub-tex-1", BucketId::SHRUB_TEX_LEVEL1); init_bucket_renderer("alpha-tex-0", BucketId::ALPHA_TEX_LEVEL0); init_bucket_renderer("alpha-tex-1", BucketId::ALPHA_TEX_LEVEL1); auto sky_blender = std::make_shared(); - init_bucket_renderer("sky-blend-0", BucketId::SKY_BLEND_LEVEL0, sky_blender); - init_bucket_renderer("sky-blend-1", BucketId::SKY_BLEND_LEVEL1, sky_blender); + init_bucket_renderer("sky-blend-and-tfrag-trans-0", + BucketId::TFRAG_TRANS0_AND_SKY_BLEND_LEVEL0, sky_blender); + init_bucket_renderer("tfrag-dirt-0", BucketId::TFRAG_DIRT_LEVEL0, false); + init_bucket_renderer("sky-blend-and-tfrag-trans-1", + BucketId::TFRAG_TRANS1_AND_SKY_BLEND_LEVEL1, sky_blender); + init_bucket_renderer("tfrag-dirt-1", BucketId::TFRAG_DIRT_LEVEL1, false); init_bucket_renderer("pris-tex-0", BucketId::PRIS_TEX_LEVEL0); init_bucket_renderer("pris-tex-1", BucketId::PRIS_TEX_LEVEL1); init_bucket_renderer("water-tex-0", BucketId::WATER_TEX_LEVEL0); init_bucket_renderer("water-tex-1", BucketId::WATER_TEX_LEVEL1); init_bucket_renderer("pre-sprite-tex", BucketId::PRE_SPRITE_TEX); init_bucket_renderer("sprite", BucketId::SPRITE); - init_bucket_renderer("debug-draw-0", BucketId::DEBUG_DRAW_0, 102, + init_bucket_renderer("debug-draw-0", BucketId::DEBUG_DRAW_0, 1024, DirectRenderer::Mode::NORMAL); - init_bucket_renderer("debug-draw-1", BucketId::DEBUG_DRAW_1, 102, + init_bucket_renderer("debug-draw-1", BucketId::DEBUG_DRAW_1, 
1024, DirectRenderer::Mode::NORMAL); // for now, for any unset renderers, just set them to an EmptyBucketRenderer. diff --git a/game/graphics/opengl_renderer/SkyRenderer.cpp b/game/graphics/opengl_renderer/SkyRenderer.cpp index f53a1fe1ff..e9a01449b1 100644 --- a/game/graphics/opengl_renderer/SkyRenderer.cpp +++ b/game/graphics/opengl_renderer/SkyRenderer.cpp @@ -241,7 +241,9 @@ SkyBlender::Stats SkyBlender::do_sky_blends(DmaFollower& dma, SkyBlendHandler::SkyBlendHandler(const std::string& name, BucketId my_id, std::shared_ptr shared_blender) - : BucketRenderer(name, my_id), m_shared_blender(shared_blender) {} + : BucketRenderer(name, my_id), + m_shared_blender(shared_blender), + m_tfrag_renderer(fmt::format("tfrag-{}", name), my_id, true) {} void SkyBlendHandler::handle_sky_copies(DmaFollower& dma, SharedRenderState* render_state, @@ -293,19 +295,29 @@ void SkyBlendHandler::render(DmaFollower& dma, assert(empty.vif0() == 0); assert(empty.vif1() == 0); - assert(dma.current_tag().kind == DmaTag::Kind::CALL); - dma.read_and_advance(); - dma.read_and_advance(); // cnt - assert(dma.current_tag().kind == DmaTag::Kind::RET); - dma.read_and_advance(); // ret - dma.read_and_advance(); // ret - assert(dma.current_tag_offset() == render_state->next_bucket); + if (dma.current_tag().kind != DmaTag::Kind::CALL) { + auto tfrag_prof = prof.make_scoped_child("tfrag-trans"); + m_tfrag_renderer.render(dma, render_state, tfrag_prof); + } else { + assert(dma.current_tag().kind == DmaTag::Kind::CALL); + dma.read_and_advance(); + dma.read_and_advance(); // cnt + assert(dma.current_tag().kind == DmaTag::Kind::RET); + dma.read_and_advance(); // ret + dma.read_and_advance(); // ret + assert(dma.current_tag_offset() == render_state->next_bucket); + } } void SkyBlendHandler::draw_debug_window() { ImGui::Separator(); ImGui::Text("Draw/Blend ( sky ): %d/%d", m_stats.sky_draws, m_stats.sky_blends); ImGui::Text("Draw/Blend (cloud): %d/%d", m_stats.cloud_draws, m_stats.cloud_blends); + + if 
(ImGui::TreeNode("tfrag")) { + m_tfrag_renderer.draw_debug_window(); + ImGui::TreePop(); + } } SkyRenderer::SkyRenderer(const std::string& name, BucketId my_id) diff --git a/game/graphics/opengl_renderer/SkyRenderer.h b/game/graphics/opengl_renderer/SkyRenderer.h index c8fce2c291..c120dff36d 100644 --- a/game/graphics/opengl_renderer/SkyRenderer.h +++ b/game/graphics/opengl_renderer/SkyRenderer.h @@ -2,6 +2,7 @@ #pragma once #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/opengl_renderer/DirectRenderer.h" +#include "game/graphics/opengl_renderer/tfrag/TFragment.h" class SkyBlender { public: @@ -51,6 +52,7 @@ class SkyBlendHandler : public BucketRenderer { std::shared_ptr m_shared_blender; SkyBlender::Stats m_stats; + TFragment m_tfrag_renderer; }; /*! diff --git a/game/graphics/opengl_renderer/SpriteRenderer.cpp b/game/graphics/opengl_renderer/SpriteRenderer.cpp index 6bf5d9db73..f9d51de7b2 100644 --- a/game/graphics/opengl_renderer/SpriteRenderer.cpp +++ b/game/graphics/opengl_renderer/SpriteRenderer.cpp @@ -1,114 +1,9 @@ #include "third-party/fmt/core.h" #include "third-party/imgui/imgui.h" #include "SpriteRenderer.h" +#include "game/graphics/opengl_renderer/dma_helpers.h" namespace { -/*! - * Make sure that the DMA Transfer is a VIF unpack (copy data to VIF memory) with the given - * setup. This is for a transfer with STCYCL followed by UNPACK. 
- */ -bool verify_unpack_with_stcycl(const DmaTransfer& transfer, - VifCode::Kind unpack_kind, - u16 cl, - u16 wl, - u32 qwc, - u32 addr, - bool usn, - bool flg) { - if (transfer.size_bytes != qwc * 16) { - fmt::print("verify_unpack: bad size {} vs {}\n", transfer.size_bytes, qwc * 16); - return false; - } - - if (transfer.vifcode0().kind != VifCode::Kind::STCYCL) { - fmt::print("verify_unpack: bad vifcode 0\n"); - return false; - } - - if (transfer.vifcode1().kind != unpack_kind) { - fmt::print("verify_unpack: bad vifcode 1\n"); - return false; - } - - VifCodeStcycl stcycl(transfer.vifcode0()); - VifCodeUnpack unpack(transfer.vifcode1()); - - if (stcycl.cl != cl || stcycl.wl != wl) { - fmt::print("verify_unpack: bad cl/wl {}/{} vs {}/{}\n", stcycl.cl, stcycl.wl, cl, wl); - return false; - } - - if (unpack.addr_qw != addr || unpack.use_tops_flag != flg || unpack.is_unsigned != usn) { - fmt::print("verify_unpack: bad unpack {}/{}/{} vs {}/{}/{}", unpack.addr_qw, - unpack.use_tops_flag, unpack.is_unsigned, addr, flg, usn); - return false; - } - - if (transfer.vifcode1().num != qwc) { - fmt::print("verify_unpack: bad num {} vs {}\n", transfer.vifcode1().num, qwc); - return false; - } - - return true; -} - -/*! - * Make sure that the DMA transfer is a VIF unpack with the given setup. - * This is for when there's just an UNPACK. 
- */ -bool verify_unpack_no_stcycl(const DmaTransfer& transfer, - VifCode::Kind unpack_kind, - u32 qwc, - u32 addr, - bool usn, - bool flg) { - if (transfer.size_bytes != qwc * 16) { - fmt::print("verify_unpack: bad size {} vs {}\n", transfer.size_bytes, qwc * 16); - return false; - } - - if (transfer.vifcode0().kind != VifCode::Kind::NOP) { - fmt::print("verify_unpack: bad vifcode 0\n"); - return false; - } - - if (transfer.vifcode1().kind != unpack_kind) { - fmt::print("verify_unpack: bad vifcode 1\n"); - return false; - } - - VifCodeUnpack unpack(transfer.vifcode1()); - - if (unpack.addr_qw != addr || unpack.use_tops_flag != flg || unpack.is_unsigned != usn) { - fmt::print("verify_unpack: bad unpack {}/{}/{} vs {}/{}/{}", unpack.addr_qw, - unpack.use_tops_flag, unpack.is_unsigned, addr, flg, usn); - return false; - } - - if (transfer.vifcode1().num != qwc) { - fmt::print("verify_unpack: bad num {} vs {}\n", transfer.vifcode1().num, qwc); - return false; - } - - return true; -} - -/*! - * Verify the DMA transfer is a VIF unpack (with no STCYCL tag). - * Then, unpack the data to dst. - */ -void unpack_to_no_stcycl(void* dst, - const DmaTransfer& transfer, - VifCode::Kind unpack_kind, - u32 size_bytes, - u32 addr, - bool usn, - bool flg) { - bool ok = verify_unpack_no_stcycl(transfer, unpack_kind, size_bytes / 16, addr, usn, flg); - assert(ok); - assert((size_bytes & 0xf) == 0); - memcpy(dst, transfer.data, size_bytes); -} /*! * Does the next DMA transfer look like it could be the start of a 2D group? 
diff --git a/game/graphics/opengl_renderer/TextureUploadHandler.cpp b/game/graphics/opengl_renderer/TextureUploadHandler.cpp index 3ab2c73fc2..f81f9e9f65 100644 --- a/game/graphics/opengl_renderer/TextureUploadHandler.cpp +++ b/game/graphics/opengl_renderer/TextureUploadHandler.cpp @@ -96,7 +96,18 @@ void TextureUploadHandler::render(DmaFollower& dma, assert(ok); } - } else if (uploads.empty()) { + } else if (uploads.size() == 1 && uploads[0].mode == 0) { + bool has_segment[3] = {true, true, true}; + if (!try_to_populate_from_cache(uploads[0].page, has_segment, render_state)) { + populate_cache(render_state->texture_pool->convert_textures( + ee_mem + uploads[0].page, 0, ee_mem, render_state->offset_of_s7), + render_state); + bool ok = try_to_populate_from_cache(uploads[0].page, has_segment, render_state); + assert(ok); + } + } + + else if (uploads.empty()) { // do nothing. } else { fmt::print("unhandled upload sequence in {}:\n", m_name); diff --git a/game/graphics/opengl_renderer/dma_helpers.cpp b/game/graphics/opengl_renderer/dma_helpers.cpp new file mode 100644 index 0000000000..2869a8b474 --- /dev/null +++ b/game/graphics/opengl_renderer/dma_helpers.cpp @@ -0,0 +1,137 @@ +#include "dma_helpers.h" + +#include "third-party/fmt/format.h" + +/*! + * Make sure that the DMA Transfer is a VIF unpack (copy data to VIF memory) with the given + * setup. This is for a transfer with STCYCL followed by UNPACK. 
+ */ +bool verify_unpack_with_stcycl(const DmaTransfer& transfer, + VifCode::Kind unpack_kind, + u16 cl, + u16 wl, + u32 qwc, + u32 addr, + bool usn, + bool flg) { + if (transfer.size_bytes != qwc * 16) { + fmt::print("verify_unpack: bad size {} vs {}\n", transfer.size_bytes, qwc * 16); + return false; + } + + if (transfer.vifcode0().kind != VifCode::Kind::STCYCL) { + fmt::print("verify_unpack: bad vifcode 0\n"); + return false; + } + + if (transfer.vifcode1().kind != unpack_kind) { + fmt::print("verify_unpack: bad vifcode 1\n"); + return false; + } + + VifCodeStcycl stcycl(transfer.vifcode0()); + VifCodeUnpack unpack(transfer.vifcode1()); + + if (stcycl.cl != cl || stcycl.wl != wl) { + fmt::print("verify_unpack: bad cl/wl {}/{} vs {}/{}\n", stcycl.cl, stcycl.wl, cl, wl); + return false; + } + + if (unpack.addr_qw != addr || unpack.use_tops_flag != flg || unpack.is_unsigned != usn) { + fmt::print("verify_unpack: bad unpack {}/{}/{} vs {}/{}/{}", unpack.addr_qw, + unpack.use_tops_flag, unpack.is_unsigned, addr, flg, usn); + return false; + } + + if (transfer.vifcode1().num != qwc) { + fmt::print("verify_unpack: bad num {} vs {}\n", transfer.vifcode1().num, qwc); + return false; + } + + return true; +} + +/*! + * Verify the DMA transfer is a VIF unpack (with STCYCL tag). + * Then, unpack the data to dst. + */ +void unpack_to_stcycl(void* dst, + const DmaTransfer& transfer, + VifCode::Kind unpack_kind, + u16 cl, + u16 wl, + u32 size_bytes, + u32 addr, + bool usn, + bool flg) { + bool ok = + verify_unpack_with_stcycl(transfer, unpack_kind, cl, wl, size_bytes / 16, addr, usn, flg); + assert(ok); + assert((size_bytes & 0xf) == 0); + memcpy(dst, transfer.data, size_bytes); +} + +/*! + * Make sure that the DMA transfer is a VIF unpack with the given setup. + * This is for when there's just an UNPACK. 
+ */
+bool verify_unpack_no_stcycl(const DmaTransfer& transfer,
+                             VifCode::Kind unpack_kind,
+                             u32 qwc,
+                             u32 addr,
+                             bool usn,
+                             bool flg) {  // returns false, after printing why, on the first mismatch
+  if (transfer.size_bytes != qwc * 16) {  // payload must be exactly qwc 16-byte units
+    fmt::print("verify_unpack: bad size {} vs {}\n", transfer.size_bytes, qwc * 16);
+    return false;
+  }
+
+  if (transfer.vifcode0().kind != VifCode::Kind::NOP) {  // no STCYCL here: vifcode0 must be a NOP
+    fmt::print("verify_unpack: bad vifcode 0\n");
+    return false;
+  }
+
+  if (transfer.vifcode1().kind != unpack_kind) {  // vifcode1 must be the requested UNPACK kind
+    fmt::print("verify_unpack: bad vifcode 1\n");
+    return false;
+  }
+
+  VifCodeUnpack unpack(transfer.vifcode1());  // decode fields only after the kind matched
+
+  if (unpack.addr_qw != addr || unpack.use_tops_flag != flg || unpack.is_unsigned != usn) {
+    fmt::print("verify_unpack: bad unpack {}/{}/{} vs {}/{}/{}\n", unpack.addr_qw,
+               unpack.use_tops_flag, unpack.is_unsigned, addr, flg, usn);  // actual vs expected
+    return false;
+  }
+
+  if (transfer.vifcode1().num != qwc) {  // the UNPACK's num field must equal qwc
+    fmt::print("verify_unpack: bad num {} vs {}\n", transfer.vifcode1().num, qwc);
+    return false;
+  }
+
+  return true;  // every check passed
+}
+
+/*!
+ * Verify the DMA transfer is a VIF unpack (with no STCYCL tag).
+ * Then, unpack the data to dst.
+ */
+void unpack_to_no_stcycl(void* dst,
+                         const DmaTransfer& transfer,
+                         VifCode::Kind unpack_kind,
+                         u32 size_bytes,
+                         u32 addr,
+                         bool usn,
+                         bool flg) {
+  bool ok = verify_unpack_no_stcycl(transfer, unpack_kind, size_bytes / 16, addr, usn, flg);
+  assert(ok);  // the verify call above already printed the reason on failure
+  assert((size_bytes & 0xf) == 0);  // low 4 bits clear, so size_bytes / 16 above was exact
+  memcpy(dst, transfer.data, size_bytes);
+}
+
+void verify_mscal(const DmaTransfer& transfer, int address) {  // asserts only; returns nothing
+  assert(transfer.size_bytes == 0);  // no payload expected
+  assert(transfer.vif0() == 0);  // first VIF word must be zero
+  assert(transfer.vifcode1().kind == VifCode::Kind::MSCAL);
+  assert(transfer.vifcode1().immediate == address);  // the MSCAL immediate must equal address
+}
\ No newline at end of file
diff --git a/game/graphics/opengl_renderer/dma_helpers.h b/game/graphics/opengl_renderer/dma_helpers.h
new file mode 100644
index 0000000000..1130a0819c
--- /dev/null
+++ b/game/graphics/opengl_renderer/dma_helpers.h
@@ -0,0 +1,51 @@
+#pragma once
+
+#include "common/dma/dma_chain_read.h"  // DmaTransfer
+
+bool verify_unpack_with_stcycl(const DmaTransfer& transfer,  // STCYCL followed by UNPACK
+                               VifCode::Kind unpack_kind,
+                               u16 cl,
+                               u16 wl,
+                               u32 qwc,
+                               u32 addr,
+                               bool usn,
+                               bool flg);
+
+/*!
+ * Make sure that the DMA transfer is a VIF unpack with the given setup.
+ * This is for when there's just an UNPACK.
+ */
+bool verify_unpack_no_stcycl(const DmaTransfer& transfer,
+                             VifCode::Kind unpack_kind,
+                             u32 qwc,
+                             u32 addr,
+                             bool usn,
+                             bool flg);
+
+/*!
+ * Verify the DMA transfer is a VIF unpack (with no STCYCL tag).
+ * Then, unpack the data to dst.
+ */
+void unpack_to_no_stcycl(void* dst,
+                         const DmaTransfer& transfer,
+                         VifCode::Kind unpack_kind,
+                         u32 size_bytes,  // asserted to be a multiple of 16 by the implementation
+                         u32 addr,
+                         bool usn,
+                         bool flg);
+
+/*!
+ * Verify the DMA transfer is a VIF unpack (with STCYCL tag).
+ * Then, unpack the data to dst.
+ */ +void unpack_to_stcycl(void* dst, + const DmaTransfer& transfer, + VifCode::Kind unpack_kind, + u16 cl, + u16 wl, + u32 size_bytes, + u32 addr, + bool usn, + bool flg); + +void verify_mscal(const DmaTransfer& transfer, int address); \ No newline at end of file diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.cpp b/game/graphics/opengl_renderer/tfrag/TFragment.cpp new file mode 100644 index 0000000000..419ed750d4 --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/TFragment.cpp @@ -0,0 +1,431 @@ +#include "TFragment.h" + +#include "third-party/imgui/imgui.h" +#include "game/graphics/opengl_renderer/dma_helpers.h" + +namespace { +bool looks_like_tfragment_dma(const DmaFollower& follow) { + return follow.current_tag_vifcode0().kind == VifCode::Kind::STCYCL; +} + +bool looks_like_tfrag_init(const DmaFollower& follow) { + return follow.current_tag_vifcode0().kind == VifCode::Kind::NOP && + follow.current_tag_vifcode1().kind == VifCode::Kind::DIRECT && + follow.current_tag_vifcode1().immediate == 2; +} +} // namespace + +TFragment::TFragment(const std::string& name, BucketId my_id, bool child_mode) + : BucketRenderer(name, my_id), + m_child_mode(child_mode), + m_direct_renderer(fmt::format("{}.direct", name), my_id, 1024, DirectRenderer::Mode::NORMAL) { + for (auto& buf : m_buffered_data) { + for (auto& x : buf.pad) { + x = 0xff; + } + } + + for (auto& x : m_kick_data.pad) { + x = 0; + } +} + +void TFragment::render(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + m_debug_string.clear(); + m_frag_debug.clear(); + m_direct_renderer.reset_state(); + m_stats = {}; + + if (!m_enabled) { + while (dma.current_tag_offset() != render_state->next_bucket) { + dma.read_and_advance(); + } + return; + } + + // First thing should be a NEXT with two nops. + // unless we are a child, in which case our parent took this already. 
+ if (!m_child_mode) { + auto data0 = dma.read_and_advance(); + assert(data0.vif1() == 0); + assert(data0.vif0() == 0); + assert(data0.size_bytes == 0); + } + + if (dma.current_tag().kind == DmaTag::Kind::CALL) { + // renderer didn't run, let's just get out of here. + for (int i = 0; i < 4; i++) { + dma.read_and_advance(); + } + assert(dma.current_tag_offset() == render_state->next_bucket); + return; + } + + if (m_extra_debug) { + ImGui::Begin(fmt::format("{} extra", m_name).c_str()); + } + + while (looks_like_tfrag_init(dma)) { + m_debug_string += "------------- START!\n"; + handle_initialization(dma, render_state, prof); + int count = 0; + // fmt::print("---------------------------------------START\n"); + + while (looks_like_tfragment_dma(dma)) { + m_stats.tfrag_dma_packets++; + auto frag = dma.read_and_advance(); + m_stats.tfrag_bytes += frag.size_bytes; + + if (m_extra_debug) { + handle_tfrag(frag, render_state, prof); + } else { + handle_tfrag(frag, render_state, prof); + } + if (m_max_draw >= 0 && count++ > m_max_draw) { + break; + } + } + + if (dma.current_tag().qwc == 3) { + dma.read_and_advance(); + } + if (dma.current_tag().qwc == 0) { + dma.read_and_advance(); + } + } + + if (m_extra_debug) { + ImGui::End(); + } + + m_debug_string += fmt::format("fail: {}\n", dma.current_tag().print()); + m_direct_renderer.flush_pending(render_state, prof); + + while (dma.current_tag_offset() != render_state->next_bucket) { + auto tag = dma.current_tag().print(); + auto data = dma.read_and_advance(); + m_debug_string += + fmt::format("DMA {} {} bytes, {}\n", tag, data.size_bytes, data.vifcode0().print()); + } +} +void TFragment::draw_debug_window() { + ImGui::Separator(); + ImGui::Checkbox("Extra Debug", &m_extra_debug); + ImGui::InputInt("Max Draw", &m_max_draw); + ImGui::SameLine(); + if (ImGui::Button("All")) { + m_max_draw = -1; + } + ImGui::Checkbox("Skip MSCAL", &m_skip_mscals); + ImGui::Checkbox("Skip XGKICK", &m_skip_xgkick); + ImGui::Checkbox("Prog8 hack", 
&m_prog8_with_prog6); + ImGui::Checkbox("Prog10 hack", &m_prog10_with_prog6); + ImGui::Checkbox("Prog18 hack", &m_prog18_with_prog6); + ImGui::Checkbox("Others with prog6", &m_all_with_prog6); + ImGui::Text("packets: %d", m_stats.tfrag_dma_packets); + ImGui::Text("frag bytes: %d", m_stats.tfrag_bytes); + ImGui::Text("errors: %d", m_stats.error_packets); + for (int prog = 0; prog < 12; prog++) { + ImGui::Text(" prog %d: %d calls\n", prog, m_stats.per_program[prog].calls); + } + + if (ImGui::TreeNode("direct")) { + m_direct_renderer.draw_debug_window(); + ImGui::TreePop(); + } + + ImGui::TextUnformatted(m_debug_string.data()); +} + +void TFragment::handle_initialization(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // Set up test (different between different renderers) + auto setup_test = dma.read_and_advance(); + assert(setup_test.vif0() == 0); + assert(setup_test.vifcode1().kind == VifCode::Kind::DIRECT); + assert(setup_test.vifcode1().immediate == 2); + assert(setup_test.size_bytes == 32); + memcpy(m_test_setup, setup_test.data, 32); + m_direct_renderer.render_gif(m_test_setup, 32, render_state, prof); + + // matrix 0 + auto mat0_upload = dma.read_and_advance(); + unpack_to_stcycl(&m_buffered_data[0].pad[TFragDataMem::TFragMatrix0 * 16], mat0_upload, + VifCode::Kind::UNPACK_V4_32, 4, 4, 64, TFragDataMem::TFragMatrix0, false, false); + m_debug_string += fmt::format("Matrix 0:\n {}\n", m_matrix_0.to_string_aligned()); + + // matrix 1 + auto mat1_upload = dma.read_and_advance(); + unpack_to_stcycl(&m_buffered_data[1].pad[TFragDataMem::TFragMatrix0 * 16], mat1_upload, + VifCode::Kind::UNPACK_V4_32, 4, 4, 64, TFragDataMem::TFragMatrix1, false, false); + m_debug_string += fmt::format("Matrix 1:\n {}\n", m_matrix_1.to_string_aligned()); + + // data + auto data_upload = dma.read_and_advance(); + unpack_to_stcycl(&m_tfrag_data, data_upload, VifCode::Kind::UNPACK_V4_32, 4, 4, sizeof(TFragData), + TFragDataMem::TFragFrameData, false, 
false); + m_debug_string += fmt::format("Frame Data:\n {}\n", m_tfrag_data.print()); + + // call the setup program + auto mscal_setup = dma.read_and_advance(); + verify_mscal(mscal_setup, TFragProgMem::TFragSetup); + + // iaddiu vi14, vi00, 0x2a0 | nop + m_ptrs.vi14 = 0x2a0; // todo constant + // iaddiu vi01, vi00, 0x350 | nop + m_ptrs.vi01 = 0x350; // todo constant + // mfir.x vf03, vi14 | nop + m_ptrs.vf03_x = m_ptrs.vi14; + // mfir.y vf03, vi01 | nop + m_ptrs.vf03_y = m_ptrs.vi01; + // mfir.z vf03, vi14 | nop + m_ptrs.vf03_z = m_ptrs.vi14; + // mfir.w vf03, vi01 | nop :e + m_ptrs.vf03_w = m_ptrs.vi01; + // lq.xyzw vf04, 664(vi00) | nop + m_globals.vf04_ambient = m_tfrag_data.ambient; // TODO get rid? + + // setup double buffering. + auto db_setup = dma.read_and_advance(); + assert(db_setup.size_bytes == 0); + assert(db_setup.vifcode0().kind == VifCode::Kind::BASE && + db_setup.vifcode0().immediate == Buffer0_Start); + assert(db_setup.vifcode1().kind == VifCode::Kind::OFFSET && + db_setup.vifcode1().immediate == (Buffer1_Start - Buffer0_Start)); +} + +template +void TFragment::handle_tfrag(const DmaTransfer& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + auto first_vif = dma.vifcode0(); + auto second_vif = dma.vifcode1(); + if (DEBUG) { + ImGui::Separator(); + ImGui::Text("tf: %d sz %d", m_stats.tfrag_dma_packets, dma.size_bytes); + ImGui::Text(" vif: %s", first_vif.print().c_str()); + ImGui::Text(" vif: %s", second_vif.print().c_str()); + } + + // first VIF should be a STCYCL + assert(first_vif.kind == VifCode::Kind::STCYCL); + VifCodeStcycl stcycl(first_vif.immediate); + + // this is our state for running through the DMA data + int cl = stcycl.cl; + int wl = stcycl.wl; + int offset_into_data = 0; + bool row_init = false; + u32 row[4]; + u8 stmod = 0; + + // next can be one of: + // - NOP, UNPACK, MSCAL + + // fmt::print("START vif -> {} (mod {})\n", second_vif.print(), stmod); + switch (second_vif.kind) { + case VifCode::Kind::NOP: + // 
do nothing! + break; + case VifCode::Kind::UNPACK_V4_8: + offset_into_data = handle_unpack_v4_8_mode0(second_vif, dma, offset_into_data, cl, wl); + break; + case VifCode::Kind::MSCAL: + if (!m_skip_mscals) { + handle_mscal(second_vif, render_state, prof); + } + break; + default: + fmt::print("unknown second vif in tfragment: {}\n", second_vif.print()); + assert(false); + } + + bool ok = true; + while (ok && offset_into_data < (int)dma.size_bytes) { + assert((offset_into_data % 4) == 0); + auto vif = dma.read_val(offset_into_data); + offset_into_data += 4; + + auto code = VifCode(vif); + // fmt::print("vif -> {} (mod {}) {}/{} #x{:x}\n", code.print(), stmod, offset_into_data, + // dma.size_bytes, dma.data_offset); + switch (code.kind) { + case VifCode::Kind::UNPACK_V4_16: + if (DEBUG) { + ImGui::Text(" vif: %s (m %d)", code.print().c_str(), stmod); + } + if (stmod == 0) { + offset_into_data = handle_unpack_v4_16_mode0(code, dma, offset_into_data, cl, wl); + } else if (stmod == 1) { + assert(row_init); + offset_into_data = handle_unpack_v4_16_mode1(code, dma, offset_into_data, cl, wl, row); + } else { + assert(false); + } + break; + case VifCode::Kind::UNPACK_V4_32: + if (DEBUG) { + ImGui::Text(" vif: %s", code.print().c_str()); + } + assert(stmod == 0); + offset_into_data = handle_unpack_v4_32(code, dma, offset_into_data, cl, wl); + break; + case VifCode::Kind::UNPACK_V4_8: + if (DEBUG) { + ImGui::Text(" vif: %s", code.print().c_str()); + } + if (stmod == 0) { + offset_into_data = handle_unpack_v4_8_mode0(code, dma, offset_into_data, cl, wl); + } else if (stmod == 1) { + assert(row_init); + offset_into_data = handle_unpack_v4_8_mode1(code, dma, offset_into_data, cl, wl, row); + } else { + assert(false); + } + + break; + case VifCode::Kind::UNPACK_V3_32: + if (DEBUG) { + ImGui::Text(" vif: %s", code.print().c_str()); + } + assert(stmod == 0); + offset_into_data = handle_unpack_v3_32(code, dma, offset_into_data, cl, wl); + break; + case VifCode::Kind::STROW: + 
row_init = true; + memcpy(row, dma.data + offset_into_data, 16); + offset_into_data += 16; + if (DEBUG) { + Vector4f vec; + memcpy(&vec, row, 16); + ImGui::Text(" row: %s", vec.to_string_aligned().c_str()); + // fmt::print(" row: {}\n", vec.to_string_aligned().c_str()); + } + break; + case VifCode::Kind::STMOD: + if (DEBUG) { + ImGui::Text(" stmod %d\n", code.immediate); + } + if (stmod == 0) { + assert(code.immediate == 1); + } else { + assert(stmod == 1); + assert(code.immediate == 0 || code.immediate == 1); // kinda weird. + } + stmod = code.immediate; + break; + case VifCode::Kind::STCYCL: + if (DEBUG) { + ImGui::Text(" vif: %s", code.print().c_str()); + } + { + VifCodeStcycl ss(code.immediate); + cl = ss.cl; + wl = ss.wl; + } + + break; + case VifCode::Kind::NOP: + if (DEBUG) { + ImGui::Text(" NOP"); + } + break; + default: + ok = false; + if (DEBUG) { + ImGui::TextColored(ImVec4(0.8, 0.3, 0.3, 1.0), "unhandled vif: %s", code.print().c_str()); + } + break; + } + } + if (!ok) { + m_stats.error_packets++; + } else { + if (DEBUG) { + ImGui::Text("END"); + } + + assert(stmod == 0); + } +} + +template +void TFragment::handle_mscal(const VifCode& code, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + if (DEBUG) { + ImGui::TextColored(ImVec4(0.3, 0.8, 0.3, 1.0), "MSCAL: %d", code.immediate); + } + + int prog_id = code.immediate / 2; + if (prog_id >= NUM_PROGRAMS) { + fmt::print("bad program: {}\n", prog_id); + assert(false); + } + m_stats.per_program[prog_id].calls++; + + switch (code.immediate) { + case 12: + case 6: + exec_program_6(render_state, prof); + break; + case 8: + if (m_prog8_with_prog6) { + exec_program_6(render_state, prof); + } else { + m_stats.error_mscals++; + } + break; + case 10: + if (m_prog10_with_prog6) { + exec_program_6(render_state, prof); + } else { + m_stats.error_mscals++; + } + break; + case 18: + if (m_prog18_with_prog6) { + exec_program_6(render_state, prof); + } else { + m_stats.error_mscals++; + } + break; + default: 
+ if (m_all_with_prog6) { + exec_program_6(render_state, prof); + } else { + m_stats.error_mscals++; + if (DEBUG) { + ImGui::TextColored(ImVec4(0.8, 0.8, 0.3, 1.0), " UNHANDLED"); + } + } + + break; + } +} + +void TFragment::flip_buffers() { + m_uploading_buffer ^= 1; +} + +std::string TFragData::print() const { + std::string result; + result += fmt::format("fog: {}\n", fog.to_string_aligned()); + result += fmt::format("val: {}\n", val.to_string_aligned()); + result += fmt::format("str-gif: {}\n", str_gif.print()); + result += fmt::format("fan-gif: {}\n", fan_gif.print()); + result += fmt::format("ad-gif: {}\n", ad_gif.print()); + result += fmt::format("hvdf_offset: {}\n", hvdf_offset.to_string_aligned()); + result += fmt::format("hmge_scale: {}\n", hmge_scale.to_string_aligned()); + result += fmt::format("invh_scale: {}\n", invh_scale.to_string_aligned()); + result += fmt::format("ambient: {}\n", ambient.to_string_aligned()); + result += fmt::format("guard: {}\n", guard.to_string_aligned()); + result += fmt::format("k0s[0]: {}\n", k0s[0].to_string_aligned()); + result += fmt::format("k0s[1]: {}\n", k0s[1].to_string_aligned()); + result += fmt::format("k1s[0]: {}\n", k1s[0].to_string_aligned()); + result += fmt::format("k1s[1]: {}\n", k1s[1].to_string_aligned()); + return result; +} diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.h b/game/graphics/opengl_renderer/tfrag/TFragment.h new file mode 100644 index 0000000000..218345ebfb --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/TFragment.h @@ -0,0 +1,281 @@ +#pragma once + +#include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/opengl_renderer/DirectRenderer.h" +#include "common/dma/gs.h" +#include "common/math/Vector.h" + +using math::Matrix4f; +using math::Vector4f; + +constexpr int KICK_ZONE_END = 1024; + +struct TFragData { + Vector4f fog; // 0 656 (vf01) + Vector4f val; // 1 657 (vf02) + GifTag str_gif; // 2 658 (vf06) + GifTag fan_gif; // 3 659 + GifTag ad_gif; // 
4 660 + Vector4f hvdf_offset; // 5 661 (vf10) + Vector4f hmge_scale; // 6 662 (vf11) + Vector4f invh_scale; // 7 663 + Vector4f ambient; // 8 664 + Vector4f guard; // 9 665 + Vector4f k0s[2]; // 10/11 666, 667 + Vector4f k1s[2]; // 12/13 668, 669 + + std::string print() const; +}; +static_assert(sizeof(TFragData) == 0xe0, "TFragData size"); + +struct TFragBufferedData { + u8 pad[328 * 16]; +}; +static_assert(sizeof(TFragBufferedData) == 328 * 16); + +struct TFragKickZone { + u8 pad[(KICK_ZONE_END - 670) * 16]; +}; + +class TFragment : public BucketRenderer { + public: + TFragment(const std::string& name, BucketId my_id, bool child_mode); + void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; + void draw_debug_window() override; + + private: + void handle_initialization(DmaFollower& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + void handle_tfrag(const DmaTransfer& dma, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + int handle_unpack_v4_8_mode0(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl); + int handle_unpack_v4_8_mode1(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl, + const u32 row[4]); + int handle_unpack_v4_16_mode0(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl); + int handle_unpack_v4_16_mode1(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl, + const u32 row[4]); + int handle_unpack_v4_32(const VifCode& code, const DmaTransfer& dma, int offset, int cl, int wl); + int handle_unpack_v3_32(const VifCode& code, const DmaTransfer& dma, int offset, int cl, int wl); + + template + void handle_mscal(const VifCode& code, SharedRenderState* render_state, ScopedProfilerNode& prof); + + template + void exec_program_6(SharedRenderState* render_state, ScopedProfilerNode& prof); + + template + void XGKICK(u32 addr, SharedRenderState* 
render_state, ScopedProfilerNode& prof); + + struct Prog6Inputs { + Vector4f vf04_cam_mat_x; + Vector4f vf07_cam_mat_y; + Vector4f vf08_cam_mat_z; + }; + + struct Prog6Vars { + // pre-set + u16 vi03; + u16 vi07; + u16 vi08; + u16 vi09; + u16 vi14; + Vector4f vf16_scaled_pos_0; + Vector4f vf17_scaled_pos_1; + Vector4f vf18_scaled_pos_2; + Vector4f vf19_scaled_pos_3; + + // uninit + u16 vi02; + u16 vi04; + u16 vi05; + u16 vi06_kick_zone_ptr; + u16 vi10; + u16 vi11; + u16 vi12; // seems to be gs loop count (dverts) - 0x80. + u16 vi13; + Vector4f vf09_cam_trans; + Vector4f vf12_root_pos_0; // position a, 0 + Vector4f vf13_root_pos_1; + Vector4f vf14_loop_pos_0; + Vector4f vf15_loop_pos_1; + Vector4f vf20; + Vector4f vf21; + Vector4f vf22; + Vector4f vf23; + Vector4f vf24; + Vector4f vf25; // position b, 0 + Vector4f vf26; + Vector4f vf27; + Vector4f vf28; + Vector4f vf29; + Vector4f vf30; + Vector4f vf31; + }; + + template + void exec_program_6_process_first(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + void exec_jumper_L128(const Prog6Inputs& in, Prog6Vars& vars); + + template + bool exec_jumper_L129(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + void exec_jumper_L6A1(const Prog6Inputs& in, Prog6Vars& vars); + + template + bool exec_jumper_L130(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + void exec_jumper_L6B0(const Prog6Inputs& in, Prog6Vars& vars); + + template + bool exec_jumper_L131(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + void exec_jumper_L6BF(const Prog6Inputs& in, Prog6Vars& vars); + + template + bool exec_jumper_L132(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + template + bool 
exec_jumper_L122(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + + std::string m_debug_string; + bool m_child_mode = false; + bool m_extra_debug = false; + int m_max_draw = -1; + bool m_skip_mscals = false; + bool m_skip_xgkick = false; + bool m_prog8_with_prog6 = true; + bool m_prog10_with_prog6 = true; + bool m_prog18_with_prog6 = true; + bool m_all_with_prog6 = false; + std::string m_frag_debug; + + // GS setup data + u8 m_test_setup[32]; + + // VU data + Matrix4f m_matrix_0; + Matrix4f m_matrix_1; + TFragData m_tfrag_data; + TFragKickZone m_kick_data; + + // buffers + TFragBufferedData m_buffered_data[2]; + int m_uploading_buffer = 0; + + u8* get_upload_buffer() { return (u8*)&m_buffered_data[m_uploading_buffer].pad[0]; } + u8* get_processing_buffer() { return (u8*)&m_buffered_data[1 - m_uploading_buffer].pad[0]; } + + void flip_buffers(); + u16 ilw_data(int offset, int xyzw); + u16 ilw_kick_zone(int offset, int xyzw); + + Vector4f load_vector_data(int offset); + void store_vector_kick_zone(int offset, const Vector4f& vec); + void store_gif_kick_zone(int offset, const GifTag& tag); + void store_u32_kick_zone(u32 value, int qw, int xyzw); + + enum TFragDataMem { + Buffer0_Start = 0, + TFragMatrix0 = 5, + + Buffer1_Start = 328, + TFragMatrix1 = TFragMatrix0 + Buffer1_Start, + + TFragFrameData = 656, + TFragKickZoneData = 670, + }; + + enum TFragJumper { + L128_PART0_X = 0, + L129_PART1_X = 1, + L0x6A1_PART0_Y = 2, + L130_PART1_Y = 3, + L0x6B0_PART0_Z = 4, + L131_PART1_Z = 5, + L0x6BF_PART0_W = 6, + L132_PART1_W = 7, + L122_KICK = 8, + END_PROGRAM = 9, + INVALID = 10 + }; + + TFragJumper m_next_block = TFragJumper::INVALID; + TFragJumper m_ret_block = TFragJumper::INVALID; + bool m_clip_and_3ffff = false; + Vector4f m_acc; // todo, probably rearrange this so acc stays entirely in part0 or part1? 
+ float m_q; // todo, probably regroup + + enum TFragProgMem { + TFragSetup = 0, + }; + + struct Ptrs { + int vi01; + int vi14; + int vf03_x, vf03_y, vf03_z, vf03_w; + } m_ptrs; + + struct Globals { + Vector4f vf04_ambient; + } m_globals; + + static constexpr int NUM_PROGRAMS = 13; + struct Stats { + int tfrag_dma_packets = 0; + int tfrag_bytes = 0; + int error_packets = 0; + int error_mscals = 0; + + struct PerProgram { + int calls = 0; + }; + + PerProgram per_program[NUM_PROGRAMS]; // addr / 2 + } m_stats; + + DirectRenderer m_direct_renderer; +}; diff --git a/game/graphics/opengl_renderer/tfrag/program6_cpu.cpp b/game/graphics/opengl_renderer/tfrag/program6_cpu.cpp new file mode 100644 index 0000000000..2b132be882 --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/program6_cpu.cpp @@ -0,0 +1,1173 @@ +#include "game/graphics/opengl_renderer/tfrag/TFragment.h" +#include "third-party/imgui/imgui.h" +// tfragment's program 6, implemented on the CPU (slow) + +u32 float_2_u32(float x) { + u32 y; + memcpy(&y, &x, 4); + return y; +} + +std::string int_vec_debug(const Vector4f& vec) { + return fmt::format("[{:x} {:x} {:x} {:x}]", float_2_u32(vec.x()), float_2_u32(vec.y()), + float_2_u32(vec.z()), float_2_u32(vec.w())); +} + +std::string debug_print_ad_vec(const Vector4f& val) { + u64 data; + u8 address; + memcpy(&data, val.data(), 8); + memcpy(&address, val.data() + 2, 1); + + return fmt::format("AD: 0x{:x} 0x{:x}\n", address, data); +} + +u16 TFragment::ilw_data(int offset, int xyzw) { + u16 result; + if (m_uploading_buffer == 0) { + offset -= Buffer1_Start; + } + assert(offset < Buffer1_Start); + assert(offset >= 0); + int mem_offset = (xyzw * 4) + (offset * 16); + memcpy(&result, get_processing_buffer() + mem_offset, 2); + return result; +} + +Vector4f TFragment::load_vector_data(int offset) { + Vector4f result; + if (m_uploading_buffer == 0) { + offset -= Buffer1_Start; + } + offset = offset & 0x3ff; // not super happy with this... 
+ assert(offset < Buffer1_Start); + assert(offset >= 0); + memcpy(&result, get_processing_buffer() + (offset * 16), 16); + return result; +} + +void TFragment::store_vector_kick_zone(int offset, const Vector4f& vec) { + assert(offset >= TFragDataMem::TFragKickZoneData); + assert(offset < KICK_ZONE_END); // hack increased + memcpy(&m_kick_data.pad[(offset - TFragDataMem::TFragKickZoneData) * 16], &vec.data()[0], 16); +} + +u16 TFragment::ilw_kick_zone(int offset, int xyzw) { + assert(offset >= TFragDataMem::TFragKickZoneData); + assert(offset < KICK_ZONE_END); + u16 result; + int mem_offset = (xyzw * 4) + (offset * 16); + memcpy(&result, m_kick_data.pad + mem_offset - TFragDataMem::TFragKickZoneData * 16, 2); + return result; +} + +template +void TFragment::exec_program_6(SharedRenderState* render_state, ScopedProfilerNode& prof) { + // fmt::print("exec 6\n"); + flip_buffers(); + // VF02 is VAL always + // VF05 is ADGIF always + // VF06 is STRGIF always + // VF10 is HVDF offset always + // VF11 is hmge_scale always + // VF01 is fog always + + // SETUP + // first, load globals from TFragData (these never change, so we'll just use them from TFragData) + // lq.xyzw vf02, 657(vi00) | nop + // lq.xyzw vf05, 660(vi00) | addw.z vf28, vf00, vf00 (done later) + // lq.xyzw vf06, 658(vi00) | nop + // lq.xyzw vf10, 661(vi00) | nop + // lq.xyzw vf11, 662(vi00) | nop + // lq.xyzw vf01, 656(vi00) | addz.z vf28, vf28, vf02 (done later) + + // there are two main versions, one for each double-buffer. + // I'm not sure why these need to be different yet. + // but, just in case it actually matters and we actually need to swap addresses for some reason + // (like addresses baked in to the data) + // we're going to split into two functions. + // these inputs will be given to either. 
(these are const) + Prog6Inputs inputs; + + // non-const + Prog6Vars vars; + if (m_uploading_buffer == 1) { + vars.vi14 = 0; + } else { + vars.vi14 = Buffer1_Start; + } + + vars.vf28.z() = 1.f; + vars.vf28.z() += m_tfrag_data.val.z(); + + // ilw.w vi08, 4(vi14) | nop + vars.vi08 = ilw_data(4 + vars.vi14, 3); + // fmt::print("------------- VI08 init: {}\n", vars.vi08); + // ilw.z vi09, 4(vi14) | nop + vars.vi09 = ilw_data(4 + vars.vi14, 2); + // ilw.y vi03, 3(vi14) | nop + vars.vi03 = ilw_data(3 + vars.vi14, 1); + + // fmt::print("-------VI03 init: {}\n", vars.vi03); + + if (DEBUG) { + // small, like 9, 54, 66 + ImGui::Text("ints: %d %d %d", vars.vi08, vars.vi09, vars.vi03); + } + + // fmt::print("vi09: #x{:x} ({})\n", vars.vi09, vars.vi14); + + // fcset 0x0 | nop + // iaddi vi07, vi00, -0x1 | nop + vars.vi07 = -1; + + // lq.xyzw vf04, 5(vi14) | mulw.xyzw vf16, vf00, vf00 + inputs.vf04_cam_mat_x = load_vector_data(vars.vi14 + 5); + vars.vf16_scaled_pos_0 = Vector4f(0, 0, 0, 1); + + // lq.xyzw vf07, 6(vi14) | mulw.xyzw vf17, vf00, vf00 + inputs.vf07_cam_mat_y = load_vector_data(vars.vi14 + 6); + vars.vf17_scaled_pos_1 = Vector4f(0, 0, 0, 1); + + // ibne vi00, vi14, L136 | mulw.xyzw vf18, vf00, vf00 + vars.vf18_scaled_pos_2 = Vector4f(0, 0, 0, 1); + // lq.xyzw vf08, 7(vi14) | mulw.xyzw vf19, vf00, vf00 + vars.vf19_scaled_pos_3 = Vector4f(0, 0, 0, 1); + inputs.vf08_cam_mat_z = load_vector_data(vars.vi14 + 7); + + if (m_uploading_buffer == 1) { + // vi14 = 0 version + exec_program_6_process_first(inputs, vars, render_state, prof); + } else { + // L136 + assert(false); + } + + // because we're doing everything in-sync (no background kicking or uploading), + // I _think_ it's fine to just do + flip_buffers(); + // and now we only have to implement one half of program 6? 
+} + +namespace { + +float u32_2_float(u32 x) { + float y; + memcpy(&y, &x, 4); + return y; +} + +Vector4f itof0(const Vector4f& vec) { + Vector4f result; + for (int i = 0; i < 4; i++) { + s32 val; + memcpy(&val, vec.data() + i, 4); + result[i] = val; + } + return result; +} + +Vector4f ftoi4(const Vector4f& vec) { + Vector4f result; + for (int i = 0; i < 4; i++) { + s32 f = vec[i] * 16.f; + float val; + memcpy(&val, &f, 4); + result[i] = val; + } + return result; +} + +bool clip_xyz_plus_minus(const Vector4f& pt) { + float pw = std::abs(pt.w()); + float mw = -pw; + for (int i = 0; i < 3; i++) { + if (pt[i] > pw) { + return true; + } + if (pt[i] < mw) { + return true; + } + } + return false; +} +} // namespace + +void TFragment::store_gif_kick_zone(int offset, const GifTag& tag) { + assert(offset >= TFragDataMem::TFragKickZoneData); + assert(offset < KICK_ZONE_END); + memcpy(&m_kick_data.pad[(offset - TFragDataMem::TFragKickZoneData) * 16], &tag, 16); +} + +void TFragment::store_u32_kick_zone(u32 value, int qw, int xyzw) { + assert(qw >= TFragDataMem::TFragKickZoneData); + assert(qw < KICK_ZONE_END); + memcpy(&m_kick_data.pad[(xyzw * 4) + (qw - TFragDataMem::TFragKickZoneData) * 16], &value, 4); +} + +template +void TFragment::exec_program_6_process_first(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // SETUP BLOCK + + // ilwr.x vi02, vi03 | nop + assert(vars.vi03 < TFragDataMem::Buffer1_Start); // should be a buffer 0 addr + vars.vi02 = ilw_data(vars.vi03, 0); + // fmt::print("--------- initial vi02.x: {}\n", vars.vi02); + + // lq.xyzw vf09, 8(vi14) | nop + vars.vf09_cam_trans = load_vector_data(vars.vi14 + 8); + + // stupid, this is 0. 
+ // iadd vi08, vi08, vi14 | nop + vars.vi08 += vars.vi14; + // iadd vi09, vi09, vi14 | nop + vars.vi09 += vars.vi14; + + // lq.xyw vf28, 0(vi02) | nop + if (DEBUG) { + ImGui::Text("vi02: %d", vars.vi02); + } + auto vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + vars.vf28.w() = vf28_load_temp.w(); + + // mtir vi06, vf03.x | nop + vars.vi06_kick_zone_ptr = m_ptrs.vf03_x; + + // ilwr.x vi12, vi09 | nop + vars.vi12 = ilw_data(vars.vi09, 0); + // fmt::print("--------- initial vi12: {}\n", vars.vi12); + + // ilwr.z vi13, vi09 | nop + vars.vi13 = ilw_data(vars.vi09, 2); + + // mtir vi04, vf28.w | subz.xyz vf24, vf28, vf02 + vars.vi04 = float_2_u32(vars.vf28.w()); + vars.vf24 = vars.vf28 - m_tfrag_data.val.z(); // only xyz, but what sets w?? + + // iaddiu vi11, vi00, 0x4000 | nop + vars.vi11 = 0x4000; + + // iaddiu vi11, vi11, 0x4000 | nop + vars.vi11 += 0x4000; + + // ilwr.y vi02, vi03 | nop + vars.vi02 = ilw_data(vars.vi03, 1); + // fmt::print("--------- initial vi02.y: {}\n", vars.vi02); + + // lq.xyzw vf12, 0(vi04) | nop + if (DEBUG) { + ImGui::Text("vi04: %d", vars.vi04); + } + vars.vf12_root_pos_0 = load_vector_data(vars.vi04); + + // lq.xyzw vf20, 1(vi04) | nop + vars.vf20 = load_vector_data(vars.vi04 + 1); + + // iaddiu vi12, vi12, 0x80 | nop + vars.vi12 += 0x80; + + // iadd vi13, vi13, vi08 | nop + vars.vi13 += vars.vi08; + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf12, vf12 + vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + vars.vf28.w() = vf28_load_temp.w(); + vars.vf12_root_pos_0 = itof0(vars.vf12_root_pos_0); + + // todo + // vars.vf12_root_pos_0 *= 0; + + // fmt::print("root 12 setup: {}\n", vars.vf12_root_pos_0.to_string_aligned()); + + // mfir.w vf24, vi06 | nop + vars.vf24.w() = u32_2_float(vars.vi06_kick_zone_ptr); + + // lqi.xyzw vf29, vi13 | nop + vars.vf29 = load_vector_data(vars.vi13); + vars.vi13++; + 
+ // lqi.xyzw vf30, vi13 | nop + vars.vf30 = load_vector_data(vars.vi13); + vars.vi13++; + + // lqi.xyzw vf31, vi13 | nop + vars.vf31 = load_vector_data(vars.vi13); + vars.vi13++; + + // sqi.xyzw vf05, vi06 | subz.xyz vf25, vf28, vf02 + if (DEBUG) { + ImGui::Text("vi06: %d", vars.vi06_kick_zone_ptr); + } + store_gif_kick_zone(vars.vi06_kick_zone_ptr, m_tfrag_data.ad_gif); + vars.vi06_kick_zone_ptr++; + vars.vf25 = vars.vf28 - m_tfrag_data.val.z(); + + // sqi.xyzw vf29, vi06 | mulaw.xyzw ACC, vf09, vf00 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf29); + vars.vi06_kick_zone_ptr++; + if (DEBUG) { + fmt::print("@ {} ad (0): {}", vars.vi13, debug_print_ad_vec(vars.vf29)); + } + Vector4f acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | nop + vars.vi04 = float_2_u32(vars.vf28.w()); + + // sqi.xyzw vf30, vi06 | maddax.xyzw ACC, vf04, vf12 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf30); + vars.vi06_kick_zone_ptr++; + if (DEBUG) { + fmt::print("ad (1): {}", debug_print_ad_vec(vars.vf30)); + } + acc += in.vf04_cam_mat_x * vars.vf12_root_pos_0.x(); + + // sqi.xyzw vf31, vi06 | nop + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf31); + vars.vi06_kick_zone_ptr++; + if (DEBUG) { + fmt::print("ad (2): {}", debug_print_ad_vec(vars.vf31)); + } + + // ilwr.z vi02, vi03 | nop + vars.vi02 = ilw_data(vars.vi03, 2); + // fmt::print("--------- initial vi02.z: {}\n", vars.vi02); + + // lq.xyzw vf13, 0(vi04) | madday.xyzw ACC, vf07, vf12 + vars.vf13_root_pos_1 = load_vector_data(vars.vi04); + acc += in.vf07_cam_mat_y * vars.vf12_root_pos_0.y(); + + // lq.xyzw vf21, 1(vi04) | maddz.xyzw vf12, vf08, vf12 + vars.vf21 = load_vector_data(vars.vi04 + 1); + vars.vf12_root_pos_0 = acc + in.vf08_cam_mat_z * vars.vf12_root_pos_0.z(); + // fmt::print("root 12 setup cam: {}\n", in.vf08_cam_mat_z.to_string_aligned()); + + // lqi.xyzw vf29, vi13 | nop + vars.vf29 = load_vector_data(vars.vi13); + vars.vi13++; + + // lqi.xyzw vf30, vi13 | nop + vars.vf30 = 
load_vector_data(vars.vi13); + vars.vi13++; + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf13, vf13 + vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + vars.vf28.w() = vf28_load_temp.w(); + vars.vf13_root_pos_1 = itof0(vars.vf13_root_pos_1); + + // div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + float q = m_tfrag_data.fog.x() / vars.vf12_root_pos_0.w(); + vars.vf16_scaled_pos_0 = vars.vf12_root_pos_0.elementwise_multiply(m_tfrag_data.hmge_scale); + + // sqi.xyzw vf29, vi06 | nop + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf29); + vars.vi06_kick_zone_ptr++; + if (DEBUG) { + fmt::print("ad (3): {}", debug_print_ad_vec(vars.vf29)); + } + + // sqi.xyzw vf30, vi06 | nop + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf30); + vars.vi06_kick_zone_ptr++; + if (DEBUG) { + fmt::print("ad (4): {}", debug_print_ad_vec(vars.vf30)); + } + + // iadd vi01, vi12, vi12 | subz.xyz vf26, vf28, vf02 + m_ptrs.vi01 = vars.vi12 + vars.vi12; + vars.vf26 = vars.vf28 - m_tfrag_data.val.z(); + + // iadd vi01, vi01, vi12 | mulaw.xyzw ACC, vf09, vf00 + m_ptrs.vi01 += vars.vi12; + acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | nop + vars.vi04 = float_2_u32(vars.vf28.w()); + + // iadd vi05, vi06, vi01 | maddax.xyzw ACC, vf04, vf13 + vars.vi05 = vars.vi06_kick_zone_ptr + m_ptrs.vi01; + // fmt::print("vert count: {}\n", vars.vi12); + acc += in.vf04_cam_mat_x * vars.vf13_root_pos_1.x(); + + // ior vi10, vi06, vi00 | mul.xyz vf12, vf12, Q + vars.vi10 = vars.vi06_kick_zone_ptr; + vars.vf12_root_pos_0.x() *= q; + vars.vf12_root_pos_0.y() *= q; + vars.vf12_root_pos_0.z() *= q; + + // ilwr.w vi02, vi03 | mul.xyz vf24, vf24, Q + vars.vi02 = ilw_data(vars.vi03, 3); + // fmt::print("--------- initial vi02.w: {}\n", vars.vi02); + vars.vf24.x() *= q; + vars.vf24.y() *= q; + vars.vf24.z() *= q; + + // lq.xyzw vf14, 0(vi04) | madday.xyzw ACC, vf07, vf13 + vars.vf14_loop_pos_0 = load_vector_data(vars.vi04); + acc 
+= in.vf07_cam_mat_y * vars.vf13_root_pos_1.y(); + + // lq.xyzw vf22, 1(vi04) | maddz.xyzw vf13, vf08, vf13 + vars.vf22 = load_vector_data(vars.vi04 + 1); + vars.vf13_root_pos_1 = acc + in.vf08_cam_mat_z * vars.vf13_root_pos_1.z(); + + // sqi.xyzw vf06, vi06 | add.xyzw vf12, vf12, vf10 + store_gif_kick_zone(vars.vi06_kick_zone_ptr, m_tfrag_data.str_gif); + vars.vi06_kick_zone_ptr++; + vars.vf12_root_pos_0 += m_tfrag_data.hvdf_offset; + + // isw.x vi12, -1(vi06) | nop + store_u32_kick_zone(vars.vi12, vars.vi06_kick_zone_ptr - 1, 0); + if (DEBUG) { + ImGui::Text("strgif mod: %d", vars.vi12); // maybe number of tris or something? + } + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf14, vf14 + vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + vars.vf28.w() = vf28_load_temp.w(); + // fmt::print("ORIG VF28: {} {}\n", vars.vf28.x(), vars.vf28.y()); + vars.vf14_loop_pos_0 = itof0(vars.vf14_loop_pos_0); + + // div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + m_q = m_tfrag_data.fog.x() / vars.vf13_root_pos_1.w(); + vars.vf17_scaled_pos_1 = vars.vf13_root_pos_1.elementwise_multiply(m_tfrag_data.hmge_scale); + + // iaddi vi09, vi09, 0x1 | miniz.w vf12, vf12, vf01 + vars.vi09++; + // fmt::print("VI09 INC (prestart): {}\n", vars.vi09); + vars.vf12_root_pos_0.w() = std::min(vars.vf12_root_pos_0.w(), m_tfrag_data.fog.z()); + + // ilwr.x vi12, vi09 | clipw.xyz vf16, vf16 + vars.vi12 = ilw_data(vars.vi09, 0); + m_clip_and_3ffff = clip_xyz_plus_minus(vars.vf16_scaled_pos_0); + + // starting here, the control flow does crazy stuff, so we have this weird state machine: + m_next_block = TFragJumper::L128_PART0_X; + + while (true) { + exec_jumper_L128(in, vars); + if (exec_jumper_L129(in, vars, render_state, prof)) { + break; + } + exec_jumper_L6A1(in, vars); + if (exec_jumper_L130(in, vars, render_state, prof)) { + break; + } + exec_jumper_L6B0(in, vars); + if (exec_jumper_L131(in, vars, render_state, prof)) { + 
break; + } + exec_jumper_L6BF(in, vars); + if (exec_jumper_L132(in, vars, render_state, prof)) { + break; + } + } + // while (m_next_block != TFragJumper::END_PROGRAM) { + //// fmt::print("block {}\n", (int)m_next_block); + // switch (m_next_block) { + // case L128_PART0_X: + // exec_jumper_L128(in, vars); + // break; + // case L129_PART1_X: + // exec_jumper_L129(in, vars); + // break; + // case L0x6A1_PART0_Y: + // exec_jumper_L6A1(in, vars); + // break; + // case L130_PART1_Y: + // exec_jumper_L130(in, vars); + // break; + // case L0x6B0_PART0_Z: + // exec_jumper_L6B0(in, vars); + // break; + // case L131_PART1_Z: + // exec_jumper_L131(in, vars); + // break; + // case L0x6BF_PART0_W: + // exec_jumper_L6BF(in, vars); + // break; + // case L132_PART1_W: + // exec_jumper_L132(in, vars); + // break; + // case L122_KICK: + // exec_jumper_L122(in, vars, render_state, prof); + // break; + // default: + // assert(false); + // } + // } +} + +template +void TFragment::exec_jumper_L128(const Prog6Inputs& in, Prog6Vars& vars) { + // Part 0 for X + // iaddi vi03, vi03, 0x1 | subz.xyz vf27, vf28, vf02 + vars.vi03++; + Vector4f vf27_temp = vars.vf28 - m_tfrag_data.val.z(); + vars.vf27.x() = vf27_temp.x(); + vars.vf27.y() = vf27_temp.y(); + vars.vf27.z() = vf27_temp.z(); + + // iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + vars.vi07++; + m_acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | maxy.w vf12, vf12, vf01 + vars.vi04 = float_2_u32(vars.vf28.w()); + vars.vf12_root_pos_0.w() = std::max(vars.vf12_root_pos_0.w(), m_tfrag_data.fog.y()); + + // fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf14 + m_acc += in.vf04_cam_mat_x * vars.vf14_loop_pos_0.x(); + // fcand already calculated + + // ibeq vi00, vi01, L129 | mul.xyz vf13, vf13, Q + // branch made after next instr + vars.vf13_root_pos_1.x() *= m_q; + vars.vf13_root_pos_1.y() *= m_q; + vars.vf13_root_pos_1.z() *= m_q; + + // ilwr.x vi02, vi03 | mul.xyz vf25, vf25, Q + vars.vi02 = ilw_data(vars.vi03, 0); + 
vars.vf25.x() *= m_q; + vars.vf25.y() *= m_q; + vars.vf25.z() *= m_q; + + // skipped if we take the branch + // nop | addw.w vf12, vf12, vf01 + if (m_clip_and_3ffff) { + vars.vf12_root_pos_0.w() += m_tfrag_data.fog.w(); + } +} + +template +void TFragment::exec_jumper_L6A1(const Prog6Inputs& in, Prog6Vars& vars) { + // part 1 for 1 + // nop | subz.xyz vf24, vf28, vf02 + Vector4f vf24_temp = vars.vf28 - m_tfrag_data.val.z(); + vars.vf24.x() = vf24_temp.x(); + vars.vf24.y() = vf24_temp.y(); + vars.vf24.z() = vf24_temp.z(); + + // iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + vars.vi07++; + m_acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | maxy.w vf13, vf13, vf01 + vars.vi04 = float_2_u32(vars.vf28.w()); + vars.vf13_root_pos_1.w() = std::max(vars.vf13_root_pos_1.w(), m_tfrag_data.fog.y()); + + // fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf15 + m_acc += in.vf04_cam_mat_x * vars.vf15_loop_pos_1.x(); + // fcand already calculated + + // ibeq vi00, vi01, L130 | mul.xyz vf14, vf14, Q + vars.vf14_loop_pos_0.x() *= m_q; + vars.vf14_loop_pos_0.y() *= m_q; + vars.vf14_loop_pos_0.z() *= m_q; + + // ilwr.y vi02, vi03 | mul.xyz vf26, vf26, Q + vars.vi02 = ilw_data(vars.vi03, 1); + vars.vf26.x() *= m_q; + vars.vf26.y() *= m_q; + vars.vf26.z() *= m_q; + + // nop | addw.w vf13, vf13, vf0 + if (m_clip_and_3ffff) { + vars.vf13_root_pos_1.w() += m_tfrag_data.fog.w(); + } +} + +template +void TFragment::exec_jumper_L6B0(const Prog6Inputs& in, Prog6Vars& vars) { + // nop | subz.xyz vf25, vf28, vf02 + Vector4f vf25_temp = vars.vf28 - m_tfrag_data.val.z(); + vars.vf25.x() = vf25_temp.x(); + vars.vf25.y() = vf25_temp.y(); + vars.vf25.z() = vf25_temp.z(); + + // iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + vars.vi07++; + m_acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | maxy.w vf14, vf14, vf01 + vars.vi04 = float_2_u32(vars.vf28.w()); + vars.vf14_loop_pos_0.w() = std::max(vars.vf14_loop_pos_0.w(), m_tfrag_data.fog.y()); + + // fcand vi01, 0x3ffff | maddax.xyzw 
ACC, vf04, vf12 + m_acc += in.vf04_cam_mat_x * vars.vf12_root_pos_0.x(); + // fcand already calculated + + // ibeq vi00, vi01, L131 | mul.xyz vf15, vf15, Q + vars.vf15_loop_pos_1.x() *= m_q; + vars.vf15_loop_pos_1.y() *= m_q; + vars.vf15_loop_pos_1.z() *= m_q; + + // ilwr.z vi02, vi03 | mul.xyz vf27, vf27, Q + vars.vi02 = ilw_data(vars.vi03, 2); + vars.vf27.x() *= m_q; + vars.vf27.y() *= m_q; + vars.vf27.z() *= m_q; + + // nop | addw.w vf14, vf14, vf01 + if (m_clip_and_3ffff) { + vars.vf14_loop_pos_0.w() += m_tfrag_data.fog.w(); + } +} + +template +void TFragment::exec_jumper_L6BF(const Prog6Inputs& in, Prog6Vars& vars) { + // nop | subz.xyz vf26, vf28, vf02 + Vector4f vf26_temp = vars.vf28 - m_tfrag_data.val.z(); + vars.vf26.x() = vf26_temp.x(); + vars.vf26.y() = vf26_temp.y(); + vars.vf26.z() = vf26_temp.z(); + + // iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + vars.vi07++; + m_acc = vars.vf09_cam_trans; + + // mtir vi04, vf28.w | maxy.w vf15, vf15, vf01 + vars.vi04 = float_2_u32(vars.vf28.w()); // L131 previously + assert(vars.vi04 != 0xbeef); // hit + vars.vf15_loop_pos_1.w() = std::max(vars.vf15_loop_pos_1.w(), m_tfrag_data.fog.y()); + + // fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf13 + m_acc += in.vf04_cam_mat_x * vars.vf13_root_pos_1.x(); + + // ibeq vi00, vi01, L132 | mul.xyz vf12, vf12, Q + vars.vf12_root_pos_0.x() *= m_q; + vars.vf12_root_pos_0.y() *= m_q; + vars.vf12_root_pos_0.z() *= m_q; + + // ilwr.w vi02, vi03 | mul.xyz vf24, vf24, Q + vars.vi02 = ilw_data(vars.vi03, 3); + vars.vf24.x() *= m_q; + vars.vf24.y() *= m_q; + vars.vf24.z() *= m_q; + + // nop | addw.w vf15, vf15, vf01 + if (m_clip_and_3ffff) { + vars.vf15_loop_pos_1.w() += m_tfrag_data.fog.w(); + } +} + +template +bool TFragment::exec_jumper_L129(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // Part 1 for X + // lq.xyzw vf15, 0(vi04) | madday.xyzw ACC, vf07, vf14 + vars.vf15_loop_pos_1 = load_vector_data(vars.vi04); 
+ m_acc += in.vf07_cam_mat_y * vars.vf14_loop_pos_0.y(); + + // lq.xyzw vf23, 1(vi04) | maddz.xyzw vf14, vf08, vf14 + vars.vf23 = load_vector_data(vars.vi04 + 1); + vars.vf14_loop_pos_0 = m_acc + in.vf08_cam_mat_z * vars.vf14_loop_pos_0.z(); + + // sqi.xyz vf24, vi06 | add.xyzw vf13, vf13, vf10 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf24); + // fmt::print("A: vf24 store: {}\n", vars.vf24.to_string_aligned()); + vars.vi06_kick_zone_ptr++; + vars.vf13_root_pos_1 += m_tfrag_data.hvdf_offset; + + // sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf20); + // fmt::print("B: vf20 store: {}\n", int_vec_debug(vars.vf20)); + vars.vi06_kick_zone_ptr++; + vars.vf12_root_pos_0 = ftoi4(vars.vf12_root_pos_0); + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf15, vf15 + if (vars.vi02 < Buffer1_Start) { // HACK added + auto vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + if (float_2_u32(vf28_load_temp.w()) < Buffer1_Start) { + vars.vf28.w() = vf28_load_temp.w(); + } + } + vars.vf15_loop_pos_1 = itof0(vars.vf15_loop_pos_1); + + // div Q, vf01.x, vf14.w | mul.xyzw vf18, vf14, vf11 + m_q = m_tfrag_data.fog.x() / vars.vf14_loop_pos_0.w(); + vars.vf18_scaled_pos_2 = vars.vf14_loop_pos_0.elementwise_multiply(m_tfrag_data.hmge_scale); + + // ibeq vi05, vi06, L133 | miniz.w vf13, vf13, vf01 + bool take_branch = (vars.vi05 == vars.vi06_kick_zone_ptr); + // fmt::print("L129 prog: {} {}\n", vars.vi05, vars.vi06_kick_zone_ptr); + vars.vf13_root_pos_1.w() = std::min(vars.vf13_root_pos_1.w(), m_tfrag_data.fog.z()); + + // sqi.xyzw vf12, vi06 | clipw.xyz vf17, vf17 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf12_root_pos_0); + // fmt::print("C: vf12 store: {}\n", int_vec_debug(vars.vf12_root_pos_0)); + vars.vi06_kick_zone_ptr++; + m_clip_and_3ffff = clip_xyz_plus_minus(vars.vf17_scaled_pos_1); + + if (take_branch) { + // kick zone is full, time for 
another kick + return exec_jumper_L122(in, vars, render_state, prof); + } else { + return false; + } +} + +template +bool TFragment::exec_jumper_L130(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // lq.xyzw vf12, 0(vi04) | madday.xyzw ACC, vf07, vf15 + vars.vf12_root_pos_0 = load_vector_data(vars.vi04); + m_acc += in.vf07_cam_mat_y * vars.vf15_loop_pos_1.y(); + + // lq.xyzw vf20, 1(vi04) | maddz.xyzw vf15, vf08, vf15 + vars.vf20 = load_vector_data(vars.vi04 + 1); + // fmt::print("load vf20 from {}\n", vars.vi04 + 1); + vars.vf15_loop_pos_1 = m_acc + in.vf08_cam_mat_z * vars.vf15_loop_pos_1.z(); + + // sqi.xyzw vf25, vi06 | add.xyzw vf14, vf14, vf10 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf25); + // fmt::print("A: vf25 store: {}\n", vars.vf25.to_string_aligned()); + vars.vi06_kick_zone_ptr++; + vars.vf14_loop_pos_0 += m_tfrag_data.hvdf_offset; + + // sqi.xyzw vf21, vi06 | ftoi4.xyzw vf13, vf13 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf21); + // fmt::print("B: vf21 store: {}\n", int_vec_debug(vars.vf21)); + vars.vi06_kick_zone_ptr++; + vars.vf13_root_pos_1 = ftoi4(vars.vf13_root_pos_1); + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf12, vf12 + if (vars.vi02 < Buffer1_Start) { // HACK added + auto vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + if (float_2_u32(vf28_load_temp.w()) < Buffer1_Start) { + vars.vf28.w() = vf28_load_temp.w(); + } + } + + vars.vf12_root_pos_0 = itof0(vars.vf12_root_pos_0); + + // div Q, vf01.x, vf15.w | mul.xyzw vf19, vf15, vf11 + m_q = m_tfrag_data.fog.x() / vars.vf15_loop_pos_1.w(); + vars.vf19_scaled_pos_3 = vars.vf15_loop_pos_1.elementwise_multiply(m_tfrag_data.hmge_scale); + + // ibeq vi05, vi06, L134 | miniz.w vf14, vf14, vf01 + bool take_branch = (vars.vi05 == vars.vi06_kick_zone_ptr); + vars.vf14_loop_pos_0.w() = std::min(vars.vf14_loop_pos_0.w(), m_tfrag_data.fog.z()); + 
+ // sqi.xyzw vf13, vi06 | clipw.xyz vf18, vf18 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf13_root_pos_1); + // fmt::print("C: vf13 store: {}\n", int_vec_debug(vars.vf13_root_pos_1)); + vars.vi06_kick_zone_ptr++; + m_clip_and_3ffff = clip_xyz_plus_minus(vars.vf18_scaled_pos_2); + + if (take_branch) { + // kick zone is full, time for another kick + return exec_jumper_L122(in, vars, render_state, prof); + } else { + return false; + } +} + +template +bool TFragment::exec_jumper_L131(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // lq.xyzw vf13, 0(vi04) | madday.xyzw ACC, vf07, vf12 + vars.vf13_root_pos_1 = load_vector_data(vars.vi04); + m_acc += in.vf07_cam_mat_y * vars.vf12_root_pos_0.y(); + + // lq.xyzw vf21, 1(vi04) | maddz.xyzw vf12, vf08, vf12 + vars.vf21 = load_vector_data(vars.vi04 + 1); + // fmt::print("vf21 load from: {}\n", vars.vi04 + 1); + vars.vf12_root_pos_0 = m_acc + in.vf08_cam_mat_z * vars.vf12_root_pos_0.z(); + + // sqi.xyzw vf26, vi06 | add.xyzw vf15, vf15, vf10 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf26); + // fmt::print("A: vf26 store: {}\n", vars.vf26.to_string_aligned()); + vars.vi06_kick_zone_ptr++; + vars.vf15_loop_pos_1 += m_tfrag_data.hvdf_offset; + + // sqi.xyzw vf22, vi06 | ftoi4.xyzw vf14, vf14 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf22); + // fmt::print("B: vf22 store: {}\n", int_vec_debug(vars.vf22)); + vars.vi06_kick_zone_ptr++; + vars.vf14_loop_pos_0 = ftoi4(vars.vf14_loop_pos_0); + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf13, vf13 + if (vars.vi02 < Buffer1_Start) { // HACK added + auto vf28_load_temp = load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + if (float_2_u32(vf28_load_temp.w()) < Buffer1_Start) { + vars.vf28.w() = vf28_load_temp.w(); + } + } + vars.vf13_root_pos_1 = itof0(vars.vf13_root_pos_1); + + // div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + m_q = 
m_tfrag_data.fog.x() / vars.vf12_root_pos_0.w(); + vars.vf16_scaled_pos_0 = vars.vf12_root_pos_0.elementwise_multiply(m_tfrag_data.hmge_scale); + + // ibeq vi05, vi06, L135 | miniz.w vf15, vf15, vf01 + bool take_branch = (vars.vi05 == vars.vi06_kick_zone_ptr); + vars.vf15_loop_pos_1.w() = std::min(vars.vf15_loop_pos_1.w(), m_tfrag_data.fog.z()); + + // sqi.xyzw vf14, vi06 | clipw.xyz vf19, vf19 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf14_loop_pos_0); + // fmt::print("C: vf14 store: {}\n", int_vec_debug(vars.vf14_loop_pos_0)); + vars.vi06_kick_zone_ptr++; + m_clip_and_3ffff = clip_xyz_plus_minus(vars.vf19_scaled_pos_3); + + if (take_branch) { + // kick zone is full, time for another kick + return exec_jumper_L122(in, vars, render_state, prof); + } else { + return false; + } +} + +template +bool TFragment::exec_jumper_L132(const Prog6Inputs& in, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // lq.xyzw vf14, 0(vi04) | madday.xyzw ACC, vf07, vf13 + vars.vf14_loop_pos_0 = load_vector_data(vars.vi04); // bad here, in L0x6BF_PART0_W prev + m_acc += in.vf07_cam_mat_y * vars.vf13_root_pos_1.y(); + + // lq.xyzw vf22, 1(vi04) | maddz.xyzw vf13, vf08, vf13 + vars.vf22 = load_vector_data(vars.vi04 + 1); + vars.vf13_root_pos_1 = m_acc + in.vf08_cam_mat_z * vars.vf13_root_pos_1.z(); + + // sqi.xyzw vf27, vi06 | add.xyzw vf12, vf12, vf10 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf27); + // fmt::print("A: vf27 store: {}\n", vars.vf27.to_string_aligned()); + vars.vi06_kick_zone_ptr++; + vars.vf12_root_pos_0 += m_tfrag_data.hvdf_offset; + + // sqi.xyzw vf23, vi06 | ftoi4.xyzw vf15, vf15 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf23); + // fmt::print("B: vf23 store: {}\n", int_vec_debug(vars.vf23)); + vars.vi06_kick_zone_ptr++; + vars.vf15_loop_pos_1 = ftoi4(vars.vf15_loop_pos_1); + + // lq.xyw vf28, 0(vi02) | itof0.xyzw vf14, vf14 + if (vars.vi02 < Buffer1_Start) { // HACK added + auto vf28_load_temp 
= load_vector_data(vars.vi02); + vars.vf28.x() = vf28_load_temp.x(); + vars.vf28.y() = vf28_load_temp.y(); + if (float_2_u32(vf28_load_temp.w()) < Buffer1_Start) { + vars.vf28.w() = vf28_load_temp.w(); + } + } + vars.vf14_loop_pos_0 = itof0(vars.vf14_loop_pos_0); + + // div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + m_q = m_tfrag_data.fog.x() / vars.vf13_root_pos_1.w(); + vars.vf17_scaled_pos_1 = vars.vf13_root_pos_1.elementwise_multiply(m_tfrag_data.hmge_scale); + + // ibne vi05, vi06, L128 | miniz.w vf12, vf12, vf01 + bool take_branch = (vars.vi05 != vars.vi06_kick_zone_ptr); + // fmt::print("kick check: {} {}\n", vars.vi05, vars.vi06_kick_zone_ptr); + vars.vf12_root_pos_0.w() = std::min(vars.vf12_root_pos_0.w(), m_tfrag_data.fog.z()); + + // sqi.xyzw vf15, vi06 | clipw.xyz vf16, vf16 + store_vector_kick_zone(vars.vi06_kick_zone_ptr, vars.vf15_loop_pos_1); + vars.vi06_kick_zone_ptr++; + // fmt::print("C: vf15 store: {}\n", int_vec_debug(vars.vf15_loop_pos_1)); + m_clip_and_3ffff = clip_xyz_plus_minus(vars.vf16_scaled_pos_0); + + if (take_branch) { + return false; + } else { + // kick zone is full, kick then restart + return exec_jumper_L122(in, vars, render_state, prof); + } + + // b L122 | nop + // iaddiu vi15, vi00, 0x692 | nop ;; L128 +} + +template +bool TFragment::exec_jumper_L122(const Prog6Inputs& /*in*/, + Prog6Vars& vars, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + // KICK ZONE! + // L122: + // fcset 0x0 + m_clip_and_3ffff = false; // ?? 
+ // iaddi vi07, vi00, -0x1 + vars.vi07 = -1; + // fmt::print("KICK blocks: vi12 = 0x{:x}\n", vars.vi12); + // iblez vi12, L123 + // iaddi vi09, vi09, 0x1 + vars.vi09++; + // fmt::print("VI09 now {}\n", vars.vi09); + if (((s16)vars.vi12) > 0) { + // ior vi10, vi06, vi00 + vars.vi10 = vars.vi06_kick_zone_ptr; + // iadd vi01, vi12, vi12 + m_ptrs.vi01 = vars.vi12 + vars.vi12; + // iadd vi01, vi01, vi12 + m_ptrs.vi01 += vars.vi12; + // iadd vi05, vi06, vi01 + vars.vi05 = vars.vi06_kick_zone_ptr + m_ptrs.vi01; + // sqi.xyzw vf06, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr, m_tfrag_data.str_gif); + vars.vi06_kick_zone_ptr++; + // isw.x vi12, -1(vi06) + store_u32_kick_zone(vars.vi12, vars.vi06_kick_zone_ptr - 1, 0); + // jr vi15 + // ilwr.x vi12, vi09 + vars.vi12 = ilw_data(vars.vi09, 0); + // fmt::print("didn't kick, vi12 now {}\n", vars.vi12); + m_next_block = m_ret_block; + + return false; + } + + // L123: + // ilw.y vi01, -1(vi09) + m_ptrs.vi01 = ilw_data(vars.vi09 - 1, 1); + // ilw.z vi13, -1(vi09) + vars.vi13 = ilw_data(vars.vi09 - 1, 2); + // fmt::print("VI09 loads: {} {}\n", m_ptrs.vi01, vars.vi13); + // ibeq vi00, vi12, L126 + // ilwr.x vi14, vi10 + // fmt::print("val is {}: {}\n", vars.vi10, ilw_kick_zone(vars.vi10, 0)); + vars.vi14 = ilw_kick_zone(vars.vi10, 0); + if (vars.vi12 != 0) { + // ibltz vi01, L124 + // iaddiu vi12, vi12, 0x80 + vars.vi12 += 0x80; + if (((s16)m_ptrs.vi01) >= 0) { + // iadd vi13, vi13, vi08 + vars.vi13 += vars.vi08; + if (DEBUG) { + fmt::print("vi13 = {}, (after adding {})\n", vars.vi13, vars.vi08); + } + // lqi.xyzw vf29, vi13 + vars.vf29 = load_vector_data(vars.vi13++); + // lqi.xyzw vf30, vi13 + vars.vf30 = load_vector_data(vars.vi13++); + // lqi.xyzw vf31, vi13 + vars.vf31 = load_vector_data(vars.vi13++); + // sqi.xyzw vf05, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr++, m_tfrag_data.ad_gif); + // sqi.xyzw vf29, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf29); + if (DEBUG) { + fmt::print("ad (0): 
{}", debug_print_ad_vec(vars.vf29)); + } + // sqi.xyzw vf30, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf30); + if (DEBUG) { + fmt::print("ad (1): {}", debug_print_ad_vec(vars.vf30)); + } + // sqi.xyzw vf31, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf31); + if (DEBUG) { + fmt::print("ad (2): {}", debug_print_ad_vec(vars.vf31)); + } + // lqi.xyzw vf29, vi13 + vars.vf29 = load_vector_data(vars.vi13++); + // lqi.xyzw vf30, vi13 + vars.vf30 = load_vector_data(vars.vi13++); + // iadd vi01, vi12, vi12 + m_ptrs.vi01 = vars.vi12 + vars.vi12; + // iadd vi01, vi01, vi12 + m_ptrs.vi01 += vars.vi12; + // sqi.xyzw vf29, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf29); + if (DEBUG) { + fmt::print("ad (3): {}", debug_print_ad_vec(vars.vf29)); + } + // sqi.xyzw vf30, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf30); + if (DEBUG) { + fmt::print("ad (4): {}", debug_print_ad_vec(vars.vf30)); + } + // ior vi10, vi06, vi00 + vars.vi10 = vars.vi06_kick_zone_ptr; + // iadd vi05, vi06, vi01 + vars.vi05 = vars.vi06_kick_zone_ptr + m_ptrs.vi01; + // sqi.xyzw vf06, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr++, m_tfrag_data.str_gif); + // isw.x vi12, -1(vi06) + store_u32_kick_zone(vars.vi12, vars.vi06_kick_zone_ptr - 1, 0); + // jr vi15 + // ilwr.x vi12, vi09 + vars.vi12 = ilw_data(vars.vi09, 0); + // fmt::print("didn't kick 2, vi12 now {}\n", vars.vi12); + m_next_block = m_ret_block; + return false; + } + + // L124: + // mtir vi01, vf24.w + m_ptrs.vi01 = float_2_u32(vars.vf24.w()); + // mtir vi06, vf03.y + vars.vi06_kick_zone_ptr = m_ptrs.vf03_y; + // mr32.xyzw vf03, vf03 + auto temp = m_ptrs.vf03_x; + m_ptrs.vf03_x = m_ptrs.vf03_y; + m_ptrs.vf03_y = m_ptrs.vf03_z; + m_ptrs.vf03_z = m_ptrs.vf03_w; + m_ptrs.vf03_w = temp; + + // iadd vi14, vi14, vi11 + vars.vi14 += vars.vi11; + + // ibgez vi13, L125 + // iswr.x vi14, vi10 + // fmt::print("kick zone store: {}\n", vars.vi14); + store_u32_kick_zone(vars.vi14, 
vars.vi10, 0); + if (((s16)vars.vi13) < 0) { + // xgkick vi01 + XGKICK(m_ptrs.vi01, render_state, prof); + // ior vi10, vi06, vi00 + vars.vi10 = vars.vi06_kick_zone_ptr; // xgkick delay slots, doesn't seem to matter. + // mfir.w vf24, vi06 + vars.vf24.w() = u32_2_float(vars.vi06_kick_zone_ptr); + // iadd vi01, vi12, vi12 + m_ptrs.vi01 = vars.vi12 + vars.vi12; + // iadd vi01, vi01, vi12 + m_ptrs.vi01 += vars.vi12; + // iadd vi05, vi06, vi01 + vars.vi05 = vars.vi06_kick_zone_ptr + m_ptrs.vi01; + // sqi.xyzw vf06, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr++, m_tfrag_data.str_gif); + // isw.x vi12, -1(vi06) + store_u32_kick_zone(vars.vi12, vars.vi06_kick_zone_ptr - 1, 0); + // jr vi15 + // ilwr.x vi12, vi09 + vars.vi12 = ilw_data(vars.vi09, 0); + // fmt::print("didn't kick 3, vi12 now {}\n", vars.vi12); + m_next_block = m_ret_block; + return false; + } + + // L125: + // iadd vi13, vi13, vi08 + vars.vi13 += vars.vi08; + // xgkick vi01 + XGKICK(m_ptrs.vi01, render_state, prof); + // lqi.xyzw vf29, vi13 + vars.vf29 = load_vector_data(vars.vi13++); + // lqi.xyzw vf30, vi13 + vars.vf30 = load_vector_data(vars.vi13++); + // lqi.xyzw vf31, vi13 + vars.vf31 = load_vector_data(vars.vi13++); + // mfir.w vf24, vi06 + vars.vf24.w() = u32_2_float(vars.vi06_kick_zone_ptr); + // sqi.xyzw vf05, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr++, m_tfrag_data.ad_gif); + // sqi.xyzw vf29, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf29); + if (DEBUG) { + fmt::print("ad (0): {}", debug_print_ad_vec(vars.vf29)); + } + // sqi.xyzw vf30, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf30); + if (DEBUG) { + fmt::print("ad (1): {}", debug_print_ad_vec(vars.vf30)); + } + // sqi.xyzw vf31, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf31); + if (DEBUG) { + fmt::print("ad (2): {}", debug_print_ad_vec(vars.vf31)); + } + // lqi.xyzw vf29, vi13 + vars.vf29 = load_vector_data(vars.vi13++); + // lqi.xyzw vf30, vi13 + vars.vf30 = 
load_vector_data(vars.vi13++); + // iadd vi01, vi12, vi12 + m_ptrs.vi01 = vars.vi12 + vars.vi12; + // iadd vi01, vi01, vi12 + m_ptrs.vi01 += vars.vi12; + // sqi.xyzw vf29, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf29); + if (DEBUG) { + fmt::print("ad (3): {}", debug_print_ad_vec(vars.vf29)); + } + // sqi.xyzw vf30, vi06 + store_vector_kick_zone(vars.vi06_kick_zone_ptr++, vars.vf30); + if (DEBUG) { + fmt::print("ad (4): {}", debug_print_ad_vec(vars.vf30)); + } + // nop + // ior vi10, vi06, vi00 + vars.vi10 = vars.vi06_kick_zone_ptr; + // iadd vi05, vi06, vi01 + vars.vi05 = vars.vi06_kick_zone_ptr + m_ptrs.vi01; + // sqi.xyzw vf06, vi06 + store_gif_kick_zone(vars.vi06_kick_zone_ptr++, m_tfrag_data.str_gif); + // isw.x vi12, -1(vi06) + store_u32_kick_zone(vars.vi12, vars.vi06_kick_zone_ptr - 1, 0); + // jr vi15 + // ilwr.x vi12, vi09 + vars.vi12 = ilw_data(vars.vi09, 0); + // fmt::print("did kick, vi12 now {}\n", vars.vi12); + m_next_block = m_ret_block; + return false; + } + + // L126: + // mtir vi01, vf24.w + m_ptrs.vi01 = float_2_u32(vars.vf24.w()); + // mr32.xyzw vf03, vf03 + auto temp = m_ptrs.vf03_x; + m_ptrs.vf03_x = m_ptrs.vf03_y; + m_ptrs.vf03_y = m_ptrs.vf03_z; + m_ptrs.vf03_z = m_ptrs.vf03_w; + m_ptrs.vf03_w = temp; + // iadd vi14, vi14, vi11 + // fmt::print("before add: {}\n", vars.vi14); + vars.vi14 += vars.vi11; + // iswr.x vi14, vi10 + // fmt::print("kick zone store: {}\n", vars.vi14); + store_u32_kick_zone(vars.vi14, vars.vi10, 0); + // lq.xyzw vf04, 664(vi00) + // todo don't think I needed that load of ambient + XGKICK(m_ptrs.vi01, render_state, prof); + // xgkick vi01 + // nop | nop :e + m_next_block = END_PROGRAM; + return true; + + // nop | nop +} + +template +void TFragment::XGKICK(u32 addr, SharedRenderState* render_state, ScopedProfilerNode& prof) { + if (DEBUG) { + ImGui::Text("XGKICK: %d", addr); + } + + assert(addr >= TFragDataMem::TFragKickZoneData); + assert(addr < KICK_ZONE_END); + + if (!m_skip_xgkick) { + 
m_direct_renderer.render_gif(&m_kick_data.pad[(addr - TFragDataMem::TFragKickZoneData) * 16], + UINT32_MAX, render_state, prof); + } +} + +template void TFragment::exec_program_6(SharedRenderState* render_state, + ScopedProfilerNode& prof); +template void TFragment::exec_program_6(SharedRenderState* render_state, + ScopedProfilerNode& prof); \ No newline at end of file diff --git a/game/graphics/opengl_renderer/tfrag/tfrag_unpack.cpp b/game/graphics/opengl_renderer/tfrag/tfrag_unpack.cpp new file mode 100644 index 0000000000..a6648f7817 --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/tfrag_unpack.cpp @@ -0,0 +1,240 @@ +#include "game/graphics/opengl_renderer/tfrag/TFragment.h" + +// TFragment VIF unpack implementation + +int TFragment::handle_unpack_v4_8_mode0(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + + // CL x (num/WL)+(num%WL) + + u8* write_base = get_upload_buffer(); + + if (unpack.is_unsigned) { + // note: formulas below assume this! + assert(cl == 2); + assert(wl == 1); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + 2 * i; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = dma.read_val(offset++); + qw[1] = dma.read_val(offset++); + qw[2] = dma.read_val(offset++); + qw[3] = dma.read_val(offset++); + memcpy(write_base + (dest_qw * 16), qw, 16); + } + } else { + // note: formulas below assume this! 
+ assert(cl == 4); + assert(wl == 4); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + s32 qw[4]; + qw[0] = dma.read_val(offset++); + qw[1] = dma.read_val(offset++); + qw[2] = dma.read_val(offset++); + qw[3] = dma.read_val(offset++); + memcpy(write_base + (dest_qw * 16), qw, 16); + } + } + + return offset; +} + +int TFragment::handle_unpack_v4_8_mode1(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl, + const u32 row[4]) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + + // CL x (num/WL)+(num%WL) + + u8* write_base = get_upload_buffer(); + + if (unpack.is_unsigned) { + // note: formulas below assume this! + assert(cl == 4); + assert(wl == 4); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = row[0] + dma.read_val(offset++); + qw[1] = row[1] + dma.read_val(offset++); + qw[2] = row[2] + dma.read_val(offset++); + qw[3] = row[3] + dma.read_val(offset++); + memcpy(write_base + (dest_qw * 16), qw, 16); + } + } else { + // note: formulas below assume this! + assert(cl == 4); + assert(wl == 4); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + s32 qw[4]; + qw[0] = row[0] + dma.read_val(offset++); + qw[1] = row[1] + dma.read_val(offset++); + qw[2] = row[2] + dma.read_val(offset++); + qw[3] = row[3] + dma.read_val(offset++); + memcpy(write_base + (dest_qw * 16), qw, 16); + } + } + + return offset; +} + +int TFragment::handle_unpack_v4_16_mode0(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + assert(unpack.is_unsigned); + + // note: formulas below assume this! 
+ assert(cl == 4); + assert(wl == 4); + + u8* write_base = get_upload_buffer(); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = dma.read_val(offset); + offset += 2; + qw[1] = dma.read_val(offset); + offset += 2; + qw[2] = dma.read_val(offset); + offset += 2; + qw[3] = dma.read_val(offset); + offset += 2; + memcpy(write_base + (dest_qw * 16), qw, 16); + } + return offset; +} + +int TFragment::handle_unpack_v4_16_mode1(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl, + const u32 row[4]) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + assert(unpack.is_unsigned); + + // note: formulas below assume this! + assert(cl == 4); + assert(wl == 4); + + assert(code.num); + u8* write_base = get_upload_buffer(); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = row[0] + (u32)dma.read_val(offset); + offset += 2; + qw[1] = row[1] + (u32)dma.read_val(offset); + offset += 2; + qw[2] = row[2] + (u32)dma.read_val(offset); + offset += 2; + qw[3] = row[3] + (u32)dma.read_val(offset); + offset += 2; + + // fmt::print(" unpack rgba?: {:x} {:x} {:x} {:x}\n", qw[0], qw[1], qw[2], qw[3]); + memcpy(write_base + (dest_qw * 16), qw, 16); + } + return offset; +} + +int TFragment::handle_unpack_v3_32(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + assert(!unpack.is_unsigned); + + // note: formulas below assume this! 
+ assert(cl == 2); + assert(wl == 1); + + assert(code.num); + u8* write_base = get_upload_buffer(); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i * 2; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = dma.read_val(offset); + offset += 4; + qw[1] = dma.read_val(offset); + offset += 4; + qw[2] = dma.read_val(offset); + offset += 4; + qw[3] = 0x80; // this can be anything... but it seems like it tries to load from it sometimes? + memcpy(write_base + (dest_qw * 16), qw, 16); + } + return offset; +} + +int TFragment::handle_unpack_v4_32(const VifCode& code, + const DmaTransfer& dma, + int offset, + int cl, + int wl) { + VifCodeUnpack unpack(code); + assert(unpack.use_tops_flag); + assert(!unpack.is_unsigned); + + // note: formulas below assume this! + assert(cl == 4); + assert(wl == 4); + u8* write_base = get_upload_buffer(); + assert(code.num); + for (int i = 0; i < code.num; i++) { + // write every other qw + int dest_qw = unpack.addr_qw + i; + assert(dest_qw <= 328); + u32 qw[4]; + qw[0] = dma.read_val(offset); + offset += 4; + qw[1] = dma.read_val(offset); + offset += 4; + qw[2] = dma.read_val(offset); + offset += 4; + qw[3] = dma.read_val(offset); + offset += 4; + memcpy(write_base + (dest_qw * 16), qw, 16); + } + return offset; + + // u8* write_base = get_upload_buffer(); + // assert(code.num + unpack.addr_qw <= 328); + // memcpy(write_base + (unpack.addr_qw * 16), dma.data + offset, code.num * 16); + // return offset + code.num * 16; +} diff --git a/game/graphics/pipelines/opengl.cpp b/game/graphics/pipelines/opengl.cpp index bfe2e96d6b..3deea83051 100644 --- a/game/graphics/pipelines/opengl.cpp +++ b/game/graphics/pipelines/opengl.cpp @@ -227,10 +227,6 @@ void render_game_frame(int width, int height) { // render that chain. 
if (got_chain) { - // g_gfx_data->ogl_renderer.render(DmaFollower(g_gfx_data->dma_copier.get_last_input_data(), - // g_gfx_data->dma_copier.get_last_input_offset()), - // width, height); - // we want to serialize before rendering if (g_gfx_data->debug_gui.want_save()) { make_gfx_dump(); @@ -250,6 +246,9 @@ void render_game_frame(int width, int height) { options.screenshot_path = make_output_file_name(g_gfx_data->debug_gui.screenshot_name()); } g_gfx_data->ogl_renderer.render(DmaFollower(chain.data.data(), chain.start_offset), options); + // g_gfx_data->ogl_renderer.render(DmaFollower(g_gfx_data->dma_copier.get_last_input_data(), + // g_gfx_data->dma_copier.get_last_input_offset()), + // options); } // before vsync, mark the chain as rendered. diff --git a/game/graphics/texture/TexturePool.cpp b/game/graphics/texture/TexturePool.cpp index 6accaca91d..6d5e6ddf26 100644 --- a/game/graphics/texture/TexturePool.cpp +++ b/game/graphics/texture/TexturePool.cpp @@ -184,6 +184,14 @@ std::vector> TexturePool::convert_textures(const u32 size = ((sizes[0] + sizes[1] + 2047) / 256) * 256; m_tex_converter.upload(memory_base + texture_page.segment[0].block_data_ptr, texture_page.segment[0].dest, size); + } else if (mode == 0) { + has_segment[1] = false; + has_segment[2] = false; + u32 size = ((sizes[0] + 255) / 256) * 256; + + // dest is in 4-byte vram words + m_tex_converter.upload(memory_base + texture_page.segment[0].block_data_ptr, + texture_page.segment[0].dest, size); } else { // no reason to skip this, other than lg::error("TexturePool skipping upload now with mode {}.", mode); diff --git a/game/mips2c/functions/sky_tng.cpp b/game/mips2c/functions/sky_tng.cpp index 51556660f7..8fe39cc8fc 100644 --- a/game/mips2c/functions/sky_tng.cpp +++ b/game/mips2c/functions/sky_tng.cpp @@ -5,11 +5,7 @@ namespace Mips2C { ExecutionContext sky_regs_vfs; -void get_fake_spad_addr(int dst, void* sym_addr, u32 offset, ExecutionContext* c) { - u32 val; - memcpy(&val, sym_addr, 4); - 
c->gprs[dst].du64[0] = val + offset; -} + } // namespace Mips2C // clang-format off diff --git a/game/mips2c/functions/tfrag.cpp b/game/mips2c/functions/tfrag.cpp new file mode 100644 index 0000000000..fc0c0dc507 --- /dev/null +++ b/game/mips2c/functions/tfrag.cpp @@ -0,0 +1,911 @@ + +//--------------------------MIPS2C--------------------- +#include "game/mips2c/mips2c_private.h" +#include "game/kernel/kscheme.h" +namespace Mips2C { +namespace draw_inline_array_tfrag { +struct Cache { + void* tfrag_work; // *tfrag-work* + void* fake_scratchpad_data; // *fake-scratchpad-data* + void* transform_regs; +} cache; + +// t0 = tfrag work +// t8 = tfrags +// t9 = tfrag count +// clang-format off +u64 execute(void* ctxt) { + auto* c = (ExecutionContext*)ctxt; + load_vfs_from_tf_regs(cache.transform_regs, c); + bool bc = false; + c->daddiu(sp, sp, -128); // daddiu sp, sp, -128 + c->sd(ra, 0, sp); // sd ra, 0(sp) + c->sq(s0, 16, sp); // sq s0, 16(sp) + c->sq(s1, 32, sp); // sq s1, 32(sp) + c->sq(s2, 48, sp); // sq s2, 48(sp) + c->sq(s3, 64, sp); // sq s3, 64(sp) + c->sq(s4, 80, sp); // sq s4, 80(sp) + c->sq(s5, 96, sp); // sq s5, 96(sp) + c->sq(gp, 112, sp); // sq gp, 112(sp) + c->lui(t2, 5120); // lui t2, 5120 = (0x14000000) + c->lw(v1, 4, a3); // lw v1, 4(a3) + c->lui(t3, 4096); // lui t3, 4096 = (0x10000000) + c->lui(t1, 4096); // lui t1, 4096 = (0x10000000) + // Unknown instr: sync.l + // Unknown instr: cache dxwbin v1, 0 + // Unknown instr: sync.l + // Unknown instr: cache dxwbin v1, 1 + // Unknown instr: sync.l + c->load_symbol(t0, cache.tfrag_work); // lw t0, *tfrag-work*(s7) + c->ori(t4, t3, 54272); // ori t4, t3, 54272 = (0x1000D400) SPR TO + c->ori(t1, t1, 53248); // ori t1, t1, 53248 = (0x1000D000) SPR FROM + + // patched access to scratchpad + // c->lui(t5, 28672); // lui t5, 28672 = (0x70000000) + get_fake_spad_addr(t5, cache.fake_scratchpad_data, 0, c); + + c->lqc2(vf3, 80, t0); // lqc2 vf3, 80(t0) + c->sw(a3, 176, t0); // sw a3, 176(t0) + c->ori(a3, t5, 2064); 
// ori a3, t5, 2064 + c->addiu(t3, r0, 0); // addiu t3, r0, 0 + c->ori(t5, t5, 1040); // ori t5, t5, 1040 + c->vmax_bc(DEST::xyzw, BC::w, vf1, vf0, vf0); // vmaxw.xyzw vf1, vf0, vf0 + c->lh(t7, 0, a0); // lh t7, 0(a0) + c->lqc2(vf4, 96, t0); // lqc2 vf4, 96(t0) + c->addiu(a1, a1, -4); // addiu a1, a1, -4 + c->addiu(t6, r0, 0); // addiu t6, r0, 0 + c->mov64(ra, a3); // or ra, a3, r0 + + block_1: + bc = c->sgpr64(t7) != 0; // bne t7, r0, L42 + // nop // sll r0, r0, 0 + if (bc) {goto block_4;} // branch non-likely + + c->addiu(a0, a0, 2); // addiu a0, a0, 2 + c->addiu(a1, a1, 1024); // addiu a1, a1, 1024 + c->daddiu(a2, a2, -16); // daddiu a2, a2, -16 + c->lh(t7, 0, a0); // lh t7, 0(a0) + bc = ((s64)c->sgpr64(a2)) <= 0; // blez a2, L69 + // nop // sll r0, r0, 0 + if (bc) {goto block_55;} // branch non-likely + + //beq r0, r0, L41 // beq r0, r0, L41 + // nop // sll r0, r0, 0 + goto block_1; // branch always + + + block_4: + + // this block is just waiting for any in-progress SPR TO's to end + // we can just skip it. + /* + c->lw(t7, 0, t4); // lw t7, 0(t4) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(t7, t7, 256); // andi t7, t7, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t7) != 0; // bne t7, r0, L42 + // nop // sll r0, r0, 0 + if (bc) {goto block_4;} // branch non-likely + */ + + // this is setting up a scratchpad TO transfer. + { + // set MADR + //c->sw(a1, 16, t4); // sw a1, 16(t4) + u32 madr = c->sgpr64(a1); + + c->xori(t7, t5, 1024); // xori t7, t5, 1024 + + // set SADR + //c->sw(t7, 128, t4); // sw t7, 128(t4) + u32 sadr = c->sgpr64(t7); + + c->addiu(t7, r0, 64); // addiu t7, r0, 64 + + // set QWC + //c->sw(t7, 32, t4); // sw t7, 32(t4) + u32 qwc = c->sgpr64(t7); + + c->addiu(t7, r0, 256); // addiu t7, r0, 256 + + // GO! 
+ //c->sw(t7, 0, t4); // sw t7, 0(t4) + spad_to_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + // nop // sll r0, r0, 0 + } + + + block_6: + c->mov64(gp, a0); // or gp, a0, r0 + // fprintf(stderr, "block_6: gp = 0x%lx\n", c->sgpr64(gp)); + c->xori(t5, t5, 1024); // xori t5, t5, 1024 + c->daddiu(a0, a0, 2); // daddiu a0, a0, 2 + c->mov64(t9, a0); // or t9, a0, r0 + c->mov64(t8, t5); // or t8, t5, r0 + c->daddiu(t7, a2, -16); // daddiu t7, a2, -16 + bc = ((s64)c->sgpr64(t7)) > 0; // bgtz t7, L45 + c->lh(t7, 0, a0); // lh t7, 0(a0) + if (bc) {goto block_10;} // branch non-likely + + //beq r0, r0, L48 // beq r0, r0, L48 + // nop // sll r0, r0, 0 + goto block_14; // branch always + + + block_8: + c->daddiu(a2, a2, -16); // daddiu a2, a2, -16 + c->addiu(a0, a0, 2); // addiu a0, a0, 2 + bc = ((s64)c->sgpr64(a2)) <= 0; // blez a2, L48 + c->lh(t7, 0, a0); // lh t7, 0(a0) + if (bc) {goto block_14;} // branch non-likely + + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + + block_10: + bc = c->sgpr64(t7) == 0; // beq t7, r0, L44 + c->addiu(a1, a1, 1024); // addiu a1, a1, 1024 + if (bc) {goto block_8;} // branch non-likely + + + // this is waiting on spad transfer and incrementing wait counts + // block_11: + /* + c->lw(t7, 0, t4); // lw t7, 0(t4) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(t7, t7, 256); // andi t7, t7, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t7) == 0; // beq t7, r0, L47 + // nop // sll r0, r0, 0 + if (bc) {goto block_13;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(t7, 188, t0); // lw t7, 188(t0) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(t7, t7, 1); // daddiu t7, t7, 1 + // nop // sll r0, r0, 0 + c->sw(t7, 188, t0); // sw t7, 188(t0) + //beq r0, r0, L46 // beq r0, r0, L46 + // nop // sll r0, r0, 0 + goto block_11; // branch always + */ + + + // tfrag bank loop? 
+ // block_13: + { + //c->sw(a1, 16, t4); // sw a1, 16(t4) + u32 madr = c->sgpr64(a1); + c->xori(t7, t5, 1024); // xori t7, t5, 1024 + //c->sw(t7, 128, t4); // sw t7, 128(t4) + u32 sadr = c->sgpr64(t7); + c->addiu(t7, r0, 64); // addiu t7, r0, 64 + //c->sw(t7, 32, t4); // sw t7, 32(t4) + u32 qwc = c->sgpr64(t7); + c->addiu(t7, r0, 256); // addiu t7, r0, 256 + //beq r0, r0, L49 // beq r0, r0, L49 + //c->sw(t7, 0, t4); // sw t7, 0(t4) + spad_to_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + } + goto block_16; // branch always + + + block_14: + /* + c->lw(t7, 0, t4); // lw t7, 0(t4) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(t7, t7, 256); // andi t7, t7, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t7) == 0; // beq t7, r0, L49 + // nop // sll r0, r0, 0 + if (bc) {goto block_16;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(t7, 188, t0); // lw t7, 188(t0) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(t7, t7, 1); // daddiu t7, t7, 1 + // nop // sll r0, r0, 0 + c->sw(t7, 188, t0); // sw t7, 188(t0) + //beq r0, r0, L48 // beq r0, r0, L48 + // nop // sll r0, r0, 0 + goto block_14; // branch always + */ + + + block_16: + c->lb(t7, 0, gp); // lb t7, 0(gp) + c->addiu(gp, gp, 1); // addiu gp, gp, 1 + // nop // sll r0, r0, 0 + c->sw(gp, 160, t0); // sw gp, 160(t0) + bc = c->sgpr64(t7) != 0; // bne t7, r0, L50 + c->sw(t9, 164, t0); // sw t9, 164(t0) + if (bc) {goto block_18;} // branch non-likely + + c->daddiu(a2, a2, -8); // daddiu a2, a2, -8 + c->addiu(t8, t8, 512); // addiu t8, t8, 512 + //beq r0, r0, L65 // beq r0, r0, L65 + // nop // sll r0, r0, 0 + goto block_47; // branch always + + + block_18: + c->addiu(t9, r0, 128); // addiu t9, r0, 128 + c->lqc2(vf2, 16, t8); // lqc2 vf2, 16(t8) + + block_19: + c->daddiu(gp, t6, -124); // daddiu gp, t6, -124 + // fprintf(stderr, "block_19: gp = 0x%lx\n", c->sgpr64(gp)); + // nop // sll r0, r0, 0 + bc = ((s64)c->sgpr64(gp)) <= 0; 
// blez gp, L54 + // nop // sll r0, r0, 0 + if (bc) {goto block_23;} // branch non-likely + + + // block_20: + /* + c->lw(ra, 0, t1); // lw ra, 0(t1) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(ra, ra, 256); // andi ra, ra, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(ra) == 0; // beq ra, r0, L53 + // nop // sll r0, r0, 0 + if (bc) {goto block_22;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(ra, 184, t0); // lw ra, 184(t0) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(ra, ra, 1); // daddiu ra, ra, 1 + // nop // sll r0, r0, 0 + c->sw(ra, 184, t0); // sw ra, 184(t0) + //beq r0, r0, L52 // beq r0, r0, L52 + // nop // sll r0, r0, 0 + goto block_20; // branch always + */ + + + // block_22: + { + //c->sw(a3, 128, t1); // sw a3, 128(t1) + u32 sadr = c->sgpr64(a3); + c->xori(a3, a3, 6144); // xori a3, a3, 6144 + //c->sw(v1, 16, t1); // sw v1, 16(t1) + u32 madr = c->sgpr64(v1); + c->sll(ra, t6, 4); // sll ra, t6, 4 + c->addu(v1, v1, ra); // addu v1, v1, ra + c->mov64(ra, a3); // or ra, a3, r0 + //c->sw(t6, 32, t1); // sw t6, 32(t1) + u32 qwc = c->sgpr64(t6); + c->addiu(t6, r0, 256); // addiu t6, r0, 256 + //c->sw(t6, 0, t1); // sw t6, 0(t1) + spad_from_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + c->addiu(t6, r0, 0); // addiu t6, r0, 0 + } + + block_23: + c->and_(gp, t7, t9); // and gp, t7, t9 + c->vmula_bc(DEST::xyzw, BC::x, vf16, vf2); // vmulax.xyzw acc, vf16, vf2 + bc = c->sgpr64(gp) == 0; // beq gp, r0, L64 + c->lwu(gp, 36, t8); // lwu gp, 36(t8) + if (bc) {goto block_46;} // branch non-likely + + c->vmadda_bc(DEST::xyzw, BC::y, vf17, vf2); // vmadday.xyzw acc, vf17, vf2 + c->lbu(s5, 45, t8); // lbu s5, 45(t8) + c->vmadda_bc(DEST::xyzw, BC::z, vf18, vf2); // vmaddaz.xyzw acc, vf18, vf2 + c->sw(gp, 4, t0); // sw gp, 4(t0) + c->vmsuba_bc(DEST::xyzw, BC::w, vf19, vf0); // vmsubaw.xyzw acc, vf19, vf0 + c->sh(s5, 0, t0); // sh s5, 0(t0) + c->vmadd_bc(DEST::xyzw, 
BC::w, vf5, vf1, vf2); // vmaddw.xyzw vf5, vf1, vf2 + c->lwu(gp, 32, t8); // lwu gp, 32(t8) + // fprintf(stderr, "block_23-0 gp = 0x%lx\n", c->sgpr64(gp)); + c->vmula_bc(DEST::xyzw, BC::w, vf27, vf0); // vmulaw.xyzw acc, vf27, vf0 + c->lbu(s5, 47, t8); // lbu s5, 47(t8) + c->vmadda_bc(DEST::xyzw, BC::x, vf24, vf2); // vmaddax.xyzw acc, vf24, vf2 + c->sw(gp, 20, t0); // sw gp, 20(t0) + c->vmadda_bc(DEST::xyzw, BC::y, vf25, vf2); // vmadday.xyzw acc, vf25, vf2 + c->sh(s5, 16, t0); // sh s5, 16(t0) + c->vmadda_bc(DEST::xyzw, BC::z, vf26, vf2); // vmaddaz.xyzw acc, vf26, vf2 + c->lwu(gp, 32, t8); // lwu gp, 32(t8) + c->mov128_gpr_vf(s5, vf5); // qmfc2.i s5, vf5 + c->lbu(s4, 44, t8); // lbu s4, 44(t8) + c->vmadd_bc(DEST::xyzw, BC::w, vf6, vf1, vf2); // vmaddw.xyzw vf6, vf1, vf2 + c->sw(gp, 36, t0); // sw gp, 36(t0) + c->vmsub_bc(DEST::xyzw, BC::w, vf8, vf1, vf2); // vmsubw.xyzw vf8, vf1, vf2 + c->sh(s4, 32, t0); // sh s4, 32(t0) + c->pcgtw(s5, r0, s5); // pcgtw s5, r0, s5 + c->lwu(gp, 40, t8); // lwu gp, 40(t8) + // fprintf(stderr, "block_23-1: gp = 0x%lx\n", c->sgpr64(gp)); + c->ppach(s5, r0, s5); // ppach s5, r0, s5 + c->lbu(s4, 46, t8); // lbu s4, 46(t8) + c->vadd_bc(DEST::xyzw, BC::z, vf6, vf3, vf6); // vaddz.xyzw vf6, vf3, vf6 + c->sw(gp, 52, t0); // sw gp, 52(t0) + c->vadd_bc(DEST::xyzw, BC::z, vf7, vf3, vf8); // vaddz.xyzw vf7, vf3, vf8 + c->sw(t3, 12, t0); // sw t3, 12(t0) + bc = c->sgpr64(s5) != 0; // bne s5, r0, L63 + c->sh(s4, 48, t0); // sh s4, 48(t0) + if (bc) {goto block_45;} // branch non-likely + + c->vmini(DEST::xyzw, vf4, vf4, vf8); // vmini.xyzw vf4, vf4, vf8 + c->sw(t3, 28, t0); // sw t3, 28(t0) + // nop // sll r0, r0, 0 + c->lbu(s5, 53, t8); // lbu s5, 53(t8) + c->mov128_gpr_vf(gp, vf6); // qmfc2.i gp, vf6 + c->sw(t3, 44, t0); // sw t3, 44(t0) + c->mov128_gpr_vf(s3, vf7); // qmfc2.i s3, vf7 + c->lbu(s4, 56, t8); // lbu s4, 56(t8) + c->pcgtw(s2, r0, gp); // pcgtw s2, r0, gp + c->lw(gp, 12, t8); // lw gp, 12(t8) + // fprintf(stderr, "loaded gp: 0x%lx 
from tfragment: 0x%lx\n", c->sgpr64(gp), c->sgpr64(t8)); + c->pcgtw(s3, r0, s3); // pcgtw s3, r0, s3 + c->sb(s4, 76, t0); // sb s4, 76(t0) + c->pinteh(s4, s2, s3); // pinteh s4, s2, s3 + c->lbu(s2, 54, t8); // lbu s2, 54(t8) + c->ppacb(s3, r0, s4); // ppacb s3, r0, s4 + c->lbu(s1, 55, t8); // lbu s1, 55(t8) + bc = c->sgpr64(s3) == 0; // beq s3, r0, L56 + c->dsrl32(s4, s3, 8); // dsrl32 s4, s3, 8 + if (bc) {goto block_36;} // branch non-likely + + bc = c->sgpr64(s2) == 0; // beq s2, r0, L56 + // nop // sll r0, r0, 0 + if (bc) {goto block_36;} // branch non-likely + + bc = c->sgpr64(s1) == 0; // beq s1, r0, L55 + c->dsrl(s5, s3, 16); // dsrl s5, s3, 16 + if (bc) {goto block_33;} // branch non-likely + + bc = c->sgpr64(s5) == 0; // beq s5, r0, L55 + c->dsrl32(s5, s3, 24); // dsrl32 s5, s3, 24 + if (bc) {goto block_33;} // branch non-likely + + bc = c->sgpr64(s5) != 0; // bne s5, r0, L64 + c->addiu(s5, s1, 3); // addiu s5, s1, 3 + if (bc) {goto block_46;} // branch non-likely + + c->sra(s4, s5, 2); // sra s4, s5, 2 + c->mov64(s5, s1); // or s5, s1, r0 + c->sll(t3, s4, 2); // sll t3, s4, 2 + c->sh(s4, 64, t0); // sh s4, 64(t0) + // nop // sll r0, r0, 0 + c->sb(t3, 78, t0); // sb t3, 78(t0) + c->daddiu(t6, t6, 3); // daddiu t6, t6, 3 + c->lq(s2, 32, t0); // lq s2, 32(t0) + // nop // sll r0, r0, 0 + c->lq(s1, 48, t0); // lq s1, 48(t0) + // nop // sll r0, r0, 0 + c->lq(t3, 64, t0); // lq t3, 64(t0) + c->sq(s2, 0, ra); // sq s2, 0(ra) + // nop // sll r0, r0, 0 + c->sq(s1, 16, ra); // sq s1, 16(ra) + c->dsrl32(s2, s3, 16); // dsrl32 s2, s3, 16 + c->sq(t3, 32, ra); // sq t3, 32(ra) + c->daddiu(ra, ra, 48); // daddiu ra, ra, 48 + bc = c->sgpr64(s2) != 0; // bne s2, r0, L57 + c->ori(t3, t2, 18); // ori t3, t2, 18 + if (bc) {goto block_38;} // branch non-likely + + c->dsrl32(t3, s3, 8); // dsrl32 t3, s3, 8 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t3) != 0; // bne t3, r0, L57 + c->ori(t3, t2, 16); // ori t3, t2, 16 + if (bc) {goto block_38;} // branch non-likely + + //beq r0, 
r0, L57 // beq r0, r0, L57 + c->ori(t3, t2, 14); // ori t3, t2, 14 + goto block_38; // branch always + + + block_33: + bc = c->sgpr64(s4) != 0; // bne s4, r0, L64 + c->addiu(s5, s2, 3); // addiu s5, s2, 3 + if (bc) {goto block_46;} // branch non-likely + + c->sra(s4, s5, 2); // sra s4, s5, 2 + c->mov64(s5, s2); // or s5, s2, r0 + c->sll(t3, s4, 2); // sll t3, s4, 2 + c->sh(s4, 64, t0); // sh s4, 64(t0) + // nop // sll r0, r0, 0 + c->sb(t3, 78, t0); // sb t3, 78(t0) + c->daddiu(t6, t6, 2); // daddiu t6, t6, 2 + c->lq(s2, 16, t0); // lq s2, 16(t0) + // nop // sll r0, r0, 0 + c->lq(t3, 64, t0); // lq t3, 64(t0) + c->sq(s2, 0, ra); // sq s2, 0(ra) + c->dsrl(s3, s3, 8); // dsrl s3, s3, 8 + c->sq(t3, 16, ra); // sq t3, 16(ra) + c->daddiu(ra, ra, 32); // daddiu ra, ra, 32 + bc = c->sgpr64(s3) != 0; // bne s3, r0, L57 + c->ori(t3, t2, 10); // ori t3, t2, 10 + if (bc) {goto block_38;} // branch non-likely + + //beq r0, r0, L57 // beq r0, r0, L57 + c->ori(t3, t2, 8); // ori t3, t2, 8 + goto block_38; // branch always + + + block_36: + bc = c->sgpr64(s4) != 0; // bne s4, r0, L64 + c->addiu(s4, s5, 3); // addiu s4, s5, 3 + if (bc) {goto block_46;} // branch non-likely + + c->sra(s4, s4, 2); // sra s4, s4, 2 + // nop // sll r0, r0, 0 + c->sll(t3, s4, 2); // sll t3, s4, 2 + c->sh(s4, 64, t0); // sh s4, 64(t0) + // nop // sll r0, r0, 0 + c->sb(t3, 78, t0); // sb t3, 78(t0) + c->ori(t3, t2, 6); // ori t3, t2, 6 + c->lq(s3, 0, t0); // lq s3, 0(t0) + c->daddiu(t6, t6, 2); // daddiu t6, t6, 2 + c->lq(s2, 64, t0); // lq s2, 64(t0) + c->sq(s3, 0, ra); // sq s3, 0(ra) + // nop // sll r0, r0, 0 + c->sq(s2, 16, ra); // sq s2, 16(ra) + c->daddiu(ra, ra, 32); // daddiu ra, ra, 32 + + block_38: + c->addiu(s3, r0, 127); // addiu s3, r0, 127 + c->daddu(s2, t6, s4); // daddu s2, t6, s4 + c->dsubu(s3, s3, s2); // dsubu s3, s3, s2 + // nop // sll r0, r0, 0 + bc = ((s64)c->sgpr64(s3)) >= 0; // bgez s3, L60 + // nop // sll r0, r0, 0 + if (bc) {goto block_42;} // branch non-likely + + + // block_39: 
+ /* + c->lw(ra, 0, t1); // lw ra, 0(t1) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(ra, ra, 256); // andi ra, ra, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(ra) == 0; // beq ra, r0, L59 + // nop // sll r0, r0, 0 + if (bc) {goto block_41;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(ra, 184, t0); // lw ra, 184(t0) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(ra, ra, 1); // daddiu ra, ra, 1 + // nop // sll r0, r0, 0 + c->sw(ra, 184, t0); // sw ra, 184(t0) + //beq r0, r0, L58 // beq r0, r0, L58 + // nop // sll r0, r0, 0 + goto block_39; // branch always + */ + + + // block_41: + { + //c->sw(a3, 128, t1); // sw a3, 128(t1) + u32 sadr = c->sgpr64(a3); + c->xori(a3, a3, 6144); // xori a3, a3, 6144 + //c->sw(v1, 16, t1); // sw v1, 16(t1) + u32 madr = c->sgpr64(v1); + c->sll(ra, t6, 4); // sll ra, t6, 4 + c->addu(v1, v1, ra); // addu v1, v1, ra + c->mov64(ra, a3); // or ra, a3, r0 + //c->sw(t6, 32, t1); // sw t6, 32(t1) + u32 qwc = c->sgpr64(t6); + c->addiu(t6, r0, 256); // addiu t6, r0, 256 + //c->sw(t6, 0, t1); // sw t6, 0(t1) + spad_from_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + c->addiu(t6, r0, 0); // addiu t6, r0, 0 + } + + block_42: + c->daddu(t6, t6, s4); // daddu t6, t6, s4 + c->sw(t8, 168, t0); // sw t8, 168(t0) + c->ld(s4, 0, gp); // ld s4, 0(gp) + c->daddiu(t8, gp, 8); // daddiu t8, gp, 8 + c->daddiu(gp, s5, -4); // daddiu gp, s5, -4 + // fprintf(stderr, "block_42: gp = 0x%lx\n", c->sgpr64(gp)); + c->lq(s5, 128, t0); // lq s5, 128(t0) + c->pextlh(s4, r0, s4); // pextlh s4, r0, s4 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->paddw(s2, s4, s5); // paddw s2, s4, s5 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->lw(s4, 0, s2); // lw s4, 0(s2) + c->dsra32(s3, s2, 0); // dsra32 s3, s2, 0 + c->lw(s3, 0, s3); // lw s3, 0(s3) + c->pcpyud(s1, s2, s2); // pcpyud s1, s2, s2 + c->lw(s2, 0, s1); // lw s2, 0(s1) + c->dsra32(s1, s1, 0); // dsra32 s1, s1, 0 + bc = 
((s64)c->sgpr64(gp)) <= 0; // blez gp, L62 + c->lw(s1, 0, s1); // lw s1, 0(s1) + if (bc) {goto block_44;} // branch non-likely + + + block_43: + c->ld(s0, 0, t8); // ld s0, 0(t8) + c->daddiu(ra, ra, 16); // daddiu ra, ra, 16 + c->daddiu(t8, t8, 8); // daddiu t8, t8, 8 + c->sw(s4, -16, ra); // sw s4, -16(ra) + c->daddiu(gp, gp, -4); // daddiu gp, gp, -4 + c->sw(s3, -12, ra); // sw s3, -12(ra) + c->pextlh(s4, r0, s0); // pextlh s4, r0, s0 + c->sw(s2, -8, ra); // sw s2, -8(ra) + c->paddw(s2, s4, s5); // paddw s2, s4, s5 + // this one is storing a 0! + c->sw(s1, -4, ra); // sw s1, -4(ra) + c->lw(s4, 0, s2); // lw s4, 0(s2) + c->dsra32(s3, s2, 0); // dsra32 s3, s2, 0 + c->lw(s3, 0, s3); // lw s3, 0(s3) + c->pcpyud(s1, s2, s2); // pcpyud s1, s2, s2 + c->lw(s2, 0, s1); // lw s2, 0(s1) + c->dsra32(s1, s1, 0); // dsra32 s1, s1, 0 + bc = ((s64)c->sgpr64(gp)) > 0; // bgtz gp, L61 + c->lw(s1, 0, s1); // lw s1, 0(s1) + if (bc) {goto block_43;} // branch non-likely + + + block_44: + c->daddiu(ra, ra, 16); // daddiu ra, ra, 16 + c->lw(t8, 168, t0); // lw t8, 168(t0) + // nop // sll r0, r0, 0 + c->sw(s4, -16, ra); // sw s4, -16(ra) + // nop // sll r0, r0, 0 + c->sw(s3, -12, ra); // sw s3, -12(ra) + // nop // sll r0, r0, 0 + c->sw(s2, -8, ra); // sw s2, -8(ra) + // nop // sll r0, r0, 0 + c->sw(s1, -4, ra); // sw s1, -4(ra) + + block_45: + c->xor_(t7, t7, t9); // xor t7, t7, t9 + // nop // sll r0, r0, 0 + + block_46: + c->daddiu(t8, t8, 64); // daddiu t8, t8, 64 + c->srl(t9, t9, 1); // srl t9, t9, 1 + c->addiu(a2, a2, -1); // addiu a2, a2, -1 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t9) != 0; // bne t9, r0, L51 + c->lqc2(vf2, 16, t8); // lqc2 vf2, 16(t8) + if (bc) {goto block_19;} // branch non-likely + + + block_47: + // nop // sll r0, r0, 0 + c->lw(gp, 160, t0); // lw gp, 160(t0) + // fprintf(stderr, "block_47: gp = 0x%lx\n", c->sgpr64(gp)); + // nop // sll r0, r0, 0 + c->lw(t9, 164, t0); // lw t9, 164(t0) + bc = c->sgpr64(gp) != c->sgpr64(t9); // bne gp, t9, L49 + c->sb(t7, -1, 
gp); // sb t7, -1(gp) + if (bc) {goto block_16;} // branch non-likely + + bc = ((s64)c->sgpr64(a2)) > 0; // bgtz a2, L43 + // nop // sll r0, r0, 0 + if (bc) {goto block_6;} // branch non-likely + + bc = c->sgpr64(t6) == 0; // beq t6, r0, L68 + // nop // sll r0, r0, 0 + if (bc) {goto block_53;} // branch non-likely + + + // block_50: + /* + c->lw(a0, 0, t1); // lw a0, 0(t1) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(a0, a0, 256); // andi a0, a0, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(a0) == 0; // beq a0, r0, L67 + // nop // sll r0, r0, 0 + if (bc) {goto block_52;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(a0, 184, t0); // lw a0, 184(t0) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(a0, a0, 1); // daddiu a0, a0, 1 + // nop // sll r0, r0, 0 + c->sw(a0, 184, t0); // sw a0, 184(t0) + //beq r0, r0, L66 // beq r0, r0, L66 + // nop // sll r0, r0, 0 + goto block_50; // branch always + */ + + + // block_52: + { + //c->sw(a3, 128, t1); // sw a3, 128(t1) + u32 sadr = c->sgpr64(a3); + c->xori(a0, a3, 6144); // xori a0, a3, 6144 + //c->sw(v1, 16, t1); // sw v1, 16(t1) + u32 madr = c->sgpr64(v1); + c->sll(a1, t6, 4); // sll a1, t6, 4 + c->addu(v1, v1, a1); // addu v1, v1, a1 + c->mov64(a0, a0); // or a0, a0, r0 + //c->sw(t6, 32, t1); // sw t6, 32(t1) + u32 qwc = c->sgpr64(t6); + c->addiu(a0, r0, 256); // addiu a0, r0, 256 + //c->sw(a0, 0, t1); // sw a0, 0(t1) + spad_from_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + c->addiu(a0, r0, 0); // addiu a0, r0, 0 + } + + block_53: + /* + c->lw(a0, 0, t1); // lw a0, 0(t1) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(a0, a0, 256); // andi a0, a0, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(a0) == 0; // beq a0, r0, L69 + // nop // sll r0, r0, 0 + if (bc) {goto block_55;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(a0, 184, t0); // lw a0, 184(t0) + // nop // sll r0, 
r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->daddiu(a0, a0, 1); // daddiu a0, a0, 1 + // nop // sll r0, r0, 0 + c->sw(a0, 184, t0); // sw a0, 184(t0) + //beq r0, r0, L68 // beq r0, r0, L68 + // nop // sll r0, r0, 0 + goto block_53; // branch always + */ + + + block_55: + c->lw(a0, 176, t0); // lw a0, 176(t0) + // nop // sll r0, r0, 0 + c->sw(t3, 172, t0); // sw t3, 172(t0) + // nop // sll r0, r0, 0 + c->sqc2(vf4, 112, t0); // sqc2 vf4, 112(t0) + // nop // sll r0, r0, 0 + c->sw(v1, 4, a0); // sw v1, 4(a0) + // nop // sll r0, r0, 0 + c->gprs[v0].du64[0] = 0; // or v0, r0, r0 + c->ld(ra, 0, sp); // ld ra, 0(sp) + c->lq(gp, 112, sp); // lq gp, 112(sp) + c->lq(s5, 96, sp); // lq s5, 96(sp) + c->lq(s4, 80, sp); // lq s4, 80(sp) + c->lq(s3, 64, sp); // lq s3, 64(sp) + c->lq(s2, 48, sp); // lq s2, 48(sp) + c->lq(s1, 32, sp); // lq s1, 32(sp) + c->lq(s0, 16, sp); // lq s0, 16(sp) + //jr ra // jr ra + c->daddiu(sp, sp, 128); // daddiu sp, sp, 128 + goto end_of_function; // return + + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + end_of_function: + return c->gprs[v0].du64[0]; +} + +// clang-format on +void link() { + cache.tfrag_work = intern_from_c("*tfrag-work*").c(); + cache.fake_scratchpad_data = intern_from_c("*fake-scratchpad-data*").c(); + cache.transform_regs = intern_from_c("*transform-regs*").c(); + gLinkedFunctionTable.reg("draw-inline-array-tfrag", execute, 512); +} + +} // namespace draw_inline_array_tfrag +} // namespace Mips2C + +// clang-format off +//--------------------------MIPS2C--------------------- +#include "game/mips2c/mips2c_private.h" +#include "game/kernel/kscheme.h" +namespace Mips2C { +namespace stats_tfrag_asm { +struct Cache { + void* tfrag_work; // *tfrag-work* + void* transform_regs; +} cache; + +u64 execute(void* ctxt) { + auto* c = (ExecutionContext*)ctxt; + bool bc = false; + load_vfs_from_tf_regs(cache.transform_regs, c); + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + 
c->load_symbol(v1, cache.tfrag_work); // lw v1, *tfrag-work*(s7) + // nop // sll r0, r0, 0 + c->lw(a1, 4, a0); // lw a1, 4(a0) + // nop // sll r0, r0, 0 + c->lqc2(vf10, 12, a0); // lqc2 vf10, 12(a0) + bc = c->sgpr64(a1) == 0; // beq a1, r0, L7 + c->lqc2(vf14, 80, v1); // lqc2 vf14, 80(v1) + if (bc) {goto block_15;} // branch non-likely + + c->vmula_bc(DEST::xyzw, BC::x, vf16, vf10); // vmulax.xyzw acc, vf16, vf10 + c->lb(a2, 49, a0); // lb a2, 49(a0) + c->vmadda_bc(DEST::xyzw, BC::y, vf17, vf10); // vmadday.xyzw acc, vf17, vf10 + c->lb(a2, 50, a0); // lb a2, 50(a0) + c->vmadda_bc(DEST::xyzw, BC::z, vf18, vf10); // vmaddaz.xyzw acc, vf18, vf10 + c->lb(a0, 51, a0); // lb a0, 51(a0) + c->vmsub_bc(DEST::xyzw, BC::w, vf9, vf19, vf0); // vmsubw.xyzw vf9, vf19, vf0 + // nop // sll r0, r0, 0 + c->vmula_bc(DEST::xyzw, BC::w, vf27, vf0); // vmulaw.xyzw acc, vf27, vf0 + // nop // sll r0, r0, 0 + c->vmadda_bc(DEST::xyzw, BC::x, vf24, vf10); // vmaddax.xyzw acc, vf24, vf10 + // nop // sll r0, r0, 0 + c->vmadda_bc(DEST::xyzw, BC::y, vf25, vf10); // vmadday.xyzw acc, vf25, vf10 + // nop // sll r0, r0, 0 + c->vadd_bc(DEST::xyzw, BC::w, vf9, vf9, vf10); // vaddw.xyzw vf9, vf9, vf10 + // nop // sll r0, r0, 0 + c->vmadd_bc(DEST::xyzw, BC::z, vf11, vf26, vf10); // vmaddz.xyzw vf11, vf26, vf10 + // nop // sll r0, r0, 0 + c->vadd_bc(DEST::xyzw, BC::w, vf12, vf11, vf10); // vaddw.xyzw vf12, vf11, vf10 + // nop // sll r0, r0, 0 + c->vsub_bc(DEST::xyzw, BC::w, vf13, vf11, vf10); // vsubw.xyzw vf13, vf11, vf10 + // nop // sll r0, r0, 0 + c->vsub_bc(DEST::xyzw, BC::z, vf11, vf0, vf12); // vsubz.xyzw vf11, vf0, vf12 + // nop // sll r0, r0, 0 + c->mov128_gpr_vf(a3, vf9); // qmfc2.i a3, vf9 + // nop // sll r0, r0, 0 + c->pcgtw(a3, r0, a3); // pcgtw a3, r0, a3 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->ppach(a3, r0, a3); // ppach a3, r0, a3 + c->mfc1(r0, f31); // mfc1 r0, f31 + bc = c->sgpr64(a3) != 0; // bne a3, r0, L7 + c->vadd_bc(DEST::xyzw, BC::z, vf12, vf14, vf12); // vaddz.xyzw vf12, vf14, 
vf12 + if (bc) {goto block_15;} // branch non-likely + + // nop // sll r0, r0, 0 + c->vadd_bc(DEST::xyzw, BC::z, vf13, vf14, vf13); // vaddz.xyzw vf13, vf14, vf13 + c->mov128_gpr_vf(t0, vf12); // qmfc2.i t0, vf12 + // nop // sll r0, r0, 0 + c->mov128_gpr_vf(a3, vf13); // qmfc2.i a3, vf13 + // nop // sll r0, r0, 0 + c->pcgtw(t0, r0, t0); // pcgtw t0, r0, t0 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->pcgtw(a3, r0, a3); // pcgtw a3, r0, a3 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->pinteh(a3, t0, a3); // pinteh a3, t0, a3 + c->mfc1(r0, f31); // mfc1 r0, f31 + c->ppacb(a3, r0, a3); // ppacb a3, r0, a3 + c->mfc1(r0, f31); // mfc1 r0, f31 + bc = c->sgpr64(a3) == 0; // beq a3, r0, L5 + // nop // sll r0, r0, 0 + if (bc) {goto block_12;} // branch non-likely + + bc = c->sgpr64(a2) == 0; // beq a2, r0, L5 + // nop // sll r0, r0, 0 + if (bc) {goto block_12;} // branch non-likely + + bc = c->sgpr64(a0) == 0; // beq a0, r0, L3 + c->dsrl(a0, a3, 16); // dsrl a0, a3, 16 + if (bc) {goto block_9;} // branch non-likely + + bc = c->sgpr64(a0) == 0; // beq a0, r0, L3 + // nop // sll r0, r0, 0 + if (bc) {goto block_9;} // branch non-likely + + c->dsrl32(a0, a3, 24); // dsrl32 a0, a3, 24 + c->lbu(a2, 4, a1); // lbu a2, 4(a1) + bc = c->sgpr64(a0) != 0; // bne a0, r0, L2 + c->lw(a0, 148, v1); // lw a0, 148(v1) + if (bc) {goto block_8;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(a0, 144, v1); // lw a0, 144(v1) + + block_8: + // nop // sll r0, r0, 0 + c->lbu(v1, 12, a1); // lbu v1, 12(a1) + // nop // sll r0, r0, 0 + c->lw(a3, 4, a0); // lw a3, 4(a0) + // nop // sll r0, r0, 0 + c->lw(a1, 8, a0); // lw a1, 8(a0) + c->addu(a2, a3, a2); // addu a2, a3, a2 + c->sw(a2, 4, a0); // sw a2, 4(a0) + // nop // sll r0, r0, 0 + c->lh(a2, 2, a0); // lh a2, 2(a0) + c->daddu(v1, a1, v1); // daddu v1, a1, v1 + c->sw(v1, 8, a0); // sw v1, 8(a0) + c->daddiu(v1, a2, 1); // daddiu v1, a2, 1 + c->sh(v1, 2, a0); // sh v1, 2(a0) + //beq r0, r0, L7 // beq r0, r0, L7 + // nop // sll r0, r0, 0 + goto 
block_15; // branch always + + + block_9: + c->dsrl32(a0, a3, 8); // dsrl32 a0, a3, 8 + c->lbu(a2, 2, a1); // lbu a2, 2(a1) + bc = c->sgpr64(a0) != 0; // bne a0, r0, L4 + c->lw(a0, 148, v1); // lw a0, 148(v1) + if (bc) {goto block_11;} // branch non-likely + + // nop // sll r0, r0, 0 + c->lw(a0, 144, v1); // lw a0, 144(v1) + + block_11: + // nop // sll r0, r0, 0 + c->lbu(v1, 10, a1); // lbu v1, 10(a1) + // nop // sll r0, r0, 0 + c->lw(a3, 4, a0); // lw a3, 4(a0) + // nop // sll r0, r0, 0 + c->lw(a1, 8, a0); // lw a1, 8(a0) + c->addu(a2, a3, a2); // addu a2, a3, a2 + c->sw(a2, 4, a0); // sw a2, 4(a0) + // nop // sll r0, r0, 0 + c->lh(a2, 2, a0); // lh a2, 2(a0) + c->daddu(v1, a1, v1); // daddu v1, a1, v1 + c->sw(v1, 8, a0); // sw v1, 8(a0) + c->daddiu(v1, a2, 1); // daddiu v1, a2, 1 + c->sh(v1, 2, a0); // sh v1, 2(a0) + //beq r0, r0, L7 // beq r0, r0, L7 + // nop // sll r0, r0, 0 + goto block_15; // branch always + + + block_12: + c->dsrl32(a0, a3, 8); // dsrl32 a0, a3, 8 + c->lbu(a2, 0, a1); // lbu a2, 0(a1) + bc = c->sgpr64(a0) != 0; // bne a0, r0, L6 + c->lw(a0, 148, v1); // lw a0, 148(v1) + if (bc) {goto block_14;} // branch non-likely + + //beq r0, r0, L6 // beq r0, r0, L6 + c->lw(a0, 144, v1); // lw a0, 144(v1) + goto block_14; // branch always + + + block_14: + // nop // sll r0, r0, 0 + c->lbu(v1, 8, a1); // lbu v1, 8(a1) + // nop // sll r0, r0, 0 + c->lw(a3, 4, a0); // lw a3, 4(a0) + // nop // sll r0, r0, 0 + c->lw(a1, 8, a0); // lw a1, 8(a0) + c->addu(a2, a3, a2); // addu a2, a3, a2 + c->sw(a2, 4, a0); // sw a2, 4(a0) + // nop // sll r0, r0, 0 + c->lh(a2, 2, a0); // lh a2, 2(a0) + c->daddu(v1, a1, v1); // daddu v1, a1, v1 + c->sw(v1, 8, a0); // sw v1, 8(a0) + c->daddiu(v1, a2, 1); // daddiu v1, a2, 1 + c->sh(v1, 2, a0); // sh v1, 2(a0) + + block_15: + c->gprs[v0].du64[0] = 0; // or v0, r0, r0 + //jr ra // jr ra + c->daddu(sp, sp, r0); // daddu sp, sp, r0 + goto end_of_function; // return + + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll 

//--------------------------MIPS2C---------------------
#include "game/mips2c/mips2c_private.h"
#include "game/kernel/kscheme.h"

// clang-format off
namespace Mips2C {
// Hand-patched MIPS2C translation of the GOAL function registered below as
// "time-of-day-interp-colors-scratch".
namespace time_of_day_interp_colors_scratch {

// Symbol addresses resolved once by link().
struct Cache {
  void* fake_scratchpad_data; // *fake-scratchpad-data*
} cache;

/*!
 * Blend packed byte colors with four quadword weight vectors and write one 32-bit result per
 * input entry.
 *
 * What the code below shows:
 *  - a0 is the output pointer: one word is stored per inner-loop iteration (sw ra, 0(a0)) and
 *    32 words (128 bytes) are produced per outer pass.
 *  - a1 is the input object: a word is read from 4(a1) and rounded up to a multiple of 32 to
 *    drive the outer loop (presumably the entry count - confirm against the GOAL caller), and
 *    source data is streamed starting at a1 + 12.
 *  - a2 is an object holding four quadword weights at offsets 1852/1868/1884/1900
 *    (presumably the time-of-day/mood weights - confirm against the GOAL caller).
 *  - Each output consumes 32 source bytes: bytes are zero-extended to halfwords
 *    (pextlb/pextub with r0), multiplied by the weights and accumulated in LO/HI
 *    (pmulth/pmaddh), shifted right by 6, the upper and lower 64 bits are summed, the result is
 *    clamped against the constant in a3 (pminh) and packed back to bytes (ppacb).
 *
 * Patches relative to the raw translation:
 *  - The scratchpad address (lui/ori 0x7000xxxx) is replaced with get_fake_spad_addr().
 *  - Writes to the DMA channel registers are replaced with synchronous spad_to_dma() copies of
 *    64 quadwords (1024 bytes); the loops that polled the channel for completion are commented
 *    out because the copy has already finished when spad_to_dma() returns.
 *
 * NOTE(review): the lq offsets 12/28/44 are not multiples of 16 while the buffer address is
 * asserted 16-byte aligned inside spad_to_dma(); this appears to rely on lq ignoring the low
 * four address bits - confirm against ExecutionContext::lq.
 *
 * Always returns 0 in v0.
 */
u64 execute(void* ctxt) {
  auto* c = (ExecutionContext*)ctxt;
  bool bc = false;
  // Prologue: save ra/fp. ra is reused as a scratch register in the inner loop.
  c->daddiu(sp, sp, -16);                           // daddiu sp, sp, -16
  c->sd(ra, 0, sp);                                 // sd ra, 0(sp)
  c->sd(fp, 8, sp);                                 // sd fp, 8(sp)
  c->mov64(fp, t9);                                 // or fp, t9, r0
  // nop                                            // sll r0, r0, 0
  c->lui(v1, 28672);                                // lui v1, 28672 0x7000
  c->daddiu(t4, a1, 12);                            // daddiu t4, a1, 12
  //c->ori(v1, v1, 2064);                           // ori v1, v1, 2064 SPAD mods
  // v1 = fake scratchpad base + 2064 (overwrites the lui result above).
  get_fake_spad_addr(v1, cache.fake_scratchpad_data, 2064, c);
  // Unknown instr: ld a3, L168(fp)
  // L168:
  //     .word 0xff00ff
  //     .word 0x8000ff
  // The pc-relative constant load is replaced by writing the two words directly.
  // As halfwords this is {0xff, 0xff, 0xff, 0x80}: the per-channel clamp used by pminh below.
  c->gprs[a3].du32[0] = 0xff00ff;
  c->gprs[a3].du32[1] = 0x8000ff;

  c->lui(t0, 4096);                                 // lui t0, 4096
  c->lw(t1, 4, a1);                                 // lw t1, 4(a1)
  c->ori(a1, t0, 54272);                            // ori a1, t0, 54272 = (0x1000D400) SPR TO
  c->lq(t0, 1852, a2);                              // lq t0, 1852(a2)
  c->addiu(t2, t1, 31);                             // addiu t2, t1, 31
  c->lq(t1, 1868, a2);                              // lq t1, 1868(a2)
  c->sra(t3, t2, 5);                                // sra t3, t2, 5
  c->lq(t2, 1884, a2);                              // lq t2, 1884(a2)
  c->sll(t3, t3, 5);                                // sll t3, t3, 5
  c->lq(a2, 1900, a2);                              // lq a2, 1900(a2)

  // wait for DMA to finish, can just remove this
  /*
  block_1:
  c->lw(t5, 0, a1);                                 // lw t5, 0(a1)
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  c->andi(t5, t5, 256);                             // andi t5, t5, 256
  // nop                                            // sll r0, r0, 0
  bc = c->sgpr64(t5) != 0;                          // bne t5, r0, L62
  // nop                                            // sll r0, r0, 0
  if (bc) {goto block_1;}                           // branch non-likely
  */

  // First transfer: copy 64 quadwords from main memory at t4 into the buffer at v1.
  // The stores to the DMA registers are kept as comments; their operands are captured instead.
  {
    // c->sw(t4, 16, a1);                           // sw t4, 16(a1)
    u32 madr = c->sgpr64(t4);
    c->daddiu(t3, t3, -32);                         // daddiu t3, t3, -32
    // c->sw(v1, 128, a1);                          // sw v1, 128(a1)
    u32 sadr = c->sgpr64(v1);
    c->addiu(t5, r0, 64);                           // addiu t5, r0, 64
    //c->sw(t5, 32, a1);                            // sw t5, 32(a1)
    u32 qwc = c->sgpr64(t5);
    c->addiu(t5, r0, 256);                          // addiu t5, r0, 256
    // c->sw(t5, 0, a1);                            // sw t5, 0(a1)
    spad_to_dma(cache.fake_scratchpad_data, madr, sadr, qwc);
    c->daddiu(t4, t4, 1024);                        // daddiu t4, t4, 1024
  }

  // Outer loop: t6 = buffer to process now, v1 flips (xor 1024) to the other buffer.
  // If more input remains (t3 > 0) the next 1024 bytes are fetched into the other buffer first.
  block_3:
  c->mov64(t6, v1);                                 // or t6, v1, r0
  c->xori(v1, v1, 1024);                            // xori v1, v1, 1024
  bc = ((s64)c->sgpr64(t3)) <= 0;                   // blez t3, L66
  c->daddiu(t3, t3, -32);                           // daddiu t3, t3, -32
  if (bc) {goto block_7;}                           // branch non-likely


  /*
  block_4:
  c->lw(t5, 0, a1);                                 // lw t5, 0(a1)
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  c->andi(t5, t5, 256);                             // andi t5, t5, 256
  // nop                                            // sll r0, r0, 0
  bc = c->sgpr64(t5) == 0;                          // beq t5, r0, L65
  // nop                                            // sll r0, r0, 0
  if (bc) {goto block_6;}                           // branch non-likely

  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  //beq r0, r0, L64                                 // beq r0, r0, L64
  // nop                                            // sll r0, r0, 0
  goto block_4;                                     // branch always
  */

  // Fetch the next chunk into the buffer that is not being processed in this pass.
  {
    // block_6:
    // c->sw(t4, 16, a1);                           // sw t4, 16(a1)
    u32 madr = c->sgpr64(t4);
    // nop                                          // sll r0, r0, 0
    // c->sw(v1, 128, a1);                          // sw v1, 128(a1)
    u32 sadr = c->sgpr64(v1);
    c->addiu(t5, r0, 64);                           // addiu t5, r0, 64
    // c->sw(t5, 32, a1);                           // sw t5, 32(a1)
    u32 qwc = c->sgpr64(t5);
    c->addiu(t5, r0, 256);                          // addiu t5, r0, 256
    // c->sw(t5, 0, a1);                            // sw t5, 0(a1)
    spad_to_dma(cache.fake_scratchpad_data, madr, sadr, qwc);
    c->daddiu(t4, t4, 1024);                        // daddiu t4, t4, 1024
    //beq r0, r0, L67                               // beq r0, r0, L67
    // nop                                          // sll r0, r0, 0
  }
  goto block_9;                                     // branch always



  // Last chunk: nothing left to fetch. The original only waited for the DMA here.
  block_7:
  /*
  c->lw(t5, 0, a1);                                 // lw t5, 0(a1)
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  c->andi(t5, t5, 256);                             // andi t5, t5, 256
  // nop                                            // sll r0, r0, 0
  bc = c->sgpr64(t5) == 0;                          // beq t5, r0, L67
  // nop                                            // sll r0, r0, 0
  if (bc) {goto block_9;}                           // branch non-likely

  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  //beq r0, r0, L66                                 // beq r0, r0, L66
  // nop                                            // sll r0, r0, 0
  goto block_7;                                     // branch always
  */

  /*
  fmt::print("{} -> {} [{}]\n", c->gprs[a0].du32[0], c->gprs[t5].du32[0], c->gprs[t3].ds64[0]);
  fmt::print("[2] t0: {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t0].du16[0], c->gprs[t0].du16[1], c->gprs[t0].du16[2], c->gprs[t0].du16[3], c->gprs[t0].du16[4], c->gprs[t0].du16[5], c->gprs[t0].du16[6], c->gprs[t0].du16[7]);
  fmt::print("[2] t1: {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t1].du16[0], c->gprs[t1].du16[1], c->gprs[t1].du16[2], c->gprs[t1].du16[3], c->gprs[t1].du16[4], c->gprs[t1].du16[5], c->gprs[t1].du16[6], c->gprs[t1].du16[7]);
  fmt::print("[2] t2: {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t2].du16[0], c->gprs[t2].du16[1], c->gprs[t2].du16[2], c->gprs[t2].du16[3], c->gprs[t2].du16[4], c->gprs[t2].du16[5], c->gprs[t2].du16[6], c->gprs[t2].du16[7]);
  fmt::print("[2] a2: {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x}\n", c->gprs[a2].du16[0], c->gprs[a2].du16[1], c->gprs[a2].du16[2], c->gprs[a2].du16[3], c->gprs[a2].du16[4], c->gprs[a2].du16[5], c->gprs[a2].du16[6], c->gprs[a2].du16[7]);
  */
  // Inner-loop preamble: t5 = end of this pass's output (a0 + 128), and the multiply-accumulate
  // for the first entry is started so that block_10 can finish it (software pipelined).
  // The mfc1 r0, f31 instructions have no effect on register state here (destination is r0).
  block_9:
  c->lq(t9, 12, t6);                                // lq t9, 12(t6)
  c->daddiu(t5, a0, 128);                           // daddiu t5, a0, 128
  c->lq(t7, 28, t6);                                // lq t7, 28(t6)
  // nop                                            // sll r0, r0, 0
  c->pextlb(t8, r0, t9);                            // pextlb t8, r0, t9
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pextub(t9, r0, t9);                            // pextub t9, r0, t9
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmulth(r0, t8, t0);                            // pmulth r0, t8, t0
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pextlb(t8, r0, t7);                            // pextlb t8, r0, t7
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmaddh(r0, t9, t1);                            // pmaddh r0, t9, t1
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pextub(t7, r0, t7);                            // pextub t7, r0, t7
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmaddh(r0, t8, t2);                            // pmaddh r0, t8, t2
  c->lq(t8, 44, t6);                                // lq t8, 44(t6)
  c->addiu(t6, t6, 32);                             // addiu t6, t6, 32
  // nop                                            // sll r0, r0, 0
  c->pmaddh(r0, t7, a2);                            // pmaddh r0, t7, a2
  c->lq(t7, 28, t6);                                // lq t7, 28(t6)
  c->pextlb(t9, r0, t8);                            // pextlb t9, r0, t8
  c->mfc1(r0, f31);                                 // mfc1 r0, f31

  // Inner loop: finish the current entry (read LO/HI, shift, fold, clamp, pack, store) while
  // starting the multiply-accumulate for the next one. Runs until a0 reaches t5.
  block_10:
  c->pextub(t8, r0, t8);                            // pextub t8, r0, t8
  // fmt::print("[0] t1: {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t1].du32[0], c->gprs[t1].du32[1], c->gprs[t1].du32[2], c->gprs[t1].du32[3]);
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmfhl_lh(ra);                                  // pmfhl.lh ra
  // fmt::print("[1] ra: {:02x} {:02x} {:02x} {:02x}\n", c->gprs[ra].du32[0], c->gprs[ra].du32[1], c->gprs[ra].du32[2], c->gprs[ra].du32[3]);
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmulth(r0, t9, t0);                            // pmulth r0, t9, t0

  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->psrlh(t9, ra, 6);                              // psrlh t9, ra, 6
  // fmt::print("[3] t9: {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t9].du32[0], c->gprs[t9].du32[1], c->gprs[t9].du32[2], c->gprs[t9].du32[3]);
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pcpyud(ra, t9, t9);                            // pcpyud ra, t9, t9
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->paddh(t9, ra, t9);                             // paddh t9, ra, t9
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pminh(t9, t9, a3);                             // pminh t9, t9, a3
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->ppacb(ra, r0, t9);                             // ppacb ra, r0, t9
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pextlb(t9, r0, t7);                            // pextlb t9, r0, t7
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmaddh(r0, t8, t1);                            // pmaddh r0, t8, t1
  c->sw(ra, 0, a0);                                 // sw ra, 0(a0)
  c->pextub(t7, r0, t7);                            // pextub t7, r0, t7
  c->mfc1(r0, f31);                                 // mfc1 r0, f31
  c->pmaddh(r0, t9, t2);                            // pmaddh r0, t9, t2
  c->lq(t8, 44, t6);                                // lq t8, 44(t6)
  c->addiu(t6, t6, 32);                             // addiu t6, t6, 32
  c->addiu(a0, a0, 4);                              // addiu a0, a0, 4
  c->pmaddh(r0, t7, a2);                            // pmaddh r0, t7, a2
  c->lq(t7, 28, t6);                                // lq t7, 28(t6)
  /*
  fmt::print(" N0");
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      fmt::print(" {:02x}", c->gprs[t8].du8[i*4 + j]);
    }
    fmt::print(" |");
  }
  fmt::print("\n N1");
  for (int i = 0; i < 4; i++) {
    for (int j = 0; j < 4; j++) {
      fmt::print(" {:02x}", c->gprs[t7].du8[i*4 + j]);
    }
    fmt::print(" |");
  }
  fmt::print("\n");
  */
//  fmt::print("next: {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x} {:02x}\n", c->gprs[t8].du32[0], c->gprs[t8].du32[1], c->gprs[t8].du32[2], c->gprs[t8].du32[3],
//             c->gprs[t7].du32[0], c->gprs[t7].du32[1], c->gprs[t7].du32[2], c->gprs[t7].du32[3]);
  bc = c->sgpr64(a0) != c->sgpr64(t5);              // bne a0, t5, L68
  c->pextlb(t9, r0, t8);                            // pextlb t9, r0, t8
  if (bc) {goto block_10;}                          // branch non-likely

  // t3 was already decremented for this pass; a non-negative value means another chunk remains.
  bc = ((s64)c->sgpr64(t3)) >= 0;                   // bgez t3, L63
  // nop                                            // sll r0, r0, 0
  if (bc) {goto block_3;}                           // branch non-likely

  // Epilogue: return 0 and restore ra/fp.
  c->gprs[v0].du64[0] = 0;                          // or v0, r0, r0
  c->ld(ra, 0, sp);                                 // ld ra, 0(sp)
  c->ld(fp, 8, sp);                                 // ld fp, 8(sp)
  //jr ra                                           // jr ra
  c->daddiu(sp, sp, 16);                            // daddiu sp, sp, 16
  goto end_of_function;                             // return

  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  // nop                                            // sll r0, r0, 0
  end_of_function:
  return c->gprs[v0].du64[0];
}

// Resolve the *fake-scratchpad-data* symbol and register execute() under its GOAL name
// (the last argument is the stack_size parameter of LinkedFunctionTable::reg).
void link() {
  cache.fake_scratchpad_data = intern_from_c("*fake-scratchpad-data*").c();
  gLinkedFunctionTable.reg("time-of-day-interp-colors-scratch", execute, 512);
}

} // namespace time_of_day_interp_colors_scratch
} // namespace Mips2C

{ + gprs[rd].du32[i] = 0; + } + } + } + + void ppach(int rd, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + gprs[rd].du16[0] = t.du16[0]; + gprs[rd].du16[1] = t.du16[2]; + gprs[rd].du16[2] = t.du16[4]; + gprs[rd].du16[3] = t.du16[6]; + gprs[rd].du16[4] = s.du16[0]; + gprs[rd].du16[5] = s.du16[2]; + gprs[rd].du16[6] = s.du16[4]; + gprs[rd].du16[7] = s.du16[6]; + } + + void pinteh(int rd, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + gprs[rd].du16[0] = t.du16[0]; + gprs[rd].du16[1] = s.du16[0]; + gprs[rd].du16[2] = t.du16[2]; + gprs[rd].du16[3] = s.du16[2]; + gprs[rd].du16[4] = t.du16[4]; + gprs[rd].du16[5] = s.du16[4]; + gprs[rd].du16[6] = t.du16[6]; + gprs[rd].du16[7] = s.du16[6]; + } + + void ppacb(int rd, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + gprs[rd].du8[0] = t.du8[0]; + gprs[rd].du8[1] = t.du8[2]; + gprs[rd].du8[2] = t.du8[4]; + gprs[rd].du8[3] = t.du8[6]; + + gprs[rd].du8[4] = t.du8[8]; + gprs[rd].du8[5] = t.du8[10]; + gprs[rd].du8[6] = t.du8[12]; + gprs[rd].du8[7] = t.du8[14]; + + gprs[rd].du8[8] = s.du8[0]; + gprs[rd].du8[9] = s.du8[2]; + gprs[rd].du8[10] = s.du8[4]; + gprs[rd].du8[11] = s.du8[6]; + + gprs[rd].du8[12] = s.du8[8]; + gprs[rd].du8[13] = s.du8[10]; + gprs[rd].du8[14] = s.du8[12]; + gprs[rd].du8[15] = s.du8[14]; + } + + void paddw(int rd, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + for (int i = 0; i < 4; i++) { + gprs[rd].du32[i] = s.du32[i] + t.du32[i]; + } + } + + void pextub(int rd, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + gprs[rd].du8[0] = t.du8[8]; + gprs[rd].du8[1] = s.du8[8]; + gprs[rd].du8[2] = t.du8[9]; + gprs[rd].du8[3] = s.du8[9]; + gprs[rd].du8[4] = t.du8[10]; + gprs[rd].du8[5] = s.du8[10]; + gprs[rd].du8[6] = t.du8[11]; + gprs[rd].du8[7] = s.du8[11]; + gprs[rd].du8[8] = t.du8[12]; + gprs[rd].du8[9] = s.du8[12]; + gprs[rd].du8[10] = t.du8[13]; + gprs[rd].du8[11] = s.du8[13]; + gprs[rd].du8[12] = t.du8[14]; + 
gprs[rd].du8[13] = s.du8[14]; + gprs[rd].du8[14] = t.du8[15]; + gprs[rd].du8[15] = s.du8[15]; + } + + void pmulth(int rd, int rs, int rt) { + assert(rd == 0); + s32 temp; + + auto s = gpr_src(rs); + auto t = gpr_src(rt); + + temp = (s32)s.ds16[0] * (s32)t.ds16[0]; + lo.du32[0] = temp; + /*if (_Rd_) cpuRegs.GPR.r[_Rd_].UL[0] = temp;*/ + + temp = (s32)s.ds16[1] * (s32)t.ds16[1]; + lo.du32[1] = temp; + + temp = (s32)s.ds16[2] * (s32)t.ds16[2]; + hi.du32[0] = temp; + /*if (_Rd_) cpuRegs.GPR.r[_Rd_].UL[1] = temp;*/ + + temp = (s32)s.ds16[3] * (s32)t.ds16[3]; + hi.du32[1] = temp; + + temp = (s32)s.ds16[4] * (s32)t.ds16[4]; + lo.du32[2] = temp; + /*if (_Rd_) cpuRegs.GPR.r[_Rd_].UL[2] = temp;*/ + + temp = (s32)s.ds16[5] * (s32)t.ds16[5]; + lo.du32[3] = temp; + + temp = (s32)s.ds16[6] * (s32)t.ds16[6]; + hi.du32[2] = temp; + /*if (_Rd_) cpuRegs.GPR.r[_Rd_].UL[3] = temp;*/ + + temp = (s32)s.ds16[7] * (s32)t.ds16[7]; + hi.du32[3] = temp; + } + + void pmaddh(int rd, int rs, int rt) { + assert(rd == 0); + s32 temp; + + auto s = gpr_src(rs); + auto t = gpr_src(rt); + temp = lo.du32[0] + (s32)s.ds16[0] * (s32)t.ds16[0]; + lo.du32[0] = temp; + + temp = lo.du32[1] + (s32)s.ds16[1] * (s32)t.ds16[1]; + lo.du32[1] = temp; + + temp = hi.du32[0] + (s32)s.ds16[2] * (s32)t.ds16[2]; + hi.du32[0] = temp; + + temp = hi.du32[1] + (s32)s.ds16[3] * (s32)t.ds16[3]; + hi.du32[1] = temp; + + temp = lo.du32[2] + (s32)s.ds16[4] * (s32)t.ds16[4]; + lo.du32[2] = temp; + + temp = lo.du32[3] + (s32)s.ds16[5] * (s32)t.ds16[5]; + lo.du32[3] = temp; + + temp = hi.du32[2] + (s32)s.ds16[6] * (s32)t.ds16[6]; + hi.du32[2] = temp; + + temp = hi.du32[3] + (s32)s.ds16[7] * (s32)t.ds16[7]; + hi.du32[3] = temp; + } + + void psrlh(int dest, int src, int sa) { + auto s = gpr_src(src); + for (int i = 0; i < 8; i++) { + gprs[dest].du16[i] = s.du16[i] >> (sa & 0xf); + } + } + + void paddh(int dest, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + for (int i = 0; i < 8; i++) { + gprs[dest].du16[i] = 
s.du16[i] + t.du16[i]; + } + } + + void pminh(int dest, int rs, int rt) { + auto s = gpr_src(rs); + auto t = gpr_src(rt); + for (int i = 0; i < 8; i++) { + if (s.ds16[i] < t.ds16[i]) + gprs[dest].du16[i] = s.ds16[i]; + else + gprs[dest].du16[i] = t.ds16[i]; + } + } + + void pmfhl_lh(int dest) { + gprs[dest].du16[0] = lo.du16[0]; + gprs[dest].du16[1] = lo.du16[2]; + gprs[dest].du16[2] = hi.du16[0]; + gprs[dest].du16[3] = hi.du16[2]; + gprs[dest].du16[4] = lo.du16[4]; + gprs[dest].du16[5] = lo.du16[6]; + gprs[dest].du16[6] = hi.du16[4]; + gprs[dest].du16[7] = hi.du16[6]; + } + void vsub_bc(DEST mask, BC bc, int dest, int src0, int src1) { auto s0 = vf_src(src0); auto s1 = vf_src(src1); @@ -393,6 +602,17 @@ struct ExecutionContext { } } + void vmini(DEST mask, int dest, int src0, int src1) { + auto s0 = vf_src(src0); + auto s1 = vf_src(src1); + + for (int i = 0; i < 4; i++) { + if ((u64)mask & (1 << i)) { + vfs[dest].f[i] = std::min(s0.f[i], s1.f[i]); + } + } + } + void vsub(DEST mask, int dest, int src0, int src1) { auto s0 = vf_src(src0); auto s1 = vf_src(src1); @@ -426,6 +646,17 @@ struct ExecutionContext { } } + void vmsuba_bc(DEST mask, BC bc, int src0, int src1) { + auto s0 = vf_src(src0); + auto s1 = vf_src(src1); + + for (int i = 0; i < 4; i++) { + if ((u64)mask & (1 << i)) { + acc.f[i] -= s0.f[i] * s1.f[(int)bc]; + } + } + } + void vmadd_bc(DEST mask, BC bc, int dst, int src0, int src1) { auto s0 = vf_src(src0); auto s1 = vf_src(src1); @@ -437,6 +668,17 @@ struct ExecutionContext { } } + void vmsub_bc(DEST mask, BC bc, int dst, int src0, int src1) { + auto s0 = vf_src(src0); + auto s1 = vf_src(src1); + + for (int i = 0; i < 4; i++) { + if ((u64)mask & (1 << i)) { + vfs[dst].f[i] = acc.f[i] - s0.f[i] * s1.f[(int)bc]; + } + } + } + void vdiv(int src0, BC bc0, int src1, BC bc1) { Q = vf_src(src0).f[(int)bc0] / vf_src(src1).f[(int)bc1]; } @@ -501,11 +743,19 @@ struct ExecutionContext { gprs[dst].ds64[0] = value_signed; } + void srl(int dst, int src, int sa) { + 
u32 value = gpr_src(src).du32[0] >> sa; + s32 value_signed = value; + gprs[dst].ds64[0] = value_signed; + } + void dsra(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> sa; } + void dsrl(int dst, int src, int sa) { gprs[dst].du64[0] = gpr_src(src).du64[0] >> sa; } void dsrav(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> gpr_src(sa).du32[0]; } void dsra32(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds64[0] >> (32 + sa); } + void dsrl32(int dst, int src, int sa) { gprs[dst].du64[0] = gpr_src(src).du64[0] >> (32 + sa); } void sra(int dst, int src, int sa) { gprs[dst].ds64[0] = gpr_src(src).ds32[0] >> sa; } void dsll(int dst, int src0, int sa) { gprs[dst].du64[0] = gpr_src(src0).du64[0] << sa; } void dsll32(int dst, int src0, int sa) { gprs[dst].du64[0] = gpr_src(src0).du64[0] << (32 + sa); } @@ -552,6 +802,7 @@ struct ExecutionContext { gprs[dst].ds64[0] = sresult; } + void xori(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] ^ imm; } void andi(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] & imm; } void ori(int dest, int src, u64 imm) { gprs[dest].du64[0] = gpr_src(src).du64[0] | imm; } void and_(int dest, int src0, int src1) { @@ -643,6 +894,15 @@ struct ExecutionContext { } } + void vftoi12(DEST mask, int dst, int src) { + auto s = vf_src(src); + for (int i = 0; i < 4; i++) { + if ((u64)mask & (1 << i)) { + vfs[dst].ds32[i] = s.f[i] * 4096.f; + } + } + } + void vftoi4(DEST mask, int dst, int src) { auto s = vf_src(src); for (int i = 0; i < 4; i++) { @@ -725,4 +985,47 @@ struct ExecutionContext { } }; +inline void get_fake_spad_addr(int dst, void* sym_addr, u32 offset, ExecutionContext* c) { + u32 val; + memcpy(&val, sym_addr, 4); + c->gprs[dst].du64[0] = val + offset; +} + +inline void spad_to_dma(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) { + u32 spad_addr_goal; + memcpy(&spad_addr_goal, spad_sym_addr, 4); + sadr -= spad_addr_goal; + + 
assert((madr & 0xf) == 0); + assert((sadr & 0xf) == 0); + assert(sadr < 0x4000); + assert((sadr + 16 * qwc) <= 0x4000); + assert(qwc <= 0x4000); + + void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr; + + memcpy(spad_addr_c, g_ee_main_mem + madr, qwc * 16); +} + +inline void spad_from_dma(void* spad_sym_addr, u32 madr, u32 sadr, u32 qwc) { + u32 spad_addr_goal; + memcpy(&spad_addr_goal, spad_sym_addr, 4); + sadr -= spad_addr_goal; + assert((madr & 0xf) == 0); + assert((sadr & 0xf) == 0); + assert(sadr < 0x4000); + assert((sadr + 16 * qwc) <= 0x4000); + assert(qwc <= 0x4000); + + void* spad_addr_c = g_ee_main_mem + spad_addr_goal + sadr; + + memcpy(g_ee_main_mem + madr, spad_addr_c, qwc * 16); +} + +inline void load_vfs_from_tf_regs(const void* tf_regs_sym, ExecutionContext* c) { + u32 goal_addr_of_vf1; + memcpy(&goal_addr_of_vf1, tf_regs_sym, 4); + u8* c_addr_of_vf1 = g_ee_main_mem + goal_addr_of_vf1; + memcpy(&c->vfs[1], c_addr_of_vf1, 31 * 16); +} } // namespace Mips2C \ No newline at end of file diff --git a/game/mips2c/mips2c_table.cpp b/game/mips2c/mips2c_table.cpp index 254a9acf27..271b6eb88a 100644 --- a/game/mips2c/mips2c_table.cpp +++ b/game/mips2c/mips2c_table.cpp @@ -88,6 +88,18 @@ namespace draw_boundary_polygon { extern void link(); } +namespace draw_inline_array_tfrag { +extern void link(); +} + +namespace stats_tfrag_asm { +extern void link(); +} + +namespace time_of_day_interp_colors_scratch { +extern void link(); +} + LinkedFunctionTable gLinkedFunctionTable; Rng gRng; std::unordered_map> gMips2CLinkCallbacks = { @@ -102,7 +114,9 @@ std::unordered_map> gMips2CLinkCallbacks = set_sky_vf23_value::link}}, {"load-boundary", {init_boundary_regs::link, render_boundary_quad::link, render_boundary_tri::link, - draw_boundary_polygon::link}}}; + draw_boundary_polygon::link}}, + {"tfrag", {draw_inline_array_tfrag::link, stats_tfrag_asm::link}}, + {"time-of-day", {time_of_day_interp_colors_scratch::link}}}; void LinkedFunctionTable::reg(const 
std::string& name, u64 (*exec)(void*), u32 stack_size) { const auto& it = m_executes.insert({name, {exec, Ptr()}}); diff --git a/game/system/newpad.cpp b/game/system/newpad.cpp index 5b0b559e96..a2e9b6b459 100644 --- a/game/system/newpad.cpp +++ b/game/system/newpad.cpp @@ -237,6 +237,9 @@ void update_gamepads() { for (const auto& [button, idx] : gamepad_map) { g_gamepad_buttons[(int)button] = state.buttons[idx]; } + + g_gamepad_buttons[(int)Button::L2] = state.axes[GLFW_GAMEPAD_AXIS_LEFT_TRIGGER] > 0; + g_gamepad_buttons[(int)Button::R2] = state.axes[GLFW_GAMEPAD_AXIS_RIGHT_TRIGGER] > 0; } }; // namespace Pad diff --git a/goal_src/dgos/mai.gd b/goal_src/dgos/mai.gd new file mode 100644 index 0000000000..7bae2691ae --- /dev/null +++ b/goal_src/dgos/mai.gd @@ -0,0 +1,36 @@ + +("MAI.DGO" + ("cavecrystal-light.o" "cavecrystal-light") + ("maincave-obs.o" "maincave-obs") + ("maincave-part.o" "maincave-part") + ("spiderwebs.o" "spiderwebs") + ("dark-crystal.o" "dark-crystal") + ("baby-spider.o" "baby-spider") + ("mother-spider-h.o" "mother-spider-h") + ("mother-spider-egg.o" "mother-spider-egg") + ("mother-spider-proj.o" "mother-spider-proj") + ("mother-spider.o" "mother-spider") + ("gnawer.o" "gnawer") + ("driller-lurker.o" "driller-lurker") + ("launcherdoor.o" "launcherdoor") + ("tpage-1313.go" "tpage-1313") + ("tpage-1315.go" "tpage-1315") + ("tpage-1314.go" "tpage-1314") + ("tpage-1312.go" "tpage-1312") + ("tpage-767.go" "tpage-767") + ("baby-spider-ag-MAI.go" "baby-spider") + ("cavetrapdoor-ag-MAI.go" "cavetrapdoor") + ("dark-crystal-ag.go" "dark-crystal") + ("driller-lurker-ag.go" "driller-lurker") + ("ecovalve-ag-MAI.go" "ecovalve") + ("gnawer-ag.go" "gnawer") + ("launcherdoor-maincave-ag.go" "launcherdoor-maincave") + ("maincavecam-ag.go" "maincavecam") + ("mother-spider-ag.go" "mother-spider") + ("plat-ag-MAI.go" "plat") + ("spider-egg-ag-DAR-MAI.go" "spider-egg") + ("spiderwebs-ag.go" "spiderwebs") + ("water-anim-maincave-ag.go" "water-anim-maincave") + 
("water-anim-maincave-water-ag.go" "water-anim-maincave-water") + ("maincave-vis.go" "maincave-vis") + ) \ No newline at end of file diff --git a/goal_src/engine/ambient/mood.gc b/goal_src/engine/ambient/mood.gc index 5b5bd0c2af..6af48edfcb 100644 --- a/goal_src/engine/ambient/mood.gc +++ b/goal_src/engine/ambient/mood.gc @@ -146,6 +146,8 @@ ) ) + + ;; definition for function update-mood-prt-color ;; Used lq/sq (defun update-mood-prt-color ((arg0 mood-context)) diff --git a/goal_src/engine/debug/default-menu.gc b/goal_src/engine/debug/default-menu.gc index 25a14e9aad..74da1c07fe 100644 --- a/goal_src/engine/debug/default-menu.gc +++ b/goal_src/engine/debug/default-menu.gc @@ -449,7 +449,7 @@ (defun dm-vu1-user-set-pick-func ((arg0 int) (arg1 int)) (if (= arg1 4) - (set! *vu1-enable-user-menu* arg0) + (set! *vu1-enable-user-menu* (the-as vu1-renderer-mask arg0)) ) (= *vu1-enable-user-menu* arg0) ) diff --git a/goal_src/engine/debug/stats-h.gc b/goal_src/engine/debug/stats-h.gc index f63445cf3b..6917bfdd3c 100644 --- a/goal_src/engine/debug/stats-h.gc +++ b/goal_src/engine/debug/stats-h.gc @@ -71,6 +71,7 @@ (defmethod reset! perf-stat ((obj perf-stat)) "Perfomance counters are not implemented, so this does nothing." + (+! (-> obj count) 1) #| (let ((v1-0 (-> obj ctrl))) (+! (-> obj count) 1) diff --git a/goal_src/engine/dma/dma-h.gc b/goal_src/engine/dma/dma-h.gc index 78e755caaa..f4957e5de8 100644 --- a/goal_src/engine/dma/dma-h.gc +++ b/goal_src/engine/dma/dma-h.gc @@ -218,12 +218,16 @@ (sky-draw 3) (bucket-4 4) ;; ocean (tfrag-tex0 5) + (tfrag-0 6) + (tfrag-near-0 7) ;; merc0 10 ;; generic0 11 (bucket-10 10) (bucket-11 11) (tfrag-tex1 12) + (tfrag-1 13) + (tfrag-near-1 14) ;; merc1 17 ;; generic1 18 (bucket-17 17) @@ -235,10 +239,21 @@ (generic-foreground 30) ;; ? 
(alpha-tex0 31) - (sky-tex0 32) + (tfrag-trans-0 32) ;; also sky blend + (tfrag-trans-near-0 33) + (tfrag-dirt-0 34) + (tfrag-dirt-near-0 35) + (tfrag-ice-0 36) + (tfrag-ice-near-0 37) (alpha-tex1 38) - (sky-tex1 39) + (tfrag-trans-1 39) ;; also sky blend + (tfrag-trans-near-1 40) + (tfrag-dirt-1 41) + (tfrag-dirt-near-1 42) + (tfrag-ice-1 43) + (tfrag-ice-near-1 44) + (bucket-45 45) (bucket-46 46) diff --git a/goal_src/engine/draw/draw-node.gc b/goal_src/engine/draw/draw-node.gc index ed3420a2f1..883679708b 100644 --- a/goal_src/engine/draw/draw-node.gc +++ b/goal_src/engine/draw/draw-node.gc @@ -129,4 +129,5 @@ ;; DRAW NODE CULL ;;;;;;;;;;;;;;;;;;;;;;;; -;; TODO: waiting on tfrag/tie stuff to worry about this. \ No newline at end of file +;; TODO: waiting on tfrag/tie stuff to worry about this. +(define-extern draw-node-cull (function pointer pointer (inline-array draw-node) int none)) \ No newline at end of file diff --git a/goal_src/engine/draw/drawable-group.gc b/goal_src/engine/draw/drawable-group.gc index 2dc7dc808c..1167712c55 100644 --- a/goal_src/engine/draw/drawable-group.gc +++ b/goal_src/engine/draw/drawable-group.gc @@ -78,11 +78,11 @@ (none) ) -(defmethod dummy-14 drawable-group ((obj drawable-group)) +(defmethod collect-stats drawable-group ((obj drawable-group)) (when (vis-cull (-> obj id)) (when (sphere-cull (-> obj bsphere)) (dotimes (s5-0 (-> obj length)) - (dummy-14 (-> obj data s5-0)) + (collect-stats (-> obj data s5-0)) ) ) ) @@ -91,7 +91,7 @@ ) -(defmethod debug-draw drawable-group ((obj drawable-group) (arg0 drawable) (arg1 object)) +(defmethod debug-draw drawable-group ((obj drawable-group) (arg0 drawable) (arg1 display-frame)) (when (vis-cull (-> obj id)) (when (sphere-cull (-> obj bsphere)) (dotimes (s3-0 (-> obj length)) diff --git a/goal_src/engine/draw/drawable-h.gc b/goal_src/engine/draw/drawable-h.gc index e52529ad55..744ce70c38 100644 --- a/goal_src/engine/draw/drawable-h.gc +++ b/goal_src/engine/draw/drawable-h.gc @@ -28,8 +28,8 
@@ (collide-with-box (_type_ int collide-list) none 11) (collide-y-probe (_type_ int collide-list) none 12) (collide-ray (_type_ int collide-list) none 13) - (dummy-14 (_type_) none 14) - (debug-draw (_type_ drawable object) none 15) + (collect-stats (_type_) none 14) + (debug-draw (_type_ drawable display-frame) none 15) (dummy-16 (_type_ object object) object 16) (collect-ambients (_type_ sphere int ambient-list) none 17) ) diff --git a/goal_src/engine/draw/drawable-inline-array.gc b/goal_src/engine/draw/drawable-inline-array.gc index 3ad1ab6f05..9c7870f1b1 100644 --- a/goal_src/engine/draw/drawable-inline-array.gc +++ b/goal_src/engine/draw/drawable-inline-array.gc @@ -19,12 +19,12 @@ (none) ) -(defmethod dummy-14 drawable-inline-array ((obj drawable-inline-array)) +(defmethod collect-stats drawable-inline-array ((obj drawable-inline-array)) 0 (none) ) -(defmethod debug-draw drawable-inline-array ((obj drawable-inline-array) (arg0 drawable) (arg1 object)) +(defmethod debug-draw drawable-inline-array ((obj drawable-inline-array) (arg0 drawable) (arg1 display-frame)) 0 (none) ) diff --git a/goal_src/engine/draw/drawable-tree.gc b/goal_src/engine/draw/drawable-tree.gc index 72584ba8f9..8faf5670af 100644 --- a/goal_src/engine/draw/drawable-tree.gc +++ b/goal_src/engine/draw/drawable-tree.gc @@ -5,3 +5,11 @@ ;; name in dgo: drawable-tree ;; dgos: GAME, ENGINE +;; todo +(defmethod debug-draw drawable-tree-array ((obj drawable-tree-array) (arg0 drawable) (arg1 display-frame)) + (dotimes (s3-0 (-> obj length)) + (debug-draw (-> obj trees s3-0) (-> (the-as drawable-tree-array arg0) trees s3-0) arg1) + ) + 0 + (none) + ) \ No newline at end of file diff --git a/goal_src/engine/draw/drawable.gc b/goal_src/engine/draw/drawable.gc index 3c8dad1c2a..69d4476758 100644 --- a/goal_src/engine/draw/drawable.gc +++ b/goal_src/engine/draw/drawable.gc @@ -37,6 +37,27 @@ ) ) +(defun vis-cull ((id int)) + "Is this thing visible? By draw-node id." 
+ ;; todo + #t + + #| + (let* ((addr (scratchpad-ptr int8 :offset (+ #x3b80 (/ id 8)))) ;; address of the vis data + (vis-byte (-> addr 0)) ;; vis byte + (shift-amount (+ 56 (logand id 7))) + (shifted (shl vis-byte shift-amount)) + ) + (< shifted 0) + ) + |# + ) + +(defun sphere-cull ((arg0 vector)) + #t + ;; todo + ) + (defun real-main-draw-hook () (when *slow-frame-rate* (dotimes (v1-2 #xc3500) @@ -176,9 +197,36 @@ ;; ocean ;; merc ;; init bg + (init-background) + ;; exec bg - ;; finish bg - ;; stats + (execute-connections *background-draw-engine* (-> *display* frames (-> *display* on-screen) frame)) + + ;; finish bg (most of the work is here) + (reset! (-> *perf-stats* data 3)) + (finish-background) + (read! (-> *perf-stats* data 3)) + (update-wait-stats (-> *perf-stats* data 3) (-> *background-work* wait-to-vu0) (the-as uint 0) (the-as uint 0)) + + ;; + (end-perf-stat-collection) + (when (not (paused?)) + (when *stats-poly* + (dotimes (gp-8 (-> *level* length)) + (let ((v1-193 (-> *level* level gp-8))) + (if (= (-> v1-193 status) 'active) + (collect-stats (-> v1-193 bsp)) + ) + ) + ) + (print-terrain-stats) + ) + (if *display-perf-stats* + (print-perf-stats) + ) + ) + (start-perf-stat-collection) + ;; fg engine ;; bones ;; gmerc diff --git a/goal_src/engine/game/main.gc b/goal_src/engine/game/main.gc index c6ddb1c7cb..bd86178b2f 100644 --- a/goal_src/engine/game/main.gc +++ b/goal_src/engine/game/main.gc @@ -654,8 +654,12 @@ ) ;; added - (format *stdcon* "~3Lglobal heap at ~,,2fK remaining~0L~%" - (* (1/ 1024) (&- (-> global top) (-> global current)))) + ; (format *stdcon* "~3Lglobal heap at ~,,2fK remaining~0L~%" + ; (* (1/ 1024) (&- (-> global top) (-> global current)))) + (format *stdcon* "~3Lglob: ~d free debug: ~d free~0L~%" + (dma-buffer-free (-> disp frames (-> disp on-screen) frame global-buf)) + (dma-buffer-free (-> disp frames (-> disp on-screen) frame debug-buf)) + ) #| ;; added, prints some level status. 
(dotimes (i 2) diff --git a/goal_src/engine/gfx/background.gc b/goal_src/engine/gfx/background.gc index b8a70c61e2..5ded569fc6 100644 --- a/goal_src/engine/gfx/background.gc +++ b/goal_src/engine/gfx/background.gc @@ -86,6 +86,9 @@ ;;(spad-vis (the-as (pointer uint128) (+ #x38b0 #x70000000))) (spad-vis (scratchpad-ptr uint128 :offset VISIBLE_LIST_SCRATCHPAD)) ) + ;; TODO this is a hack. + (quad-copy! (-> arg0 vis-bits) (-> arg2 all-visible-list) (/ (+ (-> arg2 visible-list-length) 15) 16)) + (b! (not *artist-flip-visible*) cfg-5 :delay (nop!)) (nop!) (nop!) @@ -140,6 +143,8 @@ ;;;;;;;;;;;;;;;; ;; shrubbery ;;;;;;;;;;;;;;;; + +#| (set! (-> *instance-shrub-work* paused) (paused?)) (when (nonzero? (-> *background-work* shrub-tree-count)) (if *debug-segment* @@ -177,6 +182,7 @@ ) ) ) +|# (let ((gp-1 (the-as level #f))) @@ -242,27 +248,19 @@ ;; update colors, but only if needed (when (not (or (zero? s0-0) (= s4-1 s0-0))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) - (+ 6160 (the-as int (the-as terrain-context #x70000000))) - ) - s0-0 - (-> s1-0 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-0 (-> s1-0 mood)) ;; remember the previous colors (set! s4-1 s0-0) ) ) ;; set the level. - (set! (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-0 index) - ) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-0 index)) ) (set! (-> *tfrag-work* min-dist z) 4095996000.0) ;; draw! (draw-drawable-tree-tfrag s2-0) ;; remember closest. - (set! (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 0) + (set! (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 0) (-> *tfrag-work* min-dist z) ) ) @@ -278,26 +276,20 @@ (upload-vis-bits s1-1 gp-1 a2-6) (when (not (or (zero? 
s0-1) (= s4-1 s0-1))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) (+ 6160 #x70000000)) - s0-1 - (-> s1-1 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-1 (-> s1-1 mood)) (set! s4-1 s0-1) ) ) - (set! - (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-1 index) - ) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-1 index)) ) (set! (-> *tfrag-work* min-dist z) 4095996000.0) (draw-drawable-tree-trans-tfrag s2-1) - (set! (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) + (set! (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) (-> *tfrag-work* min-dist z) ) ) ) + ;;;;;;;;;;;;;; DIRT TFRAG (let ((s2-2 (-> *background-work* dirt-tfrag-trees s3-0))) @@ -309,30 +301,23 @@ (upload-vis-bits s1-2 gp-1 a2-8) (when (not (or (zero? s0-2) (= s4-1 s0-2))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) (+ 6160 #x70000000)) - s0-2 - (-> s1-2 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-2 (-> s1-2 mood)) (set! s4-1 s0-2) ) ) - (set! - (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-2 index) - ) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-2 index)) ) (set! (-> *tfrag-work* min-dist z) 4095996000.0) (draw-drawable-tree-dirt-tfrag s2-2) - (set! (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) - (fmin - (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) + (set! 
(-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) + (fmin (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) (-> *tfrag-work* min-dist z) ) ) ) ) - + + ;;;;;;;;;;;;;; ICE TFRAG (let ((s2-3 (-> *background-work* ice-tfrag-trees s3-0))) (when s2-3 @@ -343,28 +328,23 @@ (upload-vis-bits s1-3 gp-1 a2-10) (when (not (or (zero? s0-3) (= s4-1 s0-3))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) (+ 6160 #x70000000)) - s0-3 - (-> s1-3 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-3 (-> s1-3 mood)) (set! s4-1 s0-3) ) ) - (set! (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-3 index) - ) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-3 index)) ) (set! (-> *tfrag-work* min-dist z) 4095996000.0) (draw-drawable-tree-ice-tfrag s2-3) - (set! (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) - (fmin - (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) + (set! (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) + (fmin (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) (-> *tfrag-work* min-dist z) ) ) ) ) + + ;;;;;;;;;;;;;; LOWRES TFRAG (let ((s2-4 (-> *background-work* lowres-tfrag-trees s3-0))) @@ -376,19 +356,13 @@ (upload-vis-bits s1-4 gp-1 a2-12) (when (not (or (zero? s0-4) (= s4-1 s0-4))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) (+ 6160 #x70000000)) - s0-4 - (-> s1-4 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-4 (-> s1-4 mood)) (set! s4-1 s0-4) ) ) - (set! - (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-4 index) - ) + (set! 
(-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-4 index)) ) + ;;(format 0 "draw ~A~%" s2-4) (draw-drawable-tree-tfrag s2-4) ) ) @@ -403,24 +377,16 @@ (upload-vis-bits s1-5 gp-1 a2-14) (when (not (or (zero? s0-5) (= s4-1 s0-5))) (flush-cache 0) - (time-of-day-interp-colors-scratch - (the-as (pointer rgba) (+ 6160 #x70000000)) - s0-5 - (-> s1-5 mood) - ) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-5 (-> s1-5 mood)) (set! s4-1 s0-5) ) ) - (set! - (-> (the-as terrain-context #x70000000) bsp lev-index) - (-> s1-5 index) - ) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-5 index)) ) (set! (-> *tfrag-work* min-dist z) 4095996000.0) (draw-drawable-tree-trans-tfrag s2-5) - (set! (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) - (fmin - (-> *level* level (-> (the-as terrain-context #x70000000) bsp lev-index) closest-object 3) + (set! (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) + (fmin (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 3) (-> *tfrag-work* min-dist z) ) ) @@ -439,7 +405,7 @@ ;;;;;;;;;; TIE (TFRAG Instance Engine) - + #| ;; common setup (set! (-> *instance-tie-work* paused) (paused?)) (when (nonzero? (-> *background-work* tie-tree-count)) @@ -507,6 +473,8 @@ ) ) ) + + |# ) 0 (none) diff --git a/goal_src/engine/gfx/hw/vu1-user-h.gc b/goal_src/engine/gfx/hw/vu1-user-h.gc index 469a096c44..94b7463bf5 100644 --- a/goal_src/engine/gfx/hw/vu1-user-h.gc +++ b/goal_src/engine/gfx/hw/vu1-user-h.gc @@ -8,12 +8,55 @@ ;; VU1 renderers are enabled/disabled with a bitmask. ;; I believe this is renderers which can be enabled/disabled in the debug menu? 
-;; 1024 = merc -(define *vu1-enable-user-menu* #x1FFFF8) + +(defenum vu1-renderer-mask + :bitfield #t + (sky 3) + (ocean 4) + (ocean-wave 5) + (tfrag 6) + (tie-near 7) + (tie 8) + (generic 9) + (merc 10) + (shrubbery 11) + (shrub-near 12) + (billboard 13) + (trans-shrubbery 14) + (trans-frag 15) + (sprite 16) + (shadow 17) + (depth-cue 18) + (nineteen 19) + (twenty 20) + ) + +(define *vu1-enable-user-menu* + (vu1-renderer-mask + sky + ocean + ocean-wave + tfrag + tie-near + tie + generic + merc + shrubbery + shrub-near + billboard + trans-shrubbery + trans-frag + sprite + shadow + depth-cue + nineteen + twenty + ) + ) ;; by default, all off. ;; the menu renderers get copied to this on each frame -(define *vu1-enable-user* 0) +(define *vu1-enable-user* (the-as vu1-renderer-mask 0)) ;; a dma "sink" is somewhere where a renderer can put stuff. (deftype dma-foreground-sink (basic) diff --git a/goal_src/engine/gfx/merc/merc.gc b/goal_src/engine/gfx/merc/merc.gc index f9978fcb67..07cbfc6e4b 100644 --- a/goal_src/engine/gfx/merc/merc.gc +++ b/goal_src/engine/gfx/merc/merc.gc @@ -588,7 +588,7 @@ (defun merc-vu1-init-buffers () "Setup merc DMA buffers. Call this _after_ drawing." - (when (logtest? *vu1-enable-user* 1024) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask merc)) (merc-vu1-init-buffer (bucket-id bucket-10) (new 'static 'gs-test diff --git a/goal_src/engine/gfx/tfrag/subdivide.gc b/goal_src/engine/gfx/tfrag/subdivide.gc index 47b2358c9e..5125b6dd98 100644 --- a/goal_src/engine/gfx/tfrag/subdivide.gc +++ b/goal_src/engine/gfx/tfrag/subdivide.gc @@ -152,29 +152,28 @@ (define *subdivide-settings* (new 'global 'subdivide-settings (meters 30.0) (meters 70.0))) -(defun set-tfrag-dists! ((arg0 (pointer float))) - "TODO - better type for arg0?" +(defun set-tfrag-dists! ((arg0 tfrag-dists)) (let ((f2-0 (-> *subdivide-settings* dist 0)) (f1-0 (-> *subdivide-settings* dist 1)) (f0-0 (-> *subdivide-settings* dist 2)) ) - (set! (-> arg0 3) f2-0) - (set! 
(-> arg0 7) f1-0) + (set! (-> arg0 k0s 0 w) f2-0) + (set! (-> arg0 k0s 1 w) f1-0) (let ((f4-1 (/ 1.0 (- f2-0 f1-0))) (f3-2 (/ 1.0 (- f1-0 f0-0))) ) - (set! (-> arg0 1) (- f4-1)) - (set! (-> arg0 5) (- f3-2)) - (set! (-> arg0 0) (* 0.5 f4-1)) - (set! (-> arg0 4) (* 0.5 f3-2)) + (set! (-> arg0 k0s 0 y) (- f4-1)) + (set! (-> arg0 k0s 1 y) (- f3-2)) + (set! (-> arg0 k0s 0 x) (* 0.5 f4-1)) + (set! (-> arg0 k0s 1 x) (* 0.5 f3-2)) (let ((f2-1 (* f2-0 f4-1)) (f5-7 (* f1-0 f3-2)) ) - (set! (-> arg0 9) f2-1) - (set! (-> arg0 13) f5-7) + (set! (-> arg0 k1s 0 y) f2-1) + (set! (-> arg0 k1s 1 y) f5-7) ) - (set! (-> arg0 8) (* (* -0.5 f4-1) f1-0)) - (set! (-> arg0 12) (* (* -0.5 f3-2) f0-0)) + (set! (-> arg0 k1s 0 x) (* -0.5 f4-1 f1-0)) + (set! (-> arg0 k1s 1 x) (* -0.5 f3-2 f0-0)) ) ) (none) diff --git a/goal_src/engine/gfx/tfrag/tfrag-h.gc b/goal_src/engine/gfx/tfrag/tfrag-h.gc index 05c4ff5083..7b918a4002 100644 --- a/goal_src/engine/gfx/tfrag/tfrag-h.gc +++ b/goal_src/engine/gfx/tfrag/tfrag-h.gc @@ -17,7 +17,7 @@ (deftype tfragment-debug-data (structure) ((stats tfragment-stats :inline :offset-assert 0) - (debug-lines basic :offset-assert 16) + (debug-lines (array vector-array) :offset-assert 16) ) :method-count-assert 9 :size-assert #x14 @@ -42,7 +42,7 @@ (dma-level-0 uint32 :offset 32) (dma-base uint32 :offset 36) (dma-level-1 uint32 :offset 40) - (dma-qwc uint32 4 :offset-assert 44) + (dma-qwc uint8 4 :offset-assert 44) (shader (inline-array adgif-shader) :offset 48) (num-shaders uint8 :offset 52) (num-base-colors uint8 :offset 53) @@ -78,7 +78,9 @@ ) (deftype drawable-tree-tfrag (drawable-tree) - ((time-of-day-pal time-of-day-palette :offset 12)) + ((time-of-day-pal time-of-day-palette :offset 12) + (arrays drawable-inline-array 1 :offset 32) ;; either drawable-inline-array-node or drawable-inline-array-tfrag + ) :method-count-assert 18 :size-assert #x24 :flag-assert #x1200000024 @@ -122,8 +124,8 @@ (deftype tfrag-dists (structure) ((data uint32 16 :offset-assert 0) 
(vector vector 4 :inline :offset 0) - (k0s uint128 2 :offset 0) - (k1s uint128 2 :offset 32) + (k0s vector 2 :inline :offset 0) + (k1s vector 2 :inline :offset 32) ) :method-count-assert 9 :size-assert #x40 @@ -135,9 +137,9 @@ (vector vector 14 :inline :offset 0) (fog vector :inline :offset 0) (val vector :inline :offset 16) - (strgif qword :inline :offset 32) - (fangif qword :inline :offset 48) - (adgif qword :inline :offset 64) + (strgif gs-gif-tag :inline :offset 32) ;; was qword + (fangif gs-gif-tag :inline :offset 48) ;; was qword + (adgif gs-gif-tag :inline :offset 64) ;; was qword (hvdf-offset vector :inline :offset 80) (hmge-scale vector :inline :offset 96) (invh-scale vector :inline :offset 112) diff --git a/goal_src/engine/gfx/tfrag/tfrag-methods.gc b/goal_src/engine/gfx/tfrag/tfrag-methods.gc index 72d8912cf0..d868514a22 100644 --- a/goal_src/engine/gfx/tfrag/tfrag-methods.gc +++ b/goal_src/engine/gfx/tfrag/tfrag-methods.gc @@ -5,3 +5,999 @@ ;; name in dgo: tfrag-methods ;; dgos: GAME, ENGINE + +(defun edge-debug-lines ((arg0 (array vector-array))) + "Draw edge debug lines" + ;; note: these lines are not present in the retail version of the game, sadly. + (when (nonzero? arg0) + (format 0 "bad~%") + (dotimes (s5-0 (-> arg0 length)) + (when (logtest? *display-strip-lines* (ash 1 s5-0)) + (let ((s4-0 (-> arg0 s5-0))) + (dotimes (s3-0 (/ (-> s4-0 length) 2)) + (add-debug-line + #t + (bucket-id debug-draw1) + (-> s4-0 data (* s3-0 2)) + (-> s4-0 data (+ (* s3-0 2) 1)) + (new 'static 'rgba :r #xff :g #xff :b #xff :a #x80) + #f + (the-as rgba -1) + ) + ) + ) + ) + ) + ) + (none) + ) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Tree Draw Functions +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; The TFRAG renderer doesn't do much drawing inside of the drawable tree. +;; Instead, when the draw method of a top-level tfrag tree is called, it simply adds a reference to the tree to +;; the background list.
After draw is complete, the finish-background function will draw the trees with these +;; functions. + + +(defun draw-drawable-tree-tfrag ((arg0 drawable-tree-tfrag)) + "Draw the normal tfrag tree!" + (local-vars (r0-0 none) (a0-20 int) (a0-22 int) (a0-38 int) (a0-40 int) (sv-16 (pointer uint8))) + + + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tfrag)) + + ;; first, do draw node culling. + ;; the culling is done in batches, doing all nodes at one depth before going to the next. + ;; only arrays of draw nodes can be culled - tfragments themselves can't (they are the wrong size) + + + (let ((s5-0 (+ (-> arg0 length) -1))) ;; get the number of trees with draw-nodes (see doc, the last one is always tfragments) + + ;; TODO + #| + (when (nonzero? s5-0) ;; only if we have draw-nodes (levels like INT have only like 2 tfrags) + (dotimes (s4-0 s5-0) ;; loop over tree depths + + ;; not sure of the details yet, but we take two levels as inputs + (let* ((v1-7 (-> arg0 arrays s4-0)) + (a0-4 (-> arg0 arrays (+ s4-0 1))) + (a1-1 (/ (-> (the-as drawable-inline-array-node v1-7) data 0 id) 8)) + (a0-6 (/ (-> (the-as drawable-inline-array-node a0-4) data 0 id) 8)) + (a1-3 (&-> (scratchpad-object terrain-context) work background vis-list a1-1)) + (a0-8 (&-> (scratchpad-object terrain-context) work background vis-list a0-6)) + ) + (draw-node-cull + a0-8 + a1-3 + (-> (the-as drawable-inline-array-node v1-7) data) + (-> (the-as drawable-inline-array-node v1-7) length) + ) + ) + ) + ) + |# + + ;; draw, using the full list of all tfrags (not tree format) + (let* ((v1-13 (the-as drawable-inline-array-tfrag (-> arg0 arrays s5-0))) + (s4-1 (-> v1-13 data)) + (s3-0 (-> v1-13 length)) + ) + + ;; pointer to vis data for the first tfrag + (set!
sv-16 (&-> (scratchpad-object terrain-context) work background vis-list (/ (-> s4-1 0 id) 8))) + + ;; all DMA + (let ((s5-1 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + ;; (format *stdcon* " #x~X~%" s5-1) + ;; DMA for TFRAG + (let* ((s1-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s2-0 (-> s1-0 base)) + ) + + ;; clear stats + (set! (-> *tfrag-work* wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* wait-from-spr) (the-as uint 0)) + + ;; initialize dma buffer + (tfrag-init-buffer + s1-0 + (new 'static 'gs-test :ate #x1 :atst (gs-atest greater-equal) :aref #x26 :zte #x1 :ztst (gs-ztest greater-equal)) + 0 + ) + + ;; do the draw! + (reset! (-> *perf-stats* data 5)) + ;;(format 0 "DRAW: ~D~%" s3-0) + (draw-inline-array-tfrag sv-16 (the-as drawable-inline-array s4-1) s3-0 s1-0) + (read! (-> *perf-stats* data 5)) + + ;; update stats for the draw + (update-wait-stats (-> *perf-stats* data 5) (the-as uint 0) (-> *tfrag-work* wait-to-spr) (-> *tfrag-work* wait-from-spr)) + + ;; finish dma buffer + (tfrag-end-buffer s1-0) + + ;; close dma packet + (let ((a3-3 (-> s1-0 base))) + (let ((v1-38 (the-as object (-> s1-0 base)))) + (set! (-> (the-as dma-packet v1-38) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-38) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-38) vif1) (new 'static 'vif-tag)) + (set! (-> s1-0 base) (&+ (the-as pointer v1-38) 16)) + ) + (dma-bucket-insert-tag (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as bucket-id (if (zero? 
(-> (scratchpad-object terrain-context) bsp lev-index)) + (bucket-id tfrag-0) + (bucket-id tfrag-1) + ) + ) + s2-0 + (the-as (pointer dma-tag) a3-3) + ) + ) + ) + + + ;; (format *stdcon* " #x~X~%" (-> *display* frames (-> *display* on-screen) frame global-buf base)) + ;; DMA for TFRAG NEAR + ; (let* ((s1-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + ; (s2-1 (-> s1-1 base)) + ; ) + ; (set! (-> *tfrag-work* near-wait-to-spr) (the-as uint 0)) + ; (set! (-> *tfrag-work* near-wait-from-spr) (the-as uint 0)) + ; (tfrag-near-init-buffer + ; s1-1 + ; (new 'static 'gs-test :ate #x1 :atst (gs-atest greater-equal) :aref #x26 :zte #x1 :ztst (gs-ztest greater-equal)) + ; 0 + ; ) + ; (reset! (-> *perf-stats* data 6)) + ; (draw-inline-array-tfrag-near sv-16 (the-as drawable-inline-array s4-1) s3-0 s1-1) + ; (read! (-> *perf-stats* data 6)) + ; (update-wait-stats (-> *perf-stats* data 6) (the-as uint 0) (-> *tfrag-work* near-wait-to-spr) (-> *tfrag-work* near-wait-from-spr)) + ; (tfrag-near-end-buffer s1-1) + ; (let ((a3-6 (-> s1-1 base))) + ; (let ((v1-62 (the-as object (-> s1-1 base)))) + ; (set! (-> (the-as dma-packet v1-62) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + ; (set! (-> (the-as dma-packet v1-62) vif0) (new 'static 'vif-tag)) + ; (set! (-> (the-as dma-packet v1-62) vif1) (new 'static 'vif-tag)) + ; (set! (-> s1-1 base) (&+ (the-as pointer v1-62) 16)) + ; ) + ; (dma-bucket-insert-tag + ; (-> *display* frames (-> *display* on-screen) frame bucket-group) + ; (the-as bucket-id (if (zero? (-> (scratchpad-object terrain-context) bsp lev-index)) + ; (bucket-id tfrag-near-0) + ; (bucket-id tfrag-near-1) + ; ) + ; ) + ; s2-1 + ; (the-as (pointer dma-tag) a3-6) + ; ) + ; ) + ; ) + + ;; DMA memory stats + (let ((v1-69 *dma-mem-usage*)) + (when (nonzero? v1-69) + (set! (-> v1-69 length) (max 2 (-> v1-69 length))) + (set! (-> v1-69 data 1 name) "tfragment") + (+! (-> v1-69 data 1 count) 1) + (+! 
(-> v1-69 data 1 used) (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s5-1))) + (set! (-> v1-69 data 1 total) (-> v1-69 data 1 used)) + ) + ) + ) + ) + ) + ) + (none) + ) + + +(defun draw-drawable-tree-trans-tfrag ((arg0 drawable-tree-trans-tfrag)) + (local-vars + (r0-0 none) + (a0-18 int) + (a0-20 int) + (a0-35 int) + (a0-37 int) + (sv-16 (pointer uint8)) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (let ((s5-0 (+ (-> arg0 length) -1))) + #| TODO + (when (nonzero? s5-0) + (dotimes (s4-0 s5-0) + (let* ((v1-7 (-> arg0 arrays s4-0)) + (a0-4 (-> arg0 arrays (+ s4-0 1))) + (a1-1 (/ (-> (the-as drawable-inline-array-node v1-7) data 0 id) 8)) + (a0-6 (/ (-> (the-as drawable-inline-array-node a0-4) data 0 id) 8)) + (a1-3 (&-> (the-as terrain-context #x70000000) work background vis-list a1-1)) + (a0-8 (&-> (the-as terrain-context #x70000000) work background vis-list a0-6)) + ) + (draw-node-cull + a0-8 + a1-3 + (-> (the-as drawable-inline-array-node v1-7) data) + (-> (the-as drawable-inline-array-node v1-7) length) + ) + ) + ) + ) |# + (let* ((v1-13 (-> arg0 arrays s5-0)) + (s5-1 (&+ v1-13 32)) + (s4-1 (-> v1-13 length)) + ) + (set! sv-16 (&-> (scratchpad-object terrain-context) work background vis-list (/ (-> s5-1 id) 8))) + (let* ((s2-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-0 (-> s2-0 base)) + ) + (set! (-> *tfrag-work* wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* wait-from-spr) (the-as uint 0)) + (tfrag-init-buffer + s2-0 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x7e + :afail #x1 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 1 + ) + (reset! (-> *perf-stats* data 5)) + (draw-inline-array-tfrag sv-16 s5-1 s4-1 s2-0) + (update-wait-stats (-> *perf-stats* data 5) (the-as uint 0) (-> *tfrag-work* wait-to-spr) (-> *tfrag-work* wait-from-spr)) + (read! 
(-> *perf-stats* data 5)) + (tfrag-end-buffer s2-0) + (let ((a3-3 (-> s2-0 base))) + (let ((v1-34 (the-as object (-> s2-0 base)))) + (set! (-> (the-as dma-packet v1-34) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-34) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-34) vif1) (new 'static 'vif-tag)) + (set! (-> s2-0 base) (&+ (the-as pointer v1-34) 16)) + ) + #| + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as + bucket-id + (if (zero? (-> (scratchpad-object terrain-context) bsp lev-index)) + (bucket-id tfrag-trans-0) + (bucket-id tfrag-trans-1) + ) + ) + s3-0 + (the-as (pointer dma-tag) a3-3) + )|# + ) + ) + #| TODO + (let* + ((s2-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-1 (-> s2-1 base)) + ) + (set! (-> *tfrag-work* near-wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* near-wait-from-spr) (the-as uint 0)) + (tfrag-near-init-buffer + s2-1 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x7e + :afail #x1 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 1 + ) + (let* ((v1-48 (-> *perf-stats* data 6)) + (a0-32 (-> v1-48 ctrl)) + ) + (+! (-> v1-48 count) 1) + (b! (zero? a0-32) cfg-15 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-32) + ) + (.sync.l) + (.sync.p) + (label cfg-15) + 0 + (draw-inline-array-tfrag-near sv-16 s5-1 s4-1 s2-1) + (let ((v1-51 (-> *perf-stats* data 6))) + (b! (zero? (-> v1-51 ctrl)) cfg-17 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-35 pcr0) + (+! (-> v1-51 accum0) a0-35) + (.mfpc a0-37 pcr1) + (+! 
(-> v1-51 accum1) a0-37) + ) + (label cfg-17) + 0 + (update-wait-stats + (-> *perf-stats* data 6) + (the-as uint 0) + (-> *tfrag-work* near-wait-to-spr) + (-> *tfrag-work* near-wait-from-spr) + ) + (tfrag-near-end-buffer s2-1) + (let ((a3-6 (-> s2-1 base))) + (let ((v1-58 (the-as object (-> s2-1 base)))) + (set! + (-> (the-as dma-packet v1-58) dma) + (new 'static 'dma-tag :id (dma-tag-id next)) + ) + (set! (-> (the-as dma-packet v1-58) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-58) vif1) (new 'static 'vif-tag)) + (set! (-> s2-1 base) (&+ (the-as pointer v1-58) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as + bucket-id + (if (zero? (-> (the-as terrain-context #x70000000) bsp lev-index)) + 33 + 40 + ) + ) + s3-1 + (the-as (pointer dma-tag) a3-6) + ) + ) + ) |# + ) + ) + ) + (none) + ) + + +(defun draw-drawable-tree-dirt-tfrag ((arg0 drawable-tree-dirt-tfrag)) + (local-vars + (r0-0 none) + (a0-18 int) + (a0-20 int) + (a0-35 int) + (a0-37 int) + (sv-16 (pointer uint8)) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (let ((s5-0 (+ (-> arg0 length) -1))) + #| TODO + (when (nonzero? s5-0) + (dotimes (s4-0 s5-0) + (let* ((v1-7 (-> arg0 arrays s4-0)) + (a0-4 (-> arg0 arrays (+ s4-0 1))) + (a1-1 (/ (-> (the-as drawable-inline-array-node v1-7) data 0 id) 8)) + (a0-6 (/ (-> (the-as drawable-inline-array-node a0-4) data 0 id) 8)) + (a1-3 + (&-> + (the-as terrain-context #x70000000) + work + background + vis-list + a1-1 + ) + ) + (a0-8 + (&-> + (the-as terrain-context #x70000000) + work + background + vis-list + a0-6 + ) + ) + ) + (draw-node-cull + a0-8 + a1-3 + (-> (the-as drawable-inline-array-node v1-7) data) + (-> (the-as drawable-inline-array-node v1-7) length) + ) + ) + ) + )|# + + (let* ((v1-13 (-> arg0 arrays s5-0)) + (s5-1 (&+ v1-13 32)) + (s4-1 (-> v1-13 length)) + ) + (set! 
sv-16 (&-> (scratchpad-object terrain-context) work background vis-list (/ (-> s5-1 id) 8))) + (let* ((s2-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-0 (-> s2-0 base)) + ) + (set! (-> *tfrag-work* wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* wait-from-spr) (the-as uint 0)) + (tfrag-init-buffer s2-0 (new 'static 'gs-test :ate #x1 :afail #x1 :zte #x1 :ztst (gs-ztest greater-equal)) 1) + (reset! (-> *perf-stats* data 5)) + + (draw-inline-array-tfrag sv-16 s5-1 s4-1 s2-0) + (update-wait-stats + (-> *perf-stats* data 5) + (the-as uint 0) + (-> *tfrag-work* wait-to-spr) + (-> *tfrag-work* wait-from-spr) + ) + (read! (-> *perf-stats* data 5)) + (tfrag-end-buffer s2-0) + (let ((a3-3 (-> s2-0 base))) + (let ((v1-34 (the-as object (-> s2-0 base)))) + (set! + (-> (the-as dma-packet v1-34) dma) + (new 'static 'dma-tag :id (dma-tag-id next)) + ) + (set! (-> (the-as dma-packet v1-34) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-34) vif1) (new 'static 'vif-tag)) + (set! (-> s2-0 base) (&+ (the-as pointer v1-34) 16)) + ) + (dma-bucket-insert-tag (-> *display* frames (-> *display* on-screen) frame bucket-group) + (if (zero? (-> (scratchpad-object terrain-context) bsp lev-index)) + (bucket-id tfrag-dirt-0) + (bucket-id tfrag-dirt-1) + ) + s3-0 + (the-as (pointer dma-tag) a3-3) + ) + ) + ) + + #| + (let* + ((s2-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-1 (-> s2-1 base)) + ) + (set! (-> *tfrag-work* near-wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* near-wait-from-spr) (the-as uint 0)) + (tfrag-near-init-buffer + s2-1 + (new 'static 'gs-test + :ate #x1 + :afail #x1 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 1 + ) + (let* ((v1-48 (-> *perf-stats* data 6)) + (a0-32 (-> v1-48 ctrl)) + ) + (+! (-> v1-48 count) 1) + (b! (zero? 
a0-32) cfg-15 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-32) + ) + (.sync.l) + (.sync.p) + (label cfg-15) + 0 + (draw-inline-array-tfrag-near sv-16 s5-1 s4-1 s2-1) + (let ((v1-51 (-> *perf-stats* data 6))) + (b! (zero? (-> v1-51 ctrl)) cfg-17 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-35 pcr0) + (+! (-> v1-51 accum0) a0-35) + (.mfpc a0-37 pcr1) + (+! (-> v1-51 accum1) a0-37) + ) + (label cfg-17) + 0 + (update-wait-stats + (-> *perf-stats* data 6) + (the-as uint 0) + (-> *tfrag-work* near-wait-to-spr) + (-> *tfrag-work* near-wait-from-spr) + ) + (tfrag-near-end-buffer s2-1) + (let ((a3-6 (-> s2-1 base))) + (let ((v1-58 (the-as object (-> s2-1 base)))) + (set! + (-> (the-as dma-packet v1-58) dma) + (new 'static 'dma-tag :id (dma-tag-id next)) + ) + (set! (-> (the-as dma-packet v1-58) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-58) vif1) (new 'static 'vif-tag)) + (set! (-> s2-1 base) (&+ (the-as pointer v1-58) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as + bucket-id + (if (zero? (-> (the-as terrain-context #x70000000) bsp lev-index)) + 35 + 42 + ) + ) + s3-1 + (the-as (pointer dma-tag) a3-6) + ) + ) + ) |# + ) + ) + ) + (none) + ) + +(defun draw-drawable-tree-ice-tfrag ((arg0 drawable-tree-ice-tfrag)) + (local-vars + (r0-0 none) + (a0-18 int) + (a0-20 int) + (a0-35 int) + (a0-37 int) + (sv-16 (pointer uint8)) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (let ((s5-0 (+ (-> arg0 length) -1))) + #| TODO + (when (nonzero? 
s5-0) + (dotimes (s4-0 s5-0) + (let* ((v1-7 (-> arg0 arrays s4-0)) + (a0-4 (-> arg0 arrays (+ s4-0 1))) + (a1-1 (/ (-> (the-as drawable-inline-array-node v1-7) data 0 id) 8)) + (a0-6 (/ (-> (the-as drawable-inline-array-node a0-4) data 0 id) 8)) + (a1-3 (&-> (the-as terrain-context #x70000000) work background vis-list a1-1)) + (a0-8 (&-> (the-as terrain-context #x70000000) work background vis-list a0-6)) + ) + (draw-node-cull + a0-8 + a1-3 + (-> (the-as drawable-inline-array-node v1-7) data) + (-> (the-as drawable-inline-array-node v1-7) length) + ) + ) + ) + )|# + (let* ((v1-13 (-> arg0 arrays s5-0)) + (s5-1 (&+ v1-13 32)) + (s4-1 (-> v1-13 length)) + ) + (set! sv-16 (&-> (scratchpad-object terrain-context) work background vis-list (/ (-> s5-1 id) 8)) + ) + (let* + ((s2-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-0 (-> s2-0 base)) + ) + (set! (-> *tfrag-work* wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* wait-from-spr) (the-as uint 0)) + (tfrag-init-buffer + s2-0 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest always) + :afail #x1 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 1 + ) + (reset! (-> *perf-stats* data 5)) + (draw-inline-array-tfrag sv-16 s5-1 s4-1 s2-0) + (update-wait-stats + (-> *perf-stats* data 5) + (the-as uint 0) + (-> *tfrag-work* wait-to-spr) + (-> *tfrag-work* wait-from-spr) + ) + (read! (-> *perf-stats* data 5)) + (tfrag-end-buffer s2-0) + (let ((a3-3 (-> s2-0 base))) + (let ((v1-34 (the-as object (-> s2-0 base)))) + (set! + (-> (the-as dma-packet v1-34) dma) + (new 'static 'dma-tag :id (dma-tag-id next)) + ) + (set! (-> (the-as dma-packet v1-34) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-34) vif1) (new 'static 'vif-tag)) + (set! (-> s2-0 base) (&+ (the-as pointer v1-34) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as + bucket-id + (if (zero? 
(-> (scratchpad-object terrain-context) bsp lev-index)) + 36 + 43 + ) + ) + s3-0 + (the-as (pointer dma-tag) a3-3) + ) + ) + ) + #| + (let* + ((s2-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s3-1 (-> s2-1 base)) + ) + (set! (-> *tfrag-work* near-wait-to-spr) (the-as uint 0)) + (set! (-> *tfrag-work* near-wait-from-spr) (the-as uint 0)) + (tfrag-near-init-buffer + s2-1 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest always) + :afail #x1 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 1 + ) + (let* ((v1-48 (-> *perf-stats* data 6)) + (a0-32 (-> v1-48 ctrl)) + ) + (+! (-> v1-48 count) 1) + (b! (zero? a0-32) cfg-15 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-32) + ) + (.sync.l) + (.sync.p) + (label cfg-15) + 0 + (draw-inline-array-tfrag-near sv-16 s5-1 s4-1 s2-1) + (let ((v1-51 (-> *perf-stats* data 6))) + (b! (zero? (-> v1-51 ctrl)) cfg-17 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-35 pcr0) + (+! (-> v1-51 accum0) a0-35) + (.mfpc a0-37 pcr1) + (+! (-> v1-51 accum1) a0-37) + ) + (label cfg-17) + 0 + (update-wait-stats + (-> *perf-stats* data 6) + (the-as uint 0) + (-> *tfrag-work* near-wait-to-spr) + (-> *tfrag-work* near-wait-from-spr) + ) + (tfrag-near-end-buffer s2-1) + (let ((a3-6 (-> s2-1 base))) + (let ((v1-58 (the-as object (-> s2-1 base)))) + (set! + (-> (the-as dma-packet v1-58) dma) + (new 'static 'dma-tag :id (dma-tag-id next)) + ) + (set! (-> (the-as dma-packet v1-58) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-58) vif1) (new 'static 'vif-tag)) + (set! (-> s2-1 base) (&+ (the-as pointer v1-58) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as + bucket-id + (if (zero? 
(-> (the-as terrain-context #x70000000) bsp lev-index)) + 37 + 44 + ) + ) + s3-1 + (the-as (pointer dma-tag) a3-6) + ) + ) + ) |# + ) + ) + ) + (none) + ) + + ;; definition for method 10 of type drawable-tree-tfrag +;; INFO: Return type mismatch drawable-tree-tfrag vs none. +(defmethod draw drawable-tree-tfrag ((obj drawable-tree-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* tfrag-trees v1-1) obj) + (set! (-> *background-work* tfrag-levels v1-1) a1-5) + ) + (+! (-> *background-work* tfrag-tree-count) 1) + (none) + ) + +;; definition for method 10 of type drawable-tree-trans-tfrag +;; INFO: Return type mismatch drawable-tree-trans-tfrag vs none. +(defmethod draw drawable-tree-trans-tfrag ((obj drawable-tree-trans-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* trans-tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* trans-tfrag-trees v1-1) obj) + (set! (-> *background-work* trans-tfrag-levels v1-1) a1-5) + ) + (+! (-> *background-work* trans-tfrag-tree-count) 1) + (none) + ) + +;; definition for method 10 of type drawable-tree-dirt-tfrag +;; INFO: Return type mismatch drawable-tree-dirt-tfrag vs none. +(defmethod draw drawable-tree-dirt-tfrag ((obj drawable-tree-dirt-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* dirt-tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* dirt-tfrag-trees v1-1) obj) + (set! (-> *background-work* dirt-tfrag-levels v1-1) a1-5) + ) + (+! 
(-> *background-work* dirt-tfrag-tree-count) 1) + (none) + ) + +;; definition for method 10 of type drawable-tree-ice-tfrag +;; INFO: Return type mismatch drawable-tree-ice-tfrag vs none. +(defmethod draw drawable-tree-ice-tfrag ((obj drawable-tree-ice-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* ice-tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* ice-tfrag-trees v1-1) obj) + (set! (-> *background-work* ice-tfrag-levels v1-1) a1-5) + ) + (+! (-> *background-work* ice-tfrag-tree-count) 1) + (none) + ) + +;; definition for method 10 of type drawable-tree-lowres-tfrag +;; INFO: Return type mismatch drawable-tree-lowres-tfrag vs none. +(defmethod draw drawable-tree-lowres-tfrag ((obj drawable-tree-lowres-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* lowres-tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* lowres-tfrag-trees v1-1) obj) + (set! (-> *background-work* lowres-tfrag-levels v1-1) a1-5) + ) + (+! (-> *background-work* lowres-tfrag-tree-count) 1) + (none) + ) + +;; definition for method 10 of type drawable-tree-lowres-trans-tfrag +;; INFO: Return type mismatch drawable-tree-lowres-trans-tfrag vs none. +(defmethod + draw + drawable-tree-lowres-trans-tfrag + ((obj drawable-tree-lowres-trans-tfrag) (arg0 drawable) (arg1 display-frame)) + (let* ((v1-1 (-> *background-work* lowres-trans-tfrag-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* lowres-trans-tfrag-trees v1-1) obj) + (set! (-> *background-work* lowres-trans-tfrag-levels v1-1) a1-5) + ) + (+! 
(-> *background-work* lowres-trans-tfrag-tree-count) 1) + (none) + ) + +;; definition for method 14 of type tfragment +(defmethod collect-stats tfragment ((obj tfragment)) + (stats-tfrag-asm obj) + (none) + ) + +;; definition for method 14 of type drawable-tree-tfrag +;; INFO: Return type mismatch drawable-tree-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-tfrag ((obj drawable-tree-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tfrag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask tfrag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask tfrag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! (-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-tree-lowres-tfrag +;; INFO: Return type mismatch drawable-tree-lowres-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-lowres-tfrag ((obj drawable-tree-lowres-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tfrag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask tfrag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask tfrag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! 
(-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-tree-trans-tfrag +;; INFO: Return type mismatch drawable-tree-trans-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-trans-tfrag ((obj drawable-tree-trans-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* trans-tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* trans-tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! (-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-tree-lowres-trans-tfrag +;; INFO: Return type mismatch drawable-tree-lowres-trans-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-lowres-trans-tfrag ((obj drawable-tree-lowres-trans-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* trans-tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* trans-tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! 
(-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-tree-dirt-tfrag +;; INFO: Return type mismatch drawable-tree-dirt-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-dirt-tfrag ((obj drawable-tree-dirt-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* trans-tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* trans-tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! (-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-tree-ice-tfrag +;; INFO: Return type mismatch drawable-tree-ice-tfrag vs none. +;; Used lq/sq +(defmethod collect-stats drawable-tree-ice-tfrag ((obj drawable-tree-ice-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (set! (-> *tfrag-work* vu1-enable-tfrag) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* vu1-enable-tfrag-near) (the-as int (logand *vu1-enable-user* (vu1-renderer-mask trans-frag)))) + (set! (-> *tfrag-work* tr-stat-tfrag) (-> *terrain-stats* trans-tfrag)) + (set! (-> *tfrag-work* tr-stat-tfrag-near) (-> *terrain-stats* trans-tfrag-near)) + (let ((v1-12 (-> *tfrag-work* frag-dists quad))) + (set! 
(-> *tfrag-work* frag-dists quad) v1-12) + ) + (dotimes (s5-0 (-> obj length)) + (collect-stats (-> obj arrays s5-0)) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-inline-array-tfrag +;; INFO: Return type mismatch drawable-inline-array-tfrag vs none. +(defmethod collect-stats drawable-inline-array-tfrag ((obj drawable-inline-array-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tfrag)) + (dotimes (s5-0 (-> obj length)) + (let ((s4-0 (-> obj data s5-0))) + (if (vis-cull (-> s4-0 id)) + (collect-stats s4-0) + ) + ) + ) + ) + (none) + ) + +;; definition for method 14 of type drawable-inline-array-trans-tfrag +;; INFO: Return type mismatch drawable-inline-array-trans-tfrag vs none. +(defmethod collect-stats drawable-inline-array-trans-tfrag ((obj drawable-inline-array-trans-tfrag)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask trans-frag)) + (dotimes (s5-0 (-> obj length)) + (let ((s4-0 (-> obj data s5-0))) + (if (vis-cull (-> s4-0 id)) + (collect-stats s4-0) + ) + ) + ) + ) + (none) + ) + +;; definition for method 15 of type drawable-tree-tfrag +;; INFO: Return type mismatch drawable-tree-tfrag vs none. +(defmethod debug-draw drawable-tree-tfrag ((obj drawable-tree-tfrag) (arg0 drawable) (arg1 display-frame)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tfrag)) + (dotimes (s4-0 (-> obj length)) + (let ((a1-1 (-> obj arrays s4-0))) + (debug-draw a1-1 a1-1 arg1) + ) + ) + ) + (none) + ) + +;; definition for method 15 of type drawable-tree-trans-tfrag +;; INFO: Return type mismatch drawable-tree-trans-tfrag vs none. +(defmethod debug-draw drawable-tree-trans-tfrag ((obj drawable-tree-trans-tfrag) (arg0 drawable) (arg1 display-frame)) + (when (logtest? 
*vu1-enable-user* (vu1-renderer-mask tfrag)) + (dotimes (s4-0 (-> obj length)) + (let ((a1-1 (-> obj arrays s4-0))) + (debug-draw a1-1 a1-1 arg1) + ) + ) + ) + (none) + ) + +;; definition for method 15 of type drawable-inline-array-tfrag +;; INFO: Return type mismatch drawable-inline-array-tfrag vs none. +(defmethod + debug-draw + drawable-inline-array-tfrag + ((obj drawable-inline-array-tfrag) (arg0 drawable) (arg1 display-frame)) + (dotimes (s4-0 (-> obj length)) + (let ((s3-0 (-> obj data s4-0))) + (if (vis-cull (-> s3-0 id)) + (debug-draw s3-0 s3-0 arg1) + ) + ) + ) + (none) + ) + +;; definition for method 15 of type tfragment +(defmethod debug-draw tfragment ((obj tfragment) (arg0 drawable) (arg1 display-frame)) + (-> arg1 global-buf) + (edge-debug-lines (-> obj debug-data debug-lines)) + + (add-debug-sphere #t (bucket-id debug-draw0) (-> obj bsphere) (-> obj bsphere w) (new 'static 'rgba :r #xff :g #xff :b #xff :a #x80)) + ;;(add-debug-x #t (bucket-id debug-draw0) (-> obj bsphere) (new 'static 'rgba :r #xff :g #xff :b #xff :a #x80)) + (none) + ) + diff --git a/goal_src/engine/gfx/tfrag/tfrag-near.gc b/goal_src/engine/gfx/tfrag/tfrag-near.gc index 7dd82c5b9c..e091eb398e 100644 --- a/goal_src/engine/gfx/tfrag/tfrag-near.gc +++ b/goal_src/engine/gfx/tfrag/tfrag-near.gc @@ -5,3 +5,4 @@ ;; name in dgo: tfrag-near ;; dgos: GAME, ENGINE +(define tnear-vu1-block (new 'static 'vu-function :length #x0 :qlength #x0)) \ No newline at end of file diff --git a/goal_src/engine/gfx/tfrag/tfrag-work.gc b/goal_src/engine/gfx/tfrag/tfrag-work.gc index 4c14cd5c1e..154477007b 100644 --- a/goal_src/engine/gfx/tfrag/tfrag-work.gc +++ b/goal_src/engine/gfx/tfrag/tfrag-work.gc @@ -50,14 +50,15 @@ ) ) -;; failed to figure out what this is: -(set! (-> *tfrag-work* color-ptr x) (+ 6160 #x70000000)) ;; failed to figure out what this is: -(set! (-> *tfrag-work* color-ptr y) (+ 6160 #x70000000)) +(set! 
(-> *tfrag-work* color-ptr x) (+ 6160 (the-as int *fake-scratchpad-data*))) ;; failed to figure out what this is: -(set! (-> *tfrag-work* color-ptr z) (+ 6160 #x70000000)) +(set! (-> *tfrag-work* color-ptr y) (+ 6160 (the-as int *fake-scratchpad-data*))) ;; failed to figure out what this is: -(set! (-> *tfrag-work* color-ptr w) (+ 6160 #x70000000)) +(set! (-> *tfrag-work* color-ptr z) (+ 6160 (the-as int *fake-scratchpad-data*))) + +;; failed to figure out what this is: +(set! (-> *tfrag-work* color-ptr w) (+ 6160 (the-as int *fake-scratchpad-data*))) diff --git a/goal_src/engine/gfx/tfrag/tfrag.gc b/goal_src/engine/gfx/tfrag/tfrag.gc index 856f70e9b0..231250829f 100644 --- a/goal_src/engine/gfx/tfrag/tfrag.gc +++ b/goal_src/engine/gfx/tfrag/tfrag.gc @@ -5,16 +5,598 @@ ;; name in dgo: tfrag ;; dgos: GAME, ENGINE +;; TFRAG drawing procedure + +;; - when a level is loaded, it adds a connection from the bsp-header to the *background-draw-engine* with the add-bsp-drawable function. +;; this allows the level to be drawn from within drawable.gc + +;; - within real-main-draw-hook, background engine is executed, drawing all levels. + +;; - the add-bsp-drawable function calls the draw (and debug-draw) methods of the bsp-header. + +;; - draw method of the bsp-header calls draw on each of the drawable trees (after setting up vis stuff) + +;; - the draw method of drawable-tree-tfrag just adds the tree to the background-work + +;; - within real-main-draw-hook, the finish-background function is called, eventually calling draw-drawable-tree-tfrag + +;; - the draw-drawable-tree-tfrag function sets up DMA, eventually calls draw-inline-array-tfrag (in this file), which actually does the DMA setup. 
+ + +;;;;;;;;;;;;;;;;;;;;;;;; +;; basic methods +;;;;;;;;;;;;;;;;;;;;;;;; + (defmethod login tfragment ((obj tfragment)) + "Initialize a tfragment by linking the textures in adgif shaders" (dotimes (s5-0 (the-as int (-> obj num-shaders))) - (adgif-shader-login-no-remap (-> obj shader s5-0)) - ) + (adgif-shader-login-no-remap (-> obj shader s5-0)) + ) + obj + ) + +(defmethod mem-usage tfragment ((obj tfragment) (arg0 memory-usage-block) (arg1 int)) + "Compute the memory usage of a tfragment" + + ;; seems like this flag does colors? + (when (logtest? arg1 2) + (+! (-> arg0 data 19 count) 1) + (let ((v1-6 (+ (-> obj num-base-colors) (-> obj num-level0-colors) (-> obj num-level1-colors)))) + (+! (-> arg0 data 19 used) v1-6) + (+! (-> arg0 data 19 total) (logand -4 (+ v1-6 3))) + ) + (return obj) + ) + + (let ((s4-0 1)) + (set! (-> arg0 length) (max (-> arg0 length) (+ s4-0 8))) + (set! (-> arg0 data s4-0 name) (symbol->string 'tfragment)) + (+! (-> arg0 data s4-0 count) 1) + + ;; the size of the actual tfragment + (let ((v1-22 (asize-of obj))) + (+! (-> arg0 data s4-0 used) v1-22) + (+! (-> arg0 data s4-0 total) (logand -16 (+ v1-22 15))) + ) + + ;; the size of the "base" DMA + (set! (-> arg0 data (+ s4-0 1) name) "tfragment-base") + (+! (-> arg0 data (+ s4-0 1) count) 1) + (let ((v1-33 (* (-> obj dma-qwc 0) 16))) + (+! (-> arg0 data (+ s4-0 1) used) v1-33) + (+! (-> arg0 data (+ s4-0 1) total) v1-33) + ) + + ;; the size of the "common" DMA + (set! (-> arg0 data (+ s4-0 2) name) "tfragment-common") + (+! (-> arg0 data (+ s4-0 2) count) 1) + (let ((v1-43 (* (- (-> obj dma-qwc 1) (-> obj dma-qwc 0)) 16))) + (+! (-> arg0 data (+ s4-0 2) used) v1-43) + (+! (-> arg0 data (+ s4-0 2) total) v1-43) + ) + + ;; the size of the "level0" DMA + (set! (-> arg0 data (+ s4-0 3) name) "tfragment-level0") + (when (nonzero? (-> obj num-level0-colors)) + (+! (-> arg0 data (+ s4-0 3) count) 1) + (let ((v1-55 (* (- (-> obj dma-qwc 2) (-> obj dma-qwc 0)) 16))) + (+! 
(-> arg0 data (+ s4-0 3) used) v1-55) + (+! (-> arg0 data (+ s4-0 3) total) v1-55) + ) + ) + + ;; The size of the "level1" DMA. Note that the dma chains can overlap, so this is a bit weird. + (set! (-> arg0 data (+ s4-0 4) name) "tfragment-level1") + (when (not (or (= (-> obj dma-level-1) (-> obj dma-common)) + (= (-> obj dma-level-1) (-> obj dma-base)) + (zero? (-> obj num-level1-colors)) + ) + ) + (+! (-> arg0 data (+ s4-0 4) count) 1) + (let ((v1-70 (* (- (-> obj dma-qwc 3) + (the-as uint (- (/ (the-as int (- (-> obj dma-level-1) (-> obj dma-common))) 16) (the-as int (-> obj dma-qwc 0)))) + ) + 16 + ) + ) + ) + (+! (-> arg0 data (+ s4-0 4) used) v1-70) + (+! (-> arg0 data (+ s4-0 4) total) v1-70) + ) + ) + + ;; colors + (set! (-> arg0 data (+ s4-0 5) name) "tfragment-color") + (+! (-> arg0 data (+ s4-0 5) count) 1) + (let ((v1-79 (if (logtest? arg1 1) + 0 + (the-as int (* (+ (-> obj num-base-colors) (-> obj num-level0-colors) (-> obj num-level1-colors)) 2)) + ) + ) + ) + (+! (-> arg0 data (+ s4-0 5) used) v1-79) + (+! (-> arg0 data (+ s4-0 5) total) (logand -16 (+ v1-79 15))) + ) + + ;; debug (unused) + (set! (-> arg0 data (+ s4-0 6) name) "tfragment-debug") + ) + + obj + ) + +;;;;;;;;;;;;;;;;;;;;;;; +;; tree/array methods +;;;;;;;;;;;;;;;;;;;;;;; + +(defmethod inspect drawable-inline-array-tfrag ((obj drawable-inline-array-tfrag)) + "Inspect an array of tfragments." 
+ (format #t "[~8x] ~A~%" obj (-> obj type)) + (format #t "~Tlength: ~D~%" (-> obj length)) + (format #t "~Tdata[~D]: @ #x~X~%" (-> obj length) (-> obj data)) + (dotimes (s5-0 (-> obj length)) + (format #t "~T [~D] ~A~%" s5-0 (-> obj data s5-0)) + ) obj ) (defmethod login drawable-inline-array-tfrag ((obj drawable-inline-array-tfrag)) (dotimes (s5-0 (-> obj length)) - (login (-> obj data s5-0)) - ) + (login (-> obj data s5-0)) + ) obj - ) \ No newline at end of file + ) + +(defmethod mem-usage drawable-inline-array-tfrag ((obj drawable-inline-array-tfrag) (arg0 memory-usage-block) (arg1 int)) + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-7 32)) + (+! (-> arg0 data 0 used) v1-7) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15))) + ) + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj data s3-0) arg0 arg1) + ) + obj + ) + +(defmethod mem-usage drawable-tree-tfrag ((obj drawable-tree-tfrag) (arg0 memory-usage-block) (arg1 int)) + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) "drawable-group") + (+! (-> arg0 data 0 count) 1) + (let ((v1-6 (asize-of obj))) + (+! (-> arg0 data 0 used) v1-6) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-6 15))) + ) + (when (nonzero? (-> obj time-of-day-pal)) + (set! (-> arg0 length) (max 9 (-> arg0 length))) + (set! (-> arg0 data 8 name) "tfragment-pal") + (+! (-> arg0 data 8 count) 1) + (let ((v1-18 (asize-of (-> obj time-of-day-pal)))) + (+! (-> arg0 data 8 used) v1-18) + (+! 
(-> arg0 data 8 total) (logand -16 (+ v1-18 15))) + ) + ) + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj arrays s3-0) arg0 arg1) + ) + obj + ) + +(defmethod asize-of drawable-inline-array-tfrag ((obj drawable-inline-array-tfrag)) + (the-as int (+ (-> drawable-inline-array-tfrag size) (* (+ (-> obj length) -1) 64))) + ) + +;;;;;;;;;;;;;;;;;;;;;; +;; tfrag renderer +;;;;;;;;;;;;;;;;;;;;;; + +(define *tfrag-display-stats* #f) + +(define tfrag-vu1-block (new 'static 'vu-function :length #x0 :qlength #x0)) + +(defun tfrag-data-setup ((arg0 tfrag-data) (arg1 int)) + "Set up a tfrag-data. This is loaded to VU1 memory. Arg1 sets abe (alpha blend enable)" + (let ((v1-0 *math-camera*)) + (set-vector! (-> arg0 fog) (-> v1-0 pfog0) (-> v1-0 fog-min) (-> v1-0 fog-max) 3072.0) + (set-vector! (-> arg0 val) 0.5 1.0 2048.0 0.0) + (set-vector! (-> arg0 ambient) 1.0 1.0 1.0 1.0) + (cond + ((zero? *subdivide-draw-mode*) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + (set! (-> arg0 fangif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-fan) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 1) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + (set! (-> arg0 fangif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 2) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :fge #x1 :abe arg1) + ) + ) + (set! 
(-> arg0 fangif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-fan) :iip #x1 :fge #x1 :abe arg1) + ) + ) + ) + ) + (set! (-> arg0 strgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2))) + (set! (-> arg0 fangif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2))) + (set! (-> arg0 adgif tag) (new 'static 'gif-tag64 :nloop #x5 :nreg #x1)) + (set! (-> arg0 adgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d))) + (set! (-> arg0 hvdf-offset quad) (-> v1-0 hvdf-off quad)) + (set! (-> arg0 hmge-scale quad) (-> v1-0 hmge-scale quad)) + (set! (-> arg0 invh-scale quad) (-> v1-0 inv-hmge-scale quad)) + (set! (-> arg0 guard quad) (-> v1-0 guard quad)) + ) + (set-tfrag-dists! (-> arg0 dists)) + (none) + ) + +(defun add-tfrag-mtx-0 ((arg0 dma-buffer)) + (let* ((a1-0 4) + (v1-0 arg0) + (a0-1 (the-as object (-> v1-0 base))) + ) + (set! (-> (the-as dma-packet a0-1) dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc a1-0)) + (set! (-> (the-as dma-packet a0-1) vif0) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! (-> (the-as dma-packet a0-1) vif1) (new 'static 'vif-tag :imm #x5 :cmd (vif-cmd unpack-v4-32) :num a1-0)) + (set! (-> v1-0 base) (&+ (the-as pointer a0-1) 16)) + ) + (column-scale-matrix! (the-as matrix (-> arg0 base)) + (new 'static 'vector :x 1.0 :y 1.0 :z 1.0 :w 1.0) + (-> *math-camera* camera-temp) + ) + (&+! (-> arg0 base) 64) + (none) + ) + +(defun add-tfrag-mtx-1 ((arg0 dma-buffer)) + (let* ((a1-0 4) + (v1-0 arg0) + (a0-1 (the-as object (-> v1-0 base))) + ) + (set! (-> (the-as dma-packet a0-1) dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc a1-0)) + (set! (-> (the-as dma-packet a0-1) vif0) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! 
(-> (the-as dma-packet a0-1) vif1) (new 'static 'vif-tag :imm #x14d :cmd (vif-cmd unpack-v4-32) :num a1-0)) + (set! (-> v1-0 base) (&+ (the-as pointer a0-1) 16)) + ) + (column-scale-matrix! (the-as matrix (-> arg0 base)) (new 'static 'vector :x 1.0 :y 1.0 :z 1.0 :w 1.0) (-> *math-camera* camera-temp)) + (&+! (-> arg0 base) 64) + (none) + ) + +(defun add-tfrag-data ((arg0 dma-buffer) (arg1 int)) + (let* ((a2-0 14) + (v1-0 arg0) + (a0-1 (the-as object (-> v1-0 base))) + ) + (set! (-> (the-as dma-packet a0-1) dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc a2-0)) + (set! (-> (the-as dma-packet a0-1) vif0) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! (-> (the-as dma-packet a0-1) vif1) (new 'static 'vif-tag :imm #x290 :cmd (vif-cmd unpack-v4-32) :num a2-0)) + (set! (-> v1-0 base) (&+ (the-as pointer a0-1) 16)) + ) + (tfrag-data-setup (the-as tfrag-data (-> arg0 base)) arg1) + (&+! (-> arg0 base) 224) + (let ((v1-3 (the-as object (-> arg0 base)))) + (set! (-> (the-as dma-packet v1-3) dma) (new 'static 'dma-tag :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet v1-3) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-3) vif1) (new 'static 'vif-tag :cmd (vif-cmd mscal) :msk #x1)) + (set! 
(-> arg0 base) (&+ (the-as pointer v1-3) 16)) + ) + (none) + ) + +;;;;;;;;;;;;;;;;;;;;; +;; TFRAG Stats +;;;;;;;;;;;;;;;;;;;;; + +(define t-stat (new 'global 'tfrag-stats)) + +(defun tfrag-print-stats ((arg0 symbol)) + (when (and *tfrag-display-stats* (!= *master-mode* 'menu)) + (format arg0 "~%") + (format arg0 "tris: ~8d~%" (-> t-stat tris)) + (format + arg0 + "verts: ~8d~%" + (+ (-> t-stat base-verts) (-> t-stat level0-verts) (-> t-stat level1-verts)) + ) + (format arg0 " base: ~8d~%" (-> t-stat base-verts)) + (format arg0 " lev0: ~8d~%" (-> t-stat level0-verts)) + (format arg0 " lev1: ~8d~%" (-> t-stat level1-verts)) + (format arg0 "tfaces: ~8d~%" (-> t-stat tfaces)) + (format arg0 "tfrags: ~8d~%" (-> t-stat tfrags)) + (format arg0 "dtris: ~8d~%" (-> t-stat dtris)) + (format arg0 "dps: ~8d~%" (-> t-stat drawpoints)) + (format arg0 "strips: ~8d~%" (-> t-stat strips)) + (format arg0 "shaders:~8d~%" (-> t-stat dma-tex)) + (format arg0 "tri/str:~8f~%" (/ (the float (-> t-stat dtris)) (the float (-> t-stat strips)))) + (format arg0 "dma-cnt:~8d (~8d)~%" (-> t-stat dma-cnt) (* (-> t-stat dma-cnt) 32)) + (format arg0 "dma-dta:~8d (~8d)~%" (-> t-stat dma-dta) (/ (* 33 (-> t-stat dma-dta)) 10)) + (let ((f0-4 (* 32.0 (the float (-> t-stat dma-cnt)))) + (f1-5 (* 3.3 (the float (-> t-stat dma-dta)))) + (f2-3 (* 30.0 (the float (-> t-stat tfrags)))) + ) + (+ f0-4 f1-5 f2-3) + ) + ) + (none) + ) + +(set! (-> t-stat from) 0) +(set! (-> t-stat to) 0) +(set! (-> t-stat cnt) 0) + + +;;;;;;;;;;;;;;;;;;;;;;;; +;; buffer +;;;;;;;;;;;;;;;;;;;;;;;; + +(defun tfrag-init-buffer ((arg0 dma-buffer) (arg1 gs-test) (arg2 int)) + (dma-buffer-add-vu-function arg0 tfrag-vu1-block 1) + (let* ((v1-0 arg0) + (a0-2 (the-as object (-> v1-0 base))) + ) + (set! (-> (the-as dma-packet a0-2) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-2) vif0) (new 'static 'vif-tag)) + (set! 
(-> (the-as dma-packet a0-2) vif1) (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1)) + (set! (-> v1-0 base) (&+ (the-as pointer a0-2) 16)) + ) + (let* ((v1-1 arg0) + (a0-4 (the-as object (-> v1-1 base))) + ) + (set! (-> (the-as gs-gif-tag a0-4) tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1)) + (set! (-> (the-as gs-gif-tag a0-4) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! (-> v1-1 base) (&+ (the-as pointer a0-4) 16)) + ) + (let* ((v1-2 arg0) + (a0-6 (-> v1-2 base)) + ) + (set! (-> (the-as (pointer gs-test) a0-6)) arg1) + (set! (-> (the-as (pointer gs-reg64) a0-6) 1) (gs-reg64 test-1)) + (set! (-> v1-2 base) (&+ a0-6 16)) + ) + (add-tfrag-mtx-0 arg0) + (add-tfrag-mtx-1 arg0) + (add-tfrag-data arg0 arg2) + (let ((v1-3 (the-as object (-> arg0 base)))) + (set! (-> (the-as dma-packet v1-3) dma) (new 'static 'dma-tag :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet v1-3) vif0) (new 'static 'vif-tag :cmd (vif-cmd base))) + (set! (-> (the-as dma-packet v1-3) vif1) (new 'static 'vif-tag :imm #x148 :cmd (vif-cmd offset))) + (set! (-> arg0 base) (&+ (the-as pointer v1-3) 16)) + ) + (set! (-> *tfrag-work* last-call) (the-as uint 0)) + (set! (-> t-stat cnt) 0) + (set! (-> t-stat tris) 0) + (set! (-> t-stat tfaces) 0) + (set! (-> t-stat tfrags) 0) + (set! (-> t-stat dtris) 0) + (set! (-> t-stat dma-cnt) 0) + (set! (-> t-stat dma-dta) 0) + (set! (-> t-stat dma-tex) 0) + (set! (-> t-stat strips) 0) + (set! (-> t-stat drawpoints) 0) + (set! (-> t-stat base-verts) 0) + (set! (-> t-stat level0-verts) 0) + (set! 
(-> t-stat level1-verts) 0) + (none) + ) + +(defun tfrag-end-buffer ((arg0 dma-buffer)) + (let* ((v1-0 arg0) + (a1-0 (the-as object (-> v1-0 base))) + ) + (set! (-> (the-as dma-packet a1-0) dma) (new 'static 'dma-tag :qwc #x3 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a1-0) vif0) (new 'static 'vif-tag :cmd (vif-cmd stmask))) + (set! (-> (the-as dma-packet a1-0) vif1) (new 'static 'vif-tag)) + (set! (-> v1-0 base) (&+ (the-as pointer a1-0) 16)) + ) + (let* ((v1-1 arg0) + (a0-1 (-> v1-1 base)) + ) + (set! (-> (the-as (pointer vif-tag) a0-1) 0) (the-as vif-tag (-> *tfrag-work* last-call))) + (set! (-> (the-as (pointer vif-tag) a0-1) 1) (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1)) + (set! (-> (the-as (pointer vif-tag) a0-1) 2) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as (pointer vif-tag) a0-1) 3) (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1)) + (set! (-> (the-as (pointer uint32) a0-1) 4) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 5) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 6) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 7) (the-as uint 0)) + (set! (-> (the-as (pointer vif-tag) a0-1) 8) (new 'static 'vif-tag :cmd (vif-cmd base))) + (set! (-> (the-as (pointer vif-tag) a0-1) 9) (new 'static 'vif-tag :cmd (vif-cmd offset))) + (set! (-> (the-as (pointer vif-tag) a0-1) 10) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! (-> (the-as (pointer vif-tag) a0-1) 11) (new 'static 'vif-tag)) + (set! (-> v1-1 base) (&+ a0-1 48)) + ) + (none) + ) + +;;(define-extern draw-inline-array-tfrag (function pointer drawable-inline-array int dma-buffer none)) +(def-mips2c draw-inline-array-tfrag (function pointer drawable-inline-array int dma-buffer none)) + + +(defun tfrag-near-init-buffer ((arg0 dma-buffer) (arg1 gs-test) (arg2 int)) + (dma-buffer-add-vu-function arg0 tnear-vu1-block 1) + (let* ((v1-0 arg0) + (a0-2 (the-as object (-> v1-0 base))) + ) + (set! 
+ (-> (the-as dma-packet a0-2) dma) + (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt)) + ) + (set! (-> (the-as dma-packet a0-2) vif0) (new 'static 'vif-tag)) + (set! + (-> (the-as dma-packet a0-2) vif1) + (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1) + ) + (set! (-> v1-0 base) (&+ (the-as pointer a0-2) 16)) + ) + (let* ((v1-1 arg0) + (a0-4 (the-as object (-> v1-1 base))) + ) + (set! + (-> (the-as gs-gif-tag a0-4) tag) + (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1) + ) + (set! + (-> (the-as gs-gif-tag a0-4) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! (-> v1-1 base) (&+ (the-as pointer a0-4) 16)) + ) + (let* ((v1-2 arg0) + (a0-6 (-> v1-2 base)) + ) + (set! (-> (the-as (pointer gs-test) a0-6)) arg1) + (set! (-> (the-as (pointer gs-reg64) a0-6) 1) (gs-reg64 test-1)) + (set! (-> v1-2 base) (&+ a0-6 16)) + ) + (add-tfrag-mtx-0 arg0) + (add-tfrag-mtx-1 arg0) + (add-tfrag-data arg0 arg2) + (let ((v1-3 (the-as object (-> arg0 base)))) + (set! + (-> (the-as dma-packet v1-3) dma) + (new 'static 'dma-tag :id (dma-tag-id cnt)) + ) + (set! + (-> (the-as dma-packet v1-3) vif0) + (new 'static 'vif-tag :cmd (vif-cmd base)) + ) + (set! + (-> (the-as dma-packet v1-3) vif1) + (new 'static 'vif-tag :imm #x148 :cmd (vif-cmd offset)) + ) + (set! (-> arg0 base) (&+ (the-as pointer v1-3) 16)) + ) + (set! (-> *tfrag-work* last-call) (the-as uint 0)) + (none) + ) + +;; definition for function tfrag-near-end-buffer +;; INFO: Return type mismatch symbol vs none. 
+(defun tfrag-near-end-buffer ((arg0 dma-buffer)) + (let* ((v1-0 arg0) + (a1-0 (the-as object (-> v1-0 base))) + ) + (set! + (-> (the-as dma-packet a1-0) dma) + (new 'static 'dma-tag :qwc #x3 :id (dma-tag-id cnt)) + ) + (set! + (-> (the-as dma-packet a1-0) vif0) + (new 'static 'vif-tag :cmd (vif-cmd stmask)) + ) + (set! (-> (the-as dma-packet a1-0) vif1) (new 'static 'vif-tag)) + (set! (-> v1-0 base) (&+ (the-as pointer a1-0) 16)) + ) + (let* ((v1-1 arg0) + (a0-1 (-> v1-1 base)) + ) + (set! (-> (the-as (pointer uint32) a0-1)) (-> *tfrag-work* last-call)) + (set! + (-> (the-as (pointer vif-tag) a0-1) 1) + (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1) + ) + (set! + (-> (the-as (pointer vif-tag) a0-1) 2) + (new 'static 'vif-tag :cmd (vif-cmd stmod)) + ) + (set! + (-> (the-as (pointer vif-tag) a0-1) 3) + (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1) + ) + (set! (-> (the-as (pointer uint32) a0-1) 4) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 5) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 6) (the-as uint 0)) + (set! (-> (the-as (pointer uint32) a0-1) 7) (the-as uint 0)) + (set! + (-> (the-as (pointer vif-tag) a0-1) 8) + (new 'static 'vif-tag :cmd (vif-cmd base)) + ) + (set! + (-> (the-as (pointer vif-tag) a0-1) 9) + (new 'static 'vif-tag :cmd (vif-cmd offset)) + ) + (set! + (-> (the-as (pointer vif-tag) a0-1) 10) + (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl)) + ) + (set! (-> (the-as (pointer vif-tag) a0-1) 11) (new 'static 'vif-tag)) + (set! 
(-> v1-1 base) (&+ a0-1 48)) + ) + (none) + ) + +(define-extern draw-inline-array-tfrag-near (function pointer drawable-inline-array int dma-buffer none)) + +(def-mips2c stats-tfrag-asm (function tfragment none)) diff --git a/goal_src/engine/gfx/time-of-day.gc b/goal_src/engine/gfx/time-of-day.gc index 1aad792c24..32ae8046e5 100644 --- a/goal_src/engine/gfx/time-of-day.gc +++ b/goal_src/engine/gfx/time-of-day.gc @@ -261,6 +261,12 @@ ;; TODO time-of-day-interp-colors ;; TODO time-of-day-interp-colors-scratch +; (defun time-of-day-interp-colors-scratch ((data (pointer rgba)) (pal time-of-day-palette) (ctxt mood-context)) +; ;; TODO +; (none) +; ) +(def-mips2c time-of-day-interp-colors-scratch (function (pointer rgba) time-of-day-palette mood-context none)) + (defun init-time-of-day-context ((arg0 time-of-day-context)) "Set up the title-light-group." diff --git a/goal_src/engine/gfx/vis/bsp.gc b/goal_src/engine/gfx/vis/bsp.gc index fca06a550f..4118cf176d 100644 --- a/goal_src/engine/gfx/vis/bsp.gc +++ b/goal_src/engine/gfx/vis/bsp.gc @@ -298,37 +298,41 @@ ) ) - ;; run the foreground system - (let ((s5-1 (-> *display* frames (-> *display* on-screen) frame))) - ;; 0 - (foreground-engine-execute - (-> obj level foreground-draw-engine 0) - s5-1 - (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) - 0 - ) - ;; 1 - (foreground-engine-execute - (-> obj level foreground-draw-engine 1) - s5-1 - (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) - 1 - ) - - ;; 2 - (foreground-engine-execute - (-> obj level foreground-draw-engine 2) - s5-1 - (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) - 2 - ) + ;; run the foreground system (0 check added) + (when (nonzero? 
foreground-engine-execute) + (let ((s5-1 (-> *display* frames (-> *display* on-screen) frame))) + ;; 0 + (foreground-engine-execute + (-> obj level foreground-draw-engine 0) + s5-1 + (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) + 0 + ) + ;; 1 + (foreground-engine-execute + (-> obj level foreground-draw-engine 1) + s5-1 + (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) + 1 + ) + + ;; 2 + (foreground-engine-execute + (-> obj level foreground-draw-engine 2) + s5-1 + (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) + 2 + ) + ) ) + (none) ) -(defmethod debug-draw bsp-header ((obj bsp-header) (arg0 drawable) (arg1 object)) +(defmethod debug-draw bsp-header ((obj bsp-header) (arg0 drawable) (arg1 display-frame)) "This is some sort of debugging thing. It calls debug-draw on the drawables with the scratchpad and vfs set up." + (let ((s4-0 (-> obj level))) ;; set up some stuff in the scratchpad (set! (-> (scratchpad-object terrain-bsp :offset TERRAIN_BSP_SCRATCHPAD) lev-index) (-> s4-0 index)) @@ -384,10 +388,8 @@ (none) ) -(defmethod dummy-14 bsp-header ((obj bsp-header)) - "Also some sort of debug thing. - Calls dummy-14 on children with visible list and vfs set up - but no terrain-bsp" +(defmethod collect-stats bsp-header ((obj bsp-header)) + "Collect drawing statistics" (let ((v1-0 (-> obj level)) (a2-0 (/ (+ (-> obj visible-list-length) 15) 16)) ) @@ -420,7 +422,7 @@ ) ) (if (nonzero? (-> obj drawable-trees)) - (dummy-14 (-> obj drawable-trees)) + (collect-stats (-> obj drawable-trees)) ) (none) ) diff --git a/goal_src/engine/level/level.gc b/goal_src/engine/level/level.gc index 6084cb3041..aae1692474 100644 --- a/goal_src/engine/level/level.gc +++ b/goal_src/engine/level/level.gc @@ -91,7 +91,6 @@ (defun add-bsp-drawable ((arg0 bsp-header) (arg1 level) (arg2 symbol) (arg3 display-frame)) "Draw a level!" 
- ;; do the draw (draw arg0 arg0 arg3) diff --git a/goal_src/examples/debug-draw-example.gc b/goal_src/examples/debug-draw-example.gc index f97b774bfd..bf8819e7c7 100644 --- a/goal_src/examples/debug-draw-example.gc +++ b/goal_src/examples/debug-draw-example.gc @@ -120,15 +120,16 @@ (defun wasd-camera-update () (let ((local-trans (new-stack-vector0)) (trans *wasd-camera-transform*) + (fast-mode (cpad-hold? 0 r2)) (pad-idx 0)) ;; circle/square move camera relative x (left and right) (set! (-> local-trans x) (cond ((cpad-hold? 0 circle) - -80.0 + -1600.0 ) ((cpad-hold? 0 square) - 80.0 + 1600.0 ) (else 0.0 @@ -155,6 +156,10 @@ ) (set! (-> local-trans w) 1.0) + (when fast-mode + (vector-float*! local-trans local-trans 10.) + ) + ;; rotate this into world frame (let ((inv-cam-rot (new-stack-vector0)) (cam-rot-mat (new-stack-matrix0))) @@ -174,25 +179,28 @@ (set! (-> trans trans w) 1.0) ;; global translation - (if (cpad-hold? 0 l1) - (set! (-> trans trans y) (+ 2000.0 (-> trans trans y))) - ) - (if (cpad-hold? 0 r1) - (set! (-> trans trans y) (+ -2000.0 (-> trans trans y))) - ) + (let ((diff (if fast-mode 10000.0 2000.0))) + (if (cpad-hold? 0 l1) + (set! (-> trans trans y) (+ diff (-> trans trans y))) + ) + (if (cpad-hold? 0 r1) + (set! (-> trans trans y) (+ (- diff) (-> trans trans y))) + ) + ) + ;; rotation (don't allow camera roll) (if (cpad-hold? 0 x) - (set! (-> trans rot x) (+ 54.13336 (-> trans rot x))) + (set! (-> trans rot x) (+ 200. (-> trans rot x))) ) (if (cpad-hold? 0 triangle) - (set! (-> trans rot x) (+ -54.13336 (-> trans rot x))) + (set! (-> trans rot x) (+ -200. (-> trans rot x))) ) (if (cpad-hold? 0 left) - (set! (-> trans rot y) (+ 150.13336 (-> trans rot y))) + (set! (-> trans rot y) (+ 300. (-> trans rot y))) ) (if (cpad-hold? 0 right) - (set! (-> trans rot y) (+ -150.13336 (-> trans rot y))) + (set! (-> trans rot y) (+ -300. (-> trans rot y))) ) (set! (-> trans scale x) 1.) @@ -264,9 +272,9 @@ ) (set! *display-profile* #t) -(set! 
*display-split-boxes* #t) -(set! *display-level-border* #t) -(set! *display-split-box-info* #t) +; (set! *display-split-boxes* #t) +; (set! *display-level-border* #t) +; (set! *display-split-box-info* #t) (set! *display-deci-count* #t) (defun text-randomizer () @@ -283,4 +291,52 @@ ) ) -(test-make-target) \ No newline at end of file +(defun update-subdivide-settings! ((settings subdivide-settings) (math-cam math-camera) (idx int)) + "Change the subdivide settings." + (set! (-> settings meters 0) (meters 20000.0)) + (set! (-> settings meters 1) (meters 20.0)) + (set! (-> settings meters 2) (meters 20.0)) + (set! (-> settings meters 3) (meters 20.0)) + (set! (-> settings meters 4) (meters 20.0)) + + ; (set! (-> settings meters 0) (-> settings far idx)) + ; (set! (-> settings meters 4) (-> settings close idx)) + ; (let ((f0-3 (* 0.14285715 (- (-> settings meters 0) (-> settings meters 4))))) + ; (set! (-> settings meters 3) (+ (-> settings meters 4) (* 0.5 f0-3))) + ; (set! (-> settings meters 2) (+ (-> settings meters 3) f0-3)) + ; (set! (-> settings meters 1) (+ (-> settings meters 2) (* 2.0 f0-3))) + ; ) + (let ((f0-7 (/ (-> math-cam inv-hmge-scale w) (-> math-cam d)))) + (dotimes (v1-5 5) + (set! (-> settings dist v1-5) (* f0-7 (-> settings meters v1-5))) + ) + ) + (set! (-> *tfrag-work* frag-dists x) (- (-> settings meters 0))) + (set! (-> *tfrag-work* frag-dists y) (- (-> settings meters 1))) + (set! (-> *tfrag-work* frag-dists z) (- (-> settings meters 2))) + (set! (-> *tfrag-work* frag-dists w) (- (-> settings meters 4))) + 0 + (none) + ) + +(test-make-target) + + +(define *debug-load-level* #f) +(defun load-slot-1 ((lev symbol)) + (load-state-want-levels (-> *load-state* want 0 name) lev) + (set! 
*debug-load-level* lev) + (make-function-process + process + (lambda () + (suspend) + (suspend) + + (while (!= (-> *level* data 1 status) 'loaded) + (format 0 "waiting...~%") + (suspend) + ) + (load-state-want-display-level *debug-load-level* #t) + ) + ) + ) \ No newline at end of file diff --git a/goal_src/game.gp b/goal_src/game.gp index 075cce0c73..05e52a9933 100644 --- a/goal_src/game.gp +++ b/goal_src/game.gp @@ -216,6 +216,7 @@ "out/iso/FIN.DGO" "out/iso/FIC.DGO" "out/iso/JUN.DGO" + "out/iso/MAI.DGO" ) ;;;;;;;;;;;;;;;;;;;;;;;; @@ -563,6 +564,59 @@ "village3-vis" ) +;;;;;;;;;;;;;;;;;;;;; +;; Spider Cave +;;;;;;;;;;;;;;;;;;;;; + +(cgo "MAI.DGO" "mai.gd") + +(goal-src-sequence + "levels/" + :deps ;; no idea what these depend on, make it depend on the whole engine + ("out/obj/default-menu.o" + ;;"out/obj/darkcave-obs.o" + ) + "maincave/cavecrystal-light.gc" + "maincave/maincave-obs.gc" + "maincave/maincave-part.gc" + "maincave/spiderwebs.gc" + "maincave/dark-crystal.gc" + "maincave/baby-spider.gc" + "maincave/mother-spider-h.gc" + "maincave/mother-spider-egg.gc" + "maincave/mother-spider-proj.gc" + "maincave/mother-spider.gc" + "maincave/gnawer.gc" + "maincave/driller-lurker.gc" + ) + +(copy-textures 1313 1315 1314 1312 767) + +(copy-gos + "baby-spider-ag-MAI" + "cavetrapdoor-ag-MAI" + "dark-crystal-ag" + "driller-lurker-ag" + "ecovalve-ag-MAI" + "gnawer-ag" + "launcherdoor-maincave-ag" + "maincavecam-ag" + "mother-spider-ag" + "plat-ag-MAI" + "spider-egg-ag-DAR-MAI" + "spiderwebs-ag" + "water-anim-maincave-ag" + "water-anim-maincave-water-ag" + "maincave-vis" + ) + +; (goal-src-sequence +; "levels/" +; :deps ;; no idea what these depend on, make it depend on the whole engine +; ("out/obj/default-menu.o" "out/obj/cavecrystal-light.o") +; "darkcave/darkcave-obs.gc" +; ) + ;;;;;;;;;;;;;;;;;;;;; ;; Final Boss ;;;;;;;;;;;;;;;;;;;;; diff --git a/goal_src/kernel/gkernel.gc b/goal_src/kernel/gkernel.gc index 7faa8b56e1..4e1fa4b3e8 100644 --- a/goal_src/kernel/gkernel.gc 
+++ b/goal_src/kernel/gkernel.gc @@ -125,8 +125,13 @@ (define *fake-scratchpad-stack* (new 'global 'array 'uint8 (* 16 1024))) ;; similar thing for the scratchpad data. - ;; in - (define *fake-scratchpad-data* (new 'global 'array 'uint8 (* 16 1024))) + + (let* ((mem (new 'global 'array 'uint8 (* (+ 16 8) 1024))) + (aligned (logand (&+ mem 8192) (lognot 8191))) + ) + (define *fake-scratchpad-data* aligned) + ) + (defmacro scratchpad-start() diff --git a/test/decompiler/reference/engine/debug/default-menu_REF.gc b/test/decompiler/reference/engine/debug/default-menu_REF.gc index 615611bf6a..56014c1168 100644 --- a/test/decompiler/reference/engine/debug/default-menu_REF.gc +++ b/test/decompiler/reference/engine/debug/default-menu_REF.gc @@ -454,7 +454,10 @@ ;; definition for function dm-vu1-user-toggle-pick-func (defun dm-vu1-user-toggle-pick-func ((arg0 int) (arg1 int)) (if (= arg1 4) - (set! *vu1-enable-user-menu* (logxor *vu1-enable-user-menu* arg0)) + (set! + *vu1-enable-user-menu* + (logxor *vu1-enable-user-menu* (the-as uint arg0)) + ) ) (logtest? *vu1-enable-user-menu* arg0) ) @@ -462,7 +465,7 @@ ;; definition for function dm-vu1-user-set-pick-func (defun dm-vu1-user-set-pick-func ((arg0 int) (arg1 int)) (if (= arg1 4) - (set! *vu1-enable-user-menu* arg0) + (set! *vu1-enable-user-menu* (the-as vu1-renderer-mask arg0)) ) (= *vu1-enable-user-menu* arg0) ) diff --git a/test/decompiler/reference/engine/draw/drawable-group_REF.gc b/test/decompiler/reference/engine/draw/drawable-group_REF.gc index ee52c2fd1a..85933b91ed 100644 --- a/test/decompiler/reference/engine/draw/drawable-group_REF.gc +++ b/test/decompiler/reference/engine/draw/drawable-group_REF.gc @@ -98,11 +98,11 @@ ;; definition for method 14 of type drawable-group ;; INFO: Return type mismatch int vs none. 
-(defmethod dummy-14 drawable-group ((obj drawable-group)) +(defmethod collect-stats drawable-group ((obj drawable-group)) (when (vis-cull (-> obj id)) (when (sphere-cull (-> obj bsphere)) (dotimes (s5-0 (-> obj length)) - (dummy-14 (-> obj data s5-0)) + (collect-stats (-> obj data s5-0)) ) ) ) @@ -115,7 +115,7 @@ (defmethod debug-draw drawable-group - ((obj drawable-group) (arg0 drawable) (arg1 object)) + ((obj drawable-group) (arg0 drawable) (arg1 display-frame)) (when (vis-cull (-> obj id)) (when (sphere-cull (-> obj bsphere)) (dotimes (s3-0 (-> obj length)) @@ -141,7 +141,3 @@ ) arg1 ) - - - - diff --git a/test/decompiler/reference/engine/draw/drawable-h_REF.gc b/test/decompiler/reference/engine/draw/drawable-h_REF.gc index 029208cd9a..60a1d710de 100644 --- a/test/decompiler/reference/engine/draw/drawable-h_REF.gc +++ b/test/decompiler/reference/engine/draw/drawable-h_REF.gc @@ -15,8 +15,8 @@ (collide-with-box (_type_ int collide-list) none 11) (collide-y-probe (_type_ int collide-list) none 12) (collide-ray (_type_ int collide-list) none 13) - (dummy-14 (_type_) none 14) - (debug-draw (_type_ drawable object) none 15) + (collect-stats (_type_) none 14) + (debug-draw (_type_ drawable display-frame) none 15) (dummy-16 (_type_ object object) object 16) (collect-ambients (_type_ sphere int ambient-list) none 17) ) diff --git a/test/decompiler/reference/engine/draw/drawable-inline-array_REF.gc b/test/decompiler/reference/engine/draw/drawable-inline-array_REF.gc index 6054614928..69ee179e68 100644 --- a/test/decompiler/reference/engine/draw/drawable-inline-array_REF.gc +++ b/test/decompiler/reference/engine/draw/drawable-inline-array_REF.gc @@ -23,7 +23,7 @@ ;; definition for method 14 of type drawable-inline-array ;; INFO: Return type mismatch int vs none. 
-(defmethod dummy-14 drawable-inline-array ((obj drawable-inline-array)) +(defmethod collect-stats drawable-inline-array ((obj drawable-inline-array)) 0 (none) ) @@ -33,7 +33,7 @@ (defmethod debug-draw drawable-inline-array - ((obj drawable-inline-array) (arg0 drawable) (arg1 object)) + ((obj drawable-inline-array) (arg0 drawable) (arg1 display-frame)) 0 (none) ) diff --git a/test/decompiler/reference/engine/gfx/hw/vu1-user-h_REF.gc b/test/decompiler/reference/engine/gfx/hw/vu1-user-h_REF.gc index 87480789b9..cc9ff8cdef 100644 --- a/test/decompiler/reference/engine/gfx/hw/vu1-user-h_REF.gc +++ b/test/decompiler/reference/engine/gfx/hw/vu1-user-h_REF.gc @@ -1,11 +1,33 @@ ;;-*-Lisp-*- (in-package goal) -;; definition for symbol *vu1-enable-user-menu*, type int -(define *vu1-enable-user-menu* #x1ffff8) +;; definition for symbol *vu1-enable-user-menu*, type vu1-renderer-mask +(define + *vu1-enable-user-menu* + (vu1-renderer-mask + sky + ocean + ocean-wave + tfrag + tie-near + tie + generic + merc + shrubbery + shrub-near + billboard + trans-shrubbery + trans-frag + sprite + shadow + depth-cue + nineteen + twenty + ) + ) -;; definition for symbol *vu1-enable-user*, type int -(define *vu1-enable-user* 0) +;; definition for symbol *vu1-enable-user*, type vu1-renderer-mask +(define *vu1-enable-user* (the-as vu1-renderer-mask 0)) ;; definition of type dma-foreground-sink (deftype dma-foreground-sink (basic) diff --git a/test/decompiler/reference/engine/gfx/merc/merc_REF.gc b/test/decompiler/reference/engine/gfx/merc/merc_REF.gc index e432fd621d..9a301255e1 100644 --- a/test/decompiler/reference/engine/gfx/merc/merc_REF.gc +++ b/test/decompiler/reference/engine/gfx/merc/merc_REF.gc @@ -665,7 +665,7 @@ ;; definition for function merc-vu1-init-buffers ;; INFO: Return type mismatch int vs none. (defun merc-vu1-init-buffers () - (when (logtest? *vu1-enable-user* 1024) + (when (logtest? 
*vu1-enable-user* (vu1-renderer-mask merc)) (merc-vu1-init-buffer (bucket-id bucket-10) (new 'static 'gs-test diff --git a/test/decompiler/reference/engine/gfx/tfrag/subdivide_REF.gc b/test/decompiler/reference/engine/gfx/tfrag/subdivide_REF.gc index 18713dbb4e..3df2b2c361 100644 --- a/test/decompiler/reference/engine/gfx/tfrag/subdivide_REF.gc +++ b/test/decompiler/reference/engine/gfx/tfrag/subdivide_REF.gc @@ -183,29 +183,29 @@ ) ;; definition for function set-tfrag-dists! -;; INFO: Return type mismatch (pointer float) vs none. -(defun set-tfrag-dists! ((arg0 (pointer float))) +;; INFO: Return type mismatch tfrag-dists vs none. +(defun set-tfrag-dists! ((arg0 tfrag-dists)) (let ((f2-0 (-> *subdivide-settings* dist 0)) (f1-0 (-> *subdivide-settings* dist 1)) (f0-0 (-> *subdivide-settings* dist 2)) ) - (set! (-> arg0 3) f2-0) - (set! (-> arg0 7) f1-0) + (set! (-> arg0 k0s 0 w) f2-0) + (set! (-> arg0 k0s 1 w) f1-0) (let ((f4-1 (/ 1.0 (- f2-0 f1-0))) (f3-2 (/ 1.0 (- f1-0 f0-0))) ) - (set! (-> arg0 1) (- f4-1)) - (set! (-> arg0 5) (- f3-2)) - (set! (-> arg0 0) (* 0.5 f4-1)) - (set! (-> arg0 4) (* 0.5 f3-2)) + (set! (-> arg0 k0s 0 y) (- f4-1)) + (set! (-> arg0 k0s 1 y) (- f3-2)) + (set! (-> arg0 k0s 0 x) (* 0.5 f4-1)) + (set! (-> arg0 k0s 1 x) (* 0.5 f3-2)) (let ((f2-1 (* f2-0 f4-1)) (f5-7 (* f1-0 f3-2)) ) - (set! (-> arg0 9) f2-1) - (set! (-> arg0 13) f5-7) + (set! (-> arg0 k1s 0 y) f2-1) + (set! (-> arg0 k1s 1 y) f5-7) ) - (set! (-> arg0 8) (* -0.5 f4-1 f1-0)) - (set! (-> arg0 12) (* -0.5 f3-2 f0-0)) + (set! (-> arg0 k1s 0 x) (* -0.5 f4-1 f1-0)) + (set! 
(-> arg0 k1s 1 x) (* -0.5 f3-2 f0-0)) ) ) (none) diff --git a/test/decompiler/reference/engine/gfx/tfrag/tfrag-h_REF.gc b/test/decompiler/reference/engine/gfx/tfrag/tfrag-h_REF.gc index 42ef7d0f55..0515a70501 100644 --- a/test/decompiler/reference/engine/gfx/tfrag/tfrag-h_REF.gc +++ b/test/decompiler/reference/engine/gfx/tfrag/tfrag-h_REF.gc @@ -21,8 +21,8 @@ ;; definition of type tfragment-debug-data (deftype tfragment-debug-data (structure) - ((stats tfragment-stats :inline :offset-assert 0) - (debug-lines basic :offset-assert 16) + ((stats tfragment-stats :inline :offset-assert 0) + (debug-lines (array vector-array) :offset-assert 16) ) :method-count-assert 9 :size-assert #x14 @@ -64,7 +64,7 @@ (dma-level-0 uint32 :offset 32) (dma-base uint32 :offset 36) (dma-level-1 uint32 :offset 40) - (dma-qwc uint32 4 :offset 44) + (dma-qwc uint8 4 :offset 44) (shader (inline-array adgif-shader) :offset 48) (num-shaders uint8 :offset 52) (num-base-colors uint8 :offset 53) @@ -135,7 +135,8 @@ ;; definition of type drawable-tree-tfrag (deftype drawable-tree-tfrag (drawable-tree) - ((time-of-day-pal time-of-day-palette :offset 12) + ((time-of-day-pal time-of-day-palette :offset 12) + (arrays drawable-inline-array 1 :offset 32) ) :method-count-assert 18 :size-assert #x24 @@ -184,10 +185,10 @@ ;; definition of type tfrag-dists (deftype tfrag-dists (structure) - ((data uint32 16 :offset-assert 0) - (vector vector 4 :inline :offset 0) - (k0s uint128 2 :offset 0) - (k1s uint128 2 :offset 32) + ((data uint32 16 :offset-assert 0) + (vector vector 4 :inline :offset 0) + (k0s vector 2 :inline :offset 0) + (k1s vector 2 :inline :offset 32) ) :method-count-assert 9 :size-assert #x40 @@ -197,9 +198,9 @@ ;; definition for method 3 of type tfrag-dists (defmethod inspect tfrag-dists ((obj tfrag-dists)) (format #t "[~8x] ~A~%" obj 'tfrag-dists) - (format #t "~Tdata[16] @ #x~X~%" (-> obj data)) - (format #t "~Tvector[4] @ #x~X~%" (-> obj data)) - (format #t "~Tk0s[2] @ #x~X~%" (-> obj data)) + 
(format #t "~Tdata[16] @ #x~X~%" (-> obj k0s)) + (format #t "~Tvector[4] @ #x~X~%" (-> obj k0s)) + (format #t "~Tk0s[2] @ #x~X~%" (-> obj k0s)) (format #t "~Tk1s[2] @ #x~X~%" (-> obj k1s)) obj ) @@ -210,9 +211,9 @@ (vector vector 14 :inline :offset 0) (fog vector :inline :offset 0) (val vector :inline :offset 16) - (strgif qword :inline :offset 32) - (fangif qword :inline :offset 48) - (adgif qword :inline :offset 64) + (strgif gs-gif-tag :inline :offset 32) + (fangif gs-gif-tag :inline :offset 48) + (adgif gs-gif-tag :inline :offset 64) (hvdf-offset vector :inline :offset 80) (hmge-scale vector :inline :offset 96) (invh-scale vector :inline :offset 112) @@ -230,9 +231,9 @@ ;; definition for method 3 of type tfrag-data (defmethod inspect tfrag-data ((obj tfrag-data)) (format #t "[~8x] ~A~%" obj 'tfrag-data) - (format #t "~Tdata[56] @ #x~X~%" (-> obj data)) - (format #t "~Tvector[14] @ #x~X~%" (-> obj data)) - (format #t "~Tfog: #~%" (-> obj data)) + (format #t "~Tdata[56] @ #x~X~%" (-> obj fog)) + (format #t "~Tvector[14] @ #x~X~%" (-> obj fog)) + (format #t "~Tfog: #~%" (-> obj fog)) (format #t "~Tval: #~%" (-> obj val)) (format #t "~Tstrgif: #~%" (-> obj strgif)) (format #t "~Tfangif: #~%" (-> obj fangif)) diff --git a/test/decompiler/reference/engine/gfx/vis/bsp_REF.gc b/test/decompiler/reference/engine/gfx/vis/bsp_REF.gc index 3c4c95b5f9..bac83641c8 100644 --- a/test/decompiler/reference/engine/gfx/vis/bsp_REF.gc +++ b/test/decompiler/reference/engine/gfx/vis/bsp_REF.gc @@ -280,7 +280,10 @@ ;; definition for method 15 of type bsp-header ;; INFO: Return type mismatch profile-frame vs none. 
-(defmethod debug-draw bsp-header ((obj bsp-header) (arg0 drawable) (arg1 object)) +(defmethod + debug-draw + bsp-header + ((obj bsp-header) (arg0 drawable) (arg1 display-frame)) (rlet ((vf16 :class vf) (vf17 :class vf) (vf18 :class vf) @@ -353,7 +356,7 @@ ) ;; definition for method 14 of type bsp-header -(defmethod dummy-14 bsp-header ((obj bsp-header)) +(defmethod collect-stats bsp-header ((obj bsp-header)) (rlet ((vf16 :class vf) (vf17 :class vf) (vf18 :class vf) @@ -400,7 +403,7 @@ (.lvf vf31 (&-> at-0 camera-temp vector 3 quad)) ) (if (nonzero? (-> obj drawable-trees)) - (dummy-14 (-> obj drawable-trees)) + (collect-stats (-> obj drawable-trees)) ) (none) ) diff --git a/test/decompiler/test_VuDisasm.cpp b/test/decompiler/test_VuDisasm.cpp index c418ef7f03..5b3d9343f6 100644 --- a/test/decompiler/test_VuDisasm.cpp +++ b/test/decompiler/test_VuDisasm.cpp @@ -30,133 +30,133 @@ std::string get_expected(const std::string& name) { TEST(VuDisasm, SpriteDistort) { auto data = get_test_data("sprite-distort"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("sprite-distort")); } TEST(VuDisasm, BackgroundVu0) { auto data = get_test_data("background-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("background-vu0")); } TEST(VuDisasm, CollideVu0) { auto data = get_test_data("collide-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("collide-vu0")); } TEST(VuDisasm, BonesVu0) { auto data = get_test_data("bones-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = 
disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("bones-vu0")); } TEST(VuDisasm, ShadowVu0) { auto data = get_test_data("shadow-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("shadow-vu0")); } TEST(VuDisasm, OceanVu0) { auto data = get_test_data("ocean-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("ocean-vu0")); } TEST(VuDisasm, GenericVu0) { auto data = get_test_data("generic-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("generic-vu0")); } TEST(VuDisasm, MercnericVu0) { auto data = get_test_data("mercneric-vu0"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU0); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("mercneric-vu0")); } TEST(VuDisasm, OceanTexture) { auto data = get_test_data("ocean-texture"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("ocean-texture")); } TEST(VuDisasm, Sky) { auto data = get_test_data("sky"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("sky")); } TEST(VuDisasm, Shrub) { auto data = get_test_data("shrub"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), 
data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("shrub")); } TEST(VuDisasm, Shadow) { auto data = get_test_data("shadow"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("shadow")); } TEST(VuDisasm, TNear) { auto data = get_test_data("tnear"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("tnear")); } TEST(VuDisasm, Sprite) { auto data = get_test_data("sprite"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("sprite")); } TEST(VuDisasm, Tie) { auto data = get_test_data("tie"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("tie")); } TEST(VuDisasm, Generic) { auto data = get_test_data("generic"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("generic")); } TEST(VuDisasm, TieNear) { auto data = get_test_data("tie-near"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("tie-near")); } TEST(VuDisasm, Tfrag) { auto data = get_test_data("tfrag"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("tfrag")); } TEST(VuDisasm, Merc) { 
auto data = get_test_data("merc"); - VuDisassembler disasm; + VuDisassembler disasm(VuDisassembler::VuKind::VU1); auto prog = disasm.disassemble(data.data(), data.size() * 4, false); EXPECT_EQ(disasm.to_string(prog), get_expected("merc")); } \ No newline at end of file diff --git a/test/decompiler/vu_reference/generic-result.txt b/test/decompiler/vu_reference/generic-result.txt index 9ac9e42bd1..6324195028 100644 --- a/test/decompiler/vu_reference/generic-result.txt +++ b/test/decompiler/vu_reference/generic-result.txt @@ -1,26 +1,26 @@ + b L4 | nop + nop | nop b L5 | nop nop | nop - b L6 | nop + b L84 | nop + nop | nop + b L33 | nop + nop | nop + b L8 | nop nop | nop b L1 | nop nop | nop - b L34 | nop + b L6 | nop nop | nop - b L9 | nop - nop | nop - b L2 | nop - nop | nop - b L7 | nop - nop | nop -L2: +L1: iaddiu vi02, vi00, 0x381 | nop lq.xyzw vf31, 7(vi02) | nop isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 - ibne vi00, vi02, L3 | addw.z vf24, vf00, vf00 + ibne vi00, vi02, L2 | addw.z vf24, vf00, vf00 lq.xyzw vf03, 899(vi00) | addw.z vf25, vf00, vf00 iaddiu vi13, vi00, 0x345 | nop -L3: +L2: ilw.x vi01, 5(vi13) | nop iaddi vi07, vi12, 0xa | nop iaddi vi05, vi01, -0x1 | nop @@ -35,7 +35,7 @@ L3: sq.xyzw vf03, 4(vi13) | nop isw.w vi01, 5(vi13) | nop isw.w vi00, 6(vi13) | nop -L4: +L3: lq.xyz vf13, 0(vi07) | nop lq.xyz vf14, 3(vi07) | nop lq.xyz vf15, 6(vi07) | nop @@ -59,11 +59,11 @@ L4: sq.xyzw vf18, -11(vi07) | nop sq.xyzw vf19, -8(vi07) | nop sq.xyzw vf20, -5(vi07) | nop - ibgez vi05, L4 | nop + ibgez vi05, L3 | nop sq.xyzw vf21, -2(vi07) | nop - b L17 | nop + b L16 | nop nop | nop -L5: +L4: iaddiu vi01, vi00, 0x381 | nop lq.xyzw vf01, 0(vi01) | nop lq.xyzw vf02, 1(vi01) | nop @@ -72,7 +72,7 @@ L5: lq.xyzw vf05, 4(vi01) | nop lq.xyzw vf06, 5(vi01) | nop lq.xyzw vf07, 6(vi01) | nop -L6: +L5: iaddiu vi13, vi00, 0x363 | nop iaddi vi02, vi13, 0x5 | nop iaddi vi12, vi00, 0x0 | nop @@ -96,7 +96,7 @@ L6: isw.z 
vi01, 935(vi00) | nop iaddiu vi01, vi00, 0x419 | nop :e isw.z vi01, 942(vi00) | nop -L7: +L6: iaddiu vi01, vi00, 0x381 | nop ilw.z vi13, 9(vi01) | nop ilw.w vi12, 9(vi01) | nop @@ -105,22 +105,22 @@ L7: isw.y vi02, 9(vi01) | nop isubiu vi02, vi13, 0x363 | nop iaddiu vi13, vi13, 0x1e | nop - ibne vi00, vi02, L8 | nop + ibne vi00, vi02, L7 | nop isubiu vi01, vi01, 0x100 | nop iaddiu vi13, vi00, 0x345 | nop -L8: +L7: iaddi vi03, vi13, 0x7 | nop iaddi vi03, vi13, 0x7 | nop isw.x vi03, 906(vi00) | nop jr vi15 | nop isw.y vi03, 906(vi00) | nop -L9: +L8: isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 - ibne vi00, vi02, L10 | addw.z vf24, vf00, vf00 + ibne vi00, vi02, L9 | addw.z vf24, vf00, vf00 nop | addw.z vf25, vf00, vf00 iaddiu vi13, vi00, 0x345 | nop -L10: +L9: iaddi vi03, vi13, 0x7 | nop ilw.w vi01, 5(vi13) | nop isw.x vi03, 906(vi00) | nop @@ -164,68 +164,68 @@ L10: nop | mul.xyz vf13, vf13, Q nop | mul.xyz vf19, vf19, Q iaddi vi14, vi14, 0x9 | maxy.w vf12, vf12, vf01 -L11: +L10: div Q, vf01.x, vf14.w | itof12.xyz vf20, vf24 - ibeq vi02, vi06, L12 | add.xyzw vf13, vf13, vf04 + ibeq vi02, vi06, L11 | add.xyzw vf13, vf13, vf04 mtir vi05, vf25.x | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf12, vf12, vf01 -L12: +L11: iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 lq.xy vf22, 0(vi10) | madday.xyzw ACC, vf09, vf17 lq.xyz vf16, 2(vi10) | miniz.w vf13, vf13, vf01 iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 mfir.x vf25, vi09 | maddz.xyzw vf15, vf10, vf17 sq.xyzw vf18, -12(vi10) | mul.xyz vf14, vf14, Q - ibeq vi14, vi10, L16 | mul.xyz vf20, vf20, Q + ibeq vi14, vi10, L15 | mul.xyz vf20, vf20, Q sq.xyzw vf12, -10(vi10) | maxy.w vf13, vf13, vf01 div Q, vf01.x, vf15.w | itof12.xyz vf21, vf25 - ibeq vi03, vi07, L13 | add.xyzw vf14, vf14, vf04 + ibeq vi03, vi07, L12 | add.xyzw vf14, vf14, vf04 mtir vi02, vf22.x | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf13, vf13, vf01 -L13: +L12: iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, 
vf08, vf16 lq.xy vf23, 0(vi10) | madday.xyzw ACC, vf09, vf16 lq.xyz vf17, 2(vi10) | miniz.w vf14, vf14, vf01 iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 mfir.x vf22, vi06 | maddz.xyzw vf12, vf10, vf16 sq.xyzw vf19, -12(vi10) | mul.xyz vf15, vf15, Q - ibeq vi14, vi10, L16 | mul.xyz vf21, vf21, Q + ibeq vi14, vi10, L15 | mul.xyz vf21, vf21, Q sq.xyzw vf13, -10(vi10) | maxy.w vf14, vf14, vf01 div Q, vf01.x, vf12.w | itof12.xyz vf18, vf22 - ibeq vi04, vi08, L14 | add.xyzw vf15, vf15, vf04 + ibeq vi04, vi08, L13 | add.xyzw vf15, vf15, vf04 mtir vi03, vf23.x | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf14, vf14, vf01 -L14: +L13: iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf17 lq.xy vf24, 0(vi10) | madday.xyzw ACC, vf09, vf17 lq.xyz vf16, 2(vi10) | miniz.w vf15, vf15, vf01 iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 mfir.x vf23, vi07 | maddz.xyzw vf13, vf10, vf17 sq.xyzw vf20, -12(vi10) | mul.xyz vf12, vf12, Q - ibeq vi14, vi10, L16 | mul.xyz vf18, vf18, Q + ibeq vi14, vi10, L15 | mul.xyz vf18, vf18, Q sq.xyzw vf14, -10(vi10) | maxy.w vf15, vf15, vf01 div Q, vf01.x, vf13.w | itof12.xyz vf19, vf23 - ibeq vi05, vi09, L15 | add.xyzw vf12, vf12, vf04 + ibeq vi05, vi09, L14 | add.xyzw vf12, vf12, vf04 mtir vi04, vf24.x | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf15, vf15, vf01 -L15: +L14: iaddi vi10, vi10, 0x3 | maddax.xyzw ACC, vf08, vf16 lq.xy vf25, 0(vi10) | madday.xyzw ACC, vf09, vf16 lq.xyz vf17, 2(vi10) | miniz.w vf12, vf12, vf01 iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 mfir.x vf24, vi08 | maddz.xyzw vf14, vf10, vf16 sq.xyzw vf21, -12(vi10) | mul.xyz vf13, vf13, Q - ibne vi14, vi10, L11 | mul.xyz vf19, vf19, Q + ibne vi14, vi10, L10 | mul.xyz vf19, vf19, Q sq.xyzw vf15, -10(vi10) | maxy.w vf12, vf12, vf01 -L16: - b L83 | nop +L15: + b L82 | nop ilw.w vi12, 906(vi00) | nop isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 - ibne vi00, vi02, L17 | addw.z vf24, vf00, vf00 + ibne vi00, vi02, L16 | addw.z vf24, 
vf00, vf00 nop | addw.z vf25, vf00, vf00 iaddiu vi13, vi00, 0x345 | nop -L17: +L16: iaddi vi03, vi13, 0x7 | nop ilw.w vi01, 5(vi13) | nop isw.x vi03, 906(vi00) | nop @@ -280,10 +280,10 @@ L17: nop | maxy.w vf12, vf12, vf01 nop | clipw.xyz vf26, vf26 nop | mul.xyz vf19, vf19, Q - ibeq vi02, vi06, L18 | itof12.xyz vf20, vf24 + ibeq vi02, vi06, L17 | itof12.xyz vf20, vf24 div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf12, vf12, vf01 -L18: +L17: nop | add.xyzw vf13, vf13, vf04 nop | maddax.xyzw ACC, vf08, vf16 mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 @@ -293,12 +293,12 @@ L18: iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 - ibeq vi14, vi10, L29 | clipw.xyz vf27, vf27 + ibeq vi14, vi10, L28 | clipw.xyz vf27, vf27 sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q - ibeq vi03, vi07, L19 | itof12.xyz vf21, vf25 + ibeq vi03, vi07, L18 | itof12.xyz vf21, vf25 div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf13, vf13, vf01 -L19: +L18: nop | add.xyzw vf14, vf14, vf04 nop | maddax.xyzw ACC, vf08, vf16 mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 @@ -308,95 +308,95 @@ L19: iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 - ibeq vi14, vi10, L29 | clipw.xyz vf28, vf28 + ibeq vi14, vi10, L28 | clipw.xyz vf28, vf28 sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q -L20: - ibeq vi04, vi08, L21 | itof12.xyz vf18, vf22 +L19: + ibeq vi04, vi08, L20 | itof12.xyz vf18, vf22 div Q, vf01.x, vf12.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf14, vf14, vf01 -L21: +L20: fcand vi01, 0x3ffff | add.xyzw vf15, vf15, vf04 - ibne vi00, vi01, L32 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L31 | maddax.xyzw ACC, vf08, vf16 mtir vi03, vf23.x | madday.xyzw ACC, vf09, vf16 -L22: +L21: iaddi vi10, vi10, 0x3 | maddz.xyzw vf13, vf10, vf16 lq.xy vf24, 0(vi10) | miniz.w vf15, vf15, 
vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf26, vf12, vf05 iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q sq.xyzw vf20, -12(vi10) | maxy.w vf15, vf15, vf01 - ibeq vi14, vi10, L29 | clipw.xyz vf29, vf29 + ibeq vi14, vi10, L28 | clipw.xyz vf29, vf29 sq.xyzw vf14, -10(vi10) | mul.xyz vf18, vf18, Q - ibeq vi05, vi09, L23 | itof12.xyz vf19, vf23 + ibeq vi05, vi09, L22 | itof12.xyz vf19, vf23 div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf15, vf15, vf01 -L23: +L22: fcand vi01, 0x3ffff | add.xyzw vf12, vf12, vf04 - ibne vi00, vi01, L33 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L32 | maddax.xyzw ACC, vf08, vf16 mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 -L24: +L23: iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q sq.xyzw vf21, -12(vi10) | maxy.w vf12, vf12, vf01 - ibeq vi14, vi10, L29 | clipw.xyz vf26, vf26 + ibeq vi14, vi10, L28 | clipw.xyz vf26, vf26 sq.xyzw vf15, -10(vi10) | mul.xyz vf19, vf19, Q - ibeq vi02, vi06, L25 | itof12.xyz vf20, vf24 + ibeq vi02, vi06, L24 | itof12.xyz vf20, vf24 div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf12, vf12, vf01 -L25: +L24: fcand vi01, 0x3ffff | add.xyzw vf13, vf13, vf04 - ibne vi00, vi01, L30 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L29 | maddax.xyzw ACC, vf08, vf16 mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 -L26: +L25: iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 - ibeq vi14, vi10, L29 | clipw.xyz vf27, vf27 + ibeq vi14, vi10, L28 | clipw.xyz vf27, vf27 sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q - ibeq vi03, 
vi07, L27 | itof12.xyz vf21, vf25 + ibeq vi03, vi07, L26 | itof12.xyz vf21, vf25 div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf13, vf13, vf01 -L27: +L26: fcand vi01, 0x3ffff | add.xyzw vf14, vf14, vf04 - ibne vi00, vi01, L31 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L30 | maddax.xyzw ACC, vf08, vf16 mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 -L28: +L27: iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 - ibne vi14, vi10, L20 | clipw.xyz vf28, vf28 + ibne vi14, vi10, L19 | clipw.xyz vf28, vf28 sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q -L29: - b L83 | nop +L28: + b L82 | nop ilw.w vi12, 906(vi00) | nop +L29: + b L25 | addw.w vf12, vf12, vf01 + nop | nop L30: - b L26 | addw.w vf12, vf12, vf01 + b L27 | addw.w vf13, vf13, vf01 nop | nop L31: - b L28 | addw.w vf13, vf13, vf01 + b L21 | addw.w vf14, vf14, vf01 nop | nop L32: - b L22 | addw.w vf14, vf14, vf01 + b L23 | addw.w vf15, vf15, vf01 nop | nop L33: - b L24 | addw.w vf15, vf15, vf01 - nop | nop -L34: isubiu vi02, vi13, 0x363 | addw.z vf22, vf00, vf00 iaddiu vi13, vi13, 0x1e | addw.z vf23, vf00, vf00 - ibne vi00, vi02, L35 | addw.z vf24, vf00, vf00 + ibne vi00, vi02, L34 | addw.z vf24, vf00, vf00 nop | addw.z vf25, vf00, vf00 iaddiu vi13, vi00, 0x345 | nop -L35: +L34: iaddi vi03, vi13, 0x7 | nop ilw.w vi01, 5(vi13) | nop isw.x vi03, 906(vi00) | nop @@ -451,10 +451,10 @@ L35: nop | maxy.w vf12, vf12, vf01 nop | clipw.xyz vf26, vf26 nop | mul.xyz vf19, vf19, Q - ibeq vi02, vi06, L36 | itof12.xyz vf20, vf24 + ibeq vi02, vi06, L35 | itof12.xyz vf20, vf24 div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf12, vf12, vf01 -L36: +L35: nop | add.xyzw vf13, vf13, vf04 nop | maddax.xyzw ACC, vf08, vf16 mtir vi05, vf25.x | madday.xyzw ACC, 
vf09, vf16 @@ -464,12 +464,12 @@ L36: iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 - ibeq vi14, vi10, L47 | clipw.xyz vf27, vf27 + ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q - ibeq vi03, vi07, L37 | itof12.xyz vf21, vf25 + ibeq vi03, vi07, L36 | itof12.xyz vf21, vf25 div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf13, vf13, vf01 -L37: +L36: nop | add.xyzw vf14, vf14, vf04 nop | maddax.xyzw ACC, vf08, vf16 mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 @@ -479,78 +479,78 @@ L37: iand vi06, vi02, vi11 | ftoi4.xyzw vf13, vf13 mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 - ibeq vi14, vi10, L47 | clipw.xyz vf28, vf28 + ibeq vi14, vi10, L46 | clipw.xyz vf28, vf28 sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q -L38: - ibeq vi04, vi08, L39 | itof12.xyz vf18, vf22 +L37: + ibeq vi04, vi08, L38 | itof12.xyz vf18, vf22 div Q, vf01.x, vf12.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf14, vf14, vf01 -L39: +L38: fcand vi01, 0x3ffff | add.xyzw vf15, vf15, vf04 - ibne vi00, vi01, L56 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L55 | maddax.xyzw ACC, vf08, vf16 mtir vi03, vf23.x | madday.xyzw ACC, vf09, vf16 -L40: +L39: iaddi vi10, vi10, 0x3 | maddz.xyzw vf13, vf10, vf16 lq.xy vf24, 0(vi10) | miniz.w vf15, vf15, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf26, vf12, vf05 iand vi07, vi03, vi11 | ftoi4.xyzw vf14, vf14 mfir.x vf23, vi07 | mul.xyz vf12, vf12, Q sq.xyzw vf20, -12(vi10) | maxy.w vf15, vf15, vf01 - ibeq vi14, vi10, L47 | clipw.xyz vf29, vf29 + ibeq vi14, vi10, L46 | clipw.xyz vf29, vf29 sq.xyzw vf14, -10(vi10) | mul.xyz vf18, vf18, Q - ibeq vi05, vi09, L41 | itof12.xyz vf19, vf23 + ibeq vi05, vi09, L40 | itof12.xyz vf19, vf23 div Q, vf01.x, vf13.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf15, vf15, vf01 -L41: +L40: fcand vi01, 0x3ffff | add.xyzw vf12, vf12, vf04 - 
ibne vi00, vi01, L60 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L59 | maddax.xyzw ACC, vf08, vf16 mtir vi04, vf24.x | madday.xyzw ACC, vf09, vf16 -L42: +L41: iaddi vi10, vi10, 0x3 | maddz.xyzw vf14, vf10, vf16 lq.xy vf25, 0(vi10) | miniz.w vf12, vf12, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf27, vf13, vf05 iand vi08, vi04, vi11 | ftoi4.xyzw vf15, vf15 mfir.x vf24, vi08 | mul.xyz vf13, vf13, Q sq.xyzw vf21, -12(vi10) | maxy.w vf12, vf12, vf01 - ibeq vi14, vi10, L47 | clipw.xyz vf26, vf26 + ibeq vi14, vi10, L46 | clipw.xyz vf26, vf26 sq.xyzw vf15, -10(vi10) | mul.xyz vf19, vf19, Q - ibeq vi02, vi06, L43 | itof12.xyz vf20, vf24 + ibeq vi02, vi06, L42 | itof12.xyz vf20, vf24 div Q, vf01.x, vf14.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf12, vf12, vf01 -L43: +L42: fcand vi01, 0x3ffff | add.xyzw vf13, vf13, vf04 - ibne vi00, vi01, L48 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L47 | maddax.xyzw ACC, vf08, vf16 mtir vi05, vf25.x | madday.xyzw ACC, vf09, vf16 -L44: +L43: iaddi vi10, vi10, 0x3 | maddz.xyzw vf15, vf10, vf16 lq.xy vf22, 0(vi10) | miniz.w vf13, vf13, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf28, vf14, vf05 iand vi09, vi05, vi11 | ftoi4.xyzw vf12, vf12 mfir.x vf25, vi09 | mul.xyz vf14, vf14, Q sq.xyzw vf18, -12(vi10) | maxy.w vf13, vf13, vf01 - ibeq vi14, vi10, L47 | clipw.xyz vf27, vf27 + ibeq vi14, vi10, L46 | clipw.xyz vf27, vf27 sq.xyzw vf12, -10(vi10) | mul.xyz vf20, vf20, Q - ibeq vi03, vi07, L45 | itof12.xyz vf21, vf25 + ibeq vi03, vi07, L44 | itof12.xyz vf21, vf25 div Q, vf01.x, vf15.w | mulaw.xyzw ACC, vf11, vf00 nop | addw.w vf13, vf13, vf01 -L45: +L44: fcand vi01, 0x3ffff | add.xyzw vf14, vf14, vf04 - ibne vi00, vi01, L52 | maddax.xyzw ACC, vf08, vf16 + ibne vi00, vi01, L51 | maddax.xyzw ACC, vf08, vf16 mtir vi02, vf22.x | madday.xyzw ACC, vf09, vf16 -L46: +L45: iaddi vi10, vi10, 0x3 | maddz.xyzw vf12, vf10, vf16 lq.xy vf23, 0(vi10) | miniz.w vf14, vf14, vf01 lq.xyz vf16, 2(vi10) | mul.xyzw vf29, vf15, vf05 iand vi06, vi02, vi11 | 
ftoi4.xyzw vf13, vf13 mfir.x vf22, vi06 | mul.xyz vf15, vf15, Q sq.xyzw vf19, -12(vi10) | maxy.w vf14, vf14, vf01 - ibne vi14, vi10, L38 | clipw.xyz vf28, vf28 + ibne vi14, vi10, L37 | clipw.xyz vf28, vf28 sq.xyzw vf13, -10(vi10) | mul.xyz vf21, vf21, Q -L47: - b L83 | nop +L46: + b L82 | nop ilw.w vi12, 906(vi00) | nop -L48: - ibne vi02, vi06, L44 | nop +L47: + ibne vi02, vi06, L43 | nop sq.xyzw vf23, 998(vi00) | addw.w vf12, vf12, vf01 sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf28, vf07 sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf29, vf07 @@ -567,9 +567,9 @@ L48: iand vi01, vi01, vi02 | clipw.xyz vf29, vf29 iand vi01, vi01, vi03 | clipw.xyz vf26, vf26 mfir.y vf31, vi06 | nop - ibeq vi00, vi01, L50 | nop + ibeq vi00, vi01, L49 | nop mfir.z vf31, vi07 | nop -L49: +L48: div Q, vf01.x, vf14.w | nop lq.xyzw vf23, 998(vi00) | nop lq.xyzw vf24, 999(vi00) | nop @@ -577,9 +577,9 @@ L49: ilw.x vi01, 1001(vi00) | nop ilw.y vi02, 1001(vi00) | nop ilw.z vi03, 1001(vi00) | nop - b L44 | nop + b L43 | nop ilw.w vi04, 1001(vi00) | nop -L50: +L49: mfir.w vf31, vi08 | nop mfir.x vf30, vi09 | nop mfir.y vf30, vi10 | nop @@ -615,13 +615,13 @@ L50: sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf18, Q nop | nop nop | nop - bal vi15, L67 | nop + bal vi15, L66 | nop sq.xyzw vf14, 997(vi00) | nop - ibeq vi00, vi05, L51 | nop + ibeq vi00, vi05, L50 | nop nop | nop - bal vi15, L64 | nop + bal vi15, L63 | nop nop | nop -L51: +L50: ilw.x vi05, 1002(vi00) | nop ilw.y vi06, 1002(vi00) | nop ilw.z vi07, 1002(vi00) | nop @@ -634,10 +634,10 @@ L51: lq.xyzw vf13, 1005(vi00) | nop lq.xyzw vf14, 1006(vi00) | nop lq.xyzw vf15, 1007(vi00) | nop - b L49 | nop + b L48 | nop lq.xyzw vf16, 1008(vi00) | nop -L52: - ibne vi03, vi07, L46 | nop +L51: + ibne vi03, vi07, L45 | nop sq.xyzw vf23, 998(vi00) | addw.w vf13, vf13, vf01 sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf29, vf07 sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf26, vf07 @@ -654,9 +654,9 @@ L52: iand vi01, vi01, vi02 | clipw.xyz vf26, vf26 iand vi01, 
vi01, vi03 | clipw.xyz vf27, vf27 mfir.y vf31, vi06 | nop - ibeq vi00, vi01, L54 | nop + ibeq vi00, vi01, L53 | nop mfir.z vf31, vi07 | nop -L53: +L52: div Q, vf01.x, vf15.w | nop lq.xyzw vf23, 998(vi00) | nop lq.xyzw vf24, 999(vi00) | nop @@ -664,9 +664,9 @@ L53: ilw.x vi01, 1001(vi00) | nop ilw.y vi02, 1001(vi00) | nop ilw.z vi03, 1001(vi00) | nop - b L46 | nop + b L45 | nop ilw.w vi04, 1001(vi00) | nop -L54: +L53: mfir.w vf31, vi08 | nop mfir.x vf30, vi09 | nop mfir.y vf30, vi10 | nop @@ -702,13 +702,13 @@ L54: sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf19, Q nop | nop nop | nop - bal vi15, L67 | nop + bal vi15, L66 | nop sq.xyzw vf14, 997(vi00) | nop - ibeq vi00, vi05, L55 | nop + ibeq vi00, vi05, L54 | nop nop | nop - bal vi15, L64 | nop + bal vi15, L63 | nop nop | nop -L55: +L54: ilw.x vi05, 1002(vi00) | nop ilw.y vi06, 1002(vi00) | nop ilw.z vi07, 1002(vi00) | nop @@ -721,10 +721,10 @@ L55: lq.xyzw vf13, 1005(vi00) | nop lq.xyzw vf14, 1006(vi00) | nop lq.xyzw vf15, 1007(vi00) | nop - b L53 | nop + b L52 | nop lq.xyzw vf16, 1008(vi00) | nop -L56: - ibne vi04, vi08, L40 | nop +L55: + ibne vi04, vi08, L39 | nop sq.xyzw vf23, 998(vi00) | addw.w vf14, vf14, vf01 sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf26, vf07 sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf27, vf07 @@ -741,9 +741,9 @@ L56: iand vi01, vi01, vi02 | clipw.xyz vf27, vf27 iand vi01, vi01, vi03 | clipw.xyz vf28, vf28 mfir.y vf31, vi06 | nop - ibeq vi00, vi01, L58 | nop + ibeq vi00, vi01, L57 | nop mfir.z vf31, vi07 | nop -L57: +L56: div Q, vf01.x, vf12.w | nop lq.xyzw vf23, 998(vi00) | nop lq.xyzw vf24, 999(vi00) | nop @@ -751,9 +751,9 @@ L57: ilw.x vi01, 1001(vi00) | nop ilw.y vi02, 1001(vi00) | nop ilw.z vi03, 1001(vi00) | nop - b L40 | nop + b L39 | nop ilw.w vi04, 1001(vi00) | nop -L58: +L57: mfir.w vf31, vi08 | nop mfir.x vf30, vi09 | nop mfir.y vf30, vi10 | nop @@ -789,13 +789,13 @@ L58: sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf20, Q nop | nop nop | nop - bal vi15, L67 | nop + bal vi15, L66 | nop 
sq.xyzw vf14, 997(vi00) | nop - ibeq vi00, vi05, L59 | nop + ibeq vi00, vi05, L58 | nop nop | nop - bal vi15, L64 | nop + bal vi15, L63 | nop nop | nop -L59: +L58: ilw.x vi05, 1002(vi00) | nop ilw.y vi06, 1002(vi00) | nop ilw.z vi07, 1002(vi00) | nop @@ -808,10 +808,10 @@ L59: lq.xyzw vf13, 1005(vi00) | nop lq.xyzw vf14, 1006(vi00) | nop lq.xyzw vf15, 1007(vi00) | nop - b L57 | nop + b L56 | nop lq.xyzw vf16, 1008(vi00) | nop -L60: - ibne vi05, vi09, L42 | nop +L59: + ibne vi05, vi09, L41 | nop sq.xyzw vf23, 998(vi00) | addw.w vf15, vf15, vf01 sq.xyzw vf24, 999(vi00) | mul.xyzw vf23, vf27, vf07 sq.xyzw vf25, 1000(vi00) | mul.xyzw vf24, vf28, vf07 @@ -828,9 +828,9 @@ L60: iand vi01, vi01, vi02 | clipw.xyz vf28, vf28 iand vi01, vi01, vi03 | clipw.xyz vf29, vf29 mfir.y vf31, vi06 | nop - ibeq vi00, vi01, L62 | nop + ibeq vi00, vi01, L61 | nop mfir.z vf31, vi07 | nop -L61: +L60: div Q, vf01.x, vf13.w | nop lq.xyzw vf23, 998(vi00) | nop lq.xyzw vf24, 999(vi00) | nop @@ -838,9 +838,9 @@ L61: ilw.x vi01, 1001(vi00) | nop ilw.y vi02, 1001(vi00) | nop ilw.z vi03, 1001(vi00) | nop - b L42 | nop + b L41 | nop ilw.w vi04, 1001(vi00) | nop -L62: +L61: mfir.w vf31, vi08 | nop mfir.x vf30, vi09 | nop mfir.y vf30, vi10 | nop @@ -876,13 +876,13 @@ L62: sq.xyzw vf31, 961(vi00) | mul.xyz vf14, vf21, Q nop | nop nop | nop - bal vi15, L67 | nop + bal vi15, L66 | nop sq.xyzw vf14, 997(vi00) | nop - ibeq vi00, vi05, L63 | nop + ibeq vi00, vi05, L62 | nop nop | nop - bal vi15, L64 | nop + bal vi15, L63 | nop nop | nop -L63: +L62: ilw.x vi05, 1002(vi00) | nop ilw.y vi06, 1002(vi00) | nop ilw.z vi07, 1002(vi00) | nop @@ -895,28 +895,28 @@ L63: lq.xyzw vf13, 1005(vi00) | nop lq.xyzw vf14, 1006(vi00) | nop lq.xyzw vf15, 1007(vi00) | nop - b L61 | nop + b L60 | nop lq.xyzw vf16, 1008(vi00) | nop -L64: +L63: ilw.w vi01, 8(vi13) | nop ilw.y vi02, 1003(vi00) | nop iaddi vi03, vi13, 0x7 | nop - ibltz vi01, L66 | nop + ibltz vi01, L65 | nop ilw.w vi04, 906(vi00) | nop iaddi vi02, vi02, -0xf | nop 
isub vi02, vi02, vi04 | nop -L65: +L64: ilw.w vi04, 5(vi03) | nop ilw.w vi01, 6(vi03) | nop nop | nop nop | nop isub vi04, vi02, vi04 | nop nop | nop - ibltz vi04, L66 | nop + ibltz vi04, L65 | nop nop | nop - ibgtz vi01, L65 | nop + ibgtz vi01, L64 | nop iaddi vi03, vi03, 0x5 | nop -L66: +L65: iaddiu vi01, vi00, 0x3b9 | nop lq.xyzw vf12, 0(vi03) | nop lq.xyzw vf13, 1(vi03) | nop @@ -941,7 +941,7 @@ L66: iswr.x vi02, vi01 | nop sq.xyzw vf12, 1(vi01) | nop xgkick vi01 | nop -L67: +L66: sq.xyzw vf00, 907(vi00) | nop sq.xyzw vf00, 914(vi00) | nop sq.xyzw vf00, 921(vi00) | nop @@ -964,26 +964,26 @@ L67: iaddiu vi04, vi00, 0x3c1 | nop mfir.x vf31, vi15 | nop iaddi vi05, vi00, 0x0 | nop - bal vi15, L68 | nop + bal vi15, L67 | nop iaddiu vi07, vi00, 0x3dd | nop - bal vi15, L68 | nop + bal vi15, L67 | nop iaddiu vi07, vi00, 0x3e0 | nop - bal vi15, L68 | nop + bal vi15, L67 | nop iaddiu vi07, vi00, 0x3e3 | nop - b L77 | nop + b L76 | nop nop | nop -L68: +L67: iaddiu vi09, vi00, 0x38b | nop -L69: +L68: iaddi vi10, vi00, 0x0 | nop -L70: +L69: isubiu vi01, vi09, 0x3b5 | nop ilwr.y vi08, vi09 | nop - ibgez vi01, L74 | nop + ibgez vi01, L73 | nop ilwr.z vi06, vi09 | nop lq.xyzw vf24, 0(vi07) | nop lq.xyzw vf23, 0(vi08) | nop - ibne vi00, vi08, L71 | nop + ibne vi00, vi08, L70 | nop iswr.y vi07, vi09 | nop jalr vi11, vi06 | nop iswr.x vi07, vi09 | nop @@ -991,35 +991,35 @@ L70: nop | nop nop | nop fsand vi02, 0x2 | nop - ibne vi00, vi02, L75 | nop + ibne vi00, vi02, L74 | nop nop | nop - b L70 | nop + b L69 | nop iaddi vi09, vi09, 0x7 | nop -L71: +L70: jalr vi11, vi06 | nop lq.xyzw vf15, 1(vi08) | nop lq.xyzw vf16, 1(vi07) | nop lq.xyzw vf12, 2(vi08) | nop fsand vi01, 0x2 | nop fsand vi02, 0x2 | subw.w vf31, vf30, vf31 - ibne vi00, vi01, L73 | nop + ibne vi00, vi01, L72 | nop lq.xyzw vf13, 2(vi07) | nop - ibne vi00, vi02, L72 | nop + ibne vi00, vi02, L71 | nop div Q, vf30.w, vf31.w | nop - b L70 | nop + b L69 | nop iaddi vi09, vi09, 0x7 | nop -L72: - bal vi11, L82 | nop +L71: + 
bal vi11, L81 | nop iaddi vi07, vi09, 0x1 | nop sq.xyzw vf25, 1(vi09) | nop sq.xyzw vf17, 2(vi09) | nop sq.xyzw vf14, 3(vi09) | nop - b L70 | nop + b L69 | nop iaddi vi09, vi09, 0x7 | nop -L73: - ibne vi00, vi02, L75 | nop +L72: + ibne vi00, vi02, L74 | nop div Q, vf30.w, vf31.w | nop - bal vi11, L82 | nop + bal vi11, L81 | nop nop | nop sq.xyzw vf25, 4(vi09) | nop sq.xyzw vf17, 5(vi09) | nop @@ -1028,9 +1028,9 @@ L73: isw.x vi09, 949(vi10) | nop isw.y vi07, 949(vi10) | nop iaddi vi10, vi10, 0x1 | nop - b L70 | nop + b L69 | nop iaddi vi07, vi09, -0x3 | nop -L74: +L73: lq.xyzw vf23, 0(vi07) | nop lq.xyzw vf15, 1(vi07) | nop lq.xyzw vf12, 2(vi07) | nop @@ -1049,26 +1049,26 @@ L74: nop | ftoi4.xyzw vf23, vf23 sq.xyzw vf12, -3(vi03) | nop sq.xyzw vf23, -1(vi03) | nop -L75: - iblez vi10, L76 | nop +L74: + iblez vi10, L75 | nop nop | nop ilw.x vi09, 948(vi10) | nop ilw.y vi07, 948(vi10) | nop - b L70 | nop + b L69 | nop iaddi vi10, vi10, -0x1 | nop -L76: +L75: jr vi15 | nop nop | nop -L77: +L76: iaddiu vi09, vi00, 0x38b | nop -L78: +L77: ilwr.x vi08, vi09 | nop ilwr.y vi07, vi09 | nop ilwr.z vi06, vi09 | nop nop | nop - ibeq vi00, vi08, L80 | nop + ibeq vi00, vi08, L79 | nop lq.xyzw vf23, 0(vi07) | nop - ibeq vi07, vi08, L80 | nop + ibeq vi07, vi08, L79 | nop lq.xyzw vf24, 0(vi08) | nop jalr vi11, vi06 | nop lq.xyzw vf15, 1(vi07) | nop @@ -1076,44 +1076,44 @@ L78: lq.xyzw vf12, 2(vi07) | nop fsand vi01, 0x2 | nop fsand vi02, 0x2 | subw.w vf31, vf30, vf31 - ibeq vi02, vi01, L80 | nop + ibeq vi02, vi01, L79 | nop lq.xyzw vf13, 2(vi08) | nop - ibeq vi00, vi01, L79 | nop + ibeq vi00, vi01, L78 | nop div Q, vf30.w, vf31.w | nop - bal vi11, L82 | nop + bal vi11, L81 | nop nop | nop sq.xyzw vf25, 4(vi09) | nop sq.xyzw vf17, 5(vi09) | nop sq.xyzw vf14, 6(vi09) | nop iaddi vi07, vi09, 0x4 | nop ior vi12, vi09, vi00 | nop - bal vi15, L69 | nop + bal vi15, L68 | nop iaddi vi09, vi09, 0x7 | nop - b L80 | nop + b L79 | nop ior vi09, vi12, vi00 | nop -L79: - bal vi11, L82 | nop +L78: 
+ bal vi11, L81 | nop nop | nop sq.xyzw vf25, 1(vi09) | nop sq.xyzw vf17, 2(vi09) | nop sq.xyzw vf14, 3(vi09) | nop iaddi vi07, vi09, 0x1 | nop ior vi12, vi09, vi00 | nop - bal vi15, L69 | nop + bal vi15, L68 | nop iaddi vi09, vi09, 0x7 | nop ior vi09, vi12, vi00 | nop -L80: +L79: isubiu vi01, vi09, 0x3ae | nop iswr.x vi00, vi09 | nop iswr.y vi00, vi09 | nop - ibltz vi01, L78 | nop + ibltz vi01, L77 | nop iaddi vi09, vi09, 0x7 | nop - ibeq vi00, vi05, L81 | nop + ibeq vi00, vi05, L80 | nop mtir vi15, vf31.x | nop iaddiu vi05, vi05, 0x4000 | nop iaddiu vi05, vi05, 0x4000 | nop iswr.x vi05, vi04 | nop -L81: +L80: nop | nop jr vi15 | nop nop | nop @@ -1129,7 +1129,7 @@ L81: nop | addz.w vf31, vf24, vf24 jr vi11 | subz.w vf30, vf23, vf23 nop | subz.w vf31, vf24, vf24 -L82: +L81: nop | sub.xyzw vf25, vf24, vf23 nop | sub.xyzw vf17, vf16, vf15 nop | sub.xyzw vf14, vf13, vf12 @@ -1139,13 +1139,13 @@ L82: nop | add.xyzw vf25, vf23, vf25 jr vi11 | add.xyzw vf17, vf15, vf17 nop | add.xyzw vf14, vf12, vf14 -L83: +L82: iaddi vi14, vi13, 0x7 | nop lq.xyzw vf03, 4(vi13) | nop ilw.w vi02, 6(vi13) | nop lq.xyzw vf21, 5(vi13) | nop lq.xyzw vf22, 6(vi13) | nop -L84: +L83: ilwr.w vi03, vi14 | nop ilw.w vi04, 1(vi14) | nop lqi.xyzw vf16, vi14 | nop @@ -1163,16 +1163,16 @@ L84: sqi.xyzw vf21, vi06 | nop sqi.xyzw vf22, vi06 | nop sqi.xyzw vf03, vi06 | nop - ibgez vi04, L84 | nop + ibgez vi04, L83 | nop isw.x vi04, -1(vi06) | nop iadd vi02, vi12, vi02 | nop nop | nop xgkick vi02 | nop isubiu vi01, vi12, 0x22e | nop nop | nop - ibltz vi01, L85 | nop + ibltz vi01, L84 | nop iaddiu vi12, vi12, 0x117 | nop iaddi vi12, vi00, 0x0 | nop -L85: +L84: nop | nop :e nop | nop diff --git a/test/decompiler/vu_reference/merc-result.txt b/test/decompiler/vu_reference/merc-result.txt index 9cb75a5989..9da2c0bcd0 100644 --- a/test/decompiler/vu_reference/merc-result.txt +++ b/test/decompiler/vu_reference/merc-result.txt @@ -16,10 +16,10 @@ sq.xyzw vf28, 4(vi00) | minii.w vf29, vf00, I :e mr32.xyzw vf03, 
vf02 | nop iaddi vi07, vi00, 0x1 | nop - b L4 | nop + b L1 | nop isw.w vi07, 1(vi00) | nop iaddi vi07, vi00, 0x0 | nop -L4: +L1: lq.xyzw vf25, 139(vi00) | nop lq.xyzw vf26, 3(vi00) | nop lq.xyz vf01, 132(vi00) | nop @@ -29,51 +29,51 @@ L4: lq.xyzw vf05, 136(vi00) | nop lq.xyzw vf06, 137(vi00) | nop lq.xyzw vf07, 138(vi00) | nop - b L5 | nop + b L2 | nop sq.xyzw vf26, 5(vi00) | nop iaddi vi07, vi00, 0x1 | nop - b L5 | nop + b L2 | nop isw.w vi07, 1(vi00) | nop iaddi vi07, vi00, 0x0 | nop -L5: +L2: lq.xyzw vf28, 139(vi00) | minix.xyzw vf15, vf00, vf00 xtop vi15 | nop iaddiu vi12, vi15, 0x8c | nop - ibeq vi00, vi15, L6 | nop + ibeq vi00, vi15, L3 | nop ilwr.w vi03, vi12 | maxz.xy vf18, vf00, vf28 nop | maxw.xy vf18, vf00, vf28 -L6: +L3: ilw.w vi10, 133(vi00) | nop iaddiu vi15, vi15, 0x173 | nop ilw.y vi02, 2(vi12) | nop lq.xyzw vf14, 0(vi00) | nop - ibeq vi00, vi10, L7 | nop + ibeq vi00, vi10, L4 | nop iadd vi03, vi03, vi12 | nop mr32.xyzw vf27, vf14 | nop ilw.w vi11, 134(vi00) | nop iaddiu vi13, vi00, 0x42 | nop mr32.y vf14, vf27 | nop -L7: +L4: ilwr.w vi09, vi03 | nop lqi.xyzw vf27, vi03 | nop ilw.x vi04, 1(vi12) | nop iaddiu vi05, vi00, 0x7f | addw.xyz vf15, vf15, vf00 iand vi09, vi09, vi05 | nop ilw.y vi06, 1(vi12) | miniz.w vf19, vf00, vf27 - ibeq vi00, vi02, L9 | miniy.w vf18, vf00, vf27 + ibeq vi00, vi02, L6 | miniy.w vf18, vf00, vf27 ilwr.z vi01, vi12 | minix.w vf17, vf00, vf27 - ibne vi00, vi09, L8 | nop + ibne vi00, vi09, L5 | nop sq.yzw vf14, 0(vi15) | nop iaddiu vi02, vi02, 0x4000 | nop iaddiu vi02, vi02, 0x4000 | nop iswr.x vi02, vi15 | nop - b L12 | nop + b L9 | nop nop | nop -L8: +L5: iswr.x vi02, vi15 | nop -L9: +L6: lq.xyzw vf13, 1(vi00) | nop -L10: +L7: ilwr.w vi02, vi03 | nop lqi.xyzw vf08, vi03 | nop lqi.xyzw vf09, vi03 | nop @@ -88,11 +88,11 @@ L10: mfir.x vf14, vi08 | nop sqi.xyzw vf10, vi02 | nop sqi.xyzw vf11, vi02 | nop - ibeq vi00, vi10, L11 | nop + ibeq vi00, vi10, L8 | nop sqi.xyzw vf12, vi02 | nop mtir vi14, vf12.z | nop isw.x vi10, 
-1(vi02) | nop - ibeq vi14, vi13, L11 | nop + ibeq vi14, vi13, L8 | nop isw.y vi11, -1(vi02) | nop ilw.x vi13, -4(vi02) | nop isubiu vi14, vi00, 0x1d | nop @@ -101,19 +101,19 @@ L10: isw.x vi13, -4(vi02) | nop iaddiu vi13, vi00, 0x42 | nop isw.z vi13, -1(vi02) | nop -L11: - ibgtz vi08, L10 | nop +L8: + ibgtz vi08, L7 | nop sq.xyzw vf14, 0(vi02) | nop -L12: +L9: lq.xyzw vf28, 3(vi00) | nop ilw.y vi08, 3(vi12) | nop lq.xyzw vf16, 5(vi00) | nop lq.xyzw vf20, 4(vi00) | nop ilw.z vi09, 3(vi12) | mul.xyzw vf27, vf28, vf15 ior vi11, vi08, vi00 | mul.xyzw vf28, vf28, vf00 - ibeq vi00, vi08, L14 | mul.xyzw vf15, vf16, vf15 + ibeq vi00, vi08, L11 | mul.xyzw vf15, vf16, vf15 iaddi vi13, vi12, 0x3 | mul.xyzw vf16, vf16, vf00 -L13: +L10: lq.xyzw vf08, 0(vi08) | addax.xyzw vf20, vf00 lq.xyzw vf10, 1(vi08) | madda.xyzw ACC, vf27, vf25 lq.xyzw vf12, 2(vi08) | maddz.xyzw vf26, vf28, vf25 @@ -122,7 +122,7 @@ L13: sq.xyzw vf11, 1(vi11) | maddz.xyzw vf09, vf16, vf08 sq.xyzw vf13, 2(vi11) | mula.xyzw ACC, vf15, vf10 sq.xyzw vf26, 3(vi11) | maddz.xyzw vf11, vf16, vf10 - ibeq vi00, vi08, L14 | mula.xyzw ACC, vf15, vf12 + ibeq vi00, vi08, L11 | mula.xyzw ACC, vf15, vf12 ilwr.w vi10, vi13 | maddz.xyzw vf13, vf16, vf12 lq.xyzw vf08, 0(vi09) | addax.xyzw vf20, vf00 lq.xyzw vf10, 1(vi09) | madda.xyzw ACC, vf27, vf25 @@ -132,7 +132,7 @@ L13: sq.xyzw vf11, 1(vi08) | maddz.xyzw vf09, vf16, vf08 sq.xyzw vf13, 2(vi08) | mula.xyzw ACC, vf15, vf10 sq.xyzw vf26, 3(vi08) | maddz.xyzw vf11, vf16, vf10 - ibeq vi00, vi09, L14 | mula.xyzw ACC, vf15, vf12 + ibeq vi00, vi09, L11 | mula.xyzw ACC, vf15, vf12 ilw.x vi11, 1(vi13) | maddz.xyzw vf13, vf16, vf12 lq.xyzw vf08, 0(vi10) | addax.xyzw vf20, vf00 lq.xyzw vf10, 1(vi10) | madda.xyzw ACC, vf27, vf25 @@ -142,7 +142,7 @@ L13: sq.xyzw vf11, 1(vi09) | maddz.xyzw vf09, vf16, vf08 sq.xyzw vf13, 2(vi09) | mula.xyzw ACC, vf15, vf10 sq.xyzw vf26, 3(vi09) | maddz.xyzw vf11, vf16, vf10 - ibeq vi00, vi10, L14 | mula.xyzw ACC, vf15, vf12 + ibeq vi00, vi10, L11 | 
mula.xyzw ACC, vf15, vf12 ilw.y vi08, 1(vi13) | maddz.xyzw vf13, vf16, vf12 lq.xyzw vf08, 0(vi11) | addax.xyzw vf20, vf00 lq.xyzw vf10, 1(vi11) | madda.xyzw ACC, vf27, vf25 @@ -153,11 +153,11 @@ L13: sq.xyzw vf13, 2(vi10) | mula.xyzw ACC, vf15, vf10 sq.xyzw vf26, 3(vi10) | maddz.xyzw vf11, vf16, vf10 iaddi vi13, vi13, 0x1 | nop - ibne vi00, vi11, L13 | mula.xyzw ACC, vf15, vf12 + ibne vi00, vi11, L10 | mula.xyzw ACC, vf15, vf12 ilwr.z vi09, vi13 | maddz.xyzw vf13, vf16, vf12 -L14: +L11: ilw.x vi02, 3(vi12) | nop - ibeq vi00, vi04, L26 | nop + ibeq vi00, vi04, L23 | nop iadd vi01, vi01, vi12 | nop ilwr.x vi08, vi01 | nop lqi.xyzw vf08, vi01 | nop @@ -184,10 +184,10 @@ L14: lqi.xyzw vf12, vi01 | add.xyzw vf08, vf08, vf28 lqi.xyzw vf15, vi01 | nop mtir vi08, vf09.x | nop - ibeq vi00, vi15, L15 | nop + ibeq vi00, vi15, L12 | nop iadd vi03, vi03, vi12 | nop nop | miniw.w vf08, vf08, vf01 -L15: +L12: div Q, vf01.w, vf08.w | add.zw vf09, vf09, vf17 iadd vi04, vi04, vi03 | add.xyzw vf12, vf12, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf15, vf15, vf19 @@ -205,23 +205,23 @@ L15: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 nop | nop 1024.0 | miniw.w vf08, vf08, vf03 :i - ibne vi00, vi15, L82 | mulaw.xyzw ACC, vf25, vf09 + ibne vi00, vi15, L79 | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | nop nop | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 - ibne vi04, vi03, L17 | madday.xyzw ACC, vf02, vf11 + ibne vi04, vi03, L14 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 - ibne vi06, vi03, L31 | nop + ibne vi06, vi03, L28 | nop nop | nop - b L67 | nop + b L64 | nop nop | nop -L16: +L13: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L17: +L14: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, vf08, vf02 
mtir vi08, vf10.x | itof0.xyzw vf23, vf23 @@ -230,35 +230,35 @@ L17: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf16, vf16, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L18 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L15 | madday.xyzw ACC, vf05, vf11 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L18: +L15: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf11, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 - ibne vi00, vi09, L19 | mulaz.xyzw ACC, vf29, vf10 + ibne vi00, vi09, L16 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L19: +L16: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 sq.xyzw vf21, 2(vi13) | mulaw.xyzw ACC, vf25, vf10 lq.xyzw vf28, 3(vi08) | mulw.xyzw vf12, vf12, vf20 erleng.xyz P, vf13 | ftoi0.xyzw vf11, vf11 - ibne vi04, vi03, L20 | maddaw.xyzw ACC, vf26, vf13 + ibne vi04, vi03, L17 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi06, vi03, L36 | nop + ibne vi06, vi03, L33 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L72 | nop + ibne vi07, vi03, L69 | nop nop | nop - b L1 | nop + b L140 | nop nop | nop -L20: +L17: lqi.xyzw vf08, vi01 | mulax.xyzw ACC, vf01, vf12 sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 sq.xyzw vf11, 1(vi13) | maddz.xyzw vf12, vf03, vf12 @@ -270,35 +270,35 @@ L20: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf14, vf14, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L21 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L18 | madday.xyzw ACC, vf05, vf12 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, 
vf06, vf12 nop | addx.w vf21, vf21, vf17 -L21: +L18: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf12, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 - ibne vi00, vi09, L22 | mulaz.xyzw ACC, vf29, vf08 + ibne vi00, vi09, L19 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L22: +L19: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 sq.xyzw vf21, 2(vi14) | mulaw.xyzw ACC, vf25, vf08 lq.xyzw vf28, 3(vi08) | mulw.xyzw vf13, vf13, vf20 erleng.xyz P, vf11 | ftoi0.xyzw vf12, vf12 - ibne vi04, vi03, L23 | maddaw.xyzw ACC, vf26, vf11 + ibne vi04, vi03, L20 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi06, vi03, L41 | nop + ibne vi06, vi03, L38 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L77 | nop + ibne vi07, vi03, L74 | nop nop | nop - b L2 | nop + b L150 | nop nop | nop -L23: +L20: lqi.xyzw vf09, vi01 | mulax.xyzw ACC, vf01, vf13 sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 sq.xyzw vf12, 1(vi14) | maddz.xyzw vf13, vf03, vf13 @@ -310,36 +310,36 @@ L23: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf15, vf15, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L24 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L21 | madday.xyzw ACC, vf05, vf13 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L24: +L21: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf13, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 - ibne vi00, vi09, L25 | 
mulaz.xyzw ACC, vf29, vf09 + ibne vi00, vi09, L22 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L25: +L22: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 sq.xyzw vf21, 2(vi15) | mulaw.xyzw ACC, vf25, vf09 lq.xyzw vf28, 3(vi08) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | ftoi0.xyzw vf13, vf13 - ibne vi04, vi03, L16 | maddaw.xyzw ACC, vf26, vf12 + ibne vi04, vi03, L13 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi06, vi03, L30 | nop + ibne vi06, vi03, L27 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L66 | nop + ibne vi07, vi03, L63 | nop nop | nop - b L3 | nop + b L160 | nop nop | nop -L26: - ibeq vi00, vi06, L61 | nop +L23: + ibeq vi00, vi06, L58 | nop iadd vi02, vi02, vi12 | nop lqi.xyzw vf08, vi01 | nop lqi.xyzw vf24, vi02 | nop @@ -386,23 +386,23 @@ L26: lqi.xyzw vf12, vi01 | add.xyzw vf08, vf08, vf28 lqi.xyzw vf15, vi01 | nop mtir vi11, vf09.x | nop - ibeq vi00, vi15, L27 | nop + ibeq vi00, vi15, L24 | nop mtir vi14, vf09.y | nop iaddiu vi08, vi00, 0x539 | miniw.w vf08, vf08, vf01 -L27: +L24: div Q, vf01.w, vf08.w | add.zw vf09, vf09, vf17 iadd vi03, vi03, vi12 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 iadd vi06, vi06, vi03 | nop iadd vi07, vi07, vi06 | nop iand vi14, vi14, vi05 | nop - ibne vi05, vi11, L28 | nop + ibne vi05, vi11, L25 | nop nop | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | nop - b L29 | nop + b L26 | nop lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L28: +L25: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q lq.xyzw vf25, 0(vi14) | nop lq.xyzw vf23, 1(vi11) | nop @@ -421,28 +421,28 @@ L28: mtir vi11, vf12.x | maddw.xyz vf30, vf30, vf24 mtir vi14, vf12.y | mulaz.xyzw ACC, vf20, vf24 iaddiu vi08, vi00, 0x1a1 | maddw.xyzw vf31, vf31, vf24 - ibeq vi00, vi15, 
L29 | nop + ibeq vi00, vi15, L26 | nop lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 iaddiu vi08, vi00, 0x48e | nop -L29: +L26: nop | mulaz.xyzw ACC, vf29, vf09 nop | maddaz.xyzw ACC, vf30, vf12 mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 nop | nop 1024.0 | miniw.w vf08, vf08, vf03 :i - ibne vi00, vi15, L93 | mulaw.xyzw ACC, vf25, vf09 + ibne vi00, vi15, L90 | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | nop - ibeq vi06, vi03, L65 | maddaw.xyzw ACC, vf26, vf12 + ibeq vi06, vi03, L62 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 jr vi08 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 -L30: +L27: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L31: +L28: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, vf08, vf02 mtir vi12, vf10.x | itof0.xyzw vf23, vf23 @@ -451,17 +451,17 @@ L31: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 nop | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L32 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L29 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L32: - ibne vi05, vi12, L33 | maddw.xyzw vf11, vf07, vf00 +L29: + ibne vi05, vi12, L30 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L34 | mul.xyzw vf11, vf11, vf23 + b L31 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 -L33: +L30: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi15) | ftoi4.xyzw vf21, vf21 @@ -479,26 +479,26 @@ L33: lq.xyzw vf31, 6(vi15) | maddy.xyz vf29, vf29, vf24 mtir vi12, vf13.x | 
mulax.xyzw ACC, vf23, vf24 mtir vi15, vf13.y | maddy.xyz vf30, vf30, vf24 - b L49 | mulax.xyzw ACC, vf20, vf24 + b L46 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L34: - ibgez vi09, L35 | mulaz.xyzw ACC, vf29, vf10 +L31: + ibgez vi09, L32 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L35: +L32: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 sq.xyzw vf21, 2(vi13) | mulaw.xyzw ACC, vf25, vf10 ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 erleng.xyz P, vf13 | ftoi0.xyzw vf11, vf11 - ibne vi06, vi03, L36 | maddaw.xyzw ACC, vf26, vf13 + ibne vi06, vi03, L33 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi07, vi03, L72 | nop + ibne vi07, vi03, L69 | nop nop | nop - b L1 | nop + b L140 | nop nop | nop -L36: +L33: lqi.xyzw vf08, vi01 | mulax.xyzw ACC, vf01, vf12 sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 sq.xyzw vf11, 1(vi13) | maddz.xyzw vf12, vf03, vf12 @@ -510,17 +510,17 @@ L36: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, vi10, vi05 | add.xyzw vf14, vf14, vf19 nop | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L37 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L34 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L37: - ibne vi05, vi10, L38 | maddw.xyzw vf12, vf07, vf00 +L34: + ibne vi05, vi10, L35 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L39 | mul.xyzw vf12, vf12, vf23 + b L36 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 -L38: +L35: lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi13) | ftoi4.xyzw vf21, vf21 @@ -538,26 +538,26 @@ L38: lq.xyzw vf31, 
6(vi13) | maddy.xyz vf29, vf29, vf24 mtir vi10, vf11.x | mulax.xyzw ACC, vf23, vf24 mtir vi13, vf11.y | maddy.xyz vf30, vf30, vf24 - b L54 | mulax.xyzw ACC, vf20, vf24 + b L51 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L39: - ibgez vi09, L40 | mulaz.xyzw ACC, vf29, vf08 +L36: + ibgez vi09, L37 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L40: +L37: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 sq.xyzw vf21, 2(vi14) | mulaw.xyzw ACC, vf25, vf08 ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 erleng.xyz P, vf11 | ftoi0.xyzw vf12, vf12 - ibne vi06, vi03, L41 | maddaw.xyzw ACC, vf26, vf11 + ibne vi06, vi03, L38 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi07, vi03, L77 | nop + ibne vi07, vi03, L74 | nop nop | nop - b L2 | nop + b L150 | nop nop | nop -L41: +L38: lqi.xyzw vf09, vi01 | mulax.xyzw ACC, vf01, vf13 sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 sq.xyzw vf12, 1(vi14) | maddz.xyzw vf13, vf03, vf13 @@ -569,17 +569,17 @@ L41: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 nop | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L42 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L39 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L42: - ibne vi05, vi11, L43 | maddw.xyzw vf13, vf07, vf00 +L39: + ibne vi05, vi11, L40 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b L44 | mul.xyzw vf13, vf13, vf23 + b L41 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L43: +L40: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi14) | ftoi4.xyzw 
vf21, vf21 @@ -597,26 +597,26 @@ L43: lq.xyzw vf31, 6(vi14) | maddy.xyz vf29, vf29, vf24 mtir vi11, vf12.x | mulax.xyzw ACC, vf23, vf24 mtir vi14, vf12.y | maddy.xyz vf30, vf30, vf24 - b L59 | mulax.xyzw ACC, vf20, vf24 + b L56 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L44: - ibgez vi09, L45 | mulaz.xyzw ACC, vf29, vf09 +L41: + ibgez vi09, L42 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L45: +L42: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 sq.xyzw vf21, 2(vi15) | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | ftoi0.xyzw vf13, vf13 - ibne vi06, vi03, L30 | maddaw.xyzw ACC, vf26, vf12 + ibne vi06, vi03, L27 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi07, vi03, L66 | nop + ibne vi07, vi03, L63 | nop nop | nop - b L3 | nop + b L160 | nop nop | nop -L46: +L43: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 @@ -628,17 +628,17 @@ L46: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 nop | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L47 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L44 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L47: - ibne vi05, vi12, L48 | maddw.xyzw vf11, vf07, vf00 +L44: + ibne vi05, vi12, L45 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L49 | mul.xyzw vf11, vf11, vf23 + b L46 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 -L48: +L45: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q lq.xyzw vf25, 
0(vi15) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi12) | mul.xyzw vf11, vf11, vf23 @@ -656,26 +656,26 @@ L48: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi12, vf13.x | maddw.xyz vf30, vf30, vf24 mtir vi15, vf13.y | mulaz.xyzw ACC, vf20, vf24 - b L34 | maddw.xyzw vf31, vf31, vf24 + b L31 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L49: - ibgez vi09, L50 | mulaz.xyzw ACC, vf29, vf10 +L46: + ibgez vi09, L47 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L50: +L47: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 sq.xyzw vf21, 2(vi13) | mulaw.xyzw ACC, vf25, vf10 ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 erleng.xyz P, vf13 | ftoi0.xyzw vf11, vf11 - ibne vi06, vi03, L51 | maddaw.xyzw ACC, vf26, vf13 + ibne vi06, vi03, L48 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi07, vi03, L72 | nop + ibne vi07, vi03, L69 | nop nop | nop - b L1 | nop + b L140 | nop nop | nop -L51: +L48: lqi.xyzw vf08, vi01 | mulax.xyzw ACC, vf01, vf12 sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 sq.xyzw vf11, 1(vi13) | maddz.xyzw vf12, vf03, vf12 @@ -687,17 +687,17 @@ L51: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, vi10, vi05 | add.xyzw vf14, vf14, vf19 nop | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L52 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L49 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L52: - ibne vi05, vi10, L53 | maddw.xyzw vf12, vf07, vf00 +L49: + ibne vi05, vi10, L50 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L54 | mul.xyzw vf12, vf12, vf23 + b L51 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 -L53: +L50: 
lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q lq.xyzw vf25, 0(vi13) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi10) | mul.xyzw vf12, vf12, vf23 @@ -715,26 +715,26 @@ L53: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi10, vf11.x | maddw.xyz vf30, vf30, vf24 mtir vi13, vf11.y | mulaz.xyzw ACC, vf20, vf24 - b L39 | maddw.xyzw vf31, vf31, vf24 + b L36 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L54: - ibgez vi09, L55 | mulaz.xyzw ACC, vf29, vf08 +L51: + ibgez vi09, L52 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L55: +L52: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 sq.xyzw vf21, 2(vi14) | mulaw.xyzw ACC, vf25, vf08 ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 erleng.xyz P, vf11 | ftoi0.xyzw vf12, vf12 - ibne vi06, vi03, L56 | maddaw.xyzw ACC, vf26, vf11 + ibne vi06, vi03, L53 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi07, vi03, L77 | nop + ibne vi07, vi03, L74 | nop nop | nop - b L2 | nop + b L150 | nop nop | nop -L56: +L53: lqi.xyzw vf09, vi01 | mulax.xyzw ACC, vf01, vf13 sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 sq.xyzw vf12, 1(vi14) | maddz.xyzw vf13, vf03, vf13 @@ -746,17 +746,17 @@ L56: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 nop | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L57 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L54 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L57: - ibne vi05, vi11, L58 | maddw.xyzw vf13, vf07, vf00 +L54: + ibne vi05, vi11, L55 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b L59 | mul.xyzw vf13, vf13, vf23 + b L56 | mul.xyzw vf13, vf13, 
vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L58: +L55: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q lq.xyzw vf25, 0(vi14) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi11) | mul.xyzw vf13, vf13, vf23 @@ -774,26 +774,26 @@ L58: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi11, vf12.x | maddw.xyz vf30, vf30, vf24 mtir vi14, vf12.y | mulaz.xyzw ACC, vf20, vf24 - b L44 | maddw.xyzw vf31, vf31, vf24 + b L41 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L59: - ibgez vi09, L60 | mulaz.xyzw ACC, vf29, vf09 +L56: + ibgez vi09, L57 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L60: +L57: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 sq.xyzw vf21, 2(vi15) | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | ftoi0.xyzw vf13, vf13 - ibne vi06, vi03, L46 | maddaw.xyzw ACC, vf26, vf12 + ibne vi06, vi03, L43 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi07, vi03, L72 | nop + ibne vi07, vi03, L69 | nop nop | nop - b L3 | nop + b L160 | nop nop | nop -L61: +L58: lqi.xyzw vf08, vi01 | nop lqi.xyzw vf24, vi02 | nop lqi.xyzw vf11, vi01 | nop @@ -847,23 +847,23 @@ L61: lqi.xyzw vf12, vi01 | add.xyzw vf08, vf08, vf28 lqi.xyzw vf15, vi01 | nop mtir vi11, vf09.x | nop - ibeq vi00, vi15, L62 | nop + ibeq vi00, vi15, L59 | nop mtir vi14, vf09.y | nop nop | miniw.w vf08, vf08, vf01 -L62: +L59: div Q, vf01.w, vf08.w | add.zw vf09, vf09, vf17 iadd vi03, vi03, vi12 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 ilw.w vi08, -1(vi02) | nop iadd vi07, vi07, vi03 | nop iand vi14, vi14, vi05 | nop - ibne vi05, vi11, L63 | nop + ibne vi05, vi11, L60 | nop iaddi vi07, vi07, -0x1 | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | nop - b L64 | 
nop + b L61 | nop lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L63: +L60: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi14) | nop @@ -891,26 +891,26 @@ L63: mtir vi14, vf12.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L64: +L61: nop | mulaz.xyzw ACC, vf29, vf09 nop | maddaz.xyzw ACC, vf30, vf12 mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 nop | nop 1024.0 | miniw.w vf08, vf08, vf03 :i - ibne vi00, vi15, L125 | mulaw.xyzw ACC, vf25, vf09 + ibne vi00, vi15, L122 | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | nop nop | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 -L65: +L62: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 - b L67 | madday.xyzw ACC, vf02, vf11 + b L64 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 -L66: +L63: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L67: +L64: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, vf08, vf02 mtir vi12, vf10.x | itof0.xyzw vf23, vf23 @@ -919,17 +919,17 @@ L67: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L68 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L65 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L68: - ibne vi05, vi12, L69 | maddw.xyzw vf11, vf07, vf00 +L65: + ibne vi05, vi12, L66 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L70 | mul.xyzw vf11, vf11, vf23 + b L67 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 
-L69: +L66: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi15) | ftoi4.xyzw vf21, vf21 @@ -957,20 +957,20 @@ L69: mtir vi15, vf13.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L70: - ibgez vi09, L71 | mulaz.xyzw ACC, vf29, vf10 +L67: + ibgez vi09, L68 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L71: +L68: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 sq.xyzw vf21, 2(vi13) | mulaw.xyzw ACC, vf25, vf10 ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 erleng.xyz P, vf13 | ftoi0.xyzw vf11, vf11 - ibeq vi07, vi03, L143 | maddaw.xyzw ACC, vf26, vf13 + ibeq vi07, vi03, L140 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 -L72: +L69: lqi.xyzw vf08, vi01 | mulax.xyzw ACC, vf01, vf12 sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 sq.xyzw vf11, 1(vi13) | maddz.xyzw vf12, vf03, vf12 @@ -982,17 +982,17 @@ L72: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, vi10, vi05 | add.xyzw vf14, vf14, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L73 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L70 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L73: - ibne vi05, vi10, L74 | maddw.xyzw vf12, vf07, vf00 +L70: + ibne vi05, vi10, L71 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L75 | mul.xyzw vf12, vf12, vf23 + b L72 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 -L74: +L71: lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi13) | ftoi4.xyzw vf21, vf21 @@ -1020,20 +1020,20 @@ 
L74: mtir vi13, vf11.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L75: - ibgez vi09, L76 | mulaz.xyzw ACC, vf29, vf08 +L72: + ibgez vi09, L73 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L76: +L73: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 sq.xyzw vf21, 2(vi14) | mulaw.xyzw ACC, vf25, vf08 ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 erleng.xyz P, vf11 | ftoi0.xyzw vf12, vf12 - ibeq vi07, vi03, L153 | maddaw.xyzw ACC, vf26, vf11 + ibeq vi07, vi03, L150 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 -L77: +L74: lqi.xyzw vf09, vi01 | mulax.xyzw ACC, vf01, vf13 sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 sq.xyzw vf12, 1(vi14) | maddz.xyzw vf13, vf03, vf13 @@ -1045,17 +1045,17 @@ L77: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L78 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L75 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L78: - ibne vi05, vi11, L79 | maddw.xyzw vf13, vf07, vf00 +L75: + ibne vi05, vi11, L76 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b L80 | mul.xyzw vf13, vf13, vf23 + b L77 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L79: +L76: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi14) | ftoi4.xyzw vf21, vf21 @@ -1083,40 +1083,40 @@ L79: mtir vi14, vf12.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L80: - 
ibgez vi09, L81 | mulaz.xyzw ACC, vf29, vf09 +L77: + ibgez vi09, L78 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L81: +L78: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 sq.xyzw vf21, 2(vi15) | mulaw.xyzw ACC, vf25, vf09 ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 erleng.xyz P, vf12 | ftoi0.xyzw vf13, vf13 - ibne vi07, vi03, L66 | maddaw.xyzw ACC, vf26, vf12 + ibne vi07, vi03, L63 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - b L163 | nop + b L160 | nop nop | nop -L82: +L79: erleng.xyz P, vf12 | maxi.xy vf08, vf08, I 3072.0 | nop :i nop | minii.xy vf08, vf08, I nop | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 - ibne vi04, vi03, L84 | madday.xyzw ACC, vf02, vf11 + ibne vi04, vi03, L81 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 - ibne vi06, vi03, L95 | nop + ibne vi06, vi03, L92 | nop nop | nop - b L128 | nop + b L125 | nop nop | nop -L83: +L80: 3072.0 | mulax.xyzw ACC, vf01, vf11 :i lqi.xyzw vf10, vi01 | minii.xy vf08, vf08, I sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L84: +L81: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, vf08, vf02 mtir vi08, vf10.x | itof0.xyzw vf23, vf23 @@ -1126,20 +1126,20 @@ L84: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf16, vf16, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L85 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L82 | madday.xyzw ACC, vf05, vf11 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L85: +L82: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf11, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, 
vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 - ibne vi00, vi09, L86 | mulaz.xyzw ACC, vf29, vf10 + ibne vi00, vi09, L83 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L86: +L83: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 @@ -1147,15 +1147,15 @@ L86: lq.xyzw vf28, 3(vi08) | mulw.xyzw vf12, vf12, vf20 1024.0 | ftoi0.xyzw vf11, vf11 :i erleng.xyz P, vf13 | maxi.xy vf09, vf09, I - ibne vi04, vi03, L87 | maddaw.xyzw ACC, vf26, vf13 + ibne vi04, vi03, L84 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi06, vi03, L100 | nop + ibne vi06, vi03, L97 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L133 | nop + ibne vi07, vi03, L130 | nop nop | nop - b L143 | nop + b L140 | nop nop | nop -L87: +L84: 3072.0 | mulax.xyzw ACC, vf01, vf12 :i lqi.xyzw vf08, vi01 | minii.xy vf09, vf09, I sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 @@ -1169,20 +1169,20 @@ L87: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf14, vf14, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L88 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L85 | madday.xyzw ACC, vf05, vf12 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L88: +L85: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf12, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 - ibne vi00, vi09, L89 | mulaz.xyzw ACC, vf29, vf08 + ibne vi00, vi09, L86 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L89: +L86: mfp.w vf20, 
P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 @@ -1190,15 +1190,15 @@ L89: lq.xyzw vf28, 3(vi08) | mulw.xyzw vf13, vf13, vf20 1024.0 | ftoi0.xyzw vf12, vf12 :i erleng.xyz P, vf11 | maxi.xy vf10, vf10, I - ibne vi04, vi03, L90 | maddaw.xyzw ACC, vf26, vf11 + ibne vi04, vi03, L87 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi06, vi03, L105 | nop + ibne vi06, vi03, L102 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L138 | nop + ibne vi07, vi03, L135 | nop nop | nop - b L153 | nop + b L150 | nop nop | nop -L90: +L87: 3072.0 | mulax.xyzw ACC, vf01, vf13 :i lqi.xyzw vf09, vi01 | minii.xy vf10, vf10, I sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 @@ -1212,20 +1212,20 @@ L90: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 lq.xyz vf29, 4(vi08) | add.xyzw vf15, vf15, vf19 lq.xyz vf30, 5(vi08) | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L91 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L88 | madday.xyzw ACC, vf05, vf13 lq.xyzw vf31, 6(vi08) | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L91: +L88: lq.xyzw vf25, 0(vi08) | maddw.xyzw vf13, vf07, vf00 lq.xyzw vf26, 1(vi08) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 lq.xyzw vf27, 2(vi08) | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 - ibne vi00, vi09, L92 | mulaz.xyzw ACC, vf29, vf09 + ibne vi00, vi09, L89 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L92: +L89: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 @@ -1233,29 +1233,29 @@ L92: lq.xyzw vf28, 3(vi08) | mulw.xyzw vf11, vf11, vf20 1024.0 | ftoi0.xyzw vf13, vf13 :i erleng.xyz P, vf12 | maxi.xy vf08, vf08, I - ibne vi04, vi03, L83 | maddaw.xyzw ACC, vf26, vf12 + ibne vi04, 
vi03, L80 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi06, vi03, L94 | nop + ibne vi06, vi03, L91 | nop ilw.y vi09, -6(vi01) | nop - ibne vi07, vi03, L127 | nop + ibne vi07, vi03, L124 | nop nop | nop - b L163 | nop + b L160 | nop nop | nop -L93: +L90: erleng.xyz P, vf12 | maxi.xy vf08, vf08, I 3072.0 | nop :i nop | minii.xy vf08, vf08, I - ibeq vi06, vi03, L126 | maddaw.xyzw ACC, vf26, vf12 + ibeq vi06, vi03, L123 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 jr vi08 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 -L94: +L91: 3072.0 | mulax.xyzw ACC, vf01, vf11 :i lqi.xyzw vf10, vi01 | minii.xy vf08, vf08, I sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L95: +L92: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, vf08, vf02 mtir vi12, vf10.x | itof0.xyzw vf23, vf23 @@ -1265,17 +1265,17 @@ L95: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 nop | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L96 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L93 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L96: - ibne vi05, vi12, L97 | maddw.xyzw vf11, vf07, vf00 +L93: + ibne vi05, vi12, L94 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L98 | mul.xyzw vf11, vf11, vf23 + b L95 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 -L97: +L94: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi15) | ftoi4.xyzw vf21, vf21 @@ -1293,13 +1293,13 @@ L97: lq.xyzw vf31, 6(vi15) | maddy.xyz vf29, vf29, vf24 mtir vi12, vf13.x | mulax.xyzw ACC, vf23, vf24 mtir vi15, vf13.y | 
maddy.xyz vf30, vf30, vf24 - b L113 | mulax.xyzw ACC, vf20, vf24 + b L110 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L98: - ibgez vi09, L99 | mulaz.xyzw ACC, vf29, vf10 +L95: + ibgez vi09, L96 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L99: +L96: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 @@ -1307,13 +1307,13 @@ L99: ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 1024.0 | ftoi0.xyzw vf11, vf11 :i erleng.xyz P, vf13 | maxi.xy vf09, vf09, I - ibne vi06, vi03, L100 | maddaw.xyzw ACC, vf26, vf13 + ibne vi06, vi03, L97 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi07, vi03, L133 | nop + ibne vi07, vi03, L130 | nop nop | nop - b L143 | nop + b L140 | nop nop | nop -L100: +L97: 3072.0 | mulax.xyzw ACC, vf01, vf12 :i lqi.xyzw vf08, vi01 | minii.xy vf09, vf09, I sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 @@ -1327,17 +1327,17 @@ L100: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, vi10, vi05 | add.xyzw vf14, vf14, vf19 nop | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L101 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L98 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L101: - ibne vi05, vi10, L102 | maddw.xyzw vf12, vf07, vf00 +L98: + ibne vi05, vi10, L99 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L103 | mul.xyzw vf12, vf12, vf23 + b L100 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 -L102: +L99: lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi13) | ftoi4.xyzw vf21, vf21 @@ -1355,13 +1355,13 @@ L102: lq.xyzw vf31, 6(vi13) | maddy.xyz vf29, vf29, vf24 
mtir vi10, vf11.x | mulax.xyzw ACC, vf23, vf24 mtir vi13, vf11.y | maddy.xyz vf30, vf30, vf24 - b L118 | mulax.xyzw ACC, vf20, vf24 + b L115 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L103: - ibgez vi09, L104 | mulaz.xyzw ACC, vf29, vf08 +L100: + ibgez vi09, L101 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L104: +L101: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 @@ -1369,13 +1369,13 @@ L104: ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 1024.0 | ftoi0.xyzw vf12, vf12 :i erleng.xyz P, vf11 | maxi.xy vf10, vf10, I - ibne vi06, vi03, L105 | maddaw.xyzw ACC, vf26, vf11 + ibne vi06, vi03, L102 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi07, vi03, L138 | nop + ibne vi07, vi03, L135 | nop nop | nop - b L153 | nop + b L150 | nop nop | nop -L105: +L102: 3072.0 | mulax.xyzw ACC, vf01, vf13 :i lqi.xyzw vf09, vi01 | minii.xy vf10, vf10, I sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 @@ -1389,17 +1389,17 @@ L105: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 nop | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L106 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L103 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L106: - ibne vi05, vi11, L107 | maddw.xyzw vf13, vf07, vf00 +L103: + ibne vi05, vi11, L104 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b L108 | mul.xyzw vf13, vf13, vf23 + b L105 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L107: +L104: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf25, 0(vi14) | ftoi4.xyzw vf21, vf21 @@ 
-1417,13 +1417,13 @@ L107: lq.xyzw vf31, 6(vi14) | maddy.xyz vf29, vf29, vf24 mtir vi11, vf12.x | mulax.xyzw ACC, vf23, vf24 mtir vi14, vf12.y | maddy.xyz vf30, vf30, vf24 - b L123 | mulax.xyzw ACC, vf20, vf24 + b L120 | mulax.xyzw ACC, vf20, vf24 lqi.xyzw vf23, vi03 | maddy.xyzw vf31, vf31, vf24 -L108: - ibgez vi09, L109 | mulaz.xyzw ACC, vf29, vf09 +L105: + ibgez vi09, L106 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L109: +L106: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 @@ -1431,13 +1431,13 @@ L109: ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 1024.0 | ftoi0.xyzw vf13, vf13 :i erleng.xyz P, vf12 | maxi.xy vf08, vf08, I - ibne vi06, vi03, L94 | maddaw.xyzw ACC, vf26, vf12 + ibne vi06, vi03, L91 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi07, vi03, L127 | nop + ibne vi07, vi03, L124 | nop nop | nop - b L163 | nop + b L160 | nop nop | nop -L110: +L107: 3072.0 | mulax.xyzw ACC, vf01, vf11 :i lqi.xyzw vf10, vi01 | minii.xy vf08, vf08, I sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 @@ -1451,17 +1451,17 @@ L110: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 nop | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L111 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L108 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L111: - ibne vi05, vi12, L112 | maddw.xyzw vf11, vf07, vf00 +L108: + ibne vi05, vi12, L109 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L113 | mul.xyzw vf11, vf11, vf23 + b L110 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 -L112: +L109: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q 
lq.xyzw vf25, 0(vi15) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi12) | mul.xyzw vf11, vf11, vf23 @@ -1479,13 +1479,13 @@ L112: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi12, vf13.x | maddw.xyz vf30, vf30, vf24 mtir vi15, vf13.y | mulaz.xyzw ACC, vf20, vf24 - b L98 | maddw.xyzw vf31, vf31, vf24 + b L95 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L113: - ibgez vi09, L114 | mulaz.xyzw ACC, vf29, vf10 +L110: + ibgez vi09, L111 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L114: +L111: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 @@ -1493,13 +1493,13 @@ L114: ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 1024.0 | ftoi0.xyzw vf11, vf11 :i erleng.xyz P, vf13 | maxi.xy vf09, vf09, I - ibne vi06, vi03, L115 | maddaw.xyzw ACC, vf26, vf13 + ibne vi06, vi03, L112 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 - ibne vi07, vi03, L133 | nop + ibne vi07, vi03, L130 | nop nop | nop - b L143 | nop + b L140 | nop nop | nop -L115: +L112: 3072.0 | mulax.xyzw ACC, vf01, vf12 :i lqi.xyzw vf08, vi01 | minii.xy vf09, vf09, I sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 @@ -1513,17 +1513,17 @@ L115: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, vi10, vi05 | add.xyzw vf14, vf14, vf19 nop | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L116 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L113 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L116: - ibne vi05, vi10, L117 | maddw.xyzw vf12, vf07, vf00 +L113: + ibne vi05, vi10, L114 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L118 | mul.xyzw vf12, vf12, vf23 + b L115 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | 
add.xyzw vf10, vf10, vf22 -L117: +L114: lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q lq.xyzw vf25, 0(vi13) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi10) | mul.xyzw vf12, vf12, vf23 @@ -1541,13 +1541,13 @@ L117: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi10, vf11.x | maddw.xyz vf30, vf30, vf24 mtir vi13, vf11.y | mulaz.xyzw ACC, vf20, vf24 - b L103 | maddw.xyzw vf31, vf31, vf24 + b L100 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L118: - ibgez vi09, L119 | mulaz.xyzw ACC, vf29, vf08 +L115: + ibgez vi09, L116 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L119: +L116: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 @@ -1555,13 +1555,13 @@ L119: ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 1024.0 | ftoi0.xyzw vf12, vf12 :i erleng.xyz P, vf11 | maxi.xy vf10, vf10, I - ibne vi06, vi03, L120 | maddaw.xyzw ACC, vf26, vf11 + ibne vi06, vi03, L117 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 - ibne vi07, vi03, L138 | nop + ibne vi07, vi03, L135 | nop nop | nop - b L153 | nop + b L150 | nop nop | nop -L120: +L117: 3072.0 | mulax.xyzw ACC, vf01, vf13 :i lqi.xyzw vf09, vi01 | minii.xy vf10, vf10, I sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 @@ -1575,17 +1575,17 @@ L120: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 nop | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L121 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L118 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L121: - ibne vi05, vi11, L122 | maddw.xyzw vf13, vf07, vf00 +L118: + ibne vi05, vi11, L119 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b 
L123 | mul.xyzw vf13, vf13, vf23 + b L120 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L122: +L119: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q lq.xyzw vf25, 0(vi14) | ftoi4.xyzw vf21, vf21 lq.xyzw vf23, 1(vi11) | mul.xyzw vf13, vf13, vf23 @@ -1603,13 +1603,13 @@ L122: lqi.xyzw vf23, vi02 | mulaz.xyzw ACC, vf23, vf24 mtir vi11, vf12.x | maddw.xyz vf30, vf30, vf24 mtir vi14, vf12.y | mulaz.xyzw ACC, vf20, vf24 - b L108 | maddw.xyzw vf31, vf31, vf24 + b L105 | maddw.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L123: - ibgez vi09, L124 | mulaz.xyzw ACC, vf29, vf09 +L120: + ibgez vi09, L121 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L124: +L121: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 @@ -1617,28 +1617,28 @@ L124: ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 1024.0 | ftoi0.xyzw vf13, vf13 :i erleng.xyz P, vf12 | maxi.xy vf08, vf08, I - ibne vi06, vi03, L110 | maddaw.xyzw ACC, vf26, vf12 + ibne vi06, vi03, L107 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - ibne vi07, vi03, L133 | nop + ibne vi07, vi03, L130 | nop nop | nop - b L163 | nop + b L160 | nop nop | nop -L125: +L122: erleng.xyz P, vf12 | maxi.xy vf08, vf08, I 3072.0 | nop :i nop | minii.xy vf08, vf08, I nop | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 -L126: +L123: lqi.xyzw vf10, vi01 | mulax.xyzw ACC, vf01, vf11 - b L128 | madday.xyzw ACC, vf02, vf11 + b L125 | madday.xyzw ACC, vf02, vf11 nop | maddz.xyzw vf11, vf03, vf11 -L127: +L124: 3072.0 | mulax.xyzw ACC, vf01, vf11 :i lqi.xyzw vf10, vi01 | minii.xy vf08, vf08, I sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 1(vi15) | maddz.xyzw vf11, vf03, vf11 -L128: +L125: lqi.xyzw vf13, vi01 | add.xyzw vf09, vf09, vf28 lqi.xyzw vf16, vi01 | maxw.w vf08, 
vf08, vf02 mtir vi12, vf10.x | itof0.xyzw vf23, vf23 @@ -1648,17 +1648,17 @@ L128: move.xyzw vf21, vf08 | add.xyzw vf13, vf13, vf18 iand vi12, vi12, vi05 | add.xyzw vf16, vf16, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L129 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L126 | madday.xyzw ACC, vf05, vf11 iand vi15, vi15, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L129: - ibne vi05, vi12, L130 | maddw.xyzw vf11, vf07, vf00 +L126: + ibne vi05, vi12, L127 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf09, vf09, Q mtir vi12, vf13.x | mul.xyzw vf15, vf15, Q mtir vi15, vf13.y | ftoi4.xyzw vf21, vf21 - b L131 | mul.xyzw vf11, vf11, vf23 + b L128 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 -L130: +L127: lq.xyzw vf20, 0(vi12) | mul.xyzw vf15, vf15, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi15) | ftoi4.xyzw vf21, vf21 @@ -1686,11 +1686,11 @@ L130: mtir vi15, vf13.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L131: - ibgez vi09, L132 | mulaz.xyzw ACC, vf29, vf10 +L128: + ibgez vi09, L129 | mulaz.xyzw ACC, vf29, vf10 sq.xyzw vf21, 2(vi10) | maddaz.xyzw ACC, vf30, vf13 nop | ftoi4.xyzw vf21, vf08 -L132: +L129: mfp.w vf20, P | maddz.xyz vf13, vf31, vf16 sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 @@ -1698,9 +1698,9 @@ L132: ilw.y vi09, -6(vi01) | mulw.xyzw vf12, vf12, vf20 1024.0 | ftoi0.xyzw vf11, vf11 :i erleng.xyz P, vf13 | maxi.xy vf09, vf09, I - ibeq vi07, vi03, L143 | maddaw.xyzw ACC, vf26, vf13 + ibeq vi07, vi03, L140 | maddaw.xyzw ACC, vf26, vf13 mr32.z vf16, vf00 | maddw.xyzw vf10, vf27, vf16 -L133: +L130: 3072.0 | mulax.xyzw ACC, vf01, vf12 :i lqi.xyzw vf08, vi01 | minii.xy vf09, vf09, I sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 @@ -1714,17 +1714,17 @@ L133: move.xyzw vf21, vf09 | add.xyzw vf11, vf11, vf18 iand vi10, 
vi10, vi05 | add.xyzw vf14, vf14, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L134 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L131 | madday.xyzw ACC, vf05, vf12 iand vi13, vi13, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L134: - ibne vi05, vi10, L135 | maddw.xyzw vf12, vf07, vf00 +L131: + ibne vi05, vi10, L132 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf10, vf10, Q mtir vi10, vf11.x | mul.xyzw vf16, vf16, Q mtir vi13, vf11.y | ftoi4.xyzw vf21, vf21 - b L136 | mul.xyzw vf12, vf12, vf23 + b L133 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 -L135: +L132: lq.xyzw vf20, 0(vi10) | mul.xyzw vf16, vf16, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi13) | ftoi4.xyzw vf21, vf21 @@ -1752,11 +1752,11 @@ L135: mtir vi13, vf11.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L136: - ibgez vi09, L137 | mulaz.xyzw ACC, vf29, vf08 +L133: + ibgez vi09, L134 | mulaz.xyzw ACC, vf29, vf08 sq.xyzw vf21, 2(vi11) | maddaz.xyzw ACC, vf30, vf11 nop | ftoi4.xyzw vf21, vf09 -L137: +L134: mfp.w vf20, P | maddz.xyz vf11, vf31, vf14 sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 @@ -1764,9 +1764,9 @@ L137: ilw.y vi09, -6(vi01) | mulw.xyzw vf13, vf13, vf20 1024.0 | ftoi0.xyzw vf12, vf12 :i erleng.xyz P, vf11 | maxi.xy vf10, vf10, I - ibeq vi07, vi03, L153 | maddaw.xyzw ACC, vf26, vf11 + ibeq vi07, vi03, L150 | maddaw.xyzw ACC, vf26, vf11 mr32.z vf14, vf00 | maddw.xyzw vf08, vf27, vf14 -L138: +L135: 3072.0 | mulax.xyzw ACC, vf01, vf13 :i lqi.xyzw vf09, vi01 | minii.xy vf10, vf10, I sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 @@ -1780,17 +1780,17 @@ L138: move.xyzw vf21, vf10 | add.xyzw vf12, vf12, vf18 iand vi11, vi11, vi05 | add.xyzw vf15, vf15, vf19 ilw.w vi08, -1(vi02) | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L139 | madday.xyzw ACC, vf05, vf13 + 
ibgtz vi09, L136 | madday.xyzw ACC, vf05, vf13 iand vi14, vi14, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L139: - ibne vi05, vi11, L140 | maddw.xyzw vf13, vf07, vf00 +L136: + ibne vi05, vi11, L137 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -9(vi01) | mul.xyz vf08, vf08, Q mtir vi11, vf12.x | mul.xyzw vf14, vf14, Q mtir vi14, vf12.y | ftoi4.xyzw vf21, vf21 - b L141 | mul.xyzw vf13, vf13, vf23 + b L138 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 -L140: +L137: lq.xyzw vf20, 0(vi11) | mul.xyzw vf14, vf14, Q nop | mulw.xyzw vf24, vf24, vf29 lq.xyzw vf31, 0(vi14) | ftoi4.xyzw vf21, vf21 @@ -1818,11 +1818,11 @@ L140: mtir vi14, vf12.y | madday.xyzw ACC, vf22, vf24 lq.xyzw vf22, 2(vi00) | maddz.xyzw vf31, vf31, vf24 lqi.xyzw vf23, vi03 | itof0.xyzw vf24, vf23 -L141: - ibgez vi09, L142 | mulaz.xyzw ACC, vf29, vf09 +L138: + ibgez vi09, L139 | mulaz.xyzw ACC, vf29, vf09 sq.xyzw vf21, 2(vi12) | maddaz.xyzw ACC, vf30, vf12 nop | ftoi4.xyzw vf21, vf10 -L142: +L139: mfp.w vf20, P | maddz.xyz vf12, vf31, vf15 sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 @@ -1830,11 +1830,11 @@ L142: ilw.y vi09, -6(vi01) | mulw.xyzw vf11, vf11, vf20 1024.0 | ftoi0.xyzw vf13, vf13 :i erleng.xyz P, vf12 | maxi.xy vf08, vf08, I - ibne vi07, vi03, L127 | maddaw.xyzw ACC, vf26, vf12 + ibne vi07, vi03, L124 | maddaw.xyzw ACC, vf26, vf12 mr32.z vf15, vf00 | maddw.xyzw vf09, vf27, vf15 - b L163 | nop + b L160 | nop nop | nop -L143: +L140: ilw.w vi08, 1(vi00) | nop xtop vi02 | mulax.xyzw ACC, vf01, vf12 sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 @@ -1842,27 +1842,27 @@ L143: iaddiu vi04, vi02, 0x8c | add.xyzw vf10, vf10, vf28 ilwr.x vi05, vi04 | maxw.w vf09, vf09, vf02 ilw.w vi06, 1(vi04) | itof0.xyzw vf23, vf23 - ibne vi00, vi08, L151 | nop + ibne vi00, vi08, L148 | nop ilw.x vi07, 2(vi04) | maxx.xyzw vf12, vf12, vf00 -L144: +L141: div Q, vf01.w, vf10.w | minix.xyzw vf25, vf00, vf00 
move.xyzw vf21, vf09 | minix.xyzw vf26, vf00, vf00 iadd vi05, vi05, vi04 | nop iaddiu vi04, vi02, 0x173 | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L145 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L142 | madday.xyzw ACC, vf05, vf12 iadd vi06, vi06, vi05 | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L145: +L142: iadd vi07, vi07, vi06 | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -6(vi01) | mul.xyz vf10, vf10, Q iaddiu vi08, vi00, 0x1ba | mul.xyzw vf16, vf16, Q isub vi08, vi08, vi02 | ftoi4.xyzw vf21, vf21 iaddiu vi08, vi08, 0x173 | mul.xyzw vf12, vf12, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf10, vf10, vf22 - ibgez vi09, L146 | nop + ibgez vi09, L143 | nop sq.xyzw vf21, 2(vi11) | nop nop | ftoi4.xyzw vf21, vf09 -L146: +L143: mfp.w vf20, P | nop sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 sq.xyzw vf15, 0(vi14) | miniw.w vf10, vf10, vf03 @@ -1876,32 +1876,32 @@ L146: sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 sq.xyzw vf12, 1(vi14) | maddz.xyzw vf13, vf03, vf13 nop | nop - ibne vi00, vi02, L152 | maxw.w vf10, vf10, vf02 + ibne vi00, vi02, L149 | maxw.w vf10, vf10, vf02 nop | itof0.xyzw vf23, vf23 -L147: +L144: 8388608.0 | maxx.xyzw vf13, vf13, vf00 :i 256.0 | maxi.xy vf27, vf00, I :i move.xyzw vf21, vf10 | maxi.w vf27, vf00, I nop | nop nop | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L148 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L145 | madday.xyzw ACC, vf05, vf13 nop | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L148: +L145: nop | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -3(vi01) | itof0.xyzw vf25, vf25 nop | itof0.xyzw vf26, vf26 nop | ftoi4.xyzw vf21, vf21 nop | mul.xyzw vf13, vf13, vf23 ior vi02, vi05, vi00 | add.xyzw vf25, vf25, vf27 - ibgez vi09, L149 | add.xyzw vf26, vf26, vf27 + ibgez vi09, L146 | add.xyzw vf26, vf26, vf27 sq.xyzw vf21, 2(vi12) | nop nop | ftoi4.xyzw vf21, vf10 -L149: - ibne vi06, vi05, L150 | nop +L146: + ibne vi06, vi05, L147 | nop sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 ior vi06, vi07, vi00 
| max.xyzw vf25, vf26, vf26 -L150: +L147: sq.xyzw vf16, 0(vi15) | nop sq.xyzw vf21, 2(vi15) | nop lqi.xyzw vf27, vi05 | nop @@ -1910,18 +1910,18 @@ L150: nop | nop nop | itof0.xyzw vf27, vf27 sq.xyzw vf13, 1(vi12) | nop - b L173 | nop + b L170 | nop sq.xyzw vf13, 1(vi15) | nop -L151: +L148: 3072.0 | miniw.w vf10, vf10, vf01 :i - b L144 | minii.xy vf09, vf09, I + b L141 | minii.xy vf09, vf09, I nop | nop -L152: +L149: 1024.0 | nop :i 3072.0 | maxi.xy vf10, vf10, I :i - b L147 | minii.xy vf10, vf10, I + b L144 | minii.xy vf10, vf10, I isw.w vi00, 1(vi00) | nop -L153: +L150: ilw.w vi08, 1(vi00) | nop xtop vi02 | mulax.xyzw ACC, vf01, vf13 sq.xyzw vf12, 1(vi11) | madday.xyzw ACC, vf02, vf13 @@ -1929,27 +1929,27 @@ L153: iaddiu vi04, vi02, 0x8c | add.xyzw vf08, vf08, vf28 ilwr.x vi05, vi04 | maxw.w vf10, vf10, vf02 ilw.w vi06, 1(vi04) | itof0.xyzw vf23, vf23 - ibne vi00, vi08, L161 | nop + ibne vi00, vi08, L158 | nop ilw.x vi07, 2(vi04) | maxx.xyzw vf13, vf13, vf00 -L154: +L151: div Q, vf01.w, vf08.w | minix.xyzw vf25, vf00, vf00 move.xyzw vf21, vf10 | minix.xyzw vf26, vf00, vf00 iadd vi05, vi05, vi04 | nop iaddiu vi04, vi02, 0x173 | mulax.xyzw ACC, vf04, vf13 - ibgtz vi09, L155 | madday.xyzw ACC, vf05, vf13 + ibgtz vi09, L152 | madday.xyzw ACC, vf05, vf13 iadd vi06, vi06, vi05 | maddaz.xyzw ACC, vf06, vf13 nop | addx.w vf21, vf21, vf17 -L155: +L152: iadd vi07, vi07, vi06 | maddw.xyzw vf13, vf07, vf00 ilw.x vi09, -6(vi01) | mul.xyz vf08, vf08, Q iaddiu vi08, vi00, 0x1ba | mul.xyzw vf14, vf14, Q isub vi08, vi08, vi02 | ftoi4.xyzw vf21, vf21 iaddiu vi08, vi08, 0x173 | mul.xyzw vf13, vf13, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf08, vf08, vf22 - ibgez vi09, L156 | nop + ibgez vi09, L153 | nop sq.xyzw vf21, 2(vi12) | nop nop | ftoi4.xyzw vf21, vf10 -L156: +L153: mfp.w vf20, P | nop sq.xyzw vf16, 0(vi12) | miniy.xyzw vf13, vf13, vf17 sq.xyzw vf16, 0(vi15) | miniw.w vf08, vf08, vf03 @@ -1963,32 +1963,32 @@ L156: sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 sq.xyzw vf13, 
1(vi15) | maddz.xyzw vf11, vf03, vf11 nop | nop - ibne vi00, vi02, L162 | maxw.w vf08, vf08, vf02 + ibne vi00, vi02, L159 | maxw.w vf08, vf08, vf02 nop | itof0.xyzw vf23, vf23 -L157: +L154: 8388608.0 | maxx.xyzw vf11, vf11, vf00 :i 256.0 | maxi.xy vf27, vf00, I :i move.xyzw vf21, vf08 | maxi.w vf27, vf00, I nop | nop nop | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L158 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L155 | madday.xyzw ACC, vf05, vf11 nop | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L158: +L155: nop | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -3(vi01) | itof0.xyzw vf25, vf25 nop | itof0.xyzw vf26, vf26 nop | ftoi4.xyzw vf21, vf21 nop | mul.xyzw vf11, vf11, vf23 ior vi02, vi05, vi00 | add.xyzw vf25, vf25, vf27 - ibgez vi09, L159 | add.xyzw vf26, vf26, vf27 + ibgez vi09, L156 | add.xyzw vf26, vf26, vf27 sq.xyzw vf21, 2(vi10) | nop nop | ftoi4.xyzw vf21, vf08 -L159: - ibne vi06, vi05, L160 | nop +L156: + ibne vi06, vi05, L157 | nop sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 -L160: +L157: sq.xyzw vf14, 0(vi13) | nop sq.xyzw vf21, 2(vi13) | nop lqi.xyzw vf27, vi05 | nop @@ -1997,18 +1997,18 @@ L160: nop | nop nop | itof0.xyzw vf27, vf27 sq.xyzw vf11, 1(vi10) | nop - b L173 | nop + b L170 | nop sq.xyzw vf11, 1(vi13) | nop -L161: +L158: 3072.0 | miniw.w vf08, vf08, vf01 :i - b L154 | minii.xy vf10, vf10, I + b L151 | minii.xy vf10, vf10, I nop | nop -L162: +L159: 1024.0 | nop :i 3072.0 | maxi.xy vf08, vf08, I :i - b L157 | minii.xy vf08, vf08, I + b L154 | minii.xy vf08, vf08, I isw.w vi00, 1(vi00) | nop -L163: +L160: ilw.w vi08, 1(vi00) | nop xtop vi02 | mulax.xyzw ACC, vf01, vf11 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf11 @@ -2016,27 +2016,27 @@ L163: iaddiu vi04, vi02, 0x8c | add.xyzw vf09, vf09, vf28 ilwr.x vi05, vi04 | maxw.w vf08, vf08, vf02 ilw.w vi06, 1(vi04) | itof0.xyzw vf23, vf23 - ibne vi00, vi08, L171 | nop + ibne vi00, vi08, L168 | nop ilw.x vi07, 2(vi04) | maxx.xyzw 
vf11, vf11, vf00 -L164: +L161: div Q, vf01.w, vf09.w | minix.xyzw vf25, vf00, vf00 move.xyzw vf21, vf08 | minix.xyzw vf26, vf00, vf00 iadd vi05, vi05, vi04 | nop iaddiu vi04, vi02, 0x173 | mulax.xyzw ACC, vf04, vf11 - ibgtz vi09, L165 | madday.xyzw ACC, vf05, vf11 + ibgtz vi09, L162 | madday.xyzw ACC, vf05, vf11 iadd vi06, vi06, vi05 | maddaz.xyzw ACC, vf06, vf11 nop | addx.w vf21, vf21, vf17 -L165: +L162: iadd vi07, vi07, vi06 | maddw.xyzw vf11, vf07, vf00 ilw.x vi09, -6(vi01) | mul.xyz vf09, vf09, Q iaddiu vi08, vi00, 0x1ba | mul.xyzw vf15, vf15, Q isub vi08, vi08, vi02 | ftoi4.xyzw vf21, vf21 iaddiu vi08, vi08, 0x173 | mul.xyzw vf11, vf11, vf23 lqi.xyzw vf23, vi03 | add.xyzw vf09, vf09, vf22 - ibgez vi09, L166 | nop + ibgez vi09, L163 | nop sq.xyzw vf21, 2(vi10) | nop nop | ftoi4.xyzw vf21, vf08 -L166: +L163: mfp.w vf20, P | nop sq.xyzw vf14, 0(vi10) | miniy.xyzw vf11, vf11, vf17 sq.xyzw vf14, 0(vi13) | miniw.w vf09, vf09, vf03 @@ -2050,32 +2050,32 @@ L166: sq.xyzw vf11, 1(vi10) | madday.xyzw ACC, vf02, vf12 sq.xyzw vf11, 1(vi13) | maddz.xyzw vf12, vf03, vf12 nop | nop - ibne vi00, vi02, L172 | maxw.w vf09, vf09, vf02 + ibne vi00, vi02, L169 | maxw.w vf09, vf09, vf02 nop | itof0.xyzw vf23, vf23 -L167: +L164: 8388608.0 | maxx.xyzw vf12, vf12, vf00 :i 256.0 | maxi.xy vf27, vf00, I :i move.xyzw vf21, vf09 | maxi.w vf27, vf00, I nop | nop nop | mulax.xyzw ACC, vf04, vf12 - ibgtz vi09, L168 | madday.xyzw ACC, vf05, vf12 + ibgtz vi09, L165 | madday.xyzw ACC, vf05, vf12 nop | maddaz.xyzw ACC, vf06, vf12 nop | addx.w vf21, vf21, vf17 -L168: +L165: nop | maddw.xyzw vf12, vf07, vf00 ilw.x vi09, -3(vi01) | itof0.xyzw vf25, vf25 nop | itof0.xyzw vf26, vf26 nop | ftoi4.xyzw vf21, vf21 nop | mul.xyzw vf12, vf12, vf23 ior vi02, vi05, vi00 | add.xyzw vf25, vf25, vf27 - ibgez vi09, L169 | add.xyzw vf26, vf26, vf27 + ibgez vi09, L166 | add.xyzw vf26, vf26, vf27 sq.xyzw vf21, 2(vi11) | nop nop | ftoi4.xyzw vf21, vf09 -L169: - ibne vi06, vi05, L170 | nop +L166: + ibne vi06, vi05, 
L167 | nop sq.xyzw vf15, 0(vi11) | miniy.xyzw vf12, vf12, vf17 ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 -L170: +L167: sq.xyzw vf15, 0(vi14) | nop sq.xyzw vf21, 2(vi14) | nop lqi.xyzw vf27, vi05 | nop @@ -2084,23 +2084,23 @@ L170: nop | nop nop | itof0.xyzw vf27, vf27 sq.xyzw vf12, 1(vi11) | nop - b L173 | nop + b L170 | nop sq.xyzw vf12, 1(vi14) | nop -L171: +L168: 3072.0 | miniw.w vf09, vf09, vf01 :i - b L164 | minii.xy vf08, vf08, I + b L161 | minii.xy vf08, vf08, I nop | nop -L172: +L169: 1024.0 | nop :i 3072.0 | maxi.xy vf09, vf09, I :i - b L167 | minii.xy vf09, vf09, I + b L164 | minii.xy vf09, vf09, I isw.w vi00, 1(vi00) | nop -L173: - ibeq vi07, vi02, L179 | nop +L170: + ibeq vi07, vi02, L176 | nop ilw.w vi15, 132(vi00) | nop - ibne vi06, vi05, L174 | add.xyzw vf11, vf27, vf25 + ibne vi06, vi05, L171 | add.xyzw vf11, vf27, vf25 nop | nop - ibne vi07, vi06, L174 | nop + ibne vi07, vi06, L171 | nop ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 nop | nop nop | nop @@ -2110,9 +2110,9 @@ L173: nop | nop lq.xyzw vf16, 2(vi08) | maxx.xyzw vf15, vf11, vf00 lq.xyzw vf13, 0(vi08) | nop - b L178 | nop + b L175 | nop nop | nop -L174: +L171: lqi.xyzw vf27, vi05 | nop nop | nop mtir vi08, vf11.x | nop @@ -2121,11 +2121,11 @@ L174: nop | nop lq.xyzw vf12, 2(vi08) | maxx.xyzw vf15, vf11, vf00 lq.xyzw vf13, 0(vi08) | nop - ibne vi06, vi05, L175 | add.xyzw vf11, vf27, vf25 + ibne vi06, vi05, L172 | add.xyzw vf11, vf27, vf25 nop | nop - ibeq vi07, vi06, L177 | nop + ibeq vi07, vi06, L174 | nop ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 -L175: +L172: lqi.xyzw vf27, vi05 | itof15.w vf12, vf12 lq.xyzw vf14, 1(vi08) | nop mtir vi08, vf11.x | nop @@ -2134,14 +2134,14 @@ L175: sq.xyzw vf14, 1(vi09) | add.w vf12, vf12, vf15 lq.xyzw vf16, 2(vi08) | maxx.xyzw vf15, vf11, vf00 lq.xyzw vf13, 0(vi08) | nop - ibne vi06, vi05, L176 | add.xyzw vf11, vf27, vf25 + ibne vi06, vi05, L173 | add.xyzw vf11, vf27, vf25 sq.xyzw vf12, 2(vi09) | nop - ibne vi07, vi06, L176 | nop + ibne 
vi07, vi06, L173 | nop ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 move.xyzw vf12, vf16 | nop - b L177 | nop + b L174 | nop ior vi09, vi10, vi00 | nop -L176: +L173: lqi.xyzw vf27, vi05 | itof15.w vf16, vf16 lq.xyzw vf14, 1(vi08) | nop mtir vi08, vf11.x | nop @@ -2150,11 +2150,11 @@ L176: sq.xyzw vf14, 1(vi10) | add.w vf16, vf16, vf15 lq.xyzw vf12, 2(vi08) | maxx.xyzw vf15, vf11, vf00 lq.xyzw vf13, 0(vi08) | nop - ibne vi06, vi05, L175 | add.xyzw vf11, vf27, vf25 + ibne vi06, vi05, L172 | add.xyzw vf11, vf27, vf25 sq.xyzw vf16, 2(vi10) | nop - ibne vi07, vi06, L175 | nop + ibne vi07, vi06, L172 | nop ior vi06, vi07, vi00 | max.xyzw vf25, vf26, vf26 -L177: +L174: nop | itof15.w vf12, vf12 lq.xyzw vf14, 1(vi08) | nop mtir vi08, vf11.x | nop @@ -2165,7 +2165,7 @@ L177: lq.xyzw vf13, 0(vi08) | nop nop | nop sq.xyzw vf12, 2(vi09) | nop -L178: +L175: nop | itof15.w vf16, vf16 lq.xyzw vf14, 1(vi08) | nop nop | nop @@ -2176,13 +2176,13 @@ L178: nop | nop nop | nop sq.xyzw vf16, 2(vi10) | nop -L179: - ibne vi00, vi15, L180 | nop +L176: + ibne vi00, vi15, L177 | nop nop | nop xgkick vi04 | nop nop | nop :e nop | nop -L180: +L177: lq.xyzw vf20, 132(vi00) | nop lq.xyzw vf21, 1(vi00) | nop iaddi vi01, vi00, 0x1 | nop diff --git a/test/decompiler/vu_reference/tfrag-result.txt b/test/decompiler/vu_reference/tfrag-result.txt index 5971441ad5..3d13cd3fb6 100644 --- a/test/decompiler/vu_reference/tfrag-result.txt +++ b/test/decompiler/vu_reference/tfrag-result.txt @@ -1,6 +1,12 @@ - b L14 | nop + b L11 | nop nop | nop - b L13 | nop + b L10 | nop + xtop vi14 | nop + b L1 | nop + xtop vi14 | nop + b L2 | nop + xtop vi14 | nop + b L3 | nop xtop vi14 | nop b L4 | nop xtop vi14 | nop @@ -16,71 +22,65 @@ xtop vi14 | nop b L10 | nop xtop vi14 | nop - b L11 | nop - xtop vi14 | nop - b L12 | nop - xtop vi14 | nop - b L13 | nop - xtop vi14 | nop - b L13 | nop + b L10 | nop xtop vi14 | nop +L1: + b L112 | nop + nop | nop +L2: + b L127 | nop + nop | nop +L3: + bal vi15, L12 | nop + nop | 
nop + bal vi15, L26 | nop + nop | nop + bal vi15, L48 | nop + nop | nop + b L102 | nop + nop | nop L4: - b L2 | nop + bal vi15, L12 | nop + nop | nop + bal vi15, L18 | nop + nop | nop + b L102 | nop nop | nop L5: - b L3 | nop + b L127 | nop nop | nop L6: - bal vi15, L15 | nop + bal vi15, L12 | nop nop | nop - bal vi15, L29 | nop + bal vi15, L18 | nop nop | nop - bal vi15, L51 | nop + bal vi15, L25 | nop nop | nop - b L1 | nop + bal vi15, L47 | nop + nop | nop + b L102 | nop nop | nop L7: - bal vi15, L15 | nop + bal vi15, L13 | nop nop | nop - bal vi15, L21 | nop + bal vi15, L17 | nop nop | nop - b L1 | nop + b L102 | nop nop | nop L8: - b L3 | nop + bal vi15, L13 | nop + nop | nop + bal vi15, L17 | nop + nop | nop + b L84 | nop nop | nop L9: - bal vi15, L15 | nop - nop | nop - bal vi15, L21 | nop - nop | nop - bal vi15, L28 | nop - nop | nop - bal vi15, L50 | nop - nop | nop - b L1 | nop + b L127 | nop nop | nop L10: - bal vi15, L16 | nop - nop | nop - bal vi15, L20 | nop - nop | nop - b L1 | nop - nop | nop -L11: - bal vi15, L16 | nop - nop | nop - bal vi15, L20 | nop - nop | nop - b L87 | nop - nop | nop -L12: - b L3 | nop - nop | nop -L13: lq.xyzw vf04, 664(vi00) | nop :e nop | nop -L14: +L11: iaddiu vi14, vi00, 0x2a0 | nop iaddiu vi01, vi00, 0x350 | nop mfir.x vf03, vi14 | nop @@ -88,15 +88,15 @@ L14: mfir.z vf03, vi14 | nop mfir.w vf03, vi01 | nop :e lq.xyzw vf04, 664(vi00) | nop -L15: +L12: ilw.z vi05, 1(vi14) | nop - b L17 | nop + b L14 | nop ilwr.x vi02, vi14 | nop -L16: +L13: ilwr.z vi02, vi14 | nop ilwr.x vi04, vi14 | nop ilw.z vi05, 1(vi14) | nop - b L17 | nop + b L14 | nop iadd vi02, vi02, vi04 | nop ilwr.z vi02, vi14 | nop ilw.x vi01, 1(vi14) | nop @@ -104,7 +104,7 @@ L16: ilw.z vi05, 1(vi14) | nop iadd vi02, vi02, vi04 | nop iadd vi02, vi02, vi01 | nop -L17: +L14: iadd vi05, vi05, vi14 | nop lqi.xyzw vf12, vi05 | nop lqi.xyzw vf16, vi05 | nop @@ -124,58 +124,58 @@ L17: nop | mulaw.xyzw ACC, vf08, vf00 nop | itof0.xyzw vf14, vf14 iaddi vi06, vi05, -0x6 
| itof0.xyzw vf18, vf18 -L18: +L15: lqi.xyzw vf15, vi05 | maddax.xyzw ACC, vf05, vf13 lqi.xyzw vf19, vi05 | madday.xyzw ACC, vf06, vf13 iaddi vi02, vi02, -0x1 | maddz.xyzw vf13, vf07, vf13 sqi.xyzw vf12, vi06 | mulaw.xyzw ACC, vf08, vf00 - ibeq vi00, vi02, L19 | itof0.xyzw vf15, vf15 + ibeq vi00, vi02, L16 | itof0.xyzw vf15, vf15 sqi.xyzw vf16, vi06 | itof0.xyzw vf19, vf19 lqi.xyzw vf12, vi05 | maddax.xyzw ACC, vf05, vf14 lqi.xyzw vf16, vi05 | madday.xyzw ACC, vf06, vf14 iaddi vi02, vi02, -0x1 | maddz.xyzw vf14, vf07, vf14 sqi.xyzw vf13, vi06 | mulaw.xyzw ACC, vf08, vf00 - ibeq vi00, vi02, L19 | itof0.xyzw vf12, vf12 + ibeq vi00, vi02, L16 | itof0.xyzw vf12, vf12 sqi.xyzw vf17, vi06 | itof0.xyzw vf16, vf16 lqi.xyzw vf13, vi05 | maddax.xyzw ACC, vf05, vf15 lqi.xyzw vf17, vi05 | madday.xyzw ACC, vf06, vf15 iaddi vi02, vi02, -0x1 | maddz.xyzw vf15, vf07, vf15 sqi.xyzw vf14, vi06 | mulaw.xyzw ACC, vf08, vf00 - ibeq vi00, vi02, L19 | itof0.xyzw vf13, vf13 + ibeq vi00, vi02, L16 | itof0.xyzw vf13, vf13 sqi.xyzw vf18, vi06 | itof0.xyzw vf17, vf17 lqi.xyzw vf14, vi05 | maddax.xyzw ACC, vf05, vf12 lqi.xyzw vf18, vi05 | madday.xyzw ACC, vf06, vf12 iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf08, vf00 - ibne vi00, vi02, L18 | itof0.xyzw vf14, vf14 + ibne vi00, vi02, L15 | itof0.xyzw vf14, vf14 sqi.xyzw vf19, vi06 | itof0.xyzw vf18, vf18 -L19: +L16: jr vi15 | nop nop | nop -L20: +L17: ilw.x vi02, 1(vi14) | nop ilw.w vi03, 2(vi14) | nop ilw.x vi04, 4(vi14) | nop lq.xyzw vf01, 656(vi00) | nop - ibeq vi00, vi02, L24 | nop + ibeq vi00, vi02, L21 | nop lq.xyzw vf02, 657(vi00) | nop lq.xy vf18, 667(vi00) | nop - b L22 | nop + b L19 | nop lq.xy vf19, 669(vi00) | nop -L21: +L18: ilwr.z vi02, vi14 | nop ilw.y vi03, 2(vi14) | nop ilw.z vi04, 3(vi14) | nop lq.xyzw vf01, 656(vi00) | nop - ibeq vi00, vi02, L24 | nop + ibeq vi00, vi02, L21 | nop lq.xyzw vf02, 657(vi00) | nop lq.xy vf18, 666(vi00) | nop lq.xy vf19, 668(vi00) | nop -L22: +L19: 
lq.xyzw vf05, 5(vi14) | nop lq.xyzw vf06, 6(vi14) | nop lq.xyzw vf07, 7(vi14) | nop - ibne vi00, vi14, L25 | nop + ibne vi00, vi14, L22 | nop lq.xyzw vf08, 8(vi14) | nop ilwr.w vi05, vi03 | nop iaddi vi03, vi03, 0x1 | nop @@ -202,7 +202,7 @@ L22: ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 nop | add.xyzw vf26, vf21, vf23 nop | itof0.xyzw vf13, vf27 -L23: +L20: ilw.z vi09, -1(vi03) | itof0.xyzw vf14, vf14 lq.xyzw vf20, 0(vi08) | maxx.xy vf16, vf16, vf00 ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 @@ -217,7 +217,7 @@ L23: lq.xyzw vf27, 0(vi05) | maddy.xyzw vf14, vf14, vf16 sq.xyzw vf12, 0(vi10) | add.xyzw vf25, vf20, vf22 ilwr.w vi08, vi07 | mini.xy vf17, vf17, vf02 - ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 sq.xyzw vf14, 1(vi10) | itof0.xyzw vf12, vf27 ilwr.z vi09, vi03 | itof0.xyzw vf15, vf15 lq.xyzw vf20, 0(vi08) | maxx.xy vf17, vf17, vf00 @@ -233,7 +233,7 @@ L23: lq.xyzw vf27, 0(vi06) | maddy.xyzw vf15, vf15, vf17 sq.xyzw vf13, 0(vi10) | add.xyzw vf25, vf20, vf22 ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 - ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 sq.xyzw vf15, 1(vi10) | itof0.xyzw vf13, vf27 ilw.z vi09, -1(vi03) | itof0.xyzw vf14, vf14 lq.xyzw vf20, 0(vi08) | maxx.xy vf16, vf16, vf00 @@ -249,7 +249,7 @@ L23: lq.xyzw vf27, 0(vi05) | maddy.xyzw vf14, vf14, vf16 sq.xyzw vf12, 0(vi10) | add.xyzw vf25, vf20, vf22 ilwr.w vi08, vi07 | mini.xy vf17, vf17, vf02 - ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 sq.xyzw vf14, 1(vi10) | itof0.xyzw vf12, vf27 ilwr.z vi09, vi03 | itof0.xyzw vf15, vf15 lq.xyzw vf20, 0(vi08) | maxx.xy vf17, vf17, vf00 @@ -265,12 +265,12 @@ L23: lq.xyzw vf27, 0(vi06) | maddy.xyzw vf15, vf15, vf17 sq.xyzw vf13, 0(vi10) | add.xyzw vf25, vf20, vf22 ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 - ibne vi00, vi02, L23 | add.xyzw vf26, vf21, vf23 + ibne vi00, vi02, L20 | 
add.xyzw vf26, vf21, vf23 sq.xyzw vf15, 1(vi10) | itof0.xyzw vf13, vf27 -L24: +L21: jr vi15 | nop nop | nop -L25: +L22: ilw.w vi05, 328(vi03) | nop iaddi vi03, vi03, 0x1 | nop ilw.x vi07, 328(vi04) | nop @@ -296,7 +296,7 @@ L25: ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 nop | add.xyzw vf26, vf21, vf23 nop | itof0.xyzw vf13, vf27 -L26: +L23: ilw.z vi09, 327(vi03) | itof0.xyzw vf14, vf14 lq.xyzw vf20, 328(vi08) | maxx.xy vf16, vf16, vf00 ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 @@ -311,7 +311,7 @@ L26: lq.xyzw vf27, 328(vi05) | maddy.xyzw vf14, vf14, vf16 sq.xyzw vf12, 328(vi10) | add.xyzw vf25, vf20, vf22 ilw.w vi08, 328(vi07) | mini.xy vf17, vf17, vf02 - ibeq vi00, vi02, L27 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 sq.xyzw vf14, 329(vi10) | itof0.xyzw vf12, vf27 ilw.z vi09, 328(vi03) | itof0.xyzw vf15, vf15 lq.xyzw vf20, 328(vi08) | maxx.xy vf17, vf17, vf00 @@ -327,7 +327,7 @@ L26: lq.xyzw vf27, 328(vi06) | maddy.xyzw vf15, vf15, vf17 sq.xyzw vf13, 328(vi10) | add.xyzw vf25, vf20, vf22 ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 - ibeq vi00, vi02, L27 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 sq.xyzw vf15, 329(vi10) | itof0.xyzw vf13, vf27 ilw.z vi09, 327(vi03) | itof0.xyzw vf14, vf14 lq.xyzw vf20, 328(vi08) | maxx.xy vf16, vf16, vf00 @@ -343,7 +343,7 @@ L26: lq.xyzw vf27, 328(vi05) | maddy.xyzw vf14, vf14, vf16 sq.xyzw vf12, 328(vi10) | add.xyzw vf25, vf20, vf22 ilw.w vi08, 328(vi07) | mini.xy vf17, vf17, vf02 - ibeq vi00, vi02, L27 | add.xyzw vf26, vf21, vf23 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 sq.xyzw vf14, 329(vi10) | itof0.xyzw vf12, vf27 ilw.z vi09, 328(vi03) | itof0.xyzw vf15, vf15 lq.xyzw vf20, 328(vi08) | maxx.xy vf17, vf17, vf00 @@ -359,37 +359,37 @@ L26: lq.xyzw vf27, 328(vi06) | maddy.xyzw vf15, vf15, vf17 sq.xyzw vf13, 328(vi10) | add.xyzw vf25, vf20, vf22 ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 - ibne vi00, vi02, L26 | add.xyzw vf26, 
vf21, vf23 + ibne vi00, vi02, L23 | add.xyzw vf26, vf21, vf23 sq.xyzw vf15, 329(vi10) | itof0.xyzw vf13, vf27 -L27: +L24: jr vi15 | nop nop | nop -L28: +L25: ilw.x vi02, 1(vi14) | nop ilw.w vi03, 2(vi14) | nop ilw.x vi04, 4(vi14) | nop lq.xyzw vf01, 656(vi00) | nop - ibeq vi00, vi02, L35 | nop + ibeq vi00, vi02, L32 | nop lq.xyzw vf02, 657(vi00) | nop lq.xy vf18, 667(vi00) | nop lq.xy vf19, 669(vi00) | nop - b L30 | nop + b L27 | nop lq.w vf26, 667(vi00) | nop -L29: +L26: ilwr.z vi02, vi14 | nop ilw.y vi03, 2(vi14) | nop ilw.z vi04, 3(vi14) | nop lq.xyzw vf01, 656(vi00) | nop - ibeq vi00, vi02, L35 | nop + ibeq vi00, vi02, L32 | nop lq.xyzw vf02, 657(vi00) | nop lq.xy vf18, 666(vi00) | nop lq.xy vf19, 668(vi00) | nop lq.w vf26, 666(vi00) | nop -L30: +L27: lq.xyzw vf05, 5(vi14) | nop lq.xyzw vf06, 6(vi14) | nop lq.xyzw vf07, 7(vi14) | nop - ibne vi00, vi14, L40 | nop + ibne vi00, vi14, L37 | nop lq.xyzw vf08, 8(vi14) | nop ilwr.w vi05, vi03 | nop nop | nop @@ -414,7 +414,7 @@ L30: nop | maddw.xy vf16, vf18, vf12 nop | add.xyzw vf24, vf20, vf22 nop | add.xyzw vf25, vf21, vf23 -L31: +L28: lq.xyzw vf13, 0(vi06) | nop ilw.z vi10, 1(vi03) | subw.w vf00, vf20, vf26 ilwr.y vi08, vi04 | subw.w vf00, vf22, vf26 @@ -428,14 +428,14 @@ L31: lq.xyzw vf23, 1(vi10) | maddz.xyzw vf13, vf07, vf13 lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf16 lq.xyzw vf21, 1(vi09) | maddy.xyzw vf12, vf12, vf16 - ibne vi00, vi01, L36 | mulax.xyzw ACC, vf25, vf16 + ibne vi00, vi01, L33 | mulax.xyzw ACC, vf25, vf16 lq.xyzw vf15, 1(vi06) | maddy.xyzw vf14, vf14, vf16 ilw.w vi05, 2(vi03) | nop nop | mulaw.xy ACC, vf19, vf00 sq.xyzw vf12, 0(vi11) | maddw.xy vf17, vf18, vf13 - ibeq vi00, vi02, L35 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 sq.xyzw vf14, 1(vi11) | add.xyzw vf25, vf21, vf23 -L32: +L29: lq.xyzw vf12, 0(vi05) | nop ilw.z vi10, 2(vi03) | subw.w vf00, vf20, vf26 ilwr.z vi07, vi04 | subw.w vf00, vf22, vf26 @@ -449,14 +449,14 @@ L32: lq.xyzw vf23, 1(vi10) | 
maddz.xyzw vf12, vf07, vf12 lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf17 lq.xyzw vf21, 1(vi09) | maddy.xyzw vf13, vf13, vf17 - ibne vi00, vi01, L37 | mulax.xyzw ACC, vf25, vf17 + ibne vi00, vi01, L34 | mulax.xyzw ACC, vf25, vf17 lq.xyzw vf14, 1(vi05) | maddy.xyzw vf15, vf15, vf17 ilw.w vi06, 3(vi03) | nop nop | mulaw.xy ACC, vf19, vf00 sq.xyzw vf13, 0(vi11) | maddw.xy vf16, vf18, vf12 - ibeq vi00, vi02, L35 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 sq.xyzw vf15, 1(vi11) | add.xyzw vf25, vf21, vf23 -L33: +L30: lq.xyzw vf13, 0(vi06) | nop ilw.z vi10, 3(vi03) | subw.w vf00, vf20, vf26 ilwr.w vi08, vi04 | subw.w vf00, vf22, vf26 @@ -470,14 +470,14 @@ L33: lq.xyzw vf23, 1(vi10) | maddz.xyzw vf13, vf07, vf13 lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf16 lq.xyzw vf21, 1(vi09) | maddy.xyzw vf12, vf12, vf16 - ibne vi00, vi01, L38 | mulax.xyzw ACC, vf25, vf16 + ibne vi00, vi01, L35 | mulax.xyzw ACC, vf25, vf16 lq.xyzw vf15, 1(vi06) | maddy.xyzw vf14, vf14, vf16 ilw.w vi05, 4(vi03) | nop iaddi vi04, vi04, 0x1 | mulaw.xy ACC, vf19, vf00 sq.xyzw vf12, 0(vi11) | maddw.xy vf17, vf18, vf13 - ibeq vi00, vi02, L35 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 sq.xyzw vf14, 1(vi11) | add.xyzw vf25, vf21, vf23 -L34: +L31: lq.xyzw vf12, 0(vi05) | nop ilw.z vi10, 4(vi03) | subw.w vf00, vf20, vf26 ilwr.x vi07, vi04 | subw.w vf00, vf22, vf26 @@ -491,49 +491,49 @@ L34: lq.xyzw vf23, 1(vi10) | maddz.xyzw vf12, vf07, vf12 lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf17 lq.xyzw vf21, 1(vi09) | maddy.xyzw vf13, vf13, vf17 - ibne vi00, vi01, L39 | mulax.xyzw ACC, vf25, vf17 + ibne vi00, vi01, L36 | mulax.xyzw ACC, vf25, vf17 lq.xyzw vf14, 1(vi05) | maddy.xyzw vf15, vf15, vf17 ilw.w vi06, 5(vi03) | nop iaddi vi03, vi03, 0x4 | mulaw.xy ACC, vf19, vf00 sq.xyzw vf13, 0(vi11) | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L31 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L28 | add.xyzw vf24, vf20, vf22 sq.xyzw vf15, 
1(vi11) | add.xyzw vf25, vf21, vf23 -L35: +L32: jr vi15 | nop nop | nop -L36: +L33: lq.xyzw vf27, 0(vi07) | nop ilw.w vi05, 2(vi03) | mulaw.xy ACC, vf19, vf00 nop | maddw.xy vf17, vf18, vf13 - ibne vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L29 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 0(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L37: +L34: lq.xyzw vf27, 0(vi08) | nop ilw.w vi06, 3(vi03) | mulaw.xy ACC, vf19, vf00 nop | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L33 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L30 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 1(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L38: +L35: lq.xyzw vf27, 0(vi07) | nop ilw.w vi05, 4(vi03) | mulaw.xy ACC, vf19, vf00 iaddi vi04, vi04, 0x1 | maddw.xy vf17, vf18, vf13 - ibne vi00, vi02, L34 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L31 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 2(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L39: +L36: lq.xyzw vf27, 0(vi08) | nop ilw.w vi06, 5(vi03) | mulaw.xy ACC, vf19, vf00 iaddi vi03, vi03, 0x4 | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L31 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L28 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, -1(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L40: +L37: ilw.w vi05, 328(vi03) | nop nop | nop nop | nop @@ -557,7 +557,7 @@ L40: nop | maddw.xy vf16, vf18, vf12 nop | add.xyzw vf24, vf20, vf22 nop | add.xyzw vf25, vf21, vf23 -L41: +L38: lq.xyzw vf13, 328(vi06) | nop ilw.z vi10, 329(vi03) | subw.w vf00, vf20, vf26 ilw.y vi08, 328(vi04) | subw.w vf00, vf22, vf26 @@ -571,14 +571,14 @@ L41: lq.xyzw vf23, 329(vi10) | maddz.xyzw vf13, vf07, vf13 lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf16 lq.xyzw vf21, 329(vi09) | maddy.xyzw vf12, vf12, vf16 - ibne vi00, vi01, L46 | mulax.xyzw ACC, vf25, vf16 + ibne vi00, vi01, L43 | mulax.xyzw ACC, vf25, vf16 lq.xyzw vf15, 329(vi06) | maddy.xyzw vf14, vf14, vf16 ilw.w vi05, 330(vi03) | nop nop | mulaw.xy 
ACC, vf19, vf00 sq.xyzw vf12, 328(vi11) | maddw.xy vf17, vf18, vf13 - ibeq vi00, vi02, L45 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 sq.xyzw vf14, 329(vi11) | add.xyzw vf25, vf21, vf23 -L42: +L39: lq.xyzw vf12, 328(vi05) | nop ilw.z vi10, 330(vi03) | subw.w vf00, vf20, vf26 ilw.z vi07, 328(vi04) | subw.w vf00, vf22, vf26 @@ -592,14 +592,14 @@ L42: lq.xyzw vf23, 329(vi10) | maddz.xyzw vf12, vf07, vf12 lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf17 lq.xyzw vf21, 329(vi09) | maddy.xyzw vf13, vf13, vf17 - ibne vi00, vi01, L47 | mulax.xyzw ACC, vf25, vf17 + ibne vi00, vi01, L44 | mulax.xyzw ACC, vf25, vf17 lq.xyzw vf14, 329(vi05) | maddy.xyzw vf15, vf15, vf17 ilw.w vi06, 331(vi03) | nop nop | mulaw.xy ACC, vf19, vf00 sq.xyzw vf13, 328(vi11) | maddw.xy vf16, vf18, vf12 - ibeq vi00, vi02, L45 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 sq.xyzw vf15, 329(vi11) | add.xyzw vf25, vf21, vf23 -L43: +L40: lq.xyzw vf13, 328(vi06) | nop ilw.z vi10, 331(vi03) | subw.w vf00, vf20, vf26 ilw.w vi08, 328(vi04) | subw.w vf00, vf22, vf26 @@ -613,14 +613,14 @@ L43: lq.xyzw vf23, 329(vi10) | maddz.xyzw vf13, vf07, vf13 lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf16 lq.xyzw vf21, 329(vi09) | maddy.xyzw vf12, vf12, vf16 - ibne vi00, vi01, L48 | mulax.xyzw ACC, vf25, vf16 + ibne vi00, vi01, L45 | mulax.xyzw ACC, vf25, vf16 lq.xyzw vf15, 329(vi06) | maddy.xyzw vf14, vf14, vf16 ilw.w vi05, 332(vi03) | nop iaddi vi04, vi04, 0x1 | mulaw.xy ACC, vf19, vf00 sq.xyzw vf12, 328(vi11) | maddw.xy vf17, vf18, vf13 - ibeq vi00, vi02, L45 | add.xyzw vf24, vf20, vf22 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 sq.xyzw vf14, 329(vi11) | add.xyzw vf25, vf21, vf23 -L44: +L41: lq.xyzw vf12, 328(vi05) | nop ilw.z vi10, 332(vi03) | subw.w vf00, vf20, vf26 ilw.x vi07, 328(vi04) | subw.w vf00, vf22, vf26 @@ -634,64 +634,64 @@ L44: lq.xyzw vf23, 329(vi10) | maddz.xyzw vf12, vf07, vf12 lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, 
vf24, vf17 lq.xyzw vf21, 329(vi09) | maddy.xyzw vf13, vf13, vf17 - ibne vi00, vi01, L49 | mulax.xyzw ACC, vf25, vf17 + ibne vi00, vi01, L46 | mulax.xyzw ACC, vf25, vf17 lq.xyzw vf14, 329(vi05) | maddy.xyzw vf15, vf15, vf17 ilw.w vi06, 333(vi03) | nop iaddi vi03, vi03, 0x4 | mulaw.xy ACC, vf19, vf00 sq.xyzw vf13, 328(vi11) | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L41 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L38 | add.xyzw vf24, vf20, vf22 sq.xyzw vf15, 329(vi11) | add.xyzw vf25, vf21, vf23 -L45: +L42: jr vi15 | nop nop | nop -L46: +L43: lq.xyzw vf27, 328(vi07) | nop ilw.w vi05, 330(vi03) | mulaw.xy ACC, vf19, vf00 nop | maddw.xy vf17, vf18, vf13 - ibne vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L39 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 328(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L47: +L44: lq.xyzw vf27, 328(vi08) | nop ilw.w vi06, 331(vi03) | mulaw.xy ACC, vf19, vf00 nop | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L43 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L40 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 329(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L48: +L45: lq.xyzw vf27, 328(vi07) | nop ilw.w vi05, 332(vi03) | mulaw.xy ACC, vf19, vf00 iaddi vi04, vi04, 0x1 | maddw.xy vf17, vf18, vf13 - ibne vi00, vi02, L44 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L41 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 330(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L49: +L46: lq.xyzw vf27, 328(vi08) | nop ilw.w vi06, 333(vi03) | mulaw.xy ACC, vf19, vf00 iaddi vi03, vi03, 0x4 | maddw.xy vf16, vf18, vf12 - ibne vi00, vi02, L41 | add.xyzw vf24, vf20, vf22 + ibne vi00, vi02, L38 | add.xyzw vf24, vf20, vf22 sq.xyzw vf27, 327(vi03) | add.xyzw vf25, vf21, vf23 jr vi15 | nop nop | nop -L50: +L47: ilw.y vi11, 1(vi14) | nop ilw.x vi02, 3(vi14) | nop ilw.y vi03, 4(vi14) | nop - b L52 | nop + b L49 | nop lq.w vf10, 667(vi00) | nop -L51: +L48: ilwr.w vi11, vi14 | nop ilw.z vi02, 2(vi14) | nop ilw.w 
vi03, 3(vi14) | nop lq.w vf10, 666(vi00) | nop -L52: - ibeq vi00, vi11, L62 | nop +L49: + ibeq vi00, vi11, L59 | nop iadd vi02, vi02, vi14 | nop iadd vi10, vi02, vi11 | nop - ibne vi00, vi14, L67 | nop + ibne vi00, vi14, L64 | nop iaddi vi10, vi10, 0x3 | nop lqi.xyzw vf06, vi02 | nop ilwr.x vi04, vi03 | nop @@ -707,93 +707,93 @@ L52: lq.xyzw vf07, 0(vi08) | nop nop | nop mtir vi09, vf06.z | nop -L53: +L50: lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L54: +L51: ilwr.w vi08, vi05 | nop ilwr.z vi06, vi03 | nop lq.xyzw vf08, 0(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L63 | nop + ibne vi00, vi01, L60 | nop lq.xyzw vf07, 0(vi08) | nop -L55: - ibeq vi10, vi02, L62 | nop +L52: + ibeq vi10, vi02, L59 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L56: +L53: ilwr.w vi08, vi06 | nop ilwr.w vi07, vi03 | nop lq.xyzw vf08, 0(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L64 | nop + ibne vi00, vi01, L61 | nop lq.xyzw vf07, 0(vi08) | nop -L57: - ibeq vi10, vi02, L62 | nop +L54: + ibeq vi10, vi02, L59 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L58: +L55: ilwr.w vi08, vi07 | nop ilw.x vi04, 1(vi03) | nop lq.xyzw vf08, 0(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L65 | nop + ibne vi00, vi01, L62 | nop lq.xyzw vf07, 0(vi08) | nop -L59: - ibeq vi10, vi02, L62 | nop +L56: + ibeq vi10, vi02, L59 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L60: +L57: ilwr.w vi08, vi04 | nop ilw.y vi05, 1(vi03) | nop lq.xyzw vf08, 0(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L66 | nop + ibne vi00, vi01, L63 | nop lq.xyzw vf07, 0(vi08) | nop -L61: +L58: iaddi vi03, vi03, 0x1 | nop - ibne vi10, vi02, L53 | nop + ibne vi10, vi02, L50 | nop mtir vi09, vf06.z | nop -L62: +L59: jr vi15 | nop nop | nop -L63: +L60: lq.xyzw vf09, 0(vi04) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L55 | nop + ibeq 
vi00, vi01, L52 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L62 | nop + ibeq vi10, vi02, L59 | nop sq.xyzw vf09, -3(vi02) | nop - b L56 | nop + b L53 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L64: +L61: lq.xyzw vf09, 0(vi05) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L57 | nop + ibeq vi00, vi01, L54 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L62 | nop + ibeq vi10, vi02, L59 | nop sq.xyzw vf09, -3(vi02) | nop - b L58 | nop + b L55 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L65: +L62: lq.xyzw vf09, 0(vi06) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L59 | nop + ibeq vi00, vi01, L56 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L62 | nop + ibeq vi10, vi02, L59 | nop sq.xyzw vf09, -3(vi02) | nop - b L60 | nop + b L57 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L66: +L63: lq.xyzw vf09, 0(vi07) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L61 | nop + ibeq vi00, vi01, L58 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L62 | nop + ibeq vi10, vi02, L59 | nop sq.xyzw vf09, -3(vi02) | nop iaddi vi03, vi03, 0x1 | nop - b L54 | nop + b L51 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L67: +L64: lqi.xyzw vf06, vi02 | nop ilw.x vi04, 328(vi03) | nop nop | nop @@ -808,95 +808,95 @@ L67: lq.xyzw vf07, 328(vi08) | nop nop | nop mtir vi09, vf06.z | nop -L68: +L65: lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L69: +L66: ilw.w vi08, 328(vi05) | nop ilw.z vi06, 328(vi03) | nop lq.xyzw vf08, 328(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L78 | nop + ibne vi00, vi01, L75 | nop lq.xyzw vf07, 328(vi08) | nop -L70: - ibeq vi10, vi02, L77 | nop +L67: + ibeq vi10, vi02, L74 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L71: +L68: ilw.w vi08, 328(vi06) | nop ilw.w vi07, 328(vi03) | nop lq.xyzw vf08, 328(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L79 | nop + ibne vi00, vi01, L76 | nop lq.xyzw vf07, 328(vi08) | nop -L72: - ibeq vi10, vi02, L77 
| nop +L69: + ibeq vi10, vi02, L74 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L73: +L70: ilw.w vi08, 328(vi07) | nop ilw.x vi04, 329(vi03) | nop lq.xyzw vf08, 328(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L80 | nop + ibne vi00, vi01, L77 | nop lq.xyzw vf07, 328(vi08) | nop -L74: - ibeq vi10, vi02, L77 | nop +L71: + ibeq vi10, vi02, L74 | nop mtir vi09, vf06.z | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L75: +L72: ilw.w vi08, 328(vi04) | nop ilw.y vi05, 329(vi03) | nop lq.xyzw vf08, 328(vi09) | nop fsand vi01, 0x2 | subw.w vf00, vf07, vf10 - ibne vi00, vi01, L81 | nop + ibne vi00, vi01, L78 | nop lq.xyzw vf07, 328(vi08) | nop -L76: +L73: iaddi vi03, vi03, 0x1 | nop - ibne vi10, vi02, L68 | nop + ibne vi10, vi02, L65 | nop mtir vi09, vf06.z | nop -L77: +L74: jr vi15 | nop nop | nop -L78: +L75: lq.xyzw vf09, 328(vi04) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L70 | nop + ibeq vi00, vi01, L67 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L77 | nop + ibeq vi10, vi02, L74 | nop sq.xyzw vf09, -3(vi02) | nop - b L71 | nop + b L68 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L79: +L76: lq.xyzw vf09, 328(vi05) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L72 | nop + ibeq vi00, vi01, L69 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L77 | nop + ibeq vi10, vi02, L74 | nop sq.xyzw vf09, -3(vi02) | nop - b L73 | nop + b L70 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L80: +L77: lq.xyzw vf09, 328(vi06) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L74 | nop + ibeq vi00, vi01, L71 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L77 | nop + ibeq vi10, vi02, L74 | nop sq.xyzw vf09, -3(vi02) | nop - b L75 | nop + b L72 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L81: +L78: lq.xyzw vf09, 328(vi07) | nop fsand vi01, 0x2 | nop - ibeq vi00, vi01, L76 | nop + ibeq vi00, vi01, L73 | nop mtir vi09, vf06.z | nop - ibeq vi10, vi02, L77 | nop + ibeq vi10, vi02, L74 | nop sq.xyzw 
vf09, -3(vi02) | nop iaddi vi03, vi03, 0x1 | nop - b L69 | nop + b L66 | nop lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 -L82: +L79: fcset 0x0 | nop - iblez vi12, L83 | nop + iblez vi12, L80 | nop iaddi vi09, vi09, 0x1 | nop ior vi10, vi06, vi00 | nop iadd vi01, vi12, vi12 | nop @@ -906,12 +906,12 @@ L82: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L83: +L80: ilw.y vi01, -1(vi09) | nop ilw.z vi13, -1(vi09) | nop - ibeq vi00, vi12, L86 | nop + ibeq vi00, vi12, L83 | nop ilwr.x vi07, vi10 | nop - ibltz vi01, L84 | nop + ibltz vi01, L81 | nop iaddiu vi12, vi12, 0x80 | nop iadd vi13, vi13, vi08 | nop lqi.xyzw vf29, vi13 | nop @@ -933,12 +933,12 @@ L83: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L84: +L81: mtir vi01, vf03.x | nop mtir vi06, vf03.y | nop mr32.xyzw vf03, vf03 | nop iadd vi07, vi07, vi11 | nop - ibgez vi13, L85 | nop + ibgez vi13, L82 | nop iswr.x vi07, vi10 | nop xgkick vi01 | nop ior vi10, vi06, vi00 | nop @@ -949,7 +949,7 @@ L84: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L85: +L82: iadd vi13, vi13, vi08 | nop xgkick vi01 | nop lqi.xyzw vf29, vi13 | nop @@ -971,7 +971,7 @@ L85: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L86: +L83: mtir vi01, vf03.x | nop mr32.xyzw vf03, vf03 | nop iadd vi07, vi07, vi11 | nop @@ -979,7 +979,7 @@ L86: xgkick vi01 | nop lq.xyzw vf04, 664(vi00) | nop :e nop | nop -L87: +L84: ilw.w vi08, 4(vi14) | addw.z vf25, vf00, vf00 lq.xyzw vf06, 658(vi00) | nop lq.xyzw vf07, 661(vi00) | nop @@ -991,7 +991,7 @@ L87: ilw.y vi03, 3(vi14) | nop lq.xyzw vf08, 662(vi00) | nop iadd vi09, vi09, vi14 | addw.z vf02, vf00, vf29 - ibne vi00, vi14, L96 | nop + ibne vi00, vi14, L93 | nop mtir vi06, vf03.x | addw.z vf25, vf25, vf29 ilwr.x vi12, vi09 | nop ilwr.z vi13, vi09 | nop @@ -1035,13 +1035,13 @@ L87: lq.xyzw vf19, 1(vi04) | clipw.xyz vf13, vf13 iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q ilwr.x vi12, vi09 | mul.xyz vf22, vf22, Q -L88: +L85: ilwr.w 
vi02, vi03 | maxy.w vf09, vf09, vf01 fcand vi01, 0x3ffff | subz.xyz vf23, vf25, vf02 - ibeq vi00, vi01, L89 | nop + ibeq vi00, vi01, L86 | nop div Q, vf01.x, vf11.w | ftoi0.xyzw vf17, vf17 nop | addw.w vf09, vf09, vf01 -L89: +L86: ilwr.w vi04, vi02 | add.xyzw vf10, vf10, vf07 lq.xy vf25, 0(vi02) | mul.xyzw vf15, vf11, vf08 sqi.xyzw vf21, vi06 | nop @@ -1049,62 +1049,62 @@ L89: iaddi vi03, vi03, 0x1 | mul.xyzw vf18, vf18, vf04 lq.xyzw vf12, 0(vi04) | miniz.w vf10, vf10, vf01 lq.xyzw vf20, 1(vi04) | clipw.xyz vf14, vf14 - ibeq vi05, vi06, L93 | mul.xyz vf11, vf11, Q + ibeq vi05, vi06, L90 | mul.xyz vf11, vf11, Q sqi.xyzw vf09, vi06 | mul.xyz vf23, vf23, Q ilwr.x vi02, vi03 | maxy.w vf10, vf10, vf01 fcand vi01, 0x3ffff | subz.xyz vf24, vf25, vf02 - ibeq vi00, vi01, L90 | nop + ibeq vi00, vi01, L87 | nop div Q, vf01.x, vf12.w | ftoi0.xyzw vf18, vf18 nop | addw.w vf10, vf10, vf01 -L90: +L87: ilwr.w vi04, vi02 | add.xyzw vf11, vf11, vf07 lq.xy vf25, 0(vi02) | mul.xyzw vf16, vf12, vf08 sqi.xyzw vf22, vi06 | mul.xyzw vf19, vf19, vf04 sqi.xyzw vf18, vi06 | ftoi4.xyzw vf10, vf10 lq.xyzw vf09, 0(vi04) | miniz.w vf11, vf11, vf01 lq.xyzw vf17, 1(vi04) | clipw.xyz vf15, vf15 - ibeq vi05, vi06, L94 | mul.xyz vf12, vf12, Q + ibeq vi05, vi06, L91 | mul.xyz vf12, vf12, Q sqi.xyzw vf10, vi06 | mul.xyz vf24, vf24, Q ilwr.y vi02, vi03 | maxy.w vf11, vf11, vf01 fcand vi01, 0x3ffff | subz.xyz vf21, vf25, vf02 - ibeq vi00, vi01, L91 | ftoi0.xyzw vf19, vf19 + ibeq vi00, vi01, L88 | ftoi0.xyzw vf19, vf19 div Q, vf01.x, vf09.w | nop nop | addw.w vf11, vf11, vf01 -L91: +L88: ilwr.w vi04, vi02 | add.xyzw vf12, vf12, vf07 lq.xy vf25, 0(vi02) | mul.xyzw vf13, vf09, vf08 sqi.xyzw vf23, vi06 | mul.xyzw vf20, vf20, vf04 sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 lq.xyzw vf10, 0(vi04) | miniz.w vf12, vf12, vf01 lq.xyzw vf18, 1(vi04) | clipw.xyz vf16, vf16 - ibeq vi05, vi06, L95 | mul.xyz vf09, vf09, Q + ibeq vi05, vi06, L92 | mul.xyz vf09, vf09, Q sqi.xyzw vf11, vi06 | mul.xyz vf21, vf21, Q ilwr.z 
vi02, vi03 | maxy.w vf12, vf12, vf01 fcand vi01, 0x3ffff | subz.xyz vf22, vf25, vf02 - ibeq vi00, vi01, L92 | ftoi0.xyzw vf20, vf20 + ibeq vi00, vi01, L89 | ftoi0.xyzw vf20, vf20 div Q, vf01.x, vf10.w | nop nop | addw.w vf12, vf12, vf01 -L92: +L89: ilwr.w vi04, vi02 | add.xyzw vf09, vf09, vf07 lq.xy vf25, 0(vi02) | mul.xyzw vf14, vf10, vf08 sqi.xyzw vf24, vi06 | mul.xyzw vf17, vf17, vf04 sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 lq.xyzw vf11, 0(vi04) | miniz.w vf09, vf09, vf01 lq.xyzw vf19, 1(vi04) | clipw.xyz vf13, vf13 - ibne vi05, vi06, L88 | mul.xyz vf10, vf10, Q + ibne vi05, vi06, L85 | mul.xyz vf10, vf10, Q sqi.xyzw vf12, vi06 | mul.xyz vf22, vf22, Q - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x3b9 | nop -L93: - b L82 | nop +L90: + b L79 | nop iaddiu vi15, vi00, 0x3c7 | nop -L94: - b L82 | nop +L91: + b L79 | nop iaddiu vi15, vi00, 0x3d4 | nop -L95: - b L82 | nop +L92: + b L79 | nop iaddiu vi15, vi00, 0x3e1 | nop -L96: +L93: ilwr.x vi12, vi09 | nop ilwr.z vi13, vi09 | nop iaddiu vi11, vi00, 0x4000 | nop @@ -1147,13 +1147,13 @@ L96: lq.xyzw vf19, 329(vi04) | clipw.xyz vf13, vf13 iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q ilwr.x vi12, vi09 | mul.xyz vf22, vf22, Q -L97: +L94: ilw.w vi02, 328(vi03) | maxy.w vf09, vf09, vf01 fcand vi01, 0x3ffff | subz.xyz vf23, vf25, vf02 - ibeq vi00, vi01, L98 | nop + ibeq vi00, vi01, L95 | nop div Q, vf01.x, vf11.w | ftoi0.xyzw vf17, vf17 nop | addw.w vf09, vf09, vf01 -L98: +L95: ilw.w vi04, 328(vi02) | add.xyzw vf10, vf10, vf07 lq.xy vf25, 328(vi02) | mul.xyzw vf15, vf11, vf08 sqi.xyzw vf21, vi06 | nop @@ -1161,61 +1161,62 @@ L98: iaddi vi03, vi03, 0x1 | mul.xyzw vf18, vf18, vf04 lq.xyzw vf12, 328(vi04) | miniz.w vf10, vf10, vf01 lq.xyzw vf20, 329(vi04) | clipw.xyz vf14, vf14 - ibeq vi05, vi06, L102 | mul.xyz vf11, vf11, Q + ibeq vi05, vi06, L99 | mul.xyz vf11, vf11, Q sqi.xyzw vf09, vi06 | mul.xyz vf23, vf23, Q ilw.x vi02, 328(vi03) | maxy.w vf10, vf10, vf01 fcand vi01, 0x3ffff | subz.xyz vf24, vf25, vf02 - ibeq vi00, 
vi01, L99 | nop + ibeq vi00, vi01, L96 | nop div Q, vf01.x, vf12.w | ftoi0.xyzw vf18, vf18 nop | addw.w vf10, vf10, vf01 -L99: +L96: ilw.w vi04, 328(vi02) | add.xyzw vf11, vf11, vf07 lq.xy vf25, 328(vi02) | mul.xyzw vf16, vf12, vf08 sqi.xyzw vf22, vi06 | mul.xyzw vf19, vf19, vf04 sqi.xyzw vf18, vi06 | ftoi4.xyzw vf10, vf10 lq.xyzw vf09, 328(vi04) | miniz.w vf11, vf11, vf01 lq.xyzw vf17, 329(vi04) | clipw.xyz vf15, vf15 - ibeq vi05, vi06, L103 | mul.xyz vf12, vf12, Q + ibeq vi05, vi06, L100 | mul.xyz vf12, vf12, Q sqi.xyzw vf10, vi06 | mul.xyz vf24, vf24, Q ilw.y vi02, 328(vi03) | maxy.w vf11, vf11, vf01 fcand vi01, 0x3ffff | subz.xyz vf21, vf25, vf02 - ibeq vi00, vi01, L100 | ftoi0.xyzw vf19, vf19 + ibeq vi00, vi01, L97 | ftoi0.xyzw vf19, vf19 div Q, vf01.x, vf09.w | nop nop | addw.w vf11, vf11, vf01 -L100: +L97: ilw.w vi04, 328(vi02) | add.xyzw vf12, vf12, vf07 lq.xy vf25, 328(vi02) | mul.xyzw vf13, vf09, vf08 sqi.xyzw vf23, vi06 | mul.xyzw vf20, vf20, vf04 sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 lq.xyzw vf10, 328(vi04) | miniz.w vf12, vf12, vf01 lq.xyzw vf18, 329(vi04) | clipw.xyz vf16, vf16 - ibeq vi05, vi06, L104 | mul.xyz vf09, vf09, Q + ibeq vi05, vi06, L101 | mul.xyz vf09, vf09, Q sqi.xyzw vf11, vi06 | mul.xyz vf21, vf21, Q ilw.z vi02, 328(vi03) | maxy.w vf12, vf12, vf01 fcand vi01, 0x3ffff | subz.xyz vf22, vf25, vf02 - ibeq vi00, vi01, L101 | ftoi0.xyzw vf20, vf20 + ibeq vi00, vi01, L98 | ftoi0.xyzw vf20, vf20 div Q, vf01.x, vf10.w | nop nop | addw.w vf12, vf12, vf01 -L101: +L98: ilw.w vi04, 328(vi02) | add.xyzw vf09, vf09, vf07 lq.xy vf25, 328(vi02) | mul.xyzw vf14, vf10, vf08 sqi.xyzw vf24, vi06 | mul.xyzw vf17, vf17, vf04 sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 lq.xyzw vf11, 328(vi04) | miniz.w vf09, vf09, vf01 lq.xyzw vf19, 329(vi04) | clipw.xyz vf13, vf13 - ibne vi05, vi06, L97 | mul.xyz vf10, vf10, Q + ibne vi05, vi06, L94 | mul.xyz vf10, vf10, Q sqi.xyzw vf12, vi06 | mul.xyz vf22, vf22, Q - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x420 | 
nop -L102: - b L82 | nop +L99: + b L79 | nop iaddiu vi15, vi00, 0x42e | nop -L103: - b L82 | nop +L100: + b L79 | nop iaddiu vi15, vi00, 0x43b | nop -L104: - b L82 | nop +L101: + b L79 | nop iaddiu vi15, vi00, 0x448 | nop +L102: lq.xyzw vf05, 660(vi00) | addw.z vf13, vf00, vf00 lq.xyzw vf06, 658(vi00) | nop ilw.w vi08, 4(vi14) | nop @@ -1224,7 +1225,7 @@ L104: lq.xyzw vf07, 661(vi00) | nop iadd vi08, vi08, vi14 | nop iadd vi09, vi09, vi14 | nop - ibne vi00, vi14, L109 | nop + ibne vi00, vi14, L107 | nop mtir vi06, vf03.x | nop ilwr.x vi12, vi09 | nop ilwr.z vi13, vi09 | nop @@ -1268,7 +1269,7 @@ L104: lq.xyzw vf11, 0(vi04) | nop iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q ilwr.x vi12, vi09 | mul.xyz vf19, vf19, Q -L105: +L103: iaddi vi03, vi03, 0x1 | nop lq.xyzw vf16, 1(vi04) | miniz.w vf09, vf09, vf01 div Q, vf01.x, vf11.w | subz.xyz vf20, vf13, vf02 @@ -1278,7 +1279,7 @@ L105: ilwr.x vi02, vi03 | mul.xyzw vf16, vf16, vf04 sqi.xyzw vf14, vi06 | maxy.w vf10, vf10, vf01 lq.xyzw vf12, 0(vi04) | nop - ibeq vi05, vi06, L106 | mul.xyz vf11, vf11, Q + ibeq vi05, vi06, L104 | mul.xyz vf11, vf11, Q sqi.xyzw vf09, vi06 | mul.xyz vf20, vf20, Q lq.xyzw vf17, 1(vi04) | miniz.w vf10, vf10, vf01 div Q, vf01.x, vf12.w | subz.xyz vf21, vf13, vf02 @@ -1288,7 +1289,7 @@ L105: ilwr.y vi02, vi03 | mul.xyzw vf17, vf17, vf04 sqi.xyzw vf15, vi06 | maxy.w vf11, vf11, vf01 lq.xyzw vf09, 0(vi04) | nop - ibeq vi05, vi06, L107 | mul.xyz vf12, vf12, Q + ibeq vi05, vi06, L105 | mul.xyz vf12, vf12, Q sqi.xyzw vf10, vi06 | mul.xyz vf21, vf21, Q lq.xyzw vf14, 1(vi04) | miniz.w vf11, vf11, vf01 div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 @@ -1298,7 +1299,7 @@ L105: ilwr.z vi02, vi03 | mul.xyzw vf14, vf14, vf04 sqi.xyzw vf16, vi06 | maxy.w vf12, vf12, vf01 lq.xyzw vf10, 0(vi04) | nop - ibeq vi05, vi06, L108 | mul.xyz vf09, vf09, Q + ibeq vi05, vi06, L106 | mul.xyz vf09, vf09, Q sqi.xyzw vf11, vi06 | mul.xyz vf18, vf18, Q lq.xyzw vf15, 1(vi04) | miniz.w vf12, vf12, vf01 div Q, vf01.x, vf10.w 
| subz.xyz vf19, vf13, vf02 @@ -1308,20 +1309,20 @@ L105: ilwr.w vi02, vi03 | mul.xyzw vf15, vf15, vf04 sqi.xyzw vf17, vi06 | maxy.w vf09, vf09, vf01 lq.xyzw vf11, 0(vi04) | nop - ibne vi05, vi06, L105 | mul.xyz vf10, vf10, Q + ibne vi05, vi06, L103 | mul.xyz vf10, vf10, Q sqi.xyzw vf12, vi06 | mul.xyz vf19, vf19, Q - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x491 | nop -L106: - b L82 | nop +L104: + b L79 | nop iaddiu vi15, vi00, 0x49c | nop -L107: - b L82 | nop +L105: + b L79 | nop iaddiu vi15, vi00, 0x4a6 | nop -L108: - b L82 | nop +L106: + b L79 | nop iaddiu vi15, vi00, 0x4b0 | nop -L109: +L107: ilwr.x vi12, vi09 | nop ilwr.z vi13, vi09 | nop iaddiu vi11, vi00, 0x4000 | nop @@ -1364,7 +1365,7 @@ L109: lq.xyzw vf11, 328(vi04) | nop iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q ilwr.x vi12, vi09 | mul.xyz vf19, vf19, Q -L110: +L108: iaddi vi03, vi03, 0x1 | nop lq.xyzw vf16, 329(vi04) | miniz.w vf09, vf09, vf01 div Q, vf01.x, vf11.w | subz.xyz vf20, vf13, vf02 @@ -1374,7 +1375,7 @@ L110: ilw.x vi02, 328(vi03) | mul.xyzw vf16, vf16, vf04 sqi.xyzw vf14, vi06 | maxy.w vf10, vf10, vf01 lq.xyzw vf12, 328(vi04) | nop - ibeq vi05, vi06, L111 | mul.xyz vf11, vf11, Q + ibeq vi05, vi06, L109 | mul.xyz vf11, vf11, Q sqi.xyzw vf09, vi06 | mul.xyz vf20, vf20, Q lq.xyzw vf17, 329(vi04) | miniz.w vf10, vf10, vf01 div Q, vf01.x, vf12.w | subz.xyz vf21, vf13, vf02 @@ -1384,7 +1385,7 @@ L110: ilw.y vi02, 328(vi03) | mul.xyzw vf17, vf17, vf04 sqi.xyzw vf15, vi06 | maxy.w vf11, vf11, vf01 lq.xyzw vf09, 328(vi04) | nop - ibeq vi05, vi06, L112 | mul.xyz vf12, vf12, Q + ibeq vi05, vi06, L110 | mul.xyz vf12, vf12, Q sqi.xyzw vf10, vi06 | mul.xyz vf21, vf21, Q lq.xyzw vf14, 329(vi04) | miniz.w vf11, vf11, vf01 div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 @@ -1394,7 +1395,7 @@ L110: ilw.z vi02, 328(vi03) | mul.xyzw vf14, vf14, vf04 sqi.xyzw vf16, vi06 | maxy.w vf12, vf12, vf01 lq.xyzw vf10, 328(vi04) | nop - ibeq vi05, vi06, L113 | mul.xyz vf09, vf09, Q + ibeq vi05, vi06, L111 | 
mul.xyz vf09, vf09, Q sqi.xyzw vf11, vi06 | mul.xyz vf18, vf18, Q lq.xyzw vf15, 329(vi04) | miniz.w vf12, vf12, vf01 div Q, vf01.x, vf10.w | subz.xyz vf19, vf13, vf02 @@ -1404,19 +1405,20 @@ L110: ilw.w vi02, 328(vi03) | mul.xyzw vf15, vf15, vf04 sqi.xyzw vf17, vi06 | maxy.w vf09, vf09, vf01 lq.xyzw vf11, 328(vi04) | nop - ibne vi05, vi06, L110 | mul.xyz vf10, vf10, Q + ibne vi05, vi06, L108 | mul.xyz vf10, vf10, Q sqi.xyzw vf12, vi06 | mul.xyz vf19, vf19, Q - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x4ec | nop -L111: - b L82 | nop +L109: + b L79 | nop iaddiu vi15, vi00, 0x4f7 | nop -L112: - b L82 | nop +L110: + b L79 | nop iaddiu vi15, vi00, 0x501 | nop -L113: - b L82 | nop +L111: + b L79 | nop iaddiu vi15, vi00, 0x50b | nop +L112: lq.xyzw vf05, 660(vi00) | nop lq.xyzw vf06, 658(vi00) | nop lq.xyzw vf25, 661(vi00) | nop @@ -1431,7 +1433,7 @@ L113: lq.xyzw vf10, 8(vi14) | nop iadd vi08, vi08, vi14 | nop iadd vi09, vi09, vi14 | nop - ibne vi00, vi14, L118 | nop + ibne vi00, vi14, L117 | nop mtir vi06, vf03.x | nop ilwr.x vi02, vi03 | maxw.xyzw vf24, vf00, vf00 ilwr.x vi12, vi09 | nop @@ -1481,7 +1483,7 @@ L113: isw.x vi12, -1(vi06) | maddax.xyzw ACC, vf07, vf13 iaddi vi09, vi09, 0x1 | madday.xyzw ACC, vf08, vf13 ilwr.x vi12, vi09 | maddz.xyzw vf13, vf09, vf13 -L114: +L113: ilwr.x vi02, vi03 | mul.xyz vf12, vf12, Q lq.xyzw vf14, 0(vi04) | mul.xyz vf20, vf20, Q lq.xyzw vf18, 1(vi04) | miniz.w vf11, vf11, vf01 @@ -1492,7 +1494,7 @@ L114: sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf12, vf12, vf01 move.z vf21, vf24 | maddax.xyzw ACC, vf07, vf14 - ibeq vi05, vi06, L115 | madday.xyzw ACC, vf08, vf14 + ibeq vi05, vi06, L114 | madday.xyzw ACC, vf08, vf14 sqi.xyzw vf11, vi06 | maddz.xyzw vf14, vf09, vf14 ilwr.y vi02, vi03 | mul.xyz vf13, vf13, Q lq.xyzw vf11, 0(vi04) | mul.xyz vf21, vf21, Q @@ -1504,7 +1506,7 @@ L114: sqi.xyzw vf16, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf13, vf13, vf01 move.z vf22, vf24 | 
maddax.xyzw ACC, vf07, vf11 - ibeq vi05, vi06, L116 | madday.xyzw ACC, vf08, vf11 + ibeq vi05, vi06, L115 | madday.xyzw ACC, vf08, vf11 sqi.xyzw vf12, vi06 | maddz.xyzw vf11, vf09, vf11 ilwr.z vi02, vi03 | mul.xyz vf14, vf14, Q lq.xyzw vf12, 0(vi04) | mul.xyz vf22, vf22, Q @@ -1516,7 +1518,7 @@ L114: sqi.xyzw vf17, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf14, vf14, vf01 move.z vf19, vf24 | maddax.xyzw ACC, vf07, vf12 - ibeq vi05, vi06, L117 | madday.xyzw ACC, vf08, vf12 + ibeq vi05, vi06, L116 | madday.xyzw ACC, vf08, vf12 sqi.xyzw vf13, vi06 | maddz.xyzw vf12, vf09, vf12 ilwr.w vi02, vi03 | mul.xyz vf11, vf11, Q lq.xyzw vf13, 0(vi04) | mul.xyz vf19, vf19, Q @@ -1528,20 +1530,20 @@ L114: sqi.xyzw vf18, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf11, vf11, vf01 move.z vf20, vf24 | maddax.xyzw ACC, vf07, vf13 - ibne vi05, vi06, L114 | madday.xyzw ACC, vf08, vf13 + ibne vi05, vi06, L113 | madday.xyzw ACC, vf08, vf13 sqi.xyzw vf14, vi06 | maddz.xyzw vf13, vf09, vf13 - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x55d | nop -L115: - b L82 | nop +L114: + b L79 | nop iaddiu vi15, vi00, 0x569 | nop -L116: - b L82 | nop +L115: + b L79 | nop iaddiu vi15, vi00, 0x575 | nop -L117: - b L82 | nop +L116: + b L79 | nop iaddiu vi15, vi00, 0x581 | nop -L118: +L117: ilw.x vi02, 328(vi03) | maxw.xyzw vf24, vf00, vf00 ilwr.x vi12, vi09 | nop ilwr.z vi13, vi09 | nop @@ -1590,7 +1592,7 @@ L118: isw.x vi12, -1(vi06) | maddax.xyzw ACC, vf07, vf13 iaddi vi09, vi09, 0x1 | madday.xyzw ACC, vf08, vf13 ilwr.x vi12, vi09 | maddz.xyzw vf13, vf09, vf13 -L119: +L118: ilw.x vi02, 328(vi03) | mul.xyz vf12, vf12, Q lq.xyzw vf14, 328(vi04) | mul.xyz vf20, vf20, Q lq.xyzw vf18, 329(vi04) | miniz.w vf11, vf11, vf01 @@ -1601,7 +1603,7 @@ L119: sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf12, vf12, vf01 move.z vf21, vf24 | maddax.xyzw ACC, vf07, vf14 - ibeq vi05, vi06, L120 | madday.xyzw ACC, vf08, vf14 + ibeq vi05, vi06, L119 | 
madday.xyzw ACC, vf08, vf14 sqi.xyzw vf11, vi06 | maddz.xyzw vf14, vf09, vf14 ilw.y vi02, 328(vi03) | mul.xyz vf13, vf13, Q lq.xyzw vf11, 328(vi04) | mul.xyz vf21, vf21, Q @@ -1613,7 +1615,7 @@ L119: sqi.xyzw vf16, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf13, vf13, vf01 move.z vf22, vf24 | maddax.xyzw ACC, vf07, vf11 - ibeq vi05, vi06, L121 | madday.xyzw ACC, vf08, vf11 + ibeq vi05, vi06, L120 | madday.xyzw ACC, vf08, vf11 sqi.xyzw vf12, vi06 | maddz.xyzw vf11, vf09, vf11 ilw.z vi02, 328(vi03) | mul.xyz vf14, vf14, Q lq.xyzw vf12, 328(vi04) | mul.xyz vf22, vf22, Q @@ -1625,7 +1627,7 @@ L119: sqi.xyzw vf17, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf14, vf14, vf01 move.z vf19, vf24 | maddax.xyzw ACC, vf07, vf12 - ibeq vi05, vi06, L122 | madday.xyzw ACC, vf08, vf12 + ibeq vi05, vi06, L121 | madday.xyzw ACC, vf08, vf12 sqi.xyzw vf13, vi06 | maddz.xyzw vf12, vf09, vf12 ilw.w vi02, 328(vi03) | mul.xyz vf11, vf11, Q lq.xyzw vf13, 328(vi04) | mul.xyz vf19, vf19, Q @@ -1637,23 +1639,23 @@ L119: sqi.xyzw vf18, vi06 | mulaw.xyzw ACC, vf10, vf00 mtir vi04, vf23.w | maxy.w vf11, vf11, vf01 move.z vf20, vf24 | maddax.xyzw ACC, vf07, vf13 - ibne vi05, vi06, L119 | madday.xyzw ACC, vf08, vf13 + ibne vi05, vi06, L118 | madday.xyzw ACC, vf08, vf13 sqi.xyzw vf14, vi06 | maddz.xyzw vf13, vf09, vf13 - b L82 | nop + b L79 | nop iaddiu vi15, vi00, 0x5c5 | nop -L120: - b L82 | nop +L119: + b L79 | nop iaddiu vi15, vi00, 0x5d1 | nop -L121: - b L82 | nop +L120: + b L79 | nop iaddiu vi15, vi00, 0x5dd | nop -L122: - b L82 | nop +L121: + b L79 | nop iaddiu vi15, vi00, 0x5e9 | nop -L123: +L122: fcset 0x0 | nop iaddi vi07, vi00, -0x1 | nop - iblez vi12, L124 | nop + iblez vi12, L123 | nop iaddi vi09, vi09, 0x1 | nop ior vi10, vi06, vi00 | nop iadd vi01, vi12, vi12 | nop @@ -1663,12 +1665,12 @@ L123: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L124: +L123: ilw.y vi01, -1(vi09) | nop ilw.z vi13, -1(vi09) | nop - ibeq vi00, vi12, L127 
| nop + ibeq vi00, vi12, L126 | nop ilwr.x vi14, vi10 | nop - ibltz vi01, L125 | nop + ibltz vi01, L124 | nop iaddiu vi12, vi12, 0x80 | nop iadd vi13, vi13, vi08 | nop lqi.xyzw vf29, vi13 | nop @@ -1690,12 +1692,12 @@ L124: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L125: +L124: mtir vi01, vf24.w | nop mtir vi06, vf03.y | nop mr32.xyzw vf03, vf03 | nop iadd vi14, vi14, vi11 | nop - ibgez vi13, L126 | nop + ibgez vi13, L125 | nop iswr.x vi14, vi10 | nop xgkick vi01 | nop ior vi10, vi06, vi00 | nop @@ -1707,7 +1709,7 @@ L125: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L126: +L125: iadd vi13, vi13, vi08 | nop xgkick vi01 | nop lqi.xyzw vf29, vi13 | nop @@ -1731,7 +1733,7 @@ L126: isw.x vi12, -1(vi06) | nop jr vi15 | nop ilwr.x vi12, vi09 | nop -L127: +L126: mtir vi01, vf24.w | nop mr32.xyzw vf03, vf03 | nop iadd vi14, vi14, vi11 | nop @@ -1740,6 +1742,7 @@ L127: xgkick vi01 | nop nop | nop :e nop | nop +L127: lq.xyzw vf02, 657(vi00) | nop lq.xyzw vf05, 660(vi00) | addw.z vf28, vf00, vf00 lq.xyzw vf06, 658(vi00) | nop @@ -1869,16 +1872,16 @@ L132: div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 ibne vi05, vi06, L128 | miniz.w vf12, vf12, vf01 sqi.xyzw vf15, vi06 | clipw.xyz vf16, vf16 - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x692 | nop L133: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x6a1 | nop L134: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x6b0 | nop L135: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x6bf | nop L136: ilw.x vi02, 328(vi03) | nop @@ -1995,14 +1998,14 @@ L141: div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 ibne vi05, vi06, L137 | miniz.w vf12, vf12, vf01 sqi.xyzw vf15, vi06 | clipw.xyz vf16, vf16 - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x707 | nop L142: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x716 | nop L143: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x725 | nop L144: - b L123 | nop + b L122 | nop iaddiu vi15, vi00, 0x734 | nop diff --git 
a/test/decompiler/vu_reference/tie-near-result.txt b/test/decompiler/vu_reference/tie-near-result.txt index e44aae07ae..fd71609d0b 100644 --- a/test/decompiler/vu_reference/tie-near-result.txt +++ b/test/decompiler/vu_reference/tie-near-result.txt @@ -1,14 +1,14 @@ - b L13 | nop + b L12 | nop mr32.xyzw vf21, vf21 | nop - b L1 | nop + b L93 | nop nop | nop nop | nop :e nop | nop - b L3 | nop - iaddi vi02, vi00, 0x0 | subx.xz vf21, vf23, vf21 b L2 | nop + iaddi vi02, vi00, 0x0 | subx.xz vf21, vf23, vf21 + b L1 | nop nop | nop -L2: +L1: lq.xyzw vf21, 967(vi00) | nop lq.xz vf23, 966(vi00) | nop lq.xyzw vf22, 968(vi00) | nop @@ -30,7 +30,7 @@ L2: isw.z vi01, 1005(vi00) | nop iaddiu vi01, vi00, 0x6ed | nop :e isw.z vi01, 1012(vi00) | nop -L3: +L2: lq.xyz vf01, 969(vi00) | nop ilwr.w vi04, vi02 | nop ilw.w vi09, 1(vi02) | nop @@ -41,7 +41,7 @@ L3: lqi.xyzw vf05, vi02 | nop mtir vi05, vf21.x | nop lqi.xyzw vf06, vi02 | subw.w vf01, vf01, vf01 -L4: +L3: iadd vi03, vi04, vi05 | nop iadd vi04, vi04, vi06 | nop iaddi vi09, vi09, -0x1 | nop @@ -62,7 +62,7 @@ L4: lqi.xyzw vf03, vi02 | nop lqi.xyzw vf04, vi02 | nop lqi.xyzw vf05, vi02 | nop - ibgtz vi09, L4 | nop + ibgtz vi09, L3 | nop lqi.xyzw vf06, vi02 | nop mtir vi09, vf02.w | nop iaddi vi02, vi02, -0x2 | subw.w vf07, vf07, vf07 @@ -71,9 +71,9 @@ L4: ilwr.z vi04, vi02 | nop iaddi vi09, vi09, -0x1 | nop iaddi vi02, vi02, 0x1 | nop - ibeq vi00, vi09, L6 | nop + ibeq vi00, vi09, L5 | nop lq.xyz vf07, 970(vi08) | nop -L5: +L4: iadd vi03, vi04, vi05 | nop iadd vi04, vi04, vi06 | nop iaddi vi09, vi09, -0x1 | nop @@ -85,9 +85,9 @@ L5: ilwr.y vi08, vi02 | nop ilwr.z vi04, vi02 | nop iaddi vi02, vi02, 0x1 | nop - ibne vi00, vi09, L5 | nop + ibne vi00, vi09, L4 | nop lq.xyz vf07, 970(vi08) | nop -L6: +L5: iaddiu vi07, vi07, 0x4000 | nop iaddiu vi07, vi07, 0x4000 | nop iadd vi03, vi04, vi05 | nop @@ -109,25 +109,25 @@ L6: lqi.xyzw vf07, vi11 | itof12.xyz vf12, vf12 lqi.xyzw vf13, vi11 | itof0.w vf12, vf12 nop | nop - ibeq vi12, vi11, L8 
| muli.xyz vf06, vf06, I + ibeq vi12, vi11, L7 | muli.xyz vf06, vf06, I nop | itof0.xyzw vf07, vf07 -L7: +L6: lqi.xyzw vf08, vi11 | itof12.xyz vf13, vf13 lqi.xyzw vf14, vi11 | itof0.w vf13, vf13 sq.xyzw vf12, -5(vi11) | nop - ibeq vi12, vi11, L8 | muli.xyz vf07, vf07, I + ibeq vi12, vi11, L7 | muli.xyz vf07, vf07, I sq.xyzw vf06, -6(vi11) | itof0.xyzw vf08, vf08 lqi.xyzw vf06, vi11 | itof12.xyz vf14, vf14 lqi.xyzw vf12, vi11 | itof0.w vf14, vf14 sq.xyzw vf13, -5(vi11) | nop - ibeq vi12, vi11, L8 | muli.xyz vf08, vf08, I + ibeq vi12, vi11, L7 | muli.xyz vf08, vf08, I sq.xyzw vf07, -6(vi11) | itof0.xyzw vf06, vf06 lqi.xyzw vf07, vi11 | itof12.xyz vf12, vf12 lqi.xyzw vf13, vi11 | itof0.w vf12, vf12 sq.xyzw vf14, -5(vi11) | nop - ibne vi12, vi11, L7 | muli.xyz vf06, vf06, I + ibne vi12, vi11, L6 | muli.xyz vf06, vf06, I sq.xyzw vf08, -6(vi11) | itof0.xyzw vf07, vf07 -L8: +L7: iaddi vi11, vi11, -0x4 | nop iadd vi13, vi13, vi11 | nop ior vi02, vi11, vi00 | nop @@ -136,49 +136,49 @@ L8: lqi.xyzw vf12, vi11 | nop nop | nop nop | nop - ibeq vi13, vi11, L10 | nop + ibeq vi13, vi11, L9 | nop nop | itof0.xyzw vf09, vf09 lqi.xyzw vf10, vi11 | itof0.xyzw vf06, vf06 lqi.xyzw vf07, vi11 | itof0.w vf12, vf12 lqi.xyzw vf13, vi11 | itof12.xyz vf12, vf12 nop | nop nop | muli.xyz vf09, vf09, I - ibeq vi13, vi11, L10 | muli.xyz vf06, vf06, I + ibeq vi13, vi11, L9 | muli.xyz vf06, vf06, I nop | itof0.xyzw vf10, vf10 -L9: +L8: lqi.xyzw vf11, vi11 | itof0.xyzw vf07, vf07 lqi.xyzw vf08, vi11 | itof0.w vf13, vf13 lqi.xyzw vf14, vi11 | itof12.xyz vf13, vf13 sqi.xyzw vf09, vi02 | nop sqi.xyzw vf06, vi02 | muli.xyz vf10, vf10, I - ibeq vi13, vi11, L10 | muli.xyz vf07, vf07, I + ibeq vi13, vi11, L9 | muli.xyz vf07, vf07, I sqi.xyzw vf12, vi02 | itof0.xyzw vf11, vf11 lqi.xyzw vf09, vi11 | itof0.xyzw vf08, vf08 lqi.xyzw vf06, vi11 | itof0.w vf14, vf14 lqi.xyzw vf12, vi11 | itof12.xyz vf14, vf14 sqi.xyzw vf10, vi02 | nop sqi.xyzw vf07, vi02 | muli.xyz vf11, vf11, I - ibeq vi13, vi11, L10 | muli.xyz 
vf08, vf08, I + ibeq vi13, vi11, L9 | muli.xyz vf08, vf08, I sqi.xyzw vf13, vi02 | itof0.xyzw vf09, vf09 lqi.xyzw vf10, vi11 | itof0.xyzw vf06, vf06 lqi.xyzw vf07, vi11 | itof0.w vf12, vf12 lqi.xyzw vf13, vi11 | itof12.xyz vf12, vf12 sqi.xyzw vf11, vi02 | nop sqi.xyzw vf08, vi02 | muli.xyz vf09, vf09, I - ibne vi13, vi11, L9 | muli.xyz vf06, vf06, I + ibne vi13, vi11, L8 | muli.xyz vf06, vf06, I sqi.xyzw vf14, vi02 | itof0.xyzw vf10, vf10 -L10: +L9: mtir vi01, vf04.z | nop mtir vi05, vf02.x | nop mtir vi14, vf02.y | nop mtir vi04, vf03.x | nop - ibne vi00, vi01, L11 | nop + ibne vi00, vi01, L10 | nop isubiu vi09, vi00, 0x7fff | nop - b L12 | nop + b L11 | nop lq.xyzw vf05, 975(vi00) | nop -L11: +L10: lq.xyzw vf05, 976(vi00) | nop -L12: +L11: iaddiu vi01, vi00, 0x3c6 | nop mtir vi06, vf03.y | nop mtir vi07, vf03.z | nop @@ -195,7 +195,7 @@ L12: xgkick vi01 | nop nop | nop :e nop | nop -L13: +L12: lqi.xyzw vf05, vi02 | nop lq.xyzw vf24, 6(vi03) | nop lq.xyzw vf04, 3(vi03) | nop @@ -224,9 +224,9 @@ L13: iadd vi07, vi07, vi01 | maddax.xyzw ACC, vf01, vf07 iadd vi08, vi08, vi01 | madday.xyzw ACC, vf02, vf07 lq.xyzw vf15, 838(vi09) | nop - ibeq vi04, vi12, L44 | maddz.xyzw vf11, vf03, vf07 + ibeq vi04, vi12, L43 | maddz.xyzw vf11, vf03, vf07 iadd vi15, vi15, vi01 | nop -L14: +L13: mtir vi12, vf05.w | nop lqi.xyzw vf08, vi02 | nop div Q, vf00.w, vf11.w | mul.xyz vf18, vf18, Q @@ -235,7 +235,7 @@ L14: sq.xyzw vf17, 0(vi12) | maddax.xyzw ACC, vf01, vf08 sq.xyzw vf13, 1(vi12) | madday.xyzw ACC, vf02, vf08 lq.xyzw vf16, 838(vi09) | nop - ibeq vi04, vi12, L17 | maddz.xyzw vf12, vf03, vf08 + ibeq vi04, vi12, L16 | maddz.xyzw vf12, vf03, vf08 sq.xyzw vf09, 2(vi12) | nop lqi.xyzw vf24, vi03 | nop mtir vi12, vf06.w | nop @@ -246,7 +246,7 @@ L14: sq.xyzw vf18, 0(vi12) | maddax.xyzw ACC, vf01, vf05 sq.xyzw vf14, 1(vi12) | madday.xyzw ACC, vf02, vf05 lq.xyzw vf13, 838(vi09) | nop - ibeq vi04, vi12, L16 | maddz.xyzw vf09, vf03, vf05 + ibeq vi04, vi12, L15 | maddz.xyzw vf09, vf03, vf05 
sq.xyzw vf10, 2(vi12) | nop mtir vi12, vf07.w | nop lqi.xyzw vf06, vi02 | nop @@ -256,7 +256,7 @@ L14: sq.xyzw vf19, 0(vi12) | maddax.xyzw ACC, vf01, vf06 sq.xyzw vf15, 1(vi12) | madday.xyzw ACC, vf02, vf06 lq.xyzw vf14, 838(vi09) | nop - ibeq vi04, vi12, L15 | maddz.xyzw vf10, vf03, vf06 + ibeq vi04, vi12, L14 | maddz.xyzw vf10, vf03, vf06 sq.xyzw vf11, 2(vi12) | nop mtir vi12, vf08.w | nop lqi.xyzw vf07, vi02 | nop @@ -266,10 +266,10 @@ L14: sq.xyzw vf20, 0(vi12) | maddax.xyzw ACC, vf01, vf07 sq.xyzw vf16, 1(vi12) | madday.xyzw ACC, vf02, vf07 lq.xyzw vf15, 838(vi09) | nop - ibne vi04, vi12, L14 | maddz.xyzw vf11, vf03, vf07 + ibne vi04, vi12, L13 | maddz.xyzw vf11, vf03, vf07 sq.xyzw vf12, 2(vi12) | nop mtir vi12, vf05.w | nop - ibne vi00, vi05, L28 | nop + ibne vi00, vi05, L27 | nop lqi.xyzw vf08, vi02 | mul.xyz vf18, vf18, Q div Q, vf00.w, vf11.w | addx.w vf06, vf06, vf21 mtir vi09, vf24.w | mulaw.xyzw ACC, vf04, vf00 @@ -294,11 +294,11 @@ L14: sq.xyzw vf19, 0(vi12) | mul.xyz vf20, vf20, Q sq.xyzw vf15, 1(vi12) | addx.w vf20, vf20, vf21 sq.xyzw vf11, 2(vi12) | nop - b L18 | nop + b L17 | nop mtir vi12, vf08.w | nop -L15: +L14: mtir vi12, vf08.w | nop - ibne vi00, vi05, L27 | nop + ibne vi00, vi05, L26 | nop lqi.xyzw vf07, vi02 | mul.xyz vf17, vf17, Q div Q, vf00.w, vf10.w | addx.w vf05, vf05, vf21 mtir vi09, vf24.z | mulaw.xyzw ACC, vf04, vf00 @@ -322,11 +322,11 @@ L15: sq.xyzw vf18, 0(vi12) | mul.xyz vf19, vf19, Q sq.xyzw vf14, 1(vi12) | addx.w vf19, vf19, vf21 sq.xyzw vf10, 2(vi12) | nop - b L21 | nop + b L20 | nop mtir vi12, vf07.w | nop -L16: +L15: mtir vi12, vf07.w | nop - ibne vi00, vi05, L26 | nop + ibne vi00, vi05, L25 | nop lqi.xyzw vf06, vi02 | mul.xyz vf20, vf20, Q div Q, vf00.w, vf09.w | addx.w vf08, vf08, vf21 mtir vi09, vf24.y | mulaw.xyzw ACC, vf04, vf00 @@ -349,12 +349,12 @@ L16: div Q, vf00.w, vf11.w | mul.xyz vf18, vf18, Q sq.xyzw vf13, 1(vi12) | addx.w vf18, vf18, vf21 sq.xyzw vf09, 2(vi12) | nop - b L20 | nop + b L19 | nop mtir vi12, vf06.w 
| nop -L17: +L16: lqi.xyzw vf24, vi03 | nop mtir vi12, vf06.w | nop - ibne vi00, vi05, L25 | nop + ibne vi00, vi05, L24 | nop lqi.xyzw vf05, vi02 | mul.xyz vf19, vf19, Q div Q, vf00.w, vf12.w | addx.w vf07, vf07, vf21 mtir vi09, vf24.x | mulaw.xyzw ACC, vf04, vf00 @@ -377,9 +377,9 @@ L17: sq.xyzw vf20, 0(vi12) | mul.xyz vf17, vf17, Q sq.xyzw vf16, 1(vi12) | addx.w vf17, vf17, vf21 sq.xyzw vf12, 2(vi12) | nop - b L19 | nop + b L18 | nop mtir vi12, vf05.w | nop -L18: +L17: lqi.xyzw vf06, vi02 | nop mtir vi09, vf24.y | nop mtir vi13, vf20.w | mulaw.xyzw ACC, vf04, vf00 @@ -392,9 +392,9 @@ L18: sq.xyzw vf16, 1(vi13) | addx.w vf17, vf17, vf21 sq.xyzw vf12, 2(vi13) | nop div Q, vf00.w, vf10.w | nop - ibeq vi06, vi12, L22 | nop + ibeq vi06, vi12, L21 | nop mtir vi12, vf05.w | nop -L19: +L18: lqi.xyzw vf07, vi02 | nop mtir vi09, vf24.z | nop mtir vi13, vf17.w | mulaw.xyzw ACC, vf04, vf00 @@ -407,9 +407,9 @@ L19: sq.xyzw vf13, 1(vi13) | addx.w vf18, vf18, vf21 sq.xyzw vf09, 2(vi13) | nop div Q, vf00.w, vf11.w | nop - ibeq vi06, vi12, L23 | nop + ibeq vi06, vi12, L22 | nop mtir vi12, vf06.w | nop -L20: +L19: lqi.xyzw vf08, vi02 | nop mtir vi09, vf24.w | nop lqi.xyzw vf24, vi03 | nop @@ -423,9 +423,9 @@ L20: sq.xyzw vf14, 1(vi13) | addx.w vf19, vf19, vf21 sq.xyzw vf10, 2(vi13) | nop div Q, vf00.w, vf12.w | nop - ibeq vi06, vi12, L24 | nop + ibeq vi06, vi12, L23 | nop mtir vi12, vf07.w | nop -L21: +L20: lqi.xyzw vf05, vi02 | nop mtir vi09, vf24.x | nop mtir vi13, vf19.w | mulaw.xyzw ACC, vf04, vf00 @@ -438,7 +438,7 @@ L21: sq.xyzw vf15, 1(vi13) | addx.w vf20, vf20, vf21 sq.xyzw vf11, 2(vi13) | nop div Q, vf00.w, vf09.w | nop - ibne vi06, vi12, L18 | nop + ibne vi06, vi12, L17 | nop mtir vi12, vf08.w | nop nop | nop nop | nop @@ -456,11 +456,11 @@ L21: sq.xyzw vf09, 2(vi12) | nop sq.xyzw vf17, 0(vi13) | nop sq.xyzw vf13, 1(vi13) | nop - ibeq vi00, vi14, L30 | nop + ibeq vi00, vi14, L29 | nop sq.xyzw vf09, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L22: +L21: nop | nop 
nop | nop mtir vi13, vf17.w | nop @@ -477,11 +477,11 @@ L22: sq.xyzw vf10, 2(vi12) | nop sq.xyzw vf18, 0(vi13) | nop sq.xyzw vf14, 1(vi13) | nop - ibeq vi00, vi14, L31 | nop + ibeq vi00, vi14, L30 | nop sq.xyzw vf10, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L23: +L22: nop | nop nop | nop mtir vi13, vf18.w | nop @@ -498,11 +498,11 @@ L23: sq.xyzw vf11, 2(vi12) | nop sq.xyzw vf19, 0(vi13) | nop sq.xyzw vf15, 1(vi13) | nop - ibeq vi00, vi14, L32 | nop + ibeq vi00, vi14, L31 | nop sq.xyzw vf11, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L24: +L23: nop | nop nop | nop mtir vi13, vf19.w | nop @@ -519,11 +519,11 @@ L24: sq.xyzw vf12, 2(vi12) | nop sq.xyzw vf20, 0(vi13) | nop sq.xyzw vf16, 1(vi13) | nop - ibeq vi00, vi14, L29 | nop + ibeq vi00, vi14, L28 | nop sq.xyzw vf12, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L25: +L24: div Q, vf00.w, vf12.w | addx.w vf07, vf07, vf21 sq.xyzw vf18, 0(vi12) | addx.w vf08, vf08, vf21 sq.xyzw vf14, 1(vi12) | nop @@ -539,11 +539,11 @@ L25: sq.xyzw vf11, 2(vi12) | nop sq.xyzw vf20, 0(vi13) | nop sq.xyzw vf16, 1(vi13) | nop - ibeq vi00, vi14, L29 | nop + ibeq vi00, vi14, L28 | nop sq.xyzw vf12, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L26: +L25: div Q, vf00.w, vf09.w | addx.w vf08, vf08, vf21 sq.xyzw vf19, 0(vi12) | addx.w vf05, vf05, vf21 sq.xyzw vf15, 1(vi12) | nop @@ -559,11 +559,11 @@ L26: sq.xyzw vf12, 2(vi12) | nop sq.xyzw vf17, 0(vi13) | nop sq.xyzw vf13, 1(vi13) | nop - ibeq vi00, vi14, L30 | nop + ibeq vi00, vi14, L29 | nop sq.xyzw vf09, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L27: +L26: div Q, vf00.w, vf10.w | addx.w vf05, vf05, vf21 sq.xyzw vf20, 0(vi12) | addx.w vf06, vf06, vf21 sq.xyzw vf16, 1(vi12) | nop @@ -579,11 +579,11 @@ L27: sq.xyzw vf09, 2(vi12) | nop sq.xyzw vf18, 0(vi13) | nop sq.xyzw vf14, 1(vi13) | nop - ibeq vi00, vi14, L31 | nop + ibeq vi00, vi14, L30 | nop sq.xyzw vf10, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L28: +L27: div Q, vf00.w, vf11.w | 
addx.w vf06, vf06, vf21 sq.xyzw vf17, 0(vi12) | addx.w vf07, vf07, vf21 sq.xyzw vf13, 1(vi12) | nop @@ -599,11 +599,11 @@ L28: sq.xyzw vf10, 2(vi12) | nop sq.xyzw vf19, 0(vi13) | nop sq.xyzw vf15, 1(vi13) | nop - ibeq vi00, vi14, L32 | nop + ibeq vi00, vi14, L31 | nop sq.xyzw vf11, 2(vi13) | nop - b L44 | nop + b L43 | nop nop | nop -L29: +L28: lqi.xyzw vf05, vi02 | nop 0.5 | subw.w vf28, vf00, vf00 :i lqi.xyz vf28, vi02 | addi.y vf27, vf00, I @@ -647,9 +647,9 @@ L29: lq.xyzw vf30, 838(vi11) | nop div Q, vf00.w, vf10.w | nop nop | nop - b L33 | mulay.xyzw ACC, vf29, vf27 + b L32 | mulay.xyzw ACC, vf29, vf27 lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 -L30: +L29: lqi.xyzw vf06, vi02 | nop lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 0.5 | nop :i @@ -694,9 +694,9 @@ L30: lq.xyzw vf30, 838(vi11) | nop div Q, vf00.w, vf11.w | nop nop | nop - b L34 | mulay.xyzw ACC, vf29, vf27 + b L33 | mulay.xyzw ACC, vf29, vf27 lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 -L31: +L30: lqi.xyzw vf07, vi02 | nop lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 0.5 | nop :i @@ -741,9 +741,9 @@ L31: lq.xyzw vf30, 838(vi11) | nop div Q, vf00.w, vf12.w | nop nop | nop - b L35 | mulay.xyzw ACC, vf29, vf27 + b L34 | mulay.xyzw ACC, vf29, vf27 lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 -L32: +L31: lqi.xyzw vf08, vi02 | nop lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 0.5 | nop :i @@ -788,9 +788,9 @@ L32: lq.xyzw vf30, 838(vi11) | nop div Q, vf00.w, vf09.w | nop nop | nop - b L36 | mulay.xyzw ACC, vf29, vf27 + b L35 | mulay.xyzw ACC, vf29, vf27 lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 -L33: +L32: lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf07 lqi.xyzw vf20, vi02 | madday.xyzw ACC, vf02, vf07 @@ -805,9 +805,9 @@ L33: div Q, vf00.w, vf11.w | nop sq.xyzw vf13, 1(vi12) | add.xyzw vf08, vf08, vf28 lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 - ibeq vi07, vi12, L37 | nop + ibeq vi07, vi12, L36 | nop sq.xyzw vf09, 2(vi12) 
| maddy.xyzw vf29, vf30, vf27 -L34: +L33: lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf08 @@ -822,9 +822,9 @@ L34: div Q, vf00.w, vf12.w | nop sq.xyzw vf14, 1(vi12) | add.xyzw vf05, vf05, vf28 lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 - ibeq vi07, vi12, L38 | nop + ibeq vi07, vi12, L37 | nop sq.xyzw vf10, 2(vi12) | maddy.xyzw vf29, vf30, vf27 -L35: +L34: lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf05 @@ -839,9 +839,9 @@ L35: div Q, vf00.w, vf09.w | nop sq.xyzw vf15, 1(vi12) | add.xyzw vf06, vf06, vf28 lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 - ibeq vi07, vi12, L39 | nop + ibeq vi07, vi12, L38 | nop sq.xyzw vf11, 2(vi12) | maddy.xyzw vf29, vf30, vf27 -L36: +L35: lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 lqi.xyzw vf19, vi02 | madday.xyzw ACC, vf02, vf06 @@ -856,7 +856,7 @@ L36: div Q, vf00.w, vf10.w | nop sq.xyzw vf16, 1(vi12) | add.xyzw vf07, vf07, vf28 lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 - ibne vi07, vi12, L33 | nop + ibne vi07, vi12, L32 | nop sq.xyzw vf12, 2(vi12) | maddy.xyzw vf29, vf30, vf27 mtir vi12, vf05.w | addx.w vf17, vf17, vf21 lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf04, vf00 @@ -877,11 +877,11 @@ L36: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibne vi08, vi12, L40 | nop + ibne vi08, vi12, L39 | nop mtir vi12, vf06.w | maddax.xyzw ACC, vf01, vf08 - b L44 | nop + b L43 | nop nop | nop -L37: +L36: mtir vi12, vf06.w | addx.w vf18, vf18, vf21 lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 @@ -901,11 +901,11 @@ L37: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibne vi08, vi12, L41 | nop + ibne 
vi08, vi12, L40 | nop mtir vi12, vf07.w | maddax.xyzw ACC, vf01, vf05 - b L44 | nop + b L43 | nop nop | nop -L38: +L37: mtir vi12, vf07.w | addx.w vf19, vf19, vf21 lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 @@ -925,11 +925,11 @@ L38: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibne vi08, vi12, L42 | nop + ibne vi08, vi12, L41 | nop mtir vi12, vf08.w | maddax.xyzw ACC, vf01, vf06 - b L44 | nop + b L43 | nop nop | nop -L39: +L38: mtir vi12, vf08.w | addx.w vf20, vf20, vf21 lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf04, vf00 lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 @@ -949,11 +949,11 @@ L39: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibne vi08, vi12, L43 | nop + ibne vi08, vi12, L42 | nop mtir vi12, vf05.w | maddax.xyzw ACC, vf01, vf07 - b L44 | nop + b L43 | nop nop | nop -L40: +L39: lqi.xyzw vf05, vi02 | madday.xyzw ACC, vf02, vf08 lqi.xyz vf28, vi02 | maddz.xyzw vf12, vf03, vf08 lqi.xyzw vf17, vi02 | mul.xyz vf19, vf19, Q @@ -972,9 +972,9 @@ L40: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibeq vi08, vi12, L44 | nop + ibeq vi08, vi12, L43 | nop mtir vi12, vf07.w | maddax.xyzw ACC, vf01, vf05 -L41: +L40: lqi.xyzw vf06, vi02 | madday.xyzw ACC, vf02, vf05 lqi.xyz vf28, vi02 | maddz.xyzw vf09, vf03, vf05 lqi.xyzw vf18, vi02 | mul.xyz vf20, vf20, Q @@ -993,9 +993,9 @@ L41: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibeq vi08, vi12, L44 | nop + ibeq vi08, vi12, L43 | nop mtir vi12, vf08.w | maddax.xyzw ACC, vf01, vf06 -L42: +L41: lqi.xyzw vf07, vi02 | madday.xyzw ACC, vf02, vf06 lqi.xyz vf28, vi02 | maddz.xyzw vf10, vf03, vf06 lqi.xyzw vf19, vi02 | mul.xyz vf17, vf17, Q @@ -1014,9 +1014,9 @@ L42: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, 
vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibeq vi08, vi12, L44 | nop + ibeq vi08, vi12, L43 | nop mtir vi12, vf05.w | maddax.xyzw ACC, vf01, vf07 -L43: +L42: lqi.xyzw vf08, vi02 | madday.xyzw ACC, vf02, vf07 lqi.xyz vf28, vi02 | maddz.xyzw vf11, vf03, vf07 lqi.xyzw vf20, vi02 | mul.xyz vf18, vf18, Q @@ -1035,9 +1035,9 @@ L43: lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf04, vf00 lq.xyzw vf30, 838(vi11) | nop lqi.xyzw vf24, vi03 | nop - ibne vi08, vi12, L40 | nop + ibne vi08, vi12, L39 | nop mtir vi12, vf06.w | maddax.xyzw ACC, vf01, vf08 -L44: +L43: mfir.x vf29, vi04 | nop mfir.y vf29, vi05 | nop mfir.z vf29, vi06 | nop @@ -1072,7 +1072,7 @@ L44: lq.xyzw vf06, 2(vi04) | nop lq.xyzw vf17, 974(vi00) | mulz.xyz vf01, vf01, vf09 nop | subw.w vf01, vf00, vf00 - iblez vi02, L69 | subw.w vf11, vf00, vf00 + iblez vi02, L68 | subw.w vf11, vf00, vf00 lq.xyz vf10, 0(vi04) | mul.xyz vf02, vf06, vf14 iaddi vi01, vi00, 0x0 | clipw.xyz vf06, vf06 iadd vi05, vi04, vi02 | nop @@ -1080,176 +1080,176 @@ L44: lq.xyzw vf07, 2(vi05) | subw.w vf12, vf00, vf00 fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 lqi.xyzw vf13, vi07 | subw.w vf02, vf00, vf00 - iblez vi08, L53 | ftoi4.xyzw vf01, vf01 + iblez vi08, L52 | ftoi4.xyzw vf01, vf01 lq.xyz vf11, 0(vi05) | mul.xyz vf03, vf07, vf14 -L45: - ibne vi00, vi01, L74 | clipw.xyz vf07, vf07 +L44: + ibne vi00, vi01, L73 | clipw.xyz vf07, vf07 mtir vi02, vf13.x | nop iadd vi06, vi05, vi08 | add.xyzw vf02, vf02, vf16 lq.xyzw vf08, 2(vi06) | subw.w vf09, vf00, vf00 fcand vi01, 0x3ffff | mulz.xyz vf03, vf03, vf11 sq.xyzw vf01, 2(vi03) | subw.w vf03, vf00, vf00 -L46: - iblez vi02, L57 | ftoi4.xyzw vf02, vf02 +L45: + iblez vi02, L56 | ftoi4.xyzw vf02, vf02 lq.xyz vf12, 0(vi06) | mul.xyz vf04, vf08, vf14 -L47: - ibne vi00, vi01, L76 | clipw.xyz vf08, vf08 +L46: + ibne vi00, vi01, L75 | clipw.xyz vf08, vf08 mtir vi08, vf13.y | nop iadd vi03, vi06, vi02 | add.xyzw vf03, vf03, vf16 lq.xyzw vf05, 2(vi03) | subw.w vf10, vf00, vf00 fcand 
vi01, 0x3ffff | mulz.xyz vf04, vf04, vf12 sq.xyzw vf02, 2(vi04) | subw.w vf04, vf00, vf00 -L48: - iblez vi08, L61 | ftoi4.xyzw vf03, vf03 +L47: + iblez vi08, L60 | ftoi4.xyzw vf03, vf03 lq.xyz vf09, 0(vi03) | mul.xyz vf01, vf05, vf14 -L49: - ibne vi00, vi01, L78 | clipw.xyz vf05, vf05 +L48: + ibne vi00, vi01, L77 | clipw.xyz vf05, vf05 mtir vi02, vf13.z | nop iadd vi04, vi03, vi08 | add.xyzw vf04, vf04, vf16 lq.xyzw vf06, 2(vi04) | subw.w vf11, vf00, vf00 fcand vi01, 0x3ffff | mulz.xyz vf01, vf01, vf09 sq.xyzw vf03, 2(vi05) | subw.w vf01, vf00, vf00 -L50: - iblez vi02, L65 | ftoi4.xyzw vf04, vf04 +L49: + iblez vi02, L64 | ftoi4.xyzw vf04, vf04 lq.xyz vf10, 0(vi04) | mul.xyz vf02, vf06, vf14 -L51: - ibne vi00, vi01, L72 | clipw.xyz vf06, vf06 +L50: + ibne vi00, vi01, L71 | clipw.xyz vf06, vf06 mtir vi08, vf13.w | nop iadd vi05, vi04, vi02 | add.xyzw vf01, vf01, vf16 lq.xyzw vf07, 2(vi05) | subw.w vf12, vf00, vf00 fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 sq.xyzw vf04, 2(vi06) | subw.w vf02, vf00, vf00 -L52: +L51: lqi.xyzw vf13, vi07 | nop - ibgtz vi08, L45 | ftoi4.xyzw vf01, vf01 + ibgtz vi08, L44 | ftoi4.xyzw vf01, vf01 lq.xyz vf11, 0(vi05) | mul.xyz vf03, vf07, vf14 -L53: - ibne vi00, vi01, L82 | clipw.xyz vf07, vf07 +L52: + ibne vi00, vi01, L81 | clipw.xyz vf07, vf07 mtir vi02, vf13.x | nop isub vi06, vi05, vi08 | addx.w vf12, vf00, vf00 lq.xyzw vf08, 2(vi06) | add.xyzw vf02, vf02, vf16 fcand vi01, 0x3ffff | mulz.xyz vf03, vf03, vf11 sq.xyzw vf01, 2(vi03) | subw.w vf03, vf00, vf00 -L54: +L53: iaddi vi10, vi08, 0xa | nop fcset 0x0 | addx.w vf09, vf00, vf00 - ibne vi00, vi10, L47 | ftoi4.xyzw vf02, vf02 + ibne vi00, vi10, L46 | ftoi4.xyzw vf02, vf02 lq.xyz vf12, 0(vi06) | mul.xyz vf04, vf08, vf14 - ibeq vi00, vi02, L55 | nop + ibeq vi00, vi02, L54 | nop iaddi vi11, vi06, -0x6 | nop - b L47 | nop + b L46 | nop isw.z vi11, 1023(vi00) | nop -L55: - ibne vi00, vi01, L89 | nop +L54: + ibne vi00, vi01, L88 | nop nop | nop nop | add.xyzw vf03, vf03, vf16 nop | nop 
nop | nop sq.xyzw vf02, 2(vi04) | nop -L56: +L55: nop | ftoi4.xyzw vf03, vf03 nop | nop nop | nop - b L70 | nop + b L69 | nop sq.xyzw vf03, 2(vi05) | nop -L57: - ibne vi00, vi01, L84 | clipw.xyz vf08, vf08 +L56: + ibne vi00, vi01, L83 | clipw.xyz vf08, vf08 mtir vi08, vf13.y | nop isub vi03, vi06, vi02 | addx.w vf09, vf00, vf00 lq.xyzw vf05, 2(vi03) | add.xyzw vf03, vf03, vf16 fcand vi01, 0x3ffff | mulz.xyz vf04, vf04, vf12 sq.xyzw vf02, 2(vi04) | subw.w vf04, vf00, vf00 -L58: +L57: iaddi vi10, vi02, 0xa | nop fcset 0x0 | addx.w vf10, vf00, vf00 - ibne vi00, vi10, L49 | ftoi4.xyzw vf03, vf03 + ibne vi00, vi10, L48 | ftoi4.xyzw vf03, vf03 lq.xyz vf09, 0(vi03) | mul.xyz vf01, vf05, vf14 - ibeq vi00, vi08, L59 | nop + ibeq vi00, vi08, L58 | nop iaddi vi11, vi03, -0x6 | nop - b L49 | nop + b L48 | nop isw.z vi11, 1023(vi00) | nop -L59: - ibne vi00, vi01, L90 | nop +L58: + ibne vi00, vi01, L89 | nop nop | nop nop | add.xyzw vf04, vf04, vf16 nop | nop nop | nop sq.xyzw vf03, 2(vi05) | nop -L60: +L59: nop | ftoi4.xyzw vf04, vf04 nop | nop nop | nop - b L70 | nop + b L69 | nop sq.xyzw vf04, 2(vi06) | nop -L61: - ibne vi00, vi01, L86 | clipw.xyz vf05, vf05 +L60: + ibne vi00, vi01, L85 | clipw.xyz vf05, vf05 mtir vi02, vf13.z | nop isub vi04, vi03, vi08 | addx.w vf10, vf00, vf00 lq.xyzw vf06, 2(vi04) | add.xyzw vf04, vf04, vf16 fcand vi01, 0x3ffff | mulz.xyz vf01, vf01, vf09 sq.xyzw vf03, 2(vi05) | subw.w vf01, vf00, vf00 -L62: +L61: iaddi vi10, vi08, 0xa | nop fcset 0x0 | addx.w vf11, vf00, vf00 - ibne vi00, vi10, L51 | ftoi4.xyzw vf04, vf04 + ibne vi00, vi10, L50 | ftoi4.xyzw vf04, vf04 lq.xyz vf10, 0(vi04) | mul.xyz vf02, vf06, vf14 - ibeq vi00, vi02, L63 | nop + ibeq vi00, vi02, L62 | nop iaddi vi11, vi04, -0x6 | nop - b L51 | nop + b L50 | nop isw.z vi11, 1023(vi00) | nop -L63: - ibne vi00, vi01, L87 | nop +L62: + ibne vi00, vi01, L86 | nop nop | nop nop | add.xyzw vf01, vf01, vf16 nop | nop nop | nop sq.xyzw vf04, 2(vi06) | nop -L64: +L63: nop | ftoi4.xyzw vf01, vf01 
nop | nop nop | nop - b L70 | nop + b L69 | nop sq.xyzw vf04, 2(vi03) | nop -L65: - ibne vi00, vi01, L80 | clipw.xyz vf06, vf06 +L64: + ibne vi00, vi01, L79 | clipw.xyz vf06, vf06 mtir vi08, vf13.w | nop isub vi05, vi04, vi02 | addx.w vf11, vf00, vf00 lq.xyzw vf07, 2(vi05) | add.xyzw vf01, vf01, vf16 fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 sq.xyzw vf04, 2(vi06) | subw.w vf02, vf00, vf00 -L66: +L65: lqi.xyzw vf13, vi07 | nop iaddi vi10, vi02, 0xa | nop fcset 0x0 | addx.w vf12, vf00, vf00 - ibne vi00, vi10, L45 | ftoi4.xyzw vf01, vf01 + ibne vi00, vi10, L44 | ftoi4.xyzw vf01, vf01 lq.xyz vf11, 0(vi05) | mul.xyz vf03, vf07, vf14 - ibeq vi00, vi08, L67 | nop + ibeq vi00, vi08, L66 | nop iaddi vi11, vi05, -0x6 | nop - b L45 | nop + b L44 | nop isw.z vi11, 1023(vi00) | nop -L67: - ibne vi00, vi01, L88 | nop +L66: + ibne vi00, vi01, L87 | nop nop | nop nop | add.xyzw vf02, vf02, vf16 nop | nop nop | nop sq.xyzw vf01, 2(vi03) | nop -L68: +L67: nop | ftoi4.xyzw vf02, vf02 nop | nop nop | nop - b L70 | nop + b L69 | nop sq.xyzw vf02, 2(vi04) | nop -L69: +L68: iaddi vi01, vi00, 0x0 | clipw.xyz vf06, vf06 isub vi05, vi04, vi02 | nop mtir vi08, vf13.w | add.xyzw vf01, vf01, vf16 lq.xyzw vf07, 2(vi05) | subw.w vf12, vf00, vf00 fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 lqi.xyzw vf13, vi07 | subw.w vf02, vf00, vf00 - b L45 | ftoi4.xyzw vf01, vf01 + b L44 | ftoi4.xyzw vf01, vf01 lq.xyz vf11, 0(vi05) | mul.xyz vf03, vf07, vf14 -L70: +L69: mtir vi04, vf29.x | nop mtir vi05, vf29.y | nop mtir vi06, vf29.z | nop @@ -1272,17 +1272,17 @@ L70: xgkick vi01 | nop mtir vi03, vf22.x | nop :e nop | nop -L71: +L70: fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 - b L52 | subw.w vf02, vf00, vf00 + b L51 | subw.w vf02, vf00, vf00 lq.xyzw vf07, 2(vi05) | nop -L72: +L71: nop | subw.w vf00, vf09, vf00 ior vi14, vi05, vi00 | mul.xyzw vf18, vf07, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf08, vf17 iadd vi05, vi04, vi02 | mul.xyzw vf20, vf05, vf17 fsand vi10, 0x2 | nop - ibeq vi00, 
vi10, L71 | add.xyzw vf01, vf01, vf15 + ibeq vi00, vi10, L70 | add.xyzw vf01, vf01, vf15 sq.xyzw vf04, 2(vi06) | subw.w vf12, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf07 | clipw.xyz vf19, vf19 @@ -1296,23 +1296,23 @@ L72: iand vi10, vi10, vi12 | addx.xyz vf26, vf09, vf00 iand vi10, vi10, vi13 | mulz.xyz vf02, vf02, vf10 nop | nop - ibne vi00, vi10, L52 | subw.w vf02, vf00, vf00 + ibne vi00, vi10, L51 | subw.w vf02, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi06, vi00 | nop ior vi11, vi03, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x42f | nop -L73: +L72: fcand vi01, 0x3ffff | mulz.xyz vf03, vf03, vf11 - b L46 | subw.w vf03, vf00, vf00 + b L45 | subw.w vf03, vf00, vf00 lq.xyzw vf08, 2(vi06) | nop -L74: +L73: nop | subw.w vf00, vf10, vf00 ior vi14, vi06, vi00 | mul.xyzw vf18, vf08, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf05, vf17 iadd vi06, vi05, vi08 | mul.xyzw vf20, vf06, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L73 | add.xyzw vf02, vf02, vf15 + ibeq vi00, vi10, L72 | add.xyzw vf02, vf02, vf15 sq.xyzw vf01, 2(vi03) | subw.w vf09, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf08 | clipw.xyz vf19, vf19 @@ -1326,23 +1326,23 @@ L74: iand vi10, vi10, vi12 | addx.xyz vf26, vf10, vf00 iand vi10, vi10, vi13 | mulz.xyz vf03, vf03, vf11 nop | nop - ibne vi00, vi10, L46 | subw.w vf03, vf00, vf00 + ibne vi00, vi10, L45 | subw.w vf03, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi03, vi00 | nop ior vi11, vi04, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x417 | nop -L75: +L74: fcand vi01, 0x3ffff | mulz.xyz vf04, vf04, vf12 - b L48 | subw.w vf04, vf00, vf00 + b L47 | subw.w vf04, vf00, vf00 lq.xyzw vf05, 2(vi03) | nop -L76: +L75: nop | subw.w vf00, vf11, vf00 ior vi14, vi03, vi00 | mul.xyzw vf18, vf05, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf06, vf17 iadd vi03, vi06, vi02 | mul.xyzw vf20, vf07, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L75 | add.xyzw vf03, vf03, vf15 + ibeq vi00, vi10, L74 | 
add.xyzw vf03, vf03, vf15 sq.xyzw vf02, 2(vi04) | subw.w vf10, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf05 | clipw.xyz vf19, vf19 @@ -1356,23 +1356,23 @@ L76: iand vi10, vi10, vi12 | addx.xyz vf26, vf11, vf00 iand vi10, vi10, vi13 | mulz.xyz vf04, vf04, vf12 nop | nop - ibne vi00, vi10, L48 | subw.w vf04, vf00, vf00 + ibne vi00, vi10, L47 | subw.w vf04, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi04, vi00 | nop ior vi11, vi05, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x41f | nop -L77: +L76: fcand vi01, 0x3ffff | mulz.xyz vf01, vf01, vf09 - b L50 | subw.w vf01, vf00, vf00 + b L49 | subw.w vf01, vf00, vf00 lq.xyzw vf06, 2(vi04) | nop -L78: +L77: nop | subw.w vf00, vf12, vf00 ior vi14, vi04, vi00 | mul.xyzw vf18, vf06, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf07, vf17 iadd vi04, vi03, vi08 | mul.xyzw vf20, vf08, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L77 | add.xyzw vf04, vf04, vf15 + ibeq vi00, vi10, L76 | add.xyzw vf04, vf04, vf15 sq.xyzw vf03, 2(vi05) | subw.w vf11, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf06 | clipw.xyz vf19, vf19 @@ -1386,23 +1386,23 @@ L78: iand vi10, vi10, vi12 | addx.xyz vf26, vf12, vf00 iand vi10, vi10, vi13 | mulz.xyz vf01, vf01, vf09 nop | nop - ibne vi00, vi10, L50 | subw.w vf01, vf00, vf00 + ibne vi00, vi10, L49 | subw.w vf01, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi05, vi00 | nop ior vi11, vi06, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x427 | nop -L79: +L78: fcand vi01, 0x3ffff | mulz.xyz vf02, vf02, vf10 - b L66 | subw.w vf02, vf00, vf00 + b L65 | subw.w vf02, vf00, vf00 lq.xyzw vf07, 2(vi05) | nop -L80: +L79: nop | subw.w vf00, vf09, vf00 ior vi14, vi05, vi00 | mul.xyzw vf18, vf07, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf08, vf17 isub vi05, vi04, vi02 | mul.xyzw vf20, vf05, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L79 | add.xyzw vf01, vf01, vf15 + ibeq vi00, vi10, L78 | add.xyzw vf01, vf01, vf15 sq.xyzw vf04, 2(vi06) | addx.w vf11, 
vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf07 | clipw.xyz vf19, vf19 @@ -1416,23 +1416,23 @@ L80: iand vi10, vi10, vi12 | addx.xyz vf26, vf09, vf00 iand vi10, vi10, vi13 | mulz.xyz vf02, vf02, vf10 nop | nop - ibne vi00, vi10, L66 | subw.w vf02, vf00, vf00 + ibne vi00, vi10, L65 | subw.w vf02, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi06, vi00 | nop ior vi11, vi03, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x483 | nop -L81: +L80: fcand vi01, 0x3ffff | mulz.xyz vf03, vf03, vf11 - b L54 | subw.w vf03, vf00, vf00 + b L53 | subw.w vf03, vf00, vf00 lq.xyzw vf08, 2(vi06) | nop -L82: +L81: nop | subw.w vf00, vf10, vf00 ior vi14, vi06, vi00 | mul.xyzw vf18, vf08, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf05, vf17 isub vi06, vi05, vi08 | mul.xyzw vf20, vf06, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L81 | add.xyzw vf02, vf02, vf15 + ibeq vi00, vi10, L80 | add.xyzw vf02, vf02, vf15 sq.xyzw vf01, 2(vi03) | addx.w vf12, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf08 | clipw.xyz vf19, vf19 @@ -1446,23 +1446,23 @@ L82: iand vi10, vi10, vi12 | addx.xyz vf26, vf10, vf00 iand vi10, vi10, vi13 | mulz.xyz vf03, vf03, vf11 nop | nop - ibne vi00, vi10, L54 | subw.w vf03, vf00, vf00 + ibne vi00, vi10, L53 | subw.w vf03, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi03, vi00 | nop ior vi11, vi04, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x438 | nop -L83: +L82: fcand vi01, 0x3ffff | mulz.xyz vf04, vf04, vf12 - b L58 | subw.w vf04, vf00, vf00 + b L57 | subw.w vf04, vf00, vf00 lq.xyzw vf05, 2(vi03) | nop -L84: +L83: nop | subw.w vf00, vf11, vf00 ior vi14, vi03, vi00 | mul.xyzw vf18, vf05, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf06, vf17 isub vi03, vi06, vi02 | mul.xyzw vf20, vf07, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L83 | add.xyzw vf03, vf03, vf15 + ibeq vi00, vi10, L82 | add.xyzw vf03, vf03, vf15 sq.xyzw vf02, 2(vi04) | addx.w vf09, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf05 | 
clipw.xyz vf19, vf19 @@ -1476,23 +1476,23 @@ L84: iand vi10, vi10, vi12 | addx.xyz vf26, vf11, vf00 iand vi10, vi10, vi13 | mulz.xyz vf04, vf04, vf12 nop | nop - ibne vi00, vi10, L58 | subw.w vf04, vf00, vf00 + ibne vi00, vi10, L57 | subw.w vf04, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi04, vi00 | nop ior vi11, vi05, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x451 | nop -L85: +L84: fcand vi01, 0x3ffff | mulz.xyz vf01, vf01, vf09 - b L62 | subw.w vf01, vf00, vf00 + b L61 | subw.w vf01, vf00, vf00 lq.xyzw vf06, 2(vi04) | nop -L86: +L85: nop | subw.w vf00, vf12, vf00 ior vi14, vi04, vi00 | mul.xyzw vf18, vf06, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf07, vf17 isub vi04, vi03, vi08 | mul.xyzw vf20, vf08, vf17 fsand vi10, 0x2 | nop - ibeq vi00, vi10, L85 | add.xyzw vf04, vf04, vf15 + ibeq vi00, vi10, L84 | add.xyzw vf04, vf04, vf15 sq.xyzw vf03, 2(vi05) | addx.w vf10, vf00, vf00 nop | clipw.xyz vf18, vf18 move.xyzw vf18, vf06 | clipw.xyz vf19, vf19 @@ -1506,13 +1506,13 @@ L86: iand vi10, vi10, vi12 | addx.xyz vf26, vf12, vf00 iand vi10, vi10, vi13 | mulz.xyz vf01, vf01, vf09 nop | nop - ibne vi00, vi10, L62 | subw.w vf01, vf00, vf00 + ibne vi00, vi10, L61 | subw.w vf01, vf00, vf00 fcand vi01, 0x3ffff | nop ior vi10, vi05, vi00 | nop ior vi11, vi06, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x46a | nop -L87: +L86: nop | mul.xyzw vf18, vf07, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf08, vf17 nop | mul.xyzw vf20, vf05, vf17 @@ -1529,12 +1529,12 @@ L87: iand vi10, vi10, vi12 | addx.xyz vf26, vf09, vf00 iand vi10, vi10, vi13 | nop nop | nop - ibne vi00, vi10, L64 | nop + ibne vi00, vi10, L63 | nop ior vi10, vi06, vi00 | nop ior vi11, vi03, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x478 | nop -L88: +L87: nop | mul.xyzw vf18, vf08, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf05, vf17 nop | mul.xyzw vf20, vf06, vf17 @@ -1551,12 +1551,12 @@ L88: iand vi10, vi10, vi12 | addx.xyz vf26, vf10, vf00 iand vi10, 
vi10, vi13 | nop nop | nop - ibne vi00, vi10, L68 | nop + ibne vi00, vi10, L67 | nop ior vi10, vi03, vi00 | nop ior vi11, vi04, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x492 | nop -L89: +L88: nop | mul.xyzw vf18, vf05, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf06, vf17 nop | mul.xyzw vf20, vf07, vf17 @@ -1573,12 +1573,12 @@ L89: iand vi10, vi10, vi12 | addx.xyz vf26, vf11, vf00 iand vi10, vi10, vi13 | nop nop | nop - ibne vi00, vi10, L56 | nop + ibne vi00, vi10, L55 | nop ior vi10, vi04, vi00 | nop ior vi11, vi05, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x446 | nop -L90: +L89: nop | mul.xyzw vf18, vf06, vf17 iaddiu vi13, vi00, 0x3f | mul.xyzw vf19, vf07, vf17 nop | mul.xyzw vf20, vf08, vf17 @@ -1595,12 +1595,12 @@ L90: iand vi10, vi10, vi12 | addx.xyz vf26, vf12, vf00 iand vi10, vi10, vi13 | nop nop | nop - ibne vi00, vi10, L60 | nop + ibne vi00, vi10, L59 | nop ior vi10, vi05, vi00 | nop ior vi11, vi06, vi00 | nop - b L91 | nop + b L90 | nop iaddiu vi15, vi00, 0x45f | nop -L91: +L90: isw.x vi02, 44(vi09) | nop isw.y vi03, 44(vi09) | nop isw.z vi04, 44(vi09) | nop @@ -1616,10 +1616,10 @@ L91: ior vi04, vi11, vi00 | nop isub vi11, vi02, vi14 | nop ilw.x vi10, 1(vi14) | nop - iblez vi11, L92 | nop + iblez vi11, L91 | nop ilw.y vi11, 1(vi14) | nop ior vi02, vi01, vi00 | nop -L92: +L91: sq.xyzw vf18, 47(vi09) | nop sq.xyzw vf19, 48(vi09) | nop sq.xyzw vf20, 49(vi09) | nop @@ -1677,7 +1677,7 @@ L92: sq.xyzw vf28, 6(vi09) | nop bal vi15, L94 | nop nop | nop - ibeq vi00, vi02, L93 | nop + ibeq vi00, vi02, L92 | nop nop | nop iaddiu vi10, vi00, 0x3c6 | nop xgkick vi10 | nop @@ -1685,7 +1685,7 @@ L92: xgkick vi09 | nop nop | nop xgkick vi10 | nop -L93: +L92: lq.xyzw vf29, 43(vi09) | nop ilw.x vi15, 46(vi09) | nop ilw.x vi02, 44(vi09) | nop @@ -1708,6 +1708,7 @@ L93: mtir vi13, vf31.x | nop mtir vi14, vf31.y | nop mtir vi15, vf31.z | nop +L93: mtir vi01, vf23.y | nop mtir vi12, vf23.w | nop mr32.xyzw vf22, vf22 | nop diff --git 
a/tools/level_tools/BspHeader.h b/tools/level_tools/BspHeader.h index f21209ea0a..6943e15f36 100644 --- a/tools/level_tools/BspHeader.h +++ b/tools/level_tools/BspHeader.h @@ -42,8 +42,8 @@ struct PrintSettings { bool print_tfrag = true; bool expand_draw_node = true; bool expand_drawable_tree_tfrag = true; - bool expand_drawable_tree_trans_tfrag = false; - bool expand_drawable_tree_tie_proto = false; + bool expand_drawable_tree_trans_tfrag = true; + bool expand_drawable_tree_tie_proto = true; bool expand_drawable_tree_tie_proto_data = false; bool expand_drawable_tree_instance_tie = false; bool expand_drawable_tree_actor = false;