diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index 18966fafe9..00958be7c7 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -53,5 +53,3 @@ if(UNIX) elseif(WIN32) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2") endif() - -install(TARGETS common) diff --git a/common/custom_data/TFrag3Data.cpp b/common/custom_data/TFrag3Data.cpp index 6fd0794781..78fcfafa8b 100644 --- a/common/custom_data/TFrag3Data.cpp +++ b/common/custom_data/TFrag3Data.cpp @@ -3,7 +3,7 @@ namespace tfrag3 { -void Draw::serialize(Serializer& ser) { +void StripDraw::serialize(Serializer& ser) { ser.from_ptr(&mode); ser.from_ptr(&tree_tex_id); ser.from_pod_vector(&vertex_index_stream); @@ -11,7 +11,7 @@ void Draw::serialize(Serializer& ser) { ser.from_ptr(&num_triangles); } -void Tree::serialize(Serializer& ser) { +void TfragTree::serialize(Serializer& ser) { ser.from_ptr(&kind); if (ser.is_saving()) { @@ -24,14 +24,32 @@ void Tree::serialize(Serializer& ser) { } ser.from_pod_vector(&vertices); - ser.from_pod_vector(&color_indices_per_vertex); - ser.from_pod_vector(&vis_nodes); ser.from_pod_vector(&colors); + bvh.serialize(ser); +} + +void TieTree::serialize(Serializer& ser) { + if (ser.is_saving()) { + ser.save(static_draws.size()); + } else { + static_draws.resize(ser.load()); + } + for (auto& draw : static_draws) { + draw.serialize(ser); + } + + ser.from_pod_vector(&vertices); + ser.from_pod_vector(&colors); + bvh.serialize(ser); +} + +void BVH::serialize(Serializer& ser) { ser.from_ptr(&first_leaf_node); ser.from_ptr(&last_leaf_node); ser.from_ptr(&first_root); ser.from_ptr(&num_roots); ser.from_ptr(&only_children); + ser.from_pod_vector(&vis_nodes); } void Texture::serialize(Serializer& ser) { @@ -62,11 +80,20 @@ void Level::serialize(Serializer& ser) { } if (ser.is_saving()) { - ser.save(trees.size()); + ser.save(tfrag_trees.size()); } else { - trees.resize(ser.load()); + tfrag_trees.resize(ser.load()); } - for (auto& tree : trees) { + for (auto& tree : tfrag_trees) { 
+ tree.serialize(ser); + } + + if (ser.is_saving()) { + ser.save(tie_trees.size()); + } else { + tie_trees.resize(ser.load()); + } + for (auto& tree : tie_trees) { tree.serialize(ser); } diff --git a/common/custom_data/Tfrag3Data.h b/common/custom_data/Tfrag3Data.h index c3e197a951..ceca48701b 100644 --- a/common/custom_data/Tfrag3Data.h +++ b/common/custom_data/Tfrag3Data.h @@ -10,7 +10,7 @@ namespace tfrag3 { -constexpr int TFRAG3_VERSION = 6; +constexpr int TFRAG3_VERSION = 7; // These vertices should be uploaded to the GPU at load time and don't change struct PreloadedVertex { @@ -18,71 +18,110 @@ struct PreloadedVertex { float x, y, z; // texture coordinates float s, t, q; - // currently unused, color table indices. + // color table index u16 color_index; u16 pad[3]; }; static_assert(sizeof(PreloadedVertex) == 32, "PreloadedVertex size"); -// Settings for an OpenGL draw -struct Draw { +// Settings for drawing a group of triangle strips. +// This refers to a group of PreloadedVertices that are already uploaded. +// All triangles here are drawn in the same "mode" (blending, texture, etc) +// The vertex index list is chunked by visibility group. +// You can just memcpy the entire list to draw everything, or iterate through visgroups and +// check visibility. +struct StripDraw { DrawMode mode; // the OpenGL draw settings. u32 tree_tex_id = 0; // the texture that should be bound for the draw - // the list of vertices in the draw. + // the list of vertices in the draw. This includes the restart code of UINT32_MAX that OpenGL + // will use to start a new strip. std::vector vertex_index_stream; // to do culling, the above vertex stream is grouped. - // by following the visgroups and checking the visibility of the tfrag_idx, you can leave out - // invisible vertices. + // by following the visgroups and checking the visibility, you can leave out invisible vertices. 
struct VisGroup { - u32 num = 0; - u32 tfrag_idx = 0; + u32 num = 0; // number of vertex indices in this group + u32 vis_idx = 0; // the visibility group they belong to }; std::vector vis_groups; - u32 num_triangles = 0; + // for debug counting. + u32 num_triangles = 0; void serialize(Serializer& ser); }; +// node in the BVH. struct VisNode { math::Vector bsphere; // the bounding sphere, in meters (4096 = 1 game meter). w = rad u16 child_id = 0xffff; // the ID of our first child. u8 num_kids = 0xff; // number of children. The children are consecutive in memory - u8 flags = 0; // flags. If 1, we have a DrawVisNode child, otherwise a Tfrag. + u8 flags = 0; // flags. If 1, we have a DrawVisNode child, otherwise a leaf. }; -enum class TFragmentTreeKind { NORMAL, TRANS, DIRT, ICE, LOWRES, LOWRES_TRANS, INVALID }; - -constexpr const char* tfrag_tree_names[] = {"normal", "trans", "dirt", "ice", - "lowres", "lowres-trans", "invalid"}; - -struct TimeOfDayColor { - math::Vector rgba[8]; -}; - -struct Tree { - TFragmentTreeKind kind; - std::vector draws; - std::vector color_indices_per_vertex; - std::vector vis_nodes; - std::vector vertices; - std::vector colors; +// The leaf nodes don't actually exist in the vector of VisNodes, but instead they are ID's used +// by the actual geometry. Currently we do not include the bspheres of these, but this might be +// worth it if we have a more performant culling algorithm. +struct BVH { + std::vector vis_nodes; // bvh for frustum culling + // additional information about the BVH u16 first_leaf_node = 0; u16 last_leaf_node = 0; u16 first_root = 0; u16 num_roots = 0; bool only_children = false; - void serialize(Serializer& ser); }; +// A time-of-day color. Each stores 8 colors. At a given "time of day", they are interpolated +// to find a single color which goes into a color palette. 
+struct TimeOfDayColor { + math::Vector rgba[8]; + + bool operator==(const TimeOfDayColor& other) const { + for (size_t i = 0; i < 8; i++) { + if (rgba[i] != other.rgba[i]) { + return false; + } + } + return true; + } +}; + +// A single texture. Stored as RGBA8888. struct Texture { u16 w, h; u32 combo_id = 0; std::vector data; std::string debug_name; std::string debug_tpage_name; + void serialize(Serializer& ser); +}; + +// Tfrag trees have several kinds: +enum class TFragmentTreeKind { NORMAL, TRANS, DIRT, ICE, LOWRES, LOWRES_TRANS, INVALID }; + +constexpr const char* tfrag_tree_names[] = {"normal", "trans", "dirt", "ice", + "lowres", "lowres-trans", "invalid"}; + +// A tfrag model +struct TfragTree { + TFragmentTreeKind kind; // our tfrag kind + std::vector draws; // the actual topology and settings + std::vector vertices; // mesh vertices + std::vector colors; // vertex colors (pre-interpolation) + BVH bvh; // the bvh for frustum culling + void serialize(Serializer& ser); +}; + +// A tie model +struct TieTree { + BVH bvh; + std::vector static_draws; // the actual topology and settings + std::vector vertices; // mesh vertices + std::vector colors; // vertex colors (pre-interpolation) + + // TODO wind stuff void serialize(Serializer& ser); }; @@ -91,7 +130,8 @@ struct Level { u16 version = TFRAG3_VERSION; std::string level_name; std::vector textures; - std::vector trees; + std::vector tfrag_trees; + std::vector tie_trees; u16 version2 = TFRAG3_VERSION; void serialize(Serializer& ser); }; diff --git a/common/dma/gs.h b/common/dma/gs.h index 8cc510deca..03a84136d5 100644 --- a/common/dma/gs.h +++ b/common/dma/gs.h @@ -349,7 +349,13 @@ struct AdGifData { // it can also represent "invalid". 
class DrawMode { public: - enum class AlphaBlend { DISABLED = 0, SRC_DST_SRC_DST = 1, SRC_0_SRC_DST = 2, SRC_0_FIX_DST = 3 }; + enum class AlphaBlend { + DISABLED = 0, + SRC_DST_SRC_DST = 1, + SRC_0_SRC_DST = 2, + SRC_0_FIX_DST = 3, // fix = 128 + SRC_DST_FIX_DST = 4 // fix = 64 + }; enum class AlphaTest { NEVER = 0, @@ -364,8 +370,8 @@ class DrawMode { GsTest::ZTest get_depth_test() const { return (GsTest::ZTest)((m_val >> 1) & 0b11); } void set_depth_test(GsTest::ZTest dt) { m_val = (m_val & ~(0b110)) | ((u32)(dt) << 1); } - AlphaBlend get_alpha_blend() const { return (AlphaBlend)((m_val >> 3) & 0b11); } - void set_alpha_blend(AlphaBlend ab) { m_val = (m_val & ~(0b11000)) | ((u32)(ab) << 3); } + AlphaBlend get_alpha_blend() const { return (AlphaBlend)((m_val >> 24) & 0b111); } + void set_alpha_blend(AlphaBlend ab) { m_val = (m_val & ~(0b111 << 24)) | ((u32)(ab) << 24); } u8 get_aref() const { return m_val >> 8; } void set_aref(u8 val) { m_val = (m_val & ~(0xff00)) | (val << 8); } @@ -457,6 +463,10 @@ class DrawMode { void enable_t_clamp() { m_val = m_val | (1 << 23); } void disable_t_clamp() { m_val = m_val & (~(1 << 23)); } + bool get_decal() const { return !(m_val & (1 << 28)); } + void enable_decal() { m_val = m_val & (~(1 << 28)); } + void disable_decal() { m_val = m_val | (1 << 28); } + u32& as_int() { return m_val; } bool operator==(const DrawMode& other) const { return m_val == other.m_val; } @@ -467,7 +477,8 @@ class DrawMode { private: // 0 - depth write enable // 1, 2 - test: never, always, gequal, greater - // 3, 4 - alpha: disable, [src,dst,src,dst], [src,0,src,dst], XX + // 3, 4 - free + // 5 - clamp enable // 6 - filt enable // 7 - tcc enable @@ -478,5 +489,7 @@ class DrawMode { // 20 - abe // 21, 22 - afail // 23 t clamp + // 24 - 27 alpha blend + // 28 !decal u32 m_val = UINT32_MAX; }; diff --git a/common/math/Vector.h b/common/math/Vector.h index f70483e8dd..97c74c01a2 100644 --- a/common/math/Vector.h +++ b/common/math/Vector.h @@ -68,6 +68,17 
@@ class Vector { return sum; } + bool operator==(const Vector& other) const { + for (int i = 0; i < Size; i++) { + if (m_data[i] != other.m_data[i]) { + return false; + } + } + return true; + } + + bool operator!=(const Vector& other) const { return !((*this) == other); } + const T length() const { return std::sqrt(squared_length()); } Vector operator+(const Vector& other) const { @@ -213,8 +224,8 @@ struct Matrix { return result; } - const T& operator()(int r, int c) const { return m_data[c + r * Cols]; } - T& operator()(int r, int c) { return m_data[r + c * Rows]; } + // const T& operator()(int r, int c) const { return m_data[c + r * Cols]; } + // T& operator()(int r, int c) { return m_data[r + c * Rows]; } Vector col(int c) const { Vector result; diff --git a/common/util/FilteredValue.h b/common/util/FilteredValue.h new file mode 100644 index 0000000000..4b11e1af56 --- /dev/null +++ b/common/util/FilteredValue.h @@ -0,0 +1,17 @@ +#pragma once + +template +class Filtered { + public: + Filtered(const T& alpha = 0.9) : m_val(T(0)), m_alpha(alpha) {} + Filtered(const T& v, const T& alpha) : m_val(v), m_alpha(alpha) {} + const T& add(const T& v) { + m_val = (m_val * m_alpha) + (v * (T(1) - m_alpha)); + return m_val; + } + const T& get() const { return m_val; } + + private: + T m_val; + T m_alpha; +}; \ No newline at end of file diff --git a/common/util/SmallVector.h b/common/util/SmallVector.h index 82c73d8bfa..f3dffb4831 100644 --- a/common/util/SmallVector.h +++ b/common/util/SmallVector.h @@ -44,6 +44,14 @@ constexpr const T& min(const T& a, const T& b) { template class SmallVector { private: + template + constexpr U* launder(U* in) const { +#if __cpp_lib_launder >= 201606 + return std::launder(in); +#else + return in; +#endif + } // how much to increase the storage amount when we run out. 
static constexpr double GROW_AMOUNT = 1.5; @@ -53,10 +61,8 @@ class SmallVector { typename std::aligned_storage::type m_inline[inline_elt_count]; // get a T* at the beginning of our inline storage. - constexpr const T* inline_begin() const { - return std::launder(reinterpret_cast(m_inline)); - } - constexpr T* inline_begin() { return std::launder(reinterpret_cast(m_inline)); } + constexpr const T* inline_begin() const { return launder(reinterpret_cast(m_inline)); } + constexpr T* inline_begin() { return launder(reinterpret_cast(m_inline)); } // regardless of our storage mode, these hold the beginning and end of the storage. // by default, they are initialized to the inline storage. @@ -73,14 +79,14 @@ class SmallVector { * The objects in storage are uninitialized. */ void allocate_and_set_heap_storage(std::size_t elt_count) { - m_storage_begin = std::launder(reinterpret_cast(new uint8_t[elt_count * sizeof(T)])); + m_storage_begin = launder(reinterpret_cast(new uint8_t[elt_count * sizeof(T)])); m_storage_end = m_storage_begin + elt_count; } /*! * Free heap storage, without calling destructors of objects. */ - void free_heap_storage(T* ptr) { delete[] std::launder(reinterpret_cast(ptr)); } + void free_heap_storage(T* ptr) { delete[] launder(reinterpret_cast(ptr)); } /*! * Set the current storage to the inline memory. 
diff --git a/decompiler/CMakeLists.txt b/decompiler/CMakeLists.txt index ad791ba78d..1e81e8d938 100644 --- a/decompiler/CMakeLists.txt +++ b/decompiler/CMakeLists.txt @@ -57,6 +57,7 @@ add_library( level_extractor/extract_level.cpp level_extractor/extract_tfrag.cpp + level_extractor/extract_tie.cpp level_extractor/BspHeader.cpp ObjectFile/LinkedObjectFile.cpp @@ -97,5 +98,3 @@ target_link_libraries(decompiler common lzokay fmt) - -install(TARGETS decompiler) diff --git a/decompiler/config/all-types.gc b/decompiler/config/all-types.gc index 1ff4bdbf6b..0089dc9d5d 100644 --- a/decompiler/config/all-types.gc +++ b/decompiler/config/all-types.gc @@ -484,6 +484,8 @@ (tfrag-tex0 5) (tfrag-0 6) (tfrag-near-0 7) + (tie-near-0 8) + (tie-0 9) ;; merc0 10 ;; generic0 11 (bucket-10 10) @@ -492,6 +494,8 @@ (tfrag-tex1 12) (tfrag-1 13) (tfrag-near-1 14) + (tie-near-1 15) + (tie-1 16) ;; merc1 17 ;; generic1 18 (bucket-17 17) @@ -6763,7 +6767,7 @@ (quad uint128 :offset 0) (data uint64 :offset 0) (cmds uint64 :offset 8) - (cmd uint8 :offset 8) + (cmd gs-reg :offset 8) (x uint32 :offset 0) (y uint32 :offset 4) (z uint32 :offset 8) @@ -9526,6 +9530,7 @@ ) (declare-type drawable-inline-array-collide-fragment drawable-inline-array) +(declare-type prototype-tie drawable) (deftype prototype-bucket-tie (prototype-bucket) ((generic-count uint16 4 :offset-assert 88) (generic-next uint32 4 :offset-assert 96) @@ -9541,6 +9546,7 @@ (color-index-qwc uint32 :dynamic :offset-assert 148) (generic-next-clear uint128 :offset 96) (generic-count-clear uint128 :offset 80) + (geometry-override prototype-tie 4 :offset 16 :score 1) ) :method-count-assert 9 :size-assert #x94 @@ -9554,7 +9560,7 @@ :size-assert #x10 :flag-assert #xa00000010 (:methods - (TODO-RENAME-9 (_type_) none 9) + (login (_type_) none 9) ) ) @@ -12751,7 +12757,7 @@ ;; - Types (deftype tie-fragment (drawable) - ((gif-ref uint32 :offset 4) + ((gif-ref (inline-array adgif-shader) :offset 4) (point-ref uint32 :offset 8) (color-index uint16 
:offset 12) (base-colors uint8 :offset 14) @@ -12763,9 +12769,9 @@ (num-dverts uint16 :offset-assert 42) (dp-ref uint32 :offset-assert 44) (dp-qwc uint32 :offset-assert 48) - (generic-ref uint32 :offset-assert 52) + (generic-ref uint32 :offset-assert 52) ;; L891 ish, just a pointer to data. (generic-count uint32 :offset-assert 56) - (debug-lines basic :offset-assert 60) + (debug-lines (array vector-array) :offset-assert 60) ) :method-count-assert 18 :size-assert #x40 @@ -12905,6 +12911,7 @@ :flag-assert #x900000134 ) +;; stored at spr + 16 (I think) (deftype prototype-tie-dma (structure) ((colora rgba 256 :offset-assert 0) (colorb rgba 256 :offset-assert 1024) @@ -16689,7 +16696,7 @@ (define-extern draw-drawable-tree-trans-tfrag (function drawable-tree-trans-tfrag none)) (define-extern draw-drawable-tree-dirt-tfrag (function drawable-tree-dirt-tfrag none)) (define-extern draw-drawable-tree-ice-tfrag (function drawable-tree-ice-tfrag none)) -(define-extern tie-near-make-perspective-matrix (function matrix none)) +(define-extern tie-near-make-perspective-matrix (function matrix matrix)) (define-extern draw-drawable-tree-instance-tie (function drawable-tree-instance-tie level none)) (define-extern init-background (function none)) (define-extern finish-background (function none)) @@ -16829,35 +16836,35 @@ ;; - Types -; (deftype tie-consts (structure) -; ((data UNKNOWN 24 :offset-assert 0) -; (vector UNKNOWN 6 :offset-assert 0) -; (quads UNKNOWN 6 :offset-assert 0) -; (adgif qword :inline :offset-assert 0) -; (strgif qword :inline :offset-assert 16) -; (extra qword :inline :offset-assert 32) -; (gifbufs qword :inline :offset-assert 48) -; (clrbufs qword :inline :offset-assert 64) -; (misc qword :inline :offset-assert 80) -; (atestgif qword :inline :offset-assert 96) -; (atest UNKNOWN 2 :offset-assert 112) -; (atest-tra ad-cmd :inline :offset-assert 112) -; (atest-def ad-cmd :inline :offset-assert 128) -; ) -; :method-count-assert 9 -; :size-assert #x90 -; :flag-assert 
#x900000090 -; ) +(deftype tie-consts (structure) + ((data uint32 24 :offset-assert 0) + (vector vector 6 :inline :offset 0) + (quads uint128 6 :offset 0) + (adgif gs-gif-tag :inline :offset 0) ;; was qword + (strgif gs-gif-tag :inline :offset 16) ;; was qword + (extra vector :inline :offset 32) ;; was qword + (gifbufs vector :inline :offset 48) ;; was qword + (clrbufs qword :inline :offset 64) + (misc qword :inline :offset 80) + (atestgif gs-gif-tag :inline :offset 96) + (atest ad-cmd 2 :inline :offset 112) + (atest-tra ad-cmd :inline :offset 112) + (atest-def ad-cmd :inline :offset 128) + ) + :method-count-assert 9 + :size-assert #x90 + :flag-assert #x900000090 + ) ;; - Functions -(define-extern tie-init-consts function) -(define-extern tie-float-reg function) -(define-extern tie-int-reg function) -(define-extern tie-init-engine function) -(define-extern tie-end-buffer function) -(define-extern tie-ints function) -(define-extern tie-floats function) +(define-extern tie-init-consts (function tie-consts int none)) +(define-extern tie-float-reg (function int string)) +(define-extern tie-int-reg (function int string)) +(define-extern tie-init-engine (function dma-buffer gs-test int none)) ;; probably first int is gs-test +(define-extern tie-end-buffer (function dma-buffer none)) +(define-extern tie-ints (function none)) +(define-extern tie-floats (function none)) ;; - Unknowns @@ -16872,32 +16879,32 @@ ;; - Types -; (deftype tie-near-consts (structure) -; ((extra qword :inline :offset-assert 0) -; (gifbufs qword :inline :offset-assert 16) -; (clrbufs qword :inline :offset-assert 32) -; (adgif qword :inline :offset-assert 48) -; (strgif qword :inline :offset-assert 64) -; (fangif qword :inline :offset-assert 80) -; (hvdfoffs vector :inline :offset-assert 96) -; (invhscale vector :inline :offset-assert 112) -; (guard vector :inline :offset-assert 128) -; (atest UNKNOWN 2 :offset-assert 144) -; (atest-tra ad-cmd :inline :offset-assert 144) -; (atest-def ad-cmd :inline 
:offset-assert 160) -; ) -; :method-count-assert 9 -; :size-assert #xb0 -; :flag-assert #x9000000b0 -; ) +(deftype tie-near-consts (structure) + ((extra qword :inline :offset-assert 0) + (gifbufs qword :inline :offset-assert 16) + (clrbufs qword :inline :offset-assert 32) + (adgif gs-gif-tag :inline :offset-assert 48) ;; was qword + (strgif gs-gif-tag :inline :offset-assert 64) ;; was qword + (fangif gs-gif-tag :inline :offset-assert 80) ;; was qword + (hvdfoffs vector :inline :offset-assert 96) + (invhscale vector :inline :offset-assert 112) + (guard vector :inline :offset-assert 128) + (atest ad-cmd 2 :inline :offset-assert 144) + (atest-tra ad-cmd :inline :offset 144) + (atest-def ad-cmd :inline :offset 160) + ) + :method-count-assert 9 + :size-assert #xb0 + :flag-assert #x9000000b0 + ) ;; - Functions -(define-extern tie-near-init-consts function) -(define-extern tie-near-init-engine function) -(define-extern tie-near-end-buffer function) -(define-extern tie-near-int-reg function) -(define-extern tie-near-float-reg function) +(define-extern tie-near-init-consts (function tie-near-consts int none)) +(define-extern tie-near-init-engine (function dma-buffer gs-test int none)) +(define-extern tie-near-end-buffer (function dma-buffer none)) +(define-extern tie-near-int-reg (function int string)) +(define-extern tie-near-float-reg (function int string)) ;; - Unknowns @@ -16935,19 +16942,19 @@ ;; - Functions (define-extern tie-init-buffers (function dma-buffer none)) -(define-extern tie-debug-between function) -(define-extern tie-debug-one function) -(define-extern walk-tie-generic-prototypes function) -(define-extern draw-inline-array-instance-tie function) -(define-extern draw-inline-array-prototype-tie-generic-asm function) -(define-extern draw-inline-array-prototype-tie-asm function) -(define-extern draw-inline-array-prototype-tie-near-asm function) -(define-extern tie-test-cam-restore function) +(define-extern tie-debug-between (function uint uint uint)) 
+(define-extern tie-debug-one (function uint uint uint)) +(define-extern walk-tie-generic-prototypes (function none)) +(define-extern draw-inline-array-instance-tie (function pointer drawable int dma-buffer none)) +(define-extern draw-inline-array-prototype-tie-generic-asm (function dma-buffer int prototype-array-tie none)) +(define-extern draw-inline-array-prototype-tie-asm (function dma-buffer int prototype-array-tie none)) +(define-extern draw-inline-array-prototype-tie-near-asm (function dma-buffer int prototype-array-tie none)) +(define-extern tie-test-cam-restore (function none)) ;; - Unknowns -;;(define-extern *tie* object) ;; unknown type -;;(define-extern *pke-hack* object) ;; unknown type +(define-extern *tie* tie-instance-debug) +(define-extern *pke-hack* vector) ;; ---------------------- diff --git a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc index 874feff3b2..5fd310ea6b 100644 --- a/decompiler/config/jak1_ntsc_black_label/hacks.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/hacks.jsonc @@ -478,9 +478,9 @@ "draw-drawable-tree-trans-tfrag": [6, 8, 13, 15], "draw-drawable-tree-dirt-tfrag": [6, 8, 13, 15], "draw-drawable-tree-ice-tfrag": [6, 8, 13, 15], - - "birth-pickup-at-point": [0], + "draw-drawable-tree-instance-tie": [10, 12, 18, 20, 26, 28, 37, 39], + "birth-pickup-at-point": [0], "draw-bones": [0, 1, 2, 8, 81], "draw-bones-hud": [7, 8] }, diff --git a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc index 518ad0e0ac..8d356bbd75 100644 --- a/decompiler/config/jak1_ntsc_black_label/label_types.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/label_types.jsonc @@ -1973,6 +1973,13 @@ ["L162", "vu-function"] ], + "tie": [ + ["L43", "vu-function"] + ], + + "tie-near": [ + ["L91", "vu-function"] + ], // please do not add things after this entry! git is dumb. 
"object-file-that-doesnt-actually-exist-and-i-just-put-this-here-to-prevent-merge-conflicts-with-this-file": [] } diff --git a/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc b/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc index 337a3e46a4..077ba65017 100644 --- a/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/stack_structures.jsonc @@ -5941,5 +5941,11 @@ [16, "vector"] ], + "tie-test-cam-restore": [ + [16, "vector"], + [32, "matrix"], + [96, "event-message-block"] + ], + "placeholder-do-not-add-below!": [] } diff --git a/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc b/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc index 90fc7785ac..7bf60d1199 100644 --- a/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc +++ b/decompiler/config/jak1_ntsc_black_label/type_casts.jsonc @@ -7205,5 +7205,82 @@ [137, "v1", "float"] ], + "tie-init-engine": [ + [[14, 18], "a0", "dma-packet"], + [[24, 28], "a0", "gs-gif-tag"], + [31, "a0", "(pointer gs-test)"], + [33, "a0", "(pointer gs-reg64)"], + [[43, 51], "a0", "dma-packet"], + [[64, 69], "a0", "dma-packet"], + [[74, 78], "a0", "dma-packet"], + [[82, 89], "v1", "(inline-array vector4w)"], + [[89, 97], "v1", "(pointer vif-tag)"] + ], + + "tie-end-buffer": [ + [[6, 10], "a1", "dma-packet"], + [[16, 19], "a1", "gs-gif-tag"], + [24, "a1", "(pointer gs-test)"], + [26, "a1", "(pointer gs-reg64)"], + [[32, 36], "a1", "dma-packet"], + [[41, 52], "a0", "(pointer vif-tag)"] + ], + + "tie-ints": [ + [[3, 30], "gp", "(pointer uint32)"] + ], + + "tie-floats": [ + [[3, 73], "gp", "(pointer uint32)"] + ], + + "tie-init-buffers": [ + [[29, 32], "v1", "dma-packet"], + [[59, 62], "a0", "dma-packet"], + [65, "a0", "(pointer uint32)"], + [[96, 99], "v1", "dma-packet"], + [[126, 129], "a0", "dma-packet"], + [132, "a0", "(pointer uint32)"], + [[163, 166], "v1", "dma-packet"], + [[193, 196], "a0", "dma-packet"], + [199, "a0", "(pointer 
uint32)"], + [[230, 233], "v1", "dma-packet"], + [[260, 263], "a0", "dma-packet"], + [266, "a0", "(pointer uint32)"] + ], + + "draw-drawable-tree-instance-tie": [ + [[23, 36], "v1", "drawable-inline-array-node"], + [25, "a0", "drawable-inline-array-node"], + [61, "v1", "drawable-inline-array-instance-tie"], + [74, "v1", "drawable-inline-array-node"], + [84, "v1", "int"], + [86, "a0", "int"], + [66, "a1", "terrain-context"], + [[363, 366], "v1", "dma-packet"], + [[484, 487], "v1", "dma-packet"] + ], + + "(method 10 drawable-tree-instance-tie)": [ + [3, "a1", "terrain-context"] + ], + + "(method 14 drawable-tree-instance-tie)": [ + [[47, 62], "t1", "tie-fragment"], + [[102, 117], "t1", "tie-fragment"], + [[150, 165], "a1", "tie-fragment"] + ], + + "(method 11 drawable-inline-array-instance-tie)": [ + [[1, 6], "v1", "instance-tie"] + ], + + "(method 12 drawable-inline-array-instance-tie)": [ + [[1, 6], "v1", "instance-tie"] + ], + + "(method 13 drawable-inline-array-instance-tie)": [ + [[1, 6], "v1", "instance-tie"] + ], "placeholder-do-not-add-below": [] } diff --git a/decompiler/level_extractor/BspHeader.cpp b/decompiler/level_extractor/BspHeader.cpp index 62fe508f8d..f11c326e35 100644 --- a/decompiler/level_extractor/BspHeader.cpp +++ b/decompiler/level_extractor/BspHeader.cpp @@ -31,6 +31,19 @@ void Vector::read_from_file(Ref ref) { } } +void Matrix4h::read_from_file(Ref ref) { + if ((ref.byte_offset % 16) != 0) { + throw Error("misaligned Matrix4h"); + } + for (int i = 0; i < 8; i++) { + const auto& word = ref.data->words_by_seg.at(ref.seg).at((ref.byte_offset / 4) + i); + if (word.kind() != decompiler::LinkedWord::PLAIN_DATA) { + throw Error("Matrix4h didn't get plain data."); + } + memcpy(&data[i * 2], &word.data, 4); + } +} + std::string Vector::print(int indent) const { std::string is(indent, ' '); std::string result; @@ -426,6 +439,32 @@ void TieFragment::read_from_file(TypedRef ref, bsphere.read_from_file(get_field_ref(ref, "bsphere", dts)); num_tris = 
read_plain_data_field(ref, "num-tris", dts); num_dverts = read_plain_data_field(ref, "num-dverts", dts); + tex_count = read_plain_data_field(ref, "tex-count", dts); + gif_count = read_plain_data_field(ref, "gif-count", dts); + vertex_count = read_plain_data_field(ref, "vertex-count", dts); + + auto gif_data_ref = deref_label(get_field_ref(ref, "gif-ref", dts)); + + assert((tex_count % 5) == 0); + u32 total_gif_qw = tex_count + gif_count; + gif_data.resize(16 * total_gif_qw); + for (u32 i = 0; i < total_gif_qw * 4; i++) { + auto& word = + gif_data_ref.data->words_by_seg.at(gif_data_ref.seg).at((gif_data_ref.byte_offset / 4) + i); + assert(word.kind() == decompiler::LinkedWord::PLAIN_DATA); + memcpy(gif_data.data() + (i * 4), &word.data, 4); + } + + auto points_data_ref = deref_label(get_field_ref(ref, "point-ref", dts)); + point_ref.resize(16 * vertex_count); + debug_label_name = inspect_ref(get_field_ref(ref, "point-ref", dts)); + for (u32 i = 0; i < vertex_count * 4; i++) { + auto& word = points_data_ref.data->words_by_seg.at(points_data_ref.seg) + .at((points_data_ref.byte_offset / 4) + i); + assert(word.kind() == decompiler::LinkedWord::PLAIN_DATA); + memcpy(point_ref.data() + (i * 4), &word.data, 4); + } + stats->total_tie_prototype_tris += num_tris; } @@ -457,6 +496,12 @@ void InstanceTie::read_from_file(TypedRef ref, DrawStats* stats) { bsphere.read_from_file(get_field_ref(ref, "bsphere", dts)); bucket_index = read_plain_data_field(ref, "bucket-index", dts); + id = read_plain_data_field(ref, "id", dts); + flags = read_plain_data_field(ref, "flags", dts); + // assert(flags == 0); // TODO + origin.read_from_file(get_field_ref(ref, "origin", dts)); + wind_index = read_plain_data_field(ref, "wind-index", dts); + color_indices = deref_label(get_field_ref(ref, "color-indices", dts)); stats->total_tie_instances++; } @@ -596,9 +641,9 @@ std::string DrawableInlineArrayTFrag::my_type() const { return "drawable-inline-array-tfrag"; } -void 
DrawableInlineArrayTie::read_from_file(TypedRef ref, - const decompiler::DecompilerTypeSystem& dts, - DrawStats* stats) { +void DrawableInlineArrayInstanceTie::read_from_file(TypedRef ref, + const decompiler::DecompilerTypeSystem& dts, + DrawStats* stats) { id = read_plain_data_field(ref, "id", dts); length = read_plain_data_field(ref, "length", dts); bsphere.read_from_file(get_field_ref(ref, "bsphere", dts)); @@ -616,7 +661,7 @@ void DrawableInlineArrayTie::read_from_file(TypedRef ref, } } -std::string DrawableInlineArrayTie::print(const PrintSettings& settings, int indent) const { +std::string DrawableInlineArrayInstanceTie::print(const PrintSettings& settings, int indent) const { std::string is(indent, ' '); std::string result; int next_indent = indent + 4; @@ -634,7 +679,7 @@ std::string DrawableInlineArrayTie::print(const PrintSettings& settings, int ind return result; } -std::string DrawableInlineArrayTie::my_type() const { +std::string DrawableInlineArrayInstanceTie::my_type() const { return "drawable-inline-array-instance-tie"; } @@ -703,7 +748,7 @@ std::unique_ptr make_drawable_inline_array( } if (ref.type->get_name() == "drawable-inline-array-instance-tie") { - auto result = std::make_unique(); + auto result = std::make_unique(); result->read_from_file(ref, dts, stats); return result; } @@ -824,11 +869,13 @@ void PrototypeBucketTie::read_from_file(TypedRef ref, DrawStats* stats) { name = read_string_field(ref, "name", dts, true); flags = read_plain_data_field(ref, "flags", dts); + assert(flags == 0 || flags == 2); in_level = read_plain_data_field(ref, "in-level", dts); utextures = read_plain_data_field(ref, "utextures", dts); // todo drawables dists.read_from_file(get_field_ref(ref, "dists", dts)); rdists.read_from_file(get_field_ref(ref, "rdists", dts)); + stiffness = read_plain_data_field(ref, "stiffness", dts); auto next_slot = get_field_ref(ref, "next", dts); for (int i = 0; i < 4; i++) { @@ -882,6 +929,37 @@ void 
PrototypeBucketTie::read_from_file(TypedRef ref, for (auto x : generic_next) { assert(x == 0); } + + // get the color count data + { + u32 num_color_qwcs = 0; + for (int i = 0; i < 4; i++) { + u32 start = index_start[i]; + u32 end = start + frag_count[i]; + // fmt::print("i = {}: {} -> {}\n", i, start, end); + assert(num_color_qwcs <= end); + num_color_qwcs = std::max(end, num_color_qwcs); + } + + auto data_array = get_field_ref(ref, "color-index-qwc", dts); + for (u32 i = 0; i < num_color_qwcs; i++) { + int byte_offset = data_array.byte_offset + i; + auto word = data_array.data->words_by_seg.at(data_array.seg).at(byte_offset / 4); + color_index_qwc.push_back(word.get_byte(byte_offset % 4)); + } + } + + // get the colors + auto palette = deref_label(get_field_ref(ref, "tie-colors", dts)); + time_of_day.width = deref_u32(palette, 0); + + assert(time_of_day.width == 8); + time_of_day.height = deref_u32(palette, 1); + time_of_day.pad = deref_u32(palette, 2); + assert(time_of_day.pad == 0); + for (int i = 0; i < int(8 * time_of_day.height); i++) { + time_of_day.colors.push_back(deref_u32(palette, 3 + i)); + } } std::string PrototypeBucketTie::print(const PrintSettings& settings, int indent) const { diff --git a/decompiler/level_extractor/BspHeader.h b/decompiler/level_extractor/BspHeader.h index 742b90c460..326c3535d2 100644 --- a/decompiler/level_extractor/BspHeader.h +++ b/decompiler/level_extractor/BspHeader.h @@ -23,6 +23,11 @@ struct Vector { std::string print_meters(int indent = 0) const; }; +struct Matrix4h { + u16 data[16]; + void read_from_file(Ref ref); +}; + struct FileInfo { std::string file_type; std::string file_name; @@ -156,6 +161,15 @@ struct TieFragment : public Drawable { u16 num_tris; u16 num_dverts; + u16 tex_count; + u16 gif_count; + u16 vertex_count; // qwc of vertex data. 
+ + std::vector gif_data; + std::vector point_ref; + + std::string debug_label_name; + // todo, lots more }; @@ -168,7 +182,13 @@ struct InstanceTie : public Drawable { // (bucket-index uint16 :offset 6) u16 bucket_index; + s16 id; Vector bsphere; + Matrix4h origin; + u16 flags; + u16 wind_index; + + Ref color_indices; // can't read this in the first pass because we don't know how long. // todo, lots more }; @@ -201,7 +221,7 @@ struct DrawableInlineArrayTFrag : public DrawableInlineArray { std::string my_type() const override; }; -struct DrawableInlineArrayTie : public DrawableInlineArray { +struct DrawableInlineArrayInstanceTie : public DrawableInlineArray { s16 id; s16 length; Vector bsphere; @@ -295,13 +315,19 @@ struct PrototypeBucketTie { u16 generic_count[4]; u32 generic_next[4]; - u8 frag_count[4]; + u8 frag_count[4] = {0}; u8 index_start[4]; u16 base_qw[4]; float envmap_rfade; float envmap_fade_far; + float stiffness; + + std::vector color_index_qwc; + + TimeOfDayPalette time_of_day; + // todo envmap shader // todo collide-frag // todo tie-colors diff --git a/decompiler/level_extractor/extract_level.cpp b/decompiler/level_extractor/extract_level.cpp index 2331c63f6b..72b519d6c3 100644 --- a/decompiler/level_extractor/extract_level.cpp +++ b/decompiler/level_extractor/extract_level.cpp @@ -3,6 +3,7 @@ #include "extract_level.h" #include "decompiler/level_extractor/BspHeader.h" #include "decompiler/level_extractor/extract_tfrag.h" +#include "decompiler/level_extractor/extract_tie.h" #include "common/util/FileUtil.h" namespace decompiler { @@ -91,10 +92,14 @@ void extract_from_level(ObjectFileDB& db, } extract_tfrag(as_tfrag_tree, fmt::format("{}-{}", dgo_name, i++), bsp_header.texture_remap_table, tex_db, expected_missing_textures, tfrag_level); + } else if (draw_tree->my_type() == "drawable-tree-instance-tie") { + fmt::print(" extracting TIE\n"); + auto as_tie_tree = dynamic_cast(draw_tree.get()); + assert(as_tie_tree); + extract_tie(as_tie_tree, 
fmt::format("{}-{}-tie", dgo_name, i++), + bsp_header.texture_remap_table, tex_db, tfrag_level); } else { fmt::print(" unsupported tree {}\n", draw_tree->my_type()); - tfrag_level.trees.emplace_back(); - tfrag_level.trees.back().kind = tfrag3::TFragmentTreeKind::INVALID; } } diff --git a/decompiler/level_extractor/extract_tfrag.cpp b/decompiler/level_extractor/extract_tfrag.cpp index ef2085c12d..351028cedb 100644 --- a/decompiler/level_extractor/extract_tfrag.cpp +++ b/decompiler/level_extractor/extract_tfrag.cpp @@ -1976,7 +1976,7 @@ std::map> make_draw_groups(std::vector& } void make_tfrag3_data(std::map>& draws, - tfrag3::Tree& tree_out, + tfrag3::TfragTree& tree_out, std::vector& texture_pool, const TextureDB& tdb, const std::vector>& expected_missing_textures) { @@ -2030,13 +2030,13 @@ void make_tfrag3_data(std::map>& draws, // now, add draws for (auto& draw : draw_list) { - tfrag3::Draw tdraw; + tfrag3::StripDraw tdraw; tdraw.mode = draw.mode; tdraw.tree_tex_id = tfrag3_tex_id; for (auto& strip : draw.strips) { - tfrag3::Draw::VisGroup vgroup; - vgroup.tfrag_idx = strip.tfrag_id; // associate with the tfrag for culling + tfrag3::StripDraw::VisGroup vgroup; + vgroup.vis_idx = strip.tfrag_id; // associate with the tfrag for culling vgroup.num = strip.verts.size() + 1; // one for the primitive restart! tdraw.num_triangles += strip.verts.size() - 2; @@ -2049,7 +2049,7 @@ void make_tfrag3_data(std::map>& draws, vtx.s = vert.stq.x(); vtx.t = vert.stq.y(); vtx.q = vert.stq.z(); - vtx.color_index = vert.rgba; + vtx.color_index = vert.rgba / 4; // assert((vert.rgba >> 2) < 1024); spider cave has 2048? 
assert((vert.rgba & 3) == 0); @@ -2071,7 +2071,7 @@ void emulate_tfrags(const std::vector& frags, const std::string& debug_name, const std::vector& map, tfrag3::Level& level_out, - tfrag3::Tree& tree_out, + tfrag3::TfragTree& tree_out, const TextureDB& tdb, const std::vector>& expected_missing_textures) { TFragExtractStats stats; @@ -2099,7 +2099,7 @@ void emulate_tfrags(const std::vector& frags, file_util::get_file_path({"debug_out", fmt::format("tfrag-{}.obj", debug_name)}), debug_out); } -void extract_time_of_day(const level_tools::DrawableTreeTfrag* tree, tfrag3::Tree& out) { +void extract_time_of_day(const level_tools::DrawableTreeTfrag* tree, tfrag3::TfragTree& out) { out.colors.resize(tree->time_of_day.height); for (int i = 0; i < (int)tree->time_of_day.height; i++) { for (int j = 0; j < 8; j++) { @@ -2116,7 +2116,7 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree, const TextureDB& tex_db, const std::vector>& expected_missing_textures, tfrag3::Level& out) { - tfrag3::Tree this_tree; + tfrag3::TfragTree this_tree; if (tree->my_type() == "drawable-tree-tfrag") { this_tree.kind = tfrag3::TFragmentTreeKind::NORMAL; } else if (tree->my_type() == "drawable-tree-dirt-tfrag") { @@ -2151,17 +2151,17 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree, fmt::print(" tree has {} arrays and {} tfragments\n", tree->length, as_tfrag_array->length); auto vis_nodes = extract_vis_data(tree, as_tfrag_array->tfragments.front().id); - this_tree.first_leaf_node = vis_nodes.first_child_node; - this_tree.last_leaf_node = vis_nodes.last_child_node; - this_tree.num_roots = vis_nodes.num_roots; - this_tree.only_children = vis_nodes.only_children; - this_tree.first_root = vis_nodes.first_root; - this_tree.vis_nodes = std::move(vis_nodes.vis_nodes); + this_tree.bvh.first_leaf_node = vis_nodes.first_child_node; + this_tree.bvh.last_leaf_node = vis_nodes.last_child_node; + this_tree.bvh.num_roots = vis_nodes.num_roots; + this_tree.bvh.only_children = 
vis_nodes.only_children; + this_tree.bvh.first_root = vis_nodes.first_root; + this_tree.bvh.vis_nodes = std::move(vis_nodes.vis_nodes); std::unordered_map tfrag_parents; // for (auto& node : this_tree.vis_nodes) { - for (size_t node_idx = 0; node_idx < this_tree.vis_nodes.size(); node_idx++) { - const auto& node = this_tree.vis_nodes[node_idx]; + for (size_t node_idx = 0; node_idx < this_tree.bvh.vis_nodes.size(); node_idx++) { + const auto& node = this_tree.bvh.vis_nodes[node_idx]; if (node.flags == 0) { for (int i = 0; i < node.num_kids; i++) { tfrag_parents[node.child_id + i] = node_idx; @@ -2176,14 +2176,14 @@ void extract_tfrag(const level_tools::DrawableTreeTfrag* tree, for (auto& draw : this_tree.draws) { for (auto& str : draw.vis_groups) { - auto it = tfrag_parents.find(str.tfrag_idx); + auto it = tfrag_parents.find(str.vis_idx); if (it == tfrag_parents.end()) { - str.tfrag_idx = UINT32_MAX; + str.vis_idx = UINT32_MAX; } else { - str.tfrag_idx = it->second; + str.vis_idx = it->second; } } } - out.trees.push_back(this_tree); + out.tfrag_trees.push_back(this_tree); } } // namespace decompiler \ No newline at end of file diff --git a/decompiler/level_extractor/extract_tie.cpp b/decompiler/level_extractor/extract_tie.cpp new file mode 100644 index 0000000000..b1994d0926 --- /dev/null +++ b/decompiler/level_extractor/extract_tie.cpp @@ -0,0 +1,2068 @@ +#include + +#include "extract_tie.h" + +#include "decompiler/ObjectFile/LinkedObjectFile.h" + +#include "common/util/FileUtil.h" + +namespace decompiler { + +/*! + * Get the index of the first draw node in an array. Works for node or tfrag. + */ +u16 get_first_idx(const level_tools::DrawableInlineArray* array) { + auto as_tie_instances = dynamic_cast(array); + auto as_nodes = dynamic_cast(array); + if (as_tie_instances) { + return as_tie_instances->instances.at(0).id; + } else if (as_nodes) { + return as_nodes->draw_nodes.at(0).id; + } else { + assert(false); + } +} + +/*! 
+ * Verify node indices follow the patterns we expect. Takes start as the expected first, + * writes the end. + */ +bool verify_node_indices_from_array(const level_tools::DrawableInlineArray* array, + u16 start, + u16* end) { + auto as_tie_instances = dynamic_cast(array); + auto as_nodes = dynamic_cast(array); + + if (as_tie_instances) { + for (auto& elt : as_tie_instances->instances) { + if (elt.id != start) { + fmt::print("bad inst: exp {} got {}\n", start, elt.id); + return false; + } + start++; + } + *end = start; + return true; + } else if (as_nodes) { + for (auto& elt : as_nodes->draw_nodes) { + if (elt.id != start) { + fmt::print("bad node: exp {} got {}\n", start, elt.id); + return false; + } + start++; + } + *end = start; + return true; + } else { + fmt::print("bad node array type: {}\n", array->my_type()); + return false; + } +} + +/*! + * Verify all node indices in a tree. + */ +bool verify_node_indices(const level_tools::DrawableTreeInstanceTie* tree) { + u16 start = get_first_idx(tree->arrays.at(0).get()); + for (auto& array : tree->arrays) { + if (!verify_node_indices_from_array(array.get(), start, &start)) { + return false; + } + start = (start + 31) & ~(31); + } + return true; +} + +/*! + * Extract the visibility tree. + * This does not insert nodes for the bottom level. + */ +void extract_vis_data(const level_tools::DrawableTreeInstanceTie* tree, + u16 first_child, + tfrag3::TieTree& out) { + out.bvh.first_leaf_node = first_child; + out.bvh.last_leaf_node = first_child; + + if (tree->arrays.size() == 0) { + // shouldn't hit this? 
+ } else if (tree->arrays.size() == 1) { + auto array = + dynamic_cast(tree->arrays.at(0).get()); + assert(array); + out.bvh.first_root = array->instances.at(0).id; + out.bvh.num_roots = array->instances.size(); + out.bvh.only_children = true; + } else { + auto array = + dynamic_cast(tree->arrays.at(0).get()); + assert(array); + out.bvh.first_root = array->draw_nodes.at(0).id; + out.bvh.num_roots = array->draw_nodes.size(); + out.bvh.only_children = false; + } + + out.bvh.vis_nodes.resize(first_child - out.bvh.first_root); + + // may run 0 times, if there are only children. + for (int i = 0; i < ((int)tree->arrays.size()) - 1; i++) { + bool expecting_leaves = i == ((int)tree->arrays.size()) - 2; + + auto array = + dynamic_cast(tree->arrays.at(i).get()); + assert(array); + u16 idx = first_child; + for (auto& elt : array->draw_nodes) { + auto& vis = out.bvh.vis_nodes.at(elt.id - out.bvh.first_root); + assert(vis.num_kids == 0xff); + for (int j = 0; j < 4; j++) { + vis.bsphere[j] = elt.bsphere.data[j]; + } + vis.num_kids = elt.child_count; + vis.flags = elt.flags; + assert(vis.flags == (expecting_leaves ? 
0 : 1)); // parenthesized on purpose: == binds tighter than ?:, flags must be 0 above leaves and 1 otherwise + assert(vis.num_kids > 0); + assert(vis.num_kids <= 8); + assert(elt.children.size() == vis.num_kids); + if (expecting_leaves) { + for (int leaf = 0; leaf < (int)vis.num_kids; leaf++) { + auto l = dynamic_cast(elt.children.at(leaf).get()); + assert(l); + + assert(idx == l->id); + + assert(l->id >= out.bvh.first_leaf_node); + if (leaf == 0) { + vis.child_id = l->id; + } + out.bvh.last_leaf_node = std::max((u16)l->id, out.bvh.last_leaf_node); + idx++; + } + + } else { + u16 arr_idx = 0; + for (int child = 0; child < (int)vis.num_kids; child++) { + auto l = dynamic_cast(elt.children.at(child).get()); + assert(l); + if (child == 0) { + arr_idx = l->id; + } else { + assert(arr_idx < l->id); + arr_idx = l->id; + } + if (child == 0) { + vis.child_id = l->id; + } + + assert(l->id < out.bvh.first_leaf_node); + } + } + } + } +} + +struct TieInstanceFragInfo { + // the color index table uploaded to VU. + // this contains indices into the shared palette. + std::vector color_indices; + + u16 color_index_offset_in_big_palette = -1; + + math::Vector lq_colors_ui(u32 qw) const { + // note: this includes the unpack + assert(qw >= 204); + qw -= 204; + qw *= 4; + assert(qw + 4 <= color_indices.size()); + math::Vector result; + for (int i = 0; i < 4; i++) { + result[i] = color_indices.at(qw + i); + } + return result; + } +}; + +struct TieInstanceInfo { + // The index of the prototype (the geometry) that is used by this instance + // note: we're going to trust that this lines up with bucket. + // if this assumption is wrong, we'll be drawing with the wrong model and it will be super + // obvious. + u16 prototype_idx = 0; + + // our bsphere's index in the BVH tree + u16 vis_id = 0; + + // not totally sure if we'll use this (currently unused in tfrag, but probably worth if we + // actually cull using the tree) + math::Vector4f bsphere; + + std::array mat; + + u16 wind_index = 0; + float unknown_wind_related_value = 0.f; // w of the first mat vec. 
+ + std::vector frags; // per-instance per-fragment info +}; + +struct AdgifInfo { + u32 first_w; + u32 second_w; + u32 third_w; + u32 combo_tex; + u64 alpha_val; + u64 clamp_val; +}; + +struct StrGifInfo { + u16 address; + u16 nloop; + u16 mode; // not yet fully understood, but can allow the use of other templates. + bool eop; +}; + +struct TieProtoVertex { + math::Vector pos; + math::Vector tex; + + // NOTE: this is a double lookup. + // first you look up the index in the _instance_ color table + // then you look up the color in the _proto_'s interpolated color palette. + u32 color_index_index; +}; + +struct TieStrip { + AdgifInfo adgif; + std::vector verts; +}; + +struct TieFrag { + bool has_magic_tex0_bit = false; + std::vector adgifs; + + std::vector other_gif_data; + std::vector points_data; + std::vector point_sizes; + + u32 expected_dverts = 0; + + std::vector strips; + + // this contains vertices, key is the start of the actual xyzf/st/rgbaq data for it. + std::unordered_map vertex_by_dest_addr; + + math::Vector lq_points(u32 qw) const { + assert(qw >= 50); + qw -= 50; + assert((qw * 16) + 16 <= points_data.size()); + math::Vector result; + memcpy(result.data(), points_data.data() + (qw * 16), 16); + return result; + } + + math::Vector lq_points_allow_past_end(u32 qw) const { + assert(qw >= 50); + qw -= 50; + if ((qw * 16) + 16 <= points_data.size()) { + math::Vector result; + memcpy(result.data(), points_data.data() + (qw * 16), 16); + return result; + } else { + return math::Vector4f(-1, -1, -1, -1); + } + } + + void sq_points(u32 qw, const math::Vector4f& data) { + assert(qw >= 50); + qw -= 50; + assert((qw * 16) + 16 <= points_data.size()); + memcpy(points_data.data() + (qw * 16), data.data(), 16); + } + + u16 ilw_other_gif(u32 qw, u32 offset) const { + // unpacked with v8. 
+ int qwi = qw; + qwi -= (adgifs.size() * 5); + assert(qwi >= 0); + return other_gif_data.at(qwi * 4 + offset); + } + + struct ProgramInfo { + std::vector adgif_offset_in_gif_buf_qw; + std::vector str_gifs; + u16 skip_bp2 = 0; + u16 skip_ips = 0; + u16 tgt_bp1_ptr = 0; + u16 tgt_bp2_ptr = 0; + u16 tgt_ip1_ptr = 0; + u16 tgt_ip2_ptr = 0; + u16 kick_addr = 0; + u16 clr_ptr = 0; + u16 point_ptr = 0; + u16 misc_x = 0; // at 971's x. + math::Vector4f gifbufs; + math::Vector4f extra; + } prog_info; +}; + +struct TieProtoInfo { + std::string name; + std::vector instances; + bool uses_generic = false; + float stiffness = 0; + u32 generic_flag; + std::vector time_of_day_colors; + std::vector frags; +}; + +std::array extract_tie_matrix(const u16* data) { + std::array result; + for (int i = 0; i < 4; i++) { + s32 x = data[12 + i]; + x <<= 16; + x >>= 10; + result[3][i] = x; + } + + for (int vec = 0; vec < 3; vec++) { + for (int i = 0; i < 4; i++) { + s32 x = data[vec * 4 + i]; + x <<= 16; + x >>= 16; + result[vec][i] = (float)x / 4096.f; + } + } + + return result; +} + +constexpr int GEOM_IDX = 1; // todo 0 or 1?? + +std::vector collect_instance_info( + const level_tools::DrawableInlineArrayInstanceTie* instances, + const std::vector* protos) { + std::vector result; + for (auto& instance : instances->instances) { + TieInstanceInfo info; + info.prototype_idx = instance.bucket_index; + info.vis_id = instance.id; + for (int i = 0; i < 4; i++) { + info.bsphere[i] = instance.bsphere.data[i]; + } + info.mat = extract_tie_matrix(instance.origin.data); + info.mat[3][0] += info.bsphere[0]; + info.mat[3][1] += info.bsphere[1]; + info.mat[3][2] += info.bsphere[2]; + info.wind_index = instance.wind_index; + // there's a value stashed here that we can get rid of + // it is related to wind. 
+ info.unknown_wind_related_value = info.mat[0][3]; + info.mat[0][3] = 0.f; + + // each fragment has its own color data (3 dmatags) + + // the number of colors (qwc) is stored in the prototype, in the color-index-qwc array of bytes. + // at an offset of index-start[geom] + frag_idx. + + // the actual data is located at the instance's color-indices + (proto.base-qw[geom] * 16) + + // and this is only the indices.... there's yet another lookup on the VU + auto& proto = protos->at(info.prototype_idx); + u32 offset_bytes = proto.base_qw[GEOM_IDX] * 16; + for (int frag_idx = 0; frag_idx < proto.frag_count[GEOM_IDX]; frag_idx++) { + TieInstanceFragInfo frag_info; + u32 num_color_qwc = proto.color_index_qwc.at(proto.index_start[GEOM_IDX] + frag_idx); + for (u32 i = 0; i < num_color_qwc * 4; i++) { + for (u32 j = 0; j < 4; j++) { + frag_info.color_indices.push_back( + instance.color_indices.data->words_by_seg.at(instance.color_indices.seg) + .at(((offset_bytes + instance.color_indices.byte_offset) / 4) + i) + .get_byte(j)); + } + } + info.frags.push_back(std::move(frag_info)); + assert(info.frags.back().color_indices.size() > 0); + offset_bytes += num_color_qwc * 16; + } + + if (result.size() <= info.prototype_idx) { + result.resize(info.prototype_idx + 1); + } + result[info.prototype_idx].instances.push_back(info); + } + + return result; +} + +u32 remap_texture(u32 original, const std::vector& map) { + auto masked = original & 0xffffff00; + for (auto& t : map) { + if (t.original_texid == masked) { + fmt::print("OKAY! 
remapped!\n"); + assert(false); + return t.new_texid | 20; + } + } + return original; +} + +void update_proto_info(std::vector* out, + const std::vector& map, + const TextureDB& tdb, + const std::vector& protos) { + out->resize(std::max(out->size(), protos.size())); + for (size_t i = 0; i < protos.size(); i++) { + const auto& proto = protos[i]; + auto& info = out->at(i); + assert(proto.flags == 0 || proto.flags == 2); + info.uses_generic = (proto.flags == 2); + info.name = proto.name; + info.stiffness = proto.stiffness; + info.generic_flag = proto.flags & 2; + + info.time_of_day_colors.resize(proto.time_of_day.height); + for (int k = 0; k < (int)proto.time_of_day.height; k++) { + for (int j = 0; j < 8; j++) { + memcpy(info.time_of_day_colors[k].rgba[j].data(), &proto.time_of_day.colors[k * 8 + j], 4); + } + } + + for (int frag_idx = 0; frag_idx < proto.frag_count[GEOM_IDX]; frag_idx++) { + TieFrag frag_info; + for (int tex_idx = 0; + tex_idx < proto.geometry[GEOM_IDX].tie_fragments.at(frag_idx).tex_count / 5; tex_idx++) { + AdgifInfo adgif; + auto& gif_data = proto.geometry[GEOM_IDX].tie_fragments[frag_idx].gif_data; + u8 ra_tex0 = gif_data.at(16 * (tex_idx * 5 + 0) + 8); + u64 ra_tex0_val; + memcpy(&ra_tex0_val, &gif_data.at(16 * (tex_idx * 5 + 0)), 8); + assert(ra_tex0 == (u8)GsRegisterAddress::TEX0_1); + assert(ra_tex0_val == 0 || ra_tex0_val == 0x800000000); // note: decal + frag_info.has_magic_tex0_bit = ra_tex0_val == 0x800000000; + memcpy(&adgif.first_w, &gif_data.at(16 * (tex_idx * 5 + 0) + 12), 4); + + u8 ra_tex1 = gif_data.at(16 * (tex_idx * 5 + 1) + 8); + u64 ra_tex1_val; + memcpy(&ra_tex1_val, &gif_data.at(16 * (tex_idx * 5 + 1)), 8); + assert(ra_tex1 == (u8)GsRegisterAddress::TEX1_1); + assert(ra_tex1_val == 0x120); // some flag + u32 original_tex; + memcpy(&original_tex, &gif_data.at(16 * (tex_idx * 5 + 1) + 8), 4); + u32 new_tex = remap_texture(original_tex, map); + if (original_tex != new_tex) { + fmt::print("map from 0x{:x} to 0x{:x}\n", 
original_tex, new_tex); + } + u32 tpage = new_tex >> 20; + u32 tidx = (new_tex >> 8) & 0b1111'1111'1111; + u32 tex_combo = (((u32)tpage) << 16) | tidx; + auto tex = tdb.textures.find(tex_combo); + assert(tex != tdb.textures.end()); + adgif.combo_tex = tex_combo; + memcpy(&adgif.second_w, &gif_data.at(16 * (tex_idx * 5 + 1) + 12), 4); + + if (ra_tex0_val == 0x800000000) { + fmt::print("texture {} in {} has weird tex setting\n", tex->second.name, proto.name); + } + + u8 ra_mip = gif_data.at(16 * (tex_idx * 5 + 2) + 8); + assert(ra_mip == (u8)GsRegisterAddress::MIPTBP1_1); + memcpy(&adgif.third_w, &gif_data.at(16 * (tex_idx * 5 + 2) + 12), 4); + + // who cares about the value + + u8 ra_clamp = gif_data.at(16 * (tex_idx * 5 + 3) + 8); + assert(ra_clamp == (u8)GsRegisterAddress::CLAMP_1); + u64 clamp; + memcpy(&clamp, &gif_data.at(16 * (tex_idx * 5 + 3)), 8); + adgif.clamp_val = clamp; + + u8 ra_alpha = gif_data.at(16 * (tex_idx * 5 + 4) + 8); + assert(ra_alpha == (u8)GsRegisterAddress::ALPHA_1); + u64 alpha; + memcpy(&alpha, &gif_data.at(16 * (tex_idx * 5 + 4)), 8); + adgif.alpha_val = alpha; + frag_info.adgifs.push_back(adgif); + } + frag_info.expected_dverts = proto.geometry[GEOM_IDX].tie_fragments[frag_idx].num_dverts; + int tex_qwc = proto.geometry[GEOM_IDX].tie_fragments.at(frag_idx).tex_count; + int other_qwc = proto.geometry[GEOM_IDX].tie_fragments.at(frag_idx).gif_count; + frag_info.other_gif_data.resize(16 * other_qwc); + memcpy(frag_info.other_gif_data.data(), + proto.geometry[GEOM_IDX].tie_fragments[frag_idx].gif_data.data() + (16 * tex_qwc), + 16 * other_qwc); + + const auto& pr = proto.geometry[GEOM_IDX].tie_fragments[frag_idx].point_ref; + int in_qw = pr.size() / 16; + int out_qw = in_qw * 2; + frag_info.points_data.resize(out_qw * 16); + { + const s16* in_ptr = (const s16*)pr.data(); + s32* out_ptr = (s32*)frag_info.points_data.data(); + for (int ii = 0; ii < out_qw * 4; ii++) { + out_ptr[ii] = in_ptr[ii]; + } + } + + // just for debug + for (int g = 0; 
g < 4; g++) { + frag_info.point_sizes.push_back(proto.geometry[g].tie_fragments[frag_idx].point_ref.size()); + } + + info.frags.push_back(std::move(frag_info)); + } + } +} + +// upload-palette-0: just a flusha +// no data + +// upload-palette-1: stmod 1 (add row), unpack v4 (32 qw in, 128 qw out), imm = usn, 0x346 +// colors (after time of day interpolation) +// NOTE: adds row + +// upload-model-0: stmod = 0, unpack-v4-32 imm = 0 (upload to 0?) (usn doesn't matter for v4-32) +// adgifs, size of adgifs. + +// upload-model-1: +// mscal 4 +// unpack-v4-8 imm = right after adgifs, usn. +// extra gif stuff + +// upload-model-2: +// unpack-v4-16 imm = 32, signed. +// points + +// upload-model-3 +// mscal 6 +// call the models! + +// upload-color-0 +// 6 qw of matrix plus flag stuff +// to 198 (relative to TOP) + +// upload-color-1 +// to 204 unsigned (relative to TOP) + +// upload-color-2/ret +// mscal 0 + +// MEMORY MAP of TIE +// 0 gif tags +// extra gifs +// 32 model +// 198 instance matrix +// 204 instance colors +// 242 instance matrix again +// 248 instance colors again +// 286 gifbuf +// 470 gifbuf again +// 654 ?? 
+// 838 color palette +// 966 tie-consts +// 966 adgif +// 967 strgid +// 968 extra +// 969 gifbufs +// 970 clrbufs +// 971 misc +// 972 atestgif +// 973 atest-tra +// 974 atest-def + +math::Vector4f itof0(const math::Vector4f& vec) { + math::Vector4f result; + for (int i = 0; i < 4; i++) { + s32 val; + memcpy(&val, vec.data() + i, 4); + result[i] = val; + } + return result; +} + +math::Vector4f itof12xyz_0w(const math::Vector4f& vec) { + math::Vector4f result; + for (int i = 0; i < 4; i++) { + s32 val; + memcpy(&val, vec.data() + i, 4); + result[i] = val; + } + result.x() /= 4096.f; + result.y() /= 4096.f; + result.z() /= 4096.f; + return result; +} + +math::Vector4f muli64_xyz(const math::Vector4f& vec) { + math::Vector4f result = vec; + result.x() *= 64.f; + result.y() *= 64.f; + result.z() *= 64.f; + return result; +} + +void emulate_tie_prototype_program(std::vector& protos) { + using math::Vector4f; + + // our convention here is to use the lower buffer for everything double buffered. + + // because double buffering was too easy, the xgkick output buffer is triple buffered! + // the normal double buffering approach would not allow one prototype to be in setup + // while the second is being kicked. Each prototype gets two gif bufs and the third gif buf + // is used to xgkick whatever is left over from the previous prototype. + float gifbuf_start = 8388894.f; // 0x4b00011e. The 0x11e in the mantissa is 286. + float gifbuf_middle = 8389078.f; // 0x4b0001d6. The 0x1d6 in the mantissa is 470. + float gifbuf_end = 8389262.f; // 0x4b00028e. The 0x28e in the mantissa is 654. + + Vector4f vf_gifbufs(gifbuf_end, gifbuf_middle, gifbuf_end, gifbuf_middle); + + float gifbuf_sum = gifbuf_start + gifbuf_middle + gifbuf_end; + Vector4f vf_extra(gifbuf_sum, 0, gifbuf_sum, 0); + + // u16 misc_x = 0; + // u16 misc_y = 1; + + // First, we will emulate the program that runs after model uploads. 
(L1, imm = 6) + // it runs once per fragment + for (auto& proto : protos) { + // loop over fragments in this proto + for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) { + auto& frag = proto.frags[frag_idx]; + + // this basically sets up some templates in memory. + // we're going to track the memory addresses of where certain tags are placed. + + // there are 6qw gif packets that do an adgif-shader upload. + // this vector will store the location of these adgif shaders, relative to the start + // of the gif output buffer being used. + + // this starts off pointing to 0, which is the adgif shaders for this fragment (input data) + u16 vi_point_ptr = 0; + + // this fiddles with the triple buffering magic for gif bufs + // todo: figure out the trick and just use a fixed addr. + vf_gifbufs.z() = vf_extra.z() - vf_gifbufs.x(); + vf_gifbufs.x() = vf_extra.x() - vf_gifbufs.x(); + + // L1: + // lq.xyz vf01, 966(vi00) | nop vf01 = adgif header. + // ilwr.w vi04, vi_point_ptr | nop + // some integers are hidden in the upper 32-bits of the adgif data. + // the first one has the offset in the gif buffer. + // we expect this to be 0 for the first one - we should start with adgif shaders always. + u16 vi04 = frag.adgifs.at(0).first_w; + assert(vi04 == 0); + + // ilw.w vi_ind, 1(vi_point_ptr) | nop + // the next hidden integer is the number of adgif shaders used in this fragment. + // we already know this, so check it. + u16 vi_ind = frag.adgifs.at(0).second_w; + assert(vi_ind == frag.adgifs.size()); + + // mtir vi06, vf_gifbufs.y | nop + // vi06 will be one of our gifbufs we can use. + u16 vi06; + memcpy(&vi06, &vf_gifbufs.y(), sizeof(u16)); + // fmt::print("vi06: {}\n", vi06); + assert(vi06 == 470 || vi06 == 286 || vi06 == 654); // should be one of the three gifbufs. 
+ + // lqi.xyzw vf02, vi_point_ptr | suby.xz vf_gifbufs, vf_gifbufs, vf_gifbufs + // lqi.xyzw vf03, vi_point_ptr | nop + // lqi.xyzw vf04, vi_point_ptr | nop + // lqi.xyzw vf05, vi_point_ptr | nop + // mtir vi05, vf_gifbufs.x | nop + // lqi.xyzw vf06, vi_point_ptr | subw.w vf01, vf01, vf01 + + // loads the adgif data into vf02 -> vf06 + // the subw.w is to clear out the secret integer (I think the gs ignores this anyway) + vf_gifbufs.x() -= vf_gifbufs.y(); + vf_gifbufs.z() -= vf_gifbufs.y(); + // and vi05 is our other buffer. + u16 vi05; + memcpy(&vi05, &vf_gifbufs.x(), sizeof(u16)); + // fmt::print("vi05: {}\n", vi05); + // check that we understand the buffer rotation. + if (vi06 == 470) { + assert(vi05 == 286); + } else if (vi06 == 286) { + assert(vi05 == 654); + } else { + assert(vi05 == 470); + } + vi_point_ptr += 5; + + // this loop copies the adgifs to the gif buf at the appropriate address. + // Note: the final iteration through the loop does a load that's past the end of the + // adgif array, and vf02 is the first qw of the "extra gif data" + u32 adgif_load_idx = 1; + adgif_setup_loop_top: + // L2: + // iadd vi03, vi04, vi05 | nop + // vi04 is the adgif offset, vi05 is the buffer. + u16 vi03 = vi04 + vi05; + + // iadd vi04, vi04, vi06 | nop + // set vi04 to the offset for the adgif in the second buffer. + vi04 += vi06; + + // iaddi vi_ind, vi_ind, -0x1 | nop + vi_ind--; // decrement remaining adgifs + + // store adgifs in one buffer. 
+ frag.prog_info.adgif_offset_in_gif_buf_qw.push_back(vi03 - vi05); + // fmt::print("adgifs at offset {}\n", frag.prog_info.adgif_offset_in_gif_buf_qw.back()); + // sqi.xyzw vf01, vi03 | nop + // sqi.xyzw vf02, vi03 | nop + // sqi.xyzw vf03, vi03 | nop + // sqi.xyzw vf04, vi03 | nop + // sqi.xyzw vf05, vi03 | nop + // sqi.xyzw vf06, vi03 | nop + vi03 += 5; + + // and the other buffer + // sqi.xyzw vf01, vi04 | nop + // sqi.xyzw vf02, vi04 | nop + // sqi.xyzw vf03, vi04 | nop + // sqi.xyzw vf04, vi04 | nop + // sqi.xyzw vf05, vi04 | nop + // sqi.xyzw vf06, vi04 | nop + vi04 += 5; + + // ilwr.w vi04, vi_point_ptr | nop + // get the offset of the next adgif + // vi04 = frag.ilw_points(vi_point_ptr, 3); + + // lqi.xyzw vf02, vi_point_ptr | nop + // lqi.xyzw vf03, vi_point_ptr | nop + // lqi.xyzw vf04, vi_point_ptr | nop + // lqi.xyzw vf05, vi_point_ptr | nop + vi_point_ptr += 5; + + // ibgtz vi_ind, L2 | nop + if (((s16)vi_ind) > 0) { + // moved down + vi04 = frag.adgifs.at(adgif_load_idx++).first_w; + goto adgif_setup_loop_top; + } + // lqi.xyzw vf06, vi_point_ptr | nop (adgif load) + + // Extra gif stuff + // this part builds the headers for the actual drawing packets. + // again, we do it in two parts. The extra gif data gives us offsets, + // The extra gif stuff is unpacked immediately after adgifs. Unpacked with v8 4. + // the above adgif loop will run off the end and vf02 will have the first byte in it's w. + assert(frag.other_gif_data.size() > 1); + // mtir vi_ind, vf02.w | nop + // vi_ind will contain the number of drawing packets for this fragment. 
+ vi_ind = frag.other_gif_data.at(3); + u16 vf02_x = frag.other_gif_data.at(0); + u16 vf02_y = frag.other_gif_data.at(1); + // u16 vf02_z = frag.other_gif_data.at(2); + u16 vf03_x = frag.other_gif_data.at(4); + u16 vf03_y = frag.other_gif_data.at(5); + u16 vf03_z = frag.other_gif_data.at(6); + u16 vf03_w = frag.other_gif_data.at(7); + u16 vf04_x = frag.other_gif_data.at(8); + u16 vf04_y = frag.other_gif_data.at(9); + u16 vf04_z = frag.other_gif_data.at(10); + // u16 vf04_w = frag.other_gif_data.at(11); + assert(vi_ind >= frag.adgifs.size()); // at least 1 draw per shader. + assert(vi_ind < 1000); // check for insane value. + // fmt::print("got: {}, other size: {}\n", vi_ind, frag.other_gif_data.size()); + + // iaddi vi_point_ptr, vi_point_ptr, -0x2 | subw.w vf07, vf07, vf07 + vi_point_ptr -= 2; + // vf07.w = 0 + + // setup for tag building loop. + + // ilwr.x vi07, vi_point_ptr | nop + u16 vi07 = frag.ilw_other_gif(vi_point_ptr, 0); + // vi07 is the nloop/eop. + + // ilwr.y vi08, vi_point_ptr | nop + u16 vi08 = frag.ilw_other_gif(vi_point_ptr, 1); + // this can toggle to a different mode but I don't understand it yet. + assert(vi08 == 0); + + // ilwr.z vi04, vi_point_ptr | nop + vi04 = frag.ilw_other_gif(vi_point_ptr, 2); + // offset + + // fmt::print("[{}] 7: {} 8: {} 4: {}, for {}\n", vi_point_ptr, vi07, vi08, vi04, vi_ind - 1); + + // iaddi vi_ind, vi_ind, -0x1 | nop + vi_ind--; + + // iaddi vi_point_ptr, vi_point_ptr, 0x1 | nop + vi_point_ptr++; + + // ibeq vi00, vi_ind, L4 | nop + // lq.xyz vf07, 967(vi08) | nop + u16 next_mode = vi08; + + // todo: can we rely on a strgif from a previous fragment? + while (vi_ind) { + StrGifInfo info; + // L3: + // iadd vi03, vi04, vi05 | nop + vi03 = vi04 + vi05; // addr in one buf + // iadd vi04, vi04, vi06 | nop + vi04 = vi04 + vi06; // addr in other buf + // iaddi vi_ind, vi_ind, -0x1 | nop + vi_ind--; // dec remaining tag + // sq.xyzw vf07, 0(vi03) | nop + info.address = vi03 - vi05; // store the template. 
but this doesn't have size or anything. + // fmt::print("strgif at {}, {}\n", vi03, vi04); + + // iswr.x vi07, vi03 | nop + info.nloop = vi07 & 0x7fff; + info.eop = vi07 & 0x8000; + assert(!info.eop); // seems like we handle this manually after the loop + info.mode = next_mode; + + // sq.xyzw vf07, 0(vi04) | nop + // iswr.x vi07, vi04 | nop + // and the same for the other tag in the other buffer + + // ilwr.x vi07, vi_point_ptr | nop + vi07 = frag.ilw_other_gif(vi_point_ptr, 0); + + // ilwr.y vi08, vi_point_ptr | nop + vi08 = frag.ilw_other_gif(vi_point_ptr, 1); + + // ilwr.z vi04, vi_point_ptr | nop + vi04 = frag.ilw_other_gif(vi_point_ptr, 2); + + // iaddi vi_point_ptr, vi_point_ptr, 0x1 | nop + vi_point_ptr++; + + // ibne vi00, vi_ind, L3 | nop + // lq.xyz vf07, 967(vi08) | nop + next_mode = vi08; + // fmt::print("[{}] 7: {} 8: {} 4: {}, for {}\n", vi_point_ptr, vi07, vi08, vi04, vi_ind); + frag.prog_info.str_gifs.push_back(info); + } + + // and now, the final tag, which ends the drawing packet! + // L4: + // iaddiu vi07, vi07, 0x4000 | nop + vi07 += 0x8000; + // iaddiu vi07, vi07, 0x4000 | nop + StrGifInfo info; + info.eop = true; // the 0x8000 sets the eop bit. 
+ + // compute addresses + // iadd vi03, vi04, vi05 | nop + vi03 = vi04 + vi05; + // iadd vi04, vi04, vi06 | nop + vi04 += vi06; + + // store and set nloop/eop + // sq.xyzw vf07, 0(vi03) | nop + info.address = vi03 - vi05; + // iswr.x vi07, vi03 | nop + info.nloop = vi07 & 0x7fff; + // sq.xyzw vf07, 0(vi04) | nop + // iswr.x vi07, vi04 | nop + frag.prog_info.str_gifs.push_back(info); + + // mtir vi06, vf04.x | nop + vi06 = vf04_x; + + // lq.xyzw vf05, 50(vi00) | nop + auto vf05 = frag.lq_points(50); + // lq.xyzw vf15, 51(vi00) | nop + auto vf15 = frag.lq_points(51); + // iaddiu vi05, vi00, 0x34 | nop + vi05 = 0x34; // points to after the two qw's we just loaded + // nop | nop + // iaddiu vi06, vi06, 0x32 | itof0.xyzw vf05, vf05 + vi06 += 0x32; + vf05 = itof0(vf05); + + // lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15 + auto vf06 = frag.lq_points(vi05); + vi05++; + vf15 = itof12xyz_0w(vf15); + + // lqi.xyzw vf16, vi05 | itof0.w vf15, vf15 + auto vf16 = frag.lq_points(vi05); + vi05++; + // itof0 already done by previous + + // 64.0 | nop :i + // ibeq vi06, vi05, L6 | muli.xyz vf05, vf05, I + vf05 = muli64_xyz(vf05); + // mtir vi07, vf04.y | itof0.xyzw vf06, vf06 + vi07 = vf04_y; + // fmt::print("bonus points: {}\n", vi07); + vf06 = itof0(vf06); + + // L5: + Vector4f vf07; + top_of_points_loop: + // fmt::print("{}/{}\n", vi05, vi06); + // lqi.xyzw vf07, vi05 | itof12.xyz vf16, vf16 + vf07 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf16 = itof12xyz_0w(vf16); + + // lqi.xyzw vf17, vi05 | itof0.w vf16, vf16 + auto vf17 = frag.lq_points_allow_past_end(vi05); + vi05++; + // itof done above. 
+ + // sq.xyzw vf15, -5(vi05) | nop + frag.sq_points(vi05 - 5, vf15); + + // ibeq vi06, vi05, L6 | muli.xyz vf06, vf06, I + // sq.xyzw vf05, -6(vi05) | itof0.xyzw vf07, vf07 + vf06 = muli64_xyz(vf06); + frag.sq_points(vi05 - 6, vf05); + vf07 = itof0(vf07); + if (vi05 == vi06) { + goto end_of_int_to_float_loop; + } + + // lqi.xyzw vf05, vi05 | itof12.xyz vf17, vf17 + vf05 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf17 = itof12xyz_0w(vf17); + + // lqi.xyzw vf15, vi05 | itof0.w vf17, vf17 + vf15 = frag.lq_points_allow_past_end(vi05); + vi05++; + // itof doen above + + // sq.xyzw vf16, -5(vi05) | nop + frag.sq_points(vi05 - 5, vf16); + // ibeq vi06, vi05, L6 | muli.xyz vf07, vf07, I + vf07 = muli64_xyz(vf07); + // sq.xyzw vf06, -6(vi05) | itof0.xyzw vf05, vf05 + frag.sq_points(vi05 - 6, vf06); + vf05 = itof0(vf05); + if (vi05 == vi06) { + goto end_of_int_to_float_loop; + } + + // lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15 + vf06 = frag.lq_points_allow_past_end(vi05); + vf15 = itof12xyz_0w(vf15); + vi05++; + + // lqi.xyzw vf16, vi05 | itof0.w vf15, vf15 + vf16 = frag.lq_points_allow_past_end(vi05); + vi05++; + // itof done above + + // sq.xyzw vf17, -5(vi05) | nop + frag.sq_points(vi05 - 5, vf17); + + // ibne vi06, vi05, L5 | muli.xyz vf05, vf05, I + // sq.xyzw vf07, -6(vi05) | itof0.xyzw vf06, vf06 + vf05 = muli64_xyz(vf05); + frag.sq_points(vi05 - 6, vf07); + vf06 = itof0(vf06); + if (vi05 != vi06) { + goto top_of_points_loop; + } + + end_of_int_to_float_loop: + // another points loop + Vector4f vf10; + + // L6: + // lq.xyzw vf09, -4(vi05) | nop + auto vf09 = frag.lq_points_allow_past_end(vi05 - 4); + // lq.xyzw vf05, -3(vi05) | nop + vf05 = frag.lq_points_allow_past_end(vi05 - 3); + // lq.xyzw vf15, -2(vi05) | nop + vf15 = frag.lq_points_allow_past_end(vi05 - 2); + // iadd vi07, vi07, vi05 | nop + vi07 += vi05; + // iaddi vi07, vi07, -0x4 | nop + vi07 -= 4; + // iaddi vi05, vi05, -0x1 | nop + vi05 -= 1; + // iaddi vi08, vi05, -0x3 | nop + vi08 = vi05 - 3; + 
// ibeq vi07, vi05, L8 | nop + // nop | itof0.xyzw vf09, vf09 + vf09 = itof0(vf09); + if (vi07 == vi05) { + goto end_of_points2; + } + + // lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05 + vf10 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf05 = itof0(vf05); + + // lqi.xyzw vf06, vi05 | itof0.w vf15, vf15 + vf06 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf15 = itof12xyz_0w(vf15); + + // lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15 + vf16 = frag.lq_points_allow_past_end(vi05); + vi05++; // itof done above + + // nop | nop + // nop | muli.xyz vf09, vf09, I + vf09 = muli64_xyz(vf09); + + // ibeq vi07, vi05, L8 | muli.xyz vf05, vf05, I + // nop | itof0.xyzw vf10, vf10 + vf05 = muli64_xyz(vf05); + vf10 = itof0(vf10); + if (vi05 == vi07) { + goto end_of_points2; + } + + Vector4f vf11; + top_of_points2: + // L7: + // lqi.xyzw vf11, vi05 | itof0.xyzw vf06, vf06 + vf11 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf06 = itof0(vf06); + // lqi.xyzw vf07, vi05 | itof0.w vf16, vf16 + vf07 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf16 = itof12xyz_0w(vf16); + + // lqi.xyzw vf17, vi05 | itof12.xyz vf16, vf16 + vf17 = frag.lq_points_allow_past_end(vi05); + vi05++; + // sqi.xyzw vf09, vi08 | nop + frag.sq_points(vi08, vf09); + vi08++; + // sqi.xyzw vf05, vi08 | muli.xyz vf10, vf10, I + frag.sq_points(vi08, vf05); + vi08++; + vf10 = muli64_xyz(vf10); + // ibeq vi07, vi05, L8 | muli.xyz vf06, vf06, I + vf06 = muli64_xyz(vf06); + // sqi.xyzw vf15, vi08 | itof0.xyzw vf11, vf11 + frag.sq_points(vi08, vf15); + vi08++; + vf11 = itof0(vf11); + if (vi07 == vi05) { + goto end_of_points2; + } + + // lqi.xyzw vf09, vi05 | itof0.xyzw vf07, vf07 + vf09 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf07 = itof0(vf07); + // lqi.xyzw vf05, vi05 | itof0.w vf17, vf17 + vf05 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf17 = itof12xyz_0w(vf17); + // lqi.xyzw vf15, vi05 | itof12.xyz vf17, vf17 + vf15 = frag.lq_points_allow_past_end(vi05); + vi05++; + + // sqi.xyzw 
vf10, vi08 | nop + frag.sq_points(vi08, vf10); + vi08++; + // sqi.xyzw vf06, vi08 | muli.xyz vf11, vf11, I + frag.sq_points(vi08, vf06); + vi08++; + vf11 = muli64_xyz(vf11); + // ibeq vi07, vi05, L8 | muli.xyz vf07, vf07, I + // sqi.xyzw vf16, vi08 | itof0.xyzw vf09, vf09 + vf07 = muli64_xyz(vf07); + frag.sq_points(vi08, vf16); + vi08++; + vf09 = itof0(vf09); + if (vi07 == vi05) { + goto end_of_points2; + } + + // lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05 + vf10 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf05 = itof0(vf05); + // lqi.xyzw vf06, vi05 | itof0.w vf15, vf15 + vf06 = frag.lq_points_allow_past_end(vi05); + vi05++; + vf15 = itof12xyz_0w(vf15); + // lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15 + vf16 = frag.lq_points_allow_past_end(vi05); + vi05++; + + // sqi.xyzw vf11, vi08 | nop + frag.sq_points(vi08, vf11); + vi08++; + // sqi.xyzw vf07, vi08 | muli.xyz vf09, vf09, I + frag.sq_points(vi08, vf07); + vi08++; + vf09 = muli64_xyz(vf09); + // ibne vi07, vi05, L7 | muli.xyz vf05, vf05, I + // sqi.xyzw vf17, vi08 | itof0.xyzw vf10, vf10 + vf05 = muli64_xyz(vf05); + frag.sq_points(vi08, vf17); + vi08++; + vf10 = itof0(vf10); + if (vi07 != vi05) { + goto top_of_points2; + } + + end_of_points2: + // L8: + // mtir vi01, vf04.z | nop + u16 vi01 = vf04_z; + // mtir vi05, vf02.x | nop + frag.prog_info.skip_bp2 = vf02_x; + + // mtir vi14, vf02.y | nop + frag.prog_info.skip_ips = vf02_y; + // mtir vi04, vf03.x | nop + frag.prog_info.tgt_bp1_ptr = vf03_x; + // mtir vi06, vf03.y | nop + frag.prog_info.tgt_bp2_ptr = vf03_y; + // mtir vi07, vf03.z | nop + frag.prog_info.tgt_ip1_ptr = vf03_z; + // mtir vi08, vf03.w | nop + frag.prog_info.tgt_ip2_ptr = vf03_w; + // isw.x vi01, 971(vi00) | nop + frag.prog_info.misc_x = vi01; + // iaddi vi15, vi00, 0x0 | nop + frag.prog_info.kick_addr = 0; + // mtir vi03, vf_clrbuf.x | nop + frag.prog_info.clr_ptr = 198; // just forcing it to one buffer for now + // iaddiu vi_point_ptr, vi00, 0x32 | nop + frag.prog_info.point_ptr = 
0x32; + + // mr32.xyzw vf_gifbufs, vf_gifbufs | nop + // mfir.y vf_extra, vi00 | nop :e + // mfir.w vf_extra, vi00 | nop + float temp = vf_gifbufs.x(); + vf_gifbufs.x() = vf_gifbufs.y(); + vf_gifbufs.y() = vf_gifbufs.z(); + vf_gifbufs.z() = vf_gifbufs.w(); + vf_gifbufs.w() = temp; + vf_extra.y() = 0; + vf_extra.w() = 0; + frag.prog_info.gifbufs = vf_gifbufs; + frag.prog_info.extra = vf_extra; + // todo: maybe we need more. + } + + // assert(false); + } +} + +void debug_print_info(const std::vector& out) { + for (auto& proto : out) { + fmt::print("[{:40}]\n", proto.name); + fmt::print(" generic: {}\n", proto.uses_generic); + fmt::print(" use count: {}\n", proto.instances.size()); + fmt::print(" stiffness: {}\n", proto.stiffness); + } +} + +u16 float_to_u16(float f) { + u16 result; + memcpy(&result, &f, 2); + return result; +} + +int get_fancy_base(int draw1, int draw2) { + int total = draw1 + draw2; + total += 3; + total /= 4; + total *= 4; + return total; +} + +void emulate_tie_instance_program(std::vector& protos) { + for (auto& proto : protos) { + // bool first_instance = true; + // for (auto& instance : proto.instances) { + for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) { + auto& frag = proto.frags.at(frag_idx); + // for these sections, see the TIE Instance VU Program Doc. + int draw_1_count = 0; + int draw_2_count = 0; + int ip_1_count = 0; + + ///////////////////////////////////// + // SETUP + ///////////////////////////////////// + // this is some basic register setup for the TIE instance + // ad also for the pipelined Draw1 loop. + // we omit the pipeline startup here. 
+ + // this was set by the previous program that sets up this prototype frag + u16 clr_ptr = frag.prog_info.clr_ptr; + u16 tgt_bp1_ptr = frag.prog_info.tgt_bp1_ptr; + u16 tgt_bp2_ptr = frag.prog_info.tgt_bp2_ptr; + u16 tgt_ip1_ptr = frag.prog_info.tgt_ip1_ptr; + u16 tgt_ip2_ptr = frag.prog_info.tgt_ip2_ptr; + u16 skip_bp2 = frag.prog_info.skip_bp2; + u16 kick_addr = frag.prog_info.kick_addr; + u16 dest_ptr = 0; // they never initialized this... seems like a bug + + // lqi.xyzw vtx_0, vi_point_ptr | nop + // use hard-coded lower buffer for model data + u16 point_ptr = 0x32; + // lq.xyzw vf_inds, 6(vi_clr_ptr) | nop + // pipeline + + // lq.xyzw vf_clr2, 3(vi_clr_ptr) | nop + // lq.xyzw vf_mtx0, 0(vi_clr_ptr) | nop + // lq.xyzw vf_mtx1, 1(vi_clr_ptr) | nop + // lq.xyzw vf_clr1, 2(vi_clr_ptr) | nop + // this is the matrix + + // mtir vi_ind, vf_inds.x | nop + // pipeline + + // lqi.xyzw vf_tex0, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00 + // pipeline + // lq.xyzw vf_morph, 4(vi_clr_ptr) | maddax.xyzw ACC, vf_mtx0, vtx_0 + // we're going to ignore the "morph" and use hi-res everywehere + + // ilw.x vi01, 5(vi_clr_ptr) | madday.xyzw ACC, vf_mtx1, vtx_0 + // the vi01 is unused here. (indicates if we're generic or not) + + // lq.xyzw vf_clr0, 838(vi_ind) | maddz.xyzw vf_pos02, vf_clr1, vtx_0 + // pipeline + + // lqi.xyzw vf_vtx1, vi_point_ptr | nop + // pipeline + + // lq.xyzw vf_res02, 5(vi_clr_ptr) | nop + // loading the flags and stuff, which we will ignore too + + // iaddi vi_clr_ptr, vi_clr_ptr, 0x7 | nop + // u16 clr_ptr_base = clr_ptr; + clr_ptr += 6; // it says 7, but we want to point to the first index data. 
+ + // mtir vi_ind, vf_inds.y | addx.w vf_res13, vf_res02, vf00 <- flags crap + // div Q, vf00.w, vf_pos02.w | mulaw.xyzw ACC, vf_clr2, vf00 + // lqi.xyzw vf_tex1, vi_point_ptr | maddax.xyzw ACC, vf_mtx0, vf_vtx1 + // mtir vi01, vf_gifbufs.x | madday.xyzw ACC, vf_mtx1, vf_vtx1 + u16 vi01 = float_to_u16(frag.prog_info.gifbufs.x()); + + // lq.xyzw vf_mtx2, 838(vi_ind) | maddz.xyzw vf_pos13, vf_clr1, vf_vtx1 + + // isub vi01, vi01, vi_kick_addr | ftoi4.w vf_res02, vf_res02 + vi01 -= kick_addr; + + // iadd vi_tgt_bp1_ptr, vi_tgt_bp1_ptr, vi01 | ftoi4.w vf_res13, vf_res13 + tgt_bp1_ptr += vi01; + // iadd vi_tgt_bp2_ptr, vi_tgt_bp2_ptr, vi01 | nop + tgt_bp2_ptr += vi01; + + // fmt::print("b tgts: {} {}\n", tgt_bp1_ptr, tgt_bp2_ptr); + // lqi.xyzw vf_vtx2, vi_point_ptr | mul.xyz vf_pos02, vf_pos02, Q + // div Q, vf00.w, vf_pos13.w | mul.xyz vf_tex0, vf_tex0, Q + // mtir vi_ind, vf_inds.z | addx.w vtx_0, vtx_0, vf_gifbufs + // lqi.xyzw vf_tex2, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00 + // iadd vi_tgt_ip1_ptr, vi_tgt_ip1_ptr, vi01 | maddax.xyzw ACC, vf_mtx0, vf_vtx2 + // iadd vi_tgt_ip2_ptr, vi_tgt_ip2_ptr, vi01 | madday.xyzw ACC, vf_mtx1, vf_vtx2 + tgt_ip1_ptr += vi01; + tgt_ip2_ptr += vi01; + // fmt::print("i tgts: {} {}\n", tgt_ip1_ptr, tgt_ip2_ptr); + // lq.xyzw vf_mtx3, 838(vi_ind) | ftoi4.xyz vf_res02, vf_pos02 + // ibeq vi_tgt_bp1_ptr, vi_dest_ptr, L40 | maddz.xyzw vf_pos02, vf_clr1, vf_vtx2 + // iadd vi_kick_addr, vi_kick_addr, vi01 | nop + kick_addr += vi01; + if (tgt_bp1_ptr == dest_ptr) { + fmt::print("DRAW FINISH 1 (no points)\n"); + goto program_end; + } + + ///////////////////////////////////// + // DRAW 1 + ///////////////////////////////////// + { + // Draw 1 computes and sets vertices that appear once. + // Note that it does 3 more vertices after reaching the target pointer. + bool reached_target = false; + int past_target = 0; + while (past_target < 3) { + // there's 1 load of colors per 4x verts. 
+ // (lqi.xyzw vf_inds, vi_clr_ptr | nop) + // these are different per instance, but index into a palette shared by all instances + // for the i-th point, we just load the i-th color index. + + // This is reordered. + // A "T" means it is part of transformation and we leave it out. + // A number corresponds to the line below. + + // (4) mtir vi_dest_ptr, vtx_0.w | nop + // (2) lqi.xyzw vi_vtx3, vi_point_ptr | (T) mul.xyz vf_pos13, vf_pos13, Q + // (T) div Q, vf00.w, vf_pos02.w | (T) mul.xyz vf_tex1, vf_tex1, Q + // (1) mtir vi_ind, vf_inds.w | (3) addx.w vf_vtx1, vf_vtx1, vf_gifbufs + // (5) lqi.xyzw vi_tex3, vi_point_ptr | (T) mulaw.xyzw ACC, vf_clr2, vf00 + // (7) sq.xyzw vf_tex0, 0(vi_dest_ptr) | (T) maddax.xyzw ACC, vf_mtx0, vi_vtx3 + // (7) sq.xyzw vf_clr0, 1(vi_dest_ptr) | (T) madday.xyzw ACC, vf_mtx1, vi_vtx3 + // (6) lq.xyzw vi_clr3, 838(vi_ind) | (T) ftoi4.xyz vf_res13, vf_pos13 + // ibeq vi_tgt_bp1_ptr, vi_dest_ptr, L13 | (T) maddz.xyzw vf_pos13, vf_clr1, vi_vtx3 + // (7) sq.xyzw vf_res02, 2(vi_dest_ptr) | nop + + // 01 - grab the index for this vertex color + // we don't want to actually do the lookup here, just remember where we would have + // looked. + u32 clr_idx_idx = draw_1_count; + + // 02 - load the floating point vertex values + auto vert_pos = frag.lq_points(point_ptr); + point_ptr++; + + // 03 - do the weird gifbuf triple buffer with floats crap + float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x(); + + // 04 - now get the destination + dest_ptr = float_to_u16(vtx_w); + + // 05 - load tex coords + auto tex_coord = frag.lq_points(point_ptr); + point_ptr++; + + // 06 - actually do the color load in the palette. 
(skip) + + // 07 - set vertex + TieProtoVertex vertex_info; + vertex_info.color_index_index = clr_idx_idx; + vertex_info.pos.x() = vert_pos.x(); + vertex_info.pos.y() = vert_pos.y(); + vertex_info.pos.z() = vert_pos.z(); + vertex_info.tex.x() = tex_coord.x(); + vertex_info.tex.y() = tex_coord.y(); + vertex_info.tex.z() = tex_coord.z(); + + bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second; + assert(inserted); + + if (reached_target) { + past_target++; + } + + if (dest_ptr == tgt_bp1_ptr) { + reached_target = true; + } + + draw_1_count++; + } + } + + if (!skip_bp2) { + // bp2 setup: + // The BP2 drawing is similar to BP1, but duplicate draws vertices. + bool reached_target = false; + int past_target = 0; + while (past_target < 2) { + u32 clr_idx_idx = draw_1_count + draw_2_count; + auto vert_pos = frag.lq_points(point_ptr); + point_ptr++; + float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x(); + dest_ptr = float_to_u16(vtx_w); + auto tex_coord = frag.lq_points(point_ptr); + // fmt::print("texw: [{}] {}\n", point_ptr, tex_coord.w()); + point_ptr++; + float tex_w = tex_coord.w() + frag.prog_info.gifbufs.x(); + u16 dest2_ptr = float_to_u16(tex_w); + + TieProtoVertex vertex_info; + vertex_info.color_index_index = clr_idx_idx; + vertex_info.pos.x() = vert_pos.x(); + vertex_info.pos.y() = vert_pos.y(); + vertex_info.pos.z() = vert_pos.z(); + vertex_info.tex.x() = tex_coord.x(); + vertex_info.tex.y() = tex_coord.y(); + vertex_info.tex.z() = tex_coord.z(); + + // fmt::print("double draw: {} {}\n", dest_ptr, dest2_ptr); + bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second; + assert(inserted); + + bool inserted2 = frag.vertex_by_dest_addr.insert({(u32)dest2_ptr, vertex_info}).second; + assert(inserted2); + + if (reached_target) { + past_target++; + } + + if (dest_ptr == tgt_bp2_ptr) { + reached_target = true; + } + + draw_2_count++; + } + + // setup + // ibne vi00, vi_skip_bp2, L24 | mul.xyz vf_pos13, 
vf_pos13, Q + // lqi.xyzw vi_vtx3, vi_point_ptr | mul.xyz vf_tex1, vf_tex1, Q + // div Q, vf00.w, vf_pos02.w | addx.w vf_vtx1, vf_vtx1, vf_gifbufs + // mtir vi_ind, vf_inds.w | mulaw.xyzw ACC, vf_clr2, vf00 + // lqi.xyzw vf_inds, vi_clr_ptr | nop + // sq.xyzw vf_tex0, 0(vi_dest_ptr) | addx.w vf_vtx2, vf_vtx2, vf_gifbufs + // sq.xyzw vf_clr0, 1(vi_dest_ptr) | maddax.xyzw ACC, vf_mtx0, vi_vtx3 + // lqi.xyzw vi_tex3, vi_point_ptr | madday.xyzw ACC, vf_mtx1, vi_vtx3 + // lq.xyzw vi_clr3, 838(vi_ind) | ftoi4.xyz vf_res13, vf_pos13 + // lqi.xyzw vtx_0, vi_point_ptr | maddz.xyzw vf_pos13, vf_clr1, vi_vtx3 + // sq.xyzw vf_res02, 2(vi_dest_ptr) | mul.xyz vf_pos02, vf_pos02, Q + // mtir vi_dest_ptr, vf_vtx1.w | mul.xyz vf_tex2, vf_tex2, Q + // lqi.xyzw vf_tex0, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00 + // mtir vi_ind, vf_inds.x | maddax.xyzw ACC, vf_mtx0, vtx_0 + // nop | madday.xyzw ACC, vf_mtx1, vtx_0 + // div Q, vf00.w, vf_pos13.w | ftoi4.xyz vf_res02, vf_pos02 + // sq.xyzw vf_tex1, 0(vi_dest_ptr) | maddz.xyzw vf_pos02, vf_clr1, vtx_0 + // sq.xyzw vf_mtx2, 1(vi_dest_ptr) | nop + // sq.xyzw vf_res13, 2(vi_dest_ptr) | nop + // mtir vi_dest_ptr, vf_vtx2.w | nop + // lq.xyzw vf_clr0, 838(vi_ind) | addx.w vi_vtx3, vi_vtx3, vf_gifbufs + // div Q, vf00.w, vf_pos02.w | mul.xyz vf_pos13, vf_pos13, Q + // sq.xyzw vf_tex2, 0(vi_dest_ptr) | mul.xyz vi_tex3, vi_tex3, Q + // sq.xyzw vf_mtx3, 1(vi_dest_ptr) | addx.w vi_tex3, vi_tex3, vf_gifbufs + // sq.xyzw vf_res02, 2(vi_dest_ptr) | nop + // b L14 | ftoi4.xyz vf_res13, vf_pos13 + // mtir vi_dest_ptr, vi_vtx3.w | nop + + // bp2 chunk (out of 4) + // lqi.xyzw vf_vtx1, vi_point_ptr | nop + // mtir vi_ind, vf_inds.y | nop + // mtir vi13, vi_tex3.w | mulaw.xyzw ACC, vf_clr2, vf00 + // sq.xyzw vi_tex3, 0(vi_dest_ptr) | addx.w vtx_0, vtx_0, vf_gifbufs + // sq.xyzw vi_clr3, 1(vi_dest_ptr) | maddax.xyzw ACC, vf_mtx0, vf_vtx1 + // sq.xyzw vf_res13, 2(vi_dest_ptr) | madday.xyzw ACC, vf_mtx1, vf_vtx1 + // lqi.xyzw vf_tex1, vi_point_ptr | 
maddz.xyzw vf_pos13, vf_clr1, vf_vtx1 + // lq.xyzw vf_mtx2, 838(vi_ind) | mul.xyz vf_pos02, vf_pos02, Q + // sq.xyzw vi_tex3, 0(vi13) | mul.xyz vf_tex0, vf_tex0, Q + // sq.xyzw vi_clr3, 1(vi13) | addx.w vf_tex0, vf_tex0, vf_gifbufs + // sq.xyzw vf_res13, 2(vi13) | nop + // div Q, vf00.w, vf_pos13.w | nop + // ibeq vi_tgt_bp2_ptr, vi_dest_ptr, L18 | ftoi4.xyz vf_res02, vf_pos02 + // mtir vi_dest_ptr, vtx_0.w | nop + } + + if (!frag.prog_info.skip_ips) { + // Sadly TIE has no special case for highest lod. + // this is surprising to me, but really does seem to be the case. + + // L31 + // lqi.xyzw vf_vtx1, vi_point_ptr | mulaw.xyzw ACC, vf_clr2, vf00 + // lqi.xyz vf_xyofs, vi_point_ptr | maddax.xyzw ACC, vf_mtx0, vtx_0 + // lqi.xyzw vf_tex1, vi_point_ptr | madday.xyzw ACC, vf_mtx1, vtx_0 + + // we have an additional "xyofs" here, but otherwise similar + + // mtir vi_dest_ptr, vf_vtx2.w | maddz.xyzw vf_pos02, vf_clr1, vtx_0 + // as usual, using vtx.w for dest pointer. + + // mtir vi_ind, vf_inds.x | mulaw.xyzw ACC, vf_clr_val1, vf_morph + // mtir vi10, vf_inds.y | maddz.xyzw vf_clr0, vf_clr0, vf_morph + // mtir vi11, vf_inds.z | mulx.xyz vf_vtx1, vf_vtx1, vf_morph + // inds works differently. There is a qw per vertex, containing 3 indices. + // the formula is a pain, so I will ignore it for today. + // ideally we can figure out the constant value of vf_morph first, to simplify all this. 
+ // + + // sq.xyzw vf_tex2, 0(vi_dest_ptr) | mul.xyz vf_res13, vf_pos13, Q + // lq.xyzw vf_mtx2, 838(vi_ind) | mul.xyz vi_tex3, vi_tex3, Q + // lq.xyzw vf_clr_val1, 838(vi10) | nop + // lq.xyzw vf_clr_val2, 838(vi11) | nop + // div Q, vf00.w, vf_pos02.w | ftoi4.xyz vf_res13, vf_res13 + // sq.xyzw vf_mtx3, 1(vi_dest_ptr) | add.xyzw vf_vtx1, vf_vtx1, vf_xyofs + // lqi.xyzw vf_inds, vi_clr_ptr | mulay.xyzw ACC, vf_clr_val1, vf_morph + // ibeq vi_tgt_ip1_ptr, vi_dest_ptr, L35 | nop + // sq.xyzw vf_res02, 2(vi_dest_ptr) | maddy.xyzw vf_clr_val1, vf_clr_val2, vf_morph + + int base = get_fancy_base(draw_1_count, draw_2_count); + + while (dest_ptr != tgt_ip1_ptr) { + // todo - might be some rounding here. + u32 clr_idx_idx = base + ip_1_count * 4 + 0; + auto vert_pos = frag.lq_points(point_ptr); + point_ptr++; + auto xy_offs = frag.lq_points(point_ptr); + point_ptr++; + float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x(); + dest_ptr = float_to_u16(vtx_w); + auto tex_coord = frag.lq_points(point_ptr); + point_ptr++; + + TieProtoVertex vertex_info; + vertex_info.color_index_index = clr_idx_idx; + // random guess + vert_pos = xy_offs; + vertex_info.pos.x() = vert_pos.x(); + vertex_info.pos.y() = vert_pos.y(); + vertex_info.pos.z() = vert_pos.z(); + vertex_info.tex.x() = tex_coord.x(); + vertex_info.tex.y() = tex_coord.y(); + vertex_info.tex.z() = tex_coord.z(); + + bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second; + assert(inserted); + + ip_1_count++; + } + + bool first_iter = true; + while (dest_ptr != tgt_ip2_ptr) { + // todo - might be some rounding here. 
+ u32 clr_idx_idx = base + ip_1_count * 4 + 0; + auto vert_pos = frag.lq_points(point_ptr); + point_ptr++; + auto xy_offs = frag.lq_points(point_ptr); + point_ptr++; + float vtx_w = vert_pos.w() + frag.prog_info.gifbufs.x(); + dest_ptr = float_to_u16(vtx_w); + auto tex_coord = frag.lq_points(point_ptr); + point_ptr++; + float tex_w = tex_coord.w() + frag.prog_info.gifbufs.x(); + u16 dest2_ptr = float_to_u16(tex_w); + + TieProtoVertex vertex_info; + vertex_info.color_index_index = clr_idx_idx; + // random guess + vert_pos = xy_offs; + vertex_info.pos.x() = vert_pos.x(); + vertex_info.pos.y() = vert_pos.y(); + vertex_info.pos.z() = vert_pos.z(); + vertex_info.tex.x() = tex_coord.x(); + vertex_info.tex.y() = tex_coord.y(); + vertex_info.tex.z() = tex_coord.z(); + + bool inserted = frag.vertex_by_dest_addr.insert({(u32)dest_ptr, vertex_info}).second; + assert(inserted); + + // first iteration of ip2 is a bit strange because how it jumps from loop to loop. + // in some cases it uses ip2 on a point that should have used ip1 with the same addr + // twice. I am pretty sure it's not our fault because we get exactly the right dvert. 
+ bool inserted2 = frag.vertex_by_dest_addr.insert({(u32)dest2_ptr, vertex_info}).second; + if (!first_iter) { + assert(inserted2); + } + first_iter = false; + ip_1_count++; + } + } + + // now, let's check count: + assert(frag.vertex_by_dest_addr.size() == frag.expected_dverts); + + program_end:; + // assert(false); + } + + // } + } +} + +// makes per-prototype meshes +void emulate_kicks(std::vector& protos) { + for (auto& proto : protos) { + for (auto& frag : proto.frags) { + auto adgif_it = frag.prog_info.adgif_offset_in_gif_buf_qw.begin(); + auto adgif_end = frag.prog_info.adgif_offset_in_gif_buf_qw.end(); + auto str_it = frag.prog_info.str_gifs.begin(); + auto str_end = frag.prog_info.str_gifs.end(); + + assert(frag.prog_info.adgif_offset_in_gif_buf_qw.at(0) == 0); + assert(frag.prog_info.adgif_offset_in_gif_buf_qw.size() == frag.adgifs.size()); + const AdgifInfo* adgif_info = nullptr; + int expected_next_tag = 0; + + // loop over strgifs + while (str_it != str_end) { + // try advance adgif + if (adgif_it != adgif_end && (*adgif_it) == expected_next_tag) { + int idx = adgif_it - frag.prog_info.adgif_offset_in_gif_buf_qw.begin(); + adgif_info = &frag.adgifs.at(idx); + // fmt::print("using adgif {}\n", *adgif_it); + expected_next_tag += 6; + adgif_it++; + } + assert(adgif_info); + + // fmt::print("strip: {}\n", str_it->address); + assert(expected_next_tag == str_it->address); + expected_next_tag += 3 * str_it->nloop + 1; + // here we have the right str and adgif. + + // kinda stupid, but we have to guess the base address of the gifbuf + // 286 gifbuf + // 470 gifbuf again + // 654 ?? + assert(!frag.vertex_by_dest_addr.empty()); + int gifbuf_addr = frag.vertex_by_dest_addr.begin()->first; + int base_address = 286; + if (gifbuf_addr >= 654) { + base_address = 654; + } else if (gifbuf_addr >= 470) { + base_address = 470; + } + + // now, vertices! 
+ frag.strips.emplace_back(); + auto& strip = frag.strips.back(); + strip.adgif = *adgif_info; + for (int vtx = 0; vtx < str_it->nloop; vtx++) { + u32 vtx_addr = str_it->address + 1 + (3 * vtx) + base_address; + strip.verts.push_back(frag.vertex_by_dest_addr.at(vtx_addr)); + } + + str_it++; + } + + assert(adgif_it == adgif_end); + } + } +} + +std::string debug_dump_proto_to_obj(const TieProtoInfo& proto) { + std::vector> verts; + std::vector> tcs; + std::vector> faces; + + for (auto& frag : proto.frags) { + for (auto& strip : frag.strips) { + // add verts... + assert(strip.verts.size() >= 3); + + int vert_idx = 0; + + int vtx_idx_queue[3]; + + int q_idx = 0; + int startup = 0; + while (vert_idx < (int)strip.verts.size()) { + verts.push_back(strip.verts.at(vert_idx).pos / 65536); // no idea + tcs.push_back(math::Vector{strip.verts.at(vert_idx).tex.x(), + strip.verts.at(vert_idx).tex.y()}); + vert_idx++; + vtx_idx_queue[q_idx++] = verts.size(); + + // wrap the index + if (q_idx == 3) { + q_idx = 0; + } + + // bump the startup + if (startup < 3) { + startup++; + } + + if (startup >= 3) { + faces.push_back( + math::Vector{vtx_idx_queue[0], vtx_idx_queue[1], vtx_idx_queue[2]}); + } + } + } + } + + std::string result; + for (auto& vert : verts) { + result += fmt::format("v {} {} {}\n", vert.x(), vert.y(), vert.z()); + } + for (auto& tc : tcs) { + result += fmt::format("vt {} {}\n", tc.x(), tc.y()); + } + for (auto& face : faces) { + result += fmt::format("f {}/{} {}/{} {}/{}\n", face.x(), face.x(), face.y(), face.y(), face.z(), + face.z()); + } + + return result; +} + +math::Vector transform_tie(const std::array mat, + const math::Vector3f& pt) { + auto temp = mat[0] * pt.x() + mat[1] * pt.y() + mat[2] * pt.z() + mat[3]; + + // math::Vector4f temp; + // temp.x() = pt.x(); + // temp.y() = pt.y(); + // temp.z() = pt.z(); + // temp += mat[3]; + + math::Vector3f result; + result.x() = temp.x(); + result.y() = temp.y(); + result.z() = temp.z(); + return result; +} + 
+std::string dump_full_to_obj(const std::vector& protos) { + std::vector> verts; + std::vector> tcs; + std::vector> faces; + + for (auto& proto : protos) { + for (auto& inst : proto.instances) { + auto& mat = inst.mat; + for (auto& frag : proto.frags) { + for (auto& strip : frag.strips) { + // add verts... + assert(strip.verts.size() >= 3); + + int vert_idx = 0; + + int vtx_idx_queue[3]; + + int q_idx = 0; + int startup = 0; + while (vert_idx < (int)strip.verts.size()) { + verts.push_back(transform_tie(mat, strip.verts.at(vert_idx).pos) / 65536); // no idea + tcs.push_back(math::Vector{strip.verts.at(vert_idx).tex.x(), + strip.verts.at(vert_idx).tex.y()}); + vert_idx++; + vtx_idx_queue[q_idx++] = verts.size(); + + // wrap the index + if (q_idx == 3) { + q_idx = 0; + } + + // bump the startup + if (startup < 3) { + startup++; + } + + if (startup >= 3) { + faces.push_back( + math::Vector{vtx_idx_queue[0], vtx_idx_queue[1], vtx_idx_queue[2]}); + } + } + } + } + } + } + + std::string result; + for (auto& vert : verts) { + result += fmt::format("v {} {} {}\n", vert.x(), vert.y(), vert.z()); + } + for (auto& tc : tcs) { + result += fmt::format("vt {} {}\n", tc.x(), tc.y()); + } + for (auto& face : faces) { + result += fmt::format("f {}/{} {}/{} {}/{}\n", face.x(), face.x(), face.y(), face.y(), face.z(), + face.z()); + } + + return result; +} + +// The time of day stuff has a lot of lookups +// Each prototype has a palette. This palette is generated based on the time of day, blending +// together 8 colors from 8 times. + +// Each instance is made up of fragments. +// The instance provides a color list per fragment. These are indices into the palette. + +// So, to know the color we need: +// - which prototype +// - which instance +// - which fragment +// - which color within the fragment +// and this tells us an index in the time of day palette. 
+
+// All time-of-day colors of every prototype, concatenated into one palette.
+struct BigPalette {
+  std::vector colors;
+};
+
+// Build the combined palette for all prototypes.
+// Each prototype's time_of_day_colors are appended to the result in prototype order. The index
+// at which a prototype's colors start is written to color_index_offset_in_big_palette of every
+// instance fragment of that prototype.
+BigPalette make_big_palette(std::vector& protos) {
+  BigPalette result;
+
+  for (auto& proto : protos) {
+    // index of this prototype's first color in the combined palette
+    u32 base_color_of_proto = result.colors.size();
+
+    // add all colors
+    for (auto& color : proto.time_of_day_colors) {
+      result.colors.push_back(color);
+    }
+
+    for (auto& instance : proto.instances) {
+      assert(proto.frags.size() == instance.frags.size());
+      for (u32 frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
+        auto& ifrag = instance.frags.at(frag_idx);
+        ifrag.color_index_offset_in_big_palette = base_color_of_proto;
+      }
+    }
+  }
+
+  // the combined palette must stay below UINT16_MAX entries
+  assert(result.colors.size() < UINT16_MAX);
+  return result;
+}
+
+// Set the alpha blend mode of `mode` from a raw GS alpha register value (decoded with GsAlpha).
+// Only the four (a, b, c, d) combinations below are recognized. Anything else is printed and
+// falls back to SRC_DST_SRC_DST.
+void update_mode_from_alpha1(u64 val, DrawMode& mode) {
+  using BlendMode = GsAlpha::BlendMode;
+  GsAlpha reg(val);
+  const auto a = reg.a_mode();
+  const auto b = reg.b_mode();
+  const auto c = reg.c_mode();
+  const auto d = reg.d_mode();
+
+  if (a == BlendMode::SOURCE && b == BlendMode::DEST && c == BlendMode::SOURCE &&
+      d == BlendMode::DEST) {
+    // (Cs - Cd) * As + Cd
+    // Cs * As + (1 - As) * Cd
+    mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
+  } else if (a == BlendMode::SOURCE && b == BlendMode::ZERO_OR_FIXED && c == BlendMode::SOURCE &&
+             d == BlendMode::DEST) {
+    // (Cs - 0) * As + Cd
+    // Cs * As + (1) * CD
+    mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_SRC_DST);
+  } else if (a == BlendMode::SOURCE && b == BlendMode::ZERO_OR_FIXED &&
+             c == BlendMode::ZERO_OR_FIXED && d == BlendMode::DEST) {
+    assert(reg.fix() == 128);
+    // Cv = (Cs - 0) * FIX + Cd
+    // if fix = 128, it works out to 1.0
+    // src plus dest
+    mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_0_FIX_DST);
+  } else if (a == BlendMode::SOURCE && b == BlendMode::DEST && c == BlendMode::ZERO_OR_FIXED &&
+             d == BlendMode::DEST) {
+    // Cv = (Cs - Cd) * FIX + Cd
+    assert(reg.fix() == 64);
+    mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_FIX_DST);
+  } else {
+    fmt::print("unsupported blend: a {} b {} c {} d {}\n", (int)a, (int)b, (int)c, (int)d);
+    mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
+    // assert(false);
+  }
+}
+
+// Build the DrawMode for a strip from its adgif settings.
+//  - use_atest: enable the alpha test (GEQUAL against 0x26, keep on fail), otherwise disable it.
+//  - use_decal: enable decal mode.
+// Depth write and a GEQUAL depth test are always enabled. The blend mode comes from
+// info.alpha_val. The clamp flags come from info.clamp_val (bit 0 -> S, bit 2 -> T); any other
+// clamp bits are printed and trip an assert.
+DrawMode process_draw_mode(const AdgifInfo& info, bool use_atest, bool use_decal) {
+  DrawMode mode;
+  mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL);
+  if (use_atest) {
+    mode.enable_at();
+    mode.set_aref(0x26);
+    mode.set_alpha_fail(GsTest::AlphaFail::KEEP);
+    mode.set_alpha_test(DrawMode::AlphaTest::GEQUAL);
+  } else {
+    mode.disable_at();
+  }
+  if (use_decal) {
+    mode.enable_decal();
+  }
+  mode.enable_depth_write();
+  mode.enable_zt();                            // :zte #x1
+  mode.set_depth_test(GsTest::ZTest::GEQUAL);  // :ztst (gs-ztest greater-equal))
+  mode.disable_ab();
+  mode.set_alpha_blend(DrawMode::AlphaBlend::SRC_DST_SRC_DST);
+
+  update_mode_from_alpha1(info.alpha_val, mode);
+  if (!(info.clamp_val == 0b101 || info.clamp_val == 0 || info.clamp_val == 1 ||
+        info.clamp_val == 0b100)) {
+    fmt::print("clamp: 0x{:x}\n", info.clamp_val);
+    assert(false);
+  }
+
+  mode.set_clamp_s_enable(info.clamp_val & 0b1);
+  mode.set_clamp_t_enable(info.clamp_val & 0b100);
+
+  return mode;
+}
+
+// Flatten every strip of every instance into tree.vertices and tree.static_draws.
+// Strips that share a texture and draw mode are appended to the same StripDraw. Textures that
+// are not yet in lev.textures are copied in from tdb. When done, the draws are sorted by texture.
+// we need the lev to pool textures with tfrag.
+void add_vertices_and_static_draw(tfrag3::TieTree& tree,
+                                  tfrag3::Level& lev,
+                                  const TextureDB& tdb,
+                                  const std::vector& protos) {
+  // our current approach for static draws is just to flatten to giant mesh.
+
+  // texture index in lev.textures -> indices of the draws in tree.static_draws that use it
+  std::unordered_map> draws_by_tex;
+
+  for (auto& proto : protos) {
+    for (auto& inst : proto.instances) {
+      for (size_t frag_idx = 0; frag_idx < proto.frags.size(); frag_idx++) {
+        auto& frag = proto.frags[frag_idx];
+        auto& ifrag = inst.frags.at(frag_idx);
+        for (auto& strip : frag.strips) {
+          // what texture are we using?
+          u32 combo_tex = strip.adgif.combo_tex;
+
+          // try looking it up in the existing textures
+          u32 idx_in_lev_data = UINT32_MAX;
+          for (u32 i = 0; i < lev.textures.size(); i++) {
+            if (lev.textures[i].combo_id == combo_tex) {
+              idx_in_lev_data = i;
+              break;
+            }
+          }
+
+          if (idx_in_lev_data == UINT32_MAX) {
+            // didn't find it, have to add a new one
+            auto tex_it = tdb.textures.find(combo_tex);
+            if (tex_it == tdb.textures.end()) {
+              bool ok_to_miss = false;  // TODO
+              if (ok_to_miss) {
+                // we're missing a texture, just use the first one.
+                tex_it = tdb.textures.begin();
+              } else {
+                fmt::print(
+                    "texture {} wasn't found. make sure it is loaded somehow. You may need to "
+                    "include "
+                    "ART.DGO or GAME.DGO in addition to the level DGOs for shared textures.\n",
+                    combo_tex);
+                fmt::print("tpage is {}\n", combo_tex >> 16);
+                fmt::print("id is {} (0x{:x})\n", combo_tex & 0xffff, combo_tex & 0xffff);
+                // NOTE(review): when asserts are compiled out (NDEBUG) execution continues and
+                // tex_it (== end()) is dereferenced below. Consider a hard failure here.
+                assert(false);
+              }
+            }
+            idx_in_lev_data = lev.textures.size();
+            lev.textures.emplace_back();
+            auto& new_tex = lev.textures.back();
+            new_tex.combo_id = combo_tex;
+            new_tex.w = tex_it->second.w;
+            new_tex.h = tex_it->second.h;
+            new_tex.debug_name = tex_it->second.name;
+            new_tex.debug_tpage_name = tdb.tpage_names.at(tex_it->second.page);
+            new_tex.data = tex_it->second.rgba_bytes;
+          }
+
+          // determine the draw mode
+          DrawMode mode =
+              process_draw_mode(strip.adgif, frag.prog_info.misc_x == 0, frag.has_magic_tex0_bit);
+
+          // okay, we now have a texture and draw mode, let's see if we can add to an existing...
+          auto existing_draws_in_tex = draws_by_tex.find(idx_in_lev_data);
+          tfrag3::StripDraw* draw_to_add_to = nullptr;
+          if (existing_draws_in_tex != draws_by_tex.end()) {
+            for (auto idx : existing_draws_in_tex->second) {
+              if (tree.static_draws.at(idx).mode == mode) {
+                draw_to_add_to = &tree.static_draws[idx];
+                // a draw is only created when no draw with this texture and mode exists, so
+                // there is at most one match.
+                break;
+              }
+            }
+          }
+
+          if (!draw_to_add_to) {
+            // nope, need to create a new draw
+            tree.static_draws.emplace_back();
+            draws_by_tex[idx_in_lev_data].push_back(tree.static_draws.size() - 1);
+            draw_to_add_to = &tree.static_draws.back();
+            draw_to_add_to->mode = mode;
+            draw_to_add_to->tree_tex_id = idx_in_lev_data;
+          }
+
+          // now we have a draw, time to add vertices
+          tfrag3::StripDraw::VisGroup vgroup;
+          vgroup.vis_idx = inst.vis_id;         // associate with the tfrag for culling
+          vgroup.num = strip.verts.size() + 1;  // one for the primitive restart!
+          // a strip of N vertices is N - 2 triangles. size() is unsigned, so guard the
+          // subtraction: a strip with fewer than 2 vertices would otherwise wrap around.
+          if (strip.verts.size() > 2) {
+            draw_to_add_to->num_triangles += strip.verts.size() - 2;
+          }
+          for (auto& vert : strip.verts) {
+            tfrag3::PreloadedVertex vtx;
+            // todo fields
+            auto tf = transform_tie(inst.mat, vert.pos);
+            vtx.x = tf.x();
+            vtx.y = tf.y();
+            vtx.z = tf.z();
+            vtx.s = vert.tex.x();
+            vtx.t = vert.tex.y();
+            vtx.q = vert.tex.z();
+            if (vert.color_index_index == UINT32_MAX) {
+              vtx.color_index = 0;
+            } else {
+              // check the index before using it
+              assert(vert.color_index_index < ifrag.color_indices.size());
+              vtx.color_index = ifrag.color_indices.at(vert.color_index_index);
+              // make the index relative to the combined palette
+              vtx.color_index += ifrag.color_index_offset_in_big_palette;
+            }
+
+            size_t vert_idx = tree.vertices.size();
+            tree.vertices.push_back(vtx);
+            draw_to_add_to->vertex_index_stream.push_back(vert_idx);
+          }
+          // UINT32_MAX in the index stream is the primitive restart counted in vgroup.num
+          draw_to_add_to->vertex_index_stream.push_back(UINT32_MAX);
+          draw_to_add_to->vis_groups.push_back(vgroup);
+        }
+      }
+    }
+  }
+
+  // group draws by texture. draws_by_tex holds pre-sort indices and is not used after this.
+  std::stable_sort(tree.static_draws.begin(), tree.static_draws.end(),
+                   [](const tfrag3::StripDraw& a, const tfrag3::StripDraw& b) {
+                     return a.tree_tex_id < b.tree_tex_id;
+                   });
+}
+
+// Convert one TIE drawable tree into a tfrag3::TieTree and append it to out.tie_trees.
+// Along the way this writes debug .obj dumps (one per prototype plus ALL.obj) into
+// debug_out/tie-<debug_name>/.
+void extract_tie(const level_tools::DrawableTreeInstanceTie* tree,
+                 const std::string& debug_name,
+                 const std::vector& tex_map,
+                 const TextureDB& tex_db,
+                 tfrag3::Level& out) {
+  tfrag3::TieTree this_tree;
+
+  // sanity check the vis tree (not a perfect check, but this is used in game and should be right)
+  assert(tree->length == (int)tree->arrays.size());
+  assert(tree->length > 0);
+  auto last_array = tree->arrays.back().get();
+  auto as_instance_array = dynamic_cast(last_array);
+  assert(as_instance_array);
+  assert(as_instance_array->length == (int)as_instance_array->instances.size());
+  assert(as_instance_array->length > 0);
+  // instance ids must be consecutive, starting from the id of the first instance
+  u16 idx = as_instance_array->instances.front().id;
+  for (auto& elt : as_instance_array->instances) {
+    assert(elt.id == idx);
+    idx++;
+  }
+  bool ok = verify_node_indices(tree);
+  assert(ok);
+  fmt::print(" tree has {} arrays and {} instances\n", tree->length, as_instance_array->length);
+
+  // extract the vis tree. Note that this extracts the tree only down to the last draw node, a
+  // parent of between 1 and 8 instances.
+  extract_vis_data(tree, as_instance_array->instances.front().id, this_tree);
+
+  // map of instance ID to its parent. We'll need this later.
+  std::unordered_map instance_parents;
+  for (size_t node_idx = 0; node_idx < this_tree.bvh.vis_nodes.size(); node_idx++) {
+    const auto& node = this_tree.bvh.vis_nodes[node_idx];
+    if (node.flags == 0) {
+      for (int i = 0; i < node.num_kids; i++) {
+        instance_parents[node.child_id + i] = node_idx;
+      }
+    }
+  }
+
+  auto info = collect_instance_info(as_instance_array, &tree->prototypes.prototype_array_tie.data);
+  update_proto_info(&info, tex_map, tex_db, tree->prototypes.prototype_array_tie.data);
+  // debug_print_info(info);
+  emulate_tie_prototype_program(info);
+  emulate_tie_instance_program(info);
+  emulate_kicks(info);
+
+  auto dir = file_util::get_file_path({fmt::format("debug_out/tie-{}/", debug_name)});
+  file_util::create_dir_if_needed(dir);
+  for (auto& proto : info) {
+    auto data = debug_dump_proto_to_obj(proto);
+    file_util::write_text_file(fmt::format("{}/{}.obj", dir, proto.name), data);
+    // file_util::create_dir_if_needed()
+  }
+
+  auto full = dump_full_to_obj(info);
+  file_util::write_text_file(fmt::format("{}/ALL.obj", dir), full);
+
+  // the palette must be built first: it sets the per-fragment color offsets that
+  // add_vertices_and_static_draw reads.
+  auto full_palette = make_big_palette(info);
+  add_vertices_and_static_draw(this_tree, out, tex_db, info);
+
+  // the vis groups were tagged with instance ids. Replace each with the index of the vis node
+  // that is the instance's parent, or UINT32_MAX if it has none.
+  for (auto& draw : this_tree.static_draws) {
+    for (auto& str : draw.vis_groups) {
+      auto it = instance_parents.find(str.vis_idx);
+      if (it == instance_parents.end()) {
+        str.vis_idx = UINT32_MAX;
+      } else {
+        str.vis_idx = it->second;
+      }
+    }
+  }
+
+  // full_palette is not used again, so hand its storage over instead of copying it
+  this_tree.colors = std::move(full_palette.colors);
+  fmt::print("TIE tree has {} draws\n", this_tree.static_draws.size());
+  out.tie_trees.push_back(std::move(this_tree));
+}
+}  // namespace decompiler
\ No newline at end of file
diff --git a/decompiler/level_extractor/extract_tie.h b/decompiler/level_extractor/extract_tie.h
new file mode 100644
index 0000000000..4f0154e4b6
--- /dev/null
+++ b/decompiler/level_extractor/extract_tie.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "extract_tie.h"
+#include "decompiler/level_extractor/BspHeader.h"
+#include
"decompiler/data/TextureDB.h" +#include "common/custom_data/Tfrag3Data.h" + +namespace decompiler { + +void extract_tie(const level_tools::DrawableTreeInstanceTie* tree, + const std::string& debug_name, + const std::vector& tex_map, + const TextureDB& tex_db, + tfrag3::Level& out); +} \ No newline at end of file diff --git a/decompiler/main.cpp b/decompiler/main.cpp index 3c0de13ce9..3ec49f0681 100644 --- a/decompiler/main.cpp +++ b/decompiler/main.cpp @@ -57,6 +57,7 @@ int main(int argc, char** argv) { } file_util::create_dir_if_needed(out_folder); + file_util::create_dir_if_needed(file_util::get_file_path({"debug_out"})); fmt::print("[Mem] After config read: {} MB\n", get_peak_rss() / (1024 * 1024)); diff --git a/docs/markdown/porting_tfrag.md b/docs/markdown/porting_tfrag.md new file mode 100644 index 0000000000..a78f91f0da --- /dev/null +++ b/docs/markdown/porting_tfrag.md @@ -0,0 +1,355 @@ +# Porting Tfrag +Tfrag is the renderer for non-instanced background geometry. It's typically used for the floor and unique walls/level geometry. It has a level of detail system, and time of day lighting, optionally transparency and that's it. No other features. + +The approach I took was to go slowly and understand the rendering code. I made two different "test" renderers that were slow but did things exactly the same way as in the PS2 version. After this, I made a custom PC version called tfrag3. The key difference with tfrag3 is that there is an offline preprocessing step that reads the level data and outputs data in a good format for PC. + +Trying to understand the rendering code is annoying, but I think it was worth it in the end. +- you can often leave out huge chunks of code. I never touched `tfrag-near` or most of the `tfrag` VU program. +- you can eventually figure out how to move more to the GPU. For example, all clipping/scissoring and transformation is done on the GPU.
This is faster (GPUs are good) and easier (OpenGL does it automatically if you set it up right, you don't have to do the math). +- you can rearrange things for better performance. Keeping the number of OpenGL draw calls down is probably the best thing we can do for performance. + +But in order to understand the renderer, I had to start with a slow "emulation-like" port. + +This document is divided into three parts: +1. PS2 rendering (in Jak). +2. Jak's `drawable` system +3. Tfrag-specific details + +# Basics of PS2 Rendering +The main idea of the Jak rendering system is that there are always two frames in progress at a time. One frame is being "rendered" meaning triangles are being transformed and rasterized and the VRAM is being written to. The other frame is being "calculated", meaning the game is building the list of instructions to draw the frame. The "calculation" happens from GOAL code, mostly on the EE, and builds a single giant "DMA chain". At the end of a frame, the engine takes the full DMA chain that was built, and sends it to the rendering hardware. The rendering process is all "automatic" - once it gets the list of data it will run for an entire frame and do all of the drawing. + +The EE user manual sections for DMAC, VPU (VU), and GIF are worth reading before trying to understand new rendering code. + +Because the calculation and rendering happen simultaneously, the calculation cannot use the same hardware and memory as the rendering. The following resources are used only by rendering: +- VU1 +- VIF1 +- GIF +- GS + +The following resources are used only by calculation: +- VU0-macro mode (`vf` register on EE and `vadd` like instructions) +- VU0-micro mode +- VIF0 +- The scratchpad + +The following resources are shared: +- DMA controller. It can handle multiple transfers to different places at the same time, and there are no shared destinations, so there's no issue here. Rendering uses only to VIF1. 
Calculation uses to VIF0, to scratchpad, and from scratchpad. +- The "global" DMA buffer. The calculation process fills this buffer and the rendering reads from it. There are two copies of this buffer and the engine will swap them automatically at the end of the frame. So one copy is always being filled while the other is being read and the graphics code mostly doesn't worry about this. +- DMA data inside of level data. The DMA list may include chunks of data that's part of the level data. In practice there's not much to be aware of here - the rendering process just reads this data. + +## DMA +The whole rendering system is driven by DMA. The DMA controller can copy data from main memory to different peripherals. If the destination is "busy", it will wait. So it doesn't just blindly dump data into things as fast as it can - it only sends data if the destination is ready to accept it. The DMA controller is controlled by "DMA tags". These contain a command like "transfer X bytes of data from address Y, then move on to the DMA tag at address Z". This allows the game to build up really complicated linked-lists of data to send. + +The DMA list built for rendering is divided into buckets. See `dma-h.gc` for the bucket names. Individual renderers add data to buckets, and then the buckets are linked together in the order they are listed in that enum. Code like `tfrag` won't start DMA to VIF1 or deal with linking buckets - that is handled by the game engine. + +However, code like `tfrag` may just set up its own transfers to/from SPR and VIF0 - these are free-to-use during the "calculation" step. + +## The VIF +The VIF is "vector unit interface". There's one for VU0 and VU1 and they are (as far as I know) identical. The rendering DMA list is sent directly to VIF1. There are also "tags" that control the VIF. 
The general types of tags are: +- "take the following N bytes of data and copy it to VU data memory" (possibly with some fancy "unpacking" operation to move stuff around) +- "take the following N bytes of data and copy it to VU program memory" - to upload a new VU program +- "run a VU program starting at this address in VU program memory" +- "send this data **direct**ly to the GIF" This is called a "direct" transfer. It's typically used to set things up on the GS that will be constant for one specific renderer. + +I haven't seen VIF0 really used much. The pattern for VIF1 is usually: +1. upload program +2. upload some constants +3. upload some model data +4. run program +5. repeat steps 3 and 4 many times + +## VU programs +The VU programs are usually responsible for transforming vertices with the camera matrix, clipping, lighting calculations, etc. The output of a VU program is GIF packets containing the actual drawing information (transformed vertices, drawing settings, etc). + +The usual pattern is that the VU program will build up a GIF packet, then use the `XGKICK` instruction with the address of the start of the packet. This will start transferring the packet directly to the GIF. The transfer happens in the background. The transfer will only be completed once all triangles are drawn - there's no buffer on the GIF/GS. A single packet can be pretty big and have many triangles. + +For the tfrag1/tfrag2 renderers, I ported up to this part. Then, I sent the `xgkick` data to the `DirectRenderer` which can handle this format of data. It is not super fast, but it's nice for debugging. Being able to inspect this was helpful to understand how it works. + +## VU buffer hell +Typically the VU programs have 4 buffers. There is a buffer for input and output data, and both are double buffered. This allows you to be uploading new data with DMA, transforming vertices, and sending data to the GIF all at the same time. + +All 4 buffers are in use at the same time. +1. 
Untransformed Data being uploaded from the DMA list to VU data. This happens automatically by DMA. +2. Untransformed Data being transformed by the VU1 program. +3. A GIF packet being built from the output of the transformation. This is written by the VU1 program. +4. A GIF packet currently being `XGKICK`ed. + +Once 1 is full and 2 is totally used, these buffers are swapped. The same thing happens for 3 and 4. +In some renderers, these swaps are always done at the same time. For example `sprite`. This tends to use the built-in `xitop` instructions for managing double buffering. + +In other renderers, the buffer swaps can happen at different times. This leads to awful code where you have 4 different versions of the same renderer for all possible combinations of which buffers are input/output. Storing the address of the input/output buffer in a variable can lead to extra instructions inside the transformation loops, which will significantly slow down. + +## GIF +The GIF can receive commands like: +- "set the alpha blending mode to X" +- "use texture located at VRAM address Y" +- "draw a triangle" + +It can't do any transformation or lighting calculations. + + +# Jak `drawable` system +There is a `drawable` system that's used to store things that can be "drawn". It uses a tree structure. So you can do something like +``` +(draw some-level) +``` +and it will recursively go through the entire level's tree of drawables and draw everything. Note that there are a lot of tricks/hacks so not every `drawable` supports `draw` and some code may defer some `draw`s until later. + +The lowest-level drawable for tfrag is `tfragment`. It makes sense to split up the level into "fragments" because the entire level is way too big to fit in the VU memory. Most of the time, you can't see every triangle in the level, so it makes sense to skip uploading the fragments that you know can't be seen. +There are thousands of these fragments. 
They tend to be ~ a few kB and each contains a chunk of data to upload to the VUs. Note that `draw` is not called directly on `tfragment`, despite the fact that they are `drawable`s (more details later). The `tfragment` is just a reference to some DMA data. + +## Drawables for Tfrag + +The top-level drawable type for an entire level is `bsp-header`. This is the type of the `-vis` file of the level's DGO, and has all the graphics data (not including textures). It is also a `drawable`. + +Within `bsp-header` is a `drawable-tree-array`. As the name implies, this contains an array of `drawable-tree`. Usually there are 5-10 of these in a level. There will be a `drawable-tree` for each renderer. Or possibly a few per renderer, if that renderer supports different modes. For example there's one for tfrag, one for transparent tfrag, one for tie, etc. + +You can just check the type of the `drawable-tree` to see if it's for tfrag/tie etc. The tfrag types are: +- `drawable-tree-tfrag` (parent of the rest) +- `drawable-tree-trans-tfrag` +- `drawable-tree-dirt-tfrag` +- `drawable-tree-ice-tfrag` +- `drawable-tree-lowres-tfrag` +- `drawable-tree-lowres-trans-tfrag` + +Each "tree" contains a bunch of `tfragment`s and a time of day color palette. But they are stored in a really weird way. There is a bounding volume hierarchy of `tfragment`s. This is just a tree-like structure where each node stores a sphere, and all the node's children fit inside of that sphere. The nodes at each depth are stored in an array. The layout of this tree is designed to let them use some crazy assembly SIMD 8-at-a-time traversal of the tree, with minimal pointer-chasing and good memory access patterns. + +Each tree has an array of `drawable-inline-array`s, storing all the nodes at a given depth. The last `drawable-inline-array` is actually a `drawable-inline-array-frag`, which is a wrapper around an inline array of `tfragment`s. 
+ +The other arrays are used to store tree nodes to organize these `tfragment`s. Each node in the tree contains a `bsphere`. All tfrags below the node fit into this sphere. + +The second to last array (if it exists) is a `drawable-inline-array-node`. This contains an inline array of `draw-node`. Each `draw-node` is the parent of between 1 and 8 `tfragment`s. They store a reference to the first child `tfragment` and a child count, and the children are just the ones that come after the first `tfragment` in memory. + +The third to last array (if it exists) is also a `drawable-inline-array-node`, containing an inline array of `draw-node`. Each `draw-node` is the parent of between 1 and 8 `draw-node`s from the array mentioned above. They store a reference to the first child `draw-node` and a child count, and the children are stored consecutively. + +This pattern continues until you get a `drawable-inline-array-node` with 8 or fewer nodes at the top. + +All the `draw-node` and `tfragment`s have ID numbers. These are used for the occlusion culling system. The visibility numbering is shared with all the other `drawable-tree`s in the `bsp-header`. The indices are given out consecutively, starting from the roots. Between depths, they are aligned to 32 elements, so there are some unused ids. These IDs are the index of the bit in the visibility string. + +With that out of the way, we can now go through the tfrag renderer + +# Tfrag + +The rough process for rendering is: +- "login" the data +- do "drawing" as part of the `drawable` system (on EE) +- do the real "draw" +- do culling +- compute time of day colors (or other precomputation) +- generate DMA lists +- unpack data to VU memory +- transform vertices +- clip +- build gs packets +- XGKICK + +I expect that most other renderers will be pretty similar. + +## Login +The tfrag data needs to be initialized before it can be used. You only have to do this once. This is called `login`, and it's a method of all `drawable`s. 
The level loader will call the `login` method of many things as part of the level load. For `tfrag`, all I had to do was decompile the `login` methods, and it worked and I could completely ignore this until tfrag3. + +It's possible to just call `login` on an entire level, but this probably takes too long, so the level loader will cleverly split it up over multiple frames. + +It is from: +- `level-update` +- `load-continue` +- `level-update-after-load` +- various calls to `login`. + + +In the end, the only thing the `login` does for tfrag is: +``` +(adgif-shader-login-no-remap (-> obj shader i)) +``` +for all the "shaders" in all the tfrags. A "shader" is an `adgif-shader`, which is just some settings for the GS that tells it drawing modes, like which texture to use, blending modes, etc. The `tfrag` VU1 code will send these to the GIF as needed when drawing. A `tfragment` can have multiple shaders. There is a different shader per texture. + +The actual "shader" object is just 5x quadwords that contain "address + data" format data. The address tells the GIF which parameter to change and the "data" has the value of the parameter. Some of them are not set properly in the level data, and the `adgif-shader-login-no-remap` function updates them. For tfrag, the 5 addresses are always the same: + +- `TEST_1`: this sets the alpha and z test settings. This is set properly in the level data and `login` doesn't touch it. +- `TEX0_1`: this has some texture parameters. This is 0 in the level data and is modified by `login`. +- `TEX1_1`: this has more texture parameters. In the level data this has `0x120` as the value and the address is set to the texture ID of the texture. During `login`, the texture ID is looked up in the texture pool and `TEX0_1`/`TEX1_1` are set to point to the right VRAM address and have the right settings to use the texture. +- `MIPTBP1_1`: is mipmap settings. I ignore these because we do our own mipmapping. +- `CLAMP_1`: this has texture clamp settings.
This is set properly in the level data. +- `ALPHA_1`: this has alpha blend settings. This is set properly in the level data. + + +## Calling the `draw` method +The `tfragment` at least pretends to use the `drawable` system, and the drawing is initiated by calling `draw` on the `drawable-tree-tfrag`. Getting this to actually be called took some digging - it uses some functions in later files that we haven't completed yet. + +When the level is loaded, the `bsp-header` is added to the `*background-draw-engine*` by the level loader. The path to calling draw is: +- In `main.gc`, there is a `display-loop`. This has a while loop that runs once per frame and runs many systems. +- The `display-loop` calls `*draw-hook*` +- The `*draw-hook*` variable is set to `main-draw-hook` +- The `main-draw-hook` calls `real-main-draw-hook` +- The `real-main-draw-hook` calls `(execute-connections *background-draw-engine*` +- This "engine" calls the `add-bsp-drawable` function on the `bsp-header` for each loaded level. +- The `draw` method of `bsp-header` sets up some stuff on the scratchpad and some `vf` registers. +- The `draw` method of `bsp-header` calls `draw` on the `drawable-tree-array` (defined in parent class `drawable-group`) +- The `draw` method of `drawable-group` checks if the level is visible, and if so calls `draw` on each tree. +- The `draw` method of `drawable-tree-tfrag` simply adds the tree to a list of trees in `*background-work*`. + + +## Real "drawing" +Later on, in the `real-main-draw-hook`, there is a call to `finish-background`. + +There's some stuff at the top of this function that's only used for the separate shrubbery renderer. It sets up some VU0 programs. I noticed that the stuff before tfrag drawing would overwrite this VU0 stuff so I ignored it for now. + +The first thing that happens before any tfrag drawing is setting the `vf` registers to store the `math-camera` values. 
In OpenGOAL, the `vf` registers aren't saved between functions, so I had to manually use the `with-vf` macro with the `:rw 'write` flag to save these: +```lisp + (let ((v1-48 *math-camera*)) + (with-vf (vf16 vf17 vf18 vf19 vf20 vf21 vf22 vf23 vf24 vf25 vf26 vf27 vf28 vf29 vf30 vf31) + :rw 'write + (.lvf vf16 (&-> v1-48 plane 0 quad)) + (.lvf vf17 (&-> v1-48 plane 1 quad)) + ;; ... +``` +these will later be used in part of the drawing function. The `:rw 'write` flag will save these to a structure where we can read them later. + +Then, for each tree: +``` +(upload-vis-bits s1-0 gp-1 a2-4) +``` +this uploads the visibility data to the scratchpad. The visibility data is stored at the end of the 16 kB scratchpad. The drawable with ID of `n` can look at the `n`-th bit of this data to determine if it is visible. The visibility IDs are per level, and the drawing order of the `tfrag` will alternate between levels, so they upload this for each tree to draw. It seems like you could skip this after the first upload if you detect that you're drawing multiple trees in the same level. They do it for TIE and not TFRAG and I don't know why. The visibility data is based on the position of the camera. Currently this doesn't work so I modified it to upload all 1's. + +The modification to the code to use the scratchpad in OpenGOAL is: +``` + ;;(spad-vis (the-as (pointer uint128) (+ #x38b0 #x70000000))) + (spad-vis (scratchpad-ptr uint128 :offset VISIBLE_LIST_SCRATCHPAD)) +``` +The `0x38b0` offset is just something we've noticed over time as being the location of the visible list, so there's a constant I made for it. (TODO: I think it's also `terrain-context work background vis-list`) + +The hack for visibility is: +```lisp +;; TODO this is a hack. +(quad-copy! (-> arg0 vis-bits) (-> arg2 all-visible-list) (/ (+ (-> arg2 visible-list-length) 15) 16)) +``` +which actually modifies the level to say that everything is visible.
The `all-visible-list` is just a list which has `1` for every drawable that actually exists (I think, need to confirm). There are some skipped ID's. + + +The next part of drawing is: +``` + (when (not (or (zero? s0-0) (= s4-1 s0-0))) + (flush-cache 0) + (time-of-day-interp-colors-scratch (scratchpad-ptr rgba :offset 6160) s0-0 (-> s1-0 mood)) + ;; remember the previous colors + (set! s4-1 s0-0) +``` +where `s0-0` is the `time-of-day-pal` for the tfrag tree. It will skip interpolation if it is the same color palette that was just interpolated. + +The `time-of-day-interp-colors-scratch` function uploads the colors from `s0-0` to the scratchpad at offset `6160`. It computes the correct colors for the time-of-day/lighting settings in the level `s1-0`'s mood. This function is pretty complicated, so I used MIPS2C. + +### Time of Day Interp Colors Scratch +The very first attempts for TFRAG just skipped this function because it wasn't needed to debug the basic drawing functions. I manually set the lighting to `0.5` for all colors and `1.0` for alpha. I suspected that this stored the colors in the scratchpad. I assumed it would be fine if these were garbage for a first test. + +I noticed that this function does a few tricky things. It uses the scratchpad and it uses DMA. I know it uses DMA because I saw: +```cpp + c->lui(t0, 4096); // lui t0, 4096 + // some stuff in between... + c->ori(a1, t0, 54272); // ori a1, t0, 54272 = (0x1000D400) SPR TO +``` +and this `0x1000D400` is the address of the DMA control register for transferring to the scratchpad. The scratchpad here is just used as a faster memory. And eventually the draw code will read the result from the scratchpad. + +They really like this pattern of doing work on the scratchpad while DMA is running in the background, copying things to/from the scratchpad. In this case, they upload the palette to the scratchpad in chunks.
As those uploads are running, they do math on the previous upload to blend together the colors for the chosen time of day. To get optimal performance, they often count how many times they finish before the DMA is ready. When this happens, they increment a "wait" variable. + +I modified scratchpad access like this: +```cpp + c->lui(v1, 28672); // lui v1, 28672 0x7000 + // stuff in between skipped... + //c->ori(v1, v1, 2064); // ori v1, v1, 2064 SPAD mods + get_fake_spad_addr(v1, cache.fake_scratchpad_data, 2064, c); +``` +the original code would set the address to an offset of 2064 in the scratchpad. + +The first thing they do is wait for any in-progress DMA transfers to finish: +```cpp + block_1: + c->lw(t5, 0, a1); // lw t5, 0(a1) + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + // nop // sll r0, r0, 0 + c->andi(t5, t5, 256); // andi t5, t5, 256 + // nop // sll r0, r0, 0 + bc = c->sgpr64(t5) != 0; // bne t5, r0, L62 + // nop // sll r0, r0, 0 + if (bc) {goto block_1;} // branch non-likely +``` +which is reading and checking the DMA register in a loop. We can just get rid of this - we make all DMA instant. + + +I also modified the code that starts the transfer to just do a memcpy from the fake scratchpad. See the EE manual for details on what these registers mean. The `a1` register points to the control register for SPR TO DMA. +```cpp + { + // c->sw(t4, 16, a1); // sw t4, 16(a1) + u32 madr = c->sgpr64(t4); + c->daddiu(t3, t3, -32); // daddiu t3, t3, -32 + // c->sw(v1, 128, a1); // sw v1, 128(a1) + u32 sadr = c->sgpr64(v1); + c->addiu(t5, r0, 64); // addiu t5, r0, 64 + //c->sw(t5, 32, a1); // sw t5, 32(a1) + u32 qwc = c->sgpr64(t5); + c->addiu(t5, r0, 256); // addiu t5, r0, 256 + // c->sw(t5, 0, a1); // sw t5, 0(a1) + spad_to_dma(cache.fake_scratchpad_data, madr, sadr, qwc); + c->daddiu(t4, t4, 1024); // daddiu t4, t4, 1024 + } + ``` +This data is double buffered. One buffer is being filled from DMA while another is being processed. 
To swap buffers, they often use `xor` to toggle a bit. But this trick only works if our buffer has the same alignment as theirs up to the bit being toggled (otherwise their first `xor` might toggle a 0 to a 1, advancing the address, where ours does the opposite). +``` +c->xori(v1, v1, 1024); // xori v1, v1, 1024 +``` + +The actual processing is an annoying pipelined loop. The palette stores groups of 8 colors. The time of day system computes 8 weights, passed to this function. Each of the 8 colors is multiplied by the weight and added. This process is repeated for each group. There are usually 1024 or 2048 groups. The tfragments are lit by indexing into these groups. One important detail is that the r/g/b/a values are saturated so they don't overflow. + +This part works using the MIPS2C function in the first tfrag renderers. In the third one, it was annoying to get this data to the C++ renderer, so I just recomputed it in C++. This also lets us manually override the time of day values for fun. The code is much simpler: +```cpp +void Tfrag3::interp_time_of_day_slow(const float weights[8], + const std::vector& in, + math::Vector* out) { + for (size_t color = 0; color < in.size(); color++) { + math::Vector4f result = math::Vector4f::zero(); + for (int component = 0; component < 8; component++) { + result += in[color].rgba[component].cast() * weights[component]; + } + result[0] = std::min(result[0], 255.f); + result[1] = std::min(result[1], 255.f); + result[2] = std::min(result[2], 255.f); + result[3] = std::min(result[3], 128.f); // note: different for alpha! + out[color] = result.cast(); + } +} +``` + +### The Call to Draw +There was another scratchpad use to patch up here. They often treat the scratchpad as a `terrain-context`. There are quite a few overlays here so sometimes you have to do some manual searching to figure it out. +```lisp + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s1-0 index)) + + (set! 
(-> *tfrag-work* min-dist z) 4095996000.0) + ;; draw! + (draw-drawable-tree-tfrag s2-0 s1-0) + ) + ;; remember closest. + (set! (-> *level* level (-> (scratchpad-object terrain-context) bsp lev-index) closest-object 0) + (-> *tfrag-work* min-dist z) + ) + ) +``` +The remembered closest distance is used to figure out which texture mip levels need uploading. + + +### Draw node culling +This is a part that I left out. I still haven't done it. But I suspect it looks at the position of the camera (stored in `vf` regs from earlier) and modifies the visibility data. I think it uses a "sphere in view frustum" check and traverses the tree of `draw-node`s. I think it only culls the `draw-node`s and not actually the `tfragment`s, and it modifies the visibility data in place. It only culls the range of nodes that correspond to the tree we're drawing. + +Later, on tfrag3, I did the culling in C++. (More on this later - it's done in a tricky way so that you can efficiently build a list of only the visible things to send to the GPU). + + +### DMA List Generation +The objective of the draw function is to generate a DMA list. This gets added to the entire DMA list for the frame and gets sent to the VIF. The DMA data is a list of instructions like: +- upload this data to the VU memory +- run this VU program +- change various settings related to the VU data upload. + +The pattern used by tfrag is: +- Call `tfrag-init-buffer` once. This is unoptimized code that just sets things up. +- Call `draw-inline-array-tfrag`. This adds DMA per tfragment. It is super optimized. +- Call `tfrag-end-buffer`. 
This is unoptimized code that ends the DMA list for tfrag diff --git a/docs/scratch/sprite_2d_3d.txt b/docs/scratch/sprite_2d_3d.txt new file mode 100644 index 0000000000..206b23d66e --- /dev/null +++ b/docs/scratch/sprite_2d_3d.txt @@ -0,0 +1,112 @@ + xtop vi02 | nop + nop | nop + ilwr.x vi04, vi02 | nop + iaddi vi02, vi02, 0x1 | nop + iaddiu vi03, vi02, 0x90 | nop +L1: + ilw.y vi08, 1(vi02) | nop + lq.xyzw vf25, 900(vi00) | nop + lq.xyzw vf26, 901(vi00) | nop + lq.xyzw vf27, 902(vi00) | nop + lq.xyzw vf28, 903(vi00) | nop + lq.xyzw vf30, 904(vi00) | nop + lqi.xyzw vf01, vi02 | nop + lqi.xyzw vf05, vi02 | nop + lqi.xyzw vf11, vi02 | nop + lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 + nop | maddax.xyzw ACC, vf25, vf01 + nop | madday.xyzw ACC, vf26, vf01 + nop | maddz.xyzw vf02, vf27, vf01 + move.w vf05, vf00 | addw.z vf01, vf00, vf05 + nop | nop + div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 + nop | mul.xyzw vf03, vf02, vf29 + nop | nop + nop | nop + nop | mulz.z vf04, vf05, vf05 + lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 + iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 +L2: + ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q + lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 + lq.xyzw vf14, 1002(vi00) | mula.xyzw ACC, vf05, vf14 + fmand vi01, vi06 | mul.xyz vf02, vf02, Q + ibne vi00, vi01, L5 | addz.x vf01, vf00, vf01 + lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 + lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 + lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 + lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 + sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 + lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 + sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 + sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 + sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 + lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 + lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 + lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 + lq.xyzw vf08, 
999(vi08) | miniz.zw vf01, vf01, vf12 + ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 + lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 + nop | suby.w vf02, vf10, vf12 + lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 + fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 + lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 + fmand vi09, vi06 | nop + ibne vi00, vi09, L6 | nop + lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 +L3: + lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 + lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 + lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 + lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 + sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 + lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 + move.w vf24, vf00 | addw.z vf23, vf00, vf24 + div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 + ibne vi00, vi01, L4 | muly.z vf24, vf24, vf31 + ilw.y vi08, -2(vi02) | mulz.xyzw vf13, vf13, vf01 + sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 + sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 + sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 + lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 + lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 + lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 + lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 + sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 + sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 + sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 + sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 + nop | maddax.xyzw ACC, vf12, vf22 + nop | maddy.xyzw vf22, vf13, vf22 + lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 + lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 + move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 + move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 + sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 + sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 + sq.xyzw vf21, 6(vi05) | nop + sq.xyzw vf22, 8(vi05) | nop + xgkick vi15 | nop + 
iaddi vi04, vi04, -0x1 | nop + iaddiu vi01, vi00, 0x672 | nop + ibne vi00, vi04, L2 | nop + isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 + nop | nop :e + nop | nop +L4: + iaddi vi04, vi04, -0x1 | nop + iaddi vi02, vi02, -0x3 | nop + ibne vi00, vi04, L1 | nop + nop | nop + nop | nop :e + nop | nop +L5: + iaddi vi04, vi04, -0x1 | nop + iaddi vi03, vi03, 0x4 | nop + ibne vi00, vi04, L1 | nop + nop | nop + nop | nop :e + nop | nop +L6: + b L3 | nop + lq.xyzw vf08, 1000(vi00) | nop \ No newline at end of file diff --git a/docs/scratch/sprite_2d_ss.txt b/docs/scratch/sprite_2d_ss.txt new file mode 100644 index 0000000000..699fe6d048 --- /dev/null +++ b/docs/scratch/sprite_2d_ss.txt @@ -0,0 +1,105 @@ + xtop vi02 | nop + nop | nop + ilwr.x vi04, vi02 | nop + iaddi vi02, vi02, 0x1 | nop + iaddiu vi03, vi02, 0x90 | nop +L7: + ilw.y vi08, 1(vi02) | nop + lq.xyzw vf25, 900(vi00) | nop + lq.xyzw vf26, 901(vi00) | nop + lq.xyzw vf27, 902(vi00) | nop + lq.xyzw vf28, 903(vi00) | nop + lq.xyzw vf30, 904(vi08) | nop + lqi.xyzw vf01, vi02 | nop + lqi.xyzw vf05, vi02 | nop + lqi.xyzw vf11, vi02 | nop + lq.xyzw vf12, 1020(vi00) | mulaw.xyzw ACC, vf28, vf00 + ilw.y vi08, 1(vi02) | maddax.xyzw ACC, vf25, vf01 + nop | madday.xyzw ACC, vf26, vf01 + nop | maddz.xyzw vf02, vf27, vf01 + move.w vf05, vf00 | addw.z vf01, vf00, vf05 + nop | nop + div Q, vf31.x, vf02.w | muly.z vf05, vf05, vf31 + nop | mul.xyzw vf03, vf02, vf29 + nop | nop + nop | nop + nop | mulz.z vf04, vf05, vf05 + lq.xyzw vf14, 1001(vi00) | clipw.xyz vf03, vf03 + iaddi vi06, vi00, 0x1 | adda.xyzw ACC, vf11, vf11 +L8: + ior vi05, vi15, vi00 | mul.zw vf01, vf01, Q + lq.xyzw vf06, 998(vi00) | mulz.xyzw vf15, vf05, vf04 + lq.xyzw vf14, 1002(vi00) | mula.xyzw ACC, vf05, vf14 + fmand vi01, vi06 | mul.xyz vf02, vf02, Q + ibne vi00, vi01, L10 | addz.x vf01, vf00, vf01 + lqi.xyzw vf07, vi03 | mulz.xyzw vf16, vf15, vf04 + lq.xyzw vf14, 1003(vi00) | madda.xyzw ACC, vf15, vf14 + lqi.xyzw vf08, vi03 | add.xyzw vf10, vf02, vf30 + 
lqi.xyzw vf09, vi03 | mulw.x vf01, vf01, vf01 + sqi.xyzw vf06, vi05 | mulz.xyzw vf15, vf16, vf04 + lq.xyzw vf14, 1004(vi00) | madda.xyzw ACC, vf16, vf14 + sqi.xyzw vf07, vi05 | maxx.w vf10, vf10, vf12 + sqi.xyzw vf08, vi05 | maxz.zw vf01, vf01, vf31 + sqi.xyzw vf09, vi05 | mulz.xyzw vf16, vf15, vf04 + lq.xyzw vf14, 1005(vi00) | madda.xyzw ACC, vf15, vf14 + lqi.xyzw vf06, vi03 | mulw.x vf01, vf01, vf31 + lqi.xyzw vf07, vi03 | miniy.w vf10, vf10, vf12 + lq.xyzw vf08, 1000(vi00) | nop + ilw.x vi07, -2(vi02) | madd.xyzw vf05, vf16, vf14 + lq.xyzw vf30, 904(vi08) | nop + lqi.xyzw vf23, vi02 | miniw.x vf01, vf01, vf00 + lqi.xyzw vf24, vi02 | mulx.w vf11, vf11, vf01 + fcand vi01, 0x3f | mulaw.xyzw ACC, vf28, vf00 + lq.xyzw vf17, 1006(vi00) | maddax.xyzw ACC, vf25, vf23 + lq.xyzw vf18, 1007(vi00) | madday.xyzw ACC, vf26, vf23 + lq.xyzw vf19, 980(vi07) | ftoi0.xyzw vf11, vf11 + lq.xyzw vf20, 981(vi07) | maddz.xyzw vf02, vf27, vf23 + lq.xyzw vf21, 982(vi07) | mulaw.xyzw ACC, vf17, vf05 + lq.xyzw vf22, 983(vi07) | msubz.xyzw vf12, vf18, vf05 + sq.xyzw vf11, 3(vi05) | mulaz.xyzw ACC, vf17, vf05 + lqi.xyzw vf11, vi02 | maddw.xyzw vf13, vf18, vf05 + move.w vf24, vf00 | addw.z vf23, vf00, vf24 + div Q, vf31.x, vf02.w | mulw.xyzw vf12, vf12, vf01 + ibne vi00, vi01, L9 | muly.z vf24, vf24, vf31 + ilw.y vi08, 1(vi02) | mulz.xyzw vf13, vf13, vf01 + sqi.xyzw vf06, vi05 | mul.xyzw vf03, vf02, vf29 + sqi.xyzw vf07, vi05 | mulaw.xyzw ACC, vf10, vf00 + sqi.xyzw vf08, vi05 | maddax.xyzw ACC, vf12, vf19 + lq.xyzw vf06, 988(vi00) | maddy.xyzw vf19, vf13, vf19 + lq.xyzw vf07, 989(vi00) | mulaw.xyzw ACC, vf10, vf00 + lq.xyzw vf08, 990(vi00) | maddax.xyzw ACC, vf12, vf20 + lq.xyzw vf09, 991(vi00) | maddy.xyzw vf20, vf13, vf20 + sq.xyzw vf06, 1(vi05) | mulaw.xyzw ACC, vf10, vf00 + sq.xyzw vf07, 3(vi05) | maddax.xyzw ACC, vf12, vf21 + sq.xyzw vf08, 5(vi05) | maddy.xyzw vf21, vf13, vf21 + sq.xyzw vf09, 7(vi05) | mulaw.xyzw ACC, vf10, vf00 + nop | maddax.xyzw ACC, vf12, vf22 + nop | maddy.xyzw 
vf22, vf13, vf22 + lq.xyzw vf12, 1020(vi00) | ftoi4.xyzw vf19, vf19 + lq.xyzw vf14, 1001(vi00) | ftoi4.xyzw vf20, vf20 + move.xyzw vf05, vf24 | ftoi4.xyzw vf21, vf21 + move.xyzw vf01, vf23 | ftoi4.xyzw vf22, vf22 + sq.xyzw vf19, 2(vi05) | mulz.z vf04, vf24, vf24 + sq.xyzw vf20, 4(vi05) | clipw.xyz vf03, vf03 + sq.xyzw vf21, 6(vi05) | nop + sq.xyzw vf22, 8(vi05) | nop + xgkick vi15 | nop + iaddi vi04, vi04, -0x1 | nop + iaddiu vi01, vi00, 0x672 | nop + ibne vi00, vi04, L8 | nop + isub vi15, vi01, vi15 | adda.xyzw ACC, vf11, vf11 + nop | nop :e + nop | nop +L9: + iaddi vi04, vi04, -0x1 | nop + iaddi vi02, vi02, -0x3 | nop + ibne vi00, vi04, L7 | nop + nop | nop + nop | nop :e + nop | nop +L10: + iaddi vi04, vi04, -0x1 | nop + iaddi vi03, vi03, 0x4 | nop + ibne vi00, vi04, L7 | nop + nop | nop + nop | nop :e \ No newline at end of file diff --git a/docs/scratch/tfrag.txt b/docs/scratch/tfrag.txt new file mode 100644 index 0000000000..ce13e6e18f --- /dev/null +++ b/docs/scratch/tfrag.txt @@ -0,0 +1,2147 @@ +;; debug notes + +;; getting bad adgif a+d data (messed up registers) +;;;;;;;;; + +unpacked with v4-32 (makes sense) + VIF: STROW + VIF: STMOD 0b1 + VIF: UNPACK-V4-8: 4 addr: 113 us: true tops: true + VIF: STMOD 0b0 + VIF: UNPACK-V4-8: 4 addr: 123 us: false tops: true + VIF: NOP + VIF: NOP + VIF: NOP + VIF: UNPACK-V4-16: 5 addr: 0 us: true tops: true + VIF: UNPACK-V4-32: 15 addr: 9 us: false tops: true <--------- 9 is the ad data + +;; here's one set of data (printing the 8-bit addr field of each qw, assuming it's a+d data) +UNPACK V4: 0x6 from offset: 128 ;; tex0_1 <--- all these register make sense +UNPACK V4: 0x14 from offset: 144 ;; tex1_1 +UNPACK V4: 0x34 from offset: 160 ;; miptbp +UNPACK V4: 0x8 from offset: 176 ;; clamp +UNPACK V4: 0x42 from offset: 192 ;; alpha + +;; next (repeated pattern of addrs) +UNPACK V4: 0x6 from offset: 208 +UNPACK V4: 0x14 from offset: 224 +UNPACK V4: 0x34 from offset: 240 +UNPACK V4: 0x8 from offset: 256 +UNPACK V4: 0x36 from 
offset: 272 + +UNPACK V4: 0x6 from offset: 288 +UNPACK V4: 0x14 from offset: 304 +UNPACK V4: 0x34 from offset: 320 +UNPACK V4: 0x8 from offset: 336 +UNPACK V4: 0x36 from offset: 352 + +;; unpack math was bad.... + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; getting bad tex coords +;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +Packet structure for most draws: + +Tag at offset 112: packed nloop: 4 prim: 0x3c + ST RGBAQ XYZF2 + +;; getting tc's like +q = 4.4845376 tc = 0, 4.484537 +q = 4.5222845 tc = 0, 4.520077 +q = 4.4883046 tc = 0.004379901, 4.4883046 +q = 4.5170135 tc = 0.0044089933, 4.514809 + +;; from a store in the kick area of +A: vf27 store: [ 0.004 4.515 4.517 0.000] +;; looks a lot like a screwed up unpack?? +;; messed up vector math on subtract... + + +;;;;;;;;;;;;;;;;;;; +;; sps +;;;;;;;;;;;;;;;;;;; + +coords sent to GS: +... +tfrag coords: 7cf6 8074 ;; ok +tfrag coords: 7cb9 7f9c ;; ok +tfrag coords: 0 0 ;; sps +tfrag coords: 0 0 + +;; the vert count stuff works out correctly, we do 8 verts then go to the kicking zone. +;; I don't remember what the fix is lol +;; note that we haven't filled the kicking zone, so we should probably reload then keep adding, or kick a smaller group. + +;;;;;;;;;;;;;;;;;;;; +;; verts on top of each other +;;;;;;;;;;;;;;;;;;;; + +;; bad killed pipelining? 
+ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; flames applied everywhere +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; the itimes are: +;; this is for "sunrise", which is [1, 0, 0, 0, 0, 0, 0, 0] +[2] t0: 40 40 40 40 00 00 00 00 +[2] t1: 00 00 00 00 00 00 00 00 +[2] t2: 00 00 00 00 00 00 00 00 +[2] a2: 00 00 00 00 00 00 00 00 + + +;; an example color: + N0 46 46 40 80 | 31 33 41 80 | 00 00 00 80 | 4a 4f 72 80 | + N1 3e 72 53 80 | 3a 21 38 80 | 3b 32 2e 80 | 3f 35 2c 80 | + + +;; Data Layout + +;; Code Layout + +;; 0, init-globals + b L11 | nop + nop | nop +;; 2, reset VF04 + b L10 | nop + xtop vi14 | nop +;; 4 + b L1 | nop + xtop vi14 | nop +;; 6 + b L2 | nop + xtop vi14 | nop +;; 8 + b L3 | nop + xtop vi14 | nop +;; 10 + b L4 | nop + xtop vi14 | nop +;; 12 + b L5 | nop + xtop vi14 | nop +;; 14 + b L6 | nop + xtop vi14 | nop +;; 16 + b L7 | nop + xtop vi14 | nop +;; 18 + b L8 | nop + xtop vi14 | nop +;; 20 + b L9 | nop + xtop vi14 | nop +;; 22 + b L10 | nop + xtop vi14 | nop +;; 24 + b L10 | nop + xtop vi14 | nop + +;; PROGRAM 4 +L1: + b L112 | nop + nop | nop + +;; PROGRAM 6 +L2: + b L127 | nop + nop | nop + + +L3: + bal vi15, L12 | nop + nop | nop + bal vi15, L26 | nop + nop | nop + bal vi15, L48 | nop + nop | nop + b L102 | nop + nop | nop +L4: + bal vi15, L12 | nop + nop | nop + bal vi15, L18 | nop + nop | nop + b L102 | nop + nop | nop +L5: + b L127 | nop + nop | nop +L6: + bal vi15, L12 | nop + nop | nop + bal vi15, L18 | nop + nop | nop + bal vi15, L25 | nop + nop | nop + bal vi15, L47 | nop + nop | nop + b L102 | nop + nop | nop +L7: + bal vi15, L13 | nop + nop | nop + bal vi15, L17 | nop + nop | nop + b L102 | nop + nop | nop +L8: + bal vi15, L13 | nop + nop | nop + bal vi15, L17 | nop + nop | nop + b L84 | nop + nop | nop +L9: + b L127 | nop + nop | nop +L10: + lq.xyzw vf04, 664(vi00) | nop :e + nop | nop +L11: + iaddiu vi14, vi00, 0x2a0 | nop + iaddiu vi01, vi00, 0x350 | nop + mfir.x vf03, vi14 | nop + mfir.y vf03, vi01 | nop + mfir.z vf03, vi14 | nop + mfir.w vf03, 
vi01 | nop :e + lq.xyzw vf04, 664(vi00) | nop +L12: + ilw.z vi05, 1(vi14) | nop + b L14 | nop + ilwr.x vi02, vi14 | nop +L13: + ilwr.z vi02, vi14 | nop + ilwr.x vi04, vi14 | nop + ilw.z vi05, 1(vi14) | nop + b L14 | nop + iadd vi02, vi02, vi04 | nop + ilwr.z vi02, vi14 | nop + ilw.x vi01, 1(vi14) | nop + ilwr.x vi04, vi14 | nop + ilw.z vi05, 1(vi14) | nop + iadd vi02, vi02, vi04 | nop + iadd vi02, vi02, vi01 | nop +L14: + iadd vi05, vi05, vi14 | nop + lqi.xyzw vf12, vi05 | nop + lqi.xyzw vf16, vi05 | nop + lq.xyzw vf01, 656(vi00) | nop + lq.xyzw vf02, 657(vi00) | nop + lq.xyzw vf05, 5(vi14) | itof0.xyzw vf12, vf12 + lq.xyzw vf08, 8(vi14) | itof0.xyzw vf16, vf16 + lqi.xyzw vf13, vi05 | nop + lqi.xyzw vf17, vi05 | nop + nop | nop + lq.xyzw vf06, 6(vi14) | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf07, 7(vi14) | itof0.xyzw vf13, vf13 + nop | itof0.xyzw vf17, vf17 + lqi.xyzw vf14, vi05 | maddax.xyzw ACC, vf05, vf12 + lqi.xyzw vf18, vi05 | madday.xyzw ACC, vf06, vf12 + nop | maddz.xyzw vf12, vf07, vf12 + nop | mulaw.xyzw ACC, vf08, vf00 + nop | itof0.xyzw vf14, vf14 + iaddi vi06, vi05, -0x6 | itof0.xyzw vf18, vf18 +L15: + lqi.xyzw vf15, vi05 | maddax.xyzw ACC, vf05, vf13 + lqi.xyzw vf19, vi05 | madday.xyzw ACC, vf06, vf13 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf13, vf07, vf13 + sqi.xyzw vf12, vi06 | mulaw.xyzw ACC, vf08, vf00 + ibeq vi00, vi02, L16 | itof0.xyzw vf15, vf15 + sqi.xyzw vf16, vi06 | itof0.xyzw vf19, vf19 + lqi.xyzw vf12, vi05 | maddax.xyzw ACC, vf05, vf14 + lqi.xyzw vf16, vi05 | madday.xyzw ACC, vf06, vf14 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf14, vf07, vf14 + sqi.xyzw vf13, vi06 | mulaw.xyzw ACC, vf08, vf00 + ibeq vi00, vi02, L16 | itof0.xyzw vf12, vf12 + sqi.xyzw vf17, vi06 | itof0.xyzw vf16, vf16 + lqi.xyzw vf13, vi05 | maddax.xyzw ACC, vf05, vf15 + lqi.xyzw vf17, vi05 | madday.xyzw ACC, vf06, vf15 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf15, vf07, vf15 + sqi.xyzw vf14, vi06 | mulaw.xyzw ACC, vf08, vf00 + ibeq vi00, vi02, L16 | itof0.xyzw vf13, vf13 + 
sqi.xyzw vf18, vi06 | itof0.xyzw vf17, vf17 + lqi.xyzw vf14, vi05 | maddax.xyzw ACC, vf05, vf12 + lqi.xyzw vf18, vi05 | madday.xyzw ACC, vf06, vf12 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 + sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf08, vf00 + ibne vi00, vi02, L15 | itof0.xyzw vf14, vf14 + sqi.xyzw vf19, vi06 | itof0.xyzw vf18, vf18 +L16: + jr vi15 | nop + nop | nop +L17: + ilw.x vi02, 1(vi14) | nop + ilw.w vi03, 2(vi14) | nop + ilw.x vi04, 4(vi14) | nop + lq.xyzw vf01, 656(vi00) | nop + ibeq vi00, vi02, L21 | nop + lq.xyzw vf02, 657(vi00) | nop + lq.xy vf18, 667(vi00) | nop + b L19 | nop + lq.xy vf19, 669(vi00) | nop +L18: + ilwr.z vi02, vi14 | nop + ilw.y vi03, 2(vi14) | nop + ilw.z vi04, 3(vi14) | nop + lq.xyzw vf01, 656(vi00) | nop + ibeq vi00, vi02, L21 | nop + lq.xyzw vf02, 657(vi00) | nop + lq.xy vf18, 666(vi00) | nop + lq.xy vf19, 668(vi00) | nop +L19: + lq.xyzw vf05, 5(vi14) | nop + lq.xyzw vf06, 6(vi14) | nop + lq.xyzw vf07, 7(vi14) | nop + ibne vi00, vi14, L22 | nop + lq.xyzw vf08, 8(vi14) | nop + ilwr.w vi05, vi03 | nop + iaddi vi03, vi03, 0x1 | nop + ilwr.x vi07, vi04 | nop + nop | nop + lq.xyzw vf27, 0(vi05) | nop + nop | nop + ilwr.w vi08, vi07 | nop + nop | nop + nop | itof0.xyzw vf12, vf27 + ilw.z vi09, -1(vi03) | nop + lq.xyzw vf20, 0(vi08) | nop + nop | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 1(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 0(vi09) | madday.xyzw ACC, vf06, vf12 + nop | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 1(vi09) | nop + ilwr.w vi06, vi03 | nop + iaddi vi03, vi03, 0x1 | mulaw.xy ACC, vf19, vf00 + ilwr.y vi07, vi04 | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 1(vi08) | nop + lq.xyzw vf27, 0(vi06) | nop + nop | add.xyzw vf25, vf20, vf22 + ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 + nop | add.xyzw vf26, vf21, vf23 + nop | itof0.xyzw vf13, vf27 +L20: + ilw.z vi09, -1(vi03) | itof0.xyzw vf14, vf14 + lq.xyzw vf20, 0(vi08) | maxx.xy vf16, vf16, vf00 + ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 + 
lq.xyzw vf15, 1(vi06) | maddax.xyzw ACC, vf05, vf13 + lq.xyzw vf22, 0(vi09) | madday.xyzw ACC, vf06, vf13 + nop | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf23, 1(vi09) | mulax.xyzw ACC, vf25, vf16 + ilwr.w vi05, vi03 | maddy.xyzw vf12, vf12, vf16 + iaddi vi02, vi02, -0x1 | mulaw.xy ACC, vf19, vf00 + ilwr.z vi07, vi04 | maddw.xy vf17, vf18, vf13 + lq.xyzw vf21, 1(vi08) | mulax.xyzw ACC, vf26, vf16 + lq.xyzw vf27, 0(vi05) | maddy.xyzw vf14, vf14, vf16 + sq.xyzw vf12, 0(vi10) | add.xyzw vf25, vf20, vf22 + ilwr.w vi08, vi07 | mini.xy vf17, vf17, vf02 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf14, 1(vi10) | itof0.xyzw vf12, vf27 + ilwr.z vi09, vi03 | itof0.xyzw vf15, vf15 + lq.xyzw vf20, 0(vi08) | maxx.xy vf17, vf17, vf00 + ior vi10, vi06, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 1(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 0(vi09) | madday.xyzw ACC, vf06, vf12 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 1(vi09) | mulax.xyzw ACC, vf25, vf17 + ilw.w vi06, 1(vi03) | maddy.xyzw vf13, vf13, vf17 + iaddi vi03, vi03, 0x2 | mulaw.xy ACC, vf19, vf00 + ilwr.w vi07, vi04 | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 1(vi08) | mulax.xyzw ACC, vf26, vf17 + lq.xyzw vf27, 0(vi06) | maddy.xyzw vf15, vf15, vf17 + sq.xyzw vf13, 0(vi10) | add.xyzw vf25, vf20, vf22 + ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf15, 1(vi10) | itof0.xyzw vf13, vf27 + ilw.z vi09, -1(vi03) | itof0.xyzw vf14, vf14 + lq.xyzw vf20, 0(vi08) | maxx.xy vf16, vf16, vf00 + ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf15, 1(vi06) | maddax.xyzw ACC, vf05, vf13 + lq.xyzw vf22, 0(vi09) | madday.xyzw ACC, vf06, vf13 + iaddi vi04, vi04, 0x1 | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf23, 1(vi09) | mulax.xyzw ACC, vf25, vf16 + ilwr.w vi05, vi03 | maddy.xyzw vf12, vf12, vf16 + iaddi vi02, vi02, -0x1 | mulaw.xy ACC, vf19, vf00 + ilwr.x vi07, vi04 | maddw.xy vf17, vf18, vf13 + lq.xyzw vf21, 
1(vi08) | mulax.xyzw ACC, vf26, vf16 + lq.xyzw vf27, 0(vi05) | maddy.xyzw vf14, vf14, vf16 + sq.xyzw vf12, 0(vi10) | add.xyzw vf25, vf20, vf22 + ilwr.w vi08, vi07 | mini.xy vf17, vf17, vf02 + ibeq vi00, vi02, L21 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf14, 1(vi10) | itof0.xyzw vf12, vf27 + ilwr.z vi09, vi03 | itof0.xyzw vf15, vf15 + lq.xyzw vf20, 0(vi08) | maxx.xy vf17, vf17, vf00 + ior vi10, vi06, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 1(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 0(vi09) | madday.xyzw ACC, vf06, vf12 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 1(vi09) | mulax.xyzw ACC, vf25, vf17 + ilw.w vi06, 1(vi03) | maddy.xyzw vf13, vf13, vf17 + iaddi vi03, vi03, 0x2 | mulaw.xy ACC, vf19, vf00 + ilwr.y vi07, vi04 | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 1(vi08) | mulax.xyzw ACC, vf26, vf17 + lq.xyzw vf27, 0(vi06) | maddy.xyzw vf15, vf15, vf17 + sq.xyzw vf13, 0(vi10) | add.xyzw vf25, vf20, vf22 + ilwr.w vi08, vi07 | mini.xy vf16, vf16, vf02 + ibne vi00, vi02, L20 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf15, 1(vi10) | itof0.xyzw vf13, vf27 +L21: + jr vi15 | nop + nop | nop +L22: + ilw.w vi05, 328(vi03) | nop + iaddi vi03, vi03, 0x1 | nop + ilw.x vi07, 328(vi04) | nop + nop | nop + lq.xyzw vf27, 328(vi05) | nop + nop | nop + ilw.w vi08, 328(vi07) | nop + nop | nop + nop | itof0.xyzw vf12, vf27 + ilw.z vi09, 327(vi03) | nop + lq.xyzw vf20, 328(vi08) | nop + nop | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 329(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 328(vi09) | madday.xyzw ACC, vf06, vf12 + nop | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 329(vi09) | nop + ilw.w vi06, 328(vi03) | nop + iaddi vi03, vi03, 0x1 | mulaw.xy ACC, vf19, vf00 + ilw.y vi07, 328(vi04) | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 329(vi08) | nop + lq.xyzw vf27, 328(vi06) | nop + nop | add.xyzw vf25, vf20, vf22 + ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 + nop | add.xyzw vf26, vf21, vf23 + nop | itof0.xyzw vf13, vf27 +L23: + 
ilw.z vi09, 327(vi03) | itof0.xyzw vf14, vf14 + lq.xyzw vf20, 328(vi08) | maxx.xy vf16, vf16, vf00 + ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf15, 329(vi06) | maddax.xyzw ACC, vf05, vf13 + lq.xyzw vf22, 328(vi09) | madday.xyzw ACC, vf06, vf13 + nop | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf23, 329(vi09) | mulax.xyzw ACC, vf25, vf16 + ilw.w vi05, 328(vi03) | maddy.xyzw vf12, vf12, vf16 + iaddi vi02, vi02, -0x1 | mulaw.xy ACC, vf19, vf00 + ilw.z vi07, 328(vi04) | maddw.xy vf17, vf18, vf13 + lq.xyzw vf21, 329(vi08) | mulax.xyzw ACC, vf26, vf16 + lq.xyzw vf27, 328(vi05) | maddy.xyzw vf14, vf14, vf16 + sq.xyzw vf12, 328(vi10) | add.xyzw vf25, vf20, vf22 + ilw.w vi08, 328(vi07) | mini.xy vf17, vf17, vf02 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf14, 329(vi10) | itof0.xyzw vf12, vf27 + ilw.z vi09, 328(vi03) | itof0.xyzw vf15, vf15 + lq.xyzw vf20, 328(vi08) | maxx.xy vf17, vf17, vf00 + ior vi10, vi06, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 329(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 328(vi09) | madday.xyzw ACC, vf06, vf12 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 329(vi09) | mulax.xyzw ACC, vf25, vf17 + ilw.w vi06, 329(vi03) | maddy.xyzw vf13, vf13, vf17 + iaddi vi03, vi03, 0x2 | mulaw.xy ACC, vf19, vf00 + ilw.w vi07, 328(vi04) | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 329(vi08) | mulax.xyzw ACC, vf26, vf17 + lq.xyzw vf27, 328(vi06) | maddy.xyzw vf15, vf15, vf17 + sq.xyzw vf13, 328(vi10) | add.xyzw vf25, vf20, vf22 + ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf15, 329(vi10) | itof0.xyzw vf13, vf27 + ilw.z vi09, 327(vi03) | itof0.xyzw vf14, vf14 + lq.xyzw vf20, 328(vi08) | maxx.xy vf16, vf16, vf00 + ior vi10, vi05, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf15, 329(vi06) | maddax.xyzw ACC, vf05, vf13 + lq.xyzw vf22, 328(vi09) | madday.xyzw ACC, vf06, vf13 + iaddi vi04, vi04, 0x1 | maddz.xyzw vf13, vf07, vf13 + 
lq.xyzw vf23, 329(vi09) | mulax.xyzw ACC, vf25, vf16 + ilw.w vi05, 328(vi03) | maddy.xyzw vf12, vf12, vf16 + iaddi vi02, vi02, -0x1 | mulaw.xy ACC, vf19, vf00 + ilw.x vi07, 328(vi04) | maddw.xy vf17, vf18, vf13 + lq.xyzw vf21, 329(vi08) | mulax.xyzw ACC, vf26, vf16 + lq.xyzw vf27, 328(vi05) | maddy.xyzw vf14, vf14, vf16 + sq.xyzw vf12, 328(vi10) | add.xyzw vf25, vf20, vf22 + ilw.w vi08, 328(vi07) | mini.xy vf17, vf17, vf02 + ibeq vi00, vi02, L24 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf14, 329(vi10) | itof0.xyzw vf12, vf27 + ilw.z vi09, 328(vi03) | itof0.xyzw vf15, vf15 + lq.xyzw vf20, 328(vi08) | maxx.xy vf17, vf17, vf00 + ior vi10, vi06, vi00 | mulaw.xyzw ACC, vf08, vf00 + lq.xyzw vf14, 329(vi05) | maddax.xyzw ACC, vf05, vf12 + lq.xyzw vf22, 328(vi09) | madday.xyzw ACC, vf06, vf12 + iaddi vi02, vi02, -0x1 | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf23, 329(vi09) | mulax.xyzw ACC, vf25, vf17 + ilw.w vi06, 329(vi03) | maddy.xyzw vf13, vf13, vf17 + iaddi vi03, vi03, 0x2 | mulaw.xy ACC, vf19, vf00 + ilw.y vi07, 328(vi04) | maddw.xy vf16, vf18, vf12 + lq.xyzw vf21, 329(vi08) | mulax.xyzw ACC, vf26, vf17 + lq.xyzw vf27, 328(vi06) | maddy.xyzw vf15, vf15, vf17 + sq.xyzw vf13, 328(vi10) | add.xyzw vf25, vf20, vf22 + ilw.w vi08, 328(vi07) | mini.xy vf16, vf16, vf02 + ibne vi00, vi02, L23 | add.xyzw vf26, vf21, vf23 + sq.xyzw vf15, 329(vi10) | itof0.xyzw vf13, vf27 +L24: + jr vi15 | nop + nop | nop +L25: + ilw.x vi02, 1(vi14) | nop + ilw.w vi03, 2(vi14) | nop + ilw.x vi04, 4(vi14) | nop + lq.xyzw vf01, 656(vi00) | nop + ibeq vi00, vi02, L32 | nop + lq.xyzw vf02, 657(vi00) | nop + lq.xy vf18, 667(vi00) | nop + lq.xy vf19, 669(vi00) | nop + b L27 | nop + lq.w vf26, 667(vi00) | nop +L26: + ilwr.z vi02, vi14 | nop + ilw.y vi03, 2(vi14) | nop + ilw.z vi04, 3(vi14) | nop + lq.xyzw vf01, 656(vi00) | nop + ibeq vi00, vi02, L32 | nop + lq.xyzw vf02, 657(vi00) | nop + lq.xy vf18, 666(vi00) | nop + lq.xy vf19, 668(vi00) | nop + lq.w vf26, 666(vi00) | nop +L27: + lq.xyzw vf05, 5(vi14) | 
nop + lq.xyzw vf06, 6(vi14) | nop + lq.xyzw vf07, 7(vi14) | nop + ibne vi00, vi14, L37 | nop + lq.xyzw vf08, 8(vi14) | nop + ilwr.w vi05, vi03 | nop + nop | nop + nop | nop + nop | nop + lq.xyzw vf12, 0(vi05) | nop + ilwr.z vi10, vi03 | nop + ilwr.x vi07, vi04 | nop + nop | nop + nop | itof0.xyzw vf12, vf12 + nop | nop + nop | mulaw.xyzw ACC, vf08, vf00 + ilwr.w vi09, vi07 | nop + lq.xyzw vf22, 0(vi10) | maddax.xyzw ACC, vf05, vf12 + nop | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 1(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 0(vi09) | nop + lq.xyzw vf21, 1(vi09) | nop + lq.xyzw vf14, 1(vi05) | nop + ilw.w vi06, 1(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf16, vf18, vf12 + nop | add.xyzw vf24, vf20, vf22 + nop | add.xyzw vf25, vf21, vf23 +L28: + lq.xyzw vf13, 0(vi06) | nop + ilw.z vi10, 1(vi03) | subw.w vf00, vf20, vf26 + ilwr.y vi08, vi04 | subw.w vf00, vf22, vf26 + ior vi11, vi05, vi00 | mini.xy vf16, vf16, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf13, vf13 + fsand vi01, 0x2 | itof0.xyzw vf14, vf14 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilwr.w vi09, vi08 | maxx.xy vf16, vf16, vf00 + lq.xyzw vf22, 0(vi10) | maddax.xyzw ACC, vf05, vf13 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf13 + lq.xyzw vf23, 1(vi10) | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf16 + lq.xyzw vf21, 1(vi09) | maddy.xyzw vf12, vf12, vf16 + ibne vi00, vi01, L33 | mulax.xyzw ACC, vf25, vf16 + lq.xyzw vf15, 1(vi06) | maddy.xyzw vf14, vf14, vf16 + ilw.w vi05, 2(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf12, 0(vi11) | maddw.xy vf17, vf18, vf13 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf14, 1(vi11) | add.xyzw vf25, vf21, vf23 +L29: + lq.xyzw vf12, 0(vi05) | nop + ilw.z vi10, 2(vi03) | subw.w vf00, vf20, vf26 + ilwr.z vi07, vi04 | subw.w vf00, vf22, vf26 + ior vi11, vi06, vi00 | mini.xy vf17, vf17, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf12, vf12 + fsand vi01, 0x2 | itof0.xyzw 
vf15, vf15 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilwr.w vi09, vi07 | maxx.xy vf17, vf17, vf00 + lq.xyzw vf22, 0(vi10) | maddax.xyzw ACC, vf05, vf12 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 1(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf17 + lq.xyzw vf21, 1(vi09) | maddy.xyzw vf13, vf13, vf17 + ibne vi00, vi01, L34 | mulax.xyzw ACC, vf25, vf17 + lq.xyzw vf14, 1(vi05) | maddy.xyzw vf15, vf15, vf17 + ilw.w vi06, 3(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf13, 0(vi11) | maddw.xy vf16, vf18, vf12 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf15, 1(vi11) | add.xyzw vf25, vf21, vf23 +L30: + lq.xyzw vf13, 0(vi06) | nop + ilw.z vi10, 3(vi03) | subw.w vf00, vf20, vf26 + ilwr.w vi08, vi04 | subw.w vf00, vf22, vf26 + ior vi11, vi05, vi00 | mini.xy vf16, vf16, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf13, vf13 + fsand vi01, 0x2 | itof0.xyzw vf14, vf14 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilwr.w vi09, vi08 | maxx.xy vf16, vf16, vf00 + lq.xyzw vf22, 0(vi10) | maddax.xyzw ACC, vf05, vf13 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf13 + lq.xyzw vf23, 1(vi10) | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf16 + lq.xyzw vf21, 1(vi09) | maddy.xyzw vf12, vf12, vf16 + ibne vi00, vi01, L35 | mulax.xyzw ACC, vf25, vf16 + lq.xyzw vf15, 1(vi06) | maddy.xyzw vf14, vf14, vf16 + ilw.w vi05, 4(vi03) | nop + iaddi vi04, vi04, 0x1 | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf12, 0(vi11) | maddw.xy vf17, vf18, vf13 + ibeq vi00, vi02, L32 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf14, 1(vi11) | add.xyzw vf25, vf21, vf23 +L31: + lq.xyzw vf12, 0(vi05) | nop + ilw.z vi10, 4(vi03) | subw.w vf00, vf20, vf26 + ilwr.x vi07, vi04 | subw.w vf00, vf22, vf26 + ior vi11, vi06, vi00 | mini.xy vf17, vf17, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf12, vf12 + fsand vi01, 0x2 | itof0.xyzw vf15, vf15 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilwr.w 
vi09, vi07 | maxx.xy vf17, vf17, vf00 + lq.xyzw vf22, 0(vi10) | maddax.xyzw ACC, vf05, vf12 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 1(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 0(vi09) | mulax.xyzw ACC, vf24, vf17 + lq.xyzw vf21, 1(vi09) | maddy.xyzw vf13, vf13, vf17 + ibne vi00, vi01, L36 | mulax.xyzw ACC, vf25, vf17 + lq.xyzw vf14, 1(vi05) | maddy.xyzw vf15, vf15, vf17 + ilw.w vi06, 5(vi03) | nop + iaddi vi03, vi03, 0x4 | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf13, 0(vi11) | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L28 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf15, 1(vi11) | add.xyzw vf25, vf21, vf23 +L32: + jr vi15 | nop + nop | nop +L33: + lq.xyzw vf27, 0(vi07) | nop + ilw.w vi05, 2(vi03) | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf17, vf18, vf13 + ibne vi00, vi02, L29 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 0(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L34: + lq.xyzw vf27, 0(vi08) | nop + ilw.w vi06, 3(vi03) | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L30 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 1(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L35: + lq.xyzw vf27, 0(vi07) | nop + ilw.w vi05, 4(vi03) | mulaw.xy ACC, vf19, vf00 + iaddi vi04, vi04, 0x1 | maddw.xy vf17, vf18, vf13 + ibne vi00, vi02, L31 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 2(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L36: + lq.xyzw vf27, 0(vi08) | nop + ilw.w vi06, 5(vi03) | mulaw.xy ACC, vf19, vf00 + iaddi vi03, vi03, 0x4 | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L28 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, -1(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L37: + ilw.w vi05, 328(vi03) | nop + nop | nop + nop | nop + nop | nop + lq.xyzw vf12, 328(vi05) | nop + ilw.z vi10, 328(vi03) | nop + ilw.x vi07, 328(vi04) | nop + nop | nop + nop | itof0.xyzw vf12, vf12 + nop | nop + nop | mulaw.xyzw ACC, vf08, vf00 + ilw.w vi09, 328(vi07) | 
nop + lq.xyzw vf22, 328(vi10) | maddax.xyzw ACC, vf05, vf12 + nop | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 329(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 328(vi09) | nop + lq.xyzw vf21, 329(vi09) | nop + lq.xyzw vf14, 329(vi05) | nop + ilw.w vi06, 329(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf16, vf18, vf12 + nop | add.xyzw vf24, vf20, vf22 + nop | add.xyzw vf25, vf21, vf23 +L38: + lq.xyzw vf13, 328(vi06) | nop + ilw.z vi10, 329(vi03) | subw.w vf00, vf20, vf26 + ilw.y vi08, 328(vi04) | subw.w vf00, vf22, vf26 + ior vi11, vi05, vi00 | mini.xy vf16, vf16, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf13, vf13 + fsand vi01, 0x2 | itof0.xyzw vf14, vf14 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilw.w vi09, 328(vi08) | maxx.xy vf16, vf16, vf00 + lq.xyzw vf22, 328(vi10) | maddax.xyzw ACC, vf05, vf13 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf13 + lq.xyzw vf23, 329(vi10) | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf16 + lq.xyzw vf21, 329(vi09) | maddy.xyzw vf12, vf12, vf16 + ibne vi00, vi01, L43 | mulax.xyzw ACC, vf25, vf16 + lq.xyzw vf15, 329(vi06) | maddy.xyzw vf14, vf14, vf16 + ilw.w vi05, 330(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf12, 328(vi11) | maddw.xy vf17, vf18, vf13 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf14, 329(vi11) | add.xyzw vf25, vf21, vf23 +L39: + lq.xyzw vf12, 328(vi05) | nop + ilw.z vi10, 330(vi03) | subw.w vf00, vf20, vf26 + ilw.z vi07, 328(vi04) | subw.w vf00, vf22, vf26 + ior vi11, vi06, vi00 | mini.xy vf17, vf17, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf12, vf12 + fsand vi01, 0x2 | itof0.xyzw vf15, vf15 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilw.w vi09, 328(vi07) | maxx.xy vf17, vf17, vf00 + lq.xyzw vf22, 328(vi10) | maddax.xyzw ACC, vf05, vf12 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 329(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf17 + 
lq.xyzw vf21, 329(vi09) | maddy.xyzw vf13, vf13, vf17 + ibne vi00, vi01, L44 | mulax.xyzw ACC, vf25, vf17 + lq.xyzw vf14, 329(vi05) | maddy.xyzw vf15, vf15, vf17 + ilw.w vi06, 331(vi03) | nop + nop | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf13, 328(vi11) | maddw.xy vf16, vf18, vf12 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf15, 329(vi11) | add.xyzw vf25, vf21, vf23 +L40: + lq.xyzw vf13, 328(vi06) | nop + ilw.z vi10, 331(vi03) | subw.w vf00, vf20, vf26 + ilw.w vi08, 328(vi04) | subw.w vf00, vf22, vf26 + ior vi11, vi05, vi00 | mini.xy vf16, vf16, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf13, vf13 + fsand vi01, 0x2 | itof0.xyzw vf14, vf14 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilw.w vi09, 328(vi08) | maxx.xy vf16, vf16, vf00 + lq.xyzw vf22, 328(vi10) | maddax.xyzw ACC, vf05, vf13 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf13 + lq.xyzw vf23, 329(vi10) | maddz.xyzw vf13, vf07, vf13 + lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf16 + lq.xyzw vf21, 329(vi09) | maddy.xyzw vf12, vf12, vf16 + ibne vi00, vi01, L45 | mulax.xyzw ACC, vf25, vf16 + lq.xyzw vf15, 329(vi06) | maddy.xyzw vf14, vf14, vf16 + ilw.w vi05, 332(vi03) | nop + iaddi vi04, vi04, 0x1 | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf12, 328(vi11) | maddw.xy vf17, vf18, vf13 + ibeq vi00, vi02, L42 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf14, 329(vi11) | add.xyzw vf25, vf21, vf23 +L41: + lq.xyzw vf12, 328(vi05) | nop + ilw.z vi10, 332(vi03) | subw.w vf00, vf20, vf26 + ilw.x vi07, 328(vi04) | subw.w vf00, vf22, vf26 + ior vi11, vi06, vi00 | mini.xy vf17, vf17, vf02 + iaddi vi02, vi02, -0x1 | itof0.xyzw vf12, vf12 + fsand vi01, 0x2 | itof0.xyzw vf15, vf15 + fsand vi12, 0x2 | mulaw.xyzw ACC, vf08, vf00 + ilw.w vi09, 328(vi07) | maxx.xy vf17, vf17, vf00 + lq.xyzw vf22, 328(vi10) | maddax.xyzw ACC, vf05, vf12 + iand vi01, vi01, vi12 | madday.xyzw ACC, vf06, vf12 + lq.xyzw vf23, 329(vi10) | maddz.xyzw vf12, vf07, vf12 + lq.xyzw vf20, 328(vi09) | mulax.xyzw ACC, vf24, vf17 + lq.xyzw vf21, 
329(vi09) | maddy.xyzw vf13, vf13, vf17 + ibne vi00, vi01, L46 | mulax.xyzw ACC, vf25, vf17 + lq.xyzw vf14, 329(vi05) | maddy.xyzw vf15, vf15, vf17 + ilw.w vi06, 333(vi03) | nop + iaddi vi03, vi03, 0x4 | mulaw.xy ACC, vf19, vf00 + sq.xyzw vf13, 328(vi11) | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L38 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf15, 329(vi11) | add.xyzw vf25, vf21, vf23 +L42: + jr vi15 | nop + nop | nop +L43: + lq.xyzw vf27, 328(vi07) | nop + ilw.w vi05, 330(vi03) | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf17, vf18, vf13 + ibne vi00, vi02, L39 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 328(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L44: + lq.xyzw vf27, 328(vi08) | nop + ilw.w vi06, 331(vi03) | mulaw.xy ACC, vf19, vf00 + nop | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L40 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 329(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L45: + lq.xyzw vf27, 328(vi07) | nop + ilw.w vi05, 332(vi03) | mulaw.xy ACC, vf19, vf00 + iaddi vi04, vi04, 0x1 | maddw.xy vf17, vf18, vf13 + ibne vi00, vi02, L41 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 330(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L46: + lq.xyzw vf27, 328(vi08) | nop + ilw.w vi06, 333(vi03) | mulaw.xy ACC, vf19, vf00 + iaddi vi03, vi03, 0x4 | maddw.xy vf16, vf18, vf12 + ibne vi00, vi02, L38 | add.xyzw vf24, vf20, vf22 + sq.xyzw vf27, 327(vi03) | add.xyzw vf25, vf21, vf23 + jr vi15 | nop + nop | nop +L47: + ilw.y vi11, 1(vi14) | nop + ilw.x vi02, 3(vi14) | nop + ilw.y vi03, 4(vi14) | nop + b L49 | nop + lq.w vf10, 667(vi00) | nop +L48: + ilwr.w vi11, vi14 | nop + ilw.z vi02, 2(vi14) | nop + ilw.w vi03, 3(vi14) | nop + lq.w vf10, 666(vi00) | nop +L49: + ibeq vi00, vi11, L59 | nop + iadd vi02, vi02, vi14 | nop + iadd vi10, vi02, vi11 | nop + ibne vi00, vi14, L64 | nop + iaddi vi10, vi10, 0x3 | nop + lqi.xyzw vf06, vi02 | nop + ilwr.x vi04, vi03 | nop + nop | nop + nop | nop + mtir vi09, vf06.z | nop + 
lqi.xyzw vf06, vi02 | nop + ilwr.w vi08, vi04 | nop + ilwr.y vi05, vi03 | nop + lq.xyzw vf08, 0(vi09) | nop + nop | nop + nop | nop + lq.xyzw vf07, 0(vi08) | nop + nop | nop + mtir vi09, vf06.z | nop +L50: + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L51: + ilwr.w vi08, vi05 | nop + ilwr.z vi06, vi03 | nop + lq.xyzw vf08, 0(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L60 | nop + lq.xyzw vf07, 0(vi08) | nop +L52: + ibeq vi10, vi02, L59 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L53: + ilwr.w vi08, vi06 | nop + ilwr.w vi07, vi03 | nop + lq.xyzw vf08, 0(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L61 | nop + lq.xyzw vf07, 0(vi08) | nop +L54: + ibeq vi10, vi02, L59 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L55: + ilwr.w vi08, vi07 | nop + ilw.x vi04, 1(vi03) | nop + lq.xyzw vf08, 0(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L62 | nop + lq.xyzw vf07, 0(vi08) | nop +L56: + ibeq vi10, vi02, L59 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L57: + ilwr.w vi08, vi04 | nop + ilw.y vi05, 1(vi03) | nop + lq.xyzw vf08, 0(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L63 | nop + lq.xyzw vf07, 0(vi08) | nop +L58: + iaddi vi03, vi03, 0x1 | nop + ibne vi10, vi02, L50 | nop + mtir vi09, vf06.z | nop +L59: + jr vi15 | nop + nop | nop +L60: + lq.xyzw vf09, 0(vi04) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L52 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L59 | nop + sq.xyzw vf09, -3(vi02) | nop + b L53 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L61: + lq.xyzw vf09, 0(vi05) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L54 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L59 | nop + sq.xyzw vf09, -3(vi02) | nop + b L55 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L62: + lq.xyzw vf09, 0(vi06) | nop + fsand vi01, 
0x2 | nop + ibeq vi00, vi01, L56 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L59 | nop + sq.xyzw vf09, -3(vi02) | nop + b L57 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L63: + lq.xyzw vf09, 0(vi07) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L58 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L59 | nop + sq.xyzw vf09, -3(vi02) | nop + iaddi vi03, vi03, 0x1 | nop + b L51 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L64: + lqi.xyzw vf06, vi02 | nop + ilw.x vi04, 328(vi03) | nop + nop | nop + nop | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | nop + ilw.w vi08, 328(vi04) | nop + ilw.y vi05, 328(vi03) | nop + lq.xyzw vf08, 328(vi09) | nop + nop | nop + nop | nop + lq.xyzw vf07, 328(vi08) | nop + nop | nop + mtir vi09, vf06.z | nop +L65: + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L66: + ilw.w vi08, 328(vi05) | nop + ilw.z vi06, 328(vi03) | nop + lq.xyzw vf08, 328(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L75 | nop + lq.xyzw vf07, 328(vi08) | nop +L67: + ibeq vi10, vi02, L74 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L68: + ilw.w vi08, 328(vi06) | nop + ilw.w vi07, 328(vi03) | nop + lq.xyzw vf08, 328(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L76 | nop + lq.xyzw vf07, 328(vi08) | nop +L69: + ibeq vi10, vi02, L74 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L70: + ilw.w vi08, 328(vi07) | nop + ilw.x vi04, 329(vi03) | nop + lq.xyzw vf08, 328(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L77 | nop + lq.xyzw vf07, 328(vi08) | nop +L71: + ibeq vi10, vi02, L74 | nop + mtir vi09, vf06.z | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L72: + ilw.w vi08, 328(vi04) | nop + ilw.y vi05, 329(vi03) | nop + lq.xyzw vf08, 328(vi09) | nop + fsand vi01, 0x2 | subw.w vf00, vf07, vf10 + ibne vi00, vi01, L78 | nop + lq.xyzw vf07, 328(vi08) | nop +L73: + iaddi vi03, 
vi03, 0x1 | nop + ibne vi10, vi02, L65 | nop + mtir vi09, vf06.z | nop +L74: + jr vi15 | nop + nop | nop +L75: + lq.xyzw vf09, 328(vi04) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L67 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L74 | nop + sq.xyzw vf09, -3(vi02) | nop + b L68 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L76: + lq.xyzw vf09, 328(vi05) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L69 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L74 | nop + sq.xyzw vf09, -3(vi02) | nop + b L70 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L77: + lq.xyzw vf09, 328(vi06) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L71 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L74 | nop + sq.xyzw vf09, -3(vi02) | nop + b L72 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L78: + lq.xyzw vf09, 328(vi07) | nop + fsand vi01, 0x2 | nop + ibeq vi00, vi01, L73 | nop + mtir vi09, vf06.z | nop + ibeq vi10, vi02, L74 | nop + sq.xyzw vf09, -3(vi02) | nop + iaddi vi03, vi03, 0x1 | nop + b L66 | nop + lqi.xyzw vf06, vi02 | subw.w vf00, vf08, vf10 +L79: + fcset 0x0 | nop + iblez vi12, L80 | nop + iaddi vi09, vi09, 0x1 | nop + ior vi10, vi06, vi00 | nop + iadd vi01, vi12, vi12 | nop + iadd vi01, vi01, vi12 | nop + iadd vi05, vi06, vi01 | nop + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | nop + jr vi15 | nop + ilwr.x vi12, vi09 | nop +L80: + ilw.y vi01, -1(vi09) | nop + ilw.z vi13, -1(vi09) | nop + ibeq vi00, vi12, L83 | nop + ilwr.x vi07, vi10 | nop + ibltz vi01, L81 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + sqi.xyzw vf31, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + iadd vi01, vi12, vi12 | nop + iadd vi01, vi01, vi12 | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + ior vi10, vi06, vi00 | nop + iadd 
vi05, vi06, vi01 | nop + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | nop + jr vi15 | nop + ilwr.x vi12, vi09 | nop +L81: + mtir vi01, vf03.x | nop + mtir vi06, vf03.y | nop + mr32.xyzw vf03, vf03 | nop + iadd vi07, vi07, vi11 | nop + ibgez vi13, L82 | nop + iswr.x vi07, vi10 | nop + xgkick vi01 | nop + ior vi10, vi06, vi00 | nop + iadd vi01, vi12, vi12 | nop + iadd vi01, vi01, vi12 | nop + iadd vi05, vi06, vi01 | nop + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | nop + jr vi15 | nop + ilwr.x vi12, vi09 | nop +L82: + iadd vi13, vi13, vi08 | nop + xgkick vi01 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + sqi.xyzw vf31, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + iadd vi01, vi12, vi12 | nop + iadd vi01, vi01, vi12 | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + ior vi10, vi06, vi00 | nop + iadd vi05, vi06, vi01 | nop + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | nop + jr vi15 | nop + ilwr.x vi12, vi09 | nop +L83: + mtir vi01, vf03.x | nop + mr32.xyzw vf03, vf03 | nop + iadd vi07, vi07, vi11 | nop + iswr.x vi07, vi10 | nop + xgkick vi01 | nop + lq.xyzw vf04, 664(vi00) | nop :e + nop | nop +L84: + ilw.w vi08, 4(vi14) | addw.z vf25, vf00, vf00 + lq.xyzw vf06, 658(vi00) | nop + lq.xyzw vf07, 661(vi00) | nop + lq.xyzw vf05, 660(vi00) | nop + iadd vi08, vi08, vi14 | nop + lq.w vf29, 1(vi08) | nop + fcset 0x0 | nop + ilw.z vi09, 4(vi14) | nop + ilw.y vi03, 3(vi14) | nop + lq.xyzw vf08, 662(vi00) | nop + iadd vi09, vi09, vi14 | addw.z vf02, vf00, vf29 + ibne vi00, vi14, L93 | nop + mtir vi06, vf03.x | addw.z vf25, vf25, vf29 + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + ilwr.x vi02, vi03 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + 
lqi.xyzw vf31, vi13 | nop + ilwr.w vi04, vi02 | nop + lq.xy vf25, 0(vi02) | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + lq.xyzw vf09, 0(vi04) | nop + lq.xyzw vf17, 1(vi04) | nop + sqi.xyzw vf30, vi06 | nop + sqi.xyzw vf31, vi06 | nop + ilwr.y vi02, vi03 | nop + lqi.xyzw vf29, vi13 | subz.xyz vf21, vf25, vf02 + lqi.xyzw vf30, vi13 | nop + div Q, vf01.x, vf09.w | nop + ilwr.w vi04, vi02 | nop + lq.xy vf25, 0(vi02) | mul.xyzw vf13, vf09, vf08 + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + lq.xyzw vf10, 0(vi04) | nop + lq.xyzw vf18, 1(vi04) | nop + iadd vi01, vi12, vi12 | mul.xyz vf09, vf09, Q + iadd vi01, vi01, vi12 | mul.xyz vf21, vf21, Q + ilwr.z vi02, vi03 | nop + iadd vi05, vi06, vi01 | subz.xyz vf22, vf25, vf02 + ior vi10, vi06, vi00 | nop + div Q, vf01.x, vf10.w | nop + ilwr.w vi04, vi02 | add.xyzw vf09, vf09, vf07 + lq.xy vf25, 0(vi02) | mul.xyzw vf14, vf10, vf08 + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | mul.xyzw vf17, vf17, vf04 + lq.xyzw vf11, 0(vi04) | miniz.w vf09, vf09, vf01 + lq.xyzw vf19, 1(vi04) | clipw.xyz vf13, vf13 + iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q + ilwr.x vi12, vi09 | mul.xyz vf22, vf22, Q +L85: + ilwr.w vi02, vi03 | maxy.w vf09, vf09, vf01 + fcand vi01, 0x3ffff | subz.xyz vf23, vf25, vf02 + ibeq vi00, vi01, L86 | nop + div Q, vf01.x, vf11.w | ftoi0.xyzw vf17, vf17 + nop | addw.w vf09, vf09, vf01 +L86: + ilwr.w vi04, vi02 | add.xyzw vf10, vf10, vf07 + lq.xy vf25, 0(vi02) | mul.xyzw vf15, vf11, vf08 + sqi.xyzw vf21, vi06 | nop + sqi.xyzw vf17, vi06 | ftoi4.xyzw vf09, vf09 + iaddi vi03, vi03, 0x1 | mul.xyzw vf18, vf18, vf04 + lq.xyzw vf12, 0(vi04) | miniz.w vf10, vf10, vf01 + lq.xyzw vf20, 1(vi04) | clipw.xyz vf14, vf14 + ibeq vi05, vi06, L90 | mul.xyz vf11, vf11, Q + sqi.xyzw vf09, vi06 | mul.xyz vf23, vf23, Q + ilwr.x vi02, vi03 | maxy.w vf10, vf10, vf01 + fcand vi01, 0x3ffff | subz.xyz vf24, vf25, vf02 + ibeq vi00, vi01, L87 | nop + div Q, vf01.x, vf12.w | ftoi0.xyzw vf18, vf18 + nop | addw.w 
vf10, vf10, vf01 +L87: + ilwr.w vi04, vi02 | add.xyzw vf11, vf11, vf07 + lq.xy vf25, 0(vi02) | mul.xyzw vf16, vf12, vf08 + sqi.xyzw vf22, vi06 | mul.xyzw vf19, vf19, vf04 + sqi.xyzw vf18, vi06 | ftoi4.xyzw vf10, vf10 + lq.xyzw vf09, 0(vi04) | miniz.w vf11, vf11, vf01 + lq.xyzw vf17, 1(vi04) | clipw.xyz vf15, vf15 + ibeq vi05, vi06, L91 | mul.xyz vf12, vf12, Q + sqi.xyzw vf10, vi06 | mul.xyz vf24, vf24, Q + ilwr.y vi02, vi03 | maxy.w vf11, vf11, vf01 + fcand vi01, 0x3ffff | subz.xyz vf21, vf25, vf02 + ibeq vi00, vi01, L88 | ftoi0.xyzw vf19, vf19 + div Q, vf01.x, vf09.w | nop + nop | addw.w vf11, vf11, vf01 +L88: + ilwr.w vi04, vi02 | add.xyzw vf12, vf12, vf07 + lq.xy vf25, 0(vi02) | mul.xyzw vf13, vf09, vf08 + sqi.xyzw vf23, vi06 | mul.xyzw vf20, vf20, vf04 + sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 + lq.xyzw vf10, 0(vi04) | miniz.w vf12, vf12, vf01 + lq.xyzw vf18, 1(vi04) | clipw.xyz vf16, vf16 + ibeq vi05, vi06, L92 | mul.xyz vf09, vf09, Q + sqi.xyzw vf11, vi06 | mul.xyz vf21, vf21, Q + ilwr.z vi02, vi03 | maxy.w vf12, vf12, vf01 + fcand vi01, 0x3ffff | subz.xyz vf22, vf25, vf02 + ibeq vi00, vi01, L89 | ftoi0.xyzw vf20, vf20 + div Q, vf01.x, vf10.w | nop + nop | addw.w vf12, vf12, vf01 +L89: + ilwr.w vi04, vi02 | add.xyzw vf09, vf09, vf07 + lq.xy vf25, 0(vi02) | mul.xyzw vf14, vf10, vf08 + sqi.xyzw vf24, vi06 | mul.xyzw vf17, vf17, vf04 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + lq.xyzw vf11, 0(vi04) | miniz.w vf09, vf09, vf01 + lq.xyzw vf19, 1(vi04) | clipw.xyz vf13, vf13 + ibne vi05, vi06, L85 | mul.xyz vf10, vf10, Q + sqi.xyzw vf12, vi06 | mul.xyz vf22, vf22, Q + b L79 | nop + iaddiu vi15, vi00, 0x3b9 | nop +L90: + b L79 | nop + iaddiu vi15, vi00, 0x3c7 | nop +L91: + b L79 | nop + iaddiu vi15, vi00, 0x3d4 | nop +L92: + b L79 | nop + iaddiu vi15, vi00, 0x3e1 | nop +L93: + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + 
ilw.x vi02, 328(vi03) | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + ilw.w vi04, 328(vi02) | nop + lq.xy vf25, 328(vi02) | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + lq.xyzw vf09, 328(vi04) | nop + lq.xyzw vf17, 329(vi04) | nop + sqi.xyzw vf30, vi06 | nop + sqi.xyzw vf31, vi06 | nop + ilw.y vi02, 328(vi03) | nop + lqi.xyzw vf29, vi13 | subz.xyz vf21, vf25, vf02 + lqi.xyzw vf30, vi13 | nop + div Q, vf01.x, vf09.w | nop + ilw.w vi04, 328(vi02) | nop + lq.xy vf25, 328(vi02) | mul.xyzw vf13, vf09, vf08 + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + lq.xyzw vf10, 328(vi04) | nop + lq.xyzw vf18, 329(vi04) | nop + iadd vi01, vi12, vi12 | mul.xyz vf09, vf09, Q + iadd vi01, vi01, vi12 | mul.xyz vf21, vf21, Q + ilw.z vi02, 328(vi03) | nop + iadd vi05, vi06, vi01 | subz.xyz vf22, vf25, vf02 + ior vi10, vi06, vi00 | nop + div Q, vf01.x, vf10.w | nop + ilw.w vi04, 328(vi02) | add.xyzw vf09, vf09, vf07 + lq.xy vf25, 328(vi02) | mul.xyzw vf14, vf10, vf08 + sqi.xyzw vf06, vi06 | nop + isw.x vi12, -1(vi06) | mul.xyzw vf17, vf17, vf04 + lq.xyzw vf11, 328(vi04) | miniz.w vf09, vf09, vf01 + lq.xyzw vf19, 329(vi04) | clipw.xyz vf13, vf13 + iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q + ilwr.x vi12, vi09 | mul.xyz vf22, vf22, Q +L94: + ilw.w vi02, 328(vi03) | maxy.w vf09, vf09, vf01 + fcand vi01, 0x3ffff | subz.xyz vf23, vf25, vf02 + ibeq vi00, vi01, L95 | nop + div Q, vf01.x, vf11.w | ftoi0.xyzw vf17, vf17 + nop | addw.w vf09, vf09, vf01 +L95: + ilw.w vi04, 328(vi02) | add.xyzw vf10, vf10, vf07 + lq.xy vf25, 328(vi02) | mul.xyzw vf15, vf11, vf08 + sqi.xyzw vf21, vi06 | nop + sqi.xyzw vf17, vi06 | ftoi4.xyzw vf09, vf09 + iaddi vi03, vi03, 0x1 | mul.xyzw vf18, vf18, vf04 + lq.xyzw vf12, 328(vi04) | miniz.w vf10, vf10, vf01 + lq.xyzw vf20, 329(vi04) | clipw.xyz vf14, vf14 + ibeq vi05, vi06, L99 | mul.xyz vf11, vf11, Q + sqi.xyzw vf09, vi06 | mul.xyz vf23, vf23, Q + ilw.x vi02, 328(vi03) | maxy.w vf10, vf10, vf01 + 
fcand vi01, 0x3ffff | subz.xyz vf24, vf25, vf02 + ibeq vi00, vi01, L96 | nop + div Q, vf01.x, vf12.w | ftoi0.xyzw vf18, vf18 + nop | addw.w vf10, vf10, vf01 +L96: + ilw.w vi04, 328(vi02) | add.xyzw vf11, vf11, vf07 + lq.xy vf25, 328(vi02) | mul.xyzw vf16, vf12, vf08 + sqi.xyzw vf22, vi06 | mul.xyzw vf19, vf19, vf04 + sqi.xyzw vf18, vi06 | ftoi4.xyzw vf10, vf10 + lq.xyzw vf09, 328(vi04) | miniz.w vf11, vf11, vf01 + lq.xyzw vf17, 329(vi04) | clipw.xyz vf15, vf15 + ibeq vi05, vi06, L100 | mul.xyz vf12, vf12, Q + sqi.xyzw vf10, vi06 | mul.xyz vf24, vf24, Q + ilw.y vi02, 328(vi03) | maxy.w vf11, vf11, vf01 + fcand vi01, 0x3ffff | subz.xyz vf21, vf25, vf02 + ibeq vi00, vi01, L97 | ftoi0.xyzw vf19, vf19 + div Q, vf01.x, vf09.w | nop + nop | addw.w vf11, vf11, vf01 +L97: + ilw.w vi04, 328(vi02) | add.xyzw vf12, vf12, vf07 + lq.xy vf25, 328(vi02) | mul.xyzw vf13, vf09, vf08 + sqi.xyzw vf23, vi06 | mul.xyzw vf20, vf20, vf04 + sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 + lq.xyzw vf10, 328(vi04) | miniz.w vf12, vf12, vf01 + lq.xyzw vf18, 329(vi04) | clipw.xyz vf16, vf16 + ibeq vi05, vi06, L101 | mul.xyz vf09, vf09, Q + sqi.xyzw vf11, vi06 | mul.xyz vf21, vf21, Q + ilw.z vi02, 328(vi03) | maxy.w vf12, vf12, vf01 + fcand vi01, 0x3ffff | subz.xyz vf22, vf25, vf02 + ibeq vi00, vi01, L98 | ftoi0.xyzw vf20, vf20 + div Q, vf01.x, vf10.w | nop + nop | addw.w vf12, vf12, vf01 +L98: + ilw.w vi04, 328(vi02) | add.xyzw vf09, vf09, vf07 + lq.xy vf25, 328(vi02) | mul.xyzw vf14, vf10, vf08 + sqi.xyzw vf24, vi06 | mul.xyzw vf17, vf17, vf04 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + lq.xyzw vf11, 328(vi04) | miniz.w vf09, vf09, vf01 + lq.xyzw vf19, 329(vi04) | clipw.xyz vf13, vf13 + ibne vi05, vi06, L94 | mul.xyz vf10, vf10, Q + sqi.xyzw vf12, vi06 | mul.xyz vf22, vf22, Q + b L79 | nop + iaddiu vi15, vi00, 0x420 | nop +L99: + b L79 | nop + iaddiu vi15, vi00, 0x42e | nop +L100: + b L79 | nop + iaddiu vi15, vi00, 0x43b | nop +L101: + b L79 | nop + iaddiu vi15, vi00, 0x448 | nop +L102: + 
lq.xyzw vf05, 660(vi00) | addw.z vf13, vf00, vf00 + lq.xyzw vf06, 658(vi00) | nop + ilw.w vi08, 4(vi14) | nop + ilw.z vi09, 4(vi14) | nop + ilw.y vi03, 3(vi14) | addz.z vf13, vf13, vf02 + lq.xyzw vf07, 661(vi00) | nop + iadd vi08, vi08, vi14 | nop + iadd vi09, vi09, vi14 | nop + ibne vi00, vi14, L107 | nop + mtir vi06, vf03.x | nop + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + ilwr.x vi02, vi03 | nop + sqi.xyzw vf30, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + ilwr.w vi04, vi02 | nop + lq.xy vf13, 0(vi02) | nop + ilwr.y vi02, vi03 | nop + sqi.xyzw vf31, vi06 | nop + lq.xyzw vf09, 0(vi04) | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + lq.xyzw vf14, 1(vi04) | nop + div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 + iadd vi01, vi12, vi12 | nop + ilwr.w vi04, vi02 | nop + lq.xy vf13, 0(vi02) | nop + ilwr.z vi02, vi03 | mul.xyzw vf14, vf14, vf04 + iadd vi01, vi01, vi12 | nop + lq.xyzw vf10, 0(vi04) | nop + iadd vi05, vi06, vi01 | mul.xyz vf09, vf09, Q + ior vi10, vi06, vi00 | mul.xyz vf18, vf18, Q + lq.xyzw vf15, 1(vi04) | nop + div Q, vf01.x, vf10.w | subz.xyz vf19, vf13, vf02 + sqi.xyzw vf06, vi06 | add.xyzw vf09, vf09, vf07 + ilwr.w vi04, vi02 | ftoi0.xyzw vf14, vf14 + lq.xy vf13, 0(vi02) | nop + ilwr.w vi02, vi03 | mul.xyzw vf15, vf15, vf04 + isw.x vi12, -1(vi06) | maxy.w vf09, vf09, vf01 + lq.xyzw vf11, 0(vi04) | nop + iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q + ilwr.x vi12, vi09 | mul.xyz vf19, vf19, Q +L103: + iaddi vi03, vi03, 0x1 | nop + lq.xyzw vf16, 1(vi04) | miniz.w vf09, vf09, vf01 + div Q, vf01.x, vf11.w | subz.xyz vf20, vf13, vf02 + sqi.xyzw vf18, vi06 | add.xyzw vf10, vf10, vf07 + ilwr.w vi04, vi02 | ftoi0.xyzw vf15, vf15 + lq.xy vf13, 
0(vi02) | ftoi4.xyzw vf09, vf09 + ilwr.x vi02, vi03 | mul.xyzw vf16, vf16, vf04 + sqi.xyzw vf14, vi06 | maxy.w vf10, vf10, vf01 + lq.xyzw vf12, 0(vi04) | nop + ibeq vi05, vi06, L104 | mul.xyz vf11, vf11, Q + sqi.xyzw vf09, vi06 | mul.xyz vf20, vf20, Q + lq.xyzw vf17, 1(vi04) | miniz.w vf10, vf10, vf01 + div Q, vf01.x, vf12.w | subz.xyz vf21, vf13, vf02 + sqi.xyzw vf19, vi06 | add.xyzw vf11, vf11, vf07 + ilwr.w vi04, vi02 | ftoi0.xyzw vf16, vf16 + lq.xy vf13, 0(vi02) | ftoi4.xyzw vf10, vf10 + ilwr.y vi02, vi03 | mul.xyzw vf17, vf17, vf04 + sqi.xyzw vf15, vi06 | maxy.w vf11, vf11, vf01 + lq.xyzw vf09, 0(vi04) | nop + ibeq vi05, vi06, L105 | mul.xyz vf12, vf12, Q + sqi.xyzw vf10, vi06 | mul.xyz vf21, vf21, Q + lq.xyzw vf14, 1(vi04) | miniz.w vf11, vf11, vf01 + div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 + sqi.xyzw vf20, vi06 | add.xyzw vf12, vf12, vf07 + ilwr.w vi04, vi02 | ftoi0.xyzw vf17, vf17 + lq.xy vf13, 0(vi02) | ftoi4.xyzw vf11, vf11 + ilwr.z vi02, vi03 | mul.xyzw vf14, vf14, vf04 + sqi.xyzw vf16, vi06 | maxy.w vf12, vf12, vf01 + lq.xyzw vf10, 0(vi04) | nop + ibeq vi05, vi06, L106 | mul.xyz vf09, vf09, Q + sqi.xyzw vf11, vi06 | mul.xyz vf18, vf18, Q + lq.xyzw vf15, 1(vi04) | miniz.w vf12, vf12, vf01 + div Q, vf01.x, vf10.w | subz.xyz vf19, vf13, vf02 + sqi.xyzw vf21, vi06 | add.xyzw vf09, vf09, vf07 + ilwr.w vi04, vi02 | ftoi0.xyzw vf14, vf14 + lq.xy vf13, 0(vi02) | ftoi4.xyzw vf12, vf12 + ilwr.w vi02, vi03 | mul.xyzw vf15, vf15, vf04 + sqi.xyzw vf17, vi06 | maxy.w vf09, vf09, vf01 + lq.xyzw vf11, 0(vi04) | nop + ibne vi05, vi06, L103 | mul.xyz vf10, vf10, Q + sqi.xyzw vf12, vi06 | mul.xyz vf19, vf19, Q + b L79 | nop + iaddiu vi15, vi00, 0x491 | nop +L104: + b L79 | nop + iaddiu vi15, vi00, 0x49c | nop +L105: + b L79 | nop + iaddiu vi15, vi00, 0x4a6 | nop +L106: + b L79 | nop + iaddiu vi15, vi00, 0x4b0 | nop +L107: + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 
0x80 | nop + iadd vi13, vi13, vi08 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | nop + sqi.xyzw vf29, vi06 | nop + ilw.x vi02, 328(vi03) | nop + sqi.xyzw vf30, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + ilw.w vi04, 328(vi02) | nop + lq.xy vf13, 328(vi02) | nop + ilw.y vi02, 328(vi03) | nop + sqi.xyzw vf31, vi06 | nop + lq.xyzw vf09, 328(vi04) | nop + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + lq.xyzw vf14, 329(vi04) | nop + div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 + iadd vi01, vi12, vi12 | nop + ilw.w vi04, 328(vi02) | nop + lq.xy vf13, 328(vi02) | nop + ilw.z vi02, 328(vi03) | mul.xyzw vf14, vf14, vf04 + iadd vi01, vi01, vi12 | nop + lq.xyzw vf10, 328(vi04) | nop + iadd vi05, vi06, vi01 | mul.xyz vf09, vf09, Q + ior vi10, vi06, vi00 | mul.xyz vf18, vf18, Q + lq.xyzw vf15, 329(vi04) | nop + div Q, vf01.x, vf10.w | subz.xyz vf19, vf13, vf02 + sqi.xyzw vf06, vi06 | add.xyzw vf09, vf09, vf07 + ilw.w vi04, 328(vi02) | ftoi0.xyzw vf14, vf14 + lq.xy vf13, 328(vi02) | nop + ilw.w vi02, 328(vi03) | mul.xyzw vf15, vf15, vf04 + isw.x vi12, -1(vi06) | maxy.w vf09, vf09, vf01 + lq.xyzw vf11, 328(vi04) | nop + iaddi vi09, vi09, 0x1 | mul.xyz vf10, vf10, Q + ilwr.x vi12, vi09 | mul.xyz vf19, vf19, Q +L108: + iaddi vi03, vi03, 0x1 | nop + lq.xyzw vf16, 329(vi04) | miniz.w vf09, vf09, vf01 + div Q, vf01.x, vf11.w | subz.xyz vf20, vf13, vf02 + sqi.xyzw vf18, vi06 | add.xyzw vf10, vf10, vf07 + ilw.w vi04, 328(vi02) | ftoi0.xyzw vf15, vf15 + lq.xy vf13, 328(vi02) | ftoi4.xyzw vf09, vf09 + ilw.x vi02, 328(vi03) | mul.xyzw vf16, vf16, vf04 + sqi.xyzw vf14, vi06 | maxy.w vf10, vf10, vf01 + lq.xyzw vf12, 328(vi04) | nop + ibeq vi05, vi06, L109 | mul.xyz vf11, vf11, Q + sqi.xyzw vf09, vi06 | mul.xyz vf20, vf20, Q + lq.xyzw vf17, 329(vi04) | miniz.w vf10, vf10, vf01 + div Q, vf01.x, vf12.w | subz.xyz vf21, vf13, vf02 + sqi.xyzw vf19, vi06 | add.xyzw vf11, vf11, vf07 + ilw.w 
vi04, 328(vi02) | ftoi0.xyzw vf16, vf16 + lq.xy vf13, 328(vi02) | ftoi4.xyzw vf10, vf10 + ilw.y vi02, 328(vi03) | mul.xyzw vf17, vf17, vf04 + sqi.xyzw vf15, vi06 | maxy.w vf11, vf11, vf01 + lq.xyzw vf09, 328(vi04) | nop + ibeq vi05, vi06, L110 | mul.xyz vf12, vf12, Q + sqi.xyzw vf10, vi06 | mul.xyz vf21, vf21, Q + lq.xyzw vf14, 329(vi04) | miniz.w vf11, vf11, vf01 + div Q, vf01.x, vf09.w | subz.xyz vf18, vf13, vf02 + sqi.xyzw vf20, vi06 | add.xyzw vf12, vf12, vf07 + ilw.w vi04, 328(vi02) | ftoi0.xyzw vf17, vf17 + lq.xy vf13, 328(vi02) | ftoi4.xyzw vf11, vf11 + ilw.z vi02, 328(vi03) | mul.xyzw vf14, vf14, vf04 + sqi.xyzw vf16, vi06 | maxy.w vf12, vf12, vf01 + lq.xyzw vf10, 328(vi04) | nop + ibeq vi05, vi06, L111 | mul.xyz vf09, vf09, Q + sqi.xyzw vf11, vi06 | mul.xyz vf18, vf18, Q + lq.xyzw vf15, 329(vi04) | miniz.w vf12, vf12, vf01 + div Q, vf01.x, vf10.w | subz.xyz vf19, vf13, vf02 + sqi.xyzw vf21, vi06 | add.xyzw vf09, vf09, vf07 + ilw.w vi04, 328(vi02) | ftoi0.xyzw vf14, vf14 + lq.xy vf13, 328(vi02) | ftoi4.xyzw vf12, vf12 + ilw.w vi02, 328(vi03) | mul.xyzw vf15, vf15, vf04 + sqi.xyzw vf17, vi06 | maxy.w vf09, vf09, vf01 + lq.xyzw vf11, 328(vi04) | nop + ibne vi05, vi06, L108 | mul.xyz vf10, vf10, Q + sqi.xyzw vf12, vi06 | mul.xyz vf19, vf19, Q + b L79 | nop + iaddiu vi15, vi00, 0x4ec | nop +L109: + b L79 | nop + iaddiu vi15, vi00, 0x4f7 | nop +L110: + b L79 | nop + iaddiu vi15, vi00, 0x501 | nop +L111: + b L79 | nop + iaddiu vi15, vi00, 0x50b | nop + +;; PROGRAM 4 START +L112: + lq.xyzw vf05, 660(vi00) | nop + lq.xyzw vf06, 658(vi00) | nop + lq.xyzw vf25, 661(vi00) | nop + lq.xyzw vf01, 656(vi00) | nop + lq.xyzw vf02, 657(vi00) | nop + ilw.w vi08, 4(vi14) | nop + ilw.z vi09, 4(vi14) | nop + ilw.y vi03, 3(vi14) | nop + lq.xyzw vf07, 5(vi14) | nop + lq.xyzw vf08, 6(vi14) | nop + lq.xyzw vf09, 7(vi14) | nop + lq.xyzw vf10, 8(vi14) | nop + iadd vi08, vi08, vi14 | nop + iadd vi09, vi09, vi14 | nop + ibne vi00, vi14, L117 | nop + mtir vi06, vf03.x | nop + ilwr.x 
vi02, vi03 | maxw.xyzw vf24, vf00, vf00 + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + lq.xyzw vf23, 0(vi02) | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + mtir vi04, vf23.w | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + ilwr.y vi02, vi03 | nop + lq.xyzw vf11, 0(vi04) | nop + lq.xyzw vf15, 1(vi04) | nop + lqi.xyzw vf31, vi13 | subz.xy vf19, vf23, vf02 + lq.xyzw vf23, 0(vi02) | nop + sqi.xyzw vf05, vi06 | itof0.xyzw vf11, vf11 + sqi.xyzw vf29, vi06 | nop + lqi.xyzw vf29, vi13 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | nop + sqi.xyzw vf30, vi06 | nop + lqi.xyzw vf30, vi13 | maddax.xyzw ACC, vf07, vf11 + sqi.xyzw vf31, vi06 | madday.xyzw ACC, vf08, vf11 + sqi.xyzw vf29, vi06 | maddz.xyzw vf11, vf09, vf11 + ilwr.z vi02, vi03 | nop + lq.xyzw vf12, 0(vi04) | nop + lq.xyzw vf16, 1(vi04) | nop + div Q, vf01.x, vf11.w | subz.xy vf20, vf23, vf02 + lq.xyzw vf23, 0(vi02) | nop + sqi.xyzw vf30, vi06 | itof0.xyzw vf12, vf12 + move.z vf19, vf24 | nop + iadd vi01, vi12, vi12 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | nop + iadd vi01, vi01, vi12 | maddax.xyzw ACC, vf07, vf12 + iadd vi05, vi06, vi01 | madday.xyzw ACC, vf08, vf12 + ior vi10, vi06, vi00 | maddz.xyzw vf12, vf09, vf12 + ilwr.w vi02, vi03 | mul.xyz vf11, vf11, Q + lq.xyzw vf13, 0(vi04) | mul.xyz vf19, vf19, Q + lq.xyzw vf17, 1(vi04) | nop + div Q, vf01.x, vf12.w | subz.xy vf21, vf23, vf02 + lq.xyzw vf23, 0(vi02) | add.xyzw vf11, vf11, vf25 + iaddi vi03, vi03, 0x1 | itof0.xyzw vf13, vf13 + move.z vf20, vf24 | nop + sqi.xyzw vf06, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf11, vf11, vf01 + isw.x vi12, -1(vi06) | maddax.xyzw ACC, vf07, vf13 + iaddi vi09, vi09, 0x1 | madday.xyzw ACC, vf08, vf13 + ilwr.x vi12, vi09 | maddz.xyzw vf13, vf09, vf13 +L113: + ilwr.x vi02, vi03 | mul.xyz vf12, vf12, Q + lq.xyzw vf14, 0(vi04) | mul.xyz vf20, vf20, Q + lq.xyzw vf18, 1(vi04) | miniz.w 
vf11, vf11, vf01 + div Q, vf01.x, vf13.w | subz.xy vf22, vf23, vf02 + lq.xyzw vf23, 0(vi02) | add.xyzw vf12, vf12, vf25 + nop | itof0.xyzw vf14, vf14 + sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 + sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf12, vf12, vf01 + move.z vf21, vf24 | maddax.xyzw ACC, vf07, vf14 + ibeq vi05, vi06, L114 | madday.xyzw ACC, vf08, vf14 + sqi.xyzw vf11, vi06 | maddz.xyzw vf14, vf09, vf14 + ilwr.y vi02, vi03 | mul.xyz vf13, vf13, Q + lq.xyzw vf11, 0(vi04) | mul.xyz vf21, vf21, Q + lq.xyzw vf15, 1(vi04) | miniz.w vf12, vf12, vf01 + div Q, vf01.x, vf14.w | subz.xy vf19, vf23, vf02 + lq.xyzw vf23, 0(vi02) | add.xyzw vf13, vf13, vf25 + nop | itof0.xyzw vf11, vf11 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + sqi.xyzw vf16, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf13, vf13, vf01 + move.z vf22, vf24 | maddax.xyzw ACC, vf07, vf11 + ibeq vi05, vi06, L115 | madday.xyzw ACC, vf08, vf11 + sqi.xyzw vf12, vi06 | maddz.xyzw vf11, vf09, vf11 + ilwr.z vi02, vi03 | mul.xyz vf14, vf14, Q + lq.xyzw vf12, 0(vi04) | mul.xyz vf22, vf22, Q + lq.xyzw vf16, 1(vi04) | miniz.w vf13, vf13, vf01 + div Q, vf01.x, vf11.w | subz.xy vf20, vf23, vf02 + lq.xyzw vf23, 0(vi02) | add.xyzw vf14, vf14, vf25 + nop | itof0.xyzw vf12, vf12 + sqi.xyzw vf21, vi06 | ftoi4.xyzw vf13, vf13 + sqi.xyzw vf17, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf14, vf14, vf01 + move.z vf19, vf24 | maddax.xyzw ACC, vf07, vf12 + ibeq vi05, vi06, L116 | madday.xyzw ACC, vf08, vf12 + sqi.xyzw vf13, vi06 | maddz.xyzw vf12, vf09, vf12 + ilwr.w vi02, vi03 | mul.xyz vf11, vf11, Q + lq.xyzw vf13, 0(vi04) | mul.xyz vf19, vf19, Q + lq.xyzw vf17, 1(vi04) | miniz.w vf14, vf14, vf01 + div Q, vf01.x, vf12.w | subz.xy vf21, vf23, vf02 + lq.xyzw vf23, 0(vi02) | add.xyzw vf11, vf11, vf25 + iaddi vi03, vi03, 0x1 | itof0.xyzw vf13, vf13 + sqi.xyzw vf22, vi06 | ftoi4.xyzw vf14, vf14 + sqi.xyzw vf18, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir 
vi04, vf23.w | maxy.w vf11, vf11, vf01 + move.z vf20, vf24 | maddax.xyzw ACC, vf07, vf13 + ibne vi05, vi06, L113 | madday.xyzw ACC, vf08, vf13 + sqi.xyzw vf14, vi06 | maddz.xyzw vf13, vf09, vf13 + b L79 | nop + iaddiu vi15, vi00, 0x55d | nop +L114: + b L79 | nop + iaddiu vi15, vi00, 0x569 | nop +L115: + b L79 | nop + iaddiu vi15, vi00, 0x575 | nop +L116: + b L79 | nop + iaddiu vi15, vi00, 0x581 | nop +L117: + ilw.x vi02, 328(vi03) | maxw.xyzw vf24, vf00, vf00 + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + iaddiu vi11, vi00, 0x4000 | nop + lq.xyzw vf23, 328(vi02) | nop + iaddiu vi11, vi11, 0x4000 | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + mtir vi04, vf23.w | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + ilw.y vi02, 328(vi03) | nop + lq.xyzw vf11, 328(vi04) | nop + lq.xyzw vf15, 329(vi04) | nop + lqi.xyzw vf31, vi13 | subz.xy vf19, vf23, vf02 + lq.xyzw vf23, 328(vi02) | nop + sqi.xyzw vf05, vi06 | itof0.xyzw vf11, vf11 + sqi.xyzw vf29, vi06 | nop + lqi.xyzw vf29, vi13 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | nop + sqi.xyzw vf30, vi06 | nop + lqi.xyzw vf30, vi13 | maddax.xyzw ACC, vf07, vf11 + sqi.xyzw vf31, vi06 | madday.xyzw ACC, vf08, vf11 + sqi.xyzw vf29, vi06 | maddz.xyzw vf11, vf09, vf11 + ilw.z vi02, 328(vi03) | nop + lq.xyzw vf12, 328(vi04) | nop + lq.xyzw vf16, 329(vi04) | nop + div Q, vf01.x, vf11.w | subz.xy vf20, vf23, vf02 + lq.xyzw vf23, 328(vi02) | nop + sqi.xyzw vf30, vi06 | itof0.xyzw vf12, vf12 + move.z vf19, vf24 | nop + iadd vi01, vi12, vi12 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | nop + iadd vi01, vi01, vi12 | maddax.xyzw ACC, vf07, vf12 + iadd vi05, vi06, vi01 | madday.xyzw ACC, vf08, vf12 + ior vi10, vi06, vi00 | maddz.xyzw vf12, vf09, vf12 + ilw.w vi02, 328(vi03) | mul.xyz vf11, vf11, Q + lq.xyzw vf13, 328(vi04) | mul.xyz vf19, vf19, Q + lq.xyzw vf17, 329(vi04) | nop + div Q, vf01.x, vf12.w | subz.xy vf21, vf23, vf02 + lq.xyzw vf23, 328(vi02) | add.xyzw vf11, vf11, vf25 + 
iaddi vi03, vi03, 0x1 | itof0.xyzw vf13, vf13 + move.z vf20, vf24 | nop + sqi.xyzw vf06, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf11, vf11, vf01 + isw.x vi12, -1(vi06) | maddax.xyzw ACC, vf07, vf13 + iaddi vi09, vi09, 0x1 | madday.xyzw ACC, vf08, vf13 + ilwr.x vi12, vi09 | maddz.xyzw vf13, vf09, vf13 +L118: + ilw.x vi02, 328(vi03) | mul.xyz vf12, vf12, Q + lq.xyzw vf14, 328(vi04) | mul.xyz vf20, vf20, Q + lq.xyzw vf18, 329(vi04) | miniz.w vf11, vf11, vf01 + div Q, vf01.x, vf13.w | subz.xy vf22, vf23, vf02 + lq.xyzw vf23, 328(vi02) | add.xyzw vf12, vf12, vf25 + nop | itof0.xyzw vf14, vf14 + sqi.xyzw vf19, vi06 | ftoi4.xyzw vf11, vf11 + sqi.xyzw vf15, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf12, vf12, vf01 + move.z vf21, vf24 | maddax.xyzw ACC, vf07, vf14 + ibeq vi05, vi06, L119 | madday.xyzw ACC, vf08, vf14 + sqi.xyzw vf11, vi06 | maddz.xyzw vf14, vf09, vf14 + ilw.y vi02, 328(vi03) | mul.xyz vf13, vf13, Q + lq.xyzw vf11, 328(vi04) | mul.xyz vf21, vf21, Q + lq.xyzw vf15, 329(vi04) | miniz.w vf12, vf12, vf01 + div Q, vf01.x, vf14.w | subz.xy vf19, vf23, vf02 + lq.xyzw vf23, 328(vi02) | add.xyzw vf13, vf13, vf25 + nop | itof0.xyzw vf11, vf11 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + sqi.xyzw vf16, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf13, vf13, vf01 + move.z vf22, vf24 | maddax.xyzw ACC, vf07, vf11 + ibeq vi05, vi06, L120 | madday.xyzw ACC, vf08, vf11 + sqi.xyzw vf12, vi06 | maddz.xyzw vf11, vf09, vf11 + ilw.z vi02, 328(vi03) | mul.xyz vf14, vf14, Q + lq.xyzw vf12, 328(vi04) | mul.xyz vf22, vf22, Q + lq.xyzw vf16, 329(vi04) | miniz.w vf13, vf13, vf01 + div Q, vf01.x, vf11.w | subz.xy vf20, vf23, vf02 + lq.xyzw vf23, 328(vi02) | add.xyzw vf14, vf14, vf25 + nop | itof0.xyzw vf12, vf12 + sqi.xyzw vf21, vi06 | ftoi4.xyzw vf13, vf13 + sqi.xyzw vf17, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf14, vf14, vf01 + move.z vf19, vf24 | maddax.xyzw ACC, vf07, vf12 + ibeq vi05, 
vi06, L121 | madday.xyzw ACC, vf08, vf12 + sqi.xyzw vf13, vi06 | maddz.xyzw vf12, vf09, vf12 + ilw.w vi02, 328(vi03) | mul.xyz vf11, vf11, Q + lq.xyzw vf13, 328(vi04) | mul.xyz vf19, vf19, Q + lq.xyzw vf17, 329(vi04) | miniz.w vf14, vf14, vf01 + div Q, vf01.x, vf12.w | subz.xy vf21, vf23, vf02 + lq.xyzw vf23, 328(vi02) | add.xyzw vf11, vf11, vf25 + iaddi vi03, vi03, 0x1 | itof0.xyzw vf13, vf13 + sqi.xyzw vf22, vi06 | ftoi4.xyzw vf14, vf14 + sqi.xyzw vf18, vi06 | mulaw.xyzw ACC, vf10, vf00 + mtir vi04, vf23.w | maxy.w vf11, vf11, vf01 + move.z vf20, vf24 | maddax.xyzw ACC, vf07, vf13 + ibne vi05, vi06, L118 | madday.xyzw ACC, vf08, vf13 + sqi.xyzw vf14, vi06 | maddz.xyzw vf13, vf09, vf13 + b L79 | nop + iaddiu vi15, vi00, 0x5c5 | nop +L119: + b L79 | nop + iaddiu vi15, vi00, 0x5d1 | nop +L120: + b L79 | nop + iaddiu vi15, vi00, 0x5dd | nop +L121: + b L79 | nop + iaddiu vi15, vi00, 0x5e9 | nop + +;; COMMON KICKING ZONE for L127 programs +L122: + fcset 0x0 + iaddi vi07, vi00, -0x1 + iblez vi12, L123 + iaddi vi09, vi09, 0x1 + ior vi10, vi06, vi00 + iadd vi01, vi12, vi12 + iadd vi01, vi01, vi12 + iadd vi05, vi06, vi01 + sqi.xyzw vf06, vi06 + isw.x vi12, -1(vi06) + jr vi15 + ilwr.x vi12, vi09 +L123: + ilw.y vi01, -1(vi09) + ilw.z vi13, -1(vi09) + ibeq vi00, vi12, L126 + ilwr.x vi14, vi10 + ibltz vi01, L124 + iaddiu vi12, vi12, 0x80 + iadd vi13, vi13, vi08 + lqi.xyzw vf29, vi13 + lqi.xyzw vf30, vi13 + lqi.xyzw vf31, vi13 + sqi.xyzw vf05, vi06 + sqi.xyzw vf29, vi06 + sqi.xyzw vf30, vi06 + sqi.xyzw vf31, vi06 + lqi.xyzw vf29, vi13 + lqi.xyzw vf30, vi13 + iadd vi01, vi12, vi12 + iadd vi01, vi01, vi12 + sqi.xyzw vf29, vi06 + sqi.xyzw vf30, vi06 + ior vi10, vi06, vi00 + iadd vi05, vi06, vi01 + sqi.xyzw vf06, vi06 + isw.x vi12, -1(vi06) + jr vi15 + ilwr.x vi12, vi09 +L124: + mtir vi01, vf24.w + mtir vi06, vf03.y + mr32.xyzw vf03, vf03 + iadd vi14, vi14, vi11 + ibgez vi13, L125 + iswr.x vi14, vi10 + xgkick vi01 + ior vi10, vi06, vi00 + mfir.w vf24, vi06 + iadd vi01, vi12, vi12 + 
iadd vi01, vi01, vi12 + iadd vi05, vi06, vi01 + sqi.xyzw vf06, vi06 + isw.x vi12, -1(vi06) + jr vi15 + ilwr.x vi12, vi09 +L125: + iadd vi13, vi13, vi08 + xgkick vi01 + lqi.xyzw vf29, vi13 + lqi.xyzw vf30, vi13 + lqi.xyzw vf31, vi13 + mfir.w vf24, vi06 + sqi.xyzw vf05, vi06 + sqi.xyzw vf29, vi06 + sqi.xyzw vf30, vi06 + sqi.xyzw vf31, vi06 + lqi.xyzw vf29, vi13 + lqi.xyzw vf30, vi13 + iadd vi01, vi12, vi12 + iadd vi01, vi01, vi12 + sqi.xyzw vf29, vi06 + sqi.xyzw vf30, vi06 + nop + ior vi10, vi06, vi00 + iadd vi05, vi06, vi01 + sqi.xyzw vf06, vi06 + isw.x vi12, -1(vi06) + jr vi15 + ilwr.x vi12, vi09 +L126: + mtir vi01, vf24.w + mr32.xyzw vf03, vf03 + iadd vi14, vi14, vi11 + iswr.x vi14, vi10 + lq.xyzw vf04, 664(vi00) + xgkick vi01 + nop | nop :e + nop | nop + +;; PROGRAM 6: +L127: + lq.xyzw vf02, 657(vi00) | nop + lq.xyzw vf05, 660(vi00) | addw.z vf28, vf00, vf00 + lq.xyzw vf06, 658(vi00) | nop + lq.xyzw vf10, 661(vi00) | nop + lq.xyzw vf11, 662(vi00) | nop + lq.xyzw vf01, 656(vi00) | addz.z vf28, vf28, vf02 + ilw.w vi08, 4(vi14) | nop + ilw.z vi09, 4(vi14) | nop + ilw.y vi03, 3(vi14) | nop + fcset 0x0 | nop + iaddi vi07, vi00, -0x1 | nop + lq.xyzw vf04, 5(vi14) | mulw.xyzw vf16, vf00, vf00 + lq.xyzw vf07, 6(vi14) | mulw.xyzw vf17, vf00, vf00 + ibne vi00, vi14, L136 | mulw.xyzw vf18, vf00, vf00 + lq.xyzw vf08, 7(vi14) | mulw.xyzw vf19, vf00, vf00 + +;; vi14 = 0 version is here + ilwr.x vi02, vi03 | nop + lq.xyzw vf09, 8(vi14) | nop + iadd vi08, vi08, vi14 | nop + iadd vi09, vi09, vi14 | nop + lq.xyw vf28, 0(vi02) | nop + mtir vi06, vf03.x | nop + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + mtir vi04, vf28.w | subz.xyz vf24, vf28, vf02 + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + ilwr.y vi02, vi03 | nop + lq.xyzw vf12, 0(vi04) | nop + lq.xyzw vf20, 1(vi04) | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + lq.xyw vf28, 0(vi02) | itof0.xyzw vf12, vf12 + mfir.w vf24, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw 
vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | subz.xyz vf25, vf28, vf02 + sqi.xyzw vf29, vi06 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | nop + sqi.xyzw vf30, vi06 | maddax.xyzw ACC, vf04, vf12 + sqi.xyzw vf31, vi06 | nop + ilwr.z vi02, vi03 | nop + lq.xyzw vf13, 0(vi04) | madday.xyzw ACC, vf07, vf12 + lq.xyzw vf21, 1(vi04) | maddz.xyzw vf12, vf08, vf12 + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lq.xyw vf28, 0(vi02) | itof0.xyzw vf13, vf13 + div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + iadd vi01, vi12, vi12 | subz.xyz vf26, vf28, vf02 + iadd vi01, vi01, vi12 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | nop + iadd vi05, vi06, vi01 | maddax.xyzw ACC, vf04, vf13 + ior vi10, vi06, vi00 | mul.xyz vf12, vf12, Q + ilwr.w vi02, vi03 | mul.xyz vf24, vf24, Q + lq.xyzw vf14, 0(vi04) | madday.xyzw ACC, vf07, vf13 + lq.xyzw vf22, 1(vi04) | maddz.xyzw vf13, vf08, vf13 + sqi.xyzw vf06, vi06 | add.xyzw vf12, vf12, vf10 + isw.x vi12, -1(vi06) | nop + lq.xyw vf28, 0(vi02) | itof0.xyzw vf14, vf14 + div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + iaddi vi09, vi09, 0x1 | miniz.w vf12, vf12, vf01 + ilwr.x vi12, vi09 | clipw.xyz vf16, vf16 +L128: + iaddi vi03, vi03, 0x1 | subz.xyz vf27, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf12, vf12, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf14 + ibeq vi00, vi01, L129 | mul.xyz vf13, vf13, Q + ilwr.x vi02, vi03 | mul.xyz vf25, vf25, Q + nop | addw.w vf12, vf12, vf01 +L129: + lq.xyzw vf15, 0(vi04) | madday.xyzw ACC, vf07, vf14 + lq.xyzw vf23, 1(vi04) | maddz.xyzw vf14, vf08, vf14 + sqi.xyz vf24, vi06 | add.xyzw vf13, vf13, vf10 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + lq.xyw vf28, 0(vi02) | itof0.xyzw vf15, vf15 + div Q, vf01.x, vf14.w | mul.xyzw vf18, vf14, vf11 + ibeq vi05, vi06, L133 | miniz.w vf13, vf13, vf01 + sqi.xyzw vf12, vi06 | clipw.xyz vf17, vf17 +L0x6a1 
+ nop | subz.xyz vf24, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf13, vf13, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf15 + ibeq vi00, vi01, L130 | mul.xyz vf14, vf14, Q + ilwr.y vi02, vi03 | mul.xyz vf26, vf26, Q + nop | addw.w vf13, vf13, vf01 +L130: + lq.xyzw vf12, 0(vi04) | madday.xyzw ACC, vf07, vf15 + lq.xyzw vf20, 1(vi04) | maddz.xyzw vf15, vf08, vf15 + sqi.xyzw vf25, vi06 | add.xyzw vf14, vf14, vf10 + sqi.xyzw vf21, vi06 | ftoi4.xyzw vf13, vf13 + lq.xyw vf28, 0(vi02) | itof0.xyzw vf12, vf12 + div Q, vf01.x, vf15.w | mul.xyzw vf19, vf15, vf11 + ibeq vi05, vi06, L134 | miniz.w vf14, vf14, vf01 + sqi.xyzw vf13, vi06 | clipw.xyz vf18, vf18 +L0x6b0 + nop | subz.xyz vf25, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf14, vf14, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf12 + ibeq vi00, vi01, L131 | mul.xyz vf15, vf15, Q + ilwr.z vi02, vi03 | mul.xyz vf27, vf27, Q + nop | addw.w vf14, vf14, vf01 +L131: + lq.xyzw vf13, 0(vi04) | madday.xyzw ACC, vf07, vf12 + lq.xyzw vf21, 1(vi04) | maddz.xyzw vf12, vf08, vf12 + sqi.xyzw vf26, vi06 | add.xyzw vf15, vf15, vf10 + sqi.xyzw vf22, vi06 | ftoi4.xyzw vf14, vf14 + lq.xyw vf28, 0(vi02) | itof0.xyzw vf13, vf13 + div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + ibeq vi05, vi06, L135 | miniz.w vf15, vf15, vf01 + sqi.xyzw vf14, vi06 | clipw.xyz vf19, vf19 +L0x6bf + nop | subz.xyz vf26, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf15, vf15, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf13 + ibeq vi00, vi01, L132 | mul.xyz vf12, vf12, Q + ilwr.w vi02, vi03 | mul.xyz vf24, vf24, Q + nop | addw.w vf15, vf15, vf01 +L132: + lq.xyzw vf14, 0(vi04) | madday.xyzw ACC, vf07, vf13 + lq.xyzw vf22, 1(vi04) | maddz.xyzw vf13, vf08, vf13 + sqi.xyzw vf27, vi06 | add.xyzw vf12, vf12, vf10 + sqi.xyzw vf23, vi06 | ftoi4.xyzw vf15, vf15 + lq.xyw vf28, 0(vi02) | 
itof0.xyzw vf14, vf14 + div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + ibne vi05, vi06, L128 | miniz.w vf12, vf12, vf01 + sqi.xyzw vf15, vi06 | clipw.xyz vf16, vf16 + b L122 | nop + iaddiu vi15, vi00, 0x692 | nop ;; L128 +L133: + b L122 | nop + iaddiu vi15, vi00, 0x6a1 | nop +L134: + b L122 | nop + iaddiu vi15, vi00, 0x6b0 | nop +L135: + b L122 | nop + iaddiu vi15, vi00, 0x6bf | nop +;; END of 0 VERSION of program 6 + +;; OTHER VERSION +L136: + ilw.x vi02, 328(vi03) | nop + lq.xyzw vf09, 8(vi14) | nop + iadd vi08, vi08, vi14 | nop + iadd vi09, vi09, vi14 | nop + lq.xyw vf28, 328(vi02) | nop + mtir vi06, vf03.x | nop + ilwr.x vi12, vi09 | nop + ilwr.z vi13, vi09 | nop + mtir vi04, vf28.w | subz.xyz vf24, vf28, vf02 + iaddiu vi11, vi00, 0x4000 | nop + iaddiu vi11, vi11, 0x4000 | nop + ilw.y vi02, 328(vi03) | nop + lq.xyzw vf12, 328(vi04) | nop + lq.xyzw vf20, 329(vi04) | nop + iaddiu vi12, vi12, 0x80 | nop + iadd vi13, vi13, vi08 | nop + lq.xyw vf28, 328(vi02) | itof0.xyzw vf12, vf12 + mfir.w vf24, vi06 | nop + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lqi.xyzw vf31, vi13 | nop + sqi.xyzw vf05, vi06 | subz.xyz vf25, vf28, vf02 + sqi.xyzw vf29, vi06 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | nop + sqi.xyzw vf30, vi06 | maddax.xyzw ACC, vf04, vf12 + sqi.xyzw vf31, vi06 | nop + ilw.z vi02, 328(vi03) | nop + lq.xyzw vf13, 328(vi04) | madday.xyzw ACC, vf07, vf12 + lq.xyzw vf21, 329(vi04) | maddz.xyzw vf12, vf08, vf12 + lqi.xyzw vf29, vi13 | nop + lqi.xyzw vf30, vi13 | nop + lq.xyw vf28, 328(vi02) | itof0.xyzw vf13, vf13 + div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + sqi.xyzw vf29, vi06 | nop + sqi.xyzw vf30, vi06 | nop + iadd vi01, vi12, vi12 | subz.xyz vf26, vf28, vf02 + iadd vi01, vi01, vi12 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | nop + iadd vi05, vi06, vi01 | maddax.xyzw ACC, vf04, vf13 + ior vi10, vi06, vi00 | mul.xyz vf12, vf12, Q + ilw.w vi02, 328(vi03) | mul.xyz vf24, vf24, Q + lq.xyzw vf14, 328(vi04) | madday.xyzw ACC, 
vf07, vf13 + lq.xyzw vf22, 329(vi04) | maddz.xyzw vf13, vf08, vf13 + sqi.xyzw vf06, vi06 | add.xyzw vf12, vf12, vf10 + isw.x vi12, -1(vi06) | nop + lq.xyw vf28, 328(vi02) | itof0.xyzw vf14, vf14 + div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + iaddi vi09, vi09, 0x1 | miniz.w vf12, vf12, vf01 + ilwr.x vi12, vi09 | clipw.xyz vf16, vf16 +L137: + iaddi vi03, vi03, 0x1 | subz.xyz vf27, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf12, vf12, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf14 + ibeq vi00, vi01, L138 | mul.xyz vf13, vf13, Q + ilw.x vi02, 328(vi03) | mul.xyz vf25, vf25, Q + nop | addw.w vf12, vf12, vf01 +L138: + lq.xyzw vf15, 328(vi04) | madday.xyzw ACC, vf07, vf14 + lq.xyzw vf23, 329(vi04) | maddz.xyzw vf14, vf08, vf14 + sqi.xyz vf24, vi06 | add.xyzw vf13, vf13, vf10 + sqi.xyzw vf20, vi06 | ftoi4.xyzw vf12, vf12 + lq.xyw vf28, 328(vi02) | itof0.xyzw vf15, vf15 + div Q, vf01.x, vf14.w | mul.xyzw vf18, vf14, vf11 + ibeq vi05, vi06, L142 | miniz.w vf13, vf13, vf01 + sqi.xyzw vf12, vi06 | clipw.xyz vf17, vf17 + nop | subz.xyz vf24, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf13, vf13, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf15 + ibeq vi00, vi01, L139 | mul.xyz vf14, vf14, Q + ilw.y vi02, 328(vi03) | mul.xyz vf26, vf26, Q + nop | addw.w vf13, vf13, vf01 +L139: + lq.xyzw vf12, 328(vi04) | madday.xyzw ACC, vf07, vf15 + lq.xyzw vf20, 329(vi04) | maddz.xyzw vf15, vf08, vf15 + sqi.xyzw vf25, vi06 | add.xyzw vf14, vf14, vf10 + sqi.xyzw vf21, vi06 | ftoi4.xyzw vf13, vf13 + lq.xyw vf28, 328(vi02) | itof0.xyzw vf12, vf12 + div Q, vf01.x, vf15.w | mul.xyzw vf19, vf15, vf11 + ibeq vi05, vi06, L143 | miniz.w vf14, vf14, vf01 + sqi.xyzw vf13, vi06 | clipw.xyz vf18, vf18 + nop | subz.xyz vf25, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf14, vf14, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf12 
+ ibeq vi00, vi01, L140 | mul.xyz vf15, vf15, Q + ilw.z vi02, 328(vi03) | mul.xyz vf27, vf27, Q + nop | addw.w vf14, vf14, vf01 +L140: + lq.xyzw vf13, 328(vi04) | madday.xyzw ACC, vf07, vf12 + lq.xyzw vf21, 329(vi04) | maddz.xyzw vf12, vf08, vf12 + sqi.xyzw vf26, vi06 | add.xyzw vf15, vf15, vf10 + sqi.xyzw vf22, vi06 | ftoi4.xyzw vf14, vf14 + lq.xyw vf28, 328(vi02) | itof0.xyzw vf13, vf13 + div Q, vf01.x, vf12.w | mul.xyzw vf16, vf12, vf11 + ibeq vi05, vi06, L144 | miniz.w vf15, vf15, vf01 + sqi.xyzw vf14, vi06 | clipw.xyz vf19, vf19 + nop | subz.xyz vf26, vf28, vf02 + iaddi vi07, vi07, 0x1 | mulaw.xyzw ACC, vf09, vf00 + mtir vi04, vf28.w | maxy.w vf15, vf15, vf01 + fcand vi01, 0x3ffff | maddax.xyzw ACC, vf04, vf13 + ibeq vi00, vi01, L141 | mul.xyz vf12, vf12, Q + ilw.w vi02, 328(vi03) | mul.xyz vf24, vf24, Q + nop | addw.w vf15, vf15, vf01 +L141: + lq.xyzw vf14, 328(vi04) | madday.xyzw ACC, vf07, vf13 + lq.xyzw vf22, 329(vi04) | maddz.xyzw vf13, vf08, vf13 + sqi.xyzw vf27, vi06 | add.xyzw vf12, vf12, vf10 + sqi.xyzw vf23, vi06 | ftoi4.xyzw vf15, vf15 + lq.xyw vf28, 328(vi02) | itof0.xyzw vf14, vf14 + div Q, vf01.x, vf13.w | mul.xyzw vf17, vf13, vf11 + ibne vi05, vi06, L137 | miniz.w vf12, vf12, vf01 + sqi.xyzw vf15, vi06 | clipw.xyz vf16, vf16 + b L122 | nop + iaddiu vi15, vi00, 0x707 | nop +L142: + b L122 | nop + iaddiu vi15, vi00, 0x716 | nop +L143: + b L122 | nop + iaddiu vi15, vi00, 0x725 | nop +L144: + b L122 | nop + iaddiu vi15, vi00, 0x734 | nop diff --git a/docs/scratch/tie.txt b/docs/scratch/tie.txt new file mode 100644 index 0000000000..f85a3017a8 --- /dev/null +++ b/docs/scratch/tie.txt @@ -0,0 +1,1041 @@ + b L9 | nop + mr32.xyzw vf21, vf21 | nop + b L42 | nop + nop | nop + b L43 | nop + nop | nop + b L1 | nop + +;; INITIALIZATION + iaddi vi02, vi00, 0x0 | subx.xz vf21, vf23, vf21 + iaddi vi01, vi00, 0x1 | nop + lq.xyzw vf21, 969(vi00) | nop + lq.xz vf23, 968(vi00) | nop + lq.xyzw vf22, 970(vi00) | nop + isw.x vi00, 971(vi00) | nop :e + isw.y vi01, 
971(vi00) | nop + + +L1: + lq.xyz vf01, 966(vi00) | nop + ilwr.w vi04, vi02 | nop + ilw.w vi09, 1(vi02) | nop + mtir vi06, vf21.y | nop + lqi.xyzw vf02, vi02 | suby.xz vf21, vf21, vf21 + lqi.xyzw vf03, vi02 | nop + lqi.xyzw vf04, vi02 | nop + lqi.xyzw vf05, vi02 | nop + mtir vi05, vf21.x | nop + lqi.xyzw vf06, vi02 | subw.w vf01, vf01, vf01 +L2: + iadd vi03, vi04, vi05 | nop + iadd vi04, vi04, vi06 | nop + iaddi vi09, vi09, -0x1 | nop + sqi.xyzw vf01, vi03 | nop + sqi.xyzw vf02, vi03 | nop + sqi.xyzw vf03, vi03 | nop + sqi.xyzw vf04, vi03 | nop + sqi.xyzw vf05, vi03 | nop + sqi.xyzw vf06, vi03 | nop + sqi.xyzw vf01, vi04 | nop + sqi.xyzw vf02, vi04 | nop + sqi.xyzw vf03, vi04 | nop + sqi.xyzw vf04, vi04 | nop + sqi.xyzw vf05, vi04 | nop + sqi.xyzw vf06, vi04 | nop + ilwr.w vi04, vi02 | nop + lqi.xyzw vf02, vi02 | nop + lqi.xyzw vf03, vi02 | nop + lqi.xyzw vf04, vi02 | nop + lqi.xyzw vf05, vi02 | nop + ibgtz vi09, L2 | nop + lqi.xyzw vf06, vi02 | nop + mtir vi09, vf02.w | nop + iaddi vi02, vi02, -0x2 | subw.w vf07, vf07, vf07 + ilwr.x vi07, vi02 | nop + ilwr.y vi08, vi02 | nop + ilwr.z vi04, vi02 | nop + iaddi vi09, vi09, -0x1 | nop + iaddi vi02, vi02, 0x1 | nop + ibeq vi00, vi09, L4 | nop + lq.xyz vf07, 967(vi08) | nop +L3: + iadd vi03, vi04, vi05 | nop + iadd vi04, vi04, vi06 | nop + iaddi vi09, vi09, -0x1 | nop + sq.xyzw vf07, 0(vi03) | nop + iswr.x vi07, vi03 | nop + sq.xyzw vf07, 0(vi04) | nop + iswr.x vi07, vi04 | nop + ilwr.x vi07, vi02 | nop + ilwr.y vi08, vi02 | nop + ilwr.z vi04, vi02 | nop + iaddi vi02, vi02, 0x1 | nop + ibne vi00, vi09, L3 | nop + lq.xyz vf07, 967(vi08) | nop +L4: + iaddiu vi07, vi07, 0x4000 | nop + iaddiu vi07, vi07, 0x4000 | nop + iadd vi03, vi04, vi05 | nop + iadd vi04, vi04, vi06 | nop + sq.xyzw vf07, 0(vi03) | nop + iswr.x vi07, vi03 | nop + sq.xyzw vf07, 0(vi04) | nop + iswr.x vi07, vi04 | nop + mtir vi06, vf04.x | nop + lq.xyzw vf05, 50(vi00) | nop + lq.xyzw vf15, 51(vi00) | nop + iaddiu vi05, vi00, 0x34 | nop + nop | nop + iaddiu 
vi06, vi06, 0x32 | itof0.xyzw vf05, vf05 + lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15 + lqi.xyzw vf16, vi05 | itof0.w vf15, vf15 + 64.0 | nop :i + ibeq vi06, vi05, L6 | muli.xyz vf05, vf05, I + mtir vi07, vf04.y | itof0.xyzw vf06, vf06 +L5: + lqi.xyzw vf07, vi05 | itof12.xyz vf16, vf16 + lqi.xyzw vf17, vi05 | itof0.w vf16, vf16 + sq.xyzw vf15, -5(vi05) | nop + ibeq vi06, vi05, L6 | muli.xyz vf06, vf06, I + sq.xyzw vf05, -6(vi05) | itof0.xyzw vf07, vf07 + lqi.xyzw vf05, vi05 | itof12.xyz vf17, vf17 + lqi.xyzw vf15, vi05 | itof0.w vf17, vf17 + sq.xyzw vf16, -5(vi05) | nop + ibeq vi06, vi05, L6 | muli.xyz vf07, vf07, I + sq.xyzw vf06, -6(vi05) | itof0.xyzw vf05, vf05 + lqi.xyzw vf06, vi05 | itof12.xyz vf15, vf15 + lqi.xyzw vf16, vi05 | itof0.w vf15, vf15 + sq.xyzw vf17, -5(vi05) | nop + ibne vi06, vi05, L5 | muli.xyz vf05, vf05, I + sq.xyzw vf07, -6(vi05) | itof0.xyzw vf06, vf06 +L6: + lq.xyzw vf09, -4(vi05) | nop + lq.xyzw vf05, -3(vi05) | nop + lq.xyzw vf15, -2(vi05) | nop + iadd vi07, vi07, vi05 | nop + iaddi vi07, vi07, -0x4 | nop + iaddi vi05, vi05, -0x1 | nop + iaddi vi08, vi05, -0x3 | nop + ibeq vi07, vi05, L8 | nop + nop | itof0.xyzw vf09, vf09 + lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05 + lqi.xyzw vf06, vi05 | itof0.w vf15, vf15 + lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15 + nop | nop + nop | muli.xyz vf09, vf09, I + ibeq vi07, vi05, L8 | muli.xyz vf05, vf05, I + nop | itof0.xyzw vf10, vf10 +L7: + lqi.xyzw vf11, vi05 | itof0.xyzw vf06, vf06 + lqi.xyzw vf07, vi05 | itof0.w vf16, vf16 + lqi.xyzw vf17, vi05 | itof12.xyz vf16, vf16 + sqi.xyzw vf09, vi08 | nop + sqi.xyzw vf05, vi08 | muli.xyz vf10, vf10, I + ibeq vi07, vi05, L8 | muli.xyz vf06, vf06, I + sqi.xyzw vf15, vi08 | itof0.xyzw vf11, vf11 + lqi.xyzw vf09, vi05 | itof0.xyzw vf07, vf07 + lqi.xyzw vf05, vi05 | itof0.w vf17, vf17 + lqi.xyzw vf15, vi05 | itof12.xyz vf17, vf17 + sqi.xyzw vf10, vi08 | nop + sqi.xyzw vf06, vi08 | muli.xyz vf11, vf11, I + ibeq vi07, vi05, L8 | muli.xyz vf07, vf07, I + sqi.xyzw 
vf16, vi08 | itof0.xyzw vf09, vf09 + lqi.xyzw vf10, vi05 | itof0.xyzw vf05, vf05 + lqi.xyzw vf06, vi05 | itof0.w vf15, vf15 + lqi.xyzw vf16, vi05 | itof12.xyz vf15, vf15 + sqi.xyzw vf11, vi08 | nop + sqi.xyzw vf07, vi08 | muli.xyz vf09, vf09, I + ibne vi07, vi05, L7 | muli.xyz vf05, vf05, I + sqi.xyzw vf17, vi08 | itof0.xyzw vf10, vf10 +L8: + mtir vi01, vf04.z | nop + mtir vi05, vf02.x | nop + mtir vi14, vf02.y | nop + mtir vi04, vf03.x | nop + mtir vi06, vf03.y | nop + mtir vi07, vf03.z | nop + mtir vi08, vf03.w | nop + isw.x vi01, 971(vi00) | nop + iaddi vi15, vi00, 0x0 | nop + mtir vi03, vf22.x | nop + iaddiu vi02, vi00, 0x32 | nop + mr32.xyzw vf21, vf21 | nop + mfir.y vf23, vi00 | nop :e + mfir.w vf23, vi00 | nop +L9: + lqi.xyzw vf05, vi02 | nop + lq.xyzw vf24, 6(vi03) | nop + lq.xyzw vf13, 3(vi03) | nop + lq.xyzw vf01, 0(vi03) | nop + lq.xyzw vf02, 1(vi03) | nop + lq.xyzw vf12, 2(vi03) | nop + mtir vi09, vf24.x | nop + lqi.xyzw vf15, vi02 | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf27, 4(vi03) | maddax.xyzw ACC, vf01, vf05 + ilw.x vi01, 5(vi03) | madday.xyzw ACC, vf02, vf05 + lq.xyzw vf11, 838(vi09) | maddz.xyzw vf09, vf12, vf05 + lqi.xyzw vf06, vi02 | nop + lq.xyzw vf19, 5(vi03) | nop + iaddi vi03, vi03, 0x7 | nop + mtir vi09, vf24.y | addx.w vf20, vf19, vf00 + div Q, vf00.w, vf09.w | mulaw.xyzw ACC, vf13, vf00 + lqi.xyzw vf16, vi02 | maddax.xyzw ACC, vf01, vf06 + mtir vi01, vf21.x | madday.xyzw ACC, vf02, vf06 + lq.xyzw vf03, 838(vi09) | maddz.xyzw vf10, vf12, vf06 + isub vi01, vi01, vi15 | ftoi4.w vf19, vf19 + iadd vi04, vi04, vi01 | ftoi4.w vf20, vf20 + iadd vi06, vi06, vi01 | nop + lqi.xyzw vf07, vi02 | mul.xyz vf09, vf09, Q + div Q, vf00.w, vf10.w | mul.xyz vf15, vf15, Q + mtir vi09, vf24.z | addx.w vf05, vf05, vf21 + lqi.xyzw vf17, vi02 | mulaw.xyzw ACC, vf13, vf00 + iadd vi07, vi07, vi01 | maddax.xyzw ACC, vf01, vf07 + iadd vi08, vi08, vi01 | madday.xyzw ACC, vf02, vf07 + lq.xyzw vf04, 838(vi09) | ftoi4.xyz vf19, vf09 + ibeq vi04, vi12, L40 | maddz.xyzw 
vf09, vf12, vf07 + iadd vi15, vi15, vi01 | nop +L10: + mtir vi12, vf05.w | nop + lqi.xyzw vf08, vi02 | mul.xyz vf10, vf10, Q + div Q, vf00.w, vf09.w | mul.xyz vf16, vf16, Q + mtir vi09, vf24.w | addx.w vf06, vf06, vf21 + lqi.xyzw vf18, vi02 | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf15, 0(vi12) | maddax.xyzw ACC, vf01, vf08 + sq.xyzw vf11, 1(vi12) | madday.xyzw ACC, vf02, vf08 + lq.xyzw vf14, 838(vi09) | ftoi4.xyz vf20, vf10 + ibeq vi04, vi12, L13 | maddz.xyzw vf10, vf12, vf08 + sq.xyzw vf19, 2(vi12) | nop + lqi.xyzw vf24, vi03 | nop + mtir vi12, vf06.w | nop + lqi.xyzw vf05, vi02 | mul.xyz vf09, vf09, Q + div Q, vf00.w, vf10.w | mul.xyz vf17, vf17, Q + mtir vi09, vf24.x | addx.w vf07, vf07, vf21 + lqi.xyzw vf15, vi02 | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf16, 0(vi12) | maddax.xyzw ACC, vf01, vf05 + sq.xyzw vf03, 1(vi12) | madday.xyzw ACC, vf02, vf05 + lq.xyzw vf11, 838(vi09) | ftoi4.xyz vf19, vf09 + ibeq vi04, vi12, L12 | maddz.xyzw vf09, vf12, vf05 + sq.xyzw vf20, 2(vi12) | nop + mtir vi12, vf07.w | nop + lqi.xyzw vf06, vi02 | mul.xyz vf10, vf10, Q + div Q, vf00.w, vf09.w | mul.xyz vf18, vf18, Q + mtir vi09, vf24.y | addx.w vf08, vf08, vf21 + lqi.xyzw vf16, vi02 | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf17, 0(vi12) | maddax.xyzw ACC, vf01, vf06 + sq.xyzw vf04, 1(vi12) | madday.xyzw ACC, vf02, vf06 + lq.xyzw vf03, 838(vi09) | ftoi4.xyz vf20, vf10 + ibeq vi04, vi12, L11 | maddz.xyzw vf10, vf12, vf06 + sq.xyzw vf19, 2(vi12) | nop + mtir vi12, vf08.w | nop + lqi.xyzw vf07, vi02 | mul.xyz vf09, vf09, Q + div Q, vf00.w, vf10.w | mul.xyz vf15, vf15, Q + mtir vi09, vf24.z | addx.w vf05, vf05, vf21 + lqi.xyzw vf17, vi02 | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf18, 0(vi12) | maddax.xyzw ACC, vf01, vf07 + sq.xyzw vf14, 1(vi12) | madday.xyzw ACC, vf02, vf07 + lq.xyzw vf04, 838(vi09) | ftoi4.xyz vf19, vf09 + ibne vi04, vi12, L10 | maddz.xyzw vf09, vf12, vf07 + sq.xyzw vf20, 2(vi12) | nop + mtir vi12, vf05.w | nop + ibne vi00, vi05, L24 | mul.xyz vf10, vf10, Q + lqi.xyzw vf08, 
vi02 | mul.xyz vf16, vf16, Q + div Q, vf00.w, vf09.w | addx.w vf06, vf06, vf21 + mtir vi09, vf24.w | mulaw.xyzw ACC, vf13, vf00 + lqi.xyzw vf24, vi03 | nop + sq.xyzw vf15, 0(vi12) | addx.w vf07, vf07, vf21 + sq.xyzw vf11, 1(vi12) | maddax.xyzw ACC, vf01, vf08 + lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf08 + lq.xyzw vf14, 838(vi09) | ftoi4.xyz vf20, vf10 + lqi.xyzw vf05, vi02 | maddz.xyzw vf10, vf12, vf08 + sq.xyzw vf19, 2(vi12) | mul.xyz vf09, vf09, Q + mtir vi12, vf06.w | mul.xyz vf17, vf17, Q + lqi.xyzw vf15, vi02 | mulaw.xyzw ACC, vf13, vf00 + mtir vi09, vf24.x | maddax.xyzw ACC, vf01, vf05 + nop | madday.xyzw ACC, vf02, vf05 + div Q, vf00.w, vf10.w | ftoi4.xyz vf19, vf09 + sq.xyzw vf16, 0(vi12) | maddz.xyzw vf09, vf12, vf05 + sq.xyzw vf03, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | nop + mtir vi12, vf07.w | nop + lq.xyzw vf11, 838(vi09) | addx.w vf08, vf08, vf21 + div Q, vf00.w, vf09.w | mul.xyz vf10, vf10, Q + sq.xyzw vf17, 0(vi12) | mul.xyz vf18, vf18, Q + sq.xyzw vf04, 1(vi12) | addx.w vf18, vf18, vf21 + sq.xyzw vf19, 2(vi12) | nop + b L14 | ftoi4.xyz vf20, vf10 + mtir vi12, vf08.w | nop +L11: + mtir vi12, vf08.w | nop + ibne vi00, vi05, L23 | mul.xyz vf09, vf09, Q + lqi.xyzw vf07, vi02 | mul.xyz vf15, vf15, Q + div Q, vf00.w, vf10.w | addx.w vf05, vf05, vf21 + mtir vi09, vf24.z | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf18, 0(vi12) | nop + sq.xyzw vf14, 1(vi12) | maddax.xyzw ACC, vf01, vf07 + lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf07 + lq.xyzw vf04, 838(vi09) | ftoi4.xyz vf19, vf09 + lqi.xyzw vf08, vi02 | maddz.xyzw vf09, vf12, vf07 + sq.xyzw vf20, 2(vi12) | mul.xyz vf10, vf10, Q + mtir vi12, vf05.w | mul.xyz vf16, vf16, Q + lqi.xyzw vf18, vi02 | mulaw.xyzw ACC, vf13, vf00 + div Q, vf00.w, vf09.w | maddax.xyzw ACC, vf01, vf08 + mtir vi09, vf24.w | addx.w vf06, vf06, vf21 + lqi.xyzw vf24, vi03 | nop + sq.xyzw vf15, 0(vi12) | madday.xyzw ACC, vf02, vf08 + sq.xyzw vf11, 1(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf19, 2(vi12) | maddz.xyzw vf10, vf12, 
vf08 + mtir vi12, vf06.w | nop + lq.xyzw vf14, 838(vi09) | addx.w vf07, vf07, vf21 + div Q, vf00.w, vf10.w | mul.xyz vf09, vf09, Q + sq.xyzw vf16, 0(vi12) | mul.xyz vf17, vf17, Q + sq.xyzw vf03, 1(vi12) | addx.w vf17, vf17, vf21 + sq.xyzw vf20, 2(vi12) | nop + b L17 | ftoi4.xyz vf19, vf09 + mtir vi12, vf07.w | nop +L12: + mtir vi12, vf07.w | nop + ibne vi00, vi05, L22 | mul.xyz vf10, vf10, Q + lqi.xyzw vf06, vi02 | mul.xyz vf18, vf18, Q + div Q, vf00.w, vf09.w | addx.w vf08, vf08, vf21 + mtir vi09, vf24.y | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf17, 0(vi12) | nop + sq.xyzw vf04, 1(vi12) | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf06 + lq.xyzw vf03, 838(vi09) | ftoi4.xyz vf20, vf10 + lqi.xyzw vf07, vi02 | maddz.xyzw vf10, vf12, vf06 + sq.xyzw vf19, 2(vi12) | mul.xyz vf09, vf09, Q + mtir vi12, vf08.w | mul.xyz vf15, vf15, Q + lqi.xyzw vf17, vi02 | mulaw.xyzw ACC, vf13, vf00 + div Q, vf00.w, vf10.w | maddax.xyzw ACC, vf01, vf07 + mtir vi09, vf24.z | addx.w vf05, vf05, vf21 + sq.xyzw vf18, 0(vi12) | madday.xyzw ACC, vf02, vf07 + sq.xyzw vf14, 1(vi12) | ftoi4.xyz vf19, vf09 + sq.xyzw vf20, 2(vi12) | maddz.xyzw vf09, vf12, vf07 + mtir vi12, vf05.w | nop + lq.xyzw vf04, 838(vi09) | addx.w vf06, vf06, vf21 + sq.xyzw vf15, 0(vi12) | mul.xyz vf10, vf10, Q + div Q, vf00.w, vf09.w | mul.xyz vf16, vf16, Q + sq.xyzw vf11, 1(vi12) | addx.w vf16, vf16, vf21 + sq.xyzw vf19, 2(vi12) | nop + b L16 | ftoi4.xyz vf20, vf10 + mtir vi12, vf06.w | nop +L13: + lqi.xyzw vf24, vi03 | nop + mtir vi12, vf06.w | nop + ibne vi00, vi05, L21 | mul.xyz vf09, vf09, Q + lqi.xyzw vf05, vi02 | mul.xyz vf17, vf17, Q + div Q, vf00.w, vf10.w | addx.w vf07, vf07, vf21 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf16, 0(vi12) | maddax.xyzw ACC, vf01, vf05 + sq.xyzw vf03, 1(vi12) | madday.xyzw ACC, vf02, vf05 + lqi.xyzw vf15, vi02 | ftoi4.xyz vf19, vf09 + lq.xyzw vf11, 838(vi09) | maddz.xyzw vf09, vf12, vf05 + lqi.xyzw vf06, vi02 | addx.w vf08, vf08, vf21 + 
sq.xyzw vf20, 2(vi12) | mul.xyz vf10, vf10, Q + mtir vi12, vf07.w | mul.xyz vf18, vf18, Q + div Q, vf00.w, vf09.w | mulaw.xyzw ACC, vf13, vf00 + mtir vi09, vf24.y | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf06 + sq.xyzw vf17, 0(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf04, 1(vi12) | maddz.xyzw vf10, vf12, vf06 + sq.xyzw vf19, 2(vi12) | nop + mtir vi12, vf08.w | nop + lq.xyzw vf03, 838(vi09) | addx.w vf05, vf05, vf21 + div Q, vf00.w, vf10.w | mul.xyz vf09, vf09, Q + sq.xyzw vf18, 0(vi12) | mul.xyz vf15, vf15, Q + sq.xyzw vf14, 1(vi12) | addx.w vf15, vf15, vf21 + sq.xyzw vf20, 2(vi12) | nop + b L15 | ftoi4.xyz vf19, vf09 + mtir vi12, vf05.w | nop +L14: + lqi.xyzw vf06, vi02 | nop + mtir vi09, vf24.y | nop + mtir vi13, vf18.w | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf18, 0(vi12) | addx.w vf05, vf05, vf21 + sq.xyzw vf14, 1(vi12) | maddax.xyzw ACC, vf01, vf06 + sq.xyzw vf20, 2(vi12) | madday.xyzw ACC, vf02, vf06 + lqi.xyzw vf16, vi02 | maddz.xyzw vf10, vf12, vf06 + lq.xyzw vf03, 838(vi09) | mul.xyz vf09, vf09, Q + sq.xyzw vf18, 0(vi13) | mul.xyz vf15, vf15, Q + sq.xyzw vf14, 1(vi13) | addx.w vf15, vf15, vf21 + sq.xyzw vf20, 2(vi13) | nop + div Q, vf00.w, vf10.w | nop + ibeq vi06, vi12, L18 | ftoi4.xyz vf19, vf09 + mtir vi12, vf05.w | nop +L15: + lqi.xyzw vf07, vi02 | nop + mtir vi09, vf24.z | nop + mtir vi13, vf15.w | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf15, 0(vi12) | addx.w vf06, vf06, vf21 + sq.xyzw vf11, 1(vi12) | maddax.xyzw ACC, vf01, vf07 + sq.xyzw vf19, 2(vi12) | madday.xyzw ACC, vf02, vf07 + lqi.xyzw vf17, vi02 | maddz.xyzw vf09, vf12, vf07 + lq.xyzw vf04, 838(vi09) | mul.xyz vf10, vf10, Q + sq.xyzw vf15, 0(vi13) | mul.xyz vf16, vf16, Q + sq.xyzw vf11, 1(vi13) | addx.w vf16, vf16, vf21 + sq.xyzw vf19, 2(vi13) | nop + div Q, vf00.w, vf09.w | nop + ibeq vi06, vi12, L19 | ftoi4.xyz vf20, vf10 + mtir vi12, vf06.w | nop +L16: + lqi.xyzw vf08, vi02 | nop + mtir vi09, vf24.w | nop + lqi.xyzw vf24, vi03 | nop + mtir vi13, vf16.w | 
mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf16, 0(vi12) | addx.w vf07, vf07, vf21 + sq.xyzw vf03, 1(vi12) | maddax.xyzw ACC, vf01, vf08 + sq.xyzw vf20, 2(vi12) | madday.xyzw ACC, vf02, vf08 + lqi.xyzw vf18, vi02 | maddz.xyzw vf10, vf12, vf08 + lq.xyzw vf14, 838(vi09) | mul.xyz vf09, vf09, Q + sq.xyzw vf16, 0(vi13) | mul.xyz vf17, vf17, Q + sq.xyzw vf03, 1(vi13) | addx.w vf17, vf17, vf21 + sq.xyzw vf20, 2(vi13) | nop + div Q, vf00.w, vf10.w | nop + ibeq vi06, vi12, L20 | ftoi4.xyz vf19, vf09 + mtir vi12, vf07.w | nop +L17: + lqi.xyzw vf05, vi02 | nop + mtir vi09, vf24.x | nop + mtir vi13, vf17.w | mulaw.xyzw ACC, vf13, vf00 + sq.xyzw vf17, 0(vi12) | addx.w vf08, vf08, vf21 + sq.xyzw vf04, 1(vi12) | maddax.xyzw ACC, vf01, vf05 + sq.xyzw vf19, 2(vi12) | madday.xyzw ACC, vf02, vf05 + lqi.xyzw vf15, vi02 | maddz.xyzw vf09, vf12, vf05 + lq.xyzw vf11, 838(vi09) | mul.xyz vf10, vf10, Q + sq.xyzw vf17, 0(vi13) | mul.xyz vf18, vf18, Q + sq.xyzw vf04, 1(vi13) | addx.w vf18, vf18, vf21 + sq.xyzw vf19, 2(vi13) | nop + div Q, vf00.w, vf09.w | nop + ibne vi06, vi12, L14 | ftoi4.xyz vf20, vf10 + mtir vi12, vf08.w | nop + nop | nop + nop | nop + mtir vi13, vf18.w | nop + sq.xyzw vf18, 0(vi12) | addx.w vf05, vf05, vf21 + sq.xyzw vf14, 1(vi12) | addx.w vf15, vf15, vf21 + sq.xyzw vf20, 2(vi12) | mul.xyz vf09, vf09, Q + sq.xyzw vf18, 0(vi13) | mul.xyz vf15, vf15, Q + sq.xyzw vf14, 1(vi13) | nop + sq.xyzw vf20, 2(vi13) | nop + mtir vi12, vf05.w | ftoi4.xyz vf19, vf09 + mtir vi13, vf15.w | nop + sq.xyzw vf15, 0(vi12) | nop + sq.xyzw vf11, 1(vi12) | nop + sq.xyzw vf19, 2(vi12) | nop + sq.xyzw vf15, 0(vi13) | nop + sq.xyzw vf11, 1(vi13) | nop + ibeq vi00, vi14, L26 | nop + sq.xyzw vf19, 2(vi13) | nop + b L40 | nop + nop | nop +L18: + nop | nop + nop | nop + mtir vi13, vf15.w | nop + sq.xyzw vf15, 0(vi12) | addx.w vf06, vf06, vf21 + sq.xyzw vf11, 1(vi12) | addx.w vf16, vf16, vf21 + sq.xyzw vf19, 2(vi12) | mul.xyz vf10, vf10, Q + sq.xyzw vf15, 0(vi13) | mul.xyz vf16, vf16, Q + sq.xyzw vf11, 
1(vi13) | nop + sq.xyzw vf19, 2(vi13) | nop + mtir vi12, vf06.w | ftoi4.xyz vf20, vf10 + mtir vi13, vf16.w | nop + sq.xyzw vf16, 0(vi12) | nop + sq.xyzw vf03, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | nop + sq.xyzw vf16, 0(vi13) | nop + sq.xyzw vf03, 1(vi13) | nop + ibeq vi00, vi14, L27 | nop + sq.xyzw vf20, 2(vi13) | nop + b L40 | nop + nop | nop +L19: + nop | nop + nop | nop + mtir vi13, vf16.w | nop + sq.xyzw vf16, 0(vi12) | addx.w vf07, vf07, vf21 + sq.xyzw vf03, 1(vi12) | addx.w vf17, vf17, vf21 + sq.xyzw vf20, 2(vi12) | mul.xyz vf09, vf09, Q + sq.xyzw vf16, 0(vi13) | mul.xyz vf17, vf17, Q + sq.xyzw vf03, 1(vi13) | nop + sq.xyzw vf20, 2(vi13) | nop + mtir vi12, vf07.w | ftoi4.xyz vf19, vf09 + mtir vi13, vf17.w | nop + sq.xyzw vf17, 0(vi12) | nop + sq.xyzw vf04, 1(vi12) | nop + sq.xyzw vf19, 2(vi12) | nop + sq.xyzw vf17, 0(vi13) | nop + sq.xyzw vf04, 1(vi13) | nop + ibeq vi00, vi14, L28 | nop + sq.xyzw vf19, 2(vi13) | nop + b L40 | nop + nop | nop +L20: + nop | nop + nop | nop + mtir vi13, vf17.w | nop + sq.xyzw vf17, 0(vi12) | addx.w vf08, vf08, vf21 + sq.xyzw vf04, 1(vi12) | addx.w vf18, vf18, vf21 + sq.xyzw vf19, 2(vi12) | mul.xyz vf10, vf10, Q + sq.xyzw vf17, 0(vi13) | mul.xyz vf18, vf18, Q + sq.xyzw vf04, 1(vi13) | nop + sq.xyzw vf19, 2(vi13) | nop + mtir vi12, vf08.w | ftoi4.xyz vf20, vf10 + mtir vi13, vf18.w | nop + sq.xyzw vf18, 0(vi12) | nop + sq.xyzw vf14, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | nop + sq.xyzw vf18, 0(vi13) | nop + sq.xyzw vf14, 1(vi13) | nop + ibeq vi00, vi14, L25 | nop + sq.xyzw vf20, 2(vi13) | nop + b L40 | nop + nop | nop +L21: + div Q, vf00.w, vf10.w | addx.w vf07, vf07, vf21 + sq.xyzw vf16, 0(vi12) | addx.w vf08, vf08, vf21 + sq.xyzw vf03, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | nop + iaddi vi02, vi02, -0x1 | ftoi4.xyz vf19, vf09 + nop | nop + nop | nop + nop | mul.xyz vf10, vf10, Q + mtir vi12, vf07.w | mul.xyz vf18, vf18, Q + mtir vi13, vf08.w | nop + sq.xyzw vf17, 0(vi12) | nop + sq.xyzw vf04, 1(vi12) | nop + sq.xyzw vf19, 
2(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf18, 0(vi13) | nop + sq.xyzw vf14, 1(vi13) | nop + ibeq vi00, vi14, L25 | nop + sq.xyzw vf20, 2(vi13) | nop + b L40 | nop + nop | nop +L22: + div Q, vf00.w, vf09.w | addx.w vf08, vf08, vf21 + sq.xyzw vf17, 0(vi12) | addx.w vf05, vf05, vf21 + sq.xyzw vf04, 1(vi12) | nop + sq.xyzw vf19, 2(vi12) | nop + iaddi vi02, vi02, -0x1 | ftoi4.xyz vf20, vf10 + nop | nop + nop | nop + nop | mul.xyz vf09, vf09, Q + mtir vi12, vf08.w | mul.xyz vf15, vf15, Q + mtir vi13, vf05.w | nop + sq.xyzw vf18, 0(vi12) | nop + sq.xyzw vf14, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | ftoi4.xyz vf19, vf09 + sq.xyzw vf15, 0(vi13) | nop + sq.xyzw vf11, 1(vi13) | nop + ibeq vi00, vi14, L26 | nop + sq.xyzw vf19, 2(vi13) | nop + b L40 | nop + nop | nop +L23: + div Q, vf00.w, vf10.w | addx.w vf05, vf05, vf21 + sq.xyzw vf18, 0(vi12) | addx.w vf06, vf06, vf21 + sq.xyzw vf14, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | nop + iaddi vi02, vi02, -0x1 | ftoi4.xyz vf19, vf09 + nop | nop + nop | nop + nop | mul.xyz vf10, vf10, Q + mtir vi12, vf05.w | mul.xyz vf16, vf16, Q + mtir vi13, vf06.w | nop + sq.xyzw vf15, 0(vi12) | nop + sq.xyzw vf11, 1(vi12) | nop + sq.xyzw vf19, 2(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf16, 0(vi13) | nop + sq.xyzw vf03, 1(vi13) | nop + ibeq vi00, vi14, L27 | nop + sq.xyzw vf20, 2(vi13) | nop + b L40 | nop + nop | nop +L24: + div Q, vf00.w, vf09.w | addx.w vf06, vf06, vf21 + sq.xyzw vf15, 0(vi12) | addx.w vf07, vf07, vf21 + sq.xyzw vf11, 1(vi12) | nop + sq.xyzw vf19, 2(vi12) | nop + iaddi vi02, vi02, -0x1 | ftoi4.xyz vf20, vf10 + nop | nop + nop | nop + nop | mul.xyz vf09, vf09, Q + mtir vi12, vf06.w | mul.xyz vf17, vf17, Q + mtir vi13, vf07.w | nop + sq.xyzw vf16, 0(vi12) | nop + sq.xyzw vf03, 1(vi12) | nop + sq.xyzw vf20, 2(vi12) | ftoi4.xyz vf19, vf09 + sq.xyzw vf17, 0(vi13) | nop + sq.xyzw vf04, 1(vi13) | nop + ibeq vi00, vi14, L28 | nop + sq.xyzw vf19, 2(vi13) | nop + b L40 | nop + nop | nop +L25: + lqi.xyzw vf05, vi02 | nop + 0.5 | subw.w 
vf28, vf00, vf00 :i + lqi.xyz vf28, vi02 | addi.y vf27, vf00, I + lqi.xyzw vf15, vi02 | nop + mtir vi09, vf24.x | nop + mtir vi10, vf24.y | addx.w vf28, vf28, vf21 + mtir vi11, vf24.z | mulx.xyz vf05, vf05, vf27 + nop | nop + lq.xyzw vf11, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf05, vf05, vf28 + lq.xyzw vf30, 838(vi11) | nop + nop | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf05 + nop | maddz.xyzw vf09, vf12, vf05 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf11, vf11, vf27 + mtir vi11, vf24.z | mulx.xyz vf06, vf06, vf27 + nop | nop + lq.xyzw vf03, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf06, vf06, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf06 + nop | maddz.xyzw vf10, vf12, vf06 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | mulx.xyz vf07, vf07, vf27 + mtir vi11, vf24.z | maddz.xyzw vf03, vf03, vf27 + nop | mul.xyz vf19, vf09, Q + lq.xyzw vf04, 838(vi09) | mul.xyz vf15, vf15, Q + lq.xyzw vf29, 838(vi10) | add.xyzw vf07, vf07, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | ftoi4.xyz vf19, vf19 + nop | nop + b L29 | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 +L26: + lqi.xyzw vf06, vi02 | nop + lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 + 0.5 | nop :i + lqi.xyz vf28, vi02 | addi.y vf27, vf00, I + lqi.xyzw vf16, vi02 | nop + mtir vi09, vf24.x | addx.w vf28, vf28, vf21 + mtir vi10, vf24.y | nop + mtir vi11, vf24.z | mulx.xyz vf06, vf06, vf27 + nop | 
nop + lq.xyzw vf03, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf06, vf06, vf28 + lq.xyzw vf30, 838(vi11) | nop + nop | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf06 + nop | maddz.xyzw vf10, vf12, vf06 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf03, vf03, vf27 + mtir vi11, vf24.z | mulx.xyz vf07, vf07, vf27 + nop | nop + lq.xyzw vf04, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf07, vf07, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf07 + lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf07 + nop | maddz.xyzw vf09, vf12, vf07 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | mulx.xyz vf08, vf08, vf27 + mtir vi11, vf24.z | maddz.xyzw vf04, vf04, vf27 + nop | mul.xyz vf20, vf10, Q + lq.xyzw vf14, 838(vi09) | mul.xyz vf16, vf16, Q + lq.xyzw vf29, 838(vi10) | add.xyzw vf08, vf08, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | ftoi4.xyz vf20, vf20 + nop | nop + b L30 | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 +L27: + lqi.xyzw vf07, vi02 | nop + lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 + 0.5 | nop :i + lqi.xyz vf28, vi02 | addi.y vf27, vf00, I + lqi.xyzw vf17, vi02 | nop + mtir vi09, vf24.x | addx.w vf28, vf28, vf21 + mtir vi10, vf24.y | nop + mtir vi11, vf24.z | mulx.xyz vf07, vf07, vf27 + nop | nop + lq.xyzw vf04, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf07, vf07, vf28 + lq.xyzw vf30, 838(vi11) | nop + nop | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, 
vf30, vf27 + lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf07 + lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf07 + nop | maddz.xyzw vf09, vf12, vf07 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf04, vf04, vf27 + mtir vi11, vf24.z | mulx.xyz vf08, vf08, vf27 + nop | nop + lq.xyzw vf14, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf08, vf08, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 + lqi.xyzw vf15, vi02 | madday.xyzw ACC, vf02, vf08 + nop | maddz.xyzw vf10, vf12, vf08 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | mulx.xyz vf05, vf05, vf27 + mtir vi11, vf24.z | maddz.xyzw vf14, vf14, vf27 + nop | mul.xyz vf19, vf09, Q + lq.xyzw vf11, 838(vi09) | mul.xyz vf17, vf17, Q + lq.xyzw vf29, 838(vi10) | add.xyzw vf05, vf05, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | ftoi4.xyz vf19, vf19 + nop | nop + b L31 | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 +L28: + lqi.xyzw vf08, vi02 | nop + lqi.xyzw vf24, vi03 | subw.w vf28, vf00, vf00 + 0.5 | nop :i + lqi.xyz vf28, vi02 | addi.y vf27, vf00, I + lqi.xyzw vf18, vi02 | nop + mtir vi09, vf24.x | addx.w vf28, vf28, vf21 + mtir vi10, vf24.y | nop + mtir vi11, vf24.z | mulx.xyz vf08, vf08, vf27 + nop | nop + lq.xyzw vf14, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf08, vf08, vf28 + lq.xyzw vf30, 838(vi11) | nop + nop | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 + lqi.xyzw vf15, vi02 | madday.xyzw ACC, vf02, vf08 + nop | maddz.xyzw vf10, vf12, vf08 + mtir vi09, vf24.x | 
mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf14, vf14, vf27 + mtir vi11, vf24.z | mulx.xyz vf05, vf05, vf27 + nop | nop + lq.xyzw vf11, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | add.xyzw vf05, vf05, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | nop + nop | nop + nop | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 + lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf05 + nop | maddz.xyzw vf09, vf12, vf05 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | mulx.xyz vf06, vf06, vf27 + mtir vi11, vf24.z | maddz.xyzw vf11, vf11, vf27 + nop | mul.xyz vf20, vf10, Q + lq.xyzw vf03, 838(vi09) | mul.xyz vf18, vf18, Q + lq.xyzw vf29, 838(vi10) | add.xyzw vf06, vf06, vf28 + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | ftoi4.xyz vf20, vf20 + nop | nop + b L32 | mulay.xyzw ACC, vf29, vf27 + lqi.xyzw vf24, vi03 | maddy.xyzw vf29, vf30, vf27 +L29: + lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf07 + lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf07 + mtir vi12, vf05.w | maddz.xyzw vf09, vf12, vf07 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf04, vf04, vf27 + mtir vi11, vf24.z | mulx.xyz vf08, vf08, vf27 + sq.xyzw vf15, 0(vi12) | mul.xyz vf20, vf10, Q + lq.xyzw vf14, 838(vi09) | mul.xyz vf16, vf16, Q + lq.xyzw vf29, 838(vi10) | nop + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | ftoi4.xyz vf20, vf20 + sq.xyzw vf11, 1(vi12) | add.xyzw vf08, vf08, vf28 + lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 + ibeq vi07, vi12, L33 | nop + sq.xyzw vf19, 2(vi12) | maddy.xyzw vf29, vf30, vf27 +L30: + lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 + lqi.xyzw vf15, vi02 | madday.xyzw ACC, vf02, vf08 + mtir vi12, vf06.w | maddz.xyzw vf10, vf12, vf08 + 
mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf14, vf14, vf27 + mtir vi11, vf24.z | mulx.xyz vf05, vf05, vf27 + sq.xyzw vf16, 0(vi12) | mul.xyz vf19, vf09, Q + lq.xyzw vf11, 838(vi09) | mul.xyz vf17, vf17, Q + lq.xyzw vf29, 838(vi10) | nop + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | ftoi4.xyz vf19, vf19 + sq.xyzw vf03, 1(vi12) | add.xyzw vf05, vf05, vf28 + lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 + ibeq vi07, vi12, L34 | nop + sq.xyzw vf20, 2(vi12) | maddy.xyzw vf29, vf30, vf27 +L31: + lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf05 + mtir vi12, vf07.w | maddz.xyzw vf09, vf12, vf05 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf11, vf11, vf27 + mtir vi11, vf24.z | mulx.xyz vf06, vf06, vf27 + sq.xyzw vf17, 0(vi12) | mul.xyz vf20, vf10, Q + lq.xyzw vf03, 838(vi09) | mul.xyz vf18, vf18, Q + lq.xyzw vf29, 838(vi10) | nop + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf09.w | ftoi4.xyz vf20, vf20 + sq.xyzw vf04, 1(vi12) | add.xyzw vf06, vf06, vf28 + lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 + ibeq vi07, vi12, L35 | nop + sq.xyzw vf19, 2(vi12) | maddy.xyzw vf29, vf30, vf27 +L32: + lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf06 + mtir vi12, vf08.w | maddz.xyzw vf10, vf12, vf06 + mtir vi09, vf24.x | mulaw.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddz.xyzw vf03, vf03, vf27 + mtir vi11, vf24.z | mul.xyz vf19, vf09, Q + sq.xyzw vf18, 0(vi12) | mul.xyz vf15, vf15, Q + lq.xyzw vf04, 838(vi09) | mulx.xyz vf07, vf07, vf27 + lq.xyzw vf29, 838(vi10) | nop + lq.xyzw vf30, 838(vi11) | nop + div Q, vf00.w, vf10.w | ftoi4.xyz vf19, vf19 + sq.xyzw vf14, 1(vi12) | add.xyzw vf07, vf07, vf28 + lqi.xyzw vf24, vi03 | mulay.xyzw ACC, vf29, vf27 + ibne vi07, vi12, L29 | nop + sq.xyzw 
vf20, 2(vi12) | maddy.xyzw vf29, vf30, vf27 + mtir vi12, vf05.w | addx.w vf15, vf15, vf21 + lqi.xyzw vf08, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf07 + lqi.xyzw vf18, vi02 | madday.xyzw ACC, vf02, vf07 + mtir vi13, vf15.w | maddz.xyzw vf09, vf12, vf07 + mtir vi09, vf24.x | mul.xyz vf16, vf16, Q + mtir vi10, vf24.y | mul.xyz vf10, vf10, Q + mtir vi11, vf24.z | nop + sq.xyzw vf11, 1(vi12) | addx.w vf17, vf17, vf21 + div Q, vf00.w, vf09.w | mulx.xyz vf08, vf08, vf27 + sq.xyzw vf15, 0(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf19, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf15, 0(vi13) | maddz.xyzw vf04, vf04, vf27 + sq.xyzw vf11, 1(vi13) | add.xyzw vf08, vf08, vf28 + sq.xyzw vf19, 2(vi13) | addx.w vf16, vf16, vf21 + lq.xyzw vf14, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibne vi08, vi12, L36 | nop + mtir vi12, vf06.w | maddax.xyzw ACC, vf01, vf08 + b L40 | nop + nop | nop +L33: + mtir vi12, vf06.w | addx.w vf16, vf16, vf21 + lqi.xyzw vf05, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf08 + lqi.xyzw vf15, vi02 | madday.xyzw ACC, vf02, vf08 + mtir vi13, vf16.w | maddz.xyzw vf10, vf12, vf08 + mtir vi09, vf24.x | mul.xyz vf17, vf17, Q + mtir vi10, vf24.y | mul.xyz vf09, vf09, Q + mtir vi11, vf24.z | nop + sq.xyzw vf03, 1(vi12) | addx.w vf18, vf18, vf21 + div Q, vf00.w, vf10.w | mulx.xyz vf05, vf05, vf27 + sq.xyzw vf16, 0(vi12) | ftoi4.xyz vf19, vf09 + sq.xyzw vf20, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf16, 0(vi13) | maddz.xyzw vf14, vf14, vf27 + sq.xyzw vf03, 1(vi13) | add.xyzw vf05, vf05, vf28 + sq.xyzw vf20, 2(vi13) | addx.w vf17, vf17, vf21 + lq.xyzw vf11, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibne vi08, vi12, L37 | nop + mtir vi12, vf07.w | maddax.xyzw ACC, vf01, vf05 + b L40 | nop + nop | nop 
+L34: + mtir vi12, vf07.w | addx.w vf17, vf17, vf21 + lqi.xyzw vf06, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf05 + lqi.xyzw vf16, vi02 | madday.xyzw ACC, vf02, vf05 + mtir vi13, vf17.w | maddz.xyzw vf09, vf12, vf05 + mtir vi09, vf24.x | mul.xyz vf18, vf18, Q + mtir vi10, vf24.y | mul.xyz vf10, vf10, Q + mtir vi11, vf24.z | nop + sq.xyzw vf04, 1(vi12) | addx.w vf15, vf15, vf21 + div Q, vf00.w, vf09.w | mulx.xyz vf06, vf06, vf27 + sq.xyzw vf17, 0(vi12) | ftoi4.xyz vf20, vf10 + sq.xyzw vf19, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf17, 0(vi13) | maddz.xyzw vf11, vf11, vf27 + sq.xyzw vf04, 1(vi13) | add.xyzw vf06, vf06, vf28 + sq.xyzw vf19, 2(vi13) | addx.w vf18, vf18, vf21 + lq.xyzw vf03, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibne vi08, vi12, L38 | nop + mtir vi12, vf08.w | maddax.xyzw ACC, vf01, vf06 + b L40 | nop + nop | nop +L35: + mtir vi12, vf08.w | addx.w vf18, vf18, vf21 + lqi.xyzw vf07, vi02 | mulaw.xyzw ACC, vf13, vf00 + lqi.xyz vf28, vi02 | maddax.xyzw ACC, vf01, vf06 + lqi.xyzw vf17, vi02 | madday.xyzw ACC, vf02, vf06 + mtir vi13, vf18.w | maddz.xyzw vf10, vf12, vf06 + mtir vi09, vf24.x | mul.xyz vf15, vf15, Q + mtir vi10, vf24.y | mul.xyz vf09, vf09, Q + mtir vi11, vf24.z | nop + sq.xyzw vf14, 1(vi12) | addx.w vf16, vf16, vf21 + div Q, vf00.w, vf10.w | mulx.xyz vf07, vf07, vf27 + sq.xyzw vf18, 0(vi12) | ftoi4.xyz vf19, vf09 + sq.xyzw vf20, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf18, 0(vi13) | maddz.xyzw vf03, vf03, vf27 + sq.xyzw vf14, 1(vi13) | add.xyzw vf07, vf07, vf28 + sq.xyzw vf20, 2(vi13) | addx.w vf15, vf15, vf21 + lq.xyzw vf04, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibne vi08, vi12, L39 | nop + mtir vi12, vf05.w | maddax.xyzw ACC, vf01, vf07 + b L40 | nop + nop | nop +L36: + lqi.xyzw vf05, vi02 | 
madday.xyzw ACC, vf02, vf08 + lqi.xyz vf28, vi02 | maddz.xyzw vf10, vf12, vf08 + lqi.xyzw vf15, vi02 | mul.xyz vf17, vf17, Q + mtir vi13, vf16.w | mul.xyz vf09, vf09, Q + mtir vi09, vf24.x | mulay.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddy.xyzw vf29, vf30, vf27 + mtir vi11, vf24.z | nop + sq.xyzw vf03, 1(vi12) | ftoi4.xyz vf19, vf09 + div Q, vf00.w, vf10.w | mulx.xyz vf05, vf05, vf27 + sq.xyzw vf16, 0(vi12) | addx.w vf18, vf18, vf21 + sq.xyzw vf20, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf16, 0(vi13) | maddz.xyzw vf14, vf14, vf27 + sq.xyzw vf03, 1(vi13) | add.xyzw vf05, vf05, vf28 + sq.xyzw vf20, 2(vi13) | nop + lq.xyzw vf11, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibeq vi08, vi12, L40 | nop + mtir vi12, vf07.w | maddax.xyzw ACC, vf01, vf05 +L37: + lqi.xyzw vf06, vi02 | madday.xyzw ACC, vf02, vf05 + lqi.xyz vf28, vi02 | maddz.xyzw vf09, vf12, vf05 + lqi.xyzw vf16, vi02 | mul.xyz vf18, vf18, Q + mtir vi13, vf17.w | mul.xyz vf10, vf10, Q + mtir vi09, vf24.x | mulay.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddy.xyzw vf29, vf30, vf27 + mtir vi11, vf24.z | nop + sq.xyzw vf04, 1(vi12) | ftoi4.xyz vf20, vf10 + div Q, vf00.w, vf09.w | mulx.xyz vf06, vf06, vf27 + sq.xyzw vf17, 0(vi12) | addx.w vf15, vf15, vf21 + sq.xyzw vf19, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf17, 0(vi13) | maddz.xyzw vf11, vf11, vf27 + sq.xyzw vf04, 1(vi13) | add.xyzw vf06, vf06, vf28 + sq.xyzw vf19, 2(vi13) | nop + lq.xyzw vf03, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibeq vi08, vi12, L40 | nop + mtir vi12, vf08.w | maddax.xyzw ACC, vf01, vf06 +L38: + lqi.xyzw vf07, vi02 | madday.xyzw ACC, vf02, vf06 + lqi.xyz vf28, vi02 | maddz.xyzw vf10, vf12, vf06 + lqi.xyzw vf17, vi02 | mul.xyz vf15, vf15, Q + mtir vi13, vf18.w | mul.xyz vf09, vf09, Q + mtir vi09, vf24.x | mulay.xyzw ACC, vf29, vf27 + 
mtir vi10, vf24.y | maddy.xyzw vf29, vf30, vf27 + mtir vi11, vf24.z | nop + sq.xyzw vf14, 1(vi12) | ftoi4.xyz vf19, vf09 + div Q, vf00.w, vf10.w | mulx.xyz vf07, vf07, vf27 + sq.xyzw vf18, 0(vi12) | addx.w vf16, vf16, vf21 + sq.xyzw vf20, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf18, 0(vi13) | maddz.xyzw vf03, vf03, vf27 + sq.xyzw vf14, 1(vi13) | add.xyzw vf07, vf07, vf28 + sq.xyzw vf20, 2(vi13) | nop + lq.xyzw vf04, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibeq vi08, vi12, L40 | nop + mtir vi12, vf05.w | maddax.xyzw ACC, vf01, vf07 +L39: + lqi.xyzw vf08, vi02 | madday.xyzw ACC, vf02, vf07 + lqi.xyz vf28, vi02 | maddz.xyzw vf09, vf12, vf07 + lqi.xyzw vf18, vi02 | mul.xyz vf16, vf16, Q + mtir vi13, vf15.w | mul.xyz vf10, vf10, Q + mtir vi09, vf24.x | mulay.xyzw ACC, vf29, vf27 + mtir vi10, vf24.y | maddy.xyzw vf29, vf30, vf27 + mtir vi11, vf24.z | nop + sq.xyzw vf11, 1(vi12) | ftoi4.xyz vf20, vf10 + div Q, vf00.w, vf09.w | mulx.xyz vf08, vf08, vf27 + sq.xyzw vf15, 0(vi12) | addx.w vf17, vf17, vf21 + sq.xyzw vf19, 2(vi12) | mulaw.xyzw ACC, vf29, vf27 + sq.xyzw vf15, 0(vi13) | maddz.xyzw vf04, vf04, vf27 + sq.xyzw vf11, 1(vi13) | add.xyzw vf08, vf08, vf28 + sq.xyzw vf19, 2(vi13) | nop + lq.xyzw vf14, 838(vi09) | nop + lq.xyzw vf29, 838(vi10) | mulaw.xyzw ACC, vf13, vf00 + lq.xyzw vf30, 838(vi11) | nop + lqi.xyzw vf24, vi03 | nop + ibne vi08, vi12, L36 | nop + mtir vi12, vf06.w | maddax.xyzw ACC, vf01, vf08 +L40: + ilw.x vi01, 971(vi00) | nop + ilw.y vi12, 971(vi00) | nop + lq.xyzw vf05, 972(vi00) | nop + lq.xyzw vf06, 973(vi00) | nop + lq.xyzw vf07, 974(vi00) | nop + sq.xyzw vf05, 976(vi00) | nop + ibne vi00, vi01, L41 | nop + sq.xyzw vf07, 977(vi00) | nop + sq.xyzw vf06, 977(vi00) | nop +L41: + iaddiu vi02, vi00, 0x3d0 | nop + isw.y vi01, 971(vi00) | nop + xgkick vi02 | nop + mtir vi01, vf21.x | nop + mtir vi12, vf23.w | nop + mr32.xyzw vf22, vf22 | nop + iaddiu vi02, 
vi00, 0x32 | nop + mfir.y vf23, vi00 | nop + xgkick vi01 | nop :e + mtir vi03, vf22.x | nop +L42: + mtir vi01, vf23.y | nop + mtir vi12, vf23.w | nop + mr32.xyzw vf22, vf22 | nop + iaddiu vi02, vi00, 0x32 | nop + iadd vi01, vi01, vi15 | nop + mfir.y vf23, vi12 | nop :e + mtir vi03, vf22.x | nop +L43: + nop | nop :e + nop | nop + lq. vf00, 0(vi00) | addx. vf00, vf00, vf00 diff --git a/docs/scratch/tie_ee.asm b/docs/scratch/tie_ee.asm new file mode 100644 index 0000000000..6e52e26ad0 --- /dev/null +++ b/docs/scratch/tie_ee.asm @@ -0,0 +1,1760 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; .function draw-inline-array-instance-tie +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;BAD PROLOGUE +;; Warnings: +;; INFO: Flagged as asm by config +;; INFO: Assembly Function + +# this is yet-another insane double-buffered DMA function. +# it tries to keep a SPR to or SPR from DMA going always, while processing other stuff +# on the Scratch Pad. +# The upload chunks are 2kB and complete in 128 cycles ideally. + +# it's documented, not including: +# -tie near (but this is very simple) +# -wind (probably not too hard) +# -generic (for another day) + +# args are: +# a0 : vis bits +# a1 : instances +# a2 : num instances +# a3 : output dma buf. + + +# common vars: +# t0 work +# t1 SPR to DMA bank +# a3 SPR from DMA bank + +# t5 *wind-work* +# t2 bank ptr +# a1 out ptr + +B0: +L137: + daddiu sp, sp, -128 + sd ra, 0(sp) + sq s0, 16(sp) + sq s1, 32(sp) + sq s2, 48(sp) + sq s3, 64(sp) + sq s4, 80(sp) + sq s5, 96(sp) + sq gp, 112(sp) + +# Initialization block. +# On exit, we set t6 to the first 32-bits of vis data. + + lui t4, 28672 # = 0x7000 + lw v1, 4(a3) # = dma buf base + lui t1, 4096 # = 0x1000 + lui t2, 4096 # = 0x1000 + + sync.l + cache dxwbin v1, 0 + sync.l + cache dxwbin v1, 1 + sync.l + + lw t0, *instance-tie-work-copy*(s7) # use the work copy. 
+ ori t1, t1, 54272 # SPR TO + sw a3, 396(t0) # store dma-buffer + ori a3, t2, 53248 # SPR FROM + lw t5, *wind-work*(s7) + lw t6, 0(a0) # load first vis bits (32 bits) + ori t2, t4, 16 # spr + 16 (bank ptr?) + vmaxw.xyzw vf1, vf0, vf0 # vf1 = (1, 1, 1, 1) + addiu t3, a1, -4 # remove basic ptr. + ori a1, t4, 4112 # set out ptr + sw t1, 400(t0) # stash to-spr + addiu t9, r0, 0 # t9 = 0 + sw a3, 404(t0) # stash from-spr + or t8, a1, r0 # another copy of out ptr + sw t5, 408(t0) # stash wind work + sll r0, r0, 0 + lqc2 vf3, 64(t0) # constant (4096., 128., 0., 0.) + sll r0, r0, 0 + sw r0, 432(t0) # set the flags to 0. + +# Find the first group of 32 with a visible. +# Note: this only runs on the entrance, and this runs before any +# SPR DMA is started. +B1: +L138: + bne t6, r0, L139 # if we found something advance to 139 + sll r0, r0, 0 + +B2: + addiu a0, a0, 4 # advance vis pointer by 32 bits + addiu t3, t3, 2048 # advance instance ptr by 32 * 64 = 2048 + daddiu a2, a2, -32 # decrement remaining instances by 32. + lw t6, 0(a0) # load the next vis 32-bits. + blez a2, L177 # Return, if we got to the end. + sll r0, r0, 0 + +B3: + beq r0, r0, L138 + sll r0, r0, 0 + +# Here once we point to a visible thing. +# Now we should begin DMA to SPR. +# But first, we must wait for any in-progress transfer to end: +B4: +L139: + lw t4, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t4, t4, 256 + sll r0, r0, 0 + bne t4, r0, L139 + sll r0, r0, 0 + +# No more in progress transfer. +# Initialize the first transfer (special case) +# Initialize the double buffering. +B5: + sw t3, 16(t1) # source is the input instance array + xori t4, t2, 2048 # dest is "the other" bank array (in spr) + sw t4, 128(t1) + addiu t4, r0, 128 # copy 128 qw = 2048 bytes = 32 instances + sw t4, 32(t1) + addiu t4, r0, 256 # start!!! + sw t4, 0(t1) + sll r0, r0, 0 + +# MAIN LOOP TOP +# In here, we have an in progress DMA SPR to transfer.
+# we're going to do as much stuff as possible before waiting on the transfer. +# we'll start with finding the address of the next to transfer source. +# (we'll skip groups of 32 that have no visible) +B6: +L140: + or ra, a0, r0 # ra = vis ptr for the SPR to transfer's instances + xori t2, t2, 2048 # toggle spr bank to point to the uploading data + daddiu a0, a0, 4 # increment vis ptr to the next upload's vis data + or t7, a0, r0 # remember this as the end of this group's vis data + or t4, t2, r0 # t4 = the uploading bank + daddiu t6, a2, -32 # t6 = the number remaining after this group. + bgtz t6, L142 # if there's nothing left, fall through (will skip "find next") + lw t6, 0(a0) # t6 is the next 32-bits of vis + +B7: + beq r0, r0, L145 # nothing left after the current upload, don't look for next + sll r0, r0, 0 + +B8: + sll r0, r0, 0 + lw v1, 400(r0) # ?? probably will crash if we hit this, but it's unreachable. +B9: +L141: + daddiu a2, a2, -32 # skip 32 instances that are invisible + addiu a0, a0, 4 # advance vis ptr + blez a2, L145 # did we reach the end? if so, give up looking for another. + lw t6, 0(a0) # load next vis + +B10: + sll r0, r0, 0 + sll r0, r0, 0 +B11: +L142: + beq t6, r0, L141 # did we find visible? + addiu t3, t3, 2048 # advance input to bank upload ptr. + +# If we reach here, we've found the next SPR to transfer's source +B12: +L143: + # check if in progress upload is done? + lw t6, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t6, t6, 256 + sll r0, r0, 0 + beq t6, r0, L144 + sll r0, r0, 0 + +B13: + # nope, increment stall counter. + sll r0, r0, 0 + lw t6, 444(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu t6, t6, 1 + sll r0, r0, 0 + sw t6, 444(t0) + beq r0, r0, L143 + sll r0, r0, 0 + +# if we reach here, we've found the next SPR to source, and +# the previous upload is done. 
+B14: +L144: + sw t3, 16(t1) # source addr + xori t6, t2, 2048 # dest is the other + sw t6, 128(t1) + addiu t6, r0, 128 # 128 qw (see prev) + sw t6, 32(t1) + addiu t6, r0, 256 + beq r0, r0, L146 # skip waiting for completion! + sw t6, 0(t1) + +# if we reach here, we didn't find the next SPR to source +# because there's nothing to do next. So just wait for +# the current upload to finish +B15: +L145: + lw t6, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t6, t6, 256 + sll r0, r0, 0 + beq t6, r0, L146 + sll r0, r0, 0 + +B16: + # inc wait counter if we had to wait. + sll r0, r0, 0 + lw t6, 444(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu t6, t6, 1 + sll r0, r0, 0 + sw t6, 444(t0) + beq r0, r0, L145 + sll r0, r0, 0 + +# if we reach here, we've got some bank data. +# TOP for per-8 instances +B17: +L146: + lb t6, 0(ra) # load 8 bits of vis data for this bank. + addiu ra, ra, 1 # seek to next vis byte + sll r0, r0, 0 + sw ra, 412(t0) # stash our vis bits + bne t6, r0, L147 # branch if we have at least one visible in this group of 8. + sw t7, 416(t0) # stash end vis ptr. + +B18: + # here if the group of 8 was invisible + daddiu a2, a2, -8 # skip 8 + addiu t4, t4, 512 # advance input array pointer by 8 + beq r0, r0, L173 # skip to end of 8-block loop. + sll r0, r0, 0 + +# we have some visible! +B19: +L147: + addiu t7, r0, 128 # vis mask constant (start at highest bit) + lqc2 vf2, 16(t4) # load the bsphere of the instance. +B20: +L148: + daddiu ra, t9, -246 # do we have room left in the output for up to 8 visible? + sll r0, r0, 0 # note, really 10, not sure why they leave 2 empty. + blez ra, L151 # branch if we have room, vcallms 42 no matter what. + vcallms 42 # these people are insane. + # vi01 = in view frustum result + # vf05 = M(vf28) * bsphere (camera) + # vf06 = M(vf24) * bsphere (camera rot (I think includes trans, but no perspective)) + +# if we got here, we filled the output buffer. 
+# we should then copy the output FROM SPR to the dma buffer. +B21: +L149: + lw t8, 0(a3) # SPR FROM sync. + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t8, t8, 256 + sll r0, r0, 0 + beq t8, r0, L150 + sll r0, r0, 0 + +B22: + sll r0, r0, 0 + lw t8, 440(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu t8, t8, 1 + sll r0, r0, 0 + sw t8, 440(t0) + beq r0, r0, L149 + sll r0, r0, 0 + +B23: +L150: + sw a1, 128(a3) # copy from out ptr + xori a1, a1, 12288 # toggle out + sw v1, 16(a3) # copy to dma buf ptr + sll t8, t9, 4 + addu v1, v1, t8 # seek dma buf ptr to the next + or t8, a1, r0 # t8 = the next out ptr + sw t9, 32(a3) # store qwc + addiu t9, r0, 256 + sw t9, 0(a3) # start the FROM. + addiu t9, r0, 0 # t9 = 0 (why?) + +# here, we have room in the scratchpad output for up to 10. +B24: +L151: + sll r0, r0, 0 + lw ra, 12(t4) # ra = our prototype bucket + and gp, t6, t7 # check if our vis bit is set. + ld s5, 56(t4) # s5 = origin_3 + beq gp, r0, L172 # are we even visible? 
+ ld s2, 32(t4) # s2 = origin_0 + +B25: + sll gp, t9, 4 # gp = output offset + ld s4, 40(t4) # s4 = origin_1 + pextlh s3, s5, r0 # s3 = s5 << 16 (now 128-bits, origin3) + ld s5, 48(t4) # s5 = origin_2 + psraw s3, s3, 10 # s3 = origin3, now signed + lq s1, 28(ra) # s1 = our prototype bucket's dists + pextlh s2, s2, r0 # conversion of origin matrix + lq s0, 44(ra) # s0 = [rlength-near, rlength-stiff, rlength-mid, stiffness] + psraw s2, s2, 16 # conversion of origin matrix (see tie format doc) + qmtc2.ni vf14, s1 # vf14 = dists + pextlh s4, s4, r0 # conversion + qmtc2.ni vf15, s0 # vf15 = rlengths + psraw s4, s4, 16 # conversion + qmtc2.ni vf13, s3 # vf13 = origin3 + pextlh s5, s5, r0 # conversion + qmtc2.ni vf10, s2 # vf10 = origin0 + psraw s3, s5, 16 # conversion + lhu s2, 62(t4) # s2 = wind index + addu gp, gp, v1 # gp = output address (spr buffer) + qmtc2.ni vf11, s4 # vf11 = origin1 + dsll s5, s2, 4 # s5 = wind_idx * 16 + qmtc2.ni vf12, s3 # vf12 = origin2 + daddu s4, s2, t5 # s4 = wind_idx + time? (on the first time through, seems like it's not time...) + lw s2, 408(t0) # s2 = wind work + andi s4, s4, 63 # truncate upper bits so we fit in the 64 wind array. + lw s3, 384(t0) # s3 = wind vectors + sll s1, s4, 4 # index qw's with our [0, 64) index + lw s4, 4(ra) # flags + daddu s5, s3, s5 # s5 = wind vector pointer (not from wind work, in the static data.) + addu s3, s1, s2 # wind work vector array pointer + andi s1, s4, 1 # s1 = flag1 + andi s4, s4, 2 # s4 = flag2 + bne s1, r0, L172 # skip if flag1 is set. + cfc2.ni s1, vi1 # get the in-view-frustum result from VU0. + +B26: + vitof0.xyzw vf13, vf13 # convert origin matrix row to float. + lw t5, 1324(s2) # load wind time + bne s1, r0, L172 # skip if not in view frustum. 
+ lqc2 vf25, 112(t0) # vf25 = current TIE min distance + +B27: + sll r0, r0, 0 + lqc2 vf16, 16(t0) # vf16 = hmge-d + sll r0, r0, 0 + lqc2 vf17, 32(t0) # vf17 = hvdf-offset + vmulaz.xyzw acc, vf1, vf6 # [z, z, z, z] (how in front of camera) + sw gp, 196(t0) # work.upload-color-0.addr = output_ptr + vmsubw.xyzw vf8, vf1, vf2 # vf8 = dist in front of cam (origin - r_bs) + sw gp, 276(t0) # work.generic-color-0.addr = output_ptr + vadd.xyz vf5, vf0, vf0 # vf5 = [0, 0, 0, transformed_w] + sll r0, r0, 0 + vadd.xyz vf13, vf13, vf2 # vf13 = (+, +, +) corner of bounding box (outside of bsphere) + sll r0, r0, 0 + vmula.xyzw acc, vf1, vf1 # acc = [1, 1, 1, 1] + sll r0, r0, 0 + vsub.xyzw vf14, vf8, vf14 # (origin - r_bs) - dists + sll r0, r0, 0 + vaddw.w vf5, vf5, vf17 # vf5 = [0, 0, 0, transformed_w + hvdf_offset.w] + sll r0, r0, 0 + sll r0, r0, 0 + lqc2 vf30, 80(t0) # vf30 = far_morph + vmini.xyzw vf25, vf8, vf25 # update TIE min distance + sll r0, r0, 0 + vmsub.xyz vf15, vf14, vf15 # dist weights + sll r0, r0, 0 + vminiy.w vf5, vf5, vf16 # clip w min + sll r0, r0, 0 + sll r0, r0, 0 + lqc2 vf24, 128(t0) # vf24 = guard plane 0 (i think w plane) + sll r0, r0, 0 + sqc2 vf25, 112(t0) # vf25 = min-dist + vmini.xyz vf15, vf15, vf1 # saturate dist weights + sll r0, r0, 0 + vmaxx.w vf5, vf5, vf16 # clip w max + sll r0, r0, 0 + vsubz.xyzw vf16, vf8, vf16 # vf16 = dist in front of the camera - some hmge thing. + sll r0, r0, 0 + sll r0, r0, 0 + lqc2 vf25, 144(t0) # vf25 = some plane + sll r0, r0, 0 + lqc2 vf26, 160(t0) # vf26 = another plane + sll r0, r0, 0 + lqc2 vf27, 176(t0) # vf27 = yet another plane + vmulax.xyzw acc, vf24, vf2 # perform clipping with this plane. 
+ sll r0, r0, 0 + vmadday.xyzw acc, vf25, vf2 + sll r0, r0, 0 + vmaddaz.xyzw acc, vf26, vf2 + sll r0, r0, 0 + vmsubaw.xyzw acc, vf27, vf0 + sll r0, r0, 0 + vmsubw.xyzw vf24, vf1, vf2 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i s2, vf16 # s2 = dists - hmge thing + vmulw.xyzw vf28, vf15, vf30 # vf28 = scaled dist weights + sll r0, r0, 0 + vmulw.xyzw vf29, vf15, vf30 # vf29 = scaled dist weights, again? + sll r0, r0, 0 + sll r0, r0, 0 + lqc2 vf19, 0(t0) # vf19 = wind const (WIND) + vitof12.xyzw vf10, vf10 # convert origin (back to this, I guess) + sll r0, r0, 0 + pcgtw s1, r0, s2 # dists check + qmfc2.i s0, vf24 # clip check + vmulx.xyzw vf28, vf1, vf28 # distweights x + sll r0, r0, 0 + vmulz.xyzw vf29, vf1, vf29 # distweights z + lw s2, 56(ra) # s2 = stiffness + pcgtw s0, r0, s0 # check clip again + sqc2 vf5, 80(t8) # store magic w. + ppach s0, r0, s0 # s0 = more clip + sw s4, 80(t8) # store some flags + or s1, s0, s1 # more clipping/distance crap + sqc2 vf14, 96(t0) # called "dist-test" + ppacb s1, r0, s1 # distance stuff. + mfc1 r0, f31 + beq s2, r0, L153 # if stiffness == 0, skip ahead + sw s1, 84(t8) # output some clip/dist info. + +# apply wind. 
+B28: + vftoi0.zw vf28, vf28 + ld s1, 8(s5) + vftoi0.zw vf29, vf29 + ld s2, 0(s5) + pextlw s1, r0, s1 + lqc2 vf16, 12(s3) + pextlw s3, r0, s2 + qmtc2.i vf18, s1 + sll r0, r0, 0 + qmtc2.i vf17, s3 + vmula.xyzw acc, vf16, vf1 + sll r0, r0, 0 + vmsubax.xyzw acc, vf18, vf19 + sll r0, r0, 0 + vmsuby.xyzw vf16, vf17, vf19 + sll r0, r0, 0 + vsubx.x vf28, vf30, vf15 + sll r0, r0, 0 + vsubz.x vf29, vf1, vf15 + sll r0, r0, 0 + vitof0.zw vf28, vf28 + sll r0, r0, 0 + vmulaz.xyzw acc, vf16, vf19 + sll r0, r0, 0 + vmadd.xyzw vf18, vf1, vf18 + sll r0, r0, 0 + vitof0.zw vf29, vf29 + sll r0, r0, 0 + vaddy.y vf28, vf0, vf0 + sll r0, r0, 0 + vaddy.y vf29, vf0, vf0 + sll r0, r0, 0 + vmulaz.xyzw acc, vf18, vf19 + sll r0, r0, 0 + vmadd.xyzw vf17, vf17, vf1 + sll r0, r0, 0 + vitof12.xyzw vf11, vf11 + sll r0, r0, 0 + vitof12.xyzw vf12, vf12 + sll r0, r0, 0 + vsubw.w vf28, vf30, vf28 + sll r0, r0, 0 + vminiw.xyzw vf17, vf17, vf0 + sll r0, r0, 0 + vsubw.w vf29, vf30, vf29 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i s3, vf18 + vmaxw.xyzw vf27, vf17, vf19 + sll r0, r0, 0 + ppacw s3, r0, s3 + mfc1 r0, f31 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + vmulw.xyzw vf27, vf27, vf15 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + vmulax.yw acc, vf0, vf0 + sll r0, r0, 0 + vmulay.xz acc, vf27, vf10 + sll r0, r0, 0 + vmadd.xyzw vf10, vf1, vf10 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i s2, vf27 + vmulax.yw acc, vf0, vf0 + lw s1, 436(t0) + vmulay.xz acc, vf27, vf11 + sll r0, r0, 0 + vmadd.xyzw vf11, vf1, vf11 + sll r0, r0, 0 + bne s1, s7, L152 + ppacw s2, r0, s2 + +B29: + vmulax.yw acc, vf0, vf0 + sd s3, 8(s5) + vmulay.xz acc, vf27, vf12 + sd s2, 0(s5) + bne s4, r0, L164 + vmadd.xyzw vf12, vf1, vf12 + +B30: + beq r0, r0, L154 + sll r0, r0, 0 + + +B31: +L152: + vmulax.yw acc, vf0, vf0 # acc = [? 0 ? 
0] + sll r0, r0, 0 + vmulay.xz acc, vf27, vf12 # acc = [o2.y*gp3.x, 0, o2.y*gp3.z, 0] (wtf is this) + sll r0, r0, 0 + bne s4, r0, L164 + vmadd.xyzw vf12, vf1, vf12 + +B32: + beq r0, r0, L154 + sll r0, r0, 0 + +# Don't Apply Wind. +B33: +L153: + vftoi0.zw vf28, vf28 # dist weights + sll r0, r0, 0 + vftoi0.zw vf29, vf29 # more dist weights + sll r0, r0, 0 + vsubx.x vf28, vf30, vf15 + sll r0, r0, 0 + vsubz.x vf29, vf1, vf15 + sll r0, r0, 0 + vitof0.zw vf28, vf28 + sll r0, r0, 0 + vitof0.zw vf29, vf29 + sll r0, r0, 0 + vaddy.y vf28, vf0, vf0 + sll r0, r0, 0 + vaddy.y vf29, vf0, vf0 + sll r0, r0, 0 + vsubw.w vf28, vf30, vf28 + sll r0, r0, 0 + vsubw.w vf29, vf30, vf29 + sll r0, r0, 0 + vitof12.xyzw vf11, vf11 # convert origin1 + sll r0, r0, 0 + bne s4, r0, L164 + vitof12.xyzw vf12, vf12 # convert origin2 + + +# End of stiffness calcultion. +# S4 = 0 version. Goto L164 for S4 != 0 version. +# Maybe this is the version for non-generic?? +B34: +L154: + sll r0, r0, 0 + lw s5, 84(t8) # s5 = clipping/dist flags + sll r0, r0, 0 + lw s4, 108(t0) # s4 = dist_test_w (from work) + addiu t9, t9, 6 # advance output qwc by 6 (96 bytes) + lw s3, 104(t0) # s3 = dist_test_z (from work) + bne s5, r0, L158 # if we clip, go to TIE NEAR. + vsubw.w vf10, vf10, vf10 # clear the w component of origin0 + +B35: + bgtz s4, L156 + sll r0, r0, 0 + +B36: + bgtz s3, L155 + sll r0, r0, 0 + +B37: # GEOM 1 (this is the non-near version) (believed to be the high-LOD one) + sll r0, r0, 0 + lh s4, 78(ra) # load count 1 + sll r0, r0, 0 + lw s5, 64(ra) # load next + daddiu s4, s4, 1 # inc count + sqc2 vf28, 64(t8) # store morph constants + vmulax.xyzw acc, vf20, vf10 # matmul 0/16 + addiu gp, gp, 96 # output pointer, I think this is a pointer to the RAM dma buffer, not spr. + vmadday.xyzw acc, vf21, vf10 # matmul 1/16 + sw gp, 64(ra) # update next. 
+ vmaddz.xyzw vf10, vf22, vf10 # matmul 2/16 (out vf10) + sh s4, 78(ra) # store updated count + vmulax.xyzw acc, vf20, vf11 # matmul 3/16 + lbu s4, 109(ra) # s4 = frag-count 1 + vmadday.xyzw acc, vf21, vf11 # matmul 4/16 + lhu gp, 118(ra) # gp = base-qw + vmaddz.xyzw vf11, vf22, vf11 # matmul 5/16 (out vf11) + lbu s3, 113(ra) # s3 = index-start + beq r0, r0, L157 + sll r0, r0, 0 + +B38: +L155: # geom 2 version (same as above) + sll r0, r0, 0 + lh s4, 80(ra) + sll r0, r0, 0 + lw s5, 68(ra) + daddiu s4, s4, 1 + sqc2 vf29, 64(t8) + vmulax.xyzw acc, vf20, vf10 + addiu gp, gp, 96 + vmadday.xyzw acc, vf21, vf10 + sw gp, 68(ra) + vmaddz.xyzw vf10, vf22, vf10 + sh s4, 80(ra) + vmulax.xyzw acc, vf20, vf11 + lbu s4, 110(ra) + vmadday.xyzw acc, vf21, vf11 + lhu gp, 120(ra) + vmaddz.xyzw vf11, vf22, vf11 + lbu s3, 114(ra) + beq r0, r0, L157 + sll r0, r0, 0 + +B39: +L156: # geom 3 version (same as above) + sll r0, r0, 0 + lh s4, 82(ra) + sll r0, r0, 0 + lw s5, 72(ra) + daddiu s4, s4, 1 + sqc2 vf30, 64(t8) + vmulax.xyzw acc, vf20, vf10 + addiu gp, gp, 96 + vmadday.xyzw acc, vf21, vf10 + sw gp, 72(ra) + vmaddz.xyzw vf10, vf22, vf10 + sh s4, 82(ra) + vmulax.xyzw acc, vf20, vf11 + lbu s4, 111(ra) + vmadday.xyzw acc, vf21, vf11 + lhu gp, 122(ra) + vmaddz.xyzw vf11, vf22, vf11 + lbu s3, 115(ra) + +# common for 1,2,3 geoms +B40: +L157: + vmulax.xyzw acc, vf20, vf12 # matmul 6/16 + lq s2, 224(t0) # s2 = upload color 2 + vmadday.xyzw acc, vf21, vf12 # matmul 7/16 + lq s1, 240(t0) # s1 = upload color ret + vmaddz.xyzw vf12, vf22, vf12 # matmul 8/16 (out vf12) + dsll gp, gp, 4 # base-offset (from base-qw in the prototype) + vmulax.xyzw acc, vf20, vf13 # matmul + daddu s3, s3, ra # s3 = prototype bucket + index zone (noe sure what's here yet.) 
+ vmadday.xyzw acc, vf21, vf13 # matmul + sll r0, r0, 0 + vmaddaz.xyzw acc, vf22, vf13 # matmul + sll r0, r0, 0 + vmaddw.xyzw vf13, vf23, vf0 # matmul (out vf13, we're done) + sll r0, r0, 0 + sqc2 vf10, 0(t8) # store matrix + sll r0, r0, 0 + sqc2 vf11, 16(t8) # store matrix + movz s2, s1, s5 # if we're the first thing added, we'll be the last in the chain, and should put a upload-color-ret! + sqc2 vf12, 32(t8) # store matrix + daddiu t8, t8, 96 # inc SPR ptr. + beq r0, r0, L159 + sqc2 vf13, -48(t8) # store matrix + +# TIE NEAR DMA generation +# (don't care) +B41: +L158: + sll r0, r0, 0 + lqc2 vf24, 320(t0) + sll r0, r0, 0 + lqc2 vf25, 336(t0) + sll r0, r0, 0 + lqc2 vf26, 352(t0) + sll r0, r0, 0 + lqc2 vf27, 368(t0) + sll r0, r0, 0 + lh s4, 76(ra) + sll r0, r0, 0 + lw s5, 60(ra) + daddiu s4, s4, 1 + sqc2 vf28, 64(t8) + vmulax.xyzw acc, vf24, vf10 + addiu gp, gp, 96 + vmadday.xyzw acc, vf25, vf10 + sw gp, 60(ra) + vmaddz.xyzw vf10, vf26, vf10 + sh s4, 76(ra) + vmulax.xyzw acc, vf24, vf11 + lbu s4, 108(ra) + vmadday.xyzw acc, vf25, vf11 + lhu gp, 116(ra) + vmaddz.xyzw vf11, vf26, vf11 + lbu s3, 112(ra) + vmulax.xyzw acc, vf24, vf12 + lq s2, 224(t0) + vmadday.xyzw acc, vf25, vf12 + lq s1, 240(t0) + vmaddz.xyzw vf12, vf26, vf12 + dsll gp, gp, 4 + vmulax.xyzw acc, vf24, vf13 + daddu s3, s3, ra + vmadday.xyzw acc, vf25, vf13 + sll r0, r0, 0 + vmaddaz.xyzw acc, vf26, vf13 + sll r0, r0, 0 + vmaddw.xyzw vf13, vf27, vf0 + sll r0, r0, 0 + sqc2 vf10, 0(t8) + sll r0, r0, 0 + sqc2 vf11, 16(t8) + sll r0, r0, 0 + sqc2 vf12, 32(t8) + movz s2, s1, s5 + sqc2 vf13, 48(t8) + daddiu t8, t8, 96 + +# And Back to common non-generic TIE. +# it is time for colors. 
This is a loop over fragments +# s2 = tag2 +# s4 = frag-count 1 +# gp = base-qw * 16 +# s3 = prototype + index-start +B42: +L159: + sll r0, r0, 0 + lw ra, 8(t4) # ra = color-indices + sll r0, r0, 0 + sq s2, 256(t0) # upload color temp (either ret or 2, we'll refer to it as 2) + sll r0, r0, 0 + lbu s2, 144(s3) # load the first index (actually counts) + addu s1, gp, ra # s1 = color-indices + base-offset + sw s5, 260(t0) # store the address of next in the colors2 tag (doesn't do anything if ret) + daddiu t9, t9, 3 # another 3 qw for the colors. + sw s1, 212(t0) # color + base goes in colors 1 + sll s1, s2, 2 # s1 = index * 4 + sh s2, 208(t0) # color1's qwc = *index + sll s2, s2, 4 # s2 = index * 16 + sb s1, 222(t0) # set something in the vif tag of upload color 1 + daddu gp, gp, s2 # advance our colors ptrs + lq s2, 192(t0) # s2 = upload color 0 + daddiu s5, s5, 48 # inc next + lq s1, 208(t0) # s1 = upload color 1 + daddiu t8, t8, 48 # inc output + lq s0, 256(t0) # s0 = upload temp (2 or ret) + daddiu s3, s3, 1 # inc the instance pointer + sq s2, -48(t8) # upload color 0 (seems to be, a constant?) + daddiu s4, s4, -1 # decrement frag count. + sq s1, -32(t8) # upload color 1 + blez s4, L172 # did we run out of fragments? + sq s0, -16(t8) # upload color 2 + +# top of fragment loop +B43: +L160: + daddiu s2, t9, -252 # did we run out of room in the scratchpad? 
+ sll r0, r0, 0 + blez s2, L163 + sll r0, r0, 0 + +# swap output buffer +B44: +L161: + lw t8, 0(a3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t8, t8, 256 + sll r0, r0, 0 + beq t8, r0, L162 + sll r0, r0, 0 + +B45: + sll r0, r0, 0 + lw t8, 440(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu t8, t8, 1 + sll r0, r0, 0 + sw t8, 440(t0) + beq r0, r0, L161 + sll r0, r0, 0 + +B46: +L162: + sw a1, 128(a3) + xori a1, a1, 12288 + sw v1, 16(a3) + sll t8, t9, 4 + addu v1, v1, t8 + or t8, a1, r0 + sw t9, 32(a3) + addiu t9, r0, 256 + sw t9, 0(a3) + addiu t9, r0, 0 + +# here we have an output buffer that has room for another fragment. +# and add another fragment. +B47: +L163: + sll r0, r0, 0 + lbu s2, 144(s3) # load next index + addu s1, gp, ra # s1 = color-indices + base-offset + sw s5, 260(t0) # store the address of next in the colors2 tag (doesn't do anything if ret) + daddiu t9, t9, 3 # another 3 qw for the colors. + sw s1, 212(t0) # color + base goes in colors 1 + sll s1, s2, 2 # s1 = index * 4 + sh s2, 208(t0) # color1's qwc = *index + sll s2, s2, 4 # s2 = index * 16 + sb s1, 222(t0) # set something in the vif tag of upload color 1 + daddu gp, gp, s2 # advance our colors ptrs + # same as B42 + lq s2, 192(t0) + daddiu s5, s5, 48 + lq s1, 208(t0) + daddiu t8, t8, 48 + lq s0, 256(t0) + daddiu s3, s3, 1 + sq s2, -48(t8) + daddiu s4, s4, -1 + sq s1, -32(t8) + bgtz s4, L160 # except we keep looping if there are fragments left. + sq s0, -16(t8) + +B48: + beq r0, r0, L172 + sll r0, r0, 0 + +# s4 != 0 version +# I think, for the GENERIC renderer. +# wtf there's a square root... 
+B49: +L164: + vmul.xyz vf16, vf6, vf6 + sll r0, r0, 0 + sll r0, r0, 0 + lqc2 vf9, 124(ra) + vsubw.w vf10, vf10, vf10 + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + vadday.x acc, vf16, vf16 + sll r0, r0, 0 + vmaddz.x vf16, vf1, vf16 + sll r0, r0, 0 + vsqrt Q, vf16.x + sll r0, r0, 0 + vmulay.xyzw acc, vf1, vf9 + sll r0, r0, 0 + vmaddaw.xyzw acc, vf1, vf2 + sll r0, r0, 0 + sll r0, r0, 0 + vwaitq + vmsubq.xyzw vf16, vf1, Q + sll r0, r0, 0 + vmulx.xyzw vf16, vf16, vf9 + sll r0, r0, 0 + vmaxx.x vf16, vf16, vf0 + sll r0, r0, 0 + vminiy.x vf16, vf16, vf3 + sll r0, r0, 0 + vftoi0.xyzw vf16, vf16 + sll r0, r0, 0 + sll r0, r0, 0 + qmfc2.i s5, vf16 + sll r0, r0, 0 + sll r0, r0, 0 + andi s5, s5, 255 + sll r0, r0, 0 + beq s5, r0, L154 + sll r0, r0, 0 + +B50: + vcallms 29 + sw s4, 432(t0) + sll r0, r0, 0 + lw s4, 108(t0) + addiu t9, t9, 6 + lw s3, 104(t0) + sll r0, r0, 0 + sw s5, 80(t8) + bgtz s4, L166 + sll r0, r0, 0 + +B51: + bgtz s3, L165 + sll r0, r0, 0 + +B52: + sll r0, r0, 0 + lh s4, 86(ra) + sll r0, r0, 0 + lw s5, 96(ra) + daddiu s4, s4, 1 + sqc2 vf28, 64(t8) + vmulax.xyzw acc, vf24, vf10 + addiu gp, gp, 96 + vmadday.xyzw acc, vf25, vf10 + sw gp, 96(ra) + vmaddz.xyzw vf10, vf26, vf10 + sh s4, 86(ra) + vmulax.xyzw acc, vf24, vf11 + lbu s3, 109(ra) + vmadday.xyzw acc, vf25, vf11 + lhu gp, 118(ra) + vmaddz.xyzw vf11, vf26, vf11 + lbu s4, 113(ra) + beq r0, r0, L167 + sll r0, r0, 0 + +B53: +L165: + sll r0, r0, 0 + lh s4, 88(ra) + sll r0, r0, 0 + lw s5, 100(ra) + daddiu s4, s4, 1 + sqc2 vf29, 64(t8) + vmulax.xyzw acc, vf24, vf10 + addiu gp, gp, 96 + vmadday.xyzw acc, vf25, vf10 + sw gp, 100(ra) + vmaddz.xyzw vf10, vf26, vf10 + sh s4, 88(ra) + vmulax.xyzw acc, vf24, vf11 + lbu s3, 110(ra) + vmadday.xyzw acc, vf25, vf11 + lhu gp, 120(ra) + vmaddz.xyzw vf11, vf26, vf11 + lbu s4, 114(ra) + beq r0, r0, L167 + sll r0, r0, 0 + +B54: +L166: + sll r0, r0, 0 + lh s4, 90(ra) + sll r0, r0, 0 + lw s5, 104(ra) + daddiu s4, s4, 1 + sqc2 vf30, 64(t8) + vmulax.xyzw acc, vf24, vf10 + addiu 
gp, gp, 96 + vmadday.xyzw acc, vf25, vf10 + sw gp, 104(ra) + vmaddz.xyzw vf10, vf26, vf10 + sh s4, 90(ra) + vmulax.xyzw acc, vf24, vf11 + lbu s3, 111(ra) + vmadday.xyzw acc, vf25, vf11 + lhu gp, 122(ra) + vmaddz.xyzw vf11, vf26, vf11 + lbu s4, 115(ra) +B55: +L167: + vmulax.xyzw acc, vf24, vf12 + dsll gp, gp, 4 + vmadday.xyzw acc, vf25, vf12 + daddu s4, s4, ra + vmaddz.xyzw vf12, vf26, vf12 + sll r0, r0, 0 + vmulax.xyzw acc, vf24, vf13 + sll r0, r0, 0 + vmadday.xyzw acc, vf25, vf13 + sll r0, r0, 0 + vmaddaz.xyzw acc, vf26, vf13 + sll r0, r0, 0 + vmaddw.xyzw vf13, vf27, vf0 + sll r0, r0, 0 + sqc2 vf10, 0(t8) + sll r0, r0, 0 + sqc2 vf11, 16(t8) + sll r0, r0, 0 + sqc2 vf12, 32(t8) + sll r0, r0, 0 + sqc2 vf13, 48(t8) + daddiu t8, t8, 96 + sll r0, r0, 0 + lw ra, 8(t4) + sll r0, r0, 0 + lbu s2, 144(s4) + addu s1, gp, ra + sw s5, 284(t0) + daddiu t9, t9, 3 + sw s1, 292(t0) + sll s1, s2, 4 + sh s2, 288(t0) + daddu gp, gp, s1 + lq s2, 272(t0) + daddiu s5, s5, 48 + lq s1, 288(t0) + daddiu t8, t8, 48 + lq s0, 304(t0) + daddiu s4, s4, 1 + sq s2, -48(t8) + daddiu s3, s3, -1 + sq s1, -32(t8) + blez s3, L172 + sq s0, -16(t8) + +B56: +L168: + daddiu s2, t9, -252 + sll r0, r0, 0 + blez s2, L171 + sll r0, r0, 0 + +B57: +L169: + lw t8, 0(a3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi t8, t8, 256 + sll r0, r0, 0 + beq t8, r0, L170 + sll r0, r0, 0 + +B58: + sll r0, r0, 0 + lw t8, 440(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu t8, t8, 1 + sll r0, r0, 0 + sw t8, 440(t0) + beq r0, r0, L169 + sll r0, r0, 0 + +B59: +L170: + sw a1, 128(a3) + xori a1, a1, 12288 + sw v1, 16(a3) + sll t8, t9, 4 + addu v1, v1, t8 + or t8, a1, r0 + sw t9, 32(a3) + addiu t9, r0, 256 + sw t9, 0(a3) + addiu t9, r0, 0 +B60: +L171: + sll r0, r0, 0 + lbu s2, 144(s4) + addu s1, gp, ra + sw s5, 284(t0) + daddiu t9, t9, 3 + sw s1, 292(t0) + sll s1, s2, 4 + sh s2, 288(t0) + daddu gp, gp, s1 + lq s2, 272(t0) + daddiu s5, s5, 48 + lq s1, 288(t0) + daddiu t8, t8, 48 + lq s0, 304(t0) + daddiu s4, 
s4, 1 + sq s2, -48(t8) + daddiu s3, s3, -1 + sq s1, -32(t8) + bgtz s3, L168 + sq s0, -16(t8) + +B61: +# early exit for 1 in a per-8 +L172: + addiu a2, a2, -1 # decrement instance count + srl t7, t7, 1 # update vis mask + daddiu t4, t4, 64 # update instance ptr + sll r0, r0, 0 + bne t7, r0, L148 # reloop, if we've got any left in the group of 8. + lqc2 vf2, 16(t4) # load the bsphere. + +# early exit for per-8 +B62: +L173: + sll r0, r0, 0 + lw ra, 412(t0) + sll r0, r0, 0 + lw t7, 416(t0) + bne ra, t7, L146 + sll r0, r0, 0 + +B63: + bgtz a2, L140 + sll r0, r0, 0 + +B64: + beq t9, r0, L176 + sll r0, r0, 0 + +B65: +L174: + lw a0, 0(a3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a0, a0, 256 + sll r0, r0, 0 + beq a0, r0, L175 + sll r0, r0, 0 + +B66: + sll r0, r0, 0 + lw a0, 440(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a0, a0, 1 + sll r0, r0, 0 + sw a0, 440(t0) + beq r0, r0, L174 + sll r0, r0, 0 + +B67: +L175: + sw a1, 128(a3) + xori a0, a1, 12288 + sw v1, 16(a3) + sll a1, t9, 4 + addu v1, v1, a1 + or a0, a0, r0 + sw t9, 32(a3) + addiu a0, r0, 256 + sw a0, 0(a3) + addiu a0, r0, 0 +B68: +L176: + lw a0, 0(a3) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a0, a0, 256 + sll r0, r0, 0 + beq a0, r0, L177 + sll r0, r0, 0 + +B69: + sll r0, r0, 0 + lw a0, 440(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a0, a0, 1 + sll r0, r0, 0 + sw a0, 440(t0) + beq r0, r0, L176 + sll r0, r0, 0 + +# Final exit +B70: +L177: + lw a0, 396(t0) + sll r0, r0, 0 + sw v1, 4(a0) + sll r0, r0, 0 + or v0, r0, r0 + ld ra, 0(sp) + lq gp, 112(sp) + lq s5, 96(sp) + lq s4, 80(sp) + lq s3, 64(sp) + lq s2, 48(sp) + lq s1, 32(sp) + lq s0, 16(sp) + jr ra + daddiu sp, sp, 128 + + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; .function draw-inline-array-prototype-tie-asm +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;BAD PROLOGUE +;; Warnings: +;; INFO: Flagged as asm by 
config +;; INFO: Assembly Function + +# runs after the instance drawing. +# a0 - dma buffer +# a1 - num prototypes +# a2 - prototype array (a prototype-array-tie) + +# t0 = work +# t1 = SPR from DMA bank +# t3 = SPR ptr to input buffer +# + + +B0: +L84: + daddiu sp, sp, -112 + sd ra, 0(sp) + sq s1, 16(sp) + sq s2, 32(sp) + sq s3, 48(sp) + sq s4, 64(sp) + sq s5, 80(sp) + sq gp, 96(sp) + sll r0, r0, 0 + + # SETUP + lui a3, 28672 # = 0x7000 + lw v1, 4(a0) # = dma buf base + lui t1, 4096 # = 0x1000 + lui t2, 4096 # = 0x1000 + + sync.l + cache dxwbin v1, 0 + sync.l + cache dxwbin v1, 1 + sync.l + + lw t0, *prototype-tie-work*(s7) + ori t1, t1, 53248 # spr FROM + ori t4, t2, 54272 # spr TO + ori t3, a3, 16 # setup spr input buffer pointer + ori t2, a3, 2064 # spr output buffer pointer + sw a0, 10260(a3) # stash dma buffer in spad (dma-buffer of prototype-tie-dma) + daddiu t7, a1, -1 # not sure, is the input data length off by one or something? + sll r0, r0, 0 + lw t5, 12(a2) # t5 = prototype-bucket-tie + addiu a0, r0, 0 # a0 = 0 (number of qw's used so far in the scratchpad output buffer) + or a1, t2, r0 # a1 = spr output + + +B1: +L85: + sll r0, r0, 0 + lq t6, 60(t5) # load next's for the bucket (start of dma chains per geom, in 0, 1, 2, 3 order) + daddiu t8, a2, 4 # t8 = array pointer (array prototype-bucket-tie), load at +12. + sw t7, 10256(a3) # stash length in spad + dsrl32 a2, t6, 0 # a2 = 1's next + sw t8, 280(t0) # stash prototype-array + pcpyud t8, t6, t6 # [2,3] + lw t7, 140(t5) # tie colors + or t8, a2, t8 # t8 = any next's nonzero? + lw a2, 108(t5) # a2 = frag counts array + beq t8, r0, L99 # skip ahead if 0 prototypes drawn. 
+ lw t8, 4(a3) # load mood off of the terrain-context (spr) + +B2: + sll r0, r0, 0 + lq t5, 12(t5) # load geom's from bucket + sll r0, r0, 0 + sq t6, 10272(a3) # stash next's on spad + sll r0, r0, 0 + sw a2, 10304(a3) # stash frag counts on spad + sll r0, r0, 0 + sq t5, 10288(a3) # stash geom's on spad + sll r0, r0, 0 + ld a2, 272(t0) # clamp constant: #x0080'00ff'00ff'00ff (for time of day color interp) + sll r0, r0, 0 + lw t6, 4(t7) # t6 = time of day palette's height (NOTE: the comments below treat this value as the width; confirm which field this is) + daddiu ra, t7, 12 # ra = time of day palette's data + lq t5, 1852(t8) # t5 = itimes0 from mood + sra t7, t6, 2 # t7 = width / 4 + sll r0, r0, 0 + addu t7, t7, a0 # t7 = (width / 4) + qw's already used in the spad output buffer (a0) + addiu t9, r0, 221 # t9 = 221 (I think the max number of qws we can safely have used after colors) + dsubu t7, t9, t7 + sll r0, r0, 0 + bgez t7, L88 # see if we have too much crap in the DMA output buffer + sll r0, r0, 0 + +# dma output full, copy from scratchpad. +B3: +L86: + lw a1, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a1, a1, 256 + sll r0, r0, 0 + beq a1, r0, L87 + sll r0, r0, 0 + +B4: + sll r0, r0, 0 + lw a1, 292(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a1, a1, 1 + sll r0, r0, 0 + sw a1, 292(t0) + beq r0, r0, L86 + sll r0, r0, 0 + +B5: +L87: + sw t2, 128(t1) + xori t2, t2, 4096 + sw v1, 16(t1) + sll a1, a0, 4 + addu v1, v1, a1 + or a1, t2, r0 + sw a0, 32(t1) + addiu a0, r0, 256 + sw a0, 0(t1) + addiu a0, r0, 0 + +# here it is safe to output colors to the current scratchpad +# output buffer. +B6: +L88: + addiu t7, t6, 31 # width + 31 + lq t6, 0(t0) # upload palette 0 + sra t7, t7, 5 # (width + 31) / 32 + addiu a0, a0, 2 # 2 qw's, I guess for upload palette 0 and upload palette 1 + sll t9, t7, 5 # ((width + 31) / 32) * 32, i.e. width rounded up to a multiple of 32 + sq t6, 0(a1) # store upload palette 0 + sra t6, t9, 2 # aligned width / 4 + sb t9, 30(t0) # probably the viftag's unpack count, or something. + addu a0, a0, t6 # going to use up (aligned width / 4) qw's in our dma buffer. 
+ sh t6, 16(t0) # also put this in the giftag for upload1 + sll r0, r0, 0 + lq t6, 1868(t8) # t6 = itimes1 + sll r0, r0, 0 + lq gp, 16(t0) # gp = upload1's tag + sll r0, r0, 0 + lq t7, 1884(t8) # t7 = itimes2 + sll r0, r0, 0 + sq gp, 16(a1) # store upload palette 1 + addiu a1, a1, 32 # advance output pointer (spr) + lq t8, 1900(t8) # t8 = itimes3 + +# begin color stuff +B7: +L89: + lw gp, 0(t4) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi gp, gp, 256 + sll r0, r0, 0 + beq gp, r0, L90 + sll r0, r0, 0 + +B8: + sll r0, r0, 0 + lw gp, 296(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu gp, gp, 1 + sll r0, r0, 0 + sw gp, 296(t0) + beq r0, r0, L89 + sll r0, r0, 0 + +B9: +L90: + sw ra, 16(t4) + daddiu t9, t9, -32 + sw t3, 128(t4) + addiu gp, r0, 64 + sw gp, 32(t4) + addiu gp, r0, 256 + sw gp, 0(t4) + daddiu ra, ra, 1024 +B10: +L91: + or s5, t3, r0 + xori t3, t3, 1024 + blez t9, L94 + daddiu t9, t9, -32 + +B11: +L92: + lw gp, 0(t4) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi gp, gp, 256 + sll r0, r0, 0 + beq gp, r0, L93 + sll r0, r0, 0 + +B12: + sll r0, r0, 0 + lw gp, 296(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu gp, gp, 1 + sll r0, r0, 0 + sw gp, 296(t0) + beq r0, r0, L92 + sll r0, r0, 0 + +B13: +L93: + sw ra, 16(t4) + sll r0, r0, 0 + sw t3, 128(t4) + addiu gp, r0, 64 + sw gp, 32(t4) + addiu gp, r0, 256 + sw gp, 0(t4) + daddiu ra, ra, 1024 + beq r0, r0, L95 + sll r0, r0, 0 + +B14: +L94: + lw gp, 0(t4) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi gp, gp, 256 + sll r0, r0, 0 + beq gp, r0, L95 + sll r0, r0, 0 + +B15: + sll r0, r0, 0 + lw gp, 296(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu gp, gp, 1 + sll r0, r0, 0 + sw gp, 296(t0) + beq r0, r0, L94 + sll r0, r0, 0 + +B16: +L95: + addiu gp, a1, 128 + lq s2, 12(s5) + sll r0, r0, 0 + lq s4, 28(s5) + pextlb s3, r0, s2 + mfc1 r0, f31 + pextub s2, r0, s2 + mfc1 r0, f31 + pmulth r0, s3, t5 + mfc1 r0, f31 + pextlb s3, r0, s4 + mfc1 r0, f31 + pmaddh r0, 
s2, t6 + mfc1 r0, f31 + pextub s4, r0, s4 + mfc1 r0, f31 + pmaddh r0, s3, t7 + lq s3, 44(s5) + addiu s5, s5, 32 + sll r0, r0, 0 + pmaddh r0, s4, t8 + lq s4, 28(s5) + pextlb s2, r0, s3 + mfc1 r0, f31 +B17: +L96: + pextub s3, r0, s3 + mfc1 r0, f31 + pmfhl.lh s1 + mfc1 r0, f31 + pmulth r0, s2, t5 + mfc1 r0, f31 + psrlh s2, s1, 6 + mfc1 r0, f31 + pcpyud s1, s2, s2 + mfc1 r0, f31 + paddh s2, s1, s2 + mfc1 r0, f31 + pminh s2, s2, a2 + mfc1 r0, f31 + ppacb s1, r0, s2 + mfc1 r0, f31 + pextlb s2, r0, s4 + mfc1 r0, f31 + pmaddh r0, s3, t6 + sw s1, 0(a1) + pextub s4, r0, s4 + mfc1 r0, f31 + pmaddh r0, s2, t7 + lq s3, 44(s5) + addiu s5, s5, 32 + addiu a1, a1, 4 + pmaddh r0, s4, t8 + lq s4, 28(s5) + bne a1, gp, L96 + pextlb s2, r0, s3 + +B18: + bgez t9, L91 + sll r0, r0, 0 + +# done with colors +B19: + sll r0, r0, 0 + lw a2, 10276(a3) # next1 + sll r0, r0, 0 + lw t6, 10292(a3) # geom1 + beq a2, r0, L97 + lbu t5, 10305(a3) # frag1 + +B20: + bgezal r0, L100 # call sub at L100 for adding the geom. + sll r0, r0, 0 + +B21: +L97: + sll r0, r0, 0 + lw a2, 10280(a3) + sll r0, r0, 0 + lw t6, 10296(a3) + beq a2, r0, L98 + lbu t5, 10306(a3) + +B22: + bgezal r0, L100 + sll r0, r0, 0 + +B23: +L98: + sll r0, r0, 0 + lw a2, 10284(a3) + sll r0, r0, 0 + lw t6, 10300(a3) + beq a2, r0, L99 + lbu t5, 10307(a3) + +B24: + bgezal r0, L100 + sll r0, r0, 0 + +# early exit if we didn't draw any of this protytpe +B25: +L99: + sll r0, r0, 0 + lw a2, 280(t0) + sll r0, r0, 0 + lw t6, 10256(a3) + sll r0, r0, 0 + lw t5, 12(a2) + bne t6, r0, L85 + daddiu t7, t6, -1 + +B26: + beq r0, r0, L105 + sll r0, r0, 0 + +# geom upload thing. 
+B27: +L100: + addiu t6, t6, 32 # offset of first frag within a prototype-tie + sll r0, r0, 0 +B28: +L101: + addiu t7, a0, 4 # looks like we're going to use 4 qw's of our buffer + addiu t8, r0, 255 # and we can go all the way to the end this time (the earlier color check in this function used 221) + dsubu t7, t8, t7 + lw t8, 0(t6) # t8 = gif ref + bgez t7, L104 + lhu t7, 30(t6) + +B29: +L102: + lw a1, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a1, a1, 256 + sll r0, r0, 0 + beq a1, r0, L103 + sll r0, r0, 0 + +B30: + sll r0, r0, 0 + lw a1, 292(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a1, a1, 1 + sll r0, r0, 0 + sw a1, 292(t0) + beq r0, r0, L102 + sll r0, r0, 0 + +B31: +L103: + sw t2, 128(t1) + xori t2, t2, 4096 + sw v1, 16(t1) + sll a1, a0, 4 + addu v1, v1, a1 + or a1, t2, r0 + sw a0, 32(t1) + addiu a0, r0, 256 + sw a0, 0(t1) + addiu a0, r0, 0 +# got enough room, make the output for this frag: +# a2 = next, t6 = tie-fragment, t5 = frag count, +B32: +L104: + sll r0, r0, 0 + lw s5, 4(t6) # s5 = point ref + sll r0, r0, 0 + lhu s4, 28(t6) # s4 = tex-count + sll r0, r0, 0 + lhu gp, 32(t6) # gp = vertex count + sll r0, r0, 0 + sw t8, 36(t0) # store gif ref ptr in upload model 0 + sll r0, r0, 0 + sh s4, 32(t0) # store tex count in upload 0 + daddiu t9, s4, 16384 # what's this doing... something with the unpack. maybe an offset. + sb s4, 46(t0) # store tex count in upload 0's vif tag + dsll s4, s4, 4 # s4 now bytes + sw s5, 68(t0) # point ref in upload 2 + daddu t8, t8, s4 # advance gif ref + sh gp, 64(t0) # vertex count in upload 2 dma + dsll gp, gp, 1 # multiply by 2 (I guess it unpacks to 2x as big?) + sw a2, 84(t0) # store next in upload 3 + sll r0, r0, 0 + sb gp, 78(t0) # vertex count * 2 in upload 2 viftag unpack + sll r0, r0, 0 + sw t8, 52(t0) # ?? in upload 1 (more "gif" stuff) + sll r0, r0, 0 + sh t7, 48(t0) # upload 1 gets gif-count + dsll t7, t7, 2 # also unpacks + sh t9, 60(t0) # really not sure what this is here... 
+ sll r0, r0, 0 + sb t7, 62(t0) # unpack count for the extra gif stuff. + sll r0, r0, 0 + lq t7, 32(t0) # t7 = upload0 + sll r0, r0, 0 + lq t8, 48(t0) # t8 = upload1 + sll r0, r0, 0 + lq t9, 64(t0) # t9 = upload2 + sll r0, r0, 0 + lq gp, 80(t0) # gp = upload3 + daddiu a0, a0, 4 + sq t7, 0(a1) + daddiu t5, t5, -1 + sq t8, 16(a1) + daddiu a2, a2, 48 + sq t9, 32(a1) + sq gp, 48(a1) + daddiu a1, a1, 64 + bgtz t5, L101 + daddiu t6, t6, 64 + +B33: + jr ra + sll r0, r0, 0 +# end of geom upload subroutine + +B34: +L105: + beq a0, r0, L108 + sll r0, r0, 0 + +B35: +L106: + lw a1, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a1, a1, 256 + sll r0, r0, 0 + beq a1, r0, L107 + sll r0, r0, 0 + +B36: + sll r0, r0, 0 + lw a1, 292(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a1, a1, 1 + sll r0, r0, 0 + sw a1, 292(t0) + beq r0, r0, L106 + sll r0, r0, 0 + +B37: +L107: + sw t2, 128(t1) + sll r0, r0, 0 + sw v1, 16(t1) + sll a1, a0, 4 + addu v1, v1, a1 + sll r0, r0, 0 + sw a0, 32(t1) + addiu a0, r0, 256 + sw a0, 0(t1) + sll r0, r0, 0 +B38: +L108: + lw a0, 0(t1) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + andi a0, a0, 256 + sll r0, r0, 0 + beq a0, r0, L109 + sll r0, r0, 0 + +B39: + sll r0, r0, 0 + lw a0, 292(t0) + sll r0, r0, 0 + sll r0, r0, 0 + sll r0, r0, 0 + daddiu a0, a0, 1 + sll r0, r0, 0 + sw a0, 292(t0) + beq r0, r0, L108 + sll r0, r0, 0 + +B40: +L109: + lw a0, 10260(a3) + sll r0, r0, 0 + sw v1, 4(a0) + sll r0, r0, 0 + or v0, r0, r0 + ld ra, 0(sp) + lq gp, 96(sp) + lq s5, 80(sp) + lq s4, 64(sp) + lq s3, 48(sp) + lq s2, 32(sp) + lq s1, 16(sp) + jr ra + daddiu sp, sp, 112 + + sll r0, r0, 0 + sll r0, r0, 0 \ No newline at end of file diff --git a/docs/scratch/tie_format.txt b/docs/scratch/tie_format.txt new file mode 100644 index 0000000000..2cf9f48584 --- /dev/null +++ b/docs/scratch/tie_format.txt @@ -0,0 +1,63 @@ +TIE Format Document +-------------------- + + +TIE Instances +---------------- + +The instances are arranged into a BVH like with 
tfrag. +The visibility bit strings work exactly the same way too. + +The "origin" matrix stores 16 bits per entry. +The final row is set with (x << 16) >> 10 and vitof0 +The other rows are set with (x << 16) >> 16 and vitof12 +row0's w is set to 0 manually. + + +There are 64 winds. The wind index of an instance is (wind-index + wind-time) & 0b111111. The wind time is stored in *wind-work*. +This indexes into the wind-vectors in the proxy-prototype-array-tie of the drawable-tree-instance-tie. +(and also the wind-vectors of *wind-work*, but this isn't known yet) + + +The flags mean: +0b01 : Do not generate instance DMA, in all conditions. Likely rare because it's checked pretty late. +0b10 : Use the GENERIC renderer. + +DMA in work: + work.upload-color-0.addr = output_ptr + work.generic-color-0.addr = output_ptr + + +calculation of clipped w: + multiply bsphere by camera matrix + add hvdf_offset.w to w + clamp to within (hmge-d.x, hmge-d.y) + +calculation of total camera matrix: +vf10+ is origin, vf20+ is the SHRUB MATRIX!!! + +vmulax.xyzw acc, vf20, vf10 +vmadday.xyzw acc, vf21, vf10 +vmaddz.xyzw vf10, vf22, vf10 +vmulax.xyzw acc, vf20, vf11 +vmadday.xyzw acc, vf21, vf11 +vmaddz.xyzw vf11, vf22, vf11 +vmulax.xyzw acc, vf20, vf12 +vmadday.xyzw acc, vf21, vf12 +vmaddz.xyzw vf12, vf22, vf12 +vmulax.xyzw acc, vf20, vf13 +vmadday.xyzw acc, vf21, vf13 +vmaddaz.xyzw acc, vf22, vf13 +vmaddw.xyzw vf13, vf23, vf0 + +Out data (6 qw = 96 bytes): + 0-64 : matrix. + 64 : morph constants + 80 : [flags & 2, clipping/dists, 0, clipped_w] + +Next are 3x "color" tags. +The final one will link to the "next" instance in the proto bucket +For the final instance in the bucket, it will be a ret. 
+ +96: color0 + diff --git a/game/CMakeLists.txt b/game/CMakeLists.txt index 3e3da2c27e..21277bc446 100644 --- a/game/CMakeLists.txt +++ b/game/CMakeLists.txt @@ -19,7 +19,7 @@ if(UNIX) -Woverloaded-virtual \ -Wredundant-decls \ -Wshadow \ - -Wsign-promo") + -Wsign-promo -O3 -march=haswell") else() set(CMAKE_CXX_FLAGS "/EHsc") endif(UNIX) @@ -99,8 +99,10 @@ set(RUNTIME_SOURCE graphics/opengl_renderer/tfrag/BufferedRenderer.cpp graphics/opengl_renderer/tfrag/program6_cpu.cpp graphics/opengl_renderer/tfrag/Tfrag3.cpp + graphics/opengl_renderer/tfrag/tfrag_common.cpp graphics/opengl_renderer/tfrag/tfrag_unpack.cpp graphics/opengl_renderer/tfrag/TFragment.cpp + graphics/opengl_renderer/tfrag/Tie3.cpp graphics/texture/TextureConverter.cpp graphics/texture/TexturePool.cpp graphics/pipelines/opengl.cpp @@ -121,4 +123,3 @@ endif() add_executable(gk main.cpp) target_link_libraries(gk runtime) -install(TARGETS gk) diff --git a/game/graphics/opengl_renderer/BucketRenderer.h b/game/graphics/opengl_renderer/BucketRenderer.h index 44a8142277..45d27bd918 100644 --- a/game/graphics/opengl_renderer/BucketRenderer.h +++ b/game/graphics/opengl_renderer/BucketRenderer.h @@ -17,8 +17,10 @@ enum class BucketId { SKY_DRAW = 3, TFRAG_TEX_LEVEL0 = 5, TFRAG_LEVEL0 = 6, + TIE_LEVEL0 = 9, TFRAG_TEX_LEVEL1 = 12, TFRAG_LEVEL1 = 13, + TIE_LEVEL1 = 16, SHRUB_TEX_LEVEL0 = 19, SHRUB_TEX_LEVEL1 = 25, ALPHA_TEX_LEVEL0 = 31, diff --git a/game/graphics/opengl_renderer/Loader.cpp b/game/graphics/opengl_renderer/Loader.cpp index 58fa534e65..a0d9adfe83 100644 --- a/game/graphics/opengl_renderer/Loader.cpp +++ b/game/graphics/opengl_renderer/Loader.cpp @@ -26,7 +26,8 @@ tfrag3::Level* Loader::get_tfrag3_level(const std::string& level_name) { Serializer ser(data.data(), data.size()); result.serialize(ser); double import_time = import_timer.getSeconds(); - fmt::print("Load from file: {:.3f}s, import {:.3f}s\n", disk_load_time, import_time); + fmt::print("------------> Load from file: {:.3f}s, import 
{:.3f}s\n", disk_load_time, + import_time); return &result; } else { return &existing->second; diff --git a/game/graphics/opengl_renderer/OpenGLRenderer.cpp b/game/graphics/opengl_renderer/OpenGLRenderer.cpp index 7191b33c42..8cd1a1d066 100644 --- a/game/graphics/opengl_renderer/OpenGLRenderer.cpp +++ b/game/graphics/opengl_renderer/OpenGLRenderer.cpp @@ -9,6 +9,7 @@ #include "common/util/FileUtil.h" #include "game/graphics/opengl_renderer/SkyRenderer.h" #include "game/graphics/opengl_renderer/tfrag/TFragment.h" +#include "game/graphics/opengl_renderer/tfrag/Tie3.h" // for the vif callback #include "game/kernel/kmachine.h" @@ -73,8 +74,10 @@ void OpenGLRenderer::init_bucket_renderers() { init_bucket_renderer("tfrag-tex-0", BucketId::TFRAG_TEX_LEVEL0); init_bucket_renderer("tfrag-0", BucketId::TFRAG_LEVEL0, normal_tfrags, false); + init_bucket_renderer("tie-0", BucketId::TIE_LEVEL0); init_bucket_renderer("tfrag-tex-1", BucketId::TFRAG_TEX_LEVEL1); init_bucket_renderer("tfrag-1", BucketId::TFRAG_LEVEL1, normal_tfrags, false); + init_bucket_renderer("tie-1", BucketId::TIE_LEVEL1); init_bucket_renderer("shrub-tex-0", BucketId::SHRUB_TEX_LEVEL0); init_bucket_renderer("shrub-tex-1", BucketId::SHRUB_TEX_LEVEL1); init_bucket_renderer("alpha-tex-0", BucketId::ALPHA_TEX_LEVEL0); diff --git a/game/graphics/opengl_renderer/shaders/tfrag3.frag b/game/graphics/opengl_renderer/shaders/tfrag3.frag index acf564addb..aebae7884c 100644 --- a/game/graphics/opengl_renderer/shaders/tfrag3.frag +++ b/game/graphics/opengl_renderer/shaders/tfrag3.frag @@ -14,7 +14,7 @@ void main() { vec4 T0 = texture(tex_T0, tex_coord.xy / tex_coord.z); color = fragment_color * T0 * 2.0; - if (color.a <= alpha_min) { + if (color.a < alpha_min) { discard; } diff --git a/game/graphics/opengl_renderer/shaders/tfrag3_decal.frag b/game/graphics/opengl_renderer/shaders/tfrag3_decal.frag new file mode 100644 index 0000000000..e69de29bb2 diff --git a/game/graphics/opengl_renderer/shaders/tfrag3_decal.vert 
b/game/graphics/opengl_renderer/shaders/tfrag3_decal.vert new file mode 100644 index 0000000000..e69de29bb2 diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.cpp b/game/graphics/opengl_renderer/tfrag/TFragment.cpp index 5beb74ae22..856dcecfd9 100644 --- a/game/graphics/opengl_renderer/tfrag/TFragment.cpp +++ b/game/graphics/opengl_renderer/tfrag/TFragment.cpp @@ -101,8 +101,8 @@ void TFragment::render(DmaFollower& dma, } assert(!level_name.empty()); - m_tfrag3.setup_for_level(level_name, render_state); - Tfrag3::RenderSettings settings; + m_tfrag3.setup_for_level(m_tree_kinds, level_name, render_state); + TfragRenderSettings settings; settings.hvdf_offset = m_tfrag_data.hvdf_offset; settings.fog_x = m_tfrag_data.fog.x(); memcpy(settings.math_camera.data(), &m_buffered_data[0].pad[TFragDataMem::TFragMatrix0 * 16], @@ -178,10 +178,22 @@ void TFragment::render(DmaFollower& dma, } if (m_hack_test_many_levels) { + std::vector all_kinds = { + tfrag3::TFragmentTreeKind::NORMAL, tfrag3::TFragmentTreeKind::TRANS, + tfrag3::TFragmentTreeKind::DIRT, tfrag3::TFragmentTreeKind::ICE, + tfrag3::TFragmentTreeKind::LOWRES, tfrag3::TFragmentTreeKind::LOWRES_TRANS}; for (int i = 0; i < HackManyLevels::NUM_LEVELS; i++) { if (m_many_level_render.level_enables[i]) { - m_many_level_render.level_renderers[i].setup_for_level(level_names[i], render_state); - Tfrag3::RenderSettings settings; + if (!m_many_level_render.tfrag_level_renderers[i]) { + m_many_level_render.tfrag_level_renderers[i] = std::make_unique(); + } + if (!m_many_level_render.tie_level_renderers[i]) { + m_many_level_render.tie_level_renderers[i] = std::make_unique("tie", m_my_id); + } + m_many_level_render.tfrag_level_renderers[i]->setup_for_level(all_kinds, level_names[i], + render_state); + m_many_level_render.tie_level_renderers[i]->setup_for_level(level_names[i], render_state); + TfragRenderSettings settings; settings.hvdf_offset = m_tfrag_data.hvdf_offset; settings.fog_x = m_tfrag_data.fog.x(); 
memcpy(settings.math_camera.data(), @@ -193,12 +205,15 @@ void TFragment::render(DmaFollower& dma, auto t3prof = prof.make_scoped_child(level_names[i]); - m_many_level_render.level_renderers[i].debug_render_all_trees_nolores(settings, - render_state, t3prof); + m_many_level_render.tfrag_level_renderers[i]->debug_render_all_trees_nolores( + settings, render_state, t3prof); + m_many_level_render.tie_level_renderers[i]->render_all_trees(settings, render_state, + t3prof); } } } } + void TFragment::draw_debug_window() { ImGui::Separator(); ImGui::Checkbox("Extra Debug", &m_extra_debug); @@ -307,8 +322,8 @@ void TFragment::handle_initialization(DmaFollower& dma, m_globals.vf04_ambient = m_tfrag_data.ambient; // TODO get rid? auto pc_port_data = dma.read_and_advance(); - assert(pc_port_data.size_bytes == sizeof(PcPortData)); - memcpy(&m_pc_port_data, pc_port_data.data, sizeof(PcPortData)); + assert(pc_port_data.size_bytes == sizeof(TfragPcPortData)); + memcpy(&m_pc_port_data, pc_port_data.data, sizeof(TfragPcPortData)); m_pc_port_data.level_name[11] = '\0'; for (int i = 0; i < 4; i++) { diff --git a/game/graphics/opengl_renderer/tfrag/TFragment.h b/game/graphics/opengl_renderer/tfrag/TFragment.h index 6b6bdb6a04..7ebb65e34c 100644 --- a/game/graphics/opengl_renderer/tfrag/TFragment.h +++ b/game/graphics/opengl_renderer/tfrag/TFragment.h @@ -212,12 +212,7 @@ class TFragment : public BucketRenderer { TFragData m_tfrag_data; TFragKickZone m_kick_data; - struct PcPortData { - Vector4f planes[4]; - math::Vector itimes[4]; - char level_name[12]; - u32 tree_idx; - } m_pc_port_data; + TfragPcPortData m_pc_port_data; // buffers TFragBufferedData m_buffered_data[2]; @@ -301,7 +296,8 @@ class TFragment : public BucketRenderer { struct HackManyLevels { static constexpr int NUM_LEVELS = 23; - Tfrag3 level_renderers[NUM_LEVELS]; + std::unique_ptr tfrag_level_renderers[NUM_LEVELS]; + std::unique_ptr tie_level_renderers[NUM_LEVELS]; bool level_enables[NUM_LEVELS] = {0}; } 
m_many_level_render; }; diff --git a/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp b/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp index 26fecd5bbc..4aa147e837 100644 --- a/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp +++ b/game/graphics/opengl_renderer/tfrag/Tfrag3.cpp @@ -34,43 +34,49 @@ Tfrag3::~Tfrag3() { glDeleteVertexArrays(1, &m_debug_vao); } -void Tfrag3::setup_for_level(const std::string& level, SharedRenderState* render_state) { +void Tfrag3::setup_for_level(const std::vector& tree_kinds, + const std::string& level, + SharedRenderState* render_state) { // make sure we have the level data. auto lev_data = render_state->loader.get_tfrag3_level(level); if (m_level_name != level) { + Timer tfrag3_setup_timer; fmt::print("new level for tfrag3: {} -> {}\n", m_level_name, level); fmt::print("discarding old stuff\n"); discard_tree_cache(); - fmt::print("level has {} trees\n", lev_data->trees.size()); - m_cached_trees.resize(lev_data->trees.size()); + fmt::print("level has {} trees\n", lev_data->tfrag_trees.size()); + m_cached_trees.clear(); size_t idx_buffer_len = 0; size_t time_of_day_count = 0; + size_t vis_temp_len = 0; + size_t max_draw = 0; - for (size_t tree_idx = 0; tree_idx < lev_data->trees.size(); tree_idx++) { - const auto& tree = lev_data->trees[tree_idx]; - m_cached_trees[tree_idx].kind = tree.kind; - if (tree.kind != tfrag3::TFragmentTreeKind::INVALID) { + for (size_t tree_idx = 0; tree_idx < lev_data->tfrag_trees.size(); tree_idx++) { + const auto& tree = lev_data->tfrag_trees[tree_idx]; + m_cached_trees.emplace_back(); + auto& tree_cache = m_cached_trees.back(); + + tree_cache.kind = tree.kind; + if (std::find(tree_kinds.begin(), tree_kinds.end(), tree.kind) != tree_kinds.end()) { + max_draw = std::max(tree.draws.size(), max_draw); for (auto& draw : tree.draws) { - idx_buffer_len = std::max(idx_buffer_len, draw.vertex_index_stream.size()); + idx_buffer_len += draw.vertex_index_stream.size(); } time_of_day_count = std::max(tree.colors.size(), 
time_of_day_count); u32 verts = tree.vertices.size(); fmt::print(" tree {} has {} verts ({} kB) and {} draws\n", tree_idx, verts, verts * sizeof(tfrag3::PreloadedVertex) / 1024.f, tree.draws.size()); - glGenVertexArrays(1, &m_cached_trees[tree_idx].vao); - glBindVertexArray(m_cached_trees[tree_idx].vao); - glGenBuffers(1, &m_cached_trees[tree_idx].vertex_buffer); - m_cached_trees[tree_idx].vert_count = verts; - m_cached_trees[tree_idx].draws = &tree.draws; // todo - should we just copy this? - m_cached_trees[tree_idx].colors = &tree.colors; - m_cached_trees[tree_idx].vis = &tree.vis_nodes; - // don't bother with vis if we only have children. - m_cached_trees[tree_idx].num_vis_tree_roots = tree.only_children ? 0 : tree.num_roots; - m_cached_trees[tree_idx].vis_tree_root = tree.first_root; - m_cached_trees[tree_idx].vis_temp.resize(tree.vis_nodes.size()); - m_cached_trees[tree_idx].culled_indices.resize(idx_buffer_len); - glBindBuffer(GL_ARRAY_BUFFER, m_cached_trees[tree_idx].vertex_buffer); + glGenVertexArrays(1, &tree_cache.vao); + glBindVertexArray(tree_cache.vao); + glGenBuffers(1, &tree_cache.vertex_buffer); + tree_cache.vert_count = verts; + tree_cache.draws = &tree.draws; // todo - should we just copy this? 
+ tree_cache.colors = &tree.colors; + tree_cache.vis = &tree.bvh; + tree_cache.tod_cache = swizzle_time_of_day(tree.colors); + vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size()); + glBindBuffer(GL_ARRAY_BUFFER, tree_cache.vertex_buffer); glBufferData(GL_ARRAY_BUFFER, verts * sizeof(tfrag3::PreloadedVertex), nullptr, GL_DYNAMIC_DRAW); glEnableVertexAttribArray(0); @@ -107,6 +113,11 @@ void Tfrag3::setup_for_level(const std::string& level, SharedRenderState* render } } + fmt::print("TFRAG temporary vis output size: {}\n", vis_temp_len); + m_cache.vis_temp.resize(vis_temp_len); + fmt::print("TFRAG max draws/tree: {}\n", max_draw); + m_cache.draw_idx_temp.resize(max_draw); + fmt::print("level has {} textures\n", lev_data->textures.size()); for (auto& tex : lev_data->textures) { GLuint gl_tex; @@ -127,6 +138,7 @@ void Tfrag3::setup_for_level(const std::string& level, SharedRenderState* render } fmt::print("level max index stream: {}\n", idx_buffer_len); + m_cache.index_list.resize(idx_buffer_len); m_has_index_buffer = true; glGenBuffers(1, &m_index_buffer); glActiveTexture(GL_TEXTURE1); @@ -149,155 +161,30 @@ void Tfrag3::setup_for_level(const std::string& level, SharedRenderState* render glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); m_level_name = level; + fmt::print("TFRAG3 setup: {:.3f}\n", tfrag3_setup_timer.getSeconds()); } } -void Tfrag3::first_draw_setup(const RenderSettings& settings, SharedRenderState* render_state) { - render_state->shaders[ShaderId::TFRAG3].activate(); - glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "tex_T0"), 0); - glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "tex_T1"), 1); - glUniformMatrix4fv(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "camera"), - 1, GL_FALSE, settings.math_camera.data()); - glUniform4f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "hvdf_offset"), - settings.hvdf_offset[0], 
settings.hvdf_offset[1], settings.hvdf_offset[2], - settings.hvdf_offset[3]); - glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "fog_constant"), - settings.fog_x); -} - -Tfrag3::DoubleDraw Tfrag3::setup_shader(const RenderSettings& /*settings*/, - SharedRenderState* render_state, - DrawMode mode) { - glActiveTexture(GL_TEXTURE0); - - if (mode.get_zt_enable()) { - glEnable(GL_DEPTH_TEST); - switch (mode.get_depth_test()) { - case GsTest::ZTest::NEVER: - glDepthFunc(GL_NEVER); - break; - case GsTest::ZTest::ALWAYS: - glDepthFunc(GL_ALWAYS); - break; - case GsTest::ZTest::GEQUAL: - glDepthFunc(GL_GEQUAL); - break; - case GsTest::ZTest::GREATER: - glDepthFunc(GL_GREATER); - break; - default: - assert(false); - } - } else { - glDisable(GL_DEPTH_TEST); - } - - if (mode.get_ab_enable() && mode.get_alpha_blend() != DrawMode::AlphaBlend::DISABLED) { - glEnable(GL_BLEND); - switch (mode.get_alpha_blend()) { - case DrawMode::AlphaBlend::SRC_DST_SRC_DST: - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - break; - case DrawMode::AlphaBlend::SRC_0_SRC_DST: - glBlendFunc(GL_SRC_ALPHA, GL_ONE); - break; - case DrawMode::AlphaBlend::SRC_0_FIX_DST: - glBlendFunc(GL_ONE, GL_ONE); - break; - default: - assert(false); - } - } else { - glDisable(GL_BLEND); - } - - if (mode.get_clamp_s_enable()) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - } - - if (mode.get_clamp_t_enable()) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); - } - - if (mode.get_filt_enable()) { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } else { - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, 
GL_NEAREST); - } - - // for some reason, they set atest NEVER + FB_ONLY to disable depth writes - bool alpha_hack_to_disable_z_write = false; - DoubleDraw double_draw; - - float alpha_min = 0.; - if (mode.get_at_enable()) { - switch (mode.get_alpha_test()) { - case DrawMode::AlphaTest::ALWAYS: - break; - case DrawMode::AlphaTest::GEQUAL: - alpha_min = mode.get_aref() / 127.f; - switch (mode.get_alpha_fail()) { - case GsTest::AlphaFail::KEEP: - // ok, no need for double draw - break; - case GsTest::AlphaFail::FB_ONLY: - // darn, we need to draw twice - double_draw.kind = DoubleDrawKind::AFAIL_NO_DEPTH_WRITE; - double_draw.aref = alpha_min; - break; - default: - assert(false); - } - break; - case DrawMode::AlphaTest::NEVER: - if (mode.get_alpha_fail() == GsTest::AlphaFail::FB_ONLY) { - alpha_hack_to_disable_z_write = true; - } else { - assert(false); - } - break; - default: - assert(false); - } - } - - if (mode.get_depth_write_enable()) { - glDepthMask(GL_TRUE); - } else { - glDepthMask(GL_FALSE); - } - - glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"), - alpha_min); - glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"), - 10.f); - - return double_draw; -} - -void Tfrag3::render_tree(const RenderSettings& settings, +void Tfrag3::render_tree(const TfragRenderSettings& settings, SharedRenderState* render_state, - ScopedProfilerNode& prof, - bool use_vis) { + ScopedProfilerNode& prof) { auto& tree = m_cached_trees.at(settings.tree_idx); assert(tree.kind != tfrag3::TFragmentTreeKind::INVALID); if (m_color_result.size() < tree.colors->size()) { m_color_result.resize(tree.colors->size()); } - interp_time_of_day_slow(settings.time_of_day_weights, *tree.colors, m_color_result.data()); + if (m_use_fast_time_of_day) { + interp_time_of_day_fast(settings.time_of_day_weights, tree.tod_cache, m_color_result.data()); + } else { + interp_time_of_day_slow(settings.time_of_day_weights, *tree.colors, 
m_color_result.data()); + } glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture); glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, m_color_result.data()); - first_draw_setup(settings, render_state); + first_tfrag_draw_setup(settings, render_state); glBindVertexArray(tree.vao); glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer); @@ -306,41 +193,33 @@ void Tfrag3::render_tree(const RenderSettings& settings, glEnable(GL_PRIMITIVE_RESTART); glPrimitiveRestartIndex(UINT32_MAX); - for (const auto& draw : *tree.draws) { - glBindTexture(GL_TEXTURE_2D, m_textures.at(draw.tree_tex_id)); - auto double_draw = setup_shader(settings, render_state, draw.mode); - tree.tris_this_frame += draw.num_triangles; - tree.draws_this_frame++; - int draw_size = draw.vertex_index_stream.size(); - if (use_vis) { - int vtx_idx = 0; - int out_idx = 0; - for (auto& grp : draw.vis_groups) { - if (grp.tfrag_idx == 0xffffffff || tree.vis_temp.at(grp.tfrag_idx)) { - memcpy(&tree.culled_indices[out_idx], &draw.vertex_index_stream[vtx_idx], - grp.num * sizeof(u32)); - out_idx += grp.num; - } + cull_check_all_slow(settings.planes, tree.vis->vis_nodes, m_cache.vis_temp.data()); - vtx_idx += grp.num; - } + int idx_buffer_ptr = make_index_list_from_vis_string( + m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp); - draw_size = out_idx; - if (draw_size == 0) { - continue; - } + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), + m_cache.index_list.data()); - prof.add_draw_call(); - prof.add_tri(draw.num_triangles * (float)out_idx / draw.vertex_index_stream.size()); + for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) { + const auto& draw = tree.draws->operator[](draw_idx); + const auto& indices = m_cache.draw_idx_temp[draw_idx]; - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, out_idx * sizeof(u32), - tree.culled_indices.data()); - } else { - 
glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, draw.vertex_index_stream.size() * sizeof(u32), - draw.vertex_index_stream.data()); + if (indices.second <= indices.first) { + continue; } - glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)0); + glBindTexture(GL_TEXTURE_2D, m_textures.at(draw.tree_tex_id)); + auto double_draw = setup_tfrag_shader(settings, render_state, draw.mode); + tree.tris_this_frame += draw.num_triangles; + tree.draws_this_frame++; + int draw_size = indices.second - indices.first; + void* offset = (void*)(indices.first * sizeof(u32)); + + prof.add_draw_call(); + prof.add_tri(draw.num_triangles * (float)draw_size / draw.vertex_index_stream.size()); + + glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset); switch (double_draw.kind) { case DoubleDrawKind::NONE: @@ -353,7 +232,7 @@ void Tfrag3::render_tree(const RenderSettings& settings, glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"), double_draw.aref); glDepthMask(GL_FALSE); - glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)0); + glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset); break; default: assert(false); @@ -362,63 +241,28 @@ void Tfrag3::render_tree(const RenderSettings& settings, glBindVertexArray(0); } -bool sphere_in_view_ref(const math::Vector4f& sphere, const math::Vector4f* planes) { - /* - *(let ((v1-0 *math-camera*)) - (.lvf vf6 (&-> arg0 quad)) - (.lvf vf1 (&-> v1-0 plane 0 quad)) - (.lvf vf2 (&-> v1-0 plane 1 quad)) - (.lvf vf3 (&-> v1-0 plane 2 quad)) - (.lvf vf4 (&-> v1-0 plane 3 quad)) - ) - (.mul.x.vf acc vf1 vf6) - (.add.mul.y.vf acc vf2 vf6 acc) - (.add.mul.z.vf acc vf3 vf6 acc) - (.sub.mul.w.vf vf5 vf4 vf0 acc) - (.add.w.vf vf5 vf5 vf6) - (.mov v1-1 vf5) - (.pcgtw v1-2 r0-0 v1-1) - (.ppach v1-3 r0-0 v1-2) - (zero? 
(the-as int v1-3)) - */ - - math::Vector4f acc = - planes[0] * sphere.x() + planes[1] * sphere.y() + planes[2] * sphere.z() - planes[3]; - - return acc.x() > -sphere.w() && acc.y() > -sphere.w() && acc.z() > -sphere.w() && - acc.w() > -sphere.w(); -} - -void cull_ref_all(const math::Vector4f* planes, - const std::vector& nodes, - u8* out) { - for (size_t i = 0; i < nodes.size(); i++) { - out[i] = sphere_in_view_ref(nodes[i].bsphere, planes); - } -} - /*! * Render all trees with settings for the given tree. * This is intended to be used only for debugging when we can't easily get commands for all trees * working. */ -void Tfrag3::render_all_trees(const RenderSettings& settings, +void Tfrag3::render_all_trees(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof) { - RenderSettings settings_copy = settings; + TfragRenderSettings settings_copy = settings; for (size_t i = 0; i < m_cached_trees.size(); i++) { if (m_cached_trees[i].kind != tfrag3::TFragmentTreeKind::INVALID) { settings_copy.tree_idx = i; - render_tree(settings_copy, render_state, prof, false); + render_tree(settings_copy, render_state, prof); } } } void Tfrag3::render_matching_trees(const std::vector& trees, - const RenderSettings& settings, + const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof) { - RenderSettings settings_copy = settings; + TfragRenderSettings settings_copy = settings; for (size_t i = 0; i < m_cached_trees.size(); i++) { m_cached_trees[i].reset_stats(); if (!m_cached_trees[i].allowed) { @@ -428,8 +272,7 @@ void Tfrag3::render_matching_trees(const std::vector& m_cached_trees[i].forced) { m_cached_trees[i].rendered_this_frame = true; settings_copy.tree_idx = i; - cull_ref_all(settings.planes, *m_cached_trees[i].vis, m_cached_trees[i].vis_temp.data()); - render_tree(settings_copy, render_state, prof, true); + render_tree(settings_copy, render_state, prof); if (m_cached_trees[i].cull_debug) { 
render_tree_cull_debug(settings_copy, render_state, prof); } @@ -437,16 +280,16 @@ void Tfrag3::render_matching_trees(const std::vector& } } -void Tfrag3::debug_render_all_trees_nolores(const RenderSettings& settings, +void Tfrag3::debug_render_all_trees_nolores(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof) { - RenderSettings settings_copy = settings; + TfragRenderSettings settings_copy = settings; for (size_t i = 0; i < m_cached_trees.size(); i++) { if (m_cached_trees[i].kind != tfrag3::TFragmentTreeKind::INVALID && m_cached_trees[i].kind != tfrag3::TFragmentTreeKind::LOWRES_TRANS && m_cached_trees[i].kind != tfrag3::TFragmentTreeKind::LOWRES) { settings_copy.tree_idx = i; - render_tree(settings_copy, render_state, prof, false); + render_tree(settings_copy, render_state, prof); } } @@ -477,16 +320,7 @@ void Tfrag3::draw_debug_window() { ImGui::PopID(); if (tree.rendered_this_frame) { ImGui::Text(" tris: %d draws: %d", tree.tris_this_frame, tree.draws_this_frame); - int vis = 0; - for (auto x : tree.vis_temp) { - if (x) { - vis++; - } - } - ImGui::Text(" cull: %d vis out of %d", vis, (int)tree.vis_temp.size()); } - ImGui::Text("root: %d, roots: %d, nodes %d", tree.vis_tree_root, tree.num_vis_tree_roots, - (int)tree.vis->size()); } } @@ -519,25 +353,6 @@ void Tfrag3::discard_tree_cache() { m_cached_trees.clear(); } -void Tfrag3::interp_time_of_day_slow(const float weights[8], - const std::vector& in, - math::Vector* out) { - // Timer interp_timer; - for (size_t color = 0; color < in.size(); color++) { - math::Vector4f result = math::Vector4f::zero(); - for (int component = 0; component < 8; component++) { - result += in[color].rgba[component].cast() * weights[component]; - } - result[0] = std::min(result[0], 255.f); - result[1] = std::min(result[1], 255.f); - result[2] = std::min(result[2], 255.f); - result[3] = std::min(result[3], 128.f); // note: different for alpha! 
- out[color] = result.cast(); - } - // about 70 us, not bad. - // fmt::print("interp {} colors {:.2f} ms\n", in.size(), interp_timer.getMs()); -} - namespace { float frac(float in) { @@ -617,15 +432,15 @@ void debug_vis_draw(int first_root, } // namespace -void Tfrag3::render_tree_cull_debug(const RenderSettings& settings, +void Tfrag3::render_tree_cull_debug(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof) { // generate debug verts: m_debug_vert_data.clear(); auto& tree = m_cached_trees.at(settings.tree_idx); - debug_vis_draw(tree.vis_tree_root, tree.vis_tree_root, tree.num_vis_tree_roots, 1, *tree.vis, - m_debug_vert_data); + debug_vis_draw(tree.vis->first_root, tree.vis->first_root, tree.vis->num_roots, 1, + tree.vis->vis_nodes, m_debug_vert_data); render_state->shaders[ShaderId::TFRAG3_NO_TEX].activate(); glUniformMatrix4fv( diff --git a/game/graphics/opengl_renderer/tfrag/Tfrag3.h b/game/graphics/opengl_renderer/tfrag/Tfrag3.h index 8190f209e7..9cdb948851 100644 --- a/game/graphics/opengl_renderer/tfrag/Tfrag3.h +++ b/game/graphics/opengl_renderer/tfrag/Tfrag3.h @@ -4,48 +4,37 @@ #include "common/math/Vector.h" #include "game/graphics/opengl_renderer/BucketRenderer.h" #include "game/graphics/pipelines/opengl.h" +#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/tfrag/Tie3.h" class Tfrag3 { public: - struct RenderSettings { - math::Matrix4f math_camera; - math::Vector4f hvdf_offset; - float fog_x; - const u8* rgba_data; - int tree_idx; - float time_of_day_weights[8] = {0}; - math::Vector4f planes[4]; - bool do_culling = false; - bool debug_culling = false; - // todo culling planes - // todo occlusion culling string. 
- }; - Tfrag3(); ~Tfrag3(); - void debug_render_all_trees_nolores(const RenderSettings& settings, + void debug_render_all_trees_nolores(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof); - void render_all_trees(const RenderSettings& settings, + void render_all_trees(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof); void render_matching_trees(const std::vector& trees, - const RenderSettings& settings, + const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof); - void render_tree(const RenderSettings& settings, + void render_tree(const TfragRenderSettings& settings, SharedRenderState* render_state, - ScopedProfilerNode& prof, - bool use_vis); + ScopedProfilerNode& prof); - void setup_for_level(const std::string& level, SharedRenderState* render_state); + void setup_for_level(const std::vector& tree_kinds, + const std::string& level, + SharedRenderState* render_state); void discard_tree_cache(); - void render_tree_cull_debug(const RenderSettings& settings, + void render_tree_cull_debug(const TfragRenderSettings& settings, SharedRenderState* render_state, ScopedProfilerNode& prof); @@ -56,34 +45,15 @@ class Tfrag3 { }; private: - void first_draw_setup(const RenderSettings& settings, SharedRenderState* render_state); - enum class DoubleDrawKind { NONE, AFAIL_NO_DEPTH_WRITE }; - struct DoubleDraw { - DoubleDrawKind kind = DoubleDrawKind::NONE; - float aref = 0.; - }; - - DoubleDraw setup_shader(const RenderSettings& settings, - SharedRenderState* render_state, - DrawMode mode); - void interp_time_of_day_slow(const float weights[8], - const std::vector& in, - math::Vector* out); - struct TreeCache { tfrag3::TFragmentTreeKind kind; GLuint vertex_buffer = -1; GLuint vao; u32 vert_count = 0; - const std::vector* draws = nullptr; + const std::vector* draws = nullptr; const std::vector* colors = nullptr; - const std::vector* vis = nullptr; - - 
std::vector vis_temp; - std::vector culled_indices; - int num_vis_tree_roots = 0; - int vis_tree_root = 0; - int first_vis_leaf = 0; + const tfrag3::BVH* vis = nullptr; + SwizzledTimeOfDay tod_cache; void reset_stats() { rendered_this_frame = false; @@ -98,6 +68,12 @@ class Tfrag3 { bool cull_debug = false; }; + struct Cache { + std::vector vis_temp; + std::vector> draw_idx_temp; + std::vector index_list; + } m_cache; + std::string m_level_name; std::vector m_textures; @@ -115,8 +91,10 @@ class Tfrag3 { // in theory could be up to 4096, I think, but we don't see that many... // should be easy to increase (will require a shader change too for indexing) - static constexpr int TIME_OF_DAY_COLOR_COUNT = 2048; + static constexpr int TIME_OF_DAY_COLOR_COUNT = 8192; static constexpr int DEBUG_TRI_COUNT = 4096; std::vector m_debug_vert_data; + + bool m_use_fast_time_of_day = true; }; diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.cpp b/game/graphics/opengl_renderer/tfrag/Tie3.cpp new file mode 100644 index 0000000000..1b41aee62f --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/Tie3.cpp @@ -0,0 +1,419 @@ +#include "Tie3.h" + +#include "third-party/imgui/imgui.h" + +Tie3::Tie3(const std::string& name, BucketId my_id) : BucketRenderer(name, my_id) {} + +Tie3::~Tie3() { + discard_tree_cache(); +} + +/*! + * Set up all OpenGL and temporary buffers for a given level name. + * The level name should be the 3 character short name. + */ +void Tie3::setup_for_level(const std::string& level, SharedRenderState* render_state) { + // make sure we have the level data. + // TODO: right now this will wait to load from disk and unpack it. + auto lev_data = render_state->loader.get_tfrag3_level(level); + + if (m_level_name != level) { + Timer tie_setup_timer; + // We changed level! + fmt::print("TIE3 level change! 
{} -> {}\n", m_level_name, level); + fmt::print(" Removing old level...\n"); + discard_tree_cache(); + fmt::print(" New level has {} tie trees\n", lev_data->tie_trees.size()); + m_trees.resize(lev_data->tie_trees.size()); + + size_t idx_buffer_len = 0; + size_t time_of_day_count = 0; + size_t vis_temp_len = 0; + size_t max_draw = 0; + size_t max_idx_per_draw = 0; + + // set up each tree + for (size_t tree_idx = 0; tree_idx < lev_data->tie_trees.size(); tree_idx++) { + const auto& tree = lev_data->tie_trees[tree_idx]; + max_draw = std::max(tree.static_draws.size(), max_draw); + for (auto& draw : tree.static_draws) { + idx_buffer_len += draw.vertex_index_stream.size(); + max_idx_per_draw = std::max(max_idx_per_draw, draw.vertex_index_stream.size()); + } + time_of_day_count = std::max(tree.colors.size(), time_of_day_count); + u32 verts = tree.vertices.size(); + fmt::print(" tree {} has {} verts ({} kB) and {} draws\n", tree_idx, verts, + verts * sizeof(tfrag3::PreloadedVertex) / 1024.f, tree.static_draws.size()); + glGenVertexArrays(1, &m_trees[tree_idx].vao); + glBindVertexArray(m_trees[tree_idx].vao); + glGenBuffers(1, &m_trees[tree_idx].vertex_buffer); + m_trees[tree_idx].vert_count = verts; + m_trees[tree_idx].draws = &tree.static_draws; // todo - should we just copy this? 
+ m_trees[tree_idx].colors = &tree.colors; + m_trees[tree_idx].vis = &tree.bvh; + vis_temp_len = std::max(vis_temp_len, tree.bvh.vis_nodes.size()); + m_trees[tree_idx].tod_cache = swizzle_time_of_day(tree.colors); + glBindBuffer(GL_ARRAY_BUFFER, m_trees[tree_idx].vertex_buffer); + glBufferData(GL_ARRAY_BUFFER, verts * sizeof(tfrag3::PreloadedVertex), nullptr, + GL_STATIC_DRAW); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + + glBufferSubData(GL_ARRAY_BUFFER, 0, verts * sizeof(tfrag3::PreloadedVertex), + tree.vertices.data()); + + glVertexAttribPointer(0, // location 0 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::PreloadedVertex), // stride + (void*)offsetof(tfrag3::PreloadedVertex, x) // offset (0) + ); + + glVertexAttribPointer(1, // location 1 in the shader + 3, // 3 values per vert + GL_FLOAT, // floats + GL_FALSE, // normalized + sizeof(tfrag3::PreloadedVertex), // stride + (void*)offsetof(tfrag3::PreloadedVertex, s) // offset (0) + ); + + glVertexAttribPointer(2, // location 2 in the shader + 1, // 1 values per vert + GL_UNSIGNED_SHORT, // u16 + GL_FALSE, // don't normalize + sizeof(tfrag3::PreloadedVertex), // stride + (void*)offsetof(tfrag3::PreloadedVertex, color_index) // offset (0) + ); + glBindVertexArray(0); + } + + fmt::print("TIE temporary vis output size: {}\n", vis_temp_len); + m_cache.vis_temp.resize(vis_temp_len); + fmt::print("TIE max draws/tree: {}\n", max_draw); + m_cache.draw_idx_temp.resize(max_draw); + fmt::print("TIE draw with the most verts: {}\n", max_idx_per_draw); + + // todo share textures + fmt::print("level has {} textures\n", lev_data->textures.size()); + for (auto& tex : lev_data->textures) { + GLuint gl_tex; + glGenTextures(1, &gl_tex); + glBindTexture(GL_TEXTURE_2D, gl_tex); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, tex.w, tex.h, 0, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + tex.data.data()); + glBindTexture(GL_TEXTURE_2D, 
0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, gl_tex); + glGenerateMipmap(GL_TEXTURE_2D); + + float aniso = 0.0f; + glGetFloatv(GL_MAX_TEXTURE_MAX_ANISOTROPY, &aniso); + glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAX_ANISOTROPY, aniso); + m_textures.push_back(gl_tex); + } + + fmt::print("level TIE index stream: {}\n", idx_buffer_len); + m_cache.index_list.resize(idx_buffer_len); + m_has_index_buffer = true; + glGenBuffers(1, &m_index_buffer); + glActiveTexture(GL_TEXTURE1); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, idx_buffer_len * sizeof(u32), nullptr, GL_STREAM_DRAW); + + fmt::print("level max time of day: {}\n", time_of_day_count); + assert(time_of_day_count <= TIME_OF_DAY_COLOR_COUNT); + // regardless of how many we use some fixed max + // we won't actually interp or upload to gpu the unused ones, but we need a fixed maximum so + // indexing works properly. + m_color_result.resize(TIME_OF_DAY_COLOR_COUNT); + glGenTextures(1, &m_time_of_day_texture); + m_has_time_of_day_texture = true; + glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture); + // just fill with zeros. 
this lets use use the faster texsubimage later + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, TIME_OF_DAY_COLOR_COUNT, 0, GL_RGBA, + GL_UNSIGNED_INT_8_8_8_8, m_color_result.data()); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + m_level_name = level; + fmt::print("TIE setup: {:.3f}\n", tie_setup_timer.getSeconds()); + } +} + +void Tie3::discard_tree_cache() { + for (auto tex : m_textures) { + glBindTexture(GL_TEXTURE_2D, tex); + glDeleteTextures(1, &tex); + } + m_textures.clear(); + + for (auto& tree : m_trees) { + glDeleteBuffers(1, &tree.vertex_buffer); + glDeleteVertexArrays(1, &tree.vao); + } + + if (m_has_index_buffer) { + glDeleteBuffers(1, &m_index_buffer); + m_has_index_buffer = false; + } + + if (m_has_time_of_day_texture) { + glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture); + glDeleteTextures(1, &m_time_of_day_texture); + m_has_time_of_day_texture = false; + } + + m_trees.clear(); +} + +void Tie3::render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) { + if (!m_enabled) { + while (dma.current_tag_offset() != render_state->next_bucket) { + dma.read_and_advance(); + } + return; + } + + if (m_override_level && m_pending_user_level) { + setup_for_level(*m_pending_user_level, render_state); + m_pending_user_level = {}; + } + + auto data0 = dma.read_and_advance(); + assert(data0.vif1() == 0); + assert(data0.vif0() == 0); + assert(data0.size_bytes == 0); + + if (dma.current_tag().kind == DmaTag::Kind::CALL) { + // renderer didn't run, let's just get out of here. 
+ for (int i = 0; i < 4; i++) { + dma.read_and_advance(); + } + assert(dma.current_tag_offset() == render_state->next_bucket); + return; + } + + auto gs_test = dma.read_and_advance(); + assert(gs_test.size_bytes == 32); + + auto tie_consts = dma.read_and_advance(); + assert(tie_consts.size_bytes == 9 * 16); + + auto mscalf = dma.read_and_advance(); + assert(mscalf.size_bytes == 0); + + auto row = dma.read_and_advance(); + assert(row.size_bytes == 32); + + auto next = dma.read_and_advance(); + assert(next.size_bytes == 0); + + auto pc_port_data = dma.read_and_advance(); + assert(pc_port_data.size_bytes == sizeof(TfragPcPortData)); + memcpy(&m_pc_port_data, pc_port_data.data, sizeof(TfragPcPortData)); + m_pc_port_data.level_name[11] = '\0'; + + while (dma.current_tag_offset() != render_state->next_bucket) { + dma.read_and_advance(); + } + + TfragRenderSettings settings; + settings.hvdf_offset = m_pc_port_data.hvdf_off; + settings.fog_x = m_pc_port_data.fogx; + + memcpy(settings.math_camera.data(), m_pc_port_data.camera[0].data(), 64); + settings.tree_idx = 0; + + for (int i = 0; i < 4; i++) { + settings.planes[i] = m_pc_port_data.planes[i]; + } + + if (false) { + // for (int i = 0; i < 8; i++) { + // settings.time_of_day_weights[i] = m_time_of_days[i]; + // } + } else { + for (int i = 0; i < 8; i++) { + settings.time_of_day_weights[i] = + 2 * (0xff & m_pc_port_data.itimes[i / 2].data()[2 * (i % 2)]) / 127.f; + } + } + if (!m_override_level) { + setup_for_level(m_pc_port_data.level_name, render_state); + } + render_all_trees(settings, render_state, prof); + // todo render all... 
+} + +void Tie3::render_all_trees(const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + Timer all_tree_timer; + if (m_override_level && m_pending_user_level) { + setup_for_level(*m_pending_user_level, render_state); + m_pending_user_level = {}; + } + for (u32 i = 0; i < m_trees.size(); i++) { + render_tree(i, settings, render_state, prof); + } + m_all_tree_time.add(all_tree_timer.getSeconds()); +} + +void Tie3::render_tree(int idx, + const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof) { + Timer tree_timer; + auto& tree = m_trees.at(idx); + tree.perf.draws = 0; + tree.perf.verts = 0; + tree.perf.full_draws = 0; + + if (m_color_result.size() < tree.colors->size()) { + m_color_result.resize(tree.colors->size()); + } + + Timer interp_timer; + if (m_use_fast_time_of_day) { + interp_time_of_day_fast(settings.time_of_day_weights, tree.tod_cache, m_color_result.data()); + } else { + interp_time_of_day_slow(settings.time_of_day_weights, *tree.colors, m_color_result.data()); + } + tree.perf.tod_time.add(interp_timer.getSeconds()); + + Timer setup_timer; + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_1D, m_time_of_day_texture); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, tree.colors->size(), GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + m_color_result.data()); + + first_tfrag_draw_setup(settings, render_state); + + glBindVertexArray(tree.vao); + glBindBuffer(GL_ARRAY_BUFFER, tree.vertex_buffer); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_index_buffer); + glActiveTexture(GL_TEXTURE0); + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(UINT32_MAX); + tree.perf.setup_time.add(setup_timer.getSeconds()); + + int last_texture = -1; + + Timer cull_timer; + cull_check_all_slow(settings.planes, tree.vis->vis_nodes, m_cache.vis_temp.data()); + tree.perf.cull_time.add(cull_timer.getSeconds()); + + Timer index_timer; + int idx_buffer_ptr = make_index_list_from_vis_string( + 
m_cache.draw_idx_temp.data(), m_cache.index_list.data(), *tree.draws, m_cache.vis_temp); + tree.perf.index_time.add(index_timer.getSeconds()); + tree.perf.index_upload = sizeof(u32) * idx_buffer_ptr; + + Timer draw_timer; + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, idx_buffer_ptr * sizeof(u32), + m_cache.index_list.data()); + + for (size_t draw_idx = 0; draw_idx < tree.draws->size(); draw_idx++) { + const auto& draw = tree.draws->operator[](draw_idx); + const auto& indices = m_cache.draw_idx_temp[draw_idx]; + + if (indices.second <= indices.first) { + continue; + } + + if ((int)draw.tree_tex_id != last_texture) { + glBindTexture(GL_TEXTURE_2D, m_textures.at(draw.tree_tex_id)); + last_texture = draw.tree_tex_id; + } + + auto double_draw = setup_tfrag_shader(settings, render_state, draw.mode); + int draw_size = indices.second - indices.first; + void* offset = (void*)(indices.first * sizeof(u32)); + + prof.add_draw_call(); + prof.add_tri(draw.num_triangles * (float)draw_size / draw.vertex_index_stream.size()); + + bool is_full = draw_size == (int)draw.vertex_index_stream.size(); + + tree.perf.draws++; + if (is_full) { + tree.perf.full_draws++; + } + tree.perf.verts += draw_size; + + glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset); + + switch (double_draw.kind) { + case DoubleDrawKind::NONE: + break; + case DoubleDrawKind::AFAIL_NO_DEPTH_WRITE: + tree.perf.draws++; + tree.perf.verts += draw_size; + if (is_full) { + tree.perf.full_draws++; + } + prof.add_draw_call(); + prof.add_tri(draw_size); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"), + -10.f); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"), + double_draw.aref); + glDepthMask(GL_FALSE); + glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset); + break; + default: + assert(false); + } + + if (m_debug_wireframe) { + 
render_state->shaders[ShaderId::TFRAG3_NO_TEX].activate(); + glUniformMatrix4fv( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3_NO_TEX].id(), "camera"), 1, + GL_FALSE, settings.math_camera.data()); + glUniform4f( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3_NO_TEX].id(), "hvdf_offset"), + settings.hvdf_offset[0], settings.hvdf_offset[1], settings.hvdf_offset[2], + settings.hvdf_offset[3]); + glUniform1f( + glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3_NO_TEX].id(), "fog_constant"), + settings.fog_x); + glDisable(GL_BLEND); + glPolygonMode(GL_FRONT_AND_BACK, GL_LINE); + glDrawElements(GL_TRIANGLE_STRIP, draw_size, GL_UNSIGNED_INT, (void*)offset); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + prof.add_draw_call(); + prof.add_tri(draw_size); + render_state->shaders[ShaderId::TFRAG3].activate(); + } + } + glBindVertexArray(0); + tree.perf.draw_time.add(draw_timer.getSeconds()); + tree.perf.tree_time.add(tree_timer.getSeconds()); +} + +void Tie3::draw_debug_window() { + ImGui::InputText("Custom Level", m_user_level, sizeof(m_user_level)); + if (ImGui::Button("Go!")) { + m_pending_user_level = m_user_level; + } + ImGui::Checkbox("Override level", &m_override_level); + ImGui::Checkbox("Fast ToD", &m_use_fast_time_of_day); + ImGui::Checkbox("Wireframe", &m_debug_wireframe); + ImGui::Separator(); + for (u32 i = 0; i < m_trees.size(); i++) { + auto& perf = m_trees[i].perf; + ImGui::Text("Tree: %d", i); + ImGui::Text("index data bytes: %d", perf.index_upload); + ImGui::Text("time of days: %d", (int)m_trees[i].colors->size()); + ImGui::Text("draw: %d, full: %d, verts: %d", perf.draws, perf.full_draws, perf.verts); + ImGui::Text("total: %.2f", perf.tree_time.get() * 1000.f); + ImGui::Text("cull: %.2f index: %.2f tod: %.2f setup: %.2f draw: %.2f", + perf.cull_time.get() * 1000.f, perf.index_time.get() * 1000.f, + perf.tod_time.get() * 1000.f, perf.setup_time.get() * 1000.f, + perf.draw_time.get() * 1000.f); + ImGui::Separator(); + } + 
ImGui::Text("All trees: %.2f", 1000.f * m_all_tree_time.get()); +} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/tfrag/Tie3.h b/game/graphics/opengl_renderer/tfrag/Tie3.h new file mode 100644 index 0000000000..3da4d397f3 --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/Tie3.h @@ -0,0 +1,79 @@ +#pragma once + +#include + +#include "game/graphics/opengl_renderer/tfrag/tfrag_common.h" +#include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/pipelines/opengl.h" +#include "common/util/FilteredValue.h" + +class Tie3 : public BucketRenderer { + public: + Tie3(const std::string& name, BucketId my_id); + void render(DmaFollower& dma, SharedRenderState* render_state, ScopedProfilerNode& prof) override; + void draw_debug_window() override; + ~Tie3(); + + void render_all_trees(const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + void render_tree(int idx, + const TfragRenderSettings& settings, + SharedRenderState* render_state, + ScopedProfilerNode& prof); + void setup_for_level(const std::string& str, SharedRenderState* render_state); + + private: + void discard_tree_cache(); + struct Tree { + GLuint vertex_buffer; + GLuint vao; + u32 vert_count; + const std::vector* draws = nullptr; + const std::vector* colors = nullptr; + const tfrag3::BVH* vis = nullptr; + SwizzledTimeOfDay tod_cache; + + struct { + u32 index_upload = 0; + u32 verts = 0; + u32 draws = 0; + u32 full_draws = 0; // ones that have all visible + Filtered cull_time; + Filtered index_time; + Filtered tod_time; + Filtered setup_time; + Filtered draw_time; + Filtered tree_time; + } perf; + }; + + std::vector m_trees; + std::string m_level_name; + std::vector m_textures; // todo, can we share with tfrag in some cases? 
+ + struct Cache { + std::vector vis_temp; + std::vector> draw_idx_temp; + std::vector index_list; + } m_cache; + + GLuint m_time_of_day_texture = -1; + bool m_has_time_of_day_texture = false; + + std::vector> m_color_result; + + bool m_has_index_buffer = false; + GLuint m_index_buffer = -1; + + static constexpr int TIME_OF_DAY_COLOR_COUNT = 8192; + + char m_user_level[255] = "vi1"; + std::optional m_pending_user_level = std::nullopt; + bool m_override_level = false; + bool m_use_fast_time_of_day = true; + bool m_debug_wireframe = false; + Filtered m_all_tree_time; + + TfragPcPortData m_pc_port_data; +}; diff --git a/game/graphics/opengl_renderer/tfrag/tfrag_common.cpp b/game/graphics/opengl_renderer/tfrag/tfrag_common.cpp new file mode 100644 index 0000000000..02f610068a --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/tfrag_common.cpp @@ -0,0 +1,349 @@ + + +#include "tfrag_common.h" +#include "game/graphics/opengl_renderer/BucketRenderer.h" +#include "game/graphics/pipelines/opengl.h" + +#include + +DoubleDraw setup_tfrag_shader(const TfragRenderSettings& /*settings*/, + SharedRenderState* render_state, + DrawMode mode) { + glActiveTexture(GL_TEXTURE0); + + if (mode.get_zt_enable()) { + glEnable(GL_DEPTH_TEST); + switch (mode.get_depth_test()) { + case GsTest::ZTest::NEVER: + glDepthFunc(GL_NEVER); + break; + case GsTest::ZTest::ALWAYS: + glDepthFunc(GL_ALWAYS); + break; + case GsTest::ZTest::GEQUAL: + glDepthFunc(GL_GEQUAL); + break; + case GsTest::ZTest::GREATER: + glDepthFunc(GL_GREATER); + break; + default: + assert(false); + } + } else { + glDisable(GL_DEPTH_TEST); + } + + if (mode.get_ab_enable() && mode.get_alpha_blend() != DrawMode::AlphaBlend::DISABLED) { + glEnable(GL_BLEND); + switch (mode.get_alpha_blend()) { + case DrawMode::AlphaBlend::SRC_DST_SRC_DST: + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + break; + case DrawMode::AlphaBlend::SRC_0_SRC_DST: + glBlendFunc(GL_SRC_ALPHA, GL_ONE); + break; + case 
DrawMode::AlphaBlend::SRC_0_FIX_DST: + glBlendFunc(GL_ONE, GL_ONE); + break; + case DrawMode::AlphaBlend::SRC_DST_FIX_DST: + // Cv = (Cs - Cd) * FIX + Cd + // Cs * FIX * 0.5 + // Cd * FIX * 0.5 + glBlendFunc(GL_CONSTANT_COLOR, GL_CONSTANT_COLOR); + glBlendColor(0.5, 0.5, 0.5, 0.5); + break; + default: + assert(false); + } + } else { + glDisable(GL_BLEND); + } + + if (mode.get_clamp_s_enable()) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + } + + if (mode.get_clamp_t_enable()) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + } + + if (mode.get_filt_enable()) { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } else { + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + + // for some reason, they set atest NEVER + FB_ONLY to disable depth writes + bool alpha_hack_to_disable_z_write = false; + DoubleDraw double_draw; + + float alpha_min = 0.; + if (mode.get_at_enable()) { + switch (mode.get_alpha_test()) { + case DrawMode::AlphaTest::ALWAYS: + break; + case DrawMode::AlphaTest::GEQUAL: + alpha_min = mode.get_aref() / 127.f; + switch (mode.get_alpha_fail()) { + case GsTest::AlphaFail::KEEP: + // ok, no need for double draw + break; + case GsTest::AlphaFail::FB_ONLY: + // darn, we need to draw twice + double_draw.kind = DoubleDrawKind::AFAIL_NO_DEPTH_WRITE; + double_draw.aref = alpha_min; + break; + default: + assert(false); + } + break; + case DrawMode::AlphaTest::NEVER: + if (mode.get_alpha_fail() == GsTest::AlphaFail::FB_ONLY) { + alpha_hack_to_disable_z_write = true; + } else { + assert(false); + } + break; + default: + assert(false); + } + } + + if 
(mode.get_depth_write_enable()) { + glDepthMask(GL_TRUE); + } else { + glDepthMask(GL_FALSE); + } + + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_min"), + alpha_min); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "alpha_max"), + 10.f); + + return double_draw; +} + +void first_tfrag_draw_setup(const TfragRenderSettings& settings, SharedRenderState* render_state) { + render_state->shaders[ShaderId::TFRAG3].activate(); + glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "tex_T0"), 0); + glUniform1i(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "tex_T1"), 1); + glUniformMatrix4fv(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "camera"), + 1, GL_FALSE, settings.math_camera.data()); + glUniform4f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "hvdf_offset"), + settings.hvdf_offset[0], settings.hvdf_offset[1], settings.hvdf_offset[2], + settings.hvdf_offset[3]); + glUniform1f(glGetUniformLocation(render_state->shaders[ShaderId::TFRAG3].id(), "fog_constant"), + settings.fog_x); +} + +void interp_time_of_day_slow(const float weights[8], + const std::vector& in, + math::Vector* out) { + // Timer interp_timer; + for (size_t color = 0; color < in.size(); color++) { + math::Vector4f result = math::Vector4f::zero(); + for (int component = 0; component < 8; component++) { + result += in[color].rgba[component].cast() * weights[component]; + } + result[0] = std::min(result[0], 255.f); + result[1] = std::min(result[1], 255.f); + result[2] = std::min(result[2], 255.f); + result[3] = std::min(result[3], 128.f); // note: different for alpha! + out[color] = result.cast(); + } +} + +// we want to absolutely minimize the number of time we have to "cross lanes" in AVX (meaning X +// component of one vector interacts with Y component of another). 
We can make this a lot better by +// taking groups of 4 time of day colors (each containing 8x RGBAs) and rearranging them with this +// pattern. We want to compute: +// [rgba][0][0] * weights[0] + [rgba][0][1] * weights[1] + [rgba][0][2]... + rgba[0][7] * weights[7] +// RGBA is already a vector of 4 components, but with AVX we have vectors with 32 bytes which fit +// 16 colors in them. + +// This makes each vector have: +// colors0 = [rgba][0][0], [rgba][1][0], [rgba][2][0], [rgba][3][0] +// colors1 = [rgba][0][1], [rgba][1][1], [rgba][2][1], [rgba][3][1] +// ... +// so we can basically add up the columns (multiplying by weights in between) +// and we'll end up with [final0, final1, final2, final3, final4] + +// the swizzle function below rearranges to get this pattern. +// it's not the most efficient way to do it, but it just runs during loading and not on every frame. + +SwizzledTimeOfDay swizzle_time_of_day(const std::vector& in) { + SwizzledTimeOfDay out; + out.data.resize(in.size() * 8 * 4); + + // we're rearranging per 4 colors (groups of 32 * 4 = 128) + // color (lots of these) + // component (8 of these) + // channel (4 of these, rgba) + + for (u32 color_quad = 0; color_quad < in.size() / 4; color_quad++) { + u8* quad_out = out.data.data() + color_quad * 128; + for (u32 component = 0; component < 8; component++) { + for (u32 color = 0; color < 4; color++) { + for (u32 channel = 0; channel < 4; channel++) { + *quad_out = in.at(color_quad * 4 + color).rgba[component][channel]; + quad_out++; + } + } + } + } + out.color_count = in.size(); + return out; +} + +// This does the same thing as interp_time_of_day_slow, but is faster. +// Due to using integers instead of floats, it may be a tiny bit different. +// TODO: it might be possible to reorder the loop into two blocks of loads and avoid spilling xmms. +// It's ~8x faster than the slow version. 
+void interp_time_of_day_fast(const float weights[8], + const SwizzledTimeOfDay& in, + math::Vector* out) { + // even though the colors are 8 bits, we'll use 16 bits so we can saturate correctly + + // weight multipliers + __m256i weights0 = _mm256_set1_epi16(weights[0] * 128.f); + __m256i weights1 = _mm256_set1_epi16(weights[1] * 128.f); + __m256i weights2 = _mm256_set1_epi16(weights[2] * 128.f); + __m256i weights3 = _mm256_set1_epi16(weights[3] * 128.f); + __m256i weights4 = _mm256_set1_epi16(weights[4] * 128.f); + __m256i weights5 = _mm256_set1_epi16(weights[5] * 128.f); + __m256i weights6 = _mm256_set1_epi16(weights[6] * 128.f); + __m256i weights7 = _mm256_set1_epi16(weights[7] * 128.f); + + // saturation: note that alpha is saturated to 128 but the rest are 255. + // TODO: maybe we should saturate to 255 for everybody (can do this using a single packus) and + // change the shader to deal with this. + __m256i sat = _mm256_set_epi16(128, 255, 255, 255, 128, 255, 255, 255, 128, 255, 255, 255, 128, + 255, 255, 255); + + for (u32 color_quad = 0; color_quad < in.color_count / 4; color_quad++) { + // first, load colors. We put 16 bytes / register and don't touch the upper half because we will + // convert u8s to u16s. + const u8* base = in.data.data() + color_quad * 128; + __m128i color0_p = _mm_loadu_si128((const __m128i*)(base + 0)); + __m128i color1_p = _mm_loadu_si128((const __m128i*)(base + 16)); + __m128i color2_p = _mm_loadu_si128((const __m128i*)(base + 32)); + __m128i color3_p = _mm_loadu_si128((const __m128i*)(base + 48)); + __m128i color4_p = _mm_loadu_si128((const __m128i*)(base + 64)); + __m128i color5_p = _mm_loadu_si128((const __m128i*)(base + 80)); + __m128i color6_p = _mm_loadu_si128((const __m128i*)(base + 96)); + __m128i color7_p = _mm_loadu_si128((const __m128i*)(base + 112)); + + // unpack to 16-bits. each has 16x 16 bit colors. 
+ __m256i color0 = _mm256_cvtepu8_epi16(color0_p); + __m256i color1 = _mm256_cvtepu8_epi16(color1_p); + __m256i color2 = _mm256_cvtepu8_epi16(color2_p); + __m256i color3 = _mm256_cvtepu8_epi16(color3_p); + __m256i color4 = _mm256_cvtepu8_epi16(color4_p); + __m256i color5 = _mm256_cvtepu8_epi16(color5_p); + __m256i color6 = _mm256_cvtepu8_epi16(color6_p); + __m256i color7 = _mm256_cvtepu8_epi16(color7_p); + + // multiply by weights + color0 = _mm256_mullo_epi16(color0, weights0); + color1 = _mm256_mullo_epi16(color1, weights1); + color2 = _mm256_mullo_epi16(color2, weights2); + color3 = _mm256_mullo_epi16(color3, weights3); + color4 = _mm256_mullo_epi16(color4, weights4); + color5 = _mm256_mullo_epi16(color5, weights5); + color6 = _mm256_mullo_epi16(color6, weights6); + color7 = _mm256_mullo_epi16(color7, weights7); + + // add. This order minimizes dependencies. + color0 = _mm256_add_epi16(color0, color1); + color2 = _mm256_add_epi16(color2, color3); + color4 = _mm256_add_epi16(color4, color5); + color6 = _mm256_add_epi16(color6, color7); + + color0 = _mm256_add_epi16(color0, color2); + color4 = _mm256_add_epi16(color4, color6); + + color0 = _mm256_add_epi16(color0, color4); + + // divide, because we multiplied our weights by 2^7. + color0 = _mm256_srli_epi16(color0, 7); + + // saturate + color0 = _mm256_min_epu16(sat, color0); + + // back to u8s. + auto hi = _mm256_extracti128_si256(color0, 1); + auto result = _mm_packus_epi16(_mm256_castsi256_si128(color0), hi); + + // store result + _mm_storeu_si128((__m128i*)(&out[color_quad * 4]), result); + } +} + +bool sphere_in_view_ref(const math::Vector4f& sphere, const math::Vector4f* planes) { + math::Vector4f acc = + planes[0] * sphere.x() + planes[1] * sphere.y() + planes[2] * sphere.z() - planes[3]; + + return acc.x() > -sphere.w() && acc.y() > -sphere.w() && acc.z() > -sphere.w() && + acc.w() > -sphere.w(); +} + +// this isn't super efficient, but we spend so little time here it's not worth it to go faster. 
+void cull_check_all_slow(const math::Vector4f* planes, + const std::vector& nodes, + u8* out) { + for (size_t i = 0; i < nodes.size(); i++) { + out[i] = sphere_in_view_ref(nodes[i].bsphere, planes); + } +} + +u32 make_index_list_from_vis_string(std::pair* group_out, + u32* idx_out, + const std::vector& draws, + const std::vector& vis_data) { + int idx_buffer_ptr = 0; + for (size_t i = 0; i < draws.size(); i++) { + const auto& draw = draws[i]; + int vtx_idx = 0; + std::pair ds; + ds.first = idx_buffer_ptr; + bool building_run = false; + int run_start_out = 0; + int run_start_in = 0; + for (auto& grp : draw.vis_groups) { + bool vis = grp.vis_idx == 0xffffffff || vis_data[grp.vis_idx]; + if (building_run) { + if (vis) { + idx_buffer_ptr += grp.num; + } else { + building_run = false; + idx_buffer_ptr += grp.num; + memcpy(&idx_out[run_start_out], &draw.vertex_index_stream[run_start_in], + (idx_buffer_ptr - run_start_out) * sizeof(u32)); + } + } else { + if (vis) { + building_run = true; + run_start_out = idx_buffer_ptr; + run_start_in = vtx_idx; + idx_buffer_ptr += grp.num; + } else { + } + } + vtx_idx += grp.num; + } + if (building_run) { + memcpy(&idx_out[run_start_out], &draw.vertex_index_stream[run_start_in], + (idx_buffer_ptr - run_start_out) * sizeof(u32)); + } + + ds.second = idx_buffer_ptr; + group_out[i] = ds; + } + return idx_buffer_ptr; +} \ No newline at end of file diff --git a/game/graphics/opengl_renderer/tfrag/tfrag_common.h b/game/graphics/opengl_renderer/tfrag/tfrag_common.h new file mode 100644 index 0000000000..f2c54cc0a6 --- /dev/null +++ b/game/graphics/opengl_renderer/tfrag/tfrag_common.h @@ -0,0 +1,62 @@ +#pragma once + +#include "common/math/Vector.h" +#include "game/graphics/opengl_renderer/BucketRenderer.h" + +struct TfragRenderSettings { + math::Matrix4f math_camera; + math::Vector4f hvdf_offset; + float fog_x; + int tree_idx; + float time_of_day_weights[8] = {0}; + math::Vector4f planes[4]; + bool do_culling = false; + bool debug_culling = 
false; + // todo occlusion culling string. +}; + +enum class DoubleDrawKind { NONE, AFAIL_NO_DEPTH_WRITE }; + +struct DoubleDraw { + DoubleDrawKind kind = DoubleDrawKind::NONE; + float aref = 0.; +}; + +DoubleDraw setup_tfrag_shader(const TfragRenderSettings& /*settings*/, + SharedRenderState* render_state, + DrawMode mode); +void first_tfrag_draw_setup(const TfragRenderSettings& settings, SharedRenderState* render_state); +void interp_time_of_day_slow(const float weights[8], + const std::vector& in, + math::Vector* out); + +struct SwizzledTimeOfDay { + std::vector data; + u32 color_count = 0; +}; + +SwizzledTimeOfDay swizzle_time_of_day(const std::vector& in); + +void interp_time_of_day_fast(const float weights[8], + const SwizzledTimeOfDay& in, + math::Vector* out); + +void cull_check_all_slow(const math::Vector4f* planes, + const std::vector& nodes, + u8* out); + +struct TfragPcPortData { + math::Vector4f planes[4]; + math::Vector itimes[4]; + math::Vector4f camera[4]; + math::Vector4f hvdf_off; + float fogx; + float unused[3]; + char level_name[12]; + u32 tree_idx; +}; + +u32 make_index_list_from_vis_string(std::pair* group_out, + u32* idx_out, + const std::vector& draws, + const std::vector& vis_data); \ No newline at end of file diff --git a/game/runtime.cpp b/game/runtime.cpp index 0bca9f5062..534616170f 100644 --- a/game/runtime.cpp +++ b/game/runtime.cpp @@ -260,7 +260,7 @@ void dmac_runner(SystemThreadInterface& iface) { // } // } // avoid running the DMAC on full blast (this does not sync to its clockrate) - std::this_thread::sleep_for(std::chrono::microseconds(50)); + std::this_thread::sleep_for(std::chrono::microseconds(50000)); } VM::unsubscribe_component(); diff --git a/game/system/iop_thread.cpp b/game/system/iop_thread.cpp index cb8c22a6b0..a8f9efaed3 100644 --- a/game/system/iop_thread.cpp +++ b/game/system/iop_thread.cpp @@ -90,7 +90,7 @@ void IOP::kill_from_ee() { void IOP::signal_run_iop() { std::unique_lock lk(iters_mutex); - iop_iters_des += 
100; // todo, tune this + iop_iters_des++; // todo, tune this if (iop_iters_des - iop_iters_act > 500) { iop_iters_des = iop_iters_act + 500; } diff --git a/goal_src/engine/camera/math-camera.gc b/goal_src/engine/camera/math-camera.gc index 71c74f5cde..b5c590f47f 100644 --- a/goal_src/engine/camera/math-camera.gc +++ b/goal_src/engine/camera/math-camera.gc @@ -34,6 +34,7 @@ ;; the x/y ratio are frustum slopes (set! (-> math-cam x-ratio) (tan (* 0.5 (-> math-cam fov)))) + ;;(format #t "aspect is ~A~%" aspect) (if (= aspect 'aspect4x3) (set! (-> math-cam y-ratio) (* 0.75 (-> math-cam x-ratio))) (set! (-> math-cam y-ratio) (* 0.5625 (-> math-cam x-ratio))) diff --git a/goal_src/engine/debug/memory-usage-h.gc b/goal_src/engine/debug/memory-usage-h.gc index 2f3ccdeaa7..35b7c2fc17 100644 --- a/goal_src/engine/debug/memory-usage-h.gc +++ b/goal_src/engine/debug/memory-usage-h.gc @@ -48,6 +48,13 @@ ;; Used internally for computing memory info (define *temp-mem-usage* (the-as memory-usage-block #f)) +;; TODO: flags. +;; bit 0 : count as a prototype definition. +;; bit 1 : count as an instance of a prototype. +;; bit 2 : count tie colors 1 (geom 1) +;; bit 3 : count tie colors 2 (geom 2) +;; bit 4 : ?? (geom 3) + ;; Memory usage stats are organized by the type of object. ;; This enum allows you to go from type to the index in the memory-usage-block's data array. 
diff --git a/goal_src/engine/dma/dma-h.gc b/goal_src/engine/dma/dma-h.gc index f4957e5de8..39239ebe82 100644 --- a/goal_src/engine/dma/dma-h.gc +++ b/goal_src/engine/dma/dma-h.gc @@ -220,6 +220,8 @@ (tfrag-tex0 5) (tfrag-0 6) (tfrag-near-0 7) + (tie-near-0 8) + (tie-0 9) ;; merc0 10 ;; generic0 11 (bucket-10 10) @@ -228,6 +230,8 @@ (tfrag-tex1 12) (tfrag-1 13) (tfrag-near-1 14) + (tie-near-1 15) + (tie-1 16) ;; merc1 17 ;; generic1 18 (bucket-17 17) diff --git a/goal_src/engine/gfx/background.gc b/goal_src/engine/gfx/background.gc index ef54c84e9f..f909e53af7 100644 --- a/goal_src/engine/gfx/background.gc +++ b/goal_src/engine/gfx/background.gc @@ -411,7 +411,7 @@ ;;;;;;;;;; TIE (TFRAG Instance Engine) - #| + ;; common setup (set! (-> *instance-tie-work* paused) (paused?)) (when (nonzero? (-> *background-work* tie-tree-count)) @@ -436,8 +436,8 @@ (set! gp-1 s4-2) ) ) - (set! (-> (the-as terrain-context #x70000000) bsp lev-index) (-> s4-2 index)) - (set! (-> (the-as terrain-context #x70000000) bsp mood) (-> s4-2 mood)) + (set! (-> (scratchpad-object terrain-context) bsp lev-index) (-> s4-2 index)) + (set! (-> (scratchpad-object terrain-context) bsp mood) (-> s4-2 mood)) (draw-drawable-tree-instance-tie (-> *background-work* tie-trees s5-3) s4-2) ) ;; todo, type here probably wrong. @@ -452,7 +452,7 @@ (new 'static 'rgba :r #x80 :g #x20 :b #x60 :a #x80) ) ) - + #| ;; TIE Generic (dotimes (gp-2 (-> *background-work* tie-tree-count)) (when (nonzero? 
(-> *background-work* tie-generic gp-2)) @@ -478,9 +478,9 @@ ) ) ) + |# ) - |# ) 0 (none) diff --git a/goal_src/engine/gfx/generic/generic-h.gc b/goal_src/engine/gfx/generic/generic-h.gc index 7c912d6f67..dae6f0510a 100644 --- a/goal_src/engine/gfx/generic/generic-h.gc +++ b/goal_src/engine/gfx/generic/generic-h.gc @@ -195,7 +195,7 @@ (quad uint128 :offset 0) (data uint64 :offset 0) (cmds uint64 :offset 8) - (cmd uint8 :offset 8) + (cmd gs-reg :offset 8) (x uint32 :offset 0) (y uint32 :offset 4) (z uint32 :offset 8) diff --git a/goal_src/engine/gfx/tfrag/tfrag.gc b/goal_src/engine/gfx/tfrag/tfrag.gc index 79a3fe0546..615d32689f 100644 --- a/goal_src/engine/gfx/tfrag/tfrag.gc +++ b/goal_src/engine/gfx/tfrag/tfrag.gc @@ -322,7 +322,7 @@ (defun add-pc-tfrag3-data ((dma-buf dma-buffer) (lev level)) "Add PC-port specific tfrag data" (let ((packet (the-as dma-packet (-> dma-buf base)))) - (set! (-> packet dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc 9)) + (set! (-> packet dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc 15)) (set! (-> packet vif0) (new 'static 'vif-tag)) (set! (-> packet vif1) (new 'static 'vif-tag :cmd (vif-cmd pc-port))) (set! (-> dma-buf base) (the pointer (&+ packet 16))) @@ -338,9 +338,17 @@ (set! (-> data-ptr 5) (-> lev mood itimes 1 quad)) (set! (-> data-ptr 6) (-> lev mood itimes 2 quad)) (set! (-> data-ptr 7) (-> lev mood itimes 3 quad)) - (charp<-string (the (pointer uint8) (&-> data-ptr 8)) (symbol->string (-> lev nickname))) + (set! (-> data-ptr 8) (-> *math-camera* camera-temp vector 0 quad)) + (set! (-> data-ptr 9) (-> *math-camera* camera-temp vector 1 quad)) + (set! (-> data-ptr 10) (-> *math-camera* camera-temp vector 2 quad)) + (set! (-> data-ptr 11) (-> *math-camera* camera-temp vector 3 quad)) + (set! (-> data-ptr 12) (-> *math-camera* hvdf-off quad)) + (let ((vec (-> (the (inline-array vector) data-ptr) 13))) + (set! 
(-> vec x) (-> *math-camera* pfog0)) + ) + (charp<-string (the (pointer uint8) (&-> data-ptr 14)) (symbol->string (-> lev nickname))) ) - (&+! (-> dma-buf base) (* 16 9)) + (&+! (-> dma-buf base) (* 16 15)) ) ;;;;;;;;;;;;;;;;;;;;; diff --git a/goal_src/engine/gfx/tie/prototype-h.gc b/goal_src/engine/gfx/tie/prototype-h.gc index 9e6c2ed874..2cee4734e9 100644 --- a/goal_src/engine/gfx/tie/prototype-h.gc +++ b/goal_src/engine/gfx/tie/prototype-h.gc @@ -73,6 +73,7 @@ ) (declare-type drawable-inline-array-collide-fragment drawable-inline-array) +(declare-type prototype-tie drawable) (deftype prototype-bucket-tie (prototype-bucket) ((generic-count uint16 4 :offset-assert 88) (generic-next uint32 4 :offset-assert 96) @@ -88,6 +89,7 @@ (color-index-qwc uint32 :dynamic :offset-assert 148) (generic-next-clear uint128 :offset 96) (generic-count-clear uint128 :offset 80) + (geometry-override prototype-tie 4 :offset 16 :score 1) ) :method-count-assert 9 :size-assert #x94 @@ -101,7 +103,7 @@ :size-assert #x10 :flag-assert #xa00000010 (:methods - (TODO-RENAME-9 (_type_) none 9) + (login (_type_) none 9) ) ) diff --git a/goal_src/engine/gfx/tie/prototype.gc b/goal_src/engine/gfx/tie/prototype.gc index 6202d51e0d..d8cc211a1a 100644 --- a/goal_src/engine/gfx/tie/prototype.gc +++ b/goal_src/engine/gfx/tie/prototype.gc @@ -5,3 +5,134 @@ ;; name in dgo: prototype ;; dgos: GAME, ENGINE +;; shared code for the tie and shrub prototypes + +(defmethod login prototype-array-tie ((obj prototype-array-tie)) + (dotimes (s5-0 (-> obj length)) + (let ((s4-0 (-> obj array-data s5-0))) + (dotimes (s3-0 4) + (let ((a0-1 (-> s4-0 geometry s3-0))) + (if (nonzero? a0-1) + (login a0-1) + ) + ) + ) + (let ((s4-1 (-> s4-0 envmap-shader))) + (when (nonzero? s4-1) + (adgif-shader-login-no-remap s4-1) + (set! (-> s4-1 tex1) (new 'static 'gs-tex1 :mmag #x1 :mmin #x1)) + (set! (-> s4-1 clamp) (new 'static 'gs-clamp :wms (gs-tex-wrap-mode clamp) :wmt (gs-tex-wrap-mode clamp))) + (set! 
(-> s4-1 alpha) (new 'static 'gs-miptbp :tbp1 #x58)) + (set! (-> s4-1 prims 1) (gs-reg64 tex0-1)) + (set! (-> s4-1 prims 3) (gs-reg64 tex1-1)) + (set! (-> s4-1 prims 5) (gs-reg64 miptbp1-1)) + (set! (-> s4-1 clamp-reg) (gs-reg64 clamp-1)) + (set! (-> s4-1 prims 9) (gs-reg64 alpha-1)) + ) + ) + ) + ) + (none) + ) + +(defmethod login prototype-inline-array-shrub ((obj prototype-inline-array-shrub)) + (dotimes (s5-0 (-> obj length)) + (let ((s4-0 (-> obj data s5-0))) + (dotimes (s3-0 4) + (let ((a0-1 (-> s4-0 geometry s3-0))) + (if (nonzero? a0-1) + (login a0-1) + ) + ) + ) + ) + ) + obj + ) + +(defmethod mem-usage prototype-array-tie ((obj prototype-array-tie) (arg0 memory-usage-block) (arg1 int)) + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-8 (asize-of obj))) + (+! (-> arg0 data 0 used) v1-8) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-8 15))) + ) + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj array-data s3-0) arg0 arg1) + ) + obj + ) + +(defmethod mem-usage prototype-bucket-tie ((obj prototype-bucket-tie) (arg0 memory-usage-block) (arg1 int)) + (dotimes (s3-0 4) + (let ((a0-1 (-> obj geometry s3-0))) + (if (nonzero? a0-1) + (mem-usage a0-1 arg0 (logior arg1 1)) + ) + ) + ) + (set! (-> arg0 length) (max 81 (-> arg0 length))) + (set! (-> arg0 data 80 name) "string") + (+! (-> arg0 data 80 count) 1) + (let ((v1-13 ((method-of-type string asize-of) (the-as string (-> obj name))))) + (+! (-> arg0 data 80 used) v1-13) + (+! (-> arg0 data 80 total) (logand -16 (+ v1-13 15))) + ) + (when (nonzero? (-> obj tie-colors)) + (set! (-> arg0 length) (max 17 (-> arg0 length))) + (set! (-> arg0 data 16 name) "tie-pal") + (+! (-> arg0 data 16 count) 1) + (let ((v1-25 (asize-of (-> obj tie-colors)))) + (+! (-> arg0 data 16 used) v1-25) + (+! (-> arg0 data 16 total) (logand -16 (+ v1-25 15))) + ) + ) + (if (nonzero? 
(-> obj collide-frag)) + (mem-usage (-> obj collide-frag) arg0 (logior arg1 1)) + ) + obj + ) + +(defmethod mem-usage prototype-inline-array-shrub ((obj prototype-inline-array-shrub) (arg0 memory-usage-block) (arg1 int)) + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-8 (asize-of obj))) + (+! (-> arg0 data 0 used) v1-8) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-8 15))) + ) + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj data s3-0) arg0 arg1) + ) + obj + ) + +(defmethod mem-usage prototype-bucket-shrub ((obj prototype-bucket-shrub) (arg0 memory-usage-block) (arg1 int)) + (set! (-> arg0 length) (max 25 (-> arg0 length))) + (set! (-> arg0 data 24 name) "prototype-bucket-shrub") + (+! (-> arg0 data 24 count) 1) + (let ((v1-5 112)) + (+! (-> arg0 data 24 used) v1-5) + (+! (-> arg0 data 24 total) (logand -16 (+ v1-5 15))) + ) + (dotimes (s3-0 4) + (let ((a0-5 (-> obj geometry s3-0))) + (if (nonzero? a0-5) + (mem-usage a0-5 arg0 (logior arg1 1)) + ) + ) + ) + (set! (-> arg0 length) (max 81 (-> arg0 length))) + (set! (-> arg0 data 80 name) "string") + (+! (-> arg0 data 80 count) 1) + (let ((v1-22 ((method-of-type string asize-of) (the-as string (-> obj name))))) + (+! (-> arg0 data 80 used) v1-22) + (+! (-> arg0 data 80 total) (logand -16 (+ v1-22 15))) + ) + obj + ) + + + + diff --git a/goal_src/engine/gfx/tie/tie-h.gc b/goal_src/engine/gfx/tie/tie-h.gc index fbc5956d1c..6cc0b17193 100644 --- a/goal_src/engine/gfx/tie/tie-h.gc +++ b/goal_src/engine/gfx/tie/tie-h.gc @@ -7,29 +7,34 @@ ;; DECOMP BEGINS +;; The TIE FRAGMENT is a record for a chunk of TIE DMA data. +;; The actual data isn't stored in the tie-fragment - this just has some meta-data and a pointer +;; to the actual data. +;; Unlike with tfrag, tie-fragments aren't part of the draw-node tree - instead they are associated with a prototype. 
(deftype tie-fragment (drawable) - ((gif-ref uint32 :offset 4) - (point-ref uint32 :offset 8) - (color-index uint16 :offset 12) - (base-colors uint8 :offset 14) - (tex-count uint16 :offset-assert 32) - (gif-count uint16 :offset-assert 34) - (vertex-count uint16 :offset-assert 36) - (color-count uint16 :offset-assert 38) - (num-tris uint16 :offset-assert 40) - (num-dverts uint16 :offset-assert 42) - (dp-ref uint32 :offset-assert 44) - (dp-qwc uint32 :offset-assert 48) - (generic-ref uint32 :offset-assert 52) - (generic-count uint32 :offset-assert 56) - (debug-lines basic :offset-assert 60) + ((gif-ref (inline-array adgif-shader) :offset 4) ;; starts with adgif shaders, may have more after. + (point-ref uint32 :offset 8) + (color-index uint16 :offset 12) + (base-colors uint8 :offset 14) + (tex-count uint16 :offset-assert 32) ;; number of qw's of adgif-shaders in gif-ref (5 qw/shader) + (gif-count uint16 :offset-assert 34) + (vertex-count uint16 :offset-assert 36) ;; number of qw's of vertex data + (color-count uint16 :offset-assert 38) + (num-tris uint16 :offset-assert 40) + (num-dverts uint16 :offset-assert 42) + (dp-ref uint32 :offset-assert 44) + (dp-qwc uint32 :offset-assert 48) ;; number of "draw points", in qw's. + (generic-ref uint32 :offset-assert 52) ;; L891 ish, just a pointer to data. + (generic-count uint32 :offset-assert 56) ;; number of qw's of generic data. + (debug-lines (array vector-array) :offset-assert 60) ) :method-count-assert 18 :size-assert #x40 :flag-assert #x1200000040 ) - +;; This is a specialization of the shared instance type for a TIE. +;; It is the child node type in the draw-node BVH tree. (deftype instance-tie (instance) ((color-indices uint32 :offset 8) (bucket-ptr prototype-bucket-tie :offset 12) @@ -41,9 +46,10 @@ :flag-assert #x1200000040 ) - +;; Wrapper class for lists of consecutive instances. +;; This is equivalent to drawable-inline-array-tfrag of tfrag. 
(deftype drawable-inline-array-instance-tie (drawable-inline-array) - ((data instance-tie 1 :inline :offset-assert 32) + ((data instance-tie 1 :inline :offset-assert 32) ;; dynamic sized (pad uint32 :offset-assert 96) ) :method-count-assert 18 @@ -51,6 +57,9 @@ :flag-assert #x1200000064 ) +;; Top-level drawable tree for TIE instances. +;; this is also a drawable-group, so it has a data array containing drawables. +;; based on the login methods it seems like the data field has all the drawables. (deftype drawable-tree-instance-tie (drawable-tree) ((prototypes proxy-prototype-array-tie :offset 8) ) @@ -59,26 +68,30 @@ :flag-assert #x1200000024 ) +;; Wrapper class for lists of consecutive prototypes. +;; It's not known if these are proper draw-node BVH trees. +;; you could imagine it being for things with only one instance (like generic stuff?) (deftype prototype-tie (drawable-inline-array) - ((data tie-fragment 1 :inline :offset-assert 32) - (pad uint32) + ((data tie-fragment 1 :inline :offset-assert 32) + (pad uint32 :offset-assert 96) ) :method-count-assert 18 :size-assert #x64 :flag-assert #x1200000064 ) +;; The actual matrix type we will upload to VU1 per instance. (deftype tie-matrix (structure) - ((mat matrix :inline :offset-assert 0) - (morph qword :inline :offset-assert 64) - (fog qword :inline :offset-assert 80) + ((mat matrix :inline :offset-assert 0) ;; the transformation matrix + (morph qword :inline :offset-assert 64) ;; ? LOD stuff? + (fog qword :inline :offset-assert 80) ;; ? why 4 values? 
) :method-count-assert 9 :size-assert #x60 :flag-assert #x900000060 ) - +;; Temps used in the instance drawing asm functions (deftype instance-tie-work (structure) ((wind-const vector :inline :offset-assert 0) (hmge-d vector :inline :offset-assert 16) @@ -120,7 +133,7 @@ :flag-assert #x9000001c0 ) - +;; DMA storage for instance dma generation (mapped to scratchpad) (deftype instance-tie-dma (structure) ((banka instance-tie 32 :inline :offset-assert 0) (bankb instance-tie 32 :inline :offset-assert 2048) @@ -133,7 +146,7 @@ :flag-assert #x900003000 ) - +;; temps used in the prototype drawing (deftype prototype-tie-work (structure) ((upload-palette-0 dma-packet :inline :offset-assert 0) (upload-palette-1 dma-packet :inline :offset-assert 16) @@ -166,7 +179,7 @@ :flag-assert #x900000134 ) - +;; DMA storage for prototype dma generation (mapped to scratchpad) (deftype prototype-tie-dma (structure) ((colora rgba 256 :offset-assert 0) (colorb rgba 256 :offset-assert 1024) @@ -187,5 +200,5 @@ (define *instance-tie-work-copy* (the-as instance-tie-work #f)) (define-extern *instance-tie-work* instance-tie-work) -(define-extern tie-near-make-perspective-matrix (function matrix none)) -(define-extern draw-drawable-tree-instance-tie (function drawable-tree-instance-tie level none)) \ No newline at end of file +(define-extern tie-near-make-perspective-matrix (function matrix matrix)) +(define-extern draw-drawable-tree-instance-tie (function drawable-tree-instance-tie level none)) diff --git a/goal_src/engine/gfx/tie/tie-methods.gc b/goal_src/engine/gfx/tie/tie-methods.gc index 14e4287926..ff24173520 100644 --- a/goal_src/engine/gfx/tie/tie-methods.gc +++ b/goal_src/engine/gfx/tie/tie-methods.gc @@ -5,6 +5,732 @@ ;; name in dgo: tie-methods ;; dgos: GAME, ENGINE -(defun tie-init-buffers ((dma-buf dma-buffer)) - ;; TODO stub. +(defun tie-init-buffers ((arg0 dma-buffer)) + "Initialize the TIE buckets. + Note: the buffer passed in here is _not_ used. 
+ this function should be called _after_ all TIE drawing is done. + It will skip setup if there is nothing drawn." + + ;; the TIE buckets are only used by TIE - so we can safely splice things at the beginning/end without + ;; messing things up. + (let ((gp-0 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-0)))) + ;; only if we have something in the bucket. + (when (!= gp-0 (-> gp-0 last)) + (let* ((s5-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-1 (-> s5-0 base)) + ) + ;; add initialization data + (tie-init-engine + s5-0 + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + 0 + ) + ;; patch to the start + (let ((v1-8 (the-as object (-> s5-0 base)))) + (set! (-> (the-as dma-packet v1-8) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-0 next))) + (set! (-> (the-as dma-packet v1-8) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-8) vif1) (new 'static 'vif-tag)) + (set! (-> s5-0 base) (&+ (the-as pointer v1-8) 16)) + ) + (set! (-> gp-0 next) (the-as uint s4-1)) + ) + ) + ) + + (let ((gp-1 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-0)))) + ;; only if we have something in the bucket + (when (!= gp-1 (-> gp-1 last)) + (let* ((s4-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-1 (-> s4-2 base)) + ) + ;; add the end data at the end. + (tie-end-buffer s4-2) + (let ((v1-19 (-> s4-2 base))) + (let ((a0-17 (the-as object (-> s4-2 base)))) + (set! (-> (the-as dma-packet a0-17) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-17) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-17) vif1) (new 'static 'vif-tag)) + (set! (-> s4-2 base) (&+ (the-as pointer a0-17) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-1 last)) 1) (the-as uint s5-1)) + (set! 
(-> gp-1 last) (the-as (pointer dma-tag) v1-19)) + ) + ) + ) + ) + + ;; same as above, but for level 1's tie. + (let ((gp-2 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-1)))) + (when (!= gp-2 (-> gp-2 last)) + (let* ((s5-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-4 (-> s5-2 base)) + ) + (tie-init-engine + s5-2 + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + 0 + ) + (let ((v1-28 (the-as object (-> s5-2 base)))) + (set! (-> (the-as dma-packet v1-28) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-2 next))) + (set! (-> (the-as dma-packet v1-28) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-28) vif1) (new 'static 'vif-tag)) + (set! (-> s5-2 base) (&+ (the-as pointer v1-28) 16)) + ) + (set! (-> gp-2 next) (the-as uint s4-4)) + ) + ) + ) + (let ((gp-3 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-1)))) + (when (!= gp-3 (-> gp-3 last)) + (let* ((s4-5 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-3 (-> s4-5 base)) + ) + (tie-end-buffer s4-5) + (let ((v1-39 (-> s4-5 base))) + (let ((a0-36 (the-as object (-> s4-5 base)))) + (set! (-> (the-as dma-packet a0-36) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-36) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-36) vif1) (new 'static 'vif-tag)) + (set! (-> s4-5 base) (&+ (the-as pointer a0-36) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-3 last)) 1) (the-as uint s5-3)) + (set! 
(-> gp-3 last) (the-as (pointer dma-tag) v1-39)) + ) + ) + ) + ) + + #| + ;; level 0's tie near + (let ((gp-4 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-near-0)))) + (when (!= gp-4 (-> gp-4 last)) + (let* ((s5-4 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-7 (-> s5-4 base)) + ) + (tie-near-init-engine + s5-4 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x26 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 0 + ) + (let ((v1-48 (the-as object (-> s5-4 base)))) + (set! (-> (the-as dma-packet v1-48) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-4 next))) + (set! (-> (the-as dma-packet v1-48) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-48) vif1) (new 'static 'vif-tag)) + (set! (-> s5-4 base) (&+ (the-as pointer v1-48) 16)) + ) + (set! (-> gp-4 next) (the-as uint s4-7)) + ) + ) + ) + (let ((gp-5 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-near-0)))) + (when (!= gp-5 (-> gp-5 last)) + (let* ((s4-8 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-5 (-> s4-8 base)) + ) + (tie-near-end-buffer s4-8) + (let ((v1-59 (-> s4-8 base))) + (let ((a0-55 (the-as object (-> s4-8 base)))) + (set! (-> (the-as dma-packet a0-55) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-55) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-55) vif1) (new 'static 'vif-tag)) + (set! (-> s4-8 base) (&+ (the-as pointer a0-55) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-5 last)) 1) (the-as uint s5-5)) + (set! 
(-> gp-5 last) (the-as (pointer dma-tag) v1-59)) + ) + ) + ) + ) + + ;; level 1's tie near + (let ((gp-6 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-near-1)))) + (when (!= gp-6 (-> gp-6 last)) + (let* ((s5-6 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-10 (-> s5-6 base)) + ) + (tie-near-init-engine + s5-6 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x26 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 0 + ) + (let ((v1-68 (the-as object (-> s5-6 base)))) + (set! (-> (the-as dma-packet v1-68) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-6 next))) + (set! (-> (the-as dma-packet v1-68) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-68) vif1) (new 'static 'vif-tag)) + (set! (-> s5-6 base) (&+ (the-as pointer v1-68) 16)) + ) + (set! (-> gp-6 next) (the-as uint s4-10)) + ) + ) + ) + (let ((gp-7 (-> *display* frames (-> *display* on-screen) frame bucket-group (bucket-id tie-near-1)))) + (when (!= gp-7 (-> gp-7 last)) + (let* ((s4-11 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-7 (-> s4-11 base)) + ) + (tie-near-end-buffer s4-11) + (let ((v1-79 (-> s4-11 base))) + (let ((a0-74 (the-as object (-> s4-11 base)))) + (set! (-> (the-as dma-packet a0-74) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-74) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-74) vif1) (new 'static 'vif-tag)) + (set! (-> s4-11 base) (&+ (the-as pointer a0-74) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-7 last)) 1) (the-as uint s5-7)) + (set! (-> gp-7 last) (the-as (pointer dma-tag) v1-79)) + ) + ) + ) + ) + |# + 0 + (none) ) + + +;;;;;;;;;;;;;;;;; +;; TIE debug +;;;;;;;;;;;;;;;;; + +;; most of this doesn't really do anything. 
+ +;; a range of instances to debug +(deftype tie-instance-debug (structure) + ((max-instance uint32 :offset-assert 0) + (min-instance uint32 :offset-assert 4) + ) + :method-count-assert 9 + :size-assert #x8 + :flag-assert #x900000008 + ) + +;; unused +(define *tie* (new 'global 'tie-instance-debug)) + +(defun tie-debug-between ((arg0 uint) (arg1 uint)) + (set! (-> *instance-tie-work* test-id) arg1) + (set! (-> *instance-tie-work* test-id2) arg0) + arg0 + ) + +(defun tie-debug-one ((arg0 uint) (arg1 uint)) + (set! (-> *instance-tie-work* test-id) (+ arg1 -1 arg0)) + (set! (-> *instance-tie-work* test-id2) arg0) + arg0 + ) + +(defun walk-tie-generic-prototypes () + (none) + ) + +;; unused +(define *pke-hack* (new 'global 'vector)) + +;; draw-inline-array-instance-tie +;; draw-inline-array-prototype-tie-generic-asm +;; draw-inline-array-prototype-tie-asm +;; draw-inline-array-prototype-tie-near-asm + + +(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie)) + (if (nonzero? (-> obj prototypes prototype-array-tie)) + (login (-> obj prototypes prototype-array-tie)) + ) + (dotimes (s5-0 (-> obj length)) + (login (-> obj data s5-0)) + ) + obj + ) + +(defun draw-drawable-tree-instance-tie ((arg0 drawable-tree-instance-tie) (arg1 level)) + "Actually draw TIE instances. + Will draw TIE, TIE-NEAR, and GENERIC" + + ;; todo kill + (local-vars + (r0-0 none) + (a0-31 int) + (a0-33 int) + (a0-46 int) + (a0-48 int) + (a0-62 int) + (a0-64 int) + (a0-82 int) + (a0-84 int) + (sv-16 int) + ) + + ;; only if one of our renderers is enabled. + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near tie generic)) + ;; setup work (TODO, what uses TIE wind?) + (set! (-> *instance-tie-work* first-generic-prototype) (the-as uint 0)) + (set! 
(-> *instance-tie-work* wind-vectors) (-> arg0 prototypes wind-vectors)) + + ;; + (let ((s4-0 (+ (-> arg0 length) -1))) ;; number of arrays of draw-nodes (depth of the BVH tree, not counting instance leaves) + + ;; perform draw node culling. TODO + #| + (when (nonzero? s4-0) + (dotimes (s3-0 s4-0) + (let* ((v1-10 (-> arg0 data s3-0)) + (a0-5 (-> arg0 data (+ s3-0 1))) + (a1-2 (/ (-> (the-as drawable-inline-array-node v1-10) data 0 id) 8)) + (a0-7 (/ (-> (the-as drawable-inline-array-node a0-5) data 0 id) 8)) + (a1-4 (+ a1-2 #x38b0 #x70000000)) + (a0-9 (+ a0-7 #x38b0 #x70000000)) + ) + (draw-node-cull + (the-as pointer a0-9) + (the-as pointer a1-4) + (-> (the-as drawable-inline-array-node v1-10) data) + (-> (the-as drawable-inline-array-node v1-10) length) + ) + ) + ) + ) + |# + + (let* ((v1-16 (-> arg0 data s4-0)) ;; leaves + (s4-1 (-> arg0 prototypes prototype-array-tie)) ;; prototypes + (s5-1 (-> s4-1 length)) ;; number of prototypes + ) + + (dotimes (a0-11 s5-1) ;; loop over prototypes, zero stuff?? + (let ((a1-7 (-> s4-1 array-data a0-11))) + (set! (-> a1-7 next-clear) (the-as uint128 0)) + (set! (-> a1-7 generic-count-clear) (the-as uint128 0)) + (set! (-> a1-7 generic-next-clear) (the-as uint128 0)) + ) + 0 + ) + + (let* ((s1-0 (-> (the-as drawable-inline-array-instance-tie v1-16) data)) ;; the inline array of instances + (s0-0 (&-> (scratchpad-object terrain-context) work background vis-list (/ (-> s1-0 0 id) 8))) ;; vis for first. + (s3-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) ;; dma buf to write to + ) + (set! sv-16 (-> (the-as drawable-inline-array-node v1-16) length)) ;; number of instances + + ;; if we actually have things to draw + (when (nonzero? 
sv-16) + + ;; this is some buffer for the generic renderer + (let* ((v1-21 (logand (the-as int *gsf-buffer*) 8191)) + (v1-23 (logand (the-as int (&- (logand (the-as int (&-> (-> s4-1 data) -512)) 8191) (the-as uint v1-21))) 8191)) + ) + ;; not sure why, but we'll use some gsf-buffer space to store an instance-tie-work + ;; all the external stuff will dump into *instance-tie-work*, and we'll make a copy that's used + ;; in the actual DMA generation code. + (set! *instance-tie-work-copy* (the-as instance-tie-work (+ (the-as int *gsf-buffer*) v1-23))) + ) + + + ;;; TIE instance Drawing + ;; we do the instances first so the prototypes that aren't drawn can be skipped. + (let ((s2-0 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + ;; actually copy the work + (quad-copy! (the-as pointer *instance-tie-work-copy*) (the-as pointer *instance-tie-work*) 28) + ;; clear perf counting stuff + (set! (-> *instance-tie-work-copy* wait-to-spr) (the-as uint 0)) + (set! (-> *instance-tie-work-copy* wait-from-spr) (the-as uint 0)) + (reset! (-> *perf-stats* data 9)) + + ;; DRAW! + ;;(draw-inline-array-instance-tie s0-0 s1-0 sv-16 s3-1) + ;; finish perf stats + (read! (-> *perf-stats* data 9)) + (update-wait-stats (-> *perf-stats* data 9) (the-as uint 0) + (-> *instance-tie-work-copy* wait-to-spr) + (-> *instance-tie-work-copy* wait-from-spr)) + + ;; copy out things from instance tie work + (let ((v1-42 (-> *instance-tie-work-copy* min-dist quad))) + (set! (-> *instance-tie-work* min-dist quad) v1-42) + ) + (set! (-> *instance-tie-work* flags) (-> *instance-tie-work-copy* flags)) + + ;; update memory usage + (let ((a0-38 *dma-mem-usage*)) + (when (nonzero? a0-38) + (set! (-> a0-38 length) (max 10 (-> a0-38 length))) + (set! (-> a0-38 data 9 name) "tie-fragment") + (+! (-> a0-38 data 9 count) 1) + (+! (-> a0-38 data 9 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s2-0)) + ) + (set! 
(-> a0-38 data 9 total) (-> a0-38 data 9 used)) + ) + ) + ) + + ;; Generic TIE prototype drawing + (when (logtest? *vu1-enable-user* (vu1-renderer-mask generic)) + (when (logtest? (-> *instance-tie-work* flags) 2) + (let ((s2-1 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (set! (-> *prototype-tie-work* generic-wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* generic-wait-from-spr) (the-as uint 0)) + (set! (-> *instance-tie-work* first-generic-prototype) (the-as uint (-> s3-1 base))) + + (reset! (-> *perf-stats* data 10)) + ;;(draw-inline-array-prototype-tie-generic-asm s3-1 s5-1 s4-1) + (read! (-> *perf-stats* data 10)) + (update-wait-stats (-> *perf-stats* data 10) (the-as uint 0) + (-> *prototype-tie-work* generic-wait-to-spr) + (-> *prototype-tie-work* generic-wait-from-spr) + ) + ;; Note: we don't add to a bucket. This lives in some buffer somewhere and generic will take care of actually adding it. + (let ((a0-51 *dma-mem-usage*)) + (when (nonzero? a0-51) + (set! (-> a0-51 length) (max 18 (-> a0-51 length))) + (set! (-> a0-51 data 17 name) "tie-generic") + (+! (-> a0-51 data 17 count) 1) + (+! (-> a0-51 data 17 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s2-1)) + ) + (set! (-> a0-51 data 17 total) (-> a0-51 data 17 used)) + ) + ) + ) + ) + ) + + ;; Normal TIE prototype drawing + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let ((s3-2 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let* ((s1-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s2-2 (-> s1-1 base)) + ) + (set! (-> *prototype-tie-work* wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* wait-from-spr) (the-as uint 0)) + (reset! 
(-> *perf-stats* data 11)) + ;;(draw-inline-array-prototype-tie-asm s1-1 s5-1 s4-1) + (add-pc-tfrag3-data s1-1 (-> *level* data (-> (scratchpad-object terrain-context) bsp lev-index))) + (read! (-> *perf-stats* data 11)) + (update-wait-stats (-> *perf-stats* data 11) (the-as uint 0) + (-> *prototype-tie-work* wait-to-spr) + (-> *prototype-tie-work* wait-from-spr) + ) + + ;; this actually generates real drawing DMA, so add it to the appropriate bucket. + (let ((a3-11 (-> s1-1 base))) + (let ((v1-94 (the-as object (-> s1-1 base)))) + (set! (-> (the-as dma-packet v1-94) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-94) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-94) vif1) (new 'static 'vif-tag)) + (set! (-> s1-1 base) (&+ (the-as pointer v1-94) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as bucket-id (if (zero? (-> arg1 index)) + (bucket-id tie-0) + (bucket-id tie-1) + ) + ) + s2-2 + (the-as (pointer dma-tag) a3-11) + ) + ) + ) + ) + (let ((v1-100 *dma-mem-usage*)) + (when (nonzero? v1-100) + (set! (-> v1-100 length) (max 10 (-> v1-100 length))) + (set! (-> v1-100 data 9 name) "tie-fragment") + (+! (-> v1-100 data 9 count) 1) + (+! (-> v1-100 data 9 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s3-2)) + ) + (set! (-> v1-100 data 9 total) (-> v1-100 data 9 used)) + ) + ) + ) + ) + + #| + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near)) + (let ((s3-3 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (let* ((s1-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s2-3 (-> s1-2 base)) + ) + (set! (-> *prototype-tie-work* near-wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* near-wait-from-spr) (the-as uint 0)) + (reset! (-> *perf-stats* data 12)) + ;;(draw-inline-array-prototype-tie-near-asm s1-2 s5-1 s4-1) + (read! 
(-> *perf-stats* data 12)) + (update-wait-stats (-> *perf-stats* data 12) (the-as uint 0) + (-> *prototype-tie-work* near-wait-to-spr) + (-> *prototype-tie-work* near-wait-from-spr) + ) + (let ((a3-16 (-> s1-2 base))) + (let ((v1-123 (the-as object (-> s1-2 base)))) + (set! (-> (the-as dma-packet v1-123) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-123) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-123) vif1) (new 'static 'vif-tag)) + (set! (-> s1-2 base) (&+ (the-as pointer v1-123) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as bucket-id (if (zero? (-> arg1 index)) + (bucket-id tie-near-0) + (bucket-id tie-near-1) + ) + ) + s2-3 + (the-as (pointer dma-tag) a3-16) + ) + ) + ) + (let ((a0-92 *dma-mem-usage*)) + (when (nonzero? a0-92) + (set! (-> a0-92 length) (max 16 (-> a0-92 length))) + (set! (-> a0-92 data 15 name) "tie-near") + (+! (-> a0-92 data 15 count) 1) + (+! (-> a0-92 data 15 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s3-3)) + ) + (set! (-> a0-92 data 15 total) (-> a0-92 data 15 used)) + ) + ) + ) + )|# + ) + ) + ) + ) + 0 + ) + (set! (-> arg1 closest-object 5) (-> *instance-tie-work* min-dist x)) + 0 + (none) + ) + +(defmethod draw drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 drawable-tree-instance-tie) (arg1 display-frame)) + "Add the tree to the background work list." + (let* ((v1-1 (-> *background-work* tie-tree-count)) + (a1-2 (-> (scratchpad-object terrain-context) bsp lev-index)) + (a1-5 (-> *level* level a1-2)) + ) + (set! (-> *background-work* tie-trees v1-1) obj) + (set! (-> *background-work* tie-levels v1-1) a1-5) + ) + (+! (-> *background-work* tie-tree-count) 1) + (none) + ) + +(defmethod collect-stats drawable-tree-instance-tie ((obj drawable-tree-instance-tie)) + "Collect statistics on TIE drawing." + + ;; only if tie/generic ran + (when (logtest? 
*vu1-enable-user* (vu1-renderer-mask tie-near tie generic)) + ;; unused? + (-> obj data (+ (-> obj length) -1)) + + ;; loop over all prototypes. + ;; the drawing process will write to the prototypes to say how many of each it draws + (let ((v1-8 (-> obj prototypes prototype-array-tie))) + (dotimes (a0-1 (-> v1-8 length)) + ;; grab the prototype + (let ((a1-2 (-> v1-8 array-data a0-1))) + + ;; GENERIC + (when (logtest? *vu1-enable-user* (vu1-renderer-mask generic)) + ;; there are 4 arrays of fragments per prototype. Looks like we check them all for generic. + (let ((a2-3 0) + (a3-0 3) + ) + (while (>= a3-0 a2-3) + (let ((t0-2 (-> a1-2 generic-count a2-3)) ;; number of times this geom was drawn with generic + (t2-0 (-> a1-2 geometry-override a2-3)) ;; the geom that was drawn + ) + (when (nonzero? t0-2) ;; were we drawn? + (let ((t1-3 (the-as object (-> t2-0 data))) ;; tie fragment array + (t2-1 (-> t2-0 length)) ;; number of tie fragments + ) + (+! (-> *terrain-stats* tie-generic groups) 1) ;; number of geometries drawn (unique) + (+! (-> *terrain-stats* tie-generic fragments) t2-1) ;; number of frags drawn (unique) + (+! (-> *terrain-stats* tie-generic instances) t0-2) ;; number of instances drawn (not unique) + + ;; now, collect stats per fragment + (dotimes (t3-9 t2-1) + (let ((t5-0 (* (-> (the-as tie-fragment t1-3) num-tris) t0-2)) ;; multiply by number of instances + (t4-5 (* (-> (the-as tie-fragment t1-3) num-dverts) t0-2)) + ) + (+! (-> *terrain-stats* tie-generic tris) t5-0) + (+! (-> *terrain-stats* tie-generic dverts) t4-5) + ) + (set! t1-3 (&+ (the-as tie-fragment t1-3) 64)) + ) + ) + ) + ) + (+! a2-3 1) + ) + ) + ) + + ;; normal tie + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let ((a2-9 1) ;; looks like we never draw geom 0's with normal tie? + (a3-1 3) + ) + (while (>= a3-1 a2-9) + (let ((t0-6 (-> a1-2 count a2-9)) + (t2-2 (-> a1-2 geometry-override a2-9)) + ) + (when (nonzero? 
t0-6) + (let ((t1-8 (the-as object (-> t2-2 data))) + (t2-3 (-> t2-2 length)) + ) + (+! (-> *terrain-stats* tie groups) 1) + (+! (-> *terrain-stats* tie fragments) t2-3) + (+! (-> *terrain-stats* tie instances) t0-6) + (dotimes (t3-19 t2-3) + (let ((t5-5 (* (-> (the-as tie-fragment t1-8) num-tris) t0-6)) + (t4-12 (* (-> (the-as tie-fragment t1-8) num-dverts) t0-6)) + ) + (+! (-> *terrain-stats* tie tris) t5-5) + (+! (-> *terrain-stats* tie dverts) t4-12) + ) + (set! t1-8 (&+ (the-as tie-fragment t1-8) 64)) + ) + ) + ) + ) + (+! a2-9 1) + ) + ) + ) + + ;; near tie + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near)) + (let ((a2-14 (-> a1-2 count 0)) ;; always geom 0. + (a3-2 (-> a1-2 geometry-override 0)) + ) + (when (nonzero? a2-14) + (let ((a1-3 (the-as object (-> a3-2 data))) + (a3-3 (-> a3-2 length)) + ) + (+! (-> *terrain-stats* tie-near groups) 1) + (+! (-> *terrain-stats* tie-near fragments) a3-3) + (+! (-> *terrain-stats* tie-near instances) a2-14) + (dotimes (t0-19 a3-3) + (let ((t2-4 (* (-> (the-as tie-fragment a1-3) num-tris) a2-14)) + (t1-15 (* (-> (the-as tie-fragment a1-3) num-dverts) a2-14)) + ) + (+! (-> *terrain-stats* tie-near tris) t2-4) + (+! (-> *terrain-stats* tie-near dverts) t1-15) + ) + (set! a1-3 (&+ (the-as tie-fragment a1-3) 64)) + ) + ) + ) + ) + ) + ) + ) + ) + ) + (none) + ) + + + +(defmethod debug-draw drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 drawable) (arg1 display-frame)) + (-> obj data (+ (-> obj length) -1)) + (let* ((s5-0 (-> obj prototypes prototype-array-tie)) + (s4-0 (-> s5-0 length)) + ) + (dotimes (s3-0 s4-0) + (let ((a1-1 (-> s5-0 array-data s3-0 geometry 0))) + (debug-draw a1-1 a1-1 arg1) + ) + ) + ) + (none) + ) + +;;;;;;;;;;;;;;;;; +;; TIE collision +;;;;;;;;;;;;;;;;; + +;; note: the first three methods appear twice in the original code. 
+ +(defmethod collide-with-box drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-with-box (-> obj data 0) (-> obj length) arg1) + 0 + (none) + ) + +(defmethod collide-y-probe drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-y-probe (-> obj data 0) (-> obj length) arg1) + 0 + (none) + ) + +(defmethod collide-ray drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-ray (-> obj data 0) (-> obj length) arg1) + 0 + (none) + ) + + +(defmethod collide-with-box drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-with-box (the-as instance-tie (-> obj data)) (-> obj length) arg1) + 0 + (none) + ) + +(defmethod collide-y-probe drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-y-probe (the-as instance-tie (-> obj data)) (-> obj length) arg1) + 0 + (none) + ) + +(defmethod collide-ray drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list)) + (collide-ray (the-as instance-tie (-> obj data)) (-> obj length) arg1) + 0 + (none) + ) + +(defun tie-test-cam-restore () + (let ((a0-0 (new-stack-vector0)) + (a1-0 (new-stack-matrix0)) + ) + (set! (-> a0-0 x) 1246582.6) + (set! (-> a0-0 y) 57026.02) + (set! (-> a0-0 z) -490734.78) + (set! (-> a0-0 w) 1.0) + (set! (-> a1-0 vector 0 x) -0.9873) + (set! (-> a1-0 vector 0 y) 0.0) + (set! (-> a1-0 vector 0 z) -0.1587) + (set! (-> a1-0 vector 0 w) 0.0) + (set! (-> a1-0 vector 1 x) 0.0014) + (set! (-> a1-0 vector 1 y) 0.9999) + (set! (-> a1-0 vector 1 z) -0.0092) + (set! (-> a1-0 vector 1 w) 0.0) + (set! (-> a1-0 vector 2 x) 0.1587) + (set! (-> a1-0 vector 2 y) -0.0093) + (set! (-> a1-0 vector 2 z) -0.9872) + (set! (-> a1-0 vector 2 w) 0.0) + (set! (-> a1-0 vector 3 x) 0.0) + (set! 
(-> a1-0 vector 3 y) 0.0) + (set! (-> a1-0 vector 3 z) 0.0) + (set! (-> a1-0 vector 3 w) 1.0) + (debug-set-camera-pos-rot! a0-0 a1-0) + ) + (send-event *camera* 'set-fov 11650.845) + (none) + ) \ No newline at end of file diff --git a/goal_src/engine/gfx/tie/tie-near.gc b/goal_src/engine/gfx/tie/tie-near.gc index 54c92b79ac..f1eada9781 100644 --- a/goal_src/engine/gfx/tie/tie-near.gc +++ b/goal_src/engine/gfx/tie/tie-near.gc @@ -5,3 +5,268 @@ ;; name in dgo: tie-near ;; dgos: GAME, ENGINE +;; The "near" version of TIE. +;; This correctly handles scissoring triangles. +;; Like tfrag near, we plan to not port this because it's complicated, and instead let opengl do +;; the hard work for us. + +;; This isn't super well documented or even complete, see tie.gc. + +;; uploaded to VU1 once per frame. +(deftype tie-near-consts (structure) + ((extra qword :inline :offset-assert 0) + (gifbufs qword :inline :offset-assert 16) + (clrbufs qword :inline :offset-assert 32) + (adgif gs-gif-tag :inline :offset-assert 48) + (strgif gs-gif-tag :inline :offset-assert 64) + (fangif gs-gif-tag :inline :offset-assert 80) + (hvdfoffs vector :inline :offset-assert 96) + (invhscale vector :inline :offset-assert 112) + (guard vector :inline :offset-assert 128) + (atest ad-cmd 2 :inline :offset-assert 144) + (atest-tra ad-cmd :inline :offset 144) + (atest-def ad-cmd :inline :offset 160) + ) + :method-count-assert 9 + :size-assert #xb0 + :flag-assert #x9000000b0 + ) + +;; the actual program. +(define tie-near-vu1-block (new 'static 'vu-function :length #x6f8 :qlength #x37c)) + +(defun tie-near-init-consts ((arg0 tie-near-consts) (arg1 int)) + "Initialize tie near constant data." + (set! (-> arg0 adgif tag) (new 'static 'gif-tag64 :nloop #x5 :nreg #x1)) + (set! (-> arg0 adgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d))) + (set! (-> arg0 atest-tra cmds) (the-as uint 71)) + (set! (-> arg0 atest-tra data) (the-as uint #x5026b)) + (set! 
(-> arg0 atest-def cmds) (the-as uint 71)) + (set! (-> arg0 atest-def data) (the-as uint #x5000e)) + (cond + ((zero? *subdivide-draw-mode*) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 3) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1) + :nreg #x3 + ) + ) + ) + ((= *subdivide-draw-mode* 1) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 2) + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :fge #x1 :abe arg1) + ) + ) + ) + ) + (set! (-> arg0 strgif regs) + (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2)) + ) + (set! (-> arg0 fangif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-fan) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + (set! (-> arg0 fangif regs) + (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2)) + ) + (let ((f1-0 8388894.0) + (f2-0 8389078.0) + (f0-0 8389262.0) + ) + (set! (-> arg0 gifbufs vector4w x) (the-as int f0-0)) + (set! (-> arg0 gifbufs vector4w y) (the-as int f2-0)) + (set! (-> arg0 gifbufs vector4w z) (the-as int f0-0)) + (set! (-> arg0 gifbufs vector4w w) (the-as int f2-0)) + (set! (-> arg0 extra vector4w x) (the-as int (+ f1-0 f2-0 f0-0))) + (set! (-> arg0 extra vector4w y) (the-as int 0.0)) + (set! (-> arg0 extra vector4w z) (the-as int (+ f1-0 f2-0 f0-0))) + ) + (set! (-> arg0 clrbufs vector4w x) 198) + (set! 
(-> arg0 clrbufs vector4w y) 242) + (set! (-> arg0 clrbufs vector4w z) 198) + (set! (-> arg0 clrbufs vector4w w) 242) + (let ((v1-41 *math-camera*)) + (set! (-> arg0 invhscale quad) (-> v1-41 inv-hmge-scale quad)) + (set! (-> arg0 hvdfoffs quad) (-> v1-41 hvdf-off quad)) + (set! (-> arg0 guard quad) (-> v1-41 guard quad)) + ) + (none) + ) + +;; SKIPPED tie-near-init-engine +;; SKIPPED tie-near-end-buffer + +(defun tie-near-make-perspective-matrix ((arg0 matrix)) + (column-scale-matrix! arg0 (-> *math-camera* hmge-scale) (-> *math-camera* camera-temp)) + ) + +(defun tie-near-int-reg ((arg0 int)) + (let ((v1-0 arg0)) + (cond + ((zero? v1-0) + "zero" + ) + ((= v1-0 1) + "itemp" + ) + ((= v1-0 2) + "delta" + ) + ((= v1-0 3) + "dest-0" + ) + ((= v1-0 4) + "dest-1" + ) + ((= v1-0 5) + "dest-2" + ) + ((= v1-0 6) + "dest-3" + ) + ((= v1-0 7) + "delta-ptr" + ) + ((= v1-0 8) + "prev" + ) + ((= v1-0 9) + "itemp2" + ) + ) + ) + ) + +(defun tie-near-float-reg ((arg0 int)) + (let ((v1-0 arg0)) + (cond + ((zero? 
v1-0) + "zero" + ) + ((= v1-0 1) + "vtx-0" + ) + ((= v1-0 2) + "vtx-1" + ) + ((= v1-0 3) + "vtx-2" + ) + ((= v1-0 4) + "vtx-3" + ) + ((= v1-0 5) + "hvtx-0" + ) + ((= v1-0 6) + "hvtx-1" + ) + ((= v1-0 7) + "hvtx-2" + ) + ((= v1-0 8) + "hvtx-3" + ) + ((= v1-0 9) + "tex-0" + ) + ((= v1-0 10) + "tex-1" + ) + ((= v1-0 11) + "tex-2" + ) + ((= v1-0 12) + "tex-3" + ) + ((= v1-0 13) + "deltas" + ) + ((= v1-0 14) + "invh" + ) + ((= v1-0 15) + "hvdfcl" + ) + ((= v1-0 16) + "hvdfnc" + ) + ((= v1-0 17) + "--" + ) + ((= v1-0 18) + "--" + ) + ((= v1-0 19) + "--" + ) + ((= v1-0 20) + "--" + ) + ((= v1-0 19) + "--" + ) + ((= v1-0 20) + "--" + ) + ((= v1-0 21) + "gifbuf" + ) + ((= v1-0 22) + "clrbuf" + ) + ((= v1-0 23) + "extra" + ) + ((= v1-0 24) + "inds" + ) + ((= v1-0 25) + "--" + ) + ((= v1-0 26) + "--" + ) + ((= v1-0 27) + "morph" + ) + ((= v1-0 28) + "xyzofs" + ) + ((= v1-0 29) + "--" + ) + ((= v1-0 30) + "--" + ) + ((= v1-0 31) + "--" + ) + ) + ) + ) diff --git a/goal_src/engine/gfx/tie/tie-work.gc b/goal_src/engine/gfx/tie/tie-work.gc index e6da0f5056..36a66cdc2b 100644 --- a/goal_src/engine/gfx/tie/tie-work.gc +++ b/goal_src/engine/gfx/tie/tie-work.gc @@ -6,199 +6,122 @@ ;; dgos: GAME, ENGINE ;; definition for symbol *instance-tie-work*, type instance-tie-work -(define - *instance-tie-work* - (new 'static 'instance-tie-work - :wind-const - (new 'static 'vector :x 0.5 :y 100.0 :z 0.0166 :w -1.0) - :constant - (new 'static 'vector :x 4096.0 :y 128.0) - :far-morph (new 'static 'vector :x 1.0 :w 256.0) - :upload-color-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :qwc #x6 :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x80c6 :num #x6 :cmd (vif-cmd unpack-v4-32)) - ) - :upload-color-1 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd stmod)) - :vif1 - (new 'static 'vif-tag :imm #xc0cc :cmd (vif-cmd unpack-v4-8)) - ) - :upload-color-2 - (new 'static 'dma-packet - :dma - (new 
'static 'dma-tag :id (dma-tag-id next)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd stmod)) - ) - :upload-color-ret - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ret)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd stmod)) - ) - :generic-color-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :qwc #x6 :id (dma-tag-id ref)) - :vif0 (new 'static 'vif-tag :imm #x3) - ) - :generic-color-1 - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id ref))) - :generic-color-end - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id end))) - :refl-fade-fac -0.000625 - :refl-fade-end 409600.0 - ) - ) -;; failed to figure out what this is: -(set! - (-> *instance-tie-work* upload-color-2 vif1) - (new 'static 'vif-tag :cmd (vif-cmd mscal) :msk #x1) - ) +;; helpful constants for the instance drawing EE asm. +(define *instance-tie-work* + (new 'static 'instance-tie-work + :wind-const (new 'static 'vector :x 0.5 :y 100.0 :z 0.0166 :w -1.0) + :constant (new 'static 'vector :x 4096.0 :y 128.0) + :far-morph (new 'static 'vector :x 1.0 :w 256.0) + :upload-color-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :qwc #x6 :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x80c6 :num #x6 :cmd (vif-cmd unpack-v4-32)) + ) + :upload-color-1 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd stmod)) + :vif1 (new 'static 'vif-tag :imm #xc0cc :cmd (vif-cmd unpack-v4-8)) + ) + :upload-color-2 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id next)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd stmod)) + ) + :upload-color-ret (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ret)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd stmod)) + ) + :generic-color-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :qwc #x6 :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :imm #x3) + ) + :generic-color-1 (new 'static 
'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id ref))) + :generic-color-end (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id end))) + :refl-fade-fac -0.000625 + :refl-fade-end 409600.0 + ) + ) -;; failed to figure out what this is: -(set! - (-> *instance-tie-work* upload-color-ret vif1) - (new 'static 'vif-tag :cmd (vif-cmd mscal) :msk #x1) - ) +(set! (-> *instance-tie-work* upload-color-2 vif1) (new 'static 'vif-tag :cmd (vif-cmd mscal) :msk #x1)) +(set! (-> *instance-tie-work* upload-color-ret vif1) (new 'static 'vif-tag :cmd (vif-cmd mscal) :msk #x1)) -;; definition for symbol *prototype-tie-work*, type prototype-tie-work -(define - *prototype-tie-work* - (new 'static 'prototype-tie-work - :upload-palette-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id cnt)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1) - ) - :upload-palette-1 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :qwc #x20 :id (dma-tag-id cnt)) - :vif0 - (new 'static 'vif-tag :imm #x1 :cmd (vif-cmd stmod)) - :vif1 - (new 'static 'vif-tag :imm #x4346 :num #x80 :cmd (vif-cmd unpack-v4-8)) - ) - :upload-model-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd stmod)) - :vif1 - (new 'static 'vif-tag :cmd (vif-cmd unpack-v4-32)) - ) - :upload-model-1 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x4000 :cmd (vif-cmd unpack-v4-8)) - ) - :upload-model-2 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x32 :cmd (vif-cmd unpack-v4-16)) - ) - :upload-model-3 - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id call))) - :upload-model-near-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif0 - (new 'static 'vif-tag :cmd (vif-cmd stmod)) - :vif1 - (new 'static 
'vif-tag :cmd (vif-cmd unpack-v4-32)) - ) - :upload-model-near-1 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x4000 :cmd (vif-cmd unpack-v4-8)) - ) - :upload-model-near-2 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x1e :cmd (vif-cmd unpack-v4-8)) - ) - :upload-model-near-3 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif1 - (new 'static 'vif-tag :imm #x32 :cmd (vif-cmd unpack-v4-16)) - ) - :upload-model-near-4 - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id call))) - :generic-envmap-shader - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :qwc #x5 :id (dma-tag-id ref)) - :vif0 (new 'static 'vif-tag :imm #x1) - ) - :generic-palette - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :qwc #x20 :id (dma-tag-id cnt)) - :vif0 (new 'static 'vif-tag :imm #x1) - ) - :generic-model-0 - (new 'static 'dma-packet - :dma - (new 'static 'dma-tag :id (dma-tag-id ref)) - :vif0 (new 'static 'vif-tag :imm #x2) - ) - :generic-model-1 - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id ref))) - :generic-model-2 - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id ref))) - :generic-model-next - (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id next))) - :clamp #x8000ff00ff00ff - ) - ) +;; helpful constants for the prototype drawing EE asm +(define *prototype-tie-work* + (new 'static 'prototype-tie-work + :upload-palette-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id cnt)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1) + ) + :upload-palette-1 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :qwc #x20 :id (dma-tag-id cnt)) + :vif0 (new 'static 'vif-tag :imm #x1 :cmd (vif-cmd stmod)) + :vif1 (new 'static 'vif-tag :imm #x4346 :num #x80 :cmd (vif-cmd unpack-v4-8)) + ) + 
:upload-model-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd stmod)) + :vif1 (new 'static 'vif-tag :cmd (vif-cmd unpack-v4-32)) + ) + :upload-model-1 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x4000 :cmd (vif-cmd unpack-v4-8)) + ) + :upload-model-2 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x32 :cmd (vif-cmd unpack-v4-16)) + ) + :upload-model-3 (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id call))) + :upload-model-near-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :cmd (vif-cmd stmod)) + :vif1 (new 'static 'vif-tag :cmd (vif-cmd unpack-v4-32)) + ) + :upload-model-near-1 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x4000 :cmd (vif-cmd unpack-v4-8)) + ) + :upload-model-near-2 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x1e :cmd (vif-cmd unpack-v4-8)) + ) + :upload-model-near-3 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif1 (new 'static 'vif-tag :imm #x32 :cmd (vif-cmd unpack-v4-16)) + ) + :upload-model-near-4 (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id call))) + :generic-envmap-shader (new 'static 'dma-packet + :dma (new 'static 'dma-tag :qwc #x5 :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :imm #x1) + ) + :generic-palette (new 'static 'dma-packet + :dma (new 'static 'dma-tag :qwc #x20 :id (dma-tag-id cnt)) + :vif0 (new 'static 'vif-tag :imm #x1) + ) + :generic-model-0 (new 'static 'dma-packet + :dma (new 'static 'dma-tag :id (dma-tag-id ref)) + :vif0 (new 'static 'vif-tag :imm #x2) + ) + :generic-model-1 (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id 
ref))) + :generic-model-2 (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id ref))) + :generic-model-next (new 'static 'dma-packet :dma (new 'static 'dma-tag :id (dma-tag-id next))) + :clamp #x8000ff00ff00ff + ) + ) -;; failed to figure out what this is: -(set! - (-> *prototype-tie-work* upload-model-1 vif0) - (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscal) :msk #x1) - ) +(set! (-> *prototype-tie-work* upload-model-1 vif0) + (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscal) :msk #x1) + ) -;; failed to figure out what this is: -(set! - (-> *prototype-tie-work* upload-model-3 vif0) - (new 'static 'vif-tag :imm #x6 :cmd (vif-cmd mscal) :msk #x1) - ) +(set! (-> *prototype-tie-work* upload-model-3 vif0) + (new 'static 'vif-tag :imm #x6 :cmd (vif-cmd mscal) :msk #x1) + ) -;; failed to figure out what this is: -(set! - (-> *prototype-tie-work* upload-model-near-1 vif0) - (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscal) :msk #x1) - ) +(set! (-> *prototype-tie-work* upload-model-near-1 vif0) + (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscal) :msk #x1) + ) -;; failed to figure out what this is: -(set! - (-> *prototype-tie-work* upload-model-near-4 vif0) - (new 'static 'vif-tag :imm #x6 :cmd (vif-cmd mscal) :msk #x1) - ) +(set! (-> *prototype-tie-work* upload-model-near-4 vif0) + (new 'static 'vif-tag :imm #x6 :cmd (vif-cmd mscal) :msk #x1) + ) diff --git a/goal_src/engine/gfx/tie/tie.gc b/goal_src/engine/gfx/tie/tie.gc index d60325d0b5..fd9ac50c9c 100644 --- a/goal_src/engine/gfx/tie/tie.gc +++ b/goal_src/engine/gfx/tie/tie.gc @@ -5,3 +5,811 @@ ;; name in dgo: tie ;; dgos: GAME, ENGINE + +;; TIE +;; tessellating fragment instance engine + +;; The TIE renderer is one of three main background renderers. +;; TIE has the following features: +;; - instanced rendering (you can draw the same thing multiple times!) 
+;; - time of day lighting (believed to be slightly different in implementation than tfrag) + + +;; Background elements that use the GENERIC renderer will store their data in tie-fragments. +;; The exact procedure for GENERIC through TIE is unknown. +;; The functions are generic-tie-execute and generic-tie-convert. + +;;;;;;;;;;;;;;;;; +;; Basic Methods +;;;;;;;;;;;;;;;;; + +;; something was going wrong with mem-usage (believed fixed) + +(defmethod login tie-fragment ((obj tie-fragment)) + "Initialize the shaders for a tie-fragment" + + ;; the gif data is just adgif shaders, each are 5 qw's + (let ((s5-0 (-> obj gif-ref)) + (s4-0 (/ (-> obj tex-count) (the-as uint 5))) + ) + (dotimes (s3-0 (the-as int s4-0)) + ;; will modify the adgif-shaders in place to have the appropriate tbp. + (adgif-shader-login-no-remap (-> s5-0 s3-0)) + ) + ) + obj + ) + +(defmethod inspect drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie)) + "Inspect an array of instances" + (format #t "[~8x] ~A~%" obj (-> obj type)) + (format #t "~Tlength: ~D~%" (-> obj length)) + (format #t "~Tdata[~D]: @ #x~X~%" (-> obj length) (-> obj data)) + (dotimes (s5-0 (-> obj length)) + (format #t "~T [~D] ~A~%" s5-0 (-> obj data s5-0)) + ) + obj + ) + +(defmethod asize-of drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie)) + "Compute the size in memory of an array of instances." + (the-as int (+ (-> drawable-inline-array-instance-tie size) + (* (+ (-> obj length) -1) 64) ;; 64 bytes / instance, minus the 1 in the type. + ) + ) + ) + +#| +;; for some reason, this showed up twice. +(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie)) +obj +) +|# + +(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie)) + "Login method for the tie instance tree." + ;; just log in all of the drawables. 
+ (dotimes (s5-0 (-> obj length)) + (login (-> obj data s5-0)) + ) + (the-as drawable-tree-instance-tie #f) + ) + +(defmethod inspect prototype-tie ((obj prototype-tie)) + "Inspect the inline-array of tie" + (format #t "[~8x] ~A~%" obj (-> obj type)) + (format #t "~Tlength: ~D~%" (-> obj length)) + (format #t "~Tdata[~D]: @ #x~X~%" (-> obj length) (-> obj data)) + ;; print each fragment + (dotimes (s5-0 (-> obj length)) + (format #t "~T [~D] ~A~%" s5-0 (-> obj data s5-0)) + ) + obj + ) + +(defmethod login prototype-tie ((obj prototype-tie)) + "Login each tie-fragment." + (dotimes (s5-0 (-> obj length)) + (login (-> obj data s5-0)) + ) + obj + ) + +(defmethod mem-usage drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 memory-usage-block) (arg1 int)) + "Compute memory usage for a drawable tree of TIE instances" + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-7 32)) + (+! (-> arg0 data 0 used) v1-7) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15))) + ) + + ;; do our drawables + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj data s3-0) arg0 arg1) + ) + + ;; do our prototypes, but with a flag set! + (mem-usage (-> obj prototypes prototype-array-tie) arg0 (logior arg1 1)) + obj + ) + +(defmethod mem-usage tie-fragment ((obj tie-fragment) (arg0 memory-usage-block) (arg1 int)) + "Compute the memory usage of a TIE prototype." + (when (logtest? arg1 2) + ;; count for an instance of this prototype. + (let ((v1-3 (* (-> obj color-count) 4)) + ;; pick one of the instance color categories. + (a0-2 (cond + ((logtest? arg1 4) + 20 + ) + ((logtest? arg1 8) + 21 + ) + (else + 22 + ) + ) + ) + ) + (+! (-> arg0 data a0-2 count) 1) + (+! (-> arg0 data a0-2 used) v1-3) + (+! (-> arg0 data a0-2 total) (logand -4 (+ v1-3 3))) + ) + (set! (-> arg0 length) (max 23 (-> arg0 length))) + (set! 
obj obj) + (goto cfg-13) + ) + + ;; not an instance, count the memory of the prototype. + (set! (-> arg0 length) (max 18 (-> arg0 length))) + (set! (-> arg0 data 9 name) "tie-fragment") + (set! (-> arg0 data 10 name) "tie-gif") + (set! (-> arg0 data 11 name) "tie-points") + (set! (-> arg0 data 12 name) "tie-colors") + (set! (-> arg0 data 14 name) "tie-debug") + (set! (-> arg0 data 13 name) "tie-draw-points") + (set! (-> arg0 data 17 name) "tie-generic") + (+! (-> arg0 data 9 count) 1) + (let ((v1-21 (asize-of obj))) + (+! (-> arg0 data 9 used) v1-21) + (+! (-> arg0 data 9 total) (logand -16 (+ v1-21 15))) + ) + (let ((v1-26 (* (-> obj gif-count) 16))) + (+! (-> arg0 data 10 count) (-> obj tex-count)) + (+! (-> arg0 data 10 used) v1-26) + (+! (-> arg0 data 10 total) (logand -16 (+ v1-26 15))) + ) + (let ((v1-31 (* (-> obj vertex-count) 16))) + (+! (-> arg0 data 11 count) (-> obj vertex-count)) + (+! (-> arg0 data 11 used) v1-31) + (+! (-> arg0 data 11 total) (logand -16 (+ v1-31 15))) + ) + (let ((v1-36 (* (-> obj dp-qwc) 16))) + (+! (-> arg0 data 13 count) (* (-> obj dp-qwc) 16)) + (+! (-> arg0 data 13 used) v1-36) + (+! (-> arg0 data 13 total) (logand -16 (+ v1-36 15))) + ) + (let ((v1-41 (* (-> obj generic-count) 16))) + (+! (-> arg0 data 17 count) 1) + (+! (-> arg0 data 17 used) v1-41) + (+! (-> arg0 data 17 total) (logand -16 (+ v1-41 15))) + ) + (when (nonzero? (-> obj debug-lines)) + (dotimes (s4-0 (-> obj debug-lines length)) + (+! + (-> arg0 data 14 count) + (-> (the-as (pointer int32) (-> obj debug-lines s4-0)) 0) + ) + (let ((v1-52 (asize-of (the-as basic (-> obj debug-lines s4-0))))) + (+! (-> arg0 data 12 used) v1-52) + (+! (-> arg0 data 12 total) (logand -16 (+ v1-52 15))) + ) + ) + ) + (label cfg-13) + obj + ) + +(defmethod mem-usage instance-tie ((obj instance-tie) (arg0 memory-usage-block) (arg1 int)) + "Compute the memory usage of TIE instance." + (set! (-> arg0 length) (max 19 (-> arg0 length))) + (set! (-> arg0 data 18 name) "instance-tie") + (+! 
(-> arg0 data 18 count) 1) + (let ((v1-6 (asize-of obj))) + (+! (-> arg0 data 18 used) v1-6) + (+! (-> arg0 data 18 total) (logand -16 (+ v1-6 15))) + ) + (when (nonzero? (-> obj error)) + (set! (-> arg0 length) (max 24 (-> arg0 length))) + (set! (-> arg0 data 23 name) "instance-tie-colors*") + (set! (-> arg0 data 19 name) "instance-tie-colors0") + (set! (-> arg0 data 20 name) "instance-tie-colors1") + (set! (-> arg0 data 21 name) "instance-tie-colors2") + (set! (-> arg0 data 22 name) "instance-tie-colors3") + (+! (-> arg0 data 23 count) 1) + (let ((s3-0 (-> obj bucket-ptr))) + ;; unused + (+ (-> arg0 data 19 used) (-> arg0 data 20 used) (-> arg0 data 21 used) (-> arg0 data 22 used)) + + ;; loop over all 4 possible geometries + (dotimes (s2-0 4) + (let ((a0-10 (-> s3-0 geometry-override s2-0))) + (when (nonzero? a0-10) ;; only if we actually have it. + (mem-usage a0-10 arg0 + (logior (logior (cond ;; based on which geom we are, pick the right color bucket. + ((= s2-0 1) 4) + ((= s2-0 2) 8) + ((= s2-0 3) 16) + (else 0) + ) + 2 ;; set so we count it as an instance of a prototype. + ) + arg1 + ) + ) + ) + ) + ) + ) + ) + obj + ) + +(defmethod mem-usage drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 memory-usage-block) (arg1 int)) + "Compute the memory usage of an entire array of instances" + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! (-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-7 32)) + (+! (-> arg0 data 0 used) v1-7) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15))) + ) + ;; just call mem-usage on every element. + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj data s3-0) arg0 arg1) + ) + obj + ) + +(defmethod mem-usage prototype-tie ((obj prototype-tie) (arg0 memory-usage-block) (arg1 int)) + "Compute the memory usage of an entire array of prototypes." + (set! (-> arg0 length) (max 1 (-> arg0 length))) + (set! 
(-> arg0 data 0 name) (symbol->string 'drawable-group)) + (+! (-> arg0 data 0 count) 1) + (let ((v1-7 32)) + (+! (-> arg0 data 0 used) v1-7) + (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15))) + ) + ;; just call mem-usage on each. + (dotimes (s3-0 (-> obj length)) + (mem-usage (-> obj data s3-0) arg0 arg1) + ) + obj + ) + +(defmethod asize-of prototype-tie ((obj prototype-tie)) + "Compute the size in memory of a prototype array" + ;; 64 bytes/fragment, minus 1 in the type. + (the-as int (+ (-> prototype-tie size) (* (+ (-> obj length) -1) 64))) + ) + +;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TIE Renderer +;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; these constants are uploaded to the VU once per frame. +(deftype tie-consts (structure) + ((data uint32 24 :offset-assert 0) + (vector vector 6 :inline :offset 0) + (quads uint128 6 :offset 0) + (adgif gs-gif-tag :inline :offset 0) ;; was qword + (strgif gs-gif-tag :inline :offset 16) ;; was qword + (extra vector :inline :offset 32) ;; was qword + (gifbufs vector :inline :offset 48) ;; was qword + (clrbufs qword :inline :offset 64) + (misc qword :inline :offset 80) + (atestgif gs-gif-tag :inline :offset 96) + (atest ad-cmd 2 :inline :offset 112) + (atest-tra ad-cmd :inline :offset 112) + (atest-def ad-cmd :inline :offset 128) + ) + :method-count-assert 9 + :size-assert #x90 + :flag-assert #x900000090 + ) +;; definition for symbol tie-vu1-block, type vu-function +(define tie-vu1-block (new 'static 'vu-function :length 0 :qlength 0)) ;; was 0x3e1, 0x1f1 + +;; definition for function tie-init-consts +;; INFO: Return type mismatch int vs none. +(defun tie-init-consts ((arg0 tie-consts) (arg1 int)) + "Initialize TIE constants. arg1 enables alpha blending" + + ;; set the adgif shader tag (just 5x a+d's) + (set! (-> arg0 adgif tag) (new 'static 'gif-tag64 :nloop #x5 :nreg #x1)) + (set! (-> arg0 adgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d))) + + ;; based on the menu drawing mode, create the template tag for geometry. 
+ (cond + ((zero? *subdivide-draw-mode*) ;; normal textured + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 3) ;; "hack". same as normal. + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 1) ;; outline (wireframe). just switch to line-strip + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :fge #x1 :abe arg1) + ) + ) + ) + ((= *subdivide-draw-mode* 2) ;; gouraud - turn off tme. + (set! (-> arg0 strgif tag) + (new 'static 'gif-tag64 + :pre #x1 + :nreg #x3 + :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :fge #x1 :abe arg1) + ) + ) + ) + ) + + ;; the main drawing tag: st, rgbaq, xyzf per vertex (same in all draws modes) + (set! (-> arg0 strgif regs) + (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2)) + ) + + ;; some magic constants + (let ((f1-0 8388894.0) + (f2-0 8389078.0) + (f0-0 8389262.0) + ) + (set! (-> arg0 gifbufs x) f0-0) + (set! (-> arg0 gifbufs y) f2-0) + (set! (-> arg0 gifbufs z) f0-0) + (set! (-> arg0 gifbufs w) f2-0) + (set! (-> arg0 extra x) (+ f1-0 f2-0 f0-0)) + (set! (-> arg0 extra y) 0.0) + (set! (-> arg0 extra z) (+ f1-0 f2-0 f0-0)) + ) + (set! (-> arg0 clrbufs vector4w x) 198) + (set! (-> arg0 clrbufs vector4w y) 242) + (set! (-> arg0 clrbufs vector4w z) 198) + (set! (-> arg0 clrbufs vector4w w) 242) + + ;; looks like tie can toggle on and off alpha testing during the draw + ;; tra = transparent, def = default? + (set! (-> arg0 atestgif tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1)) + (set! 
(-> arg0 atestgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d))) + (set! (-> arg0 atest-tra cmd) (gs-reg test-1)) + (set! (-> arg0 atest-tra data) + (the uint + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x26 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + ) + ) + (set! (-> arg0 atest-def cmd) (gs-reg test-1)) + (set! (-> arg0 atest-def data) + (the uint (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal))) + ) + + ;; more magic constants + (set! (-> arg0 misc vector4w x) 0) + (set! (-> arg0 misc vector4w y) -1) + (none) + ) + +(defun tie-init-engine ((arg0 dma-buffer) (arg1 gs-test) (arg2 int)) + "Set up tie initialization DMA in the given buffer. arg2 picks abe." + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + ;; add the tie code + (dma-buffer-add-vu-function arg0 tie-vu1-block 1) + + ;; set up the given gs-test register + (let* ((v1-3 arg0) + (a0-2 (the-as object (-> v1-3 base))) + ) + (set! (-> (the-as dma-packet a0-2) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-2) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-2) vif1) (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1)) + (set! (-> v1-3 base) (&+ (the-as pointer a0-2) 16)) + ) + (let* ((v1-4 arg0) + (a0-4 (the-as object (-> v1-4 base))) + ) + (set! (-> (the-as gs-gif-tag a0-4) tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1)) + (set! (-> (the-as gs-gif-tag a0-4) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! 
(-> v1-4 base) (the-as pointer (&+ (the-as gs-gif-tag a0-4) 16))) + ) + (let* ((v1-5 arg0) + (a0-6 (-> v1-5 base)) + ) + (set! (-> (the-as (pointer gs-test) a0-6) 0) arg1) + (set! (-> (the-as (pointer gs-reg64) a0-6) 1) (gs-reg64 test-1)) + (set! (-> v1-5 base) (&+ a0-6 16)) + ) + + ;; set up the tie constants + (let ((s4-1 9)) + (let* ((v1-6 arg0) + (a0-8 (the-as object (-> v1-6 base))) + ) + (set! (-> (the-as dma-packet a0-8) dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc s4-1)) + (set! (-> (the-as dma-packet a0-8) vif0) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as dma-packet a0-8) vif1) + (new 'static 'vif-tag :imm #x3c6 :cmd (vif-cmd unpack-v4-32) :num s4-1) + ) + (set! (-> v1-6 base) (&+ (the-as pointer a0-8) 16)) + ) + (tie-init-consts (the-as tie-consts (-> arg0 base)) arg2) + (&+! (-> arg0 base) (* s4-1 16)) + ) + + ;; initialize the microprogram + (let* ((v1-9 arg0) + (a0-12 (the-as object (-> v1-9 base))) + ) + (set! (-> (the-as dma-packet a0-12) dma) (new 'static 'dma-tag :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-12) vif0) (new 'static 'vif-tag :imm #x8 :cmd (vif-cmd mscalf) :msk #x1)) + (set! (-> (the-as dma-packet a0-12) vif1) (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1)) + (set! (-> v1-9 base) (&+ (the-as pointer a0-12) 16)) + ) + + ;; initialize the VIF's ROW register + (let* ((v1-10 arg0) + (a0-14 (the-as object (-> v1-10 base))) + ) + (set! (-> (the-as dma-packet a0-14) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-14) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-14) vif1) (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1)) + (set! (-> v1-10 base) (&+ (the-as pointer a0-14) 16)) + ) + (let ((v1-11 (the-as object (-> arg0 base)))) + ;; row contants + (set! (-> (the-as (inline-array vector4w) v1-11) 0 x) #x4b000000) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 y) #x4b000000) + (set! 
(-> (the-as (inline-array vector4w) v1-11) 0 z) #x4b000000) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 w) #x4b000000) + ;; setup VIF unpack and double buffering modes. + (set! (-> (the-as (pointer vif-tag) v1-11) 4) (new 'static 'vif-tag :cmd (vif-cmd base))) + (set! (-> (the-as (pointer vif-tag) v1-11) 5) (new 'static 'vif-tag :imm #x2c :cmd (vif-cmd offset))) + (set! (-> (the-as (pointer vif-tag) v1-11) 6) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as (pointer vif-tag) v1-11) 7) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! (-> arg0 base) (&+ (the-as pointer v1-11) 32)) + ) + 0 + ) + 0 + (none) + ) + +(defun tie-end-buffer ((arg0 dma-buffer)) + "Add to dma buffer after drawing. This resets things to the usual state." + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let* ((v1-3 arg0) + (a1-0 (the-as object (-> v1-3 base))) + ) + (set! (-> (the-as dma-packet a1-0) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a1-0) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a1-0) vif1) (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1)) + (set! (-> v1-3 base) (&+ (the-as pointer a1-0) 16)) + ) + ;; restore the test register + (let* ((v1-4 arg0) + (a1-2 (the-as object (-> v1-4 base))) + ) + (set! (-> (the-as gs-gif-tag a1-2) tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1)) + (set! (-> (the-as gs-gif-tag a1-2) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! 
(-> v1-4 base) (&+ (the-as pointer a1-2) 16)) + ) + (let* ((v1-5 arg0) + (a1-4 (-> v1-5 base)) + ) + (set! (-> (the-as (pointer gs-test) a1-4) 0) + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + ) + (set! (-> (the-as (pointer gs-reg64) a1-4) 1) (gs-reg64 test-1)) + (set! (-> v1-5 base) (&+ a1-4 16)) + ) + + ;; restore the stmask register + (let* ((v1-6 arg0) + (a1-6 (the-as object (-> v1-6 base))) + ) + (set! (-> (the-as dma-packet a1-6) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a1-6) vif0) (new 'static 'vif-tag :cmd (vif-cmd stmask))) + (set! (-> (the-as dma-packet a1-6) vif1) (new 'static 'vif-tag)) + (set! (-> v1-6 base) (&+ (the-as pointer a1-6) 16)) + ) + + ;; this calls a TIE program that... does nothing? + (let* ((v1-7 arg0) + (a0-1 (-> v1-7 base)) + ) + ;; run the nothing program + (set! (-> (the-as (pointer vif-tag) a0-1) 0) (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscalf) :msk #x1)) + ;; reset VIF stuff we changed + (set! (-> (the-as (pointer vif-tag) a0-1) 1) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as (pointer vif-tag) a0-1) 2) (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1)) + (set! (-> (the-as (pointer vif-tag) a0-1) 3) (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1)) + ;; actually row constants = 0 + (set! (-> (the-as (pointer vif-tag) a0-1) 4) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 5) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 6) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 7) (new 'static 'vif-tag)) + (set! (-> v1-7 base) (&+ a0-1 32)) + ) + 0 + ) + 0 + (none) + ) + +;;;;;;;;;;;;;;;; +;; debug print +;;;;;;;;;;;;;;;; + +(defun-debug tie-int-reg ((arg0 int)) + "Convert a VU1 int register ID to the name given to it in the TIE VU1 program" + (let ((v1-0 arg0)) + (cond + ((zero? 
v1-0) + "zero" + ) + ((= v1-0 1) + "itemp" + ) + ((= v1-0 2) + "point-ptr" + ) + ((= v1-0 3) + "clr-ptr" + ) + ((= v1-0 4) + "target-bp1-ptr" + ) + ((= v1-0 5) + "skip-bp2" + ) + ((= v1-0 6) + "target-bp2-ptr" + ) + ((= v1-0 7) + "target-ip1-ptr" + ) + ((= v1-0 8) + "target-ip2-ptr" + ) + ((= v1-0 9) + "ind/ind0" + ) + ((= v1-0 10) + " ind1" + ) + ((= v1-0 11) + " ind2" + ) + ((= v1-0 12) + "dest-ptr" + ) + ((= v1-0 13) + "dest2-ptr" + ) + ((= v1-0 14) + "skip-ips" + ) + ((= v1-0 15) + "kick-addr" + ) + ) + ) + ) + +(defun-debug tie-float-reg ((arg0 int)) + "Convert a VU1 float register ID to the name given to it in the TIE VU1 program" + (let ((v1-0 arg0)) + (cond + ((zero? v1-0) + "zero" + ) + ((= v1-0 1) + "t-mtx0" + ) + ((= v1-0 2) + "t-mtx1" + ) + ((= v1-0 3) + "t-mtx2" + ) + ((= v1-0 4) + "t-mtx3" + ) + ((= v1-0 5) + "vtx-0" + ) + ((= v1-0 6) + "vtx-1" + ) + ((= v1-0 7) + "vtx-2" + ) + ((= v1-0 8) + "vtx-3" + ) + ((= v1-0 9) + "pos-0/2" + ) + ((= v1-0 10) + "pos-1/3" + ) + ((= v1-0 11) + "clr-0" + ) + ((= v1-0 12) + "clr-1" + ) + ((= v1-0 13) + "clr-2" + ) + ((= v1-0 14) + "clr-3" + ) + ((= v1-0 15) + "tex-0" + ) + ((= v1-0 16) + "tex-1" + ) + ((= v1-0 17) + "tex-2" + ) + ((= v1-0 18) + "tex-3" + ) + ((= v1-0 19) + "res-0/2" + ) + ((= v1-0 20) + "res-1/3" + ) + ((= v1-0 21) + "gifbuf" + ) + ((= v1-0 22) + "clrbuf" + ) + ((= v1-0 23) + "extra" + ) + ((= v1-0 24) + "inds" + ) + ((= v1-0 25) + "--" + ) + ((= v1-0 26) + "--" + ) + ((= v1-0 27) + "morph" + ) + ((= v1-0 28) + "xyzofs" + ) + ((= v1-0 29) + "clr1" + ) + ((= v1-0 30) + "clr2" + ) + ((= v1-0 31) + "--" + ) + ) + ) + ) + + +;; NOTE: these dump programs rely on a not-present function that stashes the register values +;; at the top of the VU memory. + +(defun-debug tie-ints () + "Dump the VU1 integer registers to stdout." 
+ (local-vars (sv-16 uint)) + (let ((gp-0 (the-as (pointer uint32) (+ #x3fa0 #x1100c000)))) + (dotimes (s5-0 16) + (if (< s5-0 10) + (format 0 " ") + ) + (let ((s4-0 format) + (s3-0 0) + (s2-0 "vi~d: ~6d #x~4,'0X ~s~%") + (s1-0 s5-0) + (s0-0 (-> gp-0 (* s5-0 4))) + ) + (set! sv-16 (-> gp-0 (* s5-0 4))) + (let ((t1-0 (tie-int-reg s5-0))) + (s4-0 s3-0 s2-0 s1-0 s0-0 sv-16 t1-0) + ) + ) + ) + ) + (none) + ) + +(defun-debug tie-floats () + "Dump the VU1 float registers to stdout." + (local-vars (sv-16 uint) (sv-32 uint)) + (let ((gp-0 (the-as (pointer uint32) (+ #x3da0 #x1100c000)))) + (dotimes (s5-0 32) + (if (< s5-0 10) + (format 0 " ") + ) + (format + 0 + "vf~d: #x~8,'0X #x~8,'0X #x~8,'0X #x~8,'0X " + s5-0 + (-> gp-0 (* s5-0 4)) + (-> gp-0 (+ (* s5-0 4) 1)) + (-> gp-0 (+ (* s5-0 4) 2)) + (-> gp-0 (+ (* s5-0 4) 3)) + ) + (let ((s4-0 format) + (s3-0 0) + (s2-0 "~F ~F ~F ~F ~s~%") + (s1-0 (-> gp-0 (* s5-0 4))) + (s0-0 (-> gp-0 (+ (* s5-0 4) 1))) + ) + (set! sv-16 (-> gp-0 (+ (* s5-0 4) 2))) + (set! 
sv-32 (-> gp-0 (+ (* s5-0 4) 3))) + (let ((t2-1 (tie-float-reg s5-0))) + (s4-0 s3-0 s2-0 s1-0 s0-0 sv-16 sv-32 t2-1) + ) + ) + ) + ) + (none) + ) + + diff --git a/goal_src/game.gp b/goal_src/game.gp index 355c4875a3..a53b618002 100644 --- a/goal_src/game.gp +++ b/goal_src/game.gp @@ -76,10 +76,12 @@ ) (defun tpage-name (id) + "Get the name of the tpage obj file with the given id" (fmt #f "tpage-{}.go" id) ) (defmacro copy-texture (tpage-id) + "Copy a texture from the game, using the given tpage ID" (let* ((folder (get-environment-variable "OPENGOAL_DECOMP_DIR" :default "")) (path (string-append "decompiler_out/" folder "raw_obj/" (tpage-name tpage-id)))) `(defstep :in ,path @@ -105,6 +107,18 @@ ) ) +(defmacro copy-strs (&rest strs) + `(begin ,@(apply (lambda (x) `(copy-str ,x)) strs))) + +(defmacro copy-str (name) + (let* ((folder (get-environment-variable "OPENGOAL_DECOMP_DIR" :default "")) + (path (string-append "iso_data/" folder "/STR/" name ".STR"))) + `(defstep :in ,path + :tool 'copy + :out '(,(string-append "out/iso/" name ".STR"))))) + + + (defmacro group (name &rest stuff) `(defstep :in "" :tool 'group @@ -166,6 +180,13 @@ (copy-textures 463 2 880 256 1278 1032 62 1532) +;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Streaming anim (common) +;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(copy-strs "FUCV3" + "FUCV4") + ;;;;;;;;;;;;;;;;;;;;; ;; Art (Common) @@ -220,6 +241,8 @@ "out/iso/BEA.DGO" "out/iso/CIT.DGO" "out/iso/SUN.DGO" + "out/iso/FUCV3.STR" + "out/iso/FUCV4.STR" ) diff --git a/goalc/CMakeLists.txt b/goalc/CMakeLists.txt index b31834dfa8..d2cc8a4859 100644 --- a/goalc/CMakeLists.txt +++ b/goalc/CMakeLists.txt @@ -59,4 +59,3 @@ endif () add_executable(goalc main.cpp) target_link_libraries(goalc common Zydis compiler) -install(TARGETS goalc) diff --git a/test/decompiler/reference/engine/game/powerups_REF.gc b/test/decompiler/reference/engine/game/powerups_REF.gc index cb0596c028..280a57971f 100644 --- a/test/decompiler/reference/engine/game/powerups_REF.gc +++ 
b/test/decompiler/reference/engine/game/powerups_REF.gc @@ -776,7 +776,7 @@ (when (and (< 0.0 (-> self fact-info-target eco-level)) (zero? (logand (-> self state-flags) 512)) - (zero? (logand (-> self draw status) 6)) + (zero? (logand (-> self draw status) (draw-status drwf01 drwf02))) (not (movie?)) (rand-vu-percent? (lerp-scale @@ -968,7 +968,3 @@ 0 (none) ) - - - - diff --git a/test/decompiler/reference/engine/gfx/generic/generic-h_REF.gc b/test/decompiler/reference/engine/gfx/generic/generic-h_REF.gc index a313596956..03f2252d38 100644 --- a/test/decompiler/reference/engine/gfx/generic/generic-h_REF.gc +++ b/test/decompiler/reference/engine/gfx/generic/generic-h_REF.gc @@ -355,7 +355,7 @@ (quad uint128 :offset 0) (data uint64 :offset 0) (cmds uint64 :offset 8) - (cmd uint8 :offset 8) + (cmd gs-reg :offset 8) (x uint32 :offset 0) (y uint32 :offset 4) (z uint32 :offset 8) diff --git a/test/decompiler/reference/engine/gfx/tie/prototype-h_REF.gc b/test/decompiler/reference/engine/gfx/tie/prototype-h_REF.gc index e0a75189e1..94cde73c04 100644 --- a/test/decompiler/reference/engine/gfx/tie/prototype-h_REF.gc +++ b/test/decompiler/reference/engine/gfx/tie/prototype-h_REF.gc @@ -149,6 +149,7 @@ (color-index-qwc uint32 :dynamic :offset-assert 148) (generic-next-clear uint128 :offset 96) (generic-count-clear uint128 :offset 80) + (geometry-override prototype-tie 4 :offset 16) ) :method-count-assert 9 :size-assert #x94 @@ -163,7 +164,7 @@ (format #t "~Tflags: ~D~%" (-> obj flags)) (format #t "~Tin-level: ~D~%" (-> obj in-level)) (format #t "~Tutextures: ~D~%" (-> obj utextures)) - (format #t "~Tgeometry[4] @ #x~X~%" (-> obj geometry)) + (format #t "~Tgeometry[4] @ #x~X~%" (-> obj geometry-override)) (format #t "~Tdists: #~%" (-> obj dists)) (format #t "~Trdists: #~%" (-> obj rdists)) (format #t "~Tnext[4] @ #x~X~%" (-> obj next)) @@ -203,7 +204,7 @@ :size-assert #x10 :flag-assert #xa00000010 (:methods - (TODO-RENAME-9 (_type_) none 9) + (login (_type_) none 9) ) ) diff 
--git a/test/decompiler/reference/engine/gfx/tie/prototype_REF.gc b/test/decompiler/reference/engine/gfx/tie/prototype_REF.gc index 52206db936..fa68682de7 100644 --- a/test/decompiler/reference/engine/gfx/tie/prototype_REF.gc +++ b/test/decompiler/reference/engine/gfx/tie/prototype_REF.gc @@ -3,11 +3,11 @@ ;; definition for method 9 of type prototype-array-tie ;; INFO: Return type mismatch prototype-array-tie vs none. -(defmethod TODO-RENAME-9 prototype-array-tie ((obj prototype-array-tie)) +(defmethod login prototype-array-tie ((obj prototype-array-tie)) (dotimes (s5-0 (-> obj length)) (let ((s4-0 (-> obj array-data s5-0))) (dotimes (s3-0 4) - (let ((a0-1 (-> s4-0 geometry s3-0))) + (let ((a0-1 (-> s4-0 geometry-override s3-0))) (if (nonzero? a0-1) (login a0-1) ) @@ -65,7 +65,7 @@ ;; definition for method 8 of type prototype-bucket-tie (defmethod mem-usage prototype-bucket-tie ((obj prototype-bucket-tie) (arg0 memory-usage-block) (arg1 int)) (dotimes (s3-0 4) - (let ((a0-1 (-> obj geometry s3-0))) + (let ((a0-1 (-> obj geometry-override s3-0))) (if (nonzero? 
a0-1) (mem-usage a0-1 arg0 (logior arg1 1)) ) diff --git a/test/decompiler/reference/engine/gfx/tie/tie-h_REF.gc b/test/decompiler/reference/engine/gfx/tie/tie-h_REF.gc index 403ee6c251..4a21012aa4 100644 --- a/test/decompiler/reference/engine/gfx/tie/tie-h_REF.gc +++ b/test/decompiler/reference/engine/gfx/tie/tie-h_REF.gc @@ -3,21 +3,21 @@ ;; definition of type tie-fragment (deftype tie-fragment (drawable) - ((gif-ref uint32 :offset 4) - (point-ref uint32 :offset 8) - (color-index uint16 :offset 12) - (base-colors uint8 :offset 14) - (tex-count uint16 :offset-assert 32) - (gif-count uint16 :offset-assert 34) - (vertex-count uint16 :offset-assert 36) - (color-count uint16 :offset-assert 38) - (num-tris uint16 :offset-assert 40) - (num-dverts uint16 :offset-assert 42) - (dp-ref uint32 :offset-assert 44) - (dp-qwc uint32 :offset-assert 48) - (generic-ref uint32 :offset-assert 52) - (generic-count uint32 :offset-assert 56) - (debug-lines basic :offset-assert 60) + ((gif-ref (inline-array adgif-shader) :offset 4) + (point-ref uint32 :offset 8) + (color-index uint16 :offset 12) + (base-colors uint8 :offset 14) + (tex-count uint16 :offset-assert 32) + (gif-count uint16 :offset-assert 34) + (vertex-count uint16 :offset-assert 36) + (color-count uint16 :offset-assert 38) + (num-tris uint16 :offset-assert 40) + (num-dverts uint16 :offset-assert 42) + (dp-ref uint32 :offset-assert 44) + (dp-qwc uint32 :offset-assert 48) + (generic-ref uint32 :offset-assert 52) + (generic-count uint32 :offset-assert 56) + (debug-lines (array vector-array) :offset-assert 60) ) :method-count-assert 18 :size-assert #x40 diff --git a/test/decompiler/reference/engine/gfx/tie/tie-methods_REF.gc b/test/decompiler/reference/engine/gfx/tie/tie-methods_REF.gc new file mode 100644 index 0000000000..1dafcb768b --- /dev/null +++ b/test/decompiler/reference/engine/gfx/tie/tie-methods_REF.gc @@ -0,0 +1,897 @@ +;;-*-Lisp-*- +(in-package goal) + +;; definition for function tie-init-buffers +;; INFO: Return 
type mismatch int vs none. +(defun tie-init-buffers ((arg0 dma-buffer)) + (let ((gp-0 (-> *display* frames (-> *display* on-screen) frame bucket-group 9))) + (when (!= gp-0 (-> gp-0 last)) + (let* ((s5-0 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-1 (-> s5-0 base)) + ) + (tie-init-engine + s5-0 + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + 0 + ) + (let ((v1-8 (the-as object (-> s5-0 base)))) + (set! (-> (the-as dma-packet v1-8) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-0 next))) + (set! (-> (the-as dma-packet v1-8) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-8) vif1) (new 'static 'vif-tag)) + (set! (-> s5-0 base) (&+ (the-as pointer v1-8) 16)) + ) + (set! (-> gp-0 next) (the-as uint s4-1)) + ) + ) + ) + (let ((gp-1 (-> *display* frames (-> *display* on-screen) frame bucket-group 9))) + (when (!= gp-1 (-> gp-1 last)) + (let* ((s4-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-1 (-> s4-2 base)) + ) + (tie-end-buffer s4-2) + (let ((v1-19 (-> s4-2 base))) + (let ((a0-17 (the-as object (-> s4-2 base)))) + (set! (-> (the-as dma-packet a0-17) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-17) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-17) vif1) (new 'static 'vif-tag)) + (set! (-> s4-2 base) (&+ (the-as pointer a0-17) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-1 last)) 1) (the-as uint s5-1)) + (set! (-> gp-1 last) (the-as (pointer dma-tag) v1-19)) + ) + ) + ) + ) + (let ((gp-2 (-> *display* frames (-> *display* on-screen) frame bucket-group 16))) + (when (!= gp-2 (-> gp-2 last)) + (let* ((s5-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-4 (-> s5-2 base)) + ) + (tie-init-engine + s5-2 + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + 0 + ) + (let ((v1-28 (the-as object (-> s5-2 base)))) + (set! 
(-> (the-as dma-packet v1-28) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-2 next))) + (set! (-> (the-as dma-packet v1-28) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-28) vif1) (new 'static 'vif-tag)) + (set! (-> s5-2 base) (&+ (the-as pointer v1-28) 16)) + ) + (set! (-> gp-2 next) (the-as uint s4-4)) + ) + ) + ) + (let ((gp-3 (-> *display* frames (-> *display* on-screen) frame bucket-group 16))) + (when (!= gp-3 (-> gp-3 last)) + (let* ((s4-5 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-3 (-> s4-5 base)) + ) + (tie-end-buffer s4-5) + (let ((v1-39 (-> s4-5 base))) + (let ((a0-36 (the-as object (-> s4-5 base)))) + (set! (-> (the-as dma-packet a0-36) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-36) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-36) vif1) (new 'static 'vif-tag)) + (set! (-> s4-5 base) (&+ (the-as pointer a0-36) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-3 last)) 1) (the-as uint s5-3)) + (set! (-> gp-3 last) (the-as (pointer dma-tag) v1-39)) + ) + ) + ) + ) + (let ((gp-4 (-> *display* frames (-> *display* on-screen) frame bucket-group 8))) + (when (!= gp-4 (-> gp-4 last)) + (let* ((s5-4 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-7 (-> s5-4 base)) + ) + (tie-near-init-engine + s5-4 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x26 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 0 + ) + (let ((v1-48 (the-as object (-> s5-4 base)))) + (set! (-> (the-as dma-packet v1-48) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-4 next))) + (set! (-> (the-as dma-packet v1-48) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-48) vif1) (new 'static 'vif-tag)) + (set! (-> s5-4 base) (&+ (the-as pointer v1-48) 16)) + ) + (set! 
(-> gp-4 next) (the-as uint s4-7)) + ) + ) + ) + (let ((gp-5 (-> *display* frames (-> *display* on-screen) frame bucket-group 8))) + (when (!= gp-5 (-> gp-5 last)) + (let* ((s4-8 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-5 (-> s4-8 base)) + ) + (tie-near-end-buffer s4-8) + (let ((v1-59 (-> s4-8 base))) + (let ((a0-55 (the-as object (-> s4-8 base)))) + (set! (-> (the-as dma-packet a0-55) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-55) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-55) vif1) (new 'static 'vif-tag)) + (set! (-> s4-8 base) (&+ (the-as pointer a0-55) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-5 last)) 1) (the-as uint s5-5)) + (set! (-> gp-5 last) (the-as (pointer dma-tag) v1-59)) + ) + ) + ) + ) + (let ((gp-6 (-> *display* frames (-> *display* on-screen) frame bucket-group 15))) + (when (!= gp-6 (-> gp-6 last)) + (let* ((s5-6 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s4-10 (-> s5-6 base)) + ) + (tie-near-init-engine + s5-6 + (new 'static 'gs-test + :ate #x1 + :atst (gs-atest greater-equal) + :aref #x26 + :zte #x1 + :ztst (gs-ztest greater-equal) + ) + 0 + ) + (let ((v1-68 (the-as object (-> s5-6 base)))) + (set! (-> (the-as dma-packet v1-68) dma) (new 'static 'dma-tag :id (dma-tag-id next) :addr (-> gp-6 next))) + (set! (-> (the-as dma-packet v1-68) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-68) vif1) (new 'static 'vif-tag)) + (set! (-> s5-6 base) (&+ (the-as pointer v1-68) 16)) + ) + (set! (-> gp-6 next) (the-as uint s4-10)) + ) + ) + ) + (let ((gp-7 (-> *display* frames (-> *display* on-screen) frame bucket-group 15))) + (when (!= gp-7 (-> gp-7 last)) + (let* ((s4-11 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s5-7 (-> s4-11 base)) + ) + (tie-near-end-buffer s4-11) + (let ((v1-79 (-> s4-11 base))) + (let ((a0-74 (the-as object (-> s4-11 base)))) + (set! 
(-> (the-as dma-packet a0-74) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet a0-74) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a0-74) vif1) (new 'static 'vif-tag)) + (set! (-> s4-11 base) (&+ (the-as pointer a0-74) 16)) + ) + (set! (-> (the-as (pointer uint32) (-> gp-7 last)) 1) (the-as uint s5-7)) + (set! (-> gp-7 last) (the-as (pointer dma-tag) v1-79)) + ) + ) + ) + ) + 0 + (none) + ) + +;; definition of type tie-instance-debug +(deftype tie-instance-debug (structure) + ((max-instance uint32 :offset-assert 0) + (min-instance uint32 :offset-assert 4) + ) + :method-count-assert 9 + :size-assert #x8 + :flag-assert #x900000008 + ) + +;; definition for method 3 of type tie-instance-debug +(defmethod inspect tie-instance-debug ((obj tie-instance-debug)) + (format #t "[~8x] ~A~%" obj 'tie-instance-debug) + (format #t "~Tmax-instance: ~D~%" (-> obj max-instance)) + (format #t "~Tmin-instance: ~D~%" (-> obj min-instance)) + obj + ) + +;; definition for symbol *tie*, type tie-instance-debug +(define *tie* (new 'global 'tie-instance-debug)) + +;; definition for function tie-debug-between +(defun tie-debug-between ((arg0 uint) (arg1 uint)) + (set! (-> *instance-tie-work* test-id) arg1) + (set! (-> *instance-tie-work* test-id2) arg0) + arg0 + ) + +;; definition for function tie-debug-one +(defun tie-debug-one ((arg0 uint) (arg1 uint)) + (set! (-> *instance-tie-work* test-id) (+ arg1 -1 arg0)) + (set! (-> *instance-tie-work* test-id2) arg0) + arg0 + ) + +;; definition for function walk-tie-generic-prototypes +;; INFO: Return type mismatch symbol vs none. +(defun walk-tie-generic-prototypes () + (none) + ) + +;; definition for symbol *pke-hack*, type vector +(define *pke-hack* (new 'global 'vector)) + +;; definition for function draw-inline-array-instance-tie +;; ERROR: function was not converted to expressions. Cannot decompile. 
+ +;; definition for function draw-inline-array-prototype-tie-generic-asm +;; ERROR: function was not converted to expressions. Cannot decompile. + +;; definition for function draw-inline-array-prototype-tie-asm +;; ERROR: function was not converted to expressions. Cannot decompile. + +;; definition for function draw-inline-array-prototype-tie-near-asm +;; ERROR: function was not converted to expressions. Cannot decompile. + +;; definition for method 9 of type drawable-tree-instance-tie +;; INFO: this function exists in multiple non-identical object files +(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie)) + (if (nonzero? (-> obj prototypes prototype-array-tie)) + (login (-> obj prototypes prototype-array-tie)) + ) + (dotimes (s5-0 (-> obj length)) + (login (-> obj data s5-0)) + ) + obj + ) + +;; definition for function draw-drawable-tree-instance-tie +;; INFO: Return type mismatch int vs none. +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr0, r0] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr1, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, a0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr0] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr1] +;; WARN: Unsupported 
inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr0, r0] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr1, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, a0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr0] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr1] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr0, r0] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr1, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, a0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr0] +;; WARN: 
Unsupported inline assembly instruction kind - [mfpc a0, pcr1] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr0, r0] +;; WARN: Unsupported inline assembly instruction kind - [mtpc pcr1, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, a0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mtc0 Perf, r0] +;; WARN: Unsupported inline assembly instruction kind - [sync.l] +;; WARN: Unsupported inline assembly instruction kind - [sync.p] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr0] +;; WARN: Unsupported inline assembly instruction kind - [mfpc a0, pcr1] +;; Used lq/sq +(defun draw-drawable-tree-instance-tie ((arg0 drawable-tree-instance-tie) (arg1 level)) + (local-vars + (r0-0 none) + (a0-31 int) + (a0-33 int) + (a0-46 int) + (a0-48 int) + (a0-62 int) + (a0-64 int) + (a0-82 int) + (a0-84 int) + (sv-16 int) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near tie generic)) + (set! (-> *instance-tie-work* first-generic-prototype) (the-as uint 0)) + (set! (-> *instance-tie-work* wind-vectors) (-> arg0 prototypes wind-vectors)) + (let ((s4-0 (+ (-> arg0 length) -1))) + (when (nonzero? 
s4-0) + (dotimes (s3-0 s4-0) + (let* ((v1-10 (-> arg0 data s3-0)) + (a0-5 (-> arg0 data (+ s3-0 1))) + (a1-2 (/ (-> (the-as drawable-inline-array-node v1-10) data 0 id) 8)) + (a0-7 (/ (-> (the-as drawable-inline-array-node a0-5) data 0 id) 8)) + (a1-4 (+ a1-2 #x38b0 #x70000000)) + (a0-9 (+ a0-7 #x38b0 #x70000000)) + ) + (draw-node-cull + (the-as pointer a0-9) + (the-as pointer a1-4) + (-> (the-as drawable-inline-array-node v1-10) data) + (-> (the-as drawable-inline-array-node v1-10) length) + ) + ) + ) + ) + (let* ((v1-16 (-> arg0 data s4-0)) + (s4-1 (-> arg0 prototypes prototype-array-tie)) + (s5-1 (-> s4-1 length)) + ) + (dotimes (a0-11 s5-1) + (let ((a1-7 (-> s4-1 array-data a0-11))) + (set! (-> a1-7 next-clear) (the-as uint128 0)) + (set! (-> a1-7 generic-count-clear) (the-as uint128 0)) + (set! (-> a1-7 generic-next-clear) (the-as uint128 0)) + ) + 0 + ) + (let* ((s1-0 (-> (the-as drawable-inline-array-instance-tie v1-16) data)) + (s0-0 (&-> (the-as terrain-context #x70000000) work background vis-list (/ (-> s1-0 0 id) 8))) + (s3-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + ) + (set! sv-16 (-> (the-as drawable-inline-array-node v1-16) length)) + (when (nonzero? sv-16) + (let* ((v1-21 (logand (the-as int *gsf-buffer*) 8191)) + (v1-23 + (logand (the-as int (&- (logand (the-as int (&-> (-> s4-1 data) -512)) 8191) (the-as uint v1-21))) 8191) + ) + ) + (set! *instance-tie-work-copy* (the-as instance-tie-work (+ (the-as int *gsf-buffer*) v1-23))) + ) + (let ((s2-0 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (quad-copy! (the-as pointer *instance-tie-work-copy*) (the-as pointer *instance-tie-work*) 28) + (set! (-> *instance-tie-work-copy* wait-to-spr) (the-as uint 0)) + (set! (-> *instance-tie-work-copy* wait-from-spr) (the-as uint 0)) + (let* ((v1-32 (-> *perf-stats* data 9)) + (a0-28 (-> v1-32 ctrl)) + ) + (+! (-> v1-32 count) 1) + (b! (zero? 
a0-28) cfg-12 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-28) + ) + (.sync.l) + (.sync.p) + (label cfg-12) + 0 + (let ((t9-2 draw-inline-array-instance-tie) + (a3-1 s3-1) + ) + (t9-2 s0-0 (the-as drawable s1-0) sv-16 a3-1) + ) + (let ((v1-35 (-> *perf-stats* data 9))) + (b! (zero? (-> v1-35 ctrl)) cfg-14 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-31 pcr0) + (+! (-> v1-35 accum0) a0-31) + (.mfpc a0-33 pcr1) + (+! (-> v1-35 accum1) a0-33) + ) + (label cfg-14) + 0 + (update-wait-stats + (-> *perf-stats* data 9) + (the-as uint 0) + (-> *instance-tie-work-copy* wait-to-spr) + (-> *instance-tie-work-copy* wait-from-spr) + ) + (let ((v1-42 (-> *instance-tie-work-copy* min-dist quad))) + (set! (-> *instance-tie-work* min-dist quad) v1-42) + ) + (set! (-> *instance-tie-work* flags) (-> *instance-tie-work-copy* flags)) + (let ((a0-38 *dma-mem-usage*)) + (when (nonzero? a0-38) + (set! (-> a0-38 length) (max 10 (-> a0-38 length))) + (set! (-> a0-38 data 9 name) "tie-fragment") + (+! (-> a0-38 data 9 count) 1) + (+! + (-> a0-38 data 9 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s2-0)) + ) + (set! (-> a0-38 data 9 total) (-> a0-38 data 9 used)) + ) + ) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask generic)) + (when (logtest? (-> *instance-tie-work* flags) 2) + (let ((s2-1 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (set! (-> *prototype-tie-work* generic-wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* generic-wait-from-spr) (the-as uint 0)) + (set! (-> *instance-tie-work* first-generic-prototype) (the-as uint (-> s3-1 base))) + (let* ((v1-60 (-> *perf-stats* data 10)) + (a0-43 (-> v1-60 ctrl)) + ) + (+! (-> v1-60 count) 1) + (b! (zero? 
a0-43) cfg-20 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-43) + ) + (.sync.l) + (.sync.p) + (label cfg-20) + 0 + (draw-inline-array-prototype-tie-generic-asm s3-1 s5-1 s4-1) + (let ((v1-63 (-> *perf-stats* data 10))) + (b! (zero? (-> v1-63 ctrl)) cfg-22 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-46 pcr0) + (+! (-> v1-63 accum0) a0-46) + (.mfpc a0-48 pcr1) + (+! (-> v1-63 accum1) a0-48) + ) + (label cfg-22) + 0 + (update-wait-stats + (-> *perf-stats* data 10) + (the-as uint 0) + (-> *prototype-tie-work* generic-wait-to-spr) + (-> *prototype-tie-work* generic-wait-from-spr) + ) + (let ((a0-51 *dma-mem-usage*)) + (when (nonzero? a0-51) + (set! (-> a0-51 length) (max 18 (-> a0-51 length))) + (set! (-> a0-51 data 17 name) "tie-generic") + (+! (-> a0-51 data 17 count) 1) + (+! + (-> a0-51 data 17 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s2-1)) + ) + (set! (-> a0-51 data 17 total) (-> a0-51 data 17 used)) + ) + ) + ) + ) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let ((s3-2 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let* ((s1-1 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s2-2 (-> s1-1 base)) + ) + (set! (-> *prototype-tie-work* wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* wait-from-spr) (the-as uint 0)) + (let* ((v1-85 (-> *perf-stats* data 11)) + (a0-59 (-> v1-85 ctrl)) + ) + (+! (-> v1-85 count) 1) + (b! (zero? a0-59) cfg-28 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-59) + ) + (.sync.l) + (.sync.p) + (label cfg-28) + 0 + (draw-inline-array-prototype-tie-asm s1-1 s5-1 s4-1) + (let ((v1-88 (-> *perf-stats* data 11))) + (b! (zero? 
(-> v1-88 ctrl)) cfg-30 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-62 pcr0) + (+! (-> v1-88 accum0) a0-62) + (.mfpc a0-64 pcr1) + (+! (-> v1-88 accum1) a0-64) + ) + (label cfg-30) + 0 + (update-wait-stats + (-> *perf-stats* data 11) + (the-as uint 0) + (-> *prototype-tie-work* wait-to-spr) + (-> *prototype-tie-work* wait-from-spr) + ) + (let ((a3-11 (-> s1-1 base))) + (let ((v1-94 (the-as object (-> s1-1 base)))) + (set! (-> (the-as dma-packet v1-94) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-94) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-94) vif1) (new 'static 'vif-tag)) + (set! (-> s1-1 base) (&+ (the-as pointer v1-94) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as bucket-id (if (zero? (-> arg1 index)) + 9 + 16 + ) + ) + s2-2 + (the-as (pointer dma-tag) a3-11) + ) + ) + ) + ) + (let ((v1-100 *dma-mem-usage*)) + (when (nonzero? v1-100) + (set! (-> v1-100 length) (max 10 (-> v1-100 length))) + (set! (-> v1-100 data 9 name) "tie-fragment") + (+! (-> v1-100 data 9 count) 1) + (+! + (-> v1-100 data 9 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s3-2)) + ) + (set! (-> v1-100 data 9 total) (-> v1-100 data 9 used)) + ) + ) + ) + ) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near)) + (let ((s3-3 (-> *display* frames (-> *display* on-screen) frame global-buf base))) + (let* ((s1-2 (-> *display* frames (-> *display* on-screen) frame global-buf)) + (s2-3 (-> s1-2 base)) + ) + (set! (-> *prototype-tie-work* near-wait-to-spr) (the-as uint 0)) + (set! (-> *prototype-tie-work* near-wait-from-spr) (the-as uint 0)) + (let* ((v1-114 (-> *perf-stats* data 12)) + (a0-79 (-> v1-114 ctrl)) + ) + (+! (-> v1-114 count) 1) + (b! (zero? 
a0-79) cfg-39 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mtpc pcr0 r0-0) + (.mtpc pcr1 r0-0) + (.sync.l) + (.sync.p) + (.mtc0 Perf a0-79) + ) + (.sync.l) + (.sync.p) + (label cfg-39) + 0 + (draw-inline-array-prototype-tie-near-asm s1-2 s5-1 s4-1) + (let ((v1-117 (-> *perf-stats* data 12))) + (b! (zero? (-> v1-117 ctrl)) cfg-41 :delay (nop!)) + (.mtc0 Perf r0-0) + (.sync.l) + (.sync.p) + (.mfpc a0-82 pcr0) + (+! (-> v1-117 accum0) a0-82) + (.mfpc a0-84 pcr1) + (+! (-> v1-117 accum1) a0-84) + ) + (label cfg-41) + 0 + (update-wait-stats + (-> *perf-stats* data 12) + (the-as uint 0) + (-> *prototype-tie-work* near-wait-to-spr) + (-> *prototype-tie-work* near-wait-from-spr) + ) + (let ((a3-16 (-> s1-2 base))) + (let ((v1-123 (the-as object (-> s1-2 base)))) + (set! (-> (the-as dma-packet v1-123) dma) (new 'static 'dma-tag :id (dma-tag-id next))) + (set! (-> (the-as dma-packet v1-123) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet v1-123) vif1) (new 'static 'vif-tag)) + (set! (-> s1-2 base) (&+ (the-as pointer v1-123) 16)) + ) + (dma-bucket-insert-tag + (-> *display* frames (-> *display* on-screen) frame bucket-group) + (the-as bucket-id (if (zero? (-> arg1 index)) + 8 + 15 + ) + ) + s2-3 + (the-as (pointer dma-tag) a3-16) + ) + ) + ) + (let ((a0-92 *dma-mem-usage*)) + (when (nonzero? a0-92) + (set! (-> a0-92 length) (max 16 (-> a0-92 length))) + (set! (-> a0-92 data 15 name) "tie-near") + (+! (-> a0-92 data 15 count) 1) + (+! + (-> a0-92 data 15 used) + (&- (-> *display* frames (-> *display* on-screen) frame global-buf base) (the-as uint s3-3)) + ) + (set! (-> a0-92 data 15 total) (-> a0-92 data 15 used)) + ) + ) + ) + ) + ) + ) + ) + ) + 0 + ) + (set! (-> arg1 closest-object 5) (-> *instance-tie-work* min-dist x)) + 0 + (none) + ) + +;; definition for method 10 of type drawable-tree-instance-tie +;; INFO: Return type mismatch drawable-tree-instance-tie vs none. 
+;; Registers this tree for later processing: stores obj in (-> *background-work* tie-trees)
+;; and the owning level in (-> *background-work* tie-levels), both at index tie-tree-count,
+;; then bumps tie-tree-count by one. The level is looked up in *level* using the lev-index
+;; read from the bsp of the terrain-context addressed at #x70000000.
+;; arg0 and arg1 are not used in this body.
+(defmethod draw drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 drawable-tree-instance-tie) (arg1 display-frame))
+  (let* ((v1-1 (-> *background-work* tie-tree-count))
+         (a1-2 (-> (the-as terrain-context #x70000000) bsp lev-index))
+         (a1-5 (-> *level* level a1-2))
+         )
+    (set! (-> *background-work* tie-trees v1-1) obj)
+    (set! (-> *background-work* tie-levels v1-1) a1-5)
+    )
+  (+! (-> *background-work* tie-tree-count) 1)
+  (none)
+  )
+
+;; definition for method 14 of type drawable-tree-instance-tie
+;; INFO: Return type mismatch symbol vs none.
+;; Accumulates per-renderer totals into *terrain-stats* for every prototype in
+;; (-> obj prototypes prototype-array-tie). Nothing happens unless at least one of the
+;; tie-near / tie / generic bits is set in *vu1-enable-user*.
+;;   generic  : geometry-override indices 0..3, instance count taken from generic-count
+;;   tie      : geometry-override indices 1..3, instance count taken from count
+;;   tie-near : geometry-override index 0,      instance count taken from count
+;; For each index with a nonzero instance count: groups += 1, fragments += geometry length,
+;; instances += count, and per fragment tris/dverts += num-tris/num-dverts * count.
+;; The fragment cursor is advanced by 64 bytes per iteration.
+(defmethod collect-stats drawable-tree-instance-tie ((obj drawable-tree-instance-tie))
+  (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near tie generic))
+    ;; value of this access is discarded
+    (-> obj data (+ (-> obj length) -1))
+    (let ((v1-8 (-> obj prototypes prototype-array-tie)))
+      (dotimes (a0-1 (-> v1-8 length))
+        (let ((a1-2 (-> v1-8 array-data a0-1)))
+          (when (logtest? *vu1-enable-user* (vu1-renderer-mask generic))
+            (let ((a2-3 0)
+                  (a3-0 3)
+                  )
+              (while (>= a3-0 a2-3)
+                (let ((t0-2 (-> a1-2 generic-count a2-3))
+                      (t2-0 (-> a1-2 geometry-override a2-3))
+                      )
+                  (when (nonzero? t0-2)
+                    (let ((t1-3 (the-as object (-> t2-0 data)))
+                          (t2-1 (-> t2-0 length))
+                          )
+                      (+! (-> *terrain-stats* tie-generic groups) 1)
+                      (+! (-> *terrain-stats* tie-generic fragments) t2-1)
+                      (+! (-> *terrain-stats* tie-generic instances) t0-2)
+                      (dotimes (t3-9 t2-1)
+                        (let ((t5-0 (* (-> (the-as tie-fragment t1-3) num-tris) t0-2))
+                              (t4-5 (* (-> (the-as tie-fragment t1-3) num-dverts) t0-2))
+                              )
+                          (+! (-> *terrain-stats* tie-generic tris) t5-0)
+                          (+! (-> *terrain-stats* tie-generic dverts) t4-5)
+                          )
+                        (set! t1-3 (&+ (the-as tie-fragment t1-3) 64))
+                        )
+                      )
+                    )
+                  )
+                (+! a2-3 1)
+                )
+              )
+            )
+          (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie))
+            ;; note: this loop starts at index 1, unlike the generic loop above
+            (let ((a2-9 1)
+                  (a3-1 3)
+                  )
+              (while (>= a3-1 a2-9)
+                (let ((t0-6 (-> a1-2 count a2-9))
+                      (t2-2 (-> a1-2 geometry-override a2-9))
+                      )
+                  (when (nonzero? t0-6)
+                    (let ((t1-8 (the-as object (-> t2-2 data)))
+                          (t2-3 (-> t2-2 length))
+                          )
+                      (+! (-> *terrain-stats* tie groups) 1)
+                      (+! (-> *terrain-stats* tie fragments) t2-3)
+                      (+! (-> *terrain-stats* tie instances) t0-6)
+                      (dotimes (t3-19 t2-3)
+                        (let ((t5-5 (* (-> (the-as tie-fragment t1-8) num-tris) t0-6))
+                              (t4-12 (* (-> (the-as tie-fragment t1-8) num-dverts) t0-6))
+                              )
+                          (+! (-> *terrain-stats* tie tris) t5-5)
+                          (+! (-> *terrain-stats* tie dverts) t4-12)
+                          )
+                        (set! t1-8 (&+ (the-as tie-fragment t1-8) 64))
+                        )
+                      )
+                    )
+                  )
+                (+! a2-9 1)
+                )
+              )
+            )
+          (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near))
+            (let ((a2-14 (-> a1-2 count 0))
+                  (a3-2 (-> a1-2 geometry-override 0))
+                  )
+              (when (nonzero? a2-14)
+                (let ((a1-3 (the-as object (-> a3-2 data)))
+                      (a3-3 (-> a3-2 length))
+                      )
+                  (+! (-> *terrain-stats* tie-near groups) 1)
+                  (+! (-> *terrain-stats* tie-near fragments) a3-3)
+                  (+! (-> *terrain-stats* tie-near instances) a2-14)
+                  (dotimes (t0-19 a3-3)
+                    (let ((t2-4 (* (-> (the-as tie-fragment a1-3) num-tris) a2-14))
+                          (t1-15 (* (-> (the-as tie-fragment a1-3) num-dverts) a2-14))
+                          )
+                      (+! (-> *terrain-stats* tie-near tris) t2-4)
+                      (+! (-> *terrain-stats* tie-near dverts) t1-15)
+                      )
+                    (set! a1-3 (&+ (the-as tie-fragment a1-3) 64))
+                    )
+                  )
+                )
+              )
+            )
+          )
+        )
+      )
+    )
+  (none)
+  )
+
+;; definition for method 15 of type drawable-tree-instance-tie
+;; INFO: Return type mismatch symbol vs none.
+;; Calls debug-draw on geometry-override 0 of every entry in prototype-array-tie, passing
+;; that geometry as both the receiver and the second argument and forwarding arg1.
+;; arg0 is not used; the first expression's value is discarded.
+(defmethod debug-draw drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 drawable) (arg1 display-frame))
+  (-> obj data (+ (-> obj length) -1))
+  (let* ((s5-0 (-> obj prototypes prototype-array-tie))
+         (s4-0 (-> s5-0 length))
+         )
+    (dotimes (s3-0 s4-0)
+      (let ((a1-1 (-> s5-0 array-data s3-0 geometry-override 0)))
+        (debug-draw a1-1 a1-1 arg1)
+        )
+      )
+    )
+  (none)
+  )
+
+;; definition for method 11 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-with-box on the tree's first data element, passing the tree's
+;; length and arg1. arg0 is not used.
+(defmethod collide-with-box drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-with-box (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 12 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-y-probe on the tree's first data element, passing the tree's
+;; length and arg1. arg0 is not used.
+(defmethod collide-y-probe drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-y-probe (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 13 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-ray on the tree's first data element, passing the tree's
+;; length and arg1. arg0 is not used.
+(defmethod collide-ray drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-ray (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 11 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Second emission of method 11; the text is the same as the first definition in this file.
+(defmethod collide-with-box drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-with-box (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 12 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Second emission of method 12; the text is the same as the first definition in this file.
+(defmethod collide-y-probe drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-y-probe (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 13 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-ray on the tree's first data element, passing the tree's
+;; length and arg1. arg0 is not used.
+(defmethod collide-ray drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-ray (-> obj data 0) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 11 of type drawable-inline-array-instance-tie
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-with-box on the array's data (cast to instance-tie), passing the
+;; array length and arg1. arg0 is not used.
+(defmethod collide-with-box drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-with-box (the-as instance-tie (-> obj data)) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 12 of type drawable-inline-array-instance-tie
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-y-probe on the array's data (cast to instance-tie), passing the
+;; array length and arg1. arg0 is not used.
+(defmethod collide-y-probe drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-y-probe (the-as instance-tie (-> obj data)) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition for method 13 of type drawable-inline-array-instance-tie
+;; INFO: Return type mismatch int vs none.
+;; Forwards to collide-ray on the array's data (cast to instance-tie), passing the
+;; array length and arg1. arg0 is not used.
+(defmethod collide-ray drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 int) (arg1 collide-list))
+  (collide-ray (the-as instance-tie (-> obj data)) (-> obj length) arg1)
+  0
+  (none)
+  )
+
+;; definition (debug) for function tie-test-cam-restore
+;; INFO: Return type mismatch object vs none.
+;; Used lq/sq
+;; Fills a stack vector and a stack matrix with the hard-coded values below, hands both to
+;; debug-set-camera-pos-rot!, then sends a 'set-fov event with 11650.845 to *camera*.
+(defun-debug tie-test-cam-restore ()
+  (let ((a0-0 (new-stack-vector0))
+        (a1-0 (new-stack-matrix0))
+        )
+    (set! (-> a0-0 x) 1246582.6)
+    (set! (-> a0-0 y) 57026.02)
+    (set! (-> a0-0 z) -490734.78)
+    (set! (-> a0-0 w) 1.0)
+    (set! (-> a1-0 vector 0 x) -0.9873)
+    (set! (-> a1-0 vector 0 y) 0.0)
+    (set! (-> a1-0 vector 0 z) -0.1587)
+    (set! (-> a1-0 vector 0 w) 0.0)
+    (set! (-> a1-0 vector 1 x) 0.0014)
+    (set! (-> a1-0 vector 1 y) 0.9999)
+    (set! (-> a1-0 vector 1 z) -0.0092)
+    (set! (-> a1-0 vector 1 w) 0.0)
+    (set! (-> a1-0 vector 2 x) 0.1587)
+    (set! (-> a1-0 vector 2 y) -0.0093)
+    (set! (-> a1-0 vector 2 z) -0.9872)
+    (set! (-> a1-0 vector 2 w) 0.0)
+    (set! (-> a1-0 vector 3 x) 0.0)
+    (set! (-> a1-0 vector 3 y) 0.0)
+    (set! (-> a1-0 vector 3 z) 0.0)
+    (set! (-> a1-0 vector 3 w) 1.0)
+    (debug-set-camera-pos-rot! a0-0 a1-0)
+    )
+  (send-event *camera* 'set-fov 11650.845)
+  (none)
+  )
diff --git a/test/decompiler/reference/engine/gfx/tie/tie-near_REF.gc b/test/decompiler/reference/engine/gfx/tie/tie-near_REF.gc
new file mode 100644
index 0000000000..433fa50812
--- /dev/null
+++ b/test/decompiler/reference/engine/gfx/tie/tie-near_REF.gc
@@ -0,0 +1,424 @@
+;;-*-Lisp-*-
+(in-package goal)
+
+;; definition of type tie-near-consts
+;; Constant block filled in by tie-near-init-consts. atest-tra and atest-def use explicit
+;; offsets 144 and 160, which fall inside the two-entry atest array that starts at 144.
+(deftype tie-near-consts (structure)
+  ((extra qword :inline :offset-assert 0)
+   (gifbufs qword :inline :offset-assert 16)
+   (clrbufs qword :inline :offset-assert 32)
+   (adgif gs-gif-tag :inline :offset-assert 48)
+   (strgif gs-gif-tag :inline :offset-assert 64)
+   (fangif gs-gif-tag :inline :offset-assert 80)
+   (hvdfoffs vector :inline :offset-assert 96)
+   (invhscale vector :inline :offset-assert 112)
+   (guard vector :inline :offset-assert 128)
+   (atest ad-cmd 2 :inline :offset-assert 144)
+   (atest-tra ad-cmd :inline :offset 144)
+   (atest-def ad-cmd :inline :offset 160)
+   )
+  :method-count-assert 9
+  :size-assert #xb0
+  :flag-assert #x9000000b0
+  )
+
+;; definition for method 3 of type tie-near-consts
+;; Prints one line per field with format and returns obj.
+;; NOTE(review): most field lines pass an argument but their format string has no directive
+;; left to consume it ("...: #~%") - looks like text after '#' was lost in extraction;
+;; confirm against the decompiler output before relying on these strings.
+;; NOTE(review): the atest-tra line prints (-> obj atest); both fields are declared at offset 144.
+(defmethod inspect tie-near-consts ((obj tie-near-consts))
+  (format #t "[~8x] ~A~%" obj 'tie-near-consts)
+  (format #t "~Textra: #~%" (-> obj extra))
+  (format #t "~Tgifbufs: #~%" (-> obj gifbufs))
+  (format #t "~Tclrbufs: #~%" (-> obj clrbufs))
+  (format #t "~Tadgif: #~%" (-> obj adgif))
+  (format #t "~Tstrgif: #~%" (-> obj strgif))
+  (format #t "~Tfangif: #~%" (-> obj fangif))
+  (format #t "~Thvdfoffs: #~%" (-> obj hvdfoffs))
+  (format #t "~Tinvhscale: #~%" (-> obj invhscale))
+  (format #t "~Tguard: #~%" (-> obj guard))
+  (format #t "~Tatest[2] @ #x~X~%" (-> obj atest))
+  (format #t "~Tatest-tra: #~%" (-> obj atest))
+  (format #t "~Tatest-def: #~%" (-> obj atest-def))
+  obj
+  )
+
+;; definition for symbol tie-near-vu1-block, type vu-function
+;; Static vu-function object; only its :length and :qlength fields are given here.
+(define tie-near-vu1-block (new 'static 'vu-function :length #x6f8 :qlength #x37c))
+
+;; definition for function tie-near-init-consts
+;; INFO: Return type mismatch vector vs none.
+;; Used lq/sq
+;; Fills every field of arg0. arg1 is used as the :abe value of the primitive tags.
+;; The strgif tag depends on *subdivide-draw-mode*:
+;;   0 -> tri-strip with :tme and :abe arg1      3 -> tri-strip with :tme, no :abe
+;;   1 -> line-strip, no :tme, :abe arg1         2 -> tri-strip, no :tme, :abe arg1
+;; No clause handles other mode values, so strgif tag is left untouched for them.
+;; The last let copies inv-hmge-scale, hvdf-off and guard quads from *math-camera*.
+(defun tie-near-init-consts ((arg0 tie-near-consts) (arg1 int))
+  (set! (-> arg0 adgif tag) (new 'static 'gif-tag64 :nloop #x5 :nreg #x1))
+  (set! (-> arg0 adgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d)))
+  (set! (-> arg0 atest-tra cmds) (the-as uint 71))
+  (set! (-> arg0 atest-tra data) (the-as uint #x5026b))
+  (set! (-> arg0 atest-def cmds) (the-as uint 71))
+  (set! (-> arg0 atest-def data) (the-as uint #x5000e))
+  (cond
+    ((zero? *subdivide-draw-mode*)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 3)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1)
+             :nreg #x3
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 1)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 2)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    )
+  (set! (-> arg0 strgif regs)
+        (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2))
+        )
+  (set! (-> arg0 fangif tag)
+        (new 'static 'gif-tag64
+          :pre #x1
+          :nreg #x3
+          :prim (new 'static 'gs-prim :prim (gs-prim-type tri-fan) :iip #x1 :tme #x1 :fge #x1 :abe arg1)
+          )
+        )
+  (set! (-> arg0 fangif regs)
+        (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2))
+        )
+  ;; float constants are stored into vector4w (integer) lanes through the-as int casts
+  (let ((f1-0 8388894.0)
+        (f2-0 8389078.0)
+        (f0-0 8389262.0)
+        )
+    (set! (-> arg0 gifbufs vector4w x) (the-as int f0-0))
+    (set! (-> arg0 gifbufs vector4w y) (the-as int f2-0))
+    (set! (-> arg0 gifbufs vector4w z) (the-as int f0-0))
+    (set! (-> arg0 gifbufs vector4w w) (the-as int f2-0))
+    (set! (-> arg0 extra vector4w x) (the-as int (+ f1-0 f2-0 f0-0)))
+    (set! (-> arg0 extra vector4w y) (the-as int 0.0))
+    (set! (-> arg0 extra vector4w z) (the-as int (+ f1-0 f2-0 f0-0)))
+    )
+  (set! (-> arg0 clrbufs vector4w x) 198)
+  (set! (-> arg0 clrbufs vector4w y) 242)
+  (set! (-> arg0 clrbufs vector4w z) 198)
+  (set! (-> arg0 clrbufs vector4w w) 242)
+  (let ((v1-41 *math-camera*))
+    (set! (-> arg0 invhscale quad) (-> v1-41 inv-hmge-scale quad))
+    (set! (-> arg0 hvdfoffs quad) (-> v1-41 hvdf-off quad))
+    (set! (-> arg0 guard quad) (-> v1-41 guard quad))
+    )
+  (none)
+  )
+
+;; definition for function tie-near-init-engine
+;; INFO: Return type mismatch int vs none.
+;; Does nothing unless the tie-near bit is set in *vu1-enable-user*. Otherwise appends to
+;; dma-buffer arg0, in order: the tie-near VU function, a 16-byte header built from s4-0 (11),
+;; the 11 * 16 bytes of constants written in place by tie-near-init-consts (arg2 forwarded
+;; as its arg1), and then three groups of raw words (16, 16 and 32 bytes).
+;; arg1 is not used in this body.
+;; NOTE(review): the raw word values are presumably DMA/VIF tags; meanings not shown here.
+(defun tie-near-init-engine ((arg0 dma-buffer) (arg1 gs-test) (arg2 int))
+  (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near))
+    (dma-buffer-add-vu-function arg0 tie-near-vu1-block 1)
+    (let ((s4-0 11))
+      (let* ((v1-2 arg0)
+             (a0-2 (-> v1-2 base))
+             )
+        (set! (-> (the-as (pointer int64) a0-2)) (logior #x10000000 (shr (shl s4-0 48) 48)))
+        (let ((a1-4 #x5000000))
+          (s.w! (+ a0-2 8) a1-4)
+          )
+        (let ((a1-6 (logior #x6c0003c6 (shr (shl s4-0 56) 40))))
+          (s.w! (+ a0-2 12) a1-6)
+          )
+        (set! (-> v1-2 base) (&+ a0-2 16))
+        )
+      ;; constants are written directly at the buffer's current base, then base is advanced
+      (tie-near-init-consts (the-as tie-near-consts (-> arg0 base)) arg2)
+      (&+! (-> arg0 base) (* s4-0 16))
+      )
+    (let* ((v1-5 arg0)
+           (a0-6 (-> v1-5 base))
+           )
+      (set! (-> (the-as (pointer int64) a0-6)) #x10000000)
+      (let ((a1-9 #x15000008))
+        (s.w! (+ a0-6 8) a1-9)
+        )
+      (let ((a1-10 #x13000000))
+        (s.w! (+ a0-6 12) a1-10)
+        )
+      (set! (-> v1-5 base) (&+ a0-6 16))
+      )
+    (let* ((v1-6 arg0)
+           (a0-8 (-> v1-6 base))
+           )
+      (set! (-> (the-as (pointer int64) a0-8)) #x10000002)
+      (s.w! (+ a0-8 8) 0)
+      (let ((a1-12 #x30000000))
+        (s.w! (+ a0-8 12) a1-12)
+        )
+      (set! (-> v1-6 base) (&+ a0-8 16))
+      )
+    (let ((v1-7 (-> arg0 base)))
+      (set! (-> (the-as (pointer int32) v1-7)) #x4b000000)
+      (let ((a0-11 #x4b000000))
+        (s.w! (+ v1-7 4) a0-11)
+        )
+      (let ((a0-12 #x4b000000))
+        (s.w! (+ v1-7 8) a0-12)
+        )
+      (let ((a0-13 #x4b000000))
+        (s.w! (+ v1-7 12) a0-13)
+        )
+      (let ((a0-14 #x3000000))
+        (s.w! (+ v1-7 16) a0-14)
+        )
+      (let ((a0-15 #x200002c))
+        (s.w! (+ v1-7 20) a0-15)
+        )
+      (let ((a0-16 #x5000000))
+        (s.w! (+ v1-7 24) a0-16)
+        )
+      (let ((a0-17 #x1000404))
+        (s.w! (+ v1-7 28) a0-17)
+        )
+      (set! (-> arg0 base) (&+ v1-7 32))
+      )
+    0
+    )
+  (none)
+  )
+
+;; definition for function tie-near-end-buffer
+;; INFO: Return type mismatch int vs none.
+;; Does nothing unless the tie-near bit is set in *vu1-enable-user*. Otherwise appends five
+;; groups of raw words to dma-buffer arg0 (16, 16, 16, 16 and 32 bytes), advancing base
+;; after each group.
+;; NOTE(review): #x5026b and 71 in the third group match the atest-tra data/cmds values
+;; written by tie-near-init-consts - presumably the same register write; confirm.
+(defun tie-near-end-buffer ((arg0 dma-buffer))
+  (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie-near))
+    (let* ((v1-2 arg0)
+           (a1-0 (-> v1-2 base))
+           )
+      (set! (-> (the-as (pointer int64) a1-0)) #x10000002)
+      (s.w! (+ a1-0 8) 0)
+      (let ((a2-1 #x50000002))
+        (s.w! (+ a1-0 12) a2-1)
+        )
+      (set! (-> v1-2 base) (&+ a1-0 16))
+      )
+    (let* ((v1-3 arg0)
+           (a1-2 (-> v1-3 base))
+           )
+      (set! (-> (the-as (pointer uint64) a1-2)) (make-u128 0 (the-as uint #x1000000000008001)))
+      (let ((a2-4 (the-as uint #xeeeeeeeeeeeeeeee)))
+        (s.d! (+ a1-2 8) a2-4)
+        )
+      (set! (-> v1-3 base) (&+ a1-2 16))
+      )
+    (let* ((v1-4 arg0)
+           (a1-4 (-> v1-4 base))
+           )
+      (set! (-> (the-as (pointer int64) a1-4)) #x5026b)
+      (let ((a2-6 71))
+        (s.d! (+ a1-4 8) a2-6)
+        )
+      (set! (-> v1-4 base) (&+ a1-4 16))
+      )
+    (let* ((v1-5 arg0)
+           (a1-6 (-> v1-5 base))
+           )
+      (set! (-> (the-as (pointer int64) a1-6)) #x10000002)
+      (let ((a2-8 #x20000000))
+        (s.w! (+ a1-6 8) a2-8)
+        )
+      (s.w! (+ a1-6 12) 0)
+      (set! (-> v1-5 base) (&+ a1-6 16))
+      )
+    (let* ((v1-6 arg0)
+           (a0-1 (-> v1-6 base))
+           )
+      (set! (-> (the-as (pointer int32) a0-1)) #x15000004)
+      (let ((a1-9 #x5000000))
+        (s.w! (+ a0-1 4) a1-9)
+        )
+      (let ((a1-10 #x13000000))
+        (s.w! (+ a0-1 8) a1-10)
+        )
+      (let ((a1-11 #x30000000))
+        (s.w! (+ a0-1 12) a1-11)
+        )
+      (s.w! (+ a0-1 16) 0)
+      (s.w! (+ a0-1 20) 0)
+      (s.w! (+ a0-1 24) 0)
+      (s.w! (+ a0-1 28) 0)
+      (set! (-> v1-6 base) (&+ a0-1 32))
+      )
+    0
+    )
+  (none)
+  )
+
+;; definition for function tie-near-make-perspective-matrix
+;; Calls column-scale-matrix! with arg0 and *math-camera*'s hmge-scale and camera-temp,
+;; and returns that call's result.
+(defun tie-near-make-perspective-matrix ((arg0 matrix))
+  (column-scale-matrix! arg0 (-> *math-camera* hmge-scale) (-> *math-camera* camera-temp))
+  )
+
+;; definition for function tie-near-int-reg
+;; Maps an index 0..9 to a name string. There is no else clause, so any other value
+;; matches nothing in the cond.
+(defun tie-near-int-reg ((arg0 int))
+  (let ((v1-0 arg0))
+    (cond
+      ((zero? v1-0)
+       "zero"
+       )
+      ((= v1-0 1)
+       "itemp"
+       )
+      ((= v1-0 2)
+       "delta"
+       )
+      ((= v1-0 3)
+       "dest-0"
+       )
+      ((= v1-0 4)
+       "dest-1"
+       )
+      ((= v1-0 5)
+       "dest-2"
+       )
+      ((= v1-0 6)
+       "dest-3"
+       )
+      ((= v1-0 7)
+       "delta-ptr"
+       )
+      ((= v1-0 8)
+       "prev"
+       )
+      ((= v1-0 9)
+       "itemp2"
+       )
+      )
+    )
+  )
+
+;; definition for function tie-near-float-reg
+;; Maps an index 0..31 to a name string ("--" for several indices). There is no else clause.
+;; NOTE(review): the clauses for 19 and 20 appear twice; the second pair repeats tests
+;; already made by the first pair and yields the same "--" string.
+(defun tie-near-float-reg ((arg0 int))
+  (let ((v1-0 arg0))
+    (cond
+      ((zero? v1-0)
+       "zero"
+       )
+      ((= v1-0 1)
+       "vtx-0"
+       )
+      ((= v1-0 2)
+       "vtx-1"
+       )
+      ((= v1-0 3)
+       "vtx-2"
+       )
+      ((= v1-0 4)
+       "vtx-3"
+       )
+      ((= v1-0 5)
+       "hvtx-0"
+       )
+      ((= v1-0 6)
+       "hvtx-1"
+       )
+      ((= v1-0 7)
+       "hvtx-2"
+       )
+      ((= v1-0 8)
+       "hvtx-3"
+       )
+      ((= v1-0 9)
+       "tex-0"
+       )
+      ((= v1-0 10)
+       "tex-1"
+       )
+      ((= v1-0 11)
+       "tex-2"
+       )
+      ((= v1-0 12)
+       "tex-3"
+       )
+      ((= v1-0 13)
+       "deltas"
+       )
+      ((= v1-0 14)
+       "invh"
+       )
+      ((= v1-0 15)
+       "hvdfcl"
+       )
+      ((= v1-0 16)
+       "hvdfnc"
+       )
+      ((= v1-0 17)
+       "--"
+       )
+      ((= v1-0 18)
+       "--"
+       )
+      ((= v1-0 19)
+       "--"
+       )
+      ((= v1-0 20)
+       "--"
+       )
+      ((= v1-0 19)
+       "--"
+       )
+      ((= v1-0 20)
+       "--"
+       )
+      ((= v1-0 21)
+       "gifbuf"
+       )
+      ((= v1-0 22)
+       "clrbuf"
+       )
+      ((= v1-0 23)
+       "extra"
+       )
+      ((= v1-0 24)
+       "inds"
+       )
+      ((= v1-0 25)
+       "--"
+       )
+      ((= v1-0 26)
+       "--"
+       )
+      ((= v1-0 27)
+       "morph"
+       )
+      ((= v1-0 28)
+       "xyzofs"
+       )
+      ((= v1-0 29)
+       "--"
+       )
+      ((= v1-0 30)
+       "--"
+       )
+      ((= v1-0 31)
+       "--"
+       )
+      )
+    )
+  )
+
+
+
+
diff --git a/test/decompiler/reference/engine/gfx/tie/tie_REF.gc b/test/decompiler/reference/engine/gfx/tie/tie_REF.gc
new file mode 100644
index 0000000000..b7a6d433fd
--- /dev/null
+++ b/test/decompiler/reference/engine/gfx/tie/tie_REF.gc
@@ -0,0 +1,754 @@
+;;-*-Lisp-*-
+(in-package goal)
+
+;; definition for method 9 of type tie-fragment
+;; Calls adgif-shader-login-no-remap on the first (tex-count / 5) entries of gif-ref,
+;; then returns obj.
+(defmethod login tie-fragment ((obj tie-fragment))
+  (let ((s5-0 (-> obj gif-ref))
+        (s4-0 (/ (-> obj tex-count) (the-as uint 5)))
+        )
+    (dotimes (s3-0 (the-as int s4-0))
+      (adgif-shader-login-no-remap (-> s5-0 s3-0))
+      )
+    )
+  obj
+  )
+
+;; definition for method 3 of type drawable-inline-array-instance-tie
+;; Prints the object's type, length, data address and then one line per data element;
+;; returns obj.
+(defmethod inspect drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie))
+  (format #t "[~8x] ~A~%" obj (-> obj type))
+  (format #t "~Tlength: ~D~%" (-> obj length))
+  (format #t "~Tdata[~D]: @ #x~X~%" (-> obj length) (-> obj data))
+  (dotimes (s5-0 (-> obj length))
+    (format #t "~T [~D] ~A~%" s5-0 (-> obj data s5-0))
+    )
+  obj
+  )
+
+;; definition for 
method 5 of type drawable-inline-array-instance-tie
+;; INFO: Return type mismatch uint vs int.
+;; Size in bytes: the type's size plus 64 for every element beyond the first.
+(defmethod asize-of drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie))
+  (the-as int (+ (-> drawable-inline-array-instance-tie size) (* (+ (-> obj length) -1) 64)))
+  )
+
+;; definition for method 9 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; This variant does nothing and returns obj.
+(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie))
+  obj
+  )
+
+;; definition for method 9 of type drawable-tree-instance-tie
+;; INFO: this function exists in multiple non-identical object files
+;; INFO: Return type mismatch symbol vs drawable-tree-instance-tie.
+;; This variant calls login on every data element and returns #f (cast to the tree type).
+(defmethod login drawable-tree-instance-tie ((obj drawable-tree-instance-tie))
+  (dotimes (s5-0 (-> obj length))
+    (login (-> obj data s5-0))
+    )
+  (the-as drawable-tree-instance-tie #f)
+  )
+
+;; definition for method 3 of type prototype-tie
+;; Prints the object's type, length, data address and then one line per data element;
+;; returns obj.
+(defmethod inspect prototype-tie ((obj prototype-tie))
+  (format #t "[~8x] ~A~%" obj (-> obj type))
+  (format #t "~Tlength: ~D~%" (-> obj length))
+  (format #t "~Tdata[~D]: @ #x~X~%" (-> obj length) (-> obj data))
+  (dotimes (s5-0 (-> obj length))
+    (format #t "~T [~D] ~A~%" s5-0 (-> obj data s5-0))
+    )
+  obj
+  )
+
+;; definition for method 9 of type prototype-tie
+;; Calls login on every data element and returns obj.
+(defmethod login prototype-tie ((obj prototype-tie))
+  (dotimes (s5-0 (-> obj length))
+    (login (-> obj data s5-0))
+    )
+  obj
+  )
+
+;; definition for method 8 of type drawable-tree-instance-tie
+;; Charges the tree itself to slot 0 of arg0 (named from 'drawable-group): count +1,
+;; used +32, total +32 rounded up to a multiple of 16. Then calls mem-usage on every data
+;; element with arg1, and on prototype-array-tie with bit 1 or'ed into arg1. Returns obj.
+(defmethod mem-usage drawable-tree-instance-tie ((obj drawable-tree-instance-tie) (arg0 memory-usage-block) (arg1 int))
+  (set! (-> arg0 length) (max 1 (-> arg0 length)))
+  (set! (-> arg0 data 0 name) (symbol->string 'drawable-group))
+  (+! (-> arg0 data 0 count) 1)
+  (let ((v1-7 32))
+    (+! (-> arg0 data 0 used) v1-7)
+    (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15)))
+    )
+  (dotimes (s3-0 (-> obj length))
+    (mem-usage (-> obj data s3-0) arg0 arg1)
+    )
+  (mem-usage (-> obj prototypes prototype-array-tie) arg0 (logior arg1 1))
+  obj
+  )
+
+;; definition for method 8 of type tie-fragment
+;; Two modes, selected by bit 2 of arg1:
+;;  - bit set: only color-count * 4 bytes are charged, to slot 20, 21 or 22 depending on
+;;    bits 4 / 8 of arg1 (total rounded up to a multiple of 4), then control jumps to cfg-13.
+;;  - bit clear: names slots 9-14 and 17 and charges the fragment (slot 9), gif data
+;;    (slot 10), vertices (slot 11), draw points (slot 13), generic data (slot 17) and,
+;;    when debug-lines is nonzero, each debug-lines entry.
+;; Returns obj.
+(defmethod mem-usage tie-fragment ((obj tie-fragment) (arg0 memory-usage-block) (arg1 int))
+  (when (logtest? arg1 2)
+    (let ((v1-3 (* (-> obj color-count) 4))
+          (a0-2 (cond
+                  ((logtest? arg1 4)
+                   20
+                   )
+                  ((logtest? arg1 8)
+                   21
+                   )
+                  (else
+                    22
+                    )
+                  )
+                )
+          )
+      (+! (-> arg0 data a0-2 count) 1)
+      (+! (-> arg0 data a0-2 used) v1-3)
+      (+! (-> arg0 data a0-2 total) (logand -4 (+ v1-3 3)))
+      )
+    (set! (-> arg0 length) (max 23 (-> arg0 length)))
+    (set! obj obj)
+    (goto cfg-13)
+    )
+  (set! (-> arg0 length) (max 18 (-> arg0 length)))
+  (set! (-> arg0 data 9 name) "tie-fragment")
+  (set! (-> arg0 data 10 name) "tie-gif")
+  (set! (-> arg0 data 11 name) "tie-points")
+  (set! (-> arg0 data 12 name) "tie-colors")
+  (set! (-> arg0 data 14 name) "tie-debug")
+  (set! (-> arg0 data 13 name) "tie-draw-points")
+  (set! (-> arg0 data 17 name) "tie-generic")
+  (+! (-> arg0 data 9 count) 1)
+  (let ((v1-21 (asize-of obj)))
+    (+! (-> arg0 data 9 used) v1-21)
+    (+! (-> arg0 data 9 total) (logand -16 (+ v1-21 15)))
+    )
+  (let ((v1-26 (* (-> obj gif-count) 16)))
+    (+! (-> arg0 data 10 count) (-> obj tex-count))
+    (+! (-> arg0 data 10 used) v1-26)
+    (+! (-> arg0 data 10 total) (logand -16 (+ v1-26 15)))
+    )
+  (let ((v1-31 (* (-> obj vertex-count) 16)))
+    (+! (-> arg0 data 11 count) (-> obj vertex-count))
+    (+! (-> arg0 data 11 used) v1-31)
+    (+! (-> arg0 data 11 total) (logand -16 (+ v1-31 15)))
+    )
+  (let ((v1-36 (* (-> obj dp-qwc) 16)))
+    (+! (-> arg0 data 13 count) (* (-> obj dp-qwc) 16))
+    (+! (-> arg0 data 13 used) v1-36)
+    (+! (-> arg0 data 13 total) (logand -16 (+ v1-36 15)))
+    )
+  (let ((v1-41 (* (-> obj generic-count) 16)))
+    (+! (-> arg0 data 17 count) 1)
+    (+! (-> arg0 data 17 used) v1-41)
+    (+! (-> arg0 data 17 total) (logand -16 (+ v1-41 15)))
+    )
+  ;; NOTE(review): the count goes to slot 14 ("tie-debug") but used/total go to slot 12
+  ;; ("tie-colors") - looks inconsistent; confirm whether this mirrors the original code.
+  (when (nonzero? (-> obj debug-lines))
+    (dotimes (s4-0 (-> obj debug-lines length))
+      (+! (-> arg0 data 14 count) (-> (the-as (pointer int32) (-> obj debug-lines s4-0)) 0))
+      (let ((v1-52 (asize-of (the-as basic (-> obj debug-lines s4-0)))))
+        (+! (-> arg0 data 12 used) v1-52)
+        (+! (-> arg0 data 12 total) (logand -16 (+ v1-52 15)))
+        )
+      )
+    )
+  (label cfg-13)
+  obj
+  )
+
+;; definition for method 8 of type instance-tie
+;; Charges the instance to slot 18 ("instance-tie"). When (-> obj error) is nonzero it also
+;; names slots 19-23, bumps the count of slot 23, and calls mem-usage on each nonzero
+;; geometry-override (indices 0..3) of bucket-ptr with flags
+;; 2 | (index 1 -> 4, 2 -> 8, 3 -> 16, otherwise 0) | arg1. Returns obj.
+(defmethod mem-usage instance-tie ((obj instance-tie) (arg0 memory-usage-block) (arg1 int))
+  (set! (-> arg0 length) (max 19 (-> arg0 length)))
+  (set! (-> arg0 data 18 name) "instance-tie")
+  (+! (-> arg0 data 18 count) 1)
+  (let ((v1-6 (asize-of obj)))
+    (+! (-> arg0 data 18 used) v1-6)
+    (+! (-> arg0 data 18 total) (logand -16 (+ v1-6 15)))
+    )
+  (when (nonzero? (-> obj error))
+    (set! (-> arg0 length) (max 24 (-> arg0 length)))
+    (set! (-> arg0 data 23 name) "instance-tie-colors*")
+    (set! (-> arg0 data 19 name) "instance-tie-colors0")
+    (set! (-> arg0 data 20 name) "instance-tie-colors1")
+    (set! (-> arg0 data 21 name) "instance-tie-colors2")
+    (set! (-> arg0 data 22 name) "instance-tie-colors3")
+    (+! (-> arg0 data 23 count) 1)
+    (let ((s3-0 (-> obj bucket-ptr)))
+      ;; the sum below is computed and its value discarded
+      (+ (-> arg0 data 19 used) (-> arg0 data 20 used) (-> arg0 data 21 used) (-> arg0 data 22 used))
+      (dotimes (s2-0 4)
+        (let ((a0-10 (-> s3-0 geometry-override s2-0)))
+          (when (nonzero? a0-10)
+            (let ((t9-1 (method-of-object a0-10 mem-usage))
+                  (a1-2 arg0)
+                  (v1-29 s2-0)
+                  )
+              (t9-1 a0-10 a1-2 (logior
+                                 (logior
+                                   (cond
+                                     ((= v1-29 1)
+                                      4
+                                      )
+                                     ((= v1-29 2)
+                                      8
+                                      )
+                                     ((= v1-29 3)
+                                      16
+                                      )
+                                     (else
+                                       0
+                                       )
+                                     )
+                                   2
+                                   )
+                                 arg1
+                                 )
+                    )
+              )
+            )
+          )
+        )
+      )
+    )
+  obj
+  )
+
+;; definition for method 8 of type drawable-inline-array-instance-tie
+;; Charges the array to slot 0 of arg0 (count +1, used +32, total +32 rounded up to a
+;; multiple of 16), then calls mem-usage on every data element with arg1. Returns obj.
+(defmethod mem-usage drawable-inline-array-instance-tie ((obj drawable-inline-array-instance-tie) (arg0 memory-usage-block) (arg1 int))
+  (set! (-> arg0 length) (max 1 (-> arg0 length)))
+  (set! (-> arg0 data 0 name) (symbol->string 'drawable-group))
+  (+! (-> arg0 data 0 count) 1)
+  (let ((v1-7 32))
+    (+! (-> arg0 data 0 used) v1-7)
+    (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15)))
+    )
+  (dotimes (s3-0 (-> obj length))
+    (mem-usage (-> obj data s3-0) arg0 arg1)
+    )
+  obj
+  )
+
+;; definition for method 8 of type prototype-tie
+;; Charges the prototype to slot 0 of arg0 (count +1, used +32, total +32 rounded up to a
+;; multiple of 16), then calls mem-usage on every data element with arg1. Returns obj.
+(defmethod mem-usage prototype-tie ((obj prototype-tie) (arg0 memory-usage-block) (arg1 int))
+  (set! (-> arg0 length) (max 1 (-> arg0 length)))
+  (set! (-> arg0 data 0 name) (symbol->string 'drawable-group))
+  (+! (-> arg0 data 0 count) 1)
+  (let ((v1-7 32))
+    (+! (-> arg0 data 0 used) v1-7)
+    (+! (-> arg0 data 0 total) (logand -16 (+ v1-7 15)))
+    )
+  (dotimes (s3-0 (-> obj length))
+    (mem-usage (-> obj data s3-0) arg0 arg1)
+    )
+  obj
+  )
+
+;; definition for method 5 of type prototype-tie
+;; INFO: Return type mismatch uint vs int.
+;; Size in bytes: the type's size plus 64 for every element beyond the first.
+(defmethod asize-of prototype-tie ((obj prototype-tie))
+  (the-as int (+ (-> prototype-tie size) (* (+ (-> obj length) -1) 64)))
+  )
+
+;; definition of type tie-consts
+;; Constant block filled in by tie-init-consts. data, vector, quads and adgif are all
+;; declared at offset 0, so they name the same storage; atest-tra is declared at the same
+;; offset (112) as atest, and atest-def at 128.
+(deftype tie-consts (structure)
+  ((data uint32 24 :offset-assert 0)
+   (vector vector 6 :inline :offset 0)
+   (quads uint128 6 :offset 0)
+   (adgif gs-gif-tag :inline :offset 0)
+   (strgif gs-gif-tag :inline :offset 16)
+   (extra vector :inline :offset 32)
+   (gifbufs vector :inline :offset 48)
+   (clrbufs qword :inline :offset 64)
+   (misc qword :inline :offset 80)
+   (atestgif gs-gif-tag :inline :offset 96)
+   (atest ad-cmd 2 :inline :offset 112)
+   (atest-tra ad-cmd :inline :offset 112)
+   (atest-def ad-cmd :inline :offset 128)
+   )
+  :method-count-assert 9
+  :size-assert #x90
+  :flag-assert #x900000090
+  )
+
+;; definition for method 3 of type tie-consts
+;; Prints one line per field with format and returns obj. The vector, quads and adgif lines
+;; pass (-> obj data), and the atest-tra line passes (-> obj atest); each pair is declared
+;; at the same offset in the type.
+;; NOTE(review): several lines pass an argument but their format string has no directive
+;; left to consume it ("...: #~%") - looks like text after '#' was lost in extraction;
+;; confirm against the decompiler output.
+(defmethod inspect tie-consts ((obj tie-consts))
+  (format #t "[~8x] ~A~%" obj 'tie-consts)
+  (format #t "~Tdata[24] @ #x~X~%" (-> obj data))
+  (format #t "~Tvector[6] @ #x~X~%" (-> obj data))
+  (format #t "~Tquads[6] @ #x~X~%" (-> obj data))
+  (format #t "~Tadgif: #~%" (-> obj data))
+  (format #t "~Tstrgif: #~%" (-> obj strgif))
+  (format #t "~Textra: #~%" (-> obj extra))
+  (format #t "~Tgifbufs: #~%" (-> obj gifbufs))
+  (format #t "~Tclrbufs: #~%" (-> obj clrbufs))
+  (format #t "~Tmisc: #~%" (-> obj misc))
+  (format #t "~Tatestgif: #~%" (-> obj atestgif))
+  (format #t "~Tatest[2] @ #x~X~%" (-> obj atest))
+  (format #t "~Tatest-tra: #~%" (-> obj atest))
+  (format #t "~Tatest-def: #~%" (-> obj atest-def))
+  obj
+  )
+
+;; definition for symbol tie-vu1-block, type vu-function
+;; Static vu-function object; only its :length and :qlength fields are given here.
+(define tie-vu1-block (new 'static 'vu-function :length #x3e1 :qlength #x1f1))
+
+;; definition for function tie-init-consts
+;; INFO: Return type mismatch int vs none.
+;; Fills the fields of arg0. arg1 is used as the :abe value of the strgif primitive tag.
+;; The strgif tag depends on *subdivide-draw-mode*: modes 0 and 3 build the same textured
+;; tri-strip tag, mode 1 an untextured line-strip, mode 2 an untextured tri-strip. No
+;; clause handles other mode values, so strgif tag is left untouched for them.
+(defun tie-init-consts ((arg0 tie-consts) (arg1 int))
+  (set! (-> arg0 adgif tag) (new 'static 'gif-tag64 :nloop #x5 :nreg #x1))
+  (set! (-> arg0 adgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d)))
+  (cond
+    ((zero? *subdivide-draw-mode*)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 3)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :tme #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 1)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type line-strip) :iip #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    ((= *subdivide-draw-mode* 2)
+     (set! (-> arg0 strgif tag)
+           (new 'static 'gif-tag64
+             :pre #x1
+             :nreg #x3
+             :prim (new 'static 'gs-prim :prim (gs-prim-type tri-strip) :iip #x1 :fge #x1 :abe arg1)
+             )
+           )
+     )
+    )
+  (set! (-> arg0 strgif regs)
+        (new 'static 'gif-tag-regs :regs0 (gif-reg-id st) :regs1 (gif-reg-id rgbaq) :regs2 (gif-reg-id xyzf2))
+        )
+  (let ((f1-0 8388894.0)
+        (f2-0 8389078.0)
+        (f0-0 8389262.0)
+        )
+    (set! (-> arg0 gifbufs x) f0-0)
+    (set! (-> arg0 gifbufs y) f2-0)
+    (set! (-> arg0 gifbufs z) f0-0)
+    (set! (-> arg0 gifbufs w) f2-0)
+    (set! (-> arg0 extra x) (+ f1-0 f2-0 f0-0))
+    (set! (-> arg0 extra y) 0.0)
+    (set! (-> arg0 extra z) (+ f1-0 f2-0 f0-0))
+    )
+  (set! (-> arg0 clrbufs vector4w x) 198)
+  (set! (-> arg0 clrbufs vector4w y) 242)
+  (set! (-> arg0 clrbufs vector4w z) 198)
+  (set! (-> arg0 clrbufs vector4w w) 242)
+  (set! (-> arg0 atestgif tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1))
+  (set! (-> arg0 atestgif regs) (new 'static 'gif-tag-regs :regs0 (gif-reg-id a+d)))
+  (set! (-> arg0 atest-tra cmd) (gs-reg test-1))
+  (set! (-> arg0 atest-tra data) (the-as uint #x5026b))
+  (set! (-> arg0 atest-def cmd) (gs-reg test-1))
+  (set! (-> arg0 atest-def data) (the-as uint #x5000e))
+  (set! (-> arg0 misc vector4w x) 0)
+  (set! (-> arg0 misc vector4w y) -1)
+  (none)
+  )
+
+;; definition for function tie-init-engine
+;; INFO: Return type mismatch int vs none.
+(defun tie-init-engine ((arg0 dma-buffer) (arg1 gs-test) (arg2 int))
+  (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie))
+    (dma-buffer-add-vu-function arg0 tie-vu1-block 1)
+    (let* ((v1-3 arg0)
+           (a0-2 (the-as object (-> v1-3 base)))
+           )
+      (set! (-> (the-as dma-packet a0-2) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt)))
+      (set! (-> (the-as dma-packet a0-2) vif0) (new 'static 'vif-tag))
+      (set! (-> (the-as dma-packet a0-2) vif1) (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1))
+      (set! (-> v1-3 base) (&+ (the-as pointer a0-2) 16))
+      )
+    (let* ((v1-4 arg0)
+           (a0-4 (the-as object (-> v1-4 base)))
+           )
+      (set! (-> (the-as gs-gif-tag a0-4) tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1))
+      (set! 
(-> (the-as gs-gif-tag a0-4) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! (-> v1-4 base) (the-as pointer (&+ (the-as gs-gif-tag a0-4) 16))) + ) + (let* ((v1-5 arg0) + (a0-6 (-> v1-5 base)) + ) + (set! (-> (the-as (pointer gs-test) a0-6) 0) arg1) + (set! (-> (the-as (pointer gs-reg64) a0-6) 1) (gs-reg64 test-1)) + (set! (-> v1-5 base) (&+ a0-6 16)) + ) + (let ((s4-1 9)) + (let* ((v1-6 arg0) + (a0-8 (the-as object (-> v1-6 base))) + ) + (set! (-> (the-as dma-packet a0-8) dma) (new 'static 'dma-tag :id (dma-tag-id cnt) :qwc s4-1)) + (set! (-> (the-as dma-packet a0-8) vif0) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as dma-packet a0-8) vif1) + (new 'static 'vif-tag :imm #x3c6 :cmd (vif-cmd unpack-v4-32) :num s4-1) + ) + (set! (-> v1-6 base) (&+ (the-as pointer a0-8) 16)) + ) + (tie-init-consts (the-as tie-consts (-> arg0 base)) arg2) + (&+! (-> arg0 base) (* s4-1 16)) + ) + (let* ((v1-9 arg0) + (a0-12 (the-as object (-> v1-9 base))) + ) + (set! (-> (the-as dma-packet a0-12) dma) (new 'static 'dma-tag :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-12) vif0) (new 'static 'vif-tag :imm #x8 :cmd (vif-cmd mscalf) :msk #x1)) + (set! (-> (the-as dma-packet a0-12) vif1) (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1)) + (set! (-> v1-9 base) (&+ (the-as pointer a0-12) 16)) + ) + (let* ((v1-10 arg0) + (a0-14 (the-as object (-> v1-10 base))) + ) + (set! (-> (the-as dma-packet a0-14) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a0-14) vif0) (new 'static 'vif-tag)) + (set! 
(-> (the-as dma-packet a0-14) vif1) (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1)) + (set! (-> v1-10 base) (&+ (the-as pointer a0-14) 16)) + ) + (let ((v1-11 (the-as object (-> arg0 base)))) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 x) #x4b000000) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 y) #x4b000000) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 z) #x4b000000) + (set! (-> (the-as (inline-array vector4w) v1-11) 0 w) #x4b000000) + (set! (-> (the-as (pointer vif-tag) v1-11) 4) (new 'static 'vif-tag :cmd (vif-cmd base))) + (set! (-> (the-as (pointer vif-tag) v1-11) 5) (new 'static 'vif-tag :imm #x2c :cmd (vif-cmd offset))) + (set! (-> (the-as (pointer vif-tag) v1-11) 6) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as (pointer vif-tag) v1-11) 7) (new 'static 'vif-tag :imm #x404 :cmd (vif-cmd stcycl))) + (set! (-> arg0 base) (&+ (the-as pointer v1-11) 32)) + ) + 0 + ) + 0 + (none) + ) + +;; definition for function tie-end-buffer +;; INFO: Return type mismatch int vs none. +(defun tie-end-buffer ((arg0 dma-buffer)) + (when (logtest? *vu1-enable-user* (vu1-renderer-mask tie)) + (let* ((v1-3 arg0) + (a1-0 (the-as object (-> v1-3 base))) + ) + (set! (-> (the-as dma-packet a1-0) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a1-0) vif0) (new 'static 'vif-tag)) + (set! (-> (the-as dma-packet a1-0) vif1) (new 'static 'vif-tag :imm #x2 :cmd (vif-cmd direct) :msk #x1)) + (set! (-> v1-3 base) (&+ (the-as pointer a1-0) 16)) + ) + (let* ((v1-4 arg0) + (a1-2 (the-as object (-> v1-4 base))) + ) + (set! (-> (the-as gs-gif-tag a1-2) tag) (new 'static 'gif-tag64 :nloop #x1 :eop #x1 :nreg #x1)) + (set! 
(-> (the-as gs-gif-tag a1-2) regs) + (new 'static 'gif-tag-regs + :regs0 (gif-reg-id a+d) + :regs1 (gif-reg-id a+d) + :regs2 (gif-reg-id a+d) + :regs3 (gif-reg-id a+d) + :regs4 (gif-reg-id a+d) + :regs5 (gif-reg-id a+d) + :regs6 (gif-reg-id a+d) + :regs7 (gif-reg-id a+d) + :regs8 (gif-reg-id a+d) + :regs9 (gif-reg-id a+d) + :regs10 (gif-reg-id a+d) + :regs11 (gif-reg-id a+d) + :regs12 (gif-reg-id a+d) + :regs13 (gif-reg-id a+d) + :regs14 (gif-reg-id a+d) + :regs15 (gif-reg-id a+d) + ) + ) + (set! (-> v1-4 base) (&+ (the-as pointer a1-2) 16)) + ) + (let* ((v1-5 arg0) + (a1-4 (-> v1-5 base)) + ) + (set! (-> (the-as (pointer gs-test) a1-4) 0) + (new 'static 'gs-test :atst (gs-atest not-equal) :zte #x1 :ztst (gs-ztest greater-equal)) + ) + (set! (-> (the-as (pointer gs-reg64) a1-4) 1) (gs-reg64 test-1)) + (set! (-> v1-5 base) (&+ a1-4 16)) + ) + (let* ((v1-6 arg0) + (a1-6 (the-as object (-> v1-6 base))) + ) + (set! (-> (the-as dma-packet a1-6) dma) (new 'static 'dma-tag :qwc #x2 :id (dma-tag-id cnt))) + (set! (-> (the-as dma-packet a1-6) vif0) (new 'static 'vif-tag :cmd (vif-cmd stmask))) + (set! (-> (the-as dma-packet a1-6) vif1) (new 'static 'vif-tag)) + (set! (-> v1-6 base) (&+ (the-as pointer a1-6) 16)) + ) + (let* ((v1-7 arg0) + (a0-1 (-> v1-7 base)) + ) + (set! (-> (the-as (pointer vif-tag) a0-1) 0) (new 'static 'vif-tag :imm #x4 :cmd (vif-cmd mscalf) :msk #x1)) + (set! (-> (the-as (pointer vif-tag) a0-1) 1) (new 'static 'vif-tag :cmd (vif-cmd stmod))) + (set! (-> (the-as (pointer vif-tag) a0-1) 2) (new 'static 'vif-tag :cmd (vif-cmd flusha) :msk #x1)) + (set! (-> (the-as (pointer vif-tag) a0-1) 3) (new 'static 'vif-tag :cmd (vif-cmd strow) :msk #x1)) + (set! (-> (the-as (pointer vif-tag) a0-1) 4) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 5) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 6) (new 'static 'vif-tag)) + (set! (-> (the-as (pointer vif-tag) a0-1) 7) (new 'static 'vif-tag)) + (set! 
(-> v1-7 base) (&+ a0-1 32)) + ) + 0 + ) + 0 + (none) + ) + +;; definition (debug) for function tie-int-reg +(defun-debug tie-int-reg ((arg0 int)) + (let ((v1-0 arg0)) + (cond + ((zero? v1-0) + "zero" + ) + ((= v1-0 1) + "itemp" + ) + ((= v1-0 2) + "point-ptr" + ) + ((= v1-0 3) + "clr-ptr" + ) + ((= v1-0 4) + "target-bp1-ptr" + ) + ((= v1-0 5) + "skip-bp2" + ) + ((= v1-0 6) + "target-bp2-ptr" + ) + ((= v1-0 7) + "target-ip1-ptr" + ) + ((= v1-0 8) + "target-ip2-ptr" + ) + ((= v1-0 9) + "ind/ind0" + ) + ((= v1-0 10) + " ind1" + ) + ((= v1-0 11) + " ind2" + ) + ((= v1-0 12) + "dest-ptr" + ) + ((= v1-0 13) + "dest2-ptr" + ) + ((= v1-0 14) + "skip-ips" + ) + ((= v1-0 15) + "kick-addr" + ) + ) + ) + ) + +;; definition (debug) for function tie-float-reg +(defun-debug tie-float-reg ((arg0 int)) + (let ((v1-0 arg0)) + (cond + ((zero? v1-0) + "zero" + ) + ((= v1-0 1) + "t-mtx0" + ) + ((= v1-0 2) + "t-mtx1" + ) + ((= v1-0 3) + "t-mtx2" + ) + ((= v1-0 4) + "t-mtx3" + ) + ((= v1-0 5) + "vtx-0" + ) + ((= v1-0 6) + "vtx-1" + ) + ((= v1-0 7) + "vtx-2" + ) + ((= v1-0 8) + "vtx-3" + ) + ((= v1-0 9) + "pos-0/2" + ) + ((= v1-0 10) + "pos-1/3" + ) + ((= v1-0 11) + "clr-0" + ) + ((= v1-0 12) + "clr-1" + ) + ((= v1-0 13) + "clr-2" + ) + ((= v1-0 14) + "clr-3" + ) + ((= v1-0 15) + "tex-0" + ) + ((= v1-0 16) + "tex-1" + ) + ((= v1-0 17) + "tex-2" + ) + ((= v1-0 18) + "tex-3" + ) + ((= v1-0 19) + "res-0/2" + ) + ((= v1-0 20) + "res-1/3" + ) + ((= v1-0 21) + "gifbuf" + ) + ((= v1-0 22) + "clrbuf" + ) + ((= v1-0 23) + "extra" + ) + ((= v1-0 24) + "inds" + ) + ((= v1-0 25) + "--" + ) + ((= v1-0 26) + "--" + ) + ((= v1-0 27) + "morph" + ) + ((= v1-0 28) + "xyzofs" + ) + ((= v1-0 29) + "clr1" + ) + ((= v1-0 30) + "clr2" + ) + ((= v1-0 31) + "--" + ) + ) + ) + ) + +;; definition (debug) for function tie-ints +;; INFO: Return type mismatch symbol vs none. 
+;; Used lq/sq +(defun-debug tie-ints () + (local-vars (sv-16 uint)) + (let ((gp-0 (the-as (pointer uint32) (+ #x3fa0 #x1100c000)))) + (dotimes (s5-0 16) + (if (< s5-0 10) + (format 0 " ") + ) + (let ((s4-0 format) + (s3-0 0) + (s2-0 "vi~d: ~6d #x~4,'0X ~s~%") + (s1-0 s5-0) + (s0-0 (-> gp-0 (* s5-0 4))) + ) + (set! sv-16 (-> gp-0 (* s5-0 4))) + (let ((t1-0 (tie-int-reg s5-0))) + (s4-0 s3-0 s2-0 s1-0 s0-0 sv-16 t1-0) + ) + ) + ) + ) + (none) + ) + +;; definition (debug) for function tie-floats +;; INFO: Return type mismatch symbol vs none. +;; Used lq/sq +(defun-debug tie-floats () + (local-vars (sv-16 uint) (sv-32 uint)) + (let ((gp-0 (the-as (pointer uint32) (+ #x3da0 #x1100c000)))) + (dotimes (s5-0 32) + (if (< s5-0 10) + (format 0 " ") + ) + (format + 0 + "vf~d: #x~8,'0X #x~8,'0X #x~8,'0X #x~8,'0X " + s5-0 + (-> gp-0 (* s5-0 4)) + (-> gp-0 (+ (* s5-0 4) 1)) + (-> gp-0 (+ (* s5-0 4) 2)) + (-> gp-0 (+ (* s5-0 4) 3)) + ) + (let ((s4-0 format) + (s3-0 0) + (s2-0 "~F ~F ~F ~F ~s~%") + (s1-0 (-> gp-0 (* s5-0 4))) + (s0-0 (-> gp-0 (+ (* s5-0 4) 1))) + ) + (set! sv-16 (-> gp-0 (+ (* s5-0 4) 2))) + (set! sv-32 (-> gp-0 (+ (* s5-0 4) 3))) + (let ((t2-1 (tie-float-reg s5-0))) + (s4-0 s3-0 s2-0 s1-0 s0-0 sv-16 sv-32 t2-1) + ) + ) + ) + ) + (none) + ) + + + + diff --git a/test/decompiler/reference/engine/level/level_REF.gc b/test/decompiler/reference/engine/level/level_REF.gc index 9b2304b300..4cd0c8d809 100644 --- a/test/decompiler/reference/engine/level/level_REF.gc +++ b/test/decompiler/reference/engine/level/level_REF.gc @@ -577,7 +577,7 @@ (set! sv-16 (-> s1-2 array-data (the-as uint current-login-pos))) (set! sv-32 0) (while (< sv-32 4) - (let ((a0-28 (-> sv-16 geometry sv-32))) + (let ((a0-28 (-> sv-16 geometry-override sv-32))) (if (nonzero? 
a0-28) (login a0-28) ) diff --git a/test/offline/config.jsonc b/test/offline/config.jsonc index fd8018db7c..3293f39957 100644 --- a/test/offline/config.jsonc +++ b/test/offline/config.jsonc @@ -186,7 +186,18 @@ "command-get-process", // handle casts // sage-finalboss - "(method 7 sage-finalboss)" // inline-array stuff + "(method 7 sage-finalboss)", // inline-array stuff + + // appears twice + "(method 9 drawable-tree-instance-tie)", + "(method 11 drawable-tree-instance-tie)", + "(method 12 drawable-tree-instance-tie)", + "(method 13 drawable-tree-instance-tie)", + + // not in use in PC port + "tie-near-init-engine", + "tie-near-end-buffer" + ], "skip_compile_states": { diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index dfcb45849e..bde3973898 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -12,4 +12,3 @@ add_executable(memory_dump_tool MemoryDumpTool/main.cpp) target_link_libraries(memory_dump_tool common decomp elzip) -install(TARGETS dgo_unpacker dgo_packer) diff --git a/tools/MemoryDumpTool/main.cpp b/tools/MemoryDumpTool/main.cpp index ba518dfb3c..f7ad61fce2 100644 --- a/tools/MemoryDumpTool/main.cpp +++ b/tools/MemoryDumpTool/main.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include "third-party/fmt/core.h" #include "third-party/11zip/include/elzip/elzip.hpp" #include "third-party/json.hpp"