

// The actual "tfrag" type. This only lives on the EE and gets converted to DMA data.
// some of the DMA data is pointers to chains that live in the static level data.
// other is colors that are looked up from the palette (on the EE!) then thrown in the
// double-buffered frame global buffer.

(deftype tfragment (drawable)
  (
   (color-index       uint16                       :offset 6)
   (debug-data        tfragment-debug-data         :offset 8)
   (color-indices     uint32                       :offset 12)
   (colors            uint32                       :offset 12)
   (dma-chain         uint32              3       :offset-assert 32)
   (dma-common        uint32                       :offset 32)
   (dma-level-0       uint32                       :offset 32)
   (dma-base          uint32                       :offset 36)
   (dma-level-1       uint32                       :offset 40)
   (dma-qwc           uint8              4       :offset 44)
   (shader            (inline-array adgif-shader)                       :offset 48)
   (num-shaders       uint8                        :offset 52)
   (num-base-colors   uint8                        :offset 53)
   (num-level0-colors uint8                        :offset 54)
   (num-level1-colors uint8                        :offset 55)
   (color-offset      uint8                        :offset 56)
   (color-count       uint8                        :offset 57)
   (pad0              uint8                        :offset 58)
   (pad1              uint8                        :offset 59)
   (generic           generic-tfragment            :offset-assert 60)
   (generic-u32       uint32            :offset 60) ;; added
   )
  :method-count-assert 18
  :size-assert         #x40
  :flag-assert         #x1200000040
  )

// This is the temp/debug structure used for the EE code

(deftype tfrag-work (structure)
  ((base-tmpl             dma-packet :inline :offset-assert 0)
   (level-0-tmpl          dma-packet :inline :offset-assert 16)
   (common-tmpl           dma-packet :inline :offset-assert 32)
   (level-1-tmpl          dma-packet :inline :offset-assert 48)
   (color-tmpl            dma-packet :inline :offset-assert 64)
   (frag-dists            vector     :inline :offset-assert 80)
   (max-dist              vector     :inline :offset-assert 96)
   (min-dist              vector     :inline :offset-assert 112)
   (color-ptr             vector4w   :inline :offset-assert 128)
   (tr-stat-tfrag         tr-stat            :offset-assert 144)
   (tr-stat-tfrag-near    tr-stat            :offset-assert 148)
   (vu1-enable-tfrag      int32              :offset-assert 152)
   (vu1-enable-tfrag-near int32              :offset-assert 156)
   (cur-vis-bits          uint32             :offset-assert 160)
   (end-vis-bits          uint32             :offset-assert 164)
   (src-ptr               uint32             :offset-assert 168)
   (last-call             uint32             :offset-assert 172)
   (dma-buffer            basic              :offset-assert 176)
   (test-id               uint32             :offset-assert 180)
   (wait-from-spr         uint32             :offset-assert 184)
   (wait-to-spr           uint32             :offset-assert 188)
   (near-wait-from-spr    uint32             :offset-assert 192)
   (near-wait-to-spr      uint32             :offset-assert 196)
   )
  :method-count-assert 9
  :size-assert         #xc8
  :flag-assert         #x9000000c8
  )
 */

// base vifs:
// ??
// t3

// l0:
// ??
// t3

// common:
// ??
// t3

// color
// ??
// 12 sb color-offset
// 14 sb num colors, 4 aligned.

/*
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; .function draw-inline-array-tfrag
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; there's two double-buffered spad buffers + culling data

   ;; arguments:
   ;; a0 - occlusion cull list (on spad)
   ;; a1 - tfrags
   ;; a2 - num tfrags
   ;; a3 - dma buf

   ;; constants:
   ;; t0 = *tfrag-work*
   ;; t1 = SPR FROM
   ;; t2 = 0x14000000 ??
   ;; t4 = SPR TO

   ;; vars:
   ;; v1 = ptr to dma buffer data
   ;; t5 = SPR BUFFER 0 (tfrags)
   ;; a3 = SPR BUFFER 1
   ;; t3 = ?? (init to 0)
   ;; t6 = spr buffer 1 use (qwc)

   ;; vf3 = frag-dists
   ;; vf1 = (1, 1, 1, 1)
   ;; vf2 = bsphere
B0:
L40:
TFRAG INITIALIZE
 ;; set up constants
    daddiu sp, sp, -128
    sd ra, 0(sp)
    sq s0, 16(sp)
    sq s1, 32(sp)
    sq s2, 48(sp)
    sq s3, 64(sp)
    sq s4, 80(sp)
    sq s5, 96(sp)
    sq gp, 112(sp)
    lui t2, 5120     = (0x14000000), a constant (mscal)
    lw v1, 4(a3)     (-> dma-buf base)
    lui t3, 4096     = (0x10000000)
    lui t1, 4096     = (0x10000000)
    sync.l
    cache dxwbin v1, 0
    sync.l
    cache dxwbin v1, 1
    sync.l
    lw t0, *tfrag-work*(s7)
    ori t4, t3, 54272           = (0x1000D400) SPR TO
    ori t1, t1, 53248           = (0x1000D000) SPR FROM
    lui t5, 28672               = (0x70000000)
    lqc2 vf3, 80(t0)            = (-> *tfrag-work* frag-dists)
    sw a3, 176(t0)              (set! (-> *tfrag-work* dma-buffer) dma-ptr)
    ori a3, t5, 2064            setup buffer 1
    addiu t3, r0, 0             t3 = 0
    ori t5, t5, 1040            setup buffer 0
    vmaxw.xyzw vf1, vf0, vf0    vf1 = (1, 1, 1, 1)
    lh t7, 0(a0)                vis cull load
    lqc2 vf4, 96(t0)            max-dist
    addiu a1, a1, -4            remove basic offset
    addiu t6, r0, 0             t6 = 0
    or ra, a3, r0               ra = SPAD BUFFER 1


SKIP TO FIRST VISIBLE
 ;; skips ahead until we find some visible tfrags.
B1:
L41:
    bne t7, r0, L42
    sll r0, r0, 0

B2:
    addiu a0, a0, 2     ;; + 16 bits in the vis list
    addiu a1, a1, 1024  ;; 16 * 0x40 = 1024 bytes in tfrag list
    daddiu a2, a2, -16  ;; num tfrags -= 16
    lh t7, 0(a0)        ;; next vis
    blez a2, L69_CLEANUP ;; no visible tfrags, abort!
    sll r0, r0, 0

B3:
    beq r0, r0, L41 ;; keep looking
    sll r0, r0, 0

WAIT_FOR_PREV_DMA
 ;; waits for any previously running spad dma transfer to end
B4:
L42:
    lw t7, 0(t4)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi t7, t7, 256
    sll r0, r0, 0
    bne t7, r0, L42
    sll r0, r0, 0

INIT_FIRST_SPAD_TO
 ;; initializes the first scratchpad upload of tfrags
B5:
    sw a1, 16(t4)       ;; madr = a1
    xori t7, t5, 1024   ;; t7 = upload addr of tfrags double buffer
    sw t7, 128(t4)      ;; sadr
    addiu t7, r0, 64    ;; 64 qw = 16 tfrags
    sw t7, 32(t4)       ;; qwc
    addiu t7, r0, 256   ;; go
    sw t7, 0(t4)        ;; go!
    sll r0, r0, 0

TFRAG MAIN LOOP TOP
B6:
L43:
    or gp, a0, r0                     ;; gp = temp addr of vis list
    xori t5, t5, 1024                 ;; toggle to addr of upload tfrags
    daddiu a0, a0, 2                  ;; advance vis list ptr (16 tfrags)
    or t9, a0, r0                     ;; t9 = temp addr of next vis list
    or t8, t5, r0                     ;; t8 = tfrags to use

  ;; next, let's find next block of visible tfrags so we can start it's dma early
    daddiu t7, a2, -16                ;; t7 = tfrags left after this loop
    bgtz t7, L45                      ;; if we have them left, jump
    lh t7, 0(a0)                      ;; and load their vis

B7:
    beq r0, r0, L48                   ;; none left, skip dma kickoff.
    sll r0, r0, 0

B8:
L44:
    daddiu a2, a2, -16                ;; skip invisible block (dec tfrag counter)
    addiu a0, a0, 2                   ;; increment vis list
    blez a2, L48                      ;; did we get to the end of the tfrag list?
    lh t7, 0(a0)                      ;; check vis again.

B9:
    sll r0, r0, 0
    sll r0, r0, 0
B10:
L45:
    beq t7, r0, L44                  ;; we have tfrags left. if 0, they are all hidden, so loop
    addiu a1, a1, 1024               ;; and advance upload pointer (not done at all yet)

 ;; we reach here if we have tfrags left after this block.
 ;; so let's upload the next ones to the scratchpad so they are ready by next time.
B11:
L46:
    lw t7, 0(t4)      ;; make sure to-spr is done
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi t7, t7, 256
    sll r0, r0, 0
    beq t7, r0, L47
    sll r0, r0, 0

B12:
    sll r0, r0, 0
    lw t7, 188(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu t7, t7, 1 ;; counting how many times we wait
    sll r0, r0, 0
    sw t7, 188(t0)
    beq r0, r0, L46
    sll r0, r0, 0

B13:
L47:
    sw a1, 16(t4)     ;; start the to!
    xori t7, t5, 1024
    sw t7, 128(t4)
    addiu t7, r0, 64
    sw t7, 32(t4)
    addiu t7, r0, 256
    beq r0, r0, L49 ;; skip ahead
    sw t7, 0(t4)

 ;; only reach here if we dont have any more spr to's
 ;; still need to sync the to for the block we're about to process
B14:
L48:
    lw t7, 0(t4)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi t7, t7, 256
    sll r0, r0, 0
    beq t7, r0, L49
    sll r0, r0, 0

B15:
    sll r0, r0, 0
    lw t7, 188(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu t7, t7, 1
    sll r0, r0, 0
    sw t7, 188(t0)
    beq r0, r0, L48
    sll r0, r0, 0

 ;; common op start
 ;; at this point:
 ;;  t8 is our spad tfrag buffer, with 16 tfrags. at least 1 is visible.
 ;;  gp is our vis-list pointer
 ;; we run through this loop 2x, each time doing 8 tfrags.

B16:
L49:
    lb t7, 0(gp)      ;; load first 8 frag vis bits
    addiu gp, gp, 1   ;; inc vis bit ptr.
    sll r0, r0, 0
    sw gp, 160(t0)    ;; store cur-vis-bits
    bne t7, r0, L50   ;; are any visible in the first 8?
    sw t9, 164(t0)    ;; set end-vis-bits (why?)

B17: ;; none are visible
    daddiu a2, a2, -8  ;; dec tfrags
    addiu t8, t8, 512  ;; skip tfrags
    beq r0, r0, L65    ;; skip ahead!
    sll r0, r0, 0

B18:
L50:
    addiu t9, r0, 128    ;; vis mask init (gets shifted in each run of the 8-loop)
    lqc2 vf2, 16(t8)     ;; bsphere load
B19:
L51:
    daddiu gp, t6, -124  ;; are we full of stuff in buffer 1?
    sll r0, r0, 0
    blez gp, L54
    sll r0, r0, 0

B20:
L52:
    lw ra, 0(t1)       ;; wait for spr-from
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi ra, ra, 256
    sll r0, r0, 0
    beq ra, r0, L53
    sll r0, r0, 0

B21:
    sll r0, r0, 0     ;; count it
    lw ra, 184(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu ra, ra, 1
    sll r0, r0, 0
    sw ra, 184(t0)
    beq r0, r0, L52
    sll r0, r0, 0

B22:
L53:
    sw a3, 128(t1)       ;; kick off the next spr-from.
    xori a3, a3, 6144
    sw v1, 16(t1)        ;; to the dma-buf
    sll ra, t6, 4
    addu v1, v1, ra      ;; add qwc
    or ra, a3, r0        ;; ra is the spad-side dma buffer to write to
    sw t6, 32(t1)        ;; qwc
    addiu t6, r0, 256
    sw t6, 0(t1)         ;; go!
    addiu t6, r0, 0      ;; reset use.

 ;; actually building dma.
B23:
L54:
    and gp, t7, t9               ;; vis check
    vmulax.xyzw acc, vf16, vf2   ;; plane ?
    beq gp, r0, L64_8loop_reject ;; vis check failed, reject!
    lwu gp, 36(t8)                      ;; DMA BASE (chain) -------------

B24:
    vmadday.xyzw acc, vf17, vf2 ;; plane
    lbu s5, 45(t8)                     ;; DMA QWC1
    vmaddaz.xyzw acc, vf18, vf2 ;; plane
    sw gp, 4(t0)                       ;; base tmpl set addr
    vmsubaw.xyzw acc, vf19, vf0 ;; plane
    sh s5, 0(t0)                       ;; base tmpl set qwc
    vmaddw.xyzw vf5, vf1, vf2   ;; plane
    lwu gp, 32(t8)                     ;; DMA level0 -------------
    vmulaw.xyzw acc, vf27, vf0  ;; camrot
    lbu s5, 47(t8)                     ;; DMA QWC3
    vmaddax.xyzw acc, vf24, vf2 ;; camrot
    sw gp, 20(t0)                      ;; l0 tmpl set addr
    vmadday.xyzw acc, vf25, vf2 ;; camrot
    sh s5, 16(t0)                      ;; l0 tmpl set qwc
    vmaddaz.xyzw acc, vf26, vf2 ;; camrot
    lwu gp, 32(t8)                     ;; DMA common --------------
    qmfc2.i s5, vf5             ;; plane
    lbu s4, 44(t8)                     ;; DMA QWC0
    vmaddw.xyzw vf6, vf1, vf2   ;; ??
    sw gp, 36(t0)                      ;; common tmpl set addr
    vmsubw.xyzw vf8, vf1, vf2   ;; ??
    sh s4, 32(t0)                      ;; common tmpl set qwc
    pcgtw s5, r0, s5            ;; plane check
    lwu gp, 40(t8)                     ;; DMA level1 -------------
    ppach s5, r0, s5            ;; plane check
    lbu s4, 46(t8)                     ;; DMA QWC2
    vaddz.xyzw vf6, vf3, vf6    ;; dist
    sw gp, 52(t0)                      ;; l1 tmpl addr
    vaddz.xyzw vf7, vf3, vf8    ;; dist
    sw t3, 12(t0)                      ;; !!! set a vif on base, 0 on the first round, at least.
    bne s5, r0, L63_8loop_reject_tog_vis
    sh s4, 48(t0)                      ;; l1 tmpl qwc

B25:
    vmini.xyzw vf4, vf4, vf8 ;; max dist
    sw t3, 28(t0)            ;; !!! set a vif on l0
    sll r0, r0, 0
    lbu s5, 53(t8)           ;; s5 = num-base-colors
    qmfc2.i gp, vf6          ;; dist
    sw t3, 44(t0)            ;; !!! set a vif on common
    qmfc2.i s3, vf7          ;; dist
    lbu s4, 56(t8)           ;; s4 = color-offset
    pcgtw s2, r0, gp         ;; dist
    lw gp, 12(t8)            ;; gp = colors-indices
    pcgtw s3, r0, s3         ;; dist
    sb s4, 76(t0)            ;; store color-offset
    pinteh s4, s2, s3        ;; dist
    lbu s2, 54(t8)           ;; s2 = num-level0-colors
    ppacb s3, r0, s4         ;; dist
    lbu s1, 55(t8)           ;; s1 = num-level1-colors
    beq s3, r0, L56          ;; jump if dist fails?
    dsrl32 s4, s3, 8         ;; s4 is the level or something?

B26:
    beq s2, r0, L56          ;; if we have no level0 colors, use base
    sll r0, r0, 0

B27:
    beq s1, r0, L55          ;; if we have no level1 colors, use level0
    dsrl s5, s3, 16

B28:
    beq s5, r0, L55          ;; possible l1 skip based on lod
    dsrl32 s5, s3, 24

B29:
    bne s5, r0, L64_8loop_reject     ;; possible all skip based on lod.
    addiu s5, s1, 3                  ;; s5 = num-level1-colors + 3

B30:                  ;; level 1 color setup
    sra s4, s5, 2   ;; s4  = (num_color + 3) >> 4
    or s5, s1, r0   ;; s5  = (num_color)
    sll t3, s4, 2   ;; t3 = num colors, 4 aligned
    sh s4, 64(t0)   ;; color-tmpl qwc.
    sll r0, r0, 0
    sb t3, 78(t0)   ;; vif store
    daddiu t6, t6, 3 ;; use 3 qw's of global dma.
    lq s2, 32(t0) ;; load the common-tmpl!
    sll r0, r0, 0
    lq s1, 48(t0) ;; load the l1 tmpl!
    sll r0, r0, 0
    lq t3, 64(t0) ;; load the color tmpl!
    sq s2, 0(ra)  ;; store the common!
    sll r0, r0, 0
    sq s1, 16(ra) ;; store the l1!
    dsrl32 s2, s3, 16
    sq t3, 32(ra) ;; store the color
    daddiu ra, ra, 48 ;; advance the dma buffer pointer
    bne s2, r0, L57
    ori t3, t2, 18    ;; is this.. program 18?

B31:
    dsrl32 t3, s3, 8
    sll r0, r0, 0
    bne t3, r0, L57
    ori t3, t2, 16

B32:
    beq r0, r0, L57
    ori t3, t2, 14

B33:
L55:
    bne s4, r0, L64_8loop_reject
    addiu s5, s2, 3 ;; l0 colors + 3

B34:  l0 color setup
    sra s4, s5, 2 ;;  >> 2
    or s5, s2, r0 ;; s5 = qwc
    sll t3, s4, 2 ;; << 2
    sh s4, 64(t0) ;; color-tmpl qwc
    sll r0, r0, 0
    sb t3, 78(t0)  ;; vif store for unpack??
    daddiu t6, t6, 2 ;; only 2 qw's
    lq s2, 16(t0) ;; l0 tmpl
    sll r0, r0, 0
    lq t3, 64(t0) ;; color tmp
    sq s2, 0(ra)
    dsrl s3, s3, 8
    sq t3, 16(ra)
    daddiu ra, ra, 32
    bne s3, r0, L57
    ori t3, t2, 10

B35:
    beq r0, r0, L57
    ori t3, t2, 8

B36:
L56:
    bne s4, r0, L64_8loop_reject
    addiu s4, s5, 3 ;; base colors + 3

B37:
    sra s4, s4, 2
    sll r0, r0, 0
    sll t3, s4, 2
    sh s4, 64(t0)
    sll r0, r0, 0
    sb t3, 78(t0)
    ori t3, t2, 6
    lq s3, 0(t0) ;; base
    daddiu t6, t6, 2
    lq s2, 64(t0)  ;; color
    sq s3, 0(ra)
    sll r0, r0, 0
    sq s2, 16(ra)
    daddiu ra, ra, 32

;; END of the color setup
B38:
L57: ;; another opportunity to do some spad swappin
    addiu s3, r0, 127 ;; s3 = 127
    daddu s2, t6, s4  ;; s2 = dma-use + color qwc
    dsubu s3, s3, s2
    sll r0, r0, 0
    bgez s3, L60
    sll r0, r0, 0

B39:
L58:
    lw ra, 0(t1)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi ra, ra, 256
    sll r0, r0, 0
    beq ra, r0, L59
    sll r0, r0, 0

B40:
    sll r0, r0, 0
    lw ra, 184(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu ra, ra, 1
    sll r0, r0, 0
    sw ra, 184(t0)
    beq r0, r0, L58
    sll r0, r0, 0

B41:
L59:
    sw a3, 128(t1)
    xori a3, a3, 6144
    sw v1, 16(t1)
    sll ra, t6, 4
    addu v1, v1, ra
    or ra, a3, r0
    sw t6, 32(t1)
    addiu t6, r0, 256
    sw t6, 0(t1)
    addiu t6, r0, 0
B42:

L60:
    daddu t6, t6, s4      ;; add color imm's to dma buffer length
    sw t8, 168(t0)        ;; back up tfrag... not enough regs
    ld s4, 0(gp)          ;; load color-indices (u64 = u16 x 4)
    daddiu t8, gp, 8      ;; inc colors ptr
    daddiu gp, s5, -4     ;; gp is color counter. we're using the rounded up to 4 color count.
    lq s5, 128(t0)        ;; color-ptr x4
    pextlh s4, r0, s4     ;; expand packed u16's to u32's
    mfc1 r0, f31          ;; nop
    paddw s2, s4, s5      ;; add to color pointers
    mfc1 r0, f31
    lw s4, 0(s2)          ;; s4 = colors[0]
    dsra32 s3, s2, 0
    lw s3, 0(s3)          ;; s5 = colors[1]
    pcpyud s1, s2, s2
    lw s2, 0(s1)          ;; s2 = colors[2]
    dsra32 s1, s1, 0
    blez gp, L62
    lw s1, 0(s1)          ;; s1 = colors[3]

B43:
L61:
    ld s0, 0(t8)
    daddiu ra, ra, 16
    daddiu t8, t8, 8
    sw s4, -16(ra)
    daddiu gp, gp, -4
    sw s3, -12(ra)
    pextlh s4, r0, s0
    sw s2, -8(ra)
    paddw s2, s4, s5
    sw s1, -4(ra)
    lw s4, 0(s2)
    dsra32 s3, s2, 0
    lw s3, 0(s3)
    pcpyud s1, s2, s2
    lw s2, 0(s1)
    dsra32 s1, s1, 0
    bgtz gp, L61
    lw s1, 0(s1)

B44:
L62:
    daddiu ra, ra, 16
    lw t8, 168(t0)
    sll r0, r0, 0
    sw s4, -16(ra)
    sll r0, r0, 0
    sw s3, -12(ra)
    sll r0, r0, 0
    sw s2, -8(ra)
    sll r0, r0, 0
    sw s1, -4(ra)
B45:
L63_8loop_reject_tog_vis:
    xor t7, t7, t9 ;; update vis
    sll r0, r0, 0
B46:
L64_8loop_reject:
    daddiu t8, t8, 64
    srl t9, t9, 1
    addiu a2, a2, -1
    sll r0, r0, 0
    bne t9, r0, L51
    lqc2 vf2, 16(t8)

B47:
L65:
    sll r0, r0, 0
    lw gp, 160(t0)
    sll r0, r0, 0
    lw t9, 164(t0)
    bne gp, t9, L49
    sb t7, -1(gp)

B48:
    bgtz a2, L43_MAIN_LOOP_TOP
    sll r0, r0, 0

B49:
    beq t6, r0, L68
    sll r0, r0, 0

B50:
L66:
    lw a0, 0(t1)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi a0, a0, 256
    sll r0, r0, 0
    beq a0, r0, L67
    sll r0, r0, 0

B51:
    sll r0, r0, 0
    lw a0, 184(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu a0, a0, 1
    sll r0, r0, 0
    sw a0, 184(t0)
    beq r0, r0, L66
    sll r0, r0, 0

B52:
L67:
    sw a3, 128(t1)
    xori a0, a3, 6144
    sw v1, 16(t1)
    sll a1, t6, 4
    addu v1, v1, a1
    or a0, a0, r0
    sw t6, 32(t1)
    addiu a0, r0, 256
    sw a0, 0(t1)
    addiu a0, r0, 0
B53:
L68:
    lw a0, 0(t1)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    andi a0, a0, 256
    sll r0, r0, 0
    beq a0, r0, L69_CLEANUP
    sll r0, r0, 0

B54:
    sll r0, r0, 0
    lw a0, 184(t0)
    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0
    daddiu a0, a0, 1
    sll r0, r0, 0
    sw a0, 184(t0)
    beq r0, r0, L68
    sll r0, r0, 0

B55:
L69_CLEANUP:
    lw a0, 176(t0)
    sll r0, r0, 0
    sw t3, 172(t0)
    sll r0, r0, 0
    sqc2 vf4, 112(t0)
    sll r0, r0, 0
    sw v1, 4(a0)
    sll r0, r0, 0
    or v0, r0, r0
    ld ra, 0(sp)
    lq gp, 112(sp)
    lq s5, 96(sp)
    lq s4, 80(sp)
    lq s3, 64(sp)
    lq s2, 48(sp)
    lq s1, 32(sp)
    lq s0, 16(sp)
    jr ra
    daddiu sp, sp, 128

    sll r0, r0, 0
    sll r0, r0, 0
    sll r0, r0, 0


    Notes on the VU program
  vi03 is a pointer to an "address book" - a sequence of addresses
  vi02 contains addresses in this book
  from these xyw are loaded for vf28 (v3-32, with 2, 1)
    xy are floats. w is address of next vertex data.

  vi08 is a pointer to adgifs?
  vi09 is a pointer to some data like [vi12, ?, ?, vi13] ??

  vi12 counter, started negative?
  vi13 is adgif offset?

  vi04 is a pointer to tri-data:
    - vertex (w = 128.0?)
    - ?? (vf20)