// jak-project/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp

#include "OceanTexture.h"

void OceanTexture::run_L1_PC() {
  //  L1:
  //  lq.xyzw vf14_startx, 988(vi00)    |  maxw.xyzw vf01_ones, vf00, vf00
  vu.startx = Vf(m_texture_constants.start);
  //  lq.xyzw vf02_offset, 989(vi00)
  //  lq.xyzw vf03_tbuf, 986(vi00)
  //  lq.xyzw vf04_dbuf, 987(vi00)
  //  lq.xyzw vf05_giftag, 985(vi00)
  //  lq.xyzw vf06_cam_nrm, 991(vi00)
  //  lq.xyzw vf07_constants, 990(vi00)
  //  iaddiu vi11_0x80, vi00, 0x80
  //  mtir vi08_tptr, vf03_tbuf.x
  vu.tptr = get_tbuf();
  //  mtir vi09_tbase, vf03_tbuf.x
  vu.tbase = get_tbuf();
  //  mr32.xyzw vf03_tbuf, vf03_tbuf
  swap_tbuf();
  //  xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  nop                     :e
  //  nop
}

void OceanTexture::run_L2_PC() {
  //  L2:
  //  xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();
  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  nop                     :e
  //  nop
}

namespace {
// Emulates the VU "lq" (load quadword) instruction: copies the components of *src that are
// selected by mask into dest. Components that are not selected keep their old value.
void lq_buffer(Mask mask, Vf& dest, Vf* src) {
  const u64 selected = (u64)mask;
  for (int lane = 0; lane < 4; lane++) {
    if (!(selected & (1 << lane))) {
      continue;
    }
    dest.data[lane] = src->data[lane];
  }
}

// Emulates the VU "sq" (store quadword) instruction: copies the components of src that are
// selected by mask into *dest. Components that are not selected keep their old value.
void sq_buffer(Mask mask, const Vf& src, Vf* dest) {
  const u64 selected = (u64)mask;
  for (int lane = 0; lane < 4; lane++) {
    if (!(selected & (1 << lane))) {
      continue;
    }
    dest->data[lane] = src.data[lane];
  }
}

// Stores a raw 16-byte giftag into *dest as an unmasked byte copy.
void sq_buffer_giftag(const u8* src, Vf* dest) {
  constexpr int kQuadwordBytes = 16;
  memcpy(dest, src, kQuadwordBytes);
}
}  // namespace

// PC port of subroutine L3 of the ocean texture VU1 program (jak 1 variant).
// Every original VU instruction pair is kept as a comment ("lower | upper  index") directly
// above its C++ translation; the statement order follows the original program.
//
// Per loop iteration this reads Vf entries 0..6 at vu.in_ptr (even entries into vtx0..3, odd
// entries into nrm0..2) and writes 12 Vf entries at vu.dbuf_write: for each of 4 vertices a
// (cout, vtx, res) triple at offsets (0,1,2), (3,4,5), (6,7,8), (9,10,11). The loop body runs 8
// times (32 vertices). The epilogue appends one more triple that reuses the cout/vtx entries of
// the first vertex written by this call (read back through vu.dbuf_write_base).
//
// Side effects: vu.dbuf_write_base is set to the incoming vu.dbuf_write, vu.in_ptr advances by
// 64 entries, vu.dbuf_write advances by 96 entries, and offsets.x is added to vu.startx.y.
//
// The loop is software pipelined: the code before label L4 is a copy of the tail of the loop
// body, so the nrm loads / reflect computation for an iteration happen at the end of the
// previous one. As a consequence the final iteration still loads nrm0..2 from entries past the
// 64 consumed ones; those values are not used afterwards.
void OceanTexture::run_L3_PC() {
  Vf base_pos;  // vf15
  u16 loop_idx;

  Vf vtx0;  // vf16
  Vf vtx1;  // vf17
  Vf vtx2;  // vf18
  Vf vtx3;  // vf19

  Vf res0;  // vf20
  Vf res1;  // vf21
  Vf res2;  // vf22
  Vf res3;  // vf23

  Vf nrm0;  // vf24
  Vf nrm1;  // vf25
  Vf nrm2;  // vf26

  Vf reflect;  // vf27

  Vf cout0;  // vf28
  Vf cout1;  // vf29
  Vf cout2;  // vf30
  Vf cout3;  // vf31

  Accumulator acc;
  const Vf ones(1, 1, 1, 1);  // vf01, produced by "maxw vf01, vf00, vf00" in L1
  const Vf vf00(0, 0, 0, 1);  // the VU constant register
  const u16 vi11 = 0x80;      // written into the w component of every vtx entry
  bool bc;                    // branch condition, evaluated before the branch delay slot

  // clang-format off
  // L3:
  // ior vi07, vi06, vi00       |  nop                            56
  vu.dbuf_write_base = vu.dbuf_write;
  // move.xyzw vf15, vf14       |  nop                            57
  base_pos.move(Mask::xyzw, vu.startx);
  // iaddi vi01, vi00, 0x8      |  nop                            58
  loop_idx = 8;
  // lq.xyzw vf24, 1(vi05)      |  mulw.xyzw vf20, vf15, vf00     59 (?? what are they doing here)
  // (multiplying by vf00.w, which is 1, is translated as a multiply by 1.f, i.e. a copy of base_pos)
  res0.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
  // lq.xyzw vf25, 3(vi05)      |  mulw.xyzw vf21, vf15, vf00     60
  res1.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
  // lq.xyzw vf26, 5(vi05)      |  mulw.xyzw vf22, vf15, vf00     61
  res2.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
  // nop                        |  mulw.xyzw vf23, vf15, vf00     62
  res3.mul(Mask::xyzw, base_pos, 1.f);
  // reflect = nrm0 * cam_nrm.x + nrm1 * cam_nrm.y + nrm2 * cam_nrm.z
  // nop                        |  mulax.xyzw ACC, vf24, vf06     63
  acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x());
  // nop                        |  madday.xyzw ACC, vf25, vf06    64
  acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y());
  // nop                        |  maddz.xyzw vf27, vf26, vf06    65
  acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z());
  // res0..res3 and the next base_pos differ only in x: offsets.x/y/z/w are added to it.
  // nop                        |  addx.x vf21, vf21, vf02        66
  res1.add(Mask::x, res1, m_texture_constants.offsets.x());
  // nop                        |  addy.x vf22, vf22, vf02        67
  res2.add(Mask::x, res2, m_texture_constants.offsets.y());
  L4:
  // nop                        |  addz.x vf23, vf23, vf02        68
  res3.add(Mask::x, res3, m_texture_constants.offsets.z());
  // nop                        |  addw.x vf15, vf15, vf02        69
  base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
  // cout{0..3}.x = nrm0.{x,y,z,w}; the res entries of the 4 vertices are stored meanwhile.
  // sq.xyzw vf20, 2(vi06)      |  mulx.x vf28, vf01, vf24        70
  cout0.mul(Mask::x, ones, nrm0.x());   sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
  // sq.xyzw vf21, 5(vi06)      |  muly.x vf29, vf01, vf24        71
  cout1.mul(Mask::x, ones, nrm0.y());   sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
  // sq.xyzw vf22, 8(vi06)      |  mulz.x vf30, vf01, vf24        72
  cout2.mul(Mask::x, ones, nrm0.z());   sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
  // sq.xyzw vf23, 11(vi06)     |  mulw.x vf31, vf01, vf24        73
  cout3.mul(Mask::x, ones, nrm0.w());   sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
  // cout{0..3}.y = nrm1.{x,y,z,w}; the vtx inputs are loaded meanwhile.
  // lq.xyzw vf16, 0(vi05)      |  mulx.y vf28, vf01, vf25        74
  cout0.mul(Mask::y, ones, nrm1.x());   lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
  // lq.xyzw vf17, 2(vi05)      |  muly.y vf29, vf01, vf25        75
  cout1.mul(Mask::y, ones, nrm1.y());   lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
  // lq.xyzw vf18, 4(vi05)      |  mulz.y vf30, vf01, vf25        76
  cout2.mul(Mask::y, ones, nrm1.z());   lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
  // lq.xyzw vf19, 6(vi05)      |  mulw.y vf31, vf01, vf25        77
  cout3.mul(Mask::y, ones, nrm1.w());   lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
  // scale cout.xy by the matching component of reflect ...
  // iaddi vi05, vi05, 0x8      |  mulx.xy vf28, vf28, vf27       78
  cout0.mul(Mask::xy, cout0, reflect.x());   vu.in_ptr = vu.in_ptr + 8;
  // nop                        |  muly.xy vf29, vf29, vf27       79
  cout1.mul(Mask::xy, cout1, reflect.y());
  // nop                        |  mulz.xy vf30, vf30, vf27       80
  cout2.mul(Mask::xy, cout2, reflect.z());
  // nop                        |  mulw.xy vf31, vf31, vf27       81
  cout3.mul(Mask::xy, cout3, reflect.w());
  // ... and by the w component of the matching vtx input (read before ftoi0 / mfir overwrite it)
  // nop                        |  mulw.xy vf28, vf28, vf16       82
  cout0.mul(Mask::xy, cout0, vtx0.w());
  // nop                        |  mulw.xy vf29, vf29, vf17       83
  cout1.mul(Mask::xy, cout1, vtx1.w());
  // nop                        |  mulw.xy vf30, vf30, vf18       84
  cout2.mul(Mask::xy, cout2, vtx2.w());
  // nop                        |  mulw.xy vf31, vf31, vf19       85
  cout3.mul(Mask::xy, cout3, vtx3.w());
  // nop                        |  ftoi0.xyzw vf16, vf16          86
  vtx0.ftoi0(Mask::xyzw, vtx0);
  // nop                        |  ftoi0.xyzw vf17, vf17          87
  vtx1.ftoi0(Mask::xyzw, vtx1);
  // nop                        |  ftoi0.xyzw vf18, vf18          88
  vtx2.ftoi0(Mask::xyzw, vtx2);
  // iaddi vi01, vi01, -0x1     |  ftoi0.xyzw vf19, vf19          89
  vtx3.ftoi0(Mask::xyzw, vtx3);   loop_idx = loop_idx + -1;
  // mfir.w vf16, vi11          |  add.xyzw vf28, vf28, vf06      90
  cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm);   vtx0.mfir(Mask::w, vi11);
  // mfir.w vf17, vi11          |  add.xyzw vf29, vf29, vf06      91
  cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm);   vtx1.mfir(Mask::w, vi11);
  // mfir.w vf18, vi11          |  add.xyzw vf30, vf30, vf06      92
  cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm);   vtx2.mfir(Mask::w, vi11);
  // mfir.w vf19, vi11          |  add.xyzw vf31, vf31, vf06      93
  cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm);   vtx3.mfir(Mask::w, vi11);
  // cout = cout * constants.x + constants.y
  // nop                        |  mulx.xyzw vf28, vf28, vf07     94
  cout0.mul(Mask::xyzw, cout0, m_texture_constants.constants.x());
  // nop                        |  mulx.xyzw vf29, vf29, vf07     95
  cout1.mul(Mask::xyzw, cout1, m_texture_constants.constants.x());
  // nop                        |  mulx.xyzw vf30, vf30, vf07     96
  cout2.mul(Mask::xyzw, cout2, m_texture_constants.constants.x());
  // nop                        |  mulx.xyzw vf31, vf31, vf07     97
  cout3.mul(Mask::xyzw, cout3, m_texture_constants.constants.x());
  // nop                        |  addy.xyzw vf28, vf28, vf07     98
  cout0.add(Mask::xyzw, cout0, m_texture_constants.constants.y());
  // nop                        |  addy.xyzw vf29, vf29, vf07     99
  cout1.add(Mask::xyzw, cout1, m_texture_constants.constants.y());
  // nop                        |  addy.xyzw vf30, vf30, vf07     100
  cout2.add(Mask::xyzw, cout2, m_texture_constants.constants.y());
  // nop                        |  addy.xyzw vf31, vf31, vf07     101
  cout3.add(Mask::xyzw, cout3, m_texture_constants.constants.y());
  // cout.zw is overwritten with (ones - vf00).zw; the vtx entries are stored meanwhile.
  // sq.xyzw vf16, 1(vi06)      |  sub.zw vf28, vf01, vf00        102
  cout0.sub(Mask::zw, ones, vf00);   sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
  // sq.xyzw vf17, 4(vi06)      |  sub.zw vf29, vf01, vf00        103
  cout1.sub(Mask::zw, ones, vf00);   sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
  // sq.xyzw vf18, 7(vi06)      |  sub.zw vf30, vf01, vf00        104
  cout2.sub(Mask::zw, ones, vf00);   sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
  // sq.xyzw vf19, 10(vi06)     |  sub.zw vf31, vf01, vf00        105
  cout3.sub(Mask::zw, ones, vf00);   sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
  // From here on: set-up for the next iteration (same work as instructions 59..67 above),
  // overlapped with the stores of this iteration's cout entries.
  // lq.xyzw vf24, 1(vi05)      |  mulw.xyzw vf20, vf15, vf00     106
  res0.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
  // lq.xyzw vf25, 3(vi05)      |  mulw.xyzw vf21, vf15, vf00     107
  res1.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
  // lq.xyzw vf26, 5(vi05)      |  mulw.xyzw vf22, vf15, vf00     108
  res2.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
  // sq.xyzw vf28, 0(vi06)      |  mulw.xyzw vf23, vf15, vf00     109
  res3.mul(Mask::xyzw, base_pos, 1.f);   sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);

  // sq.xyzw vf29, 3(vi06)      |  mulax.xyzw ACC, vf24, vf06     110
  acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x());   sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
  // sq.xyzw vf30, 6(vi06)      |  madday.xyzw ACC, vf25, vf06    111
  acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y());   sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
  // sq.xyzw vf31, 9(vi06)      |  maddz.xyzw vf27, vf26, vf06    112
  acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z());   sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
  // BRANCH!
  // The condition is latched here; the next instruction is the branch delay slot and runs
  // whether or not the branch is taken.
  // ibgtz vi01, L4             |  addx.x vf21, vf21, vf02        113
  res1.add(Mask::x, res1, m_texture_constants.offsets.x());   bc = ((s16)loop_idx) > 0;
  // iaddi vi06, vi06, 0xc      |  addy.x vf22, vf22, vf02        114
  res2.add(Mask::x, res2, m_texture_constants.offsets.y());   vu.dbuf_write = vu.dbuf_write + 12;
  if (bc) { goto L4; }

  // Epilogue: step startx.y for the next call and append one extra vertex made of the first
  // vertex's cout/vtx entries and the current res0.
  // lq.xyzw vf28, 0(vi07)      |  addx.y vf14, vf14, vf02        115
  vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x());   lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
  // lq.xyzw vf16, 1(vi07)      |  nop                            116
  lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
  // sq.xyzw vf20, 2(vi06)      |  nop                            117
  sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
  // sq.xyzw vf28, 0(vi06)      |  nop                            118
  sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
  // jr vi12                    |  nop                            119
  // sq.xyzw vf16, 1(vi06)      |  nop                            120
  sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
  // clang-format on
}

void OceanTexture::run_L5_PC() {
  // clang-format off
  u16 loop_idx;
  Vf res0;
  Vf res1;
  Vf cout0;
  Vf cout1;
  Vf vtx0;
  Vf vtx1;
  bool bc;
  // L5:
  // iaddiu vi01, vi00, 0x21    |  nop                            121
  loop_idx = 0x21; /* 33 */
  // sq.xyzw vf05, 0(vi08)      |  nop                            122
  sq_buffer_giftag(m_texture_constants.giftag, vu.tptr);
  // iaddi vi08, vi08, 0x1      |  nop                            123
  vu.tptr = vu.tptr + 1;
  L6:
  // iaddi vi01, vi01, -0x1     |  nop                            124
  loop_idx = loop_idx + -1;
  // lq.xyzw vf20, 2(vi03)      |  nop                            125
  lq_buffer(Mask::xyzw, res0, vu.dbuf_read_a + 2);
  // lq.xyzw vf21, 2(vi04)      |  nop                            126
  lq_buffer(Mask::xyzw, res1, vu.dbuf_read_b + 2);
  // lq.xyzw vf28, 0(vi03)      |  nop                            127
  lq_buffer(Mask::xyzw, cout0, vu.dbuf_read_a);
  // lq.xyzw vf16, 1(vi03)      |  nop                            128
  lq_buffer(Mask::xyzw, vtx0, vu.dbuf_read_a + 1);
  // lq.xyzw vf29, 0(vi04)      |  ftoi4.xyzw vf20, vf20          129
  res0.ftoi4(Mask::xyzw, res0);   lq_buffer(Mask::xyzw, cout1, vu.dbuf_read_b);
  // lq.xyzw vf17, 1(vi04)      |  ftoi4.xyzw vf21, vf21          130
  res1.ftoi4(Mask::xyzw, res1);   lq_buffer(Mask::xyzw, vtx1, vu.dbuf_read_b + 1);
  // sq.xyzw vf28, 0(vi08)      |  nop                            131
  sq_buffer(Mask::xyzw, cout0, vu.tptr);
  // sq.xyzw vf16, 1(vi08)      |  nop                            132
  sq_buffer(Mask::xyzw, vtx0, vu.tptr + 1);
  // sq.xyzw vf20, 2(vi08)      |  nop                            133
  sq_buffer(Mask::xyzw, res0, vu.tptr + 2);
  // sq.xyzw vf29, 3(vi08)      |  nop                            134
  sq_buffer(Mask::xyzw, cout1, vu.tptr + 3);
  // sq.xyzw vf17, 4(vi08)      |  nop                            135
  sq_buffer(Mask::xyzw, vtx1, vu.tptr + 4);
  // sq.xyzw vf21, 5(vi08)      |  nop                            136
  sq_buffer(Mask::xyzw, res1, vu.tptr + 5);

  // iaddi vi03, vi03, 0x3      |  nop                            137
  vu.dbuf_read_a = vu.dbuf_read_a + 3;
  // iaddi vi04, vi04, 0x3      |  nop                            138
  vu.dbuf_read_b = vu.dbuf_read_b + 3;
  // BRANCH!
  // ibgtz vi01, L6             |  nop                            139
  bc = ((s16)loop_idx) > 0;
  // iaddi vi08, vi08, 0x6      |  nop                            140
  vu.tptr = vu.tptr + 6;
  if (bc) { goto L6; }

  // xgkick vi09                |  nop                            141
  xgkick_PC(vu.tbase);
  // mtir vi08, vf03.x          |  nop                            142
  // vu.tptr = vu.vf03.x_as_u16();
  vu.tptr = get_tbuf();
  // mtir vi09, vf03.x          |  nop                            143
  // vu.vi09 = vu.vf03.x_as_u16();
  vu.tbase = get_tbuf();
  // jr vi12                    |  nop                            144
  // ASSERT(false);
  // mr32.xyzw vf03, vf03       |  nop                            145
  // vu.vf03.mr32(Mask::xyzw, vu.vf03);
  swap_tbuf();
  // clang-format on
}

// Stand-in for the VU "xgkick": instead of sending the packet at src to the GS, unpack it
// into m_pc.vertex_dynamic for the OpenGL draw in flush().
//
// This relies on the exact packet layout written by run_L5_PC: one 16-byte giftag, then
// NUM_VERTS_PER_STRIP vertices of 48 bytes each (st, rgbaq, xyz2 - 16 bytes apiece).
// Vertices are appended at m_pc.vtx_idx, which advances by one per vertex.
void OceanTexture::xgkick_PC(Vf* src) {
  const u8* packet = (const u8*)src;

  // start at 16 to skip the giftag; each vertex is 48 bytes
  for (u32 vert = 0, pos = 16; vert < NUM_VERTS_PER_STRIP; vert++, pos += 48) {
    auto& out = m_pc.vertex_dynamic[m_pc.vtx_idx++];

    // st: the first two floats of the first quadword
    memcpy(&out.s, packet + pos, sizeof(float) * 2);

    // rgbaq: four 32-bit fields in the second quadword; only the first byte of each is read
    const u8* rgbaq = packet + pos + 16;
    out.rgba.x() = rgbaq[0];
    out.rgba.y() = rgbaq[4];
    out.rgba.z() = rgbaq[8];
    out.rgba.w() = rgbaq[12];  // we don't actually need it, it's always 0x80

    // xyz2: third quadword, not read here
  }
}

void OceanTexture::run_L1_PC_jak2() {
  //  L1:
  //  lq.xyzw vf14_startx, 988(vi00)    |  maxw.xyzw vf01_ones, vf00, vf00
  vu.startx = Vf(m_texture_constants.start);
  //  lq.xyzw vf02_offset, 989(vi00)
  //  lq.xyzw vf03_tbuf, 986(vi00)
  //  lq.xyzw vf04_dbuf, 987(vi00)
  //  lq.xyzw vf05_giftag, 985(vi00)
  //  lq.xyzw vf06_cam_nrm, 991(vi00)
  //  lq.xyzw vf07_constants, 990(vi00)
  //  iaddiu vi11_0x80, vi00, 0x80
  //  mtir vi08_tptr, vf03_tbuf.x
  vu.tptr = get_tbuf();
  //  mtir vi09_tbase, vf03_tbuf.x
  vu.tbase = get_tbuf();
  //  mr32.xyzw vf03_tbuf, vf03_tbuf
  swap_tbuf();
  //  xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  nop                     :e
  //  nop
}

void OceanTexture::run_L2_PC_jak2() {
  //  L2:
  //  xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();
  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  mtir vi06_dbuf_write, vf04_dbuf.x
  vu.dbuf_write = get_dbuf();
  //  bal vi12_ra, L3
  //  mr32.xyzw vf04_dbuf, vf04_dbuf
  swap_dbuf();
  run_L3_PC_jak2();

  //  mtir vi03_dbuf_read_a, vf04_dbuf.x
  vu.dbuf_read_a = get_dbuf();
  //  bal vi12_ra, L5
  //  mtir vi04_dbuf_read_b, vf04_dbuf.y
  vu.dbuf_read_b = get_dbuf_other();
  run_L5_PC();

  //  nop                     :e
  //  nop
}

// PC port of subroutine L3 of the ocean texture VU1 program (jak 2 variant).
// Every original VU instruction pair is kept as a comment ("lower | upper  index") directly
// above its C++ translation; the statement order follows the original program.
//
// Per loop iteration this reads Vf entries 0, 2, 4, 6 (vtx0..3) plus entries 1 and 5 (nrm0,
// nrm2) at vu.in_ptr and writes 12 Vf entries at vu.dbuf_write: for each of 4 vertices a
// (cout, vtx, res) triple at offsets (0,1,2), (3,4,5), (6,7,8), (9,10,11). The loop body runs 8
// times (32 vertices). The epilogue appends one more triple that reuses the cout/vtx entries of
// the first vertex written by this call (read back through vu.dbuf_write_base).
//
// Compared to run_L3_PC there is no reflect/accumulator computation, no scale/bias by
// m_texture_constants.constants, and the w component of the vtx entries is not overwritten.
//
// Side effects: vu.dbuf_write_base is set to the incoming vu.dbuf_write, vu.in_ptr advances by
// 64 entries, vu.dbuf_write advances by 96 entries, and offsets.x is added to vu.startx.y.
//
// The loop is software pipelined: the code before label L4 is a copy of the tail of the loop
// body. As a consequence the final iteration still loads nrm0/nrm2 from entries past the 64
// consumed ones; those values are not used afterwards.
void OceanTexture::run_L3_PC_jak2() {
  Vf base_pos;  // vf15
  u16 loop_idx;

  Vf vtx0;  // vf16
  Vf vtx1;  // vf17
  Vf vtx2;  // vf18
  Vf vtx3;  // vf19

  Vf res0;  // vf20
  Vf res1;  // vf21
  Vf res2;  // vf22
  Vf res3;  // vf23

  Vf nrm0;  // vf24
  // Vf nrm1;  // vf25
  Vf nrm2;  // vf26

  // Vf reflect;  // vf27

  Vf cout0;  // vf28
  Vf cout1;  // vf29
  Vf cout2;  // vf30
  Vf cout3;  // vf31

  // Accumulator acc;
  const Vf ones(1, 1, 1, 1);  // vf01, produced by "maxw vf01, vf00, vf00" in L1
  const Vf vf00(0, 0, 0, 1);  // the VU constant register
  // const u16 vi11 = 0x80;
  bool bc;  // branch condition, evaluated before the branch delay slot

  // clang-format off
  // L3:
  // ior vi07, vi06, vi00       |  nop                            56
  vu.dbuf_write_base = vu.dbuf_write;
  // move.xyzw vf15, vf14       |  nop                            57
  base_pos.move(Mask::xyzw, vu.startx);
  // iaddi vi01, vi00, 0x8      |  nop                            58
  loop_idx = 8;
  // lq.xyzw vf24, 1(vi05)      |  mulw.xyzw vf20, vf15, vf00     59 (?? what are they doing here)
  // (multiplying by vf00.w, which is 1, is translated as a multiply by 1.f, i.e. a copy of base_pos)
  res0.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
  // lq.xyzw vf26, 3(vi05)      |  mulw.xyzw vf21, vf15, vf00     60
  // NOTE(review): the disassembly comment above says offset 3, but the code loads offset 5,
  // matching the in-loop load (instruction 84). Presumably the comment is stale - confirm
  // against the jak 2 VU program.
  res1.mul(Mask::xyzw, base_pos, 1.f);   lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
  // nop                        |  mulw.xyzw vf22, vf15, vf00     61
  // res2 must be seeded from base_pos here: only its x component is adjusted below, and the
  // whole vector is stored in the first loop iteration.
  res2.mul(Mask::xyzw, base_pos, 1.f);
  // nop                        |  mulw.xyzw vf23, vf15, vf00     62
  res3.mul(Mask::xyzw, base_pos, 1.f);
  // res0..res3 and the next base_pos differ only in x: offsets.x/y/z/w are added to it.
  // nop                        |  addx.x vf21, vf21, vf02        63
  res1.add(Mask::x, res1, m_texture_constants.offsets.x());
  // nop                        |  addy.x vf22, vf22, vf02        64
  res2.add(Mask::x, res2, m_texture_constants.offsets.y());
  L4:
  // nop                        |  addz.x vf23, vf23, vf02        65
  res3.add(Mask::x, res3, m_texture_constants.offsets.z());
  // nop                        |  addw.x vf15, vf15, vf02        66
  base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
  // cout{0..3}.x = nrm0.{x,y,z,w}; the res entries of the 4 vertices are stored meanwhile.
  // sq.xyzw vf20, 2(vi06)      |  mulx.x vf28, vf01, vf24        67
  cout0.mul(Mask::x, ones, nrm0.x());   sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
  // sq.xyzw vf21, 5(vi06)      |  muly.x vf29, vf01, vf24        68
  cout1.mul(Mask::x, ones, nrm0.y());   sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
  // sq.xyzw vf22, 8(vi06)      |  mulz.x vf30, vf01, vf24        69
  cout2.mul(Mask::x, ones, nrm0.z());   sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
  // sq.xyzw vf23, 11(vi06)     |  mulw.x vf31, vf01, vf24        70
  cout3.mul(Mask::x, ones, nrm0.w());   sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
  // cout{0..3}.y = nrm2.{x,y,z,w}; the vtx inputs are loaded meanwhile.
  // lq.xyzw vf16, 0(vi05)      |  mulx.y vf28, vf01, vf26        71
  cout0.mul(Mask::y, ones, nrm2.x());   lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
  // lq.xyzw vf17, 2(vi05)      |  muly.y vf29, vf01, vf26        72
  cout1.mul(Mask::y, ones, nrm2.y());   lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
  // lq.xyzw vf18, 4(vi05)      |  mulz.y vf30, vf01, vf26        73
  cout2.mul(Mask::y, ones, nrm2.z());   lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
  // lq.xyzw vf19, 6(vi05)      |  mulw.y vf31, vf01, vf26        74
  cout3.mul(Mask::y, ones, nrm2.w());   lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
  // iaddi vi05, vi05, 0x8      |  ftoi0.xyzw vf16, vf16          75
  vtx0.ftoi0(Mask::xyzw, vtx0);   vu.in_ptr = vu.in_ptr + 8;
  // nop                        |  ftoi0.xyzw vf17, vf17          76
  vtx1.ftoi0(Mask::xyzw, vtx1);
  // nop                        |  ftoi0.xyzw vf18, vf18          77
  vtx2.ftoi0(Mask::xyzw, vtx2);
  // iaddi vi01, vi01, -0x1     |  ftoi0.xyzw vf19, vf19          78
  vtx3.ftoi0(Mask::xyzw, vtx3);   loop_idx = loop_idx + -1;
  // NOTE(review): the disassembly names vf07 as the addend in instructions 79..82 while the
  // code adds m_texture_constants.cam_nrm - presumably the jak 2 constant layout differs from
  // the jak 1 register names; confirm.
  // sq.xyzw vf16, 1(vi06)      |  add.xyzw vf28, vf28, vf07      79
  cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm);   sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
  // sq.xyzw vf17, 4(vi06)      |  add.xyzw vf29, vf29, vf07      80
  cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm);   sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
  // sq.xyzw vf18, 7(vi06)      |  add.xyzw vf30, vf30, vf07      81
  cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm);   sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
  // sq.xyzw vf19, 10(vi06)     |  add.xyzw vf31, vf31, vf07      82
  cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm);   sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
  // cout.zw is overwritten with (ones - vf00).zw; the next iteration's nrm inputs are
  // loaded meanwhile.
  // lq.xyzw vf24, 1(vi05)      |  sub.zw vf28, vf01, vf00        83
  cout0.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
  // lq.xyzw vf26, 5(vi05)      |  sub.zw vf29, vf01, vf00        84
  cout1.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
  // nop                        |  sub.zw vf30, vf01, vf00        85
  cout2.sub(Mask::zw, ones, vf00);
  // nop                        |  sub.zw vf31, vf01, vf00        86
  cout3.sub(Mask::zw, ones, vf00);
  // Set-up of res0..3 for the next iteration (same work as instructions 59..64 above),
  // overlapped with the stores of this iteration's cout entries.
  // sq.xyzw vf28, 0(vi06)      |  mulw.xyzw vf20, vf15, vf00     87
  res0.mul(Mask::xyzw, base_pos, 1.f);   sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
  // sq.xyzw vf29, 3(vi06)      |  mulw.xyzw vf21, vf15, vf00     88
  res1.mul(Mask::xyzw, base_pos, 1.f);   sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
  // sq.xyzw vf30, 6(vi06)      |  mulw.xyzw vf22, vf15, vf00     89
  res2.mul(Mask::xyzw, base_pos, 1.f);   sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
  // sq.xyzw vf31, 9(vi06)      |  mulw.xyzw vf23, vf15, vf00     90
  res3.mul(Mask::xyzw, base_pos, 1.f);   sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
  // BRANCH!
  // The condition is latched here; the next instruction is the branch delay slot and runs
  // whether or not the branch is taken.
  // ibgtz vi01, L4             |  addx.x vf21, vf21, vf02        91
  res1.add(Mask::x, res1, m_texture_constants.offsets.x());   bc = ((s16)loop_idx) > 0;
  // iaddi vi06, vi06, 0xc      |  addy.x vf22, vf22, vf02        92
  res2.add(Mask::x, res2, m_texture_constants.offsets.y());   vu.dbuf_write = vu.dbuf_write + 12;
  if (bc) { goto L4; }

  // Epilogue: step startx.y for the next call and append one extra vertex made of the first
  // vertex's cout/vtx entries and the current res0.
  // lq.xyzw vf28, 0(vi07)      |  addx.y vf14, vf14, vf02        93
  vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x());   lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
  // lq.xyzw vf16, 1(vi07)      |  nop                            94
  lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
  // sq.xyzw vf20, 2(vi06)      |  nop                            95
  sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
  // sq.xyzw vf28, 0(vi06)      |  nop                            96
  sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
  // jr vi12                    |  nop                            97
  // sq.xyzw vf16, 1(vi06)      |  nop                            98
  sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
  // clang-format on
}

// Rewinds the dynamic-vertex write cursor so that the next xgkick_PC call starts filling
// m_pc.vertex_dynamic from index 0 again.
void OceanTexture::setup_renderer() {
  m_pc.vtx_idx = 0;
}

// Uploads the vertices collected by xgkick_PC and draws all strips with one indexed
// triangle-strip draw call (strips are separated by primitive-restart indices).
//
// render_state: provides the shader library and the texture pool.
// prof: receives the draw-call and triangle statistics.
void OceanTexture::flush(SharedRenderState* render_state, ScopedProfilerNode& prof) {
  // the full set of vertices must have been generated before drawing
  ASSERT(m_pc.vtx_idx == 2112);

  // upload this frame's dynamic vertex data
  glBindVertexArray(m_pc.vao);
  glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, m_pc.vertex_dynamic.data(),
               GL_DYNAMIC_DRAW);
  glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);

  auto&& shader = render_state->shaders[ShaderId::OCEAN_TEXTURE];
  shader.activate();

  // find the envmap texture by its tbp, falling back to the placeholder when it is missing
  GsTex0 tex0(m_envmap_adgif.tex0_data);
  auto envmap_tex = render_state->texture_pool->lookup(tex0.tbp0());
  if (!envmap_tex) {
    envmap_tex = render_state->texture_pool->get_placeholder_texture();
  }

  // no decal
  // yes tcc
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, *envmap_tex);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

  // the sampler reads from texture unit 0
  const auto sampler_location = glGetUniformLocation(shader.id(), "tex_T0");
  glUniform1i(sampler_location, 0);

  // drawn without depth test or blending
  glDisable(GL_DEPTH_TEST);
  glDisable(GL_BLEND);

  // UINT32_MAX in the index buffer ends the current strip
  glEnable(GL_PRIMITIVE_RESTART);
  glPrimitiveRestartIndex(UINT32_MAX);
  glDrawElements(GL_TRIANGLE_STRIP, m_pc.index_buffer.size(), GL_UNSIGNED_INT, (void*)0);

  prof.add_draw_call();
  prof.add_tri(NUM_STRIPS * NUM_STRIPS * 2);

  glBindVertexArray(0);
}

void OceanTexture::init_pc() {
  int i = 0;
  m_pc.vertex_positions.resize(NUM_VERTS);
  m_pc.vertex_dynamic.resize(NUM_VERTS);
  m_pc.index_buffer.clear();
  for (u32 strip = 0; strip < NUM_STRIPS; strip++) {
    u32 lo = 64 * strip;
    u32 hi = 64 * (strip + 1);
    for (u32 vert_pair = 0; vert_pair < NUM_VERTS_PER_STRIP / 2; vert_pair++) {
      m_pc.index_buffer.push_back(i);
      auto& v0 = m_pc.vertex_positions[i++];
      v0 = math::Vector2f(vert_pair * 64, lo);
      m_pc.index_buffer.push_back(i);
      auto& v1 = m_pc.vertex_positions[i++];
      v1 = math::Vector2f(vert_pair * 64, hi);
    }
    m_pc.index_buffer.push_back(UINT32_MAX);
  }

  glGenVertexArrays(1, &m_pc.vao);
  glBindVertexArray(m_pc.vao);

  glGenBuffers(1, &m_pc.gl_index_buffer);
  glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);
  glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(u32) * m_pc.index_buffer.size(),
               m_pc.index_buffer.data(), GL_STATIC_DRAW);

  glGenBuffers(1, &m_pc.static_vertex_buffer);
  glBindBuffer(GL_ARRAY_BUFFER, m_pc.static_vertex_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(math::Vector2f) * NUM_VERTS, m_pc.vertex_positions.data(),
               GL_STATIC_DRAW);
  glEnableVertexAttribArray(0);
  glEnableVertexAttribArray(1);
  glEnableVertexAttribArray(2);

  glVertexAttribPointer(0,         // location 0 in the shader
                        2,         // 3 floats per vert
                        GL_FLOAT,  // floats
                        GL_TRUE,   // normalized, ignored,
                        0,         // tightly packed
                        0

  );

  glGenBuffers(1, &m_pc.dynamic_vertex_buffer);
  glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, nullptr, GL_DYNAMIC_DRAW);
  glVertexAttribPointer(1,                             // location 0 in the shader
                        4,                             // 4 color components
                        GL_UNSIGNED_BYTE,              // floats
                        GL_TRUE,                       // normalized, ignored,
                        sizeof(Vertex),                //
                        (void*)offsetof(Vertex, rgba)  // offset in array (why is this a pointer...)
  );
  glVertexAttribPointer(2,                          // location 0 in the shader
                        2,                          // 2 floats per vert
                        GL_FLOAT,                   // floats
                        GL_FALSE,                   // normalized, ignored,
                        sizeof(Vertex),             //
                        (void*)offsetof(Vertex, s)  // offset in array (why is this a pointer...)
  );
}

void OceanTexture::destroy_pc() {}