Files
jak-project/game/graphics/opengl_renderer/ocean/OceanTexture_PC.cpp
T
water111 71cb1aef6f [merc2] support vertex updates, use this for blerc in jak 1 and jak 2 (#2179)
This PR adds a feature to merc2 to update vertices. This will be needed
to efficiently do effects like blerc/ripple/texture scroll. It's enabled
for blerc in jak 1 and jak 2, but with a few disclaimers:
- currently we still use the mips2c blerc implementation, which is slow
and has some "jittering" because of integer precision. When porting to
PC, there was an additional synchronization problem because blerc
overwrites the merc data as it's being read by the renderers. I _think_
this wasn't an issue on PS2 because the blerc dma is higher priority
than the VIF1 DMA, but I'm not certain. Either way, I had to add a mutex
for this on PC to avoid very slight flickering/gaps. This isn't ideal
for performance, but still beats generic by a significant amount in
every place I tested. If you see merc taking 2ms to draw, it is likely
because it is stuck waiting on blerc to finish. This will go away once
blerc itself is ported to C++.
- in jak 1, we end up using generic in some cases where we could use
merc. In particular maia in village3 hut. This will be fixed later once
we can use merc in more places. I don't want to mess with the
merc/generic selection logic when we're hopefully going to get rid of it
soon.
- There is no support for ripple or texture scroll. These use generic on
jak 1, and remain broken on jak 2.
- Like with `emerc`, jak 1 has a toggle to go back to the old behavior
`*blerc-hack*`.
- In most cases, toggling this causes no visual differences. One
exception is Gol's teeth. I believe this is caused by texture coordinate
rounding issues, where generic has an additional float -> int -> float
compared to PC merc. It is very hard to notice so I'm not going to worry
about it.
2023-01-31 18:23:39 -05:00

735 lines
30 KiB
C++

#include "OceanTexture.h"
void OceanTexture::run_L1_PC() {
// L1:
// lq.xyzw vf14_startx, 988(vi00) | maxw.xyzw vf01_ones, vf00, vf00
vu.startx = Vf(m_texture_constants.start);
// lq.xyzw vf02_offset, 989(vi00)
// lq.xyzw vf03_tbuf, 986(vi00)
// lq.xyzw vf04_dbuf, 987(vi00)
// lq.xyzw vf05_giftag, 985(vi00)
// lq.xyzw vf06_cam_nrm, 991(vi00)
// lq.xyzw vf07_constants, 990(vi00)
// iaddiu vi11_0x80, vi00, 0x80
// mtir vi08_tptr, vf03_tbuf.x
vu.tptr = get_tbuf();
// mtir vi09_tbase, vf03_tbuf.x
vu.tbase = get_tbuf();
// mr32.xyzw vf03_tbuf, vf03_tbuf
swap_tbuf();
// xtop vi05_in_ptr
vu.in_ptr = swap_vu_upload_buffers();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC();
// mtir vi03_dbuf_read_a, vf04_dbuf.x
vu.dbuf_read_a = get_dbuf();
// bal vi12_ra, L5
// mtir vi04_dbuf_read_b, vf04_dbuf.y
vu.dbuf_read_b = get_dbuf_other();
run_L5_PC();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC();
// mtir vi03_dbuf_read_a, vf04_dbuf.x
vu.dbuf_read_a = get_dbuf();
// bal vi12_ra, L5
// mtir vi04_dbuf_read_b, vf04_dbuf.y
vu.dbuf_read_b = get_dbuf_other();
run_L5_PC();
// nop :e
// nop
}
// PC port of the VU1 program entry point at label L2 (Jak 1).
//
// Unlike run_L1_PC this does not reload vu.startx or the tbuf pointers; it
// picks up the next upload buffer and then runs three row/strip pairs.
void OceanTexture::run_L2_PC() {
  // xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  constexpr int kRowStripPairs = 3;
  for (int pair = 0; pair < kRowStripPairs; ++pair) {
    // mtir vi06_dbuf_write, vf04_dbuf.x ; bal L3 ; mr32 vf04_dbuf (delay slot)
    vu.dbuf_write = get_dbuf();
    swap_dbuf();
    run_L3_PC();

    // mtir vi03_dbuf_read_a, vf04_dbuf.x ; bal L5 ; mtir vi04_dbuf_read_b, vf04_dbuf.y
    vu.dbuf_read_a = get_dbuf();
    vu.dbuf_read_b = get_dbuf_other();
    run_L5_PC();
  }
  // nop :e  (end of VU program)
}
namespace {

// Emulates a masked VU "lq": for each of the four lanes whose bit is set in
// `mask`, copies that lane from *src into dest. Unselected lanes of dest are
// left untouched.
void lq_buffer(Mask mask, Vf& dest, Vf* src) {
  const u64 lane_bits = static_cast<u64>(mask);
  for (int lane = 0; lane < 4; ++lane) {
    const bool selected = ((lane_bits >> lane) & 1) != 0;
    if (!selected) {
      continue;
    }
    dest.data[lane] = src->data[lane];
  }
}

// Emulates a masked VU "sq": for each of the four lanes whose bit is set in
// `mask`, copies that lane from src into *dest. Unselected lanes of *dest are
// left untouched.
void sq_buffer(Mask mask, const Vf& src, Vf* dest) {
  const u64 lane_bits = static_cast<u64>(mask);
  for (int lane = 0; lane < 4; ++lane) {
    const bool selected = ((lane_bits >> lane) & 1) != 0;
    if (!selected) {
      continue;
    }
    dest->data[lane] = src.data[lane];
  }
}

// Stores a raw 16-byte giftag into one quadword slot without interpreting it.
void sq_buffer_giftag(const u8* src, Vf* dest) {
  constexpr int kQuadwordBytes = 16;
  memcpy(dest, src, kQuadwordBytes);
}

}  // namespace
// PC port of the VU1 subroutine at label L3 (Jak 1): generates one row of
// output vertices into the buffer at vu.dbuf_write.
//
// Input is read from vu.in_ptr: quadwords +0/+2/+4/+6 feed vtx0..vtx3 and
// +1/+3/+5 feed nrm0..nrm2; vu.in_ptr advances by 8 quadwords per loop pass.
// Output is 3 quadwords per vertex: +0 = cout*, +1 = vtx* (converted with
// ftoi0, w lane overwritten with 0x80), +2 = res* (derived from base_pos and
// m_texture_constants.offsets). xgkick_PC later reads slot +0 as "st" and
// slot +1 as "rgbaq".
//
// The loop body runs 8 times and writes 4 vertices per pass (vu.dbuf_write
// advances by 12 quadwords). After the loop one extra vertex is written whose
// cout/vtx slots are copied from the first vertex of the row
// (vu.dbuf_write_base), giving 33 vertices per row - the same count as the
// 0x21 iterations in run_L5_PC.
//
// Side effects: advances vu.in_ptr and vu.dbuf_write, sets vu.dbuf_write_base,
// and adds offsets.x to the y lane of vu.startx on exit.
//
// Statement order follows the paired VU instructions one-for-one. The loop is
// software-pipelined: res0..3, nrm0..2 and reflect for the NEXT pass are
// produced at the end of the current pass (and once before the loop), so
// reordering statements changes results.
void OceanTexture::run_L3_PC() {
Vf base_pos; // vf15
u16 loop_idx;
Vf vtx0; // vf16
Vf vtx1; // vf17
Vf vtx2; // vf18
Vf vtx3; // vf19
Vf res0; // vf20
Vf res1; // vf21
Vf res2; // vf22
Vf res3; // vf23
Vf nrm0; // vf24
Vf nrm1; // vf25
Vf nrm2; // vf26
Vf reflect; // vf27
Vf cout0; // vf28
Vf cout1; // vf29
Vf cout2; // vf30
Vf cout3; // vf31
Accumulator acc;
const Vf ones(1, 1, 1, 1);
const Vf vf00(0, 0, 0, 1);
const u16 vi11 = 0x80;
bool bc;
// clang-format off
// L3:
// ior vi07, vi06, vi00 | nop 56
// remember where this row starts; the wrap-around vertex after the loop reads from here.
vu.dbuf_write_base = vu.dbuf_write;
// move.xyzw vf15, vf14 | nop 57
base_pos.move(Mask::xyzw, vu.startx);
// iaddi vi01, vi00, 0x8 | nop 58
loop_idx = 8;
// --- pipeline prologue: prepare res0..3, nrm0..2 and reflect for the first pass ---
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 59 (?? what are they doing here)
// (multiplying by vf00.w, which is 1, so this is a copy of base_pos)
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
// lq.xyzw vf25, 3(vi05) | mulw.xyzw vf21, vf15, vf00 60
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
// lq.xyzw vf26, 5(vi05) | mulw.xyzw vf22, vf15, vf00 61
res2.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
// nop | mulw.xyzw vf23, vf15, vf00 62
res3.mul(Mask::xyzw, base_pos, 1.f);
// reflect = nrm0 * cam_nrm.x + nrm1 * cam_nrm.y + nrm2 * cam_nrm.z (via the accumulator)
// nop | mulax.xyzw ACC, vf24, vf06 63
acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x());
// nop | madday.xyzw ACC, vf25, vf06 64
acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y());
// nop | maddz.xyzw vf27, vf26, vf06 65
acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z());
// nop | addx.x vf21, vf21, vf02 66
res1.add(Mask::x, res1, m_texture_constants.offsets.x());
// nop | addy.x vf22, vf22, vf02 67
res2.add(Mask::x, res2, m_texture_constants.offsets.y());
// --- loop body: one pass emits 4 vertices ---
L4:
// nop | addz.x vf23, vf23, vf02 68
res3.add(Mask::x, res3, m_texture_constants.offsets.z());
// nop | addw.x vf15, vf15, vf02 69
base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
// sq.xyzw vf20, 2(vi06) | mulx.x vf28, vf01, vf24 70
cout0.mul(Mask::x, ones, nrm0.x()); sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
// sq.xyzw vf21, 5(vi06) | muly.x vf29, vf01, vf24 71
cout1.mul(Mask::x, ones, nrm0.y()); sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
// sq.xyzw vf22, 8(vi06) | mulz.x vf30, vf01, vf24 72
cout2.mul(Mask::x, ones, nrm0.z()); sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
// sq.xyzw vf23, 11(vi06) | mulw.x vf31, vf01, vf24 73
cout3.mul(Mask::x, ones, nrm0.w()); sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
// lq.xyzw vf16, 0(vi05) | mulx.y vf28, vf01, vf25 74
cout0.mul(Mask::y, ones, nrm1.x()); lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
// lq.xyzw vf17, 2(vi05) | muly.y vf29, vf01, vf25 75
cout1.mul(Mask::y, ones, nrm1.y()); lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
// lq.xyzw vf18, 4(vi05) | mulz.y vf30, vf01, vf25 76
cout2.mul(Mask::y, ones, nrm1.z()); lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
// lq.xyzw vf19, 6(vi05) | mulw.y vf31, vf01, vf25 77
cout3.mul(Mask::y, ones, nrm1.w()); lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
// iaddi vi05, vi05, 0x8 | mulx.xy vf28, vf28, vf27 78
cout0.mul(Mask::xy, cout0, reflect.x()); vu.in_ptr = vu.in_ptr + 8;
// nop | muly.xy vf29, vf29, vf27 79
cout1.mul(Mask::xy, cout1, reflect.y());
// nop | mulz.xy vf30, vf30, vf27 80
cout2.mul(Mask::xy, cout2, reflect.z());
// nop | mulw.xy vf31, vf31, vf27 81
cout3.mul(Mask::xy, cout3, reflect.w());
// scale by the w lane of each input vertex; this must happen before ftoi0 below overwrites vtx*.
// nop | mulw.xy vf28, vf28, vf16 82
cout0.mul(Mask::xy, cout0, vtx0.w());
// nop | mulw.xy vf29, vf29, vf17 83
cout1.mul(Mask::xy, cout1, vtx1.w());
// nop | mulw.xy vf30, vf30, vf18 84
cout2.mul(Mask::xy, cout2, vtx2.w());
// nop | mulw.xy vf31, vf31, vf19 85
cout3.mul(Mask::xy, cout3, vtx3.w());
// nop | ftoi0.xyzw vf16, vf16 86
vtx0.ftoi0(Mask::xyzw, vtx0);
// nop | ftoi0.xyzw vf17, vf17 87
vtx1.ftoi0(Mask::xyzw, vtx1);
// nop | ftoi0.xyzw vf18, vf18 88
vtx2.ftoi0(Mask::xyzw, vtx2);
// iaddi vi01, vi01, -0x1 | ftoi0.xyzw vf19, vf19 89
vtx3.ftoi0(Mask::xyzw, vtx3); loop_idx = loop_idx + -1;
// mfir.w writes the integer 0x80 (vi11) into the w lane of each converted vertex.
// mfir.w vf16, vi11 | add.xyzw vf28, vf28, vf06 90
cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm); vtx0.mfir(Mask::w, vi11);
// mfir.w vf17, vi11 | add.xyzw vf29, vf29, vf06 91
cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm); vtx1.mfir(Mask::w, vi11);
// mfir.w vf18, vi11 | add.xyzw vf30, vf30, vf06 92
cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm); vtx2.mfir(Mask::w, vi11);
// mfir.w vf19, vi11 | add.xyzw vf31, vf31, vf06 93
cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm); vtx3.mfir(Mask::w, vi11);
// cout = cout * constants.x + constants.y
// nop | mulx.xyzw vf28, vf28, vf07 94
cout0.mul(Mask::xyzw, cout0, m_texture_constants.constants.x());
// nop | mulx.xyzw vf29, vf29, vf07 95
cout1.mul(Mask::xyzw, cout1, m_texture_constants.constants.x());
// nop | mulx.xyzw vf30, vf30, vf07 96
cout2.mul(Mask::xyzw, cout2, m_texture_constants.constants.x());
// nop | mulx.xyzw vf31, vf31, vf07 97
cout3.mul(Mask::xyzw, cout3, m_texture_constants.constants.x());
// nop | addy.xyzw vf28, vf28, vf07 98
cout0.add(Mask::xyzw, cout0, m_texture_constants.constants.y());
// nop | addy.xyzw vf29, vf29, vf07 99
cout1.add(Mask::xyzw, cout1, m_texture_constants.constants.y());
// nop | addy.xyzw vf30, vf30, vf07 100
cout2.add(Mask::xyzw, cout2, m_texture_constants.constants.y());
// nop | addy.xyzw vf31, vf31, vf07 101
cout3.add(Mask::xyzw, cout3, m_texture_constants.constants.y());
// sub.zw replaces the z/w lanes of cout with ones - vf00, i.e. z = 1 and w = 0.
// sq.xyzw vf16, 1(vi06) | sub.zw vf28, vf01, vf00 102
cout0.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
// sq.xyzw vf17, 4(vi06) | sub.zw vf29, vf01, vf00 103
cout1.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
// sq.xyzw vf18, 7(vi06) | sub.zw vf30, vf01, vf00 104
cout2.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
// sq.xyzw vf19, 10(vi06) | sub.zw vf31, vf01, vf00 105
cout3.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
// --- pipeline: prepare res*/nrm*/reflect for the next pass while storing this pass's cout* ---
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 106
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
// lq.xyzw vf25, 3(vi05) | mulw.xyzw vf21, vf15, vf00 107
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
// lq.xyzw vf26, 5(vi05) | mulw.xyzw vf22, vf15, vf00 108
res2.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
// sq.xyzw vf28, 0(vi06) | mulw.xyzw vf23, vf15, vf00 109
res3.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
// sq.xyzw vf29, 3(vi06) | mulax.xyzw ACC, vf24, vf06 110
acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x()); sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
// sq.xyzw vf30, 6(vi06) | madday.xyzw ACC, vf25, vf06 111
acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y()); sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
// sq.xyzw vf31, 9(vi06) | maddz.xyzw vf27, vf26, vf06 112
acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z()); sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
// BRANCH!
// The branch condition is latched here, but the next line (the branch delay slot) still
// executes before the jump is taken.
// ibgtz vi01, L4 | addx.x vf21, vf21, vf02 113
res1.add(Mask::x, res1, m_texture_constants.offsets.x()); bc = ((s16)loop_idx) > 0;
// iaddi vi06, vi06, 0xc | addy.x vf22, vf22, vf02 114
res2.add(Mask::x, res2, m_texture_constants.offsets.y()); vu.dbuf_write = vu.dbuf_write + 12;
if (bc) { goto L4; }
// --- epilogue: append one more vertex that reuses the first vertex's cout/vtx slots ---
// lq.xyzw vf28, 0(vi07) | addx.y vf14, vf14, vf02 115
vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x()); lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
// lq.xyzw vf16, 1(vi07) | nop 116
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
// sq.xyzw vf20, 2(vi06) | nop 117
sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
// sq.xyzw vf28, 0(vi06) | nop 118
sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
// jr vi12 | nop 119
// sq.xyzw vf16, 1(vi06) | nop 120
sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
// clang-format on
}
void OceanTexture::run_L5_PC() {
// clang-format off
u16 loop_idx;
Vf res0;
Vf res1;
Vf cout0;
Vf cout1;
Vf vtx0;
Vf vtx1;
bool bc;
// L5:
// iaddiu vi01, vi00, 0x21 | nop 121
loop_idx = 0x21; /* 33 */
// sq.xyzw vf05, 0(vi08) | nop 122
sq_buffer_giftag(m_texture_constants.giftag, vu.tptr);
// iaddi vi08, vi08, 0x1 | nop 123
vu.tptr = vu.tptr + 1;
L6:
// iaddi vi01, vi01, -0x1 | nop 124
loop_idx = loop_idx + -1;
// lq.xyzw vf20, 2(vi03) | nop 125
lq_buffer(Mask::xyzw, res0, vu.dbuf_read_a + 2);
// lq.xyzw vf21, 2(vi04) | nop 126
lq_buffer(Mask::xyzw, res1, vu.dbuf_read_b + 2);
// lq.xyzw vf28, 0(vi03) | nop 127
lq_buffer(Mask::xyzw, cout0, vu.dbuf_read_a);
// lq.xyzw vf16, 1(vi03) | nop 128
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_read_a + 1);
// lq.xyzw vf29, 0(vi04) | ftoi4.xyzw vf20, vf20 129
res0.ftoi4(Mask::xyzw, res0); lq_buffer(Mask::xyzw, cout1, vu.dbuf_read_b);
// lq.xyzw vf17, 1(vi04) | ftoi4.xyzw vf21, vf21 130
res1.ftoi4(Mask::xyzw, res1); lq_buffer(Mask::xyzw, vtx1, vu.dbuf_read_b + 1);
// sq.xyzw vf28, 0(vi08) | nop 131
sq_buffer(Mask::xyzw, cout0, vu.tptr);
// sq.xyzw vf16, 1(vi08) | nop 132
sq_buffer(Mask::xyzw, vtx0, vu.tptr + 1);
// sq.xyzw vf20, 2(vi08) | nop 133
sq_buffer(Mask::xyzw, res0, vu.tptr + 2);
// sq.xyzw vf29, 3(vi08) | nop 134
sq_buffer(Mask::xyzw, cout1, vu.tptr + 3);
// sq.xyzw vf17, 4(vi08) | nop 135
sq_buffer(Mask::xyzw, vtx1, vu.tptr + 4);
// sq.xyzw vf21, 5(vi08) | nop 136
sq_buffer(Mask::xyzw, res1, vu.tptr + 5);
// iaddi vi03, vi03, 0x3 | nop 137
vu.dbuf_read_a = vu.dbuf_read_a + 3;
// iaddi vi04, vi04, 0x3 | nop 138
vu.dbuf_read_b = vu.dbuf_read_b + 3;
// BRANCH!
// ibgtz vi01, L6 | nop 139
bc = ((s16)loop_idx) > 0;
// iaddi vi08, vi08, 0x6 | nop 140
vu.tptr = vu.tptr + 6;
if (bc) { goto L6; }
// xgkick vi09 | nop 141
xgkick_PC(vu.tbase);
// mtir vi08, vf03.x | nop 142
// vu.tptr = vu.vf03.x_as_u16();
vu.tptr = get_tbuf();
// mtir vi09, vf03.x | nop 143
// vu.vi09 = vu.vf03.x_as_u16();
vu.tbase = get_tbuf();
// jr vi12 | nop 144
// ASSERT(false);
// mr32.xyzw vf03, vf03 | nop 145
// vu.vf03.mr32(Mask::xyzw, vu.vf03);
swap_tbuf();
// clang-format on
}
// Stand-in for the VU "xgkick": instead of sending the strip to the GS, unpack
// it into m_pc.vertex_dynamic starting at m_pc.vtx_idx.
//
// This relies on `src` having exactly the layout run_L5_PC writes: one 16-byte
// giftag followed by NUM_VERTS_PER_STRIP vertices of 48 bytes each (st, rgbaq,
// xyz2 quadwords). m_pc.vtx_idx is advanced by one per vertex.
void OceanTexture::xgkick_PC(Vf* src) {
  constexpr u32 kGiftagBytes = 16;
  constexpr u32 kVertexBytes = 48;
  constexpr u32 kRgbaqOffset = 16;  // second quadword of the vertex

  const u8* const strip = reinterpret_cast<const u8*>(src);
  for (u32 vert = 0; vert < NUM_VERTS_PER_STRIP; ++vert) {
    const u8* const in = strip + kGiftagBytes + kVertexBytes * vert;
    auto& out = m_pc.vertex_dynamic[m_pc.vtx_idx++];

    // st: the first two floats of the first quadword.
    memcpy(&out.s, in, sizeof(float) * 2);

    // rgbaq: each component is a 32-bit lane; only its lowest byte is kept.
    out.rgba.x() = in[kRgbaqOffset + 0];
    out.rgba.y() = in[kRgbaqOffset + 4];
    out.rgba.z() = in[kRgbaqOffset + 8];
    out.rgba.w() = in[kRgbaqOffset + 12];  // we don't actually need it, it's always 0x80

    // xyz2 (third quadword) is not read here.
  }
}
void OceanTexture::run_L1_PC_jak2() {
// L1:
// lq.xyzw vf14_startx, 988(vi00) | maxw.xyzw vf01_ones, vf00, vf00
vu.startx = Vf(m_texture_constants.start);
// lq.xyzw vf02_offset, 989(vi00)
// lq.xyzw vf03_tbuf, 986(vi00)
// lq.xyzw vf04_dbuf, 987(vi00)
// lq.xyzw vf05_giftag, 985(vi00)
// lq.xyzw vf06_cam_nrm, 991(vi00)
// lq.xyzw vf07_constants, 990(vi00)
// iaddiu vi11_0x80, vi00, 0x80
// mtir vi08_tptr, vf03_tbuf.x
vu.tptr = get_tbuf();
// mtir vi09_tbase, vf03_tbuf.x
vu.tbase = get_tbuf();
// mr32.xyzw vf03_tbuf, vf03_tbuf
swap_tbuf();
// xtop vi05_in_ptr
vu.in_ptr = swap_vu_upload_buffers();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC_jak2();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC_jak2();
// mtir vi03_dbuf_read_a, vf04_dbuf.x
vu.dbuf_read_a = get_dbuf();
// bal vi12_ra, L5
// mtir vi04_dbuf_read_b, vf04_dbuf.y
vu.dbuf_read_b = get_dbuf_other();
run_L5_PC();
// mtir vi06_dbuf_write, vf04_dbuf.x
vu.dbuf_write = get_dbuf();
// bal vi12_ra, L3
// mr32.xyzw vf04_dbuf, vf04_dbuf
swap_dbuf();
run_L3_PC_jak2();
// mtir vi03_dbuf_read_a, vf04_dbuf.x
vu.dbuf_read_a = get_dbuf();
// bal vi12_ra, L5
// mtir vi04_dbuf_read_b, vf04_dbuf.y
vu.dbuf_read_b = get_dbuf_other();
run_L5_PC();
// nop :e
// nop
}
// Jak 2 variant of the VU1 program entry point at label L2.
//
// Picks up the next upload buffer and runs three row/strip pairs, generating
// rows with run_L3_PC_jak2 and emitting strips with the shared run_L5_PC.
void OceanTexture::run_L2_PC_jak2() {
  // xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  constexpr int kRowStripPairs = 3;
  for (int pair = 0; pair < kRowStripPairs; ++pair) {
    // mtir vi06_dbuf_write, vf04_dbuf.x ; bal L3 ; mr32 vf04_dbuf (delay slot)
    vu.dbuf_write = get_dbuf();
    swap_dbuf();
    run_L3_PC_jak2();

    // mtir vi03_dbuf_read_a, vf04_dbuf.x ; bal L5 ; mtir vi04_dbuf_read_b, vf04_dbuf.y
    vu.dbuf_read_a = get_dbuf();
    vu.dbuf_read_b = get_dbuf_other();
    run_L5_PC();
  }
  // nop :e  (end of VU program)
}
void OceanTexture::run_L3_PC_jak2() {
Vf base_pos; // vf15
u16 loop_idx;
Vf vtx0; // vf16
Vf vtx1; // vf17
Vf vtx2; // vf18
Vf vtx3; // vf19
Vf res0; // vf20
Vf res1; // vf21
Vf res2; // vf22
Vf res3; // vf23
Vf nrm0; // vf24
// Vf nrm1; // vf25
Vf nrm2; // vf26
// Vf reflect; // vf27
Vf cout0; // vf28
Vf cout1; // vf29
Vf cout2; // vf30
Vf cout3; // vf31
// Accumulator acc;
const Vf ones(1, 1, 1, 1);
const Vf vf00(0, 0, 0, 1);
// const u16 vi11 = 0x80;
bool bc;
// clang-format off
// L3:
// ior vi07, vi06, vi00 | nop 56
vu.dbuf_write_base = vu.dbuf_write;
// move.xyzw vf15, vf14 | nop 57
base_pos.move(Mask::xyzw, vu.startx);
// iaddi vi01, vi00, 0x8 | nop 58
loop_idx = 8;
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 59 (?? what are they doing here)
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
// lq.xyzw vf26, 3(vi05) | mulw.xyzw vf21, vf15, vf00 60
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
// nop | mulw.xyzw vf22, vf15, vf00 61
lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
// nop | mulw.xyzw vf23, vf15, vf00 62
res3.mul(Mask::xyzw, base_pos, 1.f);
// nop | addx.x vf21, vf21, vf02 63
res1.add(Mask::x, res1, m_texture_constants.offsets.x());
// nop | addy.x vf22, vf22, vf02 64
res2.add(Mask::x, res2, m_texture_constants.offsets.y());
L4:
// nop | addz.x vf23, vf23, vf02 65
res3.add(Mask::x, res3, m_texture_constants.offsets.z());
// nop | addw.x vf15, vf15, vf02 66
base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
// sq.xyzw vf20, 2(vi06) | mulx.x vf28, vf01, vf24 67
cout0.mul(Mask::x, ones, nrm0.x()); sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
// sq.xyzw vf21, 5(vi06) | muly.x vf29, vf01, vf24 68
cout1.mul(Mask::x, ones, nrm0.y()); sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
// sq.xyzw vf22, 8(vi06) | mulz.x vf30, vf01, vf24 69
cout2.mul(Mask::x, ones, nrm0.z()); sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
// sq.xyzw vf23, 11(vi06) | mulw.x vf31, vf01, vf24 70
cout3.mul(Mask::x, ones, nrm0.w()); sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
// lq.xyzw vf16, 0(vi05) | mulx.y vf28, vf01, vf26 71
cout0.mul(Mask::y, ones, nrm2.x()); lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
// lq.xyzw vf17, 2(vi05) | muly.y vf29, vf01, vf26 72
cout1.mul(Mask::y, ones, nrm2.y()); lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
// lq.xyzw vf18, 4(vi05) | mulz.y vf30, vf01, vf26 73
cout2.mul(Mask::y, ones, nrm2.z()); lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
// lq.xyzw vf19, 6(vi05) | mulw.y vf31, vf01, vf26 74
cout3.mul(Mask::y, ones, nrm2.w()); lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
// iaddi vi05, vi05, 0x8 | ftoi0.xyzw vf16, vf16 75
vtx0.ftoi0(Mask::xyzw, vtx0); vu.in_ptr = vu.in_ptr + 8;
// nop | ftoi0.xyzw vf17, vf17 76
vtx1.ftoi0(Mask::xyzw, vtx1);
// nop | ftoi0.xyzw vf18, vf18 77
vtx2.ftoi0(Mask::xyzw, vtx2);
// iaddi vi01, vi01, -0x1 | ftoi0.xyzw vf19, vf19 78
vtx3.ftoi0(Mask::xyzw, vtx3); loop_idx = loop_idx + -1;
// sq.xyzw vf16, 1(vi06) | add.xyzw vf28, vf28, vf07 79
cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
// sq.xyzw vf17, 4(vi06) | add.xyzw vf29, vf29, vf07 80
cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
// sq.xyzw vf18, 7(vi06) | add.xyzw vf30, vf30, vf07 81
cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
// sq.xyzw vf19, 10(vi06) | add.xyzw vf31, vf31, vf07 82
cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
// lq.xyzw vf24, 1(vi05) | sub.zw vf28, vf01, vf00 83
cout0.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
// lq.xyzw vf26, 5(vi05) | sub.zw vf29, vf01, vf00 84
cout1.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
// nop | sub.zw vf30, vf01, vf00 85
cout2.sub(Mask::zw, ones, vf00);
// nop | sub.zw vf31, vf01, vf00 86
cout3.sub(Mask::zw, ones, vf00);
// sq.xyzw vf28, 0(vi06) | mulw.xyzw vf20, vf15, vf00 87
res0.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
// sq.xyzw vf29, 3(vi06) | mulw.xyzw vf21, vf15, vf00 88
res1.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
// sq.xyzw vf30, 6(vi06) | mulw.xyzw vf22, vf15, vf00 89
res2.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
// sq.xyzw vf31, 9(vi06) | mulw.xyzw vf23, vf15, vf00 90
res3.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
// BRANCH!
// ibgtz vi01, L4 | addx.x vf21, vf21, vf02 91
res1.add(Mask::x, res1, m_texture_constants.offsets.x()); bc = ((s16)loop_idx) > 0;
// iaddi vi06, vi06, 0xc | addy.x vf22, vf22, vf02 92
res2.add(Mask::x, res2, m_texture_constants.offsets.y()); vu.dbuf_write = vu.dbuf_write + 12;
if (bc) { goto L4; }
// lq.xyzw vf28, 0(vi07) | addx.y vf14, vf14, vf02 93
vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x()); lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
// lq.xyzw vf16, 1(vi07) | nop 94
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
// sq.xyzw vf20, 2(vi06) | nop 95
sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
// sq.xyzw vf28, 0(vi06) | nop 96
sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
// jr vi12 | nop 97
// sq.xyzw vf16, 1(vi06) | nop 98
sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
// clang-format on
}
// Rewinds the write cursor into m_pc.vertex_dynamic. xgkick_PC appends vertices
// starting at this index, and flush() asserts on the final count, so this must
// run before the first strip is kicked.
void OceanTexture::setup_renderer() {
m_pc.vtx_idx = 0;
}
// Uploads the vertices collected by xgkick_PC and draws the whole ocean
// texture grid with a single indexed triangle-strip draw call.
//
// render_state supplies the shader library and the texture pool used to
// resolve the envmap texture; prof receives the draw-call and triangle counts.
void OceanTexture::flush(SharedRenderState* render_state, ScopedProfilerNode& prof) {
  // Exactly 2112 vertices must have been produced since setup_renderer()
  // (presumably NUM_VERTS - the upload below copies NUM_VERTS entries).
  ASSERT(m_pc.vtx_idx == 2112);

  // Refresh the per-frame vertex data (st + rgba).
  glBindVertexArray(m_pc.vao);
  glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, m_pc.vertex_dynamic.data(),
               GL_DYNAMIC_DRAW);
  glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);

  auto&& ocean_shader = render_state->shaders[ShaderId::OCEAN_TEXTURE];
  ocean_shader.activate();

  // Resolve the envmap texture by its tbp0; fall back to the placeholder if
  // the pool has nothing at that address.
  GsTex0 envmap_tex0(m_envmap_adgif.tex0_data);
  auto envmap_gl_tex = render_state->texture_pool->lookup(envmap_tex0.tbp0());
  if (!envmap_gl_tex) {
    envmap_gl_tex = render_state->texture_pool->get_placeholder_texture();
  }

  // no decal
  // yes tcc
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, *envmap_gl_tex);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
  const auto sampler_location = glGetUniformLocation(ocean_shader.id(), "tex_T0");
  glUniform1i(sampler_location, 0);

  glDisable(GL_DEPTH_TEST);
  glDisable(GL_BLEND);

  // The index buffer separates strips with UINT32_MAX, so primitive restart
  // lets one glDrawElements cover every strip.
  glEnable(GL_PRIMITIVE_RESTART);
  glPrimitiveRestartIndex(UINT32_MAX);
  glDrawElements(GL_TRIANGLE_STRIP, m_pc.index_buffer.size(), GL_UNSIGNED_INT, nullptr);

  prof.add_draw_call();
  prof.add_tri(NUM_STRIPS * NUM_STRIPS * 2);

  glBindVertexArray(0);
}
void OceanTexture::init_pc() {
int i = 0;
m_pc.vertex_positions.resize(NUM_VERTS);
m_pc.vertex_dynamic.resize(NUM_VERTS);
m_pc.index_buffer.clear();
for (u32 strip = 0; strip < NUM_STRIPS; strip++) {
u32 lo = 64 * strip;
u32 hi = 64 * (strip + 1);
for (u32 vert_pair = 0; vert_pair < NUM_VERTS_PER_STRIP / 2; vert_pair++) {
m_pc.index_buffer.push_back(i);
auto& v0 = m_pc.vertex_positions[i++];
v0 = math::Vector2f(vert_pair * 64, lo);
m_pc.index_buffer.push_back(i);
auto& v1 = m_pc.vertex_positions[i++];
v1 = math::Vector2f(vert_pair * 64, hi);
}
m_pc.index_buffer.push_back(UINT32_MAX);
}
glGenVertexArrays(1, &m_pc.vao);
glBindVertexArray(m_pc.vao);
glGenBuffers(1, &m_pc.gl_index_buffer);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(u32) * m_pc.index_buffer.size(),
m_pc.index_buffer.data(), GL_STATIC_DRAW);
glGenBuffers(1, &m_pc.static_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_pc.static_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(math::Vector2f) * NUM_VERTS, m_pc.vertex_positions.data(),
GL_STATIC_DRAW);
glEnableVertexAttribArray(0);
glEnableVertexAttribArray(1);
glEnableVertexAttribArray(2);
glVertexAttribPointer(0, // location 0 in the shader
2, // 3 floats per vert
GL_FLOAT, // floats
GL_TRUE, // normalized, ignored,
0, // tightly packed
0
);
glGenBuffers(1, &m_pc.dynamic_vertex_buffer);
glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, nullptr, GL_DYNAMIC_DRAW);
glVertexAttribPointer(1, // location 0 in the shader
4, // 4 color components
GL_UNSIGNED_BYTE, // floats
GL_TRUE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, rgba) // offset in array (why is this a pointer...)
);
glVertexAttribPointer(2, // location 0 in the shader
2, // 2 floats per vert
GL_FLOAT, // floats
GL_FALSE, // normalized, ignored,
sizeof(Vertex), //
(void*)offsetof(Vertex, s) // offset in array (why is this a pointer...)
);
}
// Teardown counterpart of init_pc(); currently does nothing.
// NOTE(review): init_pc() creates a vertex array and three buffers with
// glGenVertexArrays/glGenBuffers and nothing in this file deletes them -
// confirm whether they are released elsewhere or intentionally kept for the
// lifetime of the process.
void OceanTexture::destroy_pc() {}