mirror of
https://github.com/open-goal/jak-project
synced 2026-06-18 23:37:22 -04:00
71cb1aef6f
This PR adds a feature to merc2 to update vertices. This will be needed to efficiently do effects like blerc/ripple/texture scroll. It's enabled for blerc in jak 1 and jak 2, but with a few disclaimers: - currently we still use the mips2c blerc implementation, which is slow and has some "jittering" because of integer precision. When porting to PC, there was an additional synchronization problem because blerc overwrites the merc data as it's being read by the renderers. I _think_ this wasn't an issue on PS2 because the blerc dma is higher priority than the VIF1 DMA, but I'm not certain. Either way, I had to add a mutex for this on PC to avoid very slight flickering/gaps. This isn't ideal for performance, but still beats generic by a significant amount in every place I tested. If you see merc taking 2ms to draw, it is likely because it is stuck waiting on blerc to finish. This will go away once blerc itself is ported to C++. - in jak 1, we end up using generic in some cases where we could use merc. In particular maia in village3 hut. This will be fixed later once we can use merc in more places. I don't want to mess with the merc/generic selection logic when we're hopefully going to get rid of it soon. - There is no support for ripple or texture scroll. These use generic on jak 1, and remain broken on jak 2. - Like with `emerc`, jak 1 has a toggle to go back to the old behavior `*blerc-hack*`. - In most cases, toggling this causes no visual differences. One exception is Gol's teeth. I believe this is caused by texture coordinate rounding issues, where generic has an additional float -> int -> float compared to PC merc. It is very hard to notice so I'm not going to worry about it.
735 lines
30 KiB
C++
735 lines
30 KiB
C++
#include "OceanTexture.h"
|
|
|
|
void OceanTexture::run_L1_PC() {
|
|
// L1:
|
|
// lq.xyzw vf14_startx, 988(vi00) | maxw.xyzw vf01_ones, vf00, vf00
|
|
vu.startx = Vf(m_texture_constants.start);
|
|
// lq.xyzw vf02_offset, 989(vi00)
|
|
// lq.xyzw vf03_tbuf, 986(vi00)
|
|
// lq.xyzw vf04_dbuf, 987(vi00)
|
|
// lq.xyzw vf05_giftag, 985(vi00)
|
|
// lq.xyzw vf06_cam_nrm, 991(vi00)
|
|
// lq.xyzw vf07_constants, 990(vi00)
|
|
// iaddiu vi11_0x80, vi00, 0x80
|
|
// mtir vi08_tptr, vf03_tbuf.x
|
|
vu.tptr = get_tbuf();
|
|
// mtir vi09_tbase, vf03_tbuf.x
|
|
vu.tbase = get_tbuf();
|
|
// mr32.xyzw vf03_tbuf, vf03_tbuf
|
|
swap_tbuf();
|
|
// xtop vi05_in_ptr
|
|
vu.in_ptr = swap_vu_upload_buffers();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC();
|
|
|
|
// mtir vi03_dbuf_read_a, vf04_dbuf.x
|
|
vu.dbuf_read_a = get_dbuf();
|
|
// bal vi12_ra, L5
|
|
// mtir vi04_dbuf_read_b, vf04_dbuf.y
|
|
vu.dbuf_read_b = get_dbuf_other();
|
|
run_L5_PC();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC();
|
|
|
|
// mtir vi03_dbuf_read_a, vf04_dbuf.x
|
|
vu.dbuf_read_a = get_dbuf();
|
|
// bal vi12_ra, L5
|
|
// mtir vi04_dbuf_read_b, vf04_dbuf.y
|
|
vu.dbuf_read_b = get_dbuf_other();
|
|
run_L5_PC();
|
|
|
|
// nop :e
|
|
// nop
|
|
}
|
|
|
|
/*!
 * PC port of VU program entry point L2 (jak 1).
 * Grabs the next uploaded input buffer, then runs three (L3 pass, L5 pass) pairs.
 */
void OceanTexture::run_L2_PC() {
  // One L3 pass followed by one L5 pass.
  auto run_write_then_read = [&]() {
    // mtir vi06_dbuf_write, vf04_dbuf.x
    vu.dbuf_write = get_dbuf();
    // bal vi12_ra, L3  (delay slot: mr32.xyzw vf04_dbuf, vf04_dbuf)
    swap_dbuf();
    run_L3_PC();

    // mtir vi03_dbuf_read_a, vf04_dbuf.x
    vu.dbuf_read_a = get_dbuf();
    // bal vi12_ra, L5  (delay slot: mtir vi04_dbuf_read_b, vf04_dbuf.y)
    vu.dbuf_read_b = get_dbuf_other();
    run_L5_PC();
  };

  // L2:
  // xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  run_write_then_read();
  run_write_then_read();
  run_write_then_read();

  // nop :e
  // nop
}
|
|
|
|
namespace {
|
|
void lq_buffer(Mask mask, Vf& dest, Vf* src) {
|
|
for (int i = 0; i < 4; i++) {
|
|
if ((u64)mask & (1 << i)) {
|
|
dest.data[i] = src->data[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
void sq_buffer(Mask mask, const Vf& src, Vf* dest) {
|
|
for (int i = 0; i < 4; i++) {
|
|
if ((u64)mask & (1 << i)) {
|
|
dest->data[i] = src.data[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
void sq_buffer_giftag(const u8* src, Vf* dest) {
|
|
memcpy(dest, src, 16);
|
|
}
|
|
} // namespace
|
|
|
|
void OceanTexture::run_L3_PC() {
|
|
Vf base_pos; // vf15
|
|
u16 loop_idx;
|
|
|
|
Vf vtx0; // vf16
|
|
Vf vtx1; // vf17
|
|
Vf vtx2; // vf18
|
|
Vf vtx3; // vf19
|
|
|
|
Vf res0; // vf20
|
|
Vf res1; // vf21
|
|
Vf res2; // vf22
|
|
Vf res3; // vf23
|
|
|
|
Vf nrm0; // vf24
|
|
Vf nrm1; // vf25
|
|
Vf nrm2; // vf26
|
|
|
|
Vf reflect; // vf27
|
|
|
|
Vf cout0; // vf28
|
|
Vf cout1; // vf29
|
|
Vf cout2; // vf30
|
|
Vf cout3; // vf31
|
|
|
|
Accumulator acc;
|
|
const Vf ones(1, 1, 1, 1);
|
|
const Vf vf00(0, 0, 0, 1);
|
|
const u16 vi11 = 0x80;
|
|
bool bc;
|
|
|
|
// clang-format off
|
|
// L3:
|
|
// ior vi07, vi06, vi00 | nop 56
|
|
vu.dbuf_write_base = vu.dbuf_write;
|
|
// move.xyzw vf15, vf14 | nop 57
|
|
base_pos.move(Mask::xyzw, vu.startx);
|
|
// iaddi vi01, vi00, 0x8 | nop 58
|
|
loop_idx = 8;
|
|
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 59 (?? what are they doing here)
|
|
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
|
|
// lq.xyzw vf25, 3(vi05) | mulw.xyzw vf21, vf15, vf00 60
|
|
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
|
|
// lq.xyzw vf26, 5(vi05) | mulw.xyzw vf22, vf15, vf00 61
|
|
res2.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
|
|
// nop | mulw.xyzw vf23, vf15, vf00 62
|
|
res3.mul(Mask::xyzw, base_pos, 1.f);
|
|
// nop | mulax.xyzw ACC, vf24, vf06 63
|
|
acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x());
|
|
// nop | madday.xyzw ACC, vf25, vf06 64
|
|
acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y());
|
|
// nop | maddz.xyzw vf27, vf26, vf06 65
|
|
acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z());
|
|
// nop | addx.x vf21, vf21, vf02 66
|
|
res1.add(Mask::x, res1, m_texture_constants.offsets.x());
|
|
// nop | addy.x vf22, vf22, vf02 67
|
|
res2.add(Mask::x, res2, m_texture_constants.offsets.y());
|
|
L4:
|
|
// nop | addz.x vf23, vf23, vf02 68
|
|
res3.add(Mask::x, res3, m_texture_constants.offsets.z());
|
|
// nop | addw.x vf15, vf15, vf02 69
|
|
base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
|
|
// sq.xyzw vf20, 2(vi06) | mulx.x vf28, vf01, vf24 70
|
|
cout0.mul(Mask::x, ones, nrm0.x()); sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
|
|
// sq.xyzw vf21, 5(vi06) | muly.x vf29, vf01, vf24 71
|
|
cout1.mul(Mask::x, ones, nrm0.y()); sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
|
|
// sq.xyzw vf22, 8(vi06) | mulz.x vf30, vf01, vf24 72
|
|
cout2.mul(Mask::x, ones, nrm0.z()); sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
|
|
// sq.xyzw vf23, 11(vi06) | mulw.x vf31, vf01, vf24 73
|
|
cout3.mul(Mask::x, ones, nrm0.w()); sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
|
|
// lq.xyzw vf16, 0(vi05) | mulx.y vf28, vf01, vf25 74
|
|
cout0.mul(Mask::y, ones, nrm1.x()); lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
|
|
// lq.xyzw vf17, 2(vi05) | muly.y vf29, vf01, vf25 75
|
|
cout1.mul(Mask::y, ones, nrm1.y()); lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
|
|
// lq.xyzw vf18, 4(vi05) | mulz.y vf30, vf01, vf25 76
|
|
cout2.mul(Mask::y, ones, nrm1.z()); lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
|
|
// lq.xyzw vf19, 6(vi05) | mulw.y vf31, vf01, vf25 77
|
|
cout3.mul(Mask::y, ones, nrm1.w()); lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
|
|
// iaddi vi05, vi05, 0x8 | mulx.xy vf28, vf28, vf27 78
|
|
cout0.mul(Mask::xy, cout0, reflect.x()); vu.in_ptr = vu.in_ptr + 8;
|
|
// nop | muly.xy vf29, vf29, vf27 79
|
|
cout1.mul(Mask::xy, cout1, reflect.y());
|
|
// nop | mulz.xy vf30, vf30, vf27 80
|
|
cout2.mul(Mask::xy, cout2, reflect.z());
|
|
// nop | mulw.xy vf31, vf31, vf27 81
|
|
cout3.mul(Mask::xy, cout3, reflect.w());
|
|
// nop | mulw.xy vf28, vf28, vf16 82
|
|
cout0.mul(Mask::xy, cout0, vtx0.w());
|
|
// nop | mulw.xy vf29, vf29, vf17 83
|
|
cout1.mul(Mask::xy, cout1, vtx1.w());
|
|
// nop | mulw.xy vf30, vf30, vf18 84
|
|
cout2.mul(Mask::xy, cout2, vtx2.w());
|
|
// nop | mulw.xy vf31, vf31, vf19 85
|
|
cout3.mul(Mask::xy, cout3, vtx3.w());
|
|
// nop | ftoi0.xyzw vf16, vf16 86
|
|
vtx0.ftoi0(Mask::xyzw, vtx0);
|
|
// nop | ftoi0.xyzw vf17, vf17 87
|
|
vtx1.ftoi0(Mask::xyzw, vtx1);
|
|
// nop | ftoi0.xyzw vf18, vf18 88
|
|
vtx2.ftoi0(Mask::xyzw, vtx2);
|
|
// iaddi vi01, vi01, -0x1 | ftoi0.xyzw vf19, vf19 89
|
|
vtx3.ftoi0(Mask::xyzw, vtx3); loop_idx = loop_idx + -1;
|
|
// mfir.w vf16, vi11 | add.xyzw vf28, vf28, vf06 90
|
|
cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm); vtx0.mfir(Mask::w, vi11);
|
|
// mfir.w vf17, vi11 | add.xyzw vf29, vf29, vf06 91
|
|
cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm); vtx1.mfir(Mask::w, vi11);
|
|
// mfir.w vf18, vi11 | add.xyzw vf30, vf30, vf06 92
|
|
cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm); vtx2.mfir(Mask::w, vi11);
|
|
// mfir.w vf19, vi11 | add.xyzw vf31, vf31, vf06 93
|
|
cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm); vtx3.mfir(Mask::w, vi11);
|
|
// nop | mulx.xyzw vf28, vf28, vf07 94
|
|
cout0.mul(Mask::xyzw, cout0, m_texture_constants.constants.x());
|
|
// nop | mulx.xyzw vf29, vf29, vf07 95
|
|
cout1.mul(Mask::xyzw, cout1, m_texture_constants.constants.x());
|
|
// nop | mulx.xyzw vf30, vf30, vf07 96
|
|
cout2.mul(Mask::xyzw, cout2, m_texture_constants.constants.x());
|
|
// nop | mulx.xyzw vf31, vf31, vf07 97
|
|
cout3.mul(Mask::xyzw, cout3, m_texture_constants.constants.x());
|
|
// nop | addy.xyzw vf28, vf28, vf07 98
|
|
cout0.add(Mask::xyzw, cout0, m_texture_constants.constants.y());
|
|
// nop | addy.xyzw vf29, vf29, vf07 99
|
|
cout1.add(Mask::xyzw, cout1, m_texture_constants.constants.y());
|
|
// nop | addy.xyzw vf30, vf30, vf07 100
|
|
cout2.add(Mask::xyzw, cout2, m_texture_constants.constants.y());
|
|
// nop | addy.xyzw vf31, vf31, vf07 101
|
|
cout3.add(Mask::xyzw, cout3, m_texture_constants.constants.y());
|
|
// sq.xyzw vf16, 1(vi06) | sub.zw vf28, vf01, vf00 102
|
|
cout0.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
|
|
// sq.xyzw vf17, 4(vi06) | sub.zw vf29, vf01, vf00 103
|
|
cout1.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
|
|
// sq.xyzw vf18, 7(vi06) | sub.zw vf30, vf01, vf00 104
|
|
cout2.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
|
|
// sq.xyzw vf19, 10(vi06) | sub.zw vf31, vf01, vf00 105
|
|
cout3.sub(Mask::zw, ones, vf00); sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
|
|
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 106
|
|
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
|
|
// lq.xyzw vf25, 3(vi05) | mulw.xyzw vf21, vf15, vf00 107
|
|
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm1, vu.in_ptr + 3);
|
|
// lq.xyzw vf26, 5(vi05) | mulw.xyzw vf22, vf15, vf00 108
|
|
res2.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
|
|
// sq.xyzw vf28, 0(vi06) | mulw.xyzw vf23, vf15, vf00 109
|
|
res3.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
|
|
|
|
// sq.xyzw vf29, 3(vi06) | mulax.xyzw ACC, vf24, vf06 110
|
|
acc.mula(Mask::xyzw, nrm0, m_texture_constants.cam_nrm.x()); sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
|
|
// sq.xyzw vf30, 6(vi06) | madday.xyzw ACC, vf25, vf06 111
|
|
acc.madda(Mask::xyzw, nrm1, m_texture_constants.cam_nrm.y()); sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
|
|
// sq.xyzw vf31, 9(vi06) | maddz.xyzw vf27, vf26, vf06 112
|
|
acc.madd(Mask::xyzw, reflect, nrm2, m_texture_constants.cam_nrm.z()); sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
|
|
// BRANCH!
|
|
// ibgtz vi01, L4 | addx.x vf21, vf21, vf02 113
|
|
res1.add(Mask::x, res1, m_texture_constants.offsets.x()); bc = ((s16)loop_idx) > 0;
|
|
// iaddi vi06, vi06, 0xc | addy.x vf22, vf22, vf02 114
|
|
res2.add(Mask::x, res2, m_texture_constants.offsets.y()); vu.dbuf_write = vu.dbuf_write + 12;
|
|
if (bc) { goto L4; }
|
|
|
|
// lq.xyzw vf28, 0(vi07) | addx.y vf14, vf14, vf02 115
|
|
vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x()); lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
|
|
// lq.xyzw vf16, 1(vi07) | nop 116
|
|
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
|
|
// sq.xyzw vf20, 2(vi06) | nop 117
|
|
sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
|
|
// sq.xyzw vf28, 0(vi06) | nop 118
|
|
sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
|
|
// jr vi12 | nop 119
|
|
// sq.xyzw vf16, 1(vi06) | nop 120
|
|
sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
|
|
// clang-format on
|
|
}
|
|
|
|
void OceanTexture::run_L5_PC() {
|
|
// clang-format off
|
|
u16 loop_idx;
|
|
Vf res0;
|
|
Vf res1;
|
|
Vf cout0;
|
|
Vf cout1;
|
|
Vf vtx0;
|
|
Vf vtx1;
|
|
bool bc;
|
|
// L5:
|
|
// iaddiu vi01, vi00, 0x21 | nop 121
|
|
loop_idx = 0x21; /* 33 */
|
|
// sq.xyzw vf05, 0(vi08) | nop 122
|
|
sq_buffer_giftag(m_texture_constants.giftag, vu.tptr);
|
|
// iaddi vi08, vi08, 0x1 | nop 123
|
|
vu.tptr = vu.tptr + 1;
|
|
L6:
|
|
// iaddi vi01, vi01, -0x1 | nop 124
|
|
loop_idx = loop_idx + -1;
|
|
// lq.xyzw vf20, 2(vi03) | nop 125
|
|
lq_buffer(Mask::xyzw, res0, vu.dbuf_read_a + 2);
|
|
// lq.xyzw vf21, 2(vi04) | nop 126
|
|
lq_buffer(Mask::xyzw, res1, vu.dbuf_read_b + 2);
|
|
// lq.xyzw vf28, 0(vi03) | nop 127
|
|
lq_buffer(Mask::xyzw, cout0, vu.dbuf_read_a);
|
|
// lq.xyzw vf16, 1(vi03) | nop 128
|
|
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_read_a + 1);
|
|
// lq.xyzw vf29, 0(vi04) | ftoi4.xyzw vf20, vf20 129
|
|
res0.ftoi4(Mask::xyzw, res0); lq_buffer(Mask::xyzw, cout1, vu.dbuf_read_b);
|
|
// lq.xyzw vf17, 1(vi04) | ftoi4.xyzw vf21, vf21 130
|
|
res1.ftoi4(Mask::xyzw, res1); lq_buffer(Mask::xyzw, vtx1, vu.dbuf_read_b + 1);
|
|
// sq.xyzw vf28, 0(vi08) | nop 131
|
|
sq_buffer(Mask::xyzw, cout0, vu.tptr);
|
|
// sq.xyzw vf16, 1(vi08) | nop 132
|
|
sq_buffer(Mask::xyzw, vtx0, vu.tptr + 1);
|
|
// sq.xyzw vf20, 2(vi08) | nop 133
|
|
sq_buffer(Mask::xyzw, res0, vu.tptr + 2);
|
|
// sq.xyzw vf29, 3(vi08) | nop 134
|
|
sq_buffer(Mask::xyzw, cout1, vu.tptr + 3);
|
|
// sq.xyzw vf17, 4(vi08) | nop 135
|
|
sq_buffer(Mask::xyzw, vtx1, vu.tptr + 4);
|
|
// sq.xyzw vf21, 5(vi08) | nop 136
|
|
sq_buffer(Mask::xyzw, res1, vu.tptr + 5);
|
|
|
|
// iaddi vi03, vi03, 0x3 | nop 137
|
|
vu.dbuf_read_a = vu.dbuf_read_a + 3;
|
|
// iaddi vi04, vi04, 0x3 | nop 138
|
|
vu.dbuf_read_b = vu.dbuf_read_b + 3;
|
|
// BRANCH!
|
|
// ibgtz vi01, L6 | nop 139
|
|
bc = ((s16)loop_idx) > 0;
|
|
// iaddi vi08, vi08, 0x6 | nop 140
|
|
vu.tptr = vu.tptr + 6;
|
|
if (bc) { goto L6; }
|
|
|
|
// xgkick vi09 | nop 141
|
|
xgkick_PC(vu.tbase);
|
|
// mtir vi08, vf03.x | nop 142
|
|
// vu.tptr = vu.vf03.x_as_u16();
|
|
vu.tptr = get_tbuf();
|
|
// mtir vi09, vf03.x | nop 143
|
|
// vu.vi09 = vu.vf03.x_as_u16();
|
|
vu.tbase = get_tbuf();
|
|
// jr vi12 | nop 144
|
|
// ASSERT(false);
|
|
// mr32.xyzw vf03, vf03 | nop 145
|
|
// vu.vf03.mr32(Mask::xyzw, vu.vf03);
|
|
swap_tbuf();
|
|
// clang-format on
|
|
}
|
|
|
|
/*!
 * PC replacement for the VU "xgkick": unpack one strip of vertices from the buffer at src into
 * m_pc.vertex_dynamic, appending at m_pc.vtx_idx.
 *
 * This relies on the data having exactly the layout written by run_L5_PC: a 16-byte giftag,
 * then NUM_VERTS_PER_STRIP vertices of 48 bytes each (st, rgbaq, xyz2 quadwords).
 */
void OceanTexture::xgkick_PC(Vf* src) {
  // start just past the giftag quadword
  const u8* vert_bytes = (const u8*)src + 16;

  for (u32 n = 0; n < NUM_VERTS_PER_STRIP; ++n, vert_bytes += 48) {
    auto& out = m_pc.vertex_dynamic[m_pc.vtx_idx++];

    // st: the first two floats of the first quadword
    memcpy(&out.s, vert_bytes, sizeof(float) * 2);

    // rgbaq: the second quadword holds four 32-bit values; only the first byte of each is read.
    out.rgba.x() = vert_bytes[16];
    out.rgba.y() = vert_bytes[20];
    out.rgba.z() = vert_bytes[24];
    out.rgba.w() = vert_bytes[28];  // we don't actually need it, it's always 0x80

    // xyz2: the third quadword is not read.
  }
}
|
|
|
|
void OceanTexture::run_L1_PC_jak2() {
|
|
// L1:
|
|
// lq.xyzw vf14_startx, 988(vi00) | maxw.xyzw vf01_ones, vf00, vf00
|
|
vu.startx = Vf(m_texture_constants.start);
|
|
// lq.xyzw vf02_offset, 989(vi00)
|
|
// lq.xyzw vf03_tbuf, 986(vi00)
|
|
// lq.xyzw vf04_dbuf, 987(vi00)
|
|
// lq.xyzw vf05_giftag, 985(vi00)
|
|
// lq.xyzw vf06_cam_nrm, 991(vi00)
|
|
// lq.xyzw vf07_constants, 990(vi00)
|
|
// iaddiu vi11_0x80, vi00, 0x80
|
|
// mtir vi08_tptr, vf03_tbuf.x
|
|
vu.tptr = get_tbuf();
|
|
// mtir vi09_tbase, vf03_tbuf.x
|
|
vu.tbase = get_tbuf();
|
|
// mr32.xyzw vf03_tbuf, vf03_tbuf
|
|
swap_tbuf();
|
|
// xtop vi05_in_ptr
|
|
vu.in_ptr = swap_vu_upload_buffers();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC_jak2();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC_jak2();
|
|
|
|
// mtir vi03_dbuf_read_a, vf04_dbuf.x
|
|
vu.dbuf_read_a = get_dbuf();
|
|
// bal vi12_ra, L5
|
|
// mtir vi04_dbuf_read_b, vf04_dbuf.y
|
|
vu.dbuf_read_b = get_dbuf_other();
|
|
run_L5_PC();
|
|
|
|
// mtir vi06_dbuf_write, vf04_dbuf.x
|
|
vu.dbuf_write = get_dbuf();
|
|
// bal vi12_ra, L3
|
|
// mr32.xyzw vf04_dbuf, vf04_dbuf
|
|
swap_dbuf();
|
|
run_L3_PC_jak2();
|
|
|
|
// mtir vi03_dbuf_read_a, vf04_dbuf.x
|
|
vu.dbuf_read_a = get_dbuf();
|
|
// bal vi12_ra, L5
|
|
// mtir vi04_dbuf_read_b, vf04_dbuf.y
|
|
vu.dbuf_read_b = get_dbuf_other();
|
|
run_L5_PC();
|
|
|
|
// nop :e
|
|
// nop
|
|
}
|
|
|
|
/*!
 * PC port of VU program entry point L2 (jak 2 variant, using run_L3_PC_jak2).
 * Grabs the next uploaded input buffer, then runs three (L3 pass, L5 pass) pairs.
 */
void OceanTexture::run_L2_PC_jak2() {
  // One L3 pass followed by one L5 pass.
  auto run_write_then_read = [&]() {
    // mtir vi06_dbuf_write, vf04_dbuf.x
    vu.dbuf_write = get_dbuf();
    // bal vi12_ra, L3  (delay slot: mr32.xyzw vf04_dbuf, vf04_dbuf)
    swap_dbuf();
    run_L3_PC_jak2();

    // mtir vi03_dbuf_read_a, vf04_dbuf.x
    vu.dbuf_read_a = get_dbuf();
    // bal vi12_ra, L5  (delay slot: mtir vi04_dbuf_read_b, vf04_dbuf.y)
    vu.dbuf_read_b = get_dbuf_other();
    run_L5_PC();
  };

  // L2:
  // xtop vi05_in_ptr
  vu.in_ptr = swap_vu_upload_buffers();

  run_write_then_read();
  run_write_then_read();
  run_write_then_read();

  // nop :e
  // nop
}
|
|
|
|
void OceanTexture::run_L3_PC_jak2() {
|
|
Vf base_pos; // vf15
|
|
u16 loop_idx;
|
|
|
|
Vf vtx0; // vf16
|
|
Vf vtx1; // vf17
|
|
Vf vtx2; // vf18
|
|
Vf vtx3; // vf19
|
|
|
|
Vf res0; // vf20
|
|
Vf res1; // vf21
|
|
Vf res2; // vf22
|
|
Vf res3; // vf23
|
|
|
|
Vf nrm0; // vf24
|
|
// Vf nrm1; // vf25
|
|
Vf nrm2; // vf26
|
|
|
|
// Vf reflect; // vf27
|
|
|
|
Vf cout0; // vf28
|
|
Vf cout1; // vf29
|
|
Vf cout2; // vf30
|
|
Vf cout3; // vf31
|
|
|
|
// Accumulator acc;
|
|
const Vf ones(1, 1, 1, 1);
|
|
const Vf vf00(0, 0, 0, 1);
|
|
// const u16 vi11 = 0x80;
|
|
bool bc;
|
|
|
|
// clang-format off
|
|
// L3:
|
|
// ior vi07, vi06, vi00 | nop 56
|
|
vu.dbuf_write_base = vu.dbuf_write;
|
|
// move.xyzw vf15, vf14 | nop 57
|
|
base_pos.move(Mask::xyzw, vu.startx);
|
|
// iaddi vi01, vi00, 0x8 | nop 58
|
|
loop_idx = 8;
|
|
// lq.xyzw vf24, 1(vi05) | mulw.xyzw vf20, vf15, vf00 59 (?? what are they doing here)
|
|
res0.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
|
|
// lq.xyzw vf26, 3(vi05) | mulw.xyzw vf21, vf15, vf00 60
|
|
res1.mul(Mask::xyzw, base_pos, 1.f); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
|
|
// nop | mulw.xyzw vf22, vf15, vf00 61
|
|
lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
|
|
// nop | mulw.xyzw vf23, vf15, vf00 62
|
|
res3.mul(Mask::xyzw, base_pos, 1.f);
|
|
// nop | addx.x vf21, vf21, vf02 63
|
|
res1.add(Mask::x, res1, m_texture_constants.offsets.x());
|
|
// nop | addy.x vf22, vf22, vf02 64
|
|
res2.add(Mask::x, res2, m_texture_constants.offsets.y());
|
|
L4:
|
|
// nop | addz.x vf23, vf23, vf02 65
|
|
res3.add(Mask::x, res3, m_texture_constants.offsets.z());
|
|
// nop | addw.x vf15, vf15, vf02 66
|
|
base_pos.add(Mask::x, base_pos, m_texture_constants.offsets.w());
|
|
// sq.xyzw vf20, 2(vi06) | mulx.x vf28, vf01, vf24 67
|
|
cout0.mul(Mask::x, ones, nrm0.x()); sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
|
|
// sq.xyzw vf21, 5(vi06) | muly.x vf29, vf01, vf24 68
|
|
cout1.mul(Mask::x, ones, nrm0.y()); sq_buffer(Mask::xyzw, res1, vu.dbuf_write + 5);
|
|
// sq.xyzw vf22, 8(vi06) | mulz.x vf30, vf01, vf24 69
|
|
cout2.mul(Mask::x, ones, nrm0.z()); sq_buffer(Mask::xyzw, res2, vu.dbuf_write + 8);
|
|
// sq.xyzw vf23, 11(vi06) | mulw.x vf31, vf01, vf24 70
|
|
cout3.mul(Mask::x, ones, nrm0.w()); sq_buffer(Mask::xyzw, res3, vu.dbuf_write + 11);
|
|
// lq.xyzw vf16, 0(vi05) | mulx.y vf28, vf01, vf26 71
|
|
cout0.mul(Mask::y, ones, nrm2.x()); lq_buffer(Mask::xyzw, vtx0, vu.in_ptr);
|
|
// lq.xyzw vf17, 2(vi05) | muly.y vf29, vf01, vf26 72
|
|
cout1.mul(Mask::y, ones, nrm2.y()); lq_buffer(Mask::xyzw, vtx1, vu.in_ptr + 2);
|
|
// lq.xyzw vf18, 4(vi05) | mulz.y vf30, vf01, vf26 73
|
|
cout2.mul(Mask::y, ones, nrm2.z()); lq_buffer(Mask::xyzw, vtx2, vu.in_ptr + 4);
|
|
// lq.xyzw vf19, 6(vi05) | mulw.y vf31, vf01, vf26 74
|
|
cout3.mul(Mask::y, ones, nrm2.w()); lq_buffer(Mask::xyzw, vtx3, vu.in_ptr + 6);
|
|
// iaddi vi05, vi05, 0x8 | ftoi0.xyzw vf16, vf16 75
|
|
vtx0.ftoi0(Mask::xyzw, vtx0); vu.in_ptr = vu.in_ptr + 8;
|
|
// nop | ftoi0.xyzw vf17, vf17 76
|
|
vtx1.ftoi0(Mask::xyzw, vtx1);
|
|
// nop | ftoi0.xyzw vf18, vf18 77
|
|
vtx2.ftoi0(Mask::xyzw, vtx2);
|
|
// iaddi vi01, vi01, -0x1 | ftoi0.xyzw vf19, vf19 78
|
|
vtx3.ftoi0(Mask::xyzw, vtx3); loop_idx = loop_idx + -1;
|
|
// sq.xyzw vf16, 1(vi06) | add.xyzw vf28, vf28, vf07 79
|
|
cout0.add(Mask::xyzw, cout0, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
|
|
// sq.xyzw vf17, 4(vi06) | add.xyzw vf29, vf29, vf07 80
|
|
cout1.add(Mask::xyzw, cout1, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx1, vu.dbuf_write + 4);
|
|
// sq.xyzw vf18, 7(vi06) | add.xyzw vf30, vf30, vf07 81
|
|
cout2.add(Mask::xyzw, cout2, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx2, vu.dbuf_write + 7);
|
|
// sq.xyzw vf19, 10(vi06) | add.xyzw vf31, vf31, vf07 82
|
|
cout3.add(Mask::xyzw, cout3, m_texture_constants.cam_nrm); sq_buffer(Mask::xyzw, vtx3, vu.dbuf_write + 10);
|
|
// lq.xyzw vf24, 1(vi05) | sub.zw vf28, vf01, vf00 83
|
|
cout0.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm0, vu.in_ptr + 1);
|
|
// lq.xyzw vf26, 5(vi05) | sub.zw vf29, vf01, vf00 84
|
|
cout1.sub(Mask::zw, ones, vf00); lq_buffer(Mask::xyzw, nrm2, vu.in_ptr + 5);
|
|
// nop | sub.zw vf30, vf01, vf00 85
|
|
cout2.sub(Mask::zw, ones, vf00);
|
|
// nop | sub.zw vf31, vf01, vf00 86
|
|
cout3.sub(Mask::zw, ones, vf00);
|
|
// sq.xyzw vf28, 0(vi06) | mulw.xyzw vf20, vf15, vf00 87
|
|
res0.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
|
|
// sq.xyzw vf29, 3(vi06) | mulw.xyzw vf21, vf15, vf00 88
|
|
res1.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout1, vu.dbuf_write + 3);
|
|
// sq.xyzw vf30, 6(vi06) | mulw.xyzw vf22, vf15, vf00 89
|
|
res2.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout2, vu.dbuf_write + 6);
|
|
// sq.xyzw vf31, 9(vi06) | mulw.xyzw vf23, vf15, vf00 90
|
|
res3.mul(Mask::xyzw, base_pos, 1.f); sq_buffer(Mask::xyzw, cout3, vu.dbuf_write + 9);
|
|
// BRANCH!
|
|
// ibgtz vi01, L4 | addx.x vf21, vf21, vf02 91
|
|
res1.add(Mask::x, res1, m_texture_constants.offsets.x()); bc = ((s16)loop_idx) > 0;
|
|
// iaddi vi06, vi06, 0xc | addy.x vf22, vf22, vf02 92
|
|
res2.add(Mask::x, res2, m_texture_constants.offsets.y()); vu.dbuf_write = vu.dbuf_write + 12;
|
|
if (bc) { goto L4; }
|
|
|
|
// lq.xyzw vf28, 0(vi07) | addx.y vf14, vf14, vf02 93
|
|
vu.startx.add(Mask::y, vu.startx, m_texture_constants.offsets.x()); lq_buffer(Mask::xyzw, cout0, vu.dbuf_write_base);
|
|
// lq.xyzw vf16, 1(vi07) | nop 94
|
|
lq_buffer(Mask::xyzw, vtx0, vu.dbuf_write_base + 1);
|
|
// sq.xyzw vf20, 2(vi06) | nop 95
|
|
sq_buffer(Mask::xyzw, res0, vu.dbuf_write + 2);
|
|
// sq.xyzw vf28, 0(vi06) | nop 96
|
|
sq_buffer(Mask::xyzw, cout0, vu.dbuf_write);
|
|
// jr vi12 | nop 97
|
|
// sq.xyzw vf16, 1(vi06) | nop 98
|
|
sq_buffer(Mask::xyzw, vtx0, vu.dbuf_write + 1);
|
|
// clang-format on
|
|
}
|
|
|
|
/*!
 * Reset the write position in m_pc.vertex_dynamic to the start, so the next xgkick_PC call
 * stores its vertices from index 0.
 */
void OceanTexture::setup_renderer() {
  m_pc.vtx_idx = 0;
}
|
|
|
|
/*!
 * Upload the vertices accumulated in m_pc.vertex_dynamic and draw them as indexed triangle
 * strips with the OCEAN_TEXTURE shader, sampling the envmap texture.
 * @param render_state provides the shader table and the texture pool
 * @param prof         profiler node that receives the draw call / triangle counts
 */
void OceanTexture::flush(SharedRenderState* render_state, ScopedProfilerNode& prof) {
  // exactly 2112 vertices must have been written (vtx_idx is advanced by xgkick_PC)
  ASSERT(m_pc.vtx_idx == 2112);

  // upload this frame's dynamic vertex data
  glBindVertexArray(m_pc.vao);
  glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
  glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, m_pc.vertex_dynamic.data(),
               GL_DYNAMIC_DRAW);
  glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);

  render_state->shaders[ShaderId::OCEAN_TEXTURE].activate();

  // find the envmap texture by its tbp0, falling back to the placeholder if it isn't in the pool
  GsTex0 tex0(m_envmap_adgif.tex0_data);
  auto envmap_tex = render_state->texture_pool->lookup(tex0.tbp0());
  if (!envmap_tex) {
    envmap_tex = render_state->texture_pool->get_placeholder_texture();
  }

  // no decal
  // yes tcc
  glActiveTexture(GL_TEXTURE0);
  glBindTexture(GL_TEXTURE_2D, *envmap_tex);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
  glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);

  // point the shader's sampler at texture unit 0
  const auto sampler_location =
      glGetUniformLocation(render_state->shaders[ShaderId::OCEAN_TEXTURE].id(), "tex_T0");
  glUniform1i(sampler_location, 0);

  glDisable(GL_DEPTH_TEST);
  glDisable(GL_BLEND);

  // strips are separated in the index buffer by the UINT32_MAX restart index (see init_pc)
  glEnable(GL_PRIMITIVE_RESTART);
  glPrimitiveRestartIndex(UINT32_MAX);
  glDrawElements(GL_TRIANGLE_STRIP, m_pc.index_buffer.size(), GL_UNSIGNED_INT, (void*)0);

  prof.add_draw_call();
  prof.add_tri(NUM_STRIPS * NUM_STRIPS * 2);

  glBindVertexArray(0);
}
|
|
|
|
void OceanTexture::init_pc() {
|
|
int i = 0;
|
|
m_pc.vertex_positions.resize(NUM_VERTS);
|
|
m_pc.vertex_dynamic.resize(NUM_VERTS);
|
|
m_pc.index_buffer.clear();
|
|
for (u32 strip = 0; strip < NUM_STRIPS; strip++) {
|
|
u32 lo = 64 * strip;
|
|
u32 hi = 64 * (strip + 1);
|
|
for (u32 vert_pair = 0; vert_pair < NUM_VERTS_PER_STRIP / 2; vert_pair++) {
|
|
m_pc.index_buffer.push_back(i);
|
|
auto& v0 = m_pc.vertex_positions[i++];
|
|
v0 = math::Vector2f(vert_pair * 64, lo);
|
|
m_pc.index_buffer.push_back(i);
|
|
auto& v1 = m_pc.vertex_positions[i++];
|
|
v1 = math::Vector2f(vert_pair * 64, hi);
|
|
}
|
|
m_pc.index_buffer.push_back(UINT32_MAX);
|
|
}
|
|
|
|
glGenVertexArrays(1, &m_pc.vao);
|
|
glBindVertexArray(m_pc.vao);
|
|
|
|
glGenBuffers(1, &m_pc.gl_index_buffer);
|
|
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, m_pc.gl_index_buffer);
|
|
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(u32) * m_pc.index_buffer.size(),
|
|
m_pc.index_buffer.data(), GL_STATIC_DRAW);
|
|
|
|
glGenBuffers(1, &m_pc.static_vertex_buffer);
|
|
glBindBuffer(GL_ARRAY_BUFFER, m_pc.static_vertex_buffer);
|
|
glBufferData(GL_ARRAY_BUFFER, sizeof(math::Vector2f) * NUM_VERTS, m_pc.vertex_positions.data(),
|
|
GL_STATIC_DRAW);
|
|
glEnableVertexAttribArray(0);
|
|
glEnableVertexAttribArray(1);
|
|
glEnableVertexAttribArray(2);
|
|
|
|
glVertexAttribPointer(0, // location 0 in the shader
|
|
2, // 3 floats per vert
|
|
GL_FLOAT, // floats
|
|
GL_TRUE, // normalized, ignored,
|
|
0, // tightly packed
|
|
0
|
|
|
|
);
|
|
|
|
glGenBuffers(1, &m_pc.dynamic_vertex_buffer);
|
|
glBindBuffer(GL_ARRAY_BUFFER, m_pc.dynamic_vertex_buffer);
|
|
glBufferData(GL_ARRAY_BUFFER, sizeof(Vertex) * NUM_VERTS, nullptr, GL_DYNAMIC_DRAW);
|
|
glVertexAttribPointer(1, // location 0 in the shader
|
|
4, // 4 color components
|
|
GL_UNSIGNED_BYTE, // floats
|
|
GL_TRUE, // normalized, ignored,
|
|
sizeof(Vertex), //
|
|
(void*)offsetof(Vertex, rgba) // offset in array (why is this a pointer...)
|
|
);
|
|
glVertexAttribPointer(2, // location 0 in the shader
|
|
2, // 2 floats per vert
|
|
GL_FLOAT, // floats
|
|
GL_FALSE, // normalized, ignored,
|
|
sizeof(Vertex), //
|
|
(void*)offsetof(Vertex, s) // offset in array (why is this a pointer...)
|
|
);
|
|
}
|
|
|
|
// Teardown counterpart of init_pc(). Currently a no-op: the VAO and the three GL buffers
// generated in init_pc() (index, static vertex, dynamic vertex) are not deleted here.
// NOTE(review): consider releasing them with glDeleteBuffers / glDeleteVertexArrays if a GL
// context is still current when this is called - confirm against the caller.
void OceanTexture::destroy_pc() {} |