diff --git a/misc/n64/rsp_gpu.S b/misc/n64/rsp_gpu.S index 2c75b6f3e..26dc22633 100644 --- a/misc/n64/rsp_gpu.S +++ b/misc/n64/rsp_gpu.S @@ -543,21 +543,14 @@ GPUCmd_DrawQuad: #undef v2_cflags #undef v3_cflags -// ######################## -// Clipped triangle path -// ######################## - // If not, go with slow clipping path - - # now do the actual drawing +// ########################### +// Slow clipped triangle path +// ########################### li a1, %lo(VERTEX_CACHE) + V0_OFFSET li a2, %lo(VERTEX_CACHE) + V1_OFFSET - jal DrawClippedTriangle li a3, %lo(VERTEX_CACHE) + V2_OFFSET - - li a1, %lo(VERTEX_CACHE) + V0_OFFSET - li a2, %lo(VERTEX_CACHE) + V2_OFFSET - jal DrawClippedTriangle - li a3, %lo(VERTEX_CACHE) + V3_OFFSET + jal DrawClippedQuad + li a0, %lo(VERTEX_CACHE) + V3_OFFSET j RSPQ_Loop nop @@ -566,15 +559,17 @@ GPUCmd_DrawQuad: ################################################################ # DrawClippedTriangle - Breaks a triangle into one or more clipped tris ################################################################ -.func DrawClippedTriangle -DrawClippedTriangle: +.func DrawClippedQuad +DrawClippedQuad: sw ra, %lo(DRAW_TRI_RA) // TODO find a register for this lbu t0, SCREEN_VTX_CLIP_CODE(vtx1) lbu t1, SCREEN_VTX_CLIP_CODE(vtx2) lbu t2, SCREEN_VTX_CLIP_CODE(vtx3) + lbu t3, SCREEN_VTX_CLIP_CODE(vtx4) or t5, t0, t1 or t5, t2 + or t5, t3 move s1, zero jal GL_ClipTriangle diff --git a/misc/n64/rsp_gpu_clipping.inc b/misc/n64/rsp_gpu_clipping.inc index f406650fb..b0a498899 100644 --- a/misc/n64/rsp_gpu_clipping.inc +++ b/misc/n64/rsp_gpu_clipping.inc @@ -1,5 +1,5 @@ #define CLIPPING_PLANE_COUNT 6 -#define CLIPPING_CACHE_SIZE 9 +#define CLIPPING_CACHE_SIZE 10 #define CLIPPING_PLANE_SIZE 8 .section .data.gl_clipping @@ -14,7 +14,7 @@ CLIP_PLANES: .half 0, 0, 1, -1 .align 4 -CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18 +CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20 .section .bss.gl_clipping @@ -33,8 +33,8 @@ CLIP_LISTS: # Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm # https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm # Args: - # a1-a3 = Vertices - # t5 = OR'd clip flags of the triangle's vertices + # a1-a3,a0 = Vertices + # t5 = OR'd clip flags of the triangle's vertices # Returns: # s1 = Pointer to list of output vertices # s2 = Pointer to end of list @@ -59,6 +59,7 @@ GL_ClipTriangle: #define vtx1 a1 #define vtx2 a2 #define vtx3 a3 + #define vtx4 a0 #define vplane $v01 #define vint_f $v02 @@ -85,13 +86,14 @@ GL_ClipTriangle: li in_list, %lo(CLIP_LIST0) move in_count, zero - # Put three original vertices in the out_list + # Put four original vertices in the out_list # (So after the initial swap they will be in the in_list) li out_list, %lo(CLIP_LIST1) sh vtx1, 0(out_list) sh vtx2, 2(out_list) sh vtx3, 4(out_list) - li out_count, 3*2 + sh vtx4, 6(out_list) + li out_count, 4*2 li plane, %lo(CLIP_PLANES) li plane_flag, 1 @@ -100,7 +102,7 @@ GL_ClipTriangle: li t0, %lo(CACHE_OFFSETS) vxor voff1, voff1 lqv voff0, 0,t0 - lsv voff1, 16,t0 + ldv voff1, 16,t0 # Temporarily use the RDP staging area as a map of which cache slots are used # Init to zero @@ -201,6 +203,7 @@ gl_clip_no_swap: vlt vcache0, vcache0.h2 vlt vcache0, vcache0.e4 vlt vcache0, vcache1.e0 + vlt vcache0, vcache1.e1 mfc2 t0, vcache0.e0