Dreamcast: Further WIP on assembly code

This commit is contained in:
UnknownShadow200 2024-07-06 23:21:30 +10:00
parent 67a47a0a9b
commit b3abb0f054
6 changed files with 179 additions and 63 deletions

View File

@ -25,7 +25,7 @@ endif
#---------------------------------------------------------------------------------
# main targets
#---------------------------------------------------------------------------------
default: $(GLDC_LIB) $(BUILD_DIR) $(TARGET).cdi
default: $(BUILD_DIR) $(TARGET).cdi
clean:
rm $(TARGET).cdi $(TARGET).iso $(TARGET).elf $(TARGET)-scr.bin $(TARGET).bin $(OBJS)
@ -37,7 +37,7 @@ $(BUILD_DIR):
#---------------------------------------------------------------------------------
# executable generation
#---------------------------------------------------------------------------------
$(TARGET).elf: $(OBJS)
$(TARGET).elf: $(OBJS) $(GLDC_LIB)
kos-cc $(LDFLAGS) $^ -o $@ $(LIBS)
$(TARGET).bin: $(TARGET).elf

View File

@ -71,33 +71,33 @@ _ClipLine:
add #4, r6 ! EX, OUT = &OUT->x
fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT
fmov.s @r4+, fr2 ! LS, A = v1->x
fmov.s @r5+, fr3 ! LS, B = v2->x
fipr fv8, fv0 ! FE, LERP = A * t + B * invT
fmov.s @r4+, fr2 ! LS, A1 = v1->x
fmov.s @r5+, fr3 ! LS, B1 = v2->x
fipr fv8, fv0 ! FE, LERP(A1, B1)
fmov.s @r4+, fr6 ! LS, A2 = v1->y
fmov.s @r5+, fr7 ! LS, B2 = v2->y
fmov.s fr3, @r6 ! LS, OUT->x = LERP
add #4, r6 ! EX, OUT += 4
fmov.s @r4+, fr6 ! LS, A = v1->y
fmov.s @r5+, fr7 ! LS, B = v2->y
fipr fv8, fv4 ! FE, LERP = A * t + B * invT
fipr fv8, fv4 ! FE, LERP(A2, B2)
add #4, r4 ! EX, v1 += 4
add #4, r5 ! EX, v2 += 4
fmov.s fr7, @r6 ! LS, OUT->y = LERP
add #4, r6 ! EX, OUT += 4
add #4, r4 ! EX, A = &v1->z
add #4, r5 ! EX, B = &v2->z
fmov.s fr1, @r6 ! LS, OUT->z = 0
add #4, r6 ! EX, OUT += 4
fmov.s @r4+, fr2 ! LS, A = v1->u
fmov.s @r5+, fr3 ! LS, B = v2->u
fipr fv8, fv0 ! FE, LERP = A * t + B * invT
fmov.s @r4+, fr2 ! LS, A1 = v1->u
fmov.s @r5+, fr3 ! LS, B1 = v2->u
fipr fv8, fv0 ! FE, LERP(A1, B1)
fmov.s @r4+, fr6 ! LS, A2 = v1->v
fmov.s @r5+, fr7 ! LS, B2 = v2->v
fmov.s fr3, @r6 ! LS, OUT->u = LERP
add #4, r6 ! EX, OUT += 4
fmov.s @r4+, fr6 ! LS, A = v1->v
fmov.s @r5+, fr7 ! LS, B = v2->v
fipr fv8, fv4 ! FE, lerp = A * t + B * invT
fmov.s fr7, @r6 ! LS, OUT->v = lerp
fipr fv8, fv4 ! FE, LERP(A2, B2)
fmov.s fr7, @r6 ! LS, OUT->v = LERP
add #4, r6 ! EX, OUT += 4
mov.l @r4+,r0 ! LS, ACOLOR = v1->bgra
@ -108,7 +108,7 @@ _ClipLine:
extu.b r2,r3 ! EX, tmp = BCOLOR.b
lds r3,fpul ! CO, FPUL = tmp
float fpul,fr3 ! EX, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
fipr fv8, fv0 ! FE, LERP(A1, B1)
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,r3 ! CO, tmp = FPUL
extu.b r3,r3 ! EX, tmp = (uint8)tmp
@ -122,7 +122,7 @@ _ClipLine:
extu.b r2,r3 ! EX, tmp = BCOLOR.g
lds r3,fpul ! CO, FPUL = tmp
float fpul,fr3 ! EX, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
fipr fv8, fv0 ! FE, LERP(A1, B1)
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,r3 ! CO, tmp = FPUL
extu.b r3,r3 ! EX, tmp = (uint8)tmp
@ -137,7 +137,7 @@ _ClipLine:
extu.b r2,r3 ! EX, tmp = BCOLOR.b
lds r3,fpul ! CO, FPUL = tmp
float fpul,fr3 ! EX, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
fipr fv8, fv0 ! FE, LERP(A1, B1)
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,r3 ! CO, tmp = FPUL
extu.b r3,r3 ! EX, tmp = (uint8)tmp
@ -152,7 +152,7 @@ _ClipLine:
extu.b r2,r3 ! EX, tmp = BCOLOR.r
lds r3,fpul ! CO, FPUL = tmp
float fpul,fr3 ! EX, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
fipr fv8, fv0 ! FE, LERP(A1, B1)
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,r3 ! CO, tmp = FPUL
extu.b r3,r3 ! EX, tmp = (uint8)tmp
@ -161,9 +161,9 @@ _ClipLine:
or r3,r7 ! EX, OUTCOLOR.a |= tmp
mov.l r7, @r6 ! LS, OUT->color = OUTCOLOR
fmov.s @r4+,fr2 ! LS, A = v1->w
fmov.s @r5+,fr3 ! LS, B = v2->w
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
fmov.s @r4+,fr2 ! LS, A1 = v1->w
fmov.s @r5+,fr3 ! LS, B1 = v2->w
fipr fv8, fv0 ! FE, LERP(A1, B1)
add #4, r6 ! EX, OUT += 4
rts ! CO, return after executing instruction in delay slot
fmov.s fr3, @r6 ! LS, OUT->w = lerp

View File

@ -2,10 +2,15 @@
! r9 = num vertices left
! r10 = PVR_CMD_VERTEX
! r11 = PVR_CMD_VERTEX_EOL
! r12 = 0xFF000000
! r12 = ??????
! r13 = cur vertex
! r14 = next vertex (prefetch)
!fr8 = VIEWPORT_HWIDTH
!fr9 = VIEWPORT_HHEIGHT
!fr10 = VIEWPORT_X_PLUS_HWIDTH
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
#define REG_V0 r4
#define REG_V1 r5
#define REG_V2 r6
@ -40,41 +45,140 @@
.endm
! Applies the perspective divide and viewport transform to a vertex, in place
!   vertex->x = (x * VIEWPORT_HWIDTH)  * invW + VIEWPORT_X_PLUS_HWIDTH
!   vertex->y = (y * VIEWPORT_HHEIGHT) * invW + VIEWPORT_Y_PLUS_HHEIGHT
!   vertex->z = invW
! where invW = 1 / sqrt(w * w), i.e. 1 / |vertex->w| as computed by fsrra
! CLOBBERS: fr0, fr4, fr5
! INPUTS:   R (register pointing at the start of the vertex)
!           fr8-fr11 (viewport registers, see register list at top of file)
! OUTPUTS:  vertex x/y/z rewritten; R is left pointing at the start of the vertex again
! TODO optimise greatly
.macro ViewportTransform R
! INVERSE W CALCULATION
add #28, \R ! EX, \R = &vertex->w
fmov.s @\R,fr0 ! LS, fr0 = vertex->w
fmul fr0,fr0 ! FE, fr0 = fr0 * fr0
add #-24, \R ! EX, \R = &vertex->x
fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w

! TRANSFORM X
fmov.s @\R,fr4 ! LS, fr4 = vertex->x
fmov fr10,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
fmul fr8,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
! Store through the macro argument, not a fixed register, so that the
! macro is correct for every vertex register it is expanded with
fmov.s fr5,@\R ! LS, vertex->x = fr5
add #4, \R ! EX, \R = &vertex->y

! TRANSFORM Y
fmov.s @\R,fr4 ! LS, fr4 = vertex->y
fmov fr11,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
fmul fr9,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
fmov.s fr5,@\R ! LS, vertex->y = fr5
add #4, \R ! EX, \R = &vertex->z

! ASSIGN Z
fmov.s fr0,@\R ! LS, vertex->z = fr0
add #-12, \R ! EX, \R -= 12 (back to start of vertex)
.endm
! Handles a viewport update or PowerVR GPU command
! If the low byte of v->flags is 0x23, the entry is treated as a viewport update:
!  the four floats following the flags word are loaded into fr8-fr11 and
!  also written to the _vp global. Otherwise the entry is submitted using PushVertex.
! CLOBBERS: r0, r2, r3
! INPUTS: r4, r8 (SQ global)
! OUTPUTS: r4,r8 altered, fr8-fr11 (only written on the viewport update path)
! NOTE(review): r3/r8 are not touched by the code visible here - presumably used by PushVertex, confirm
_HandleCommand1:
mov.l @REG_V0,r2 ! r2 = v->flags
extu.b r2,r0 ! r0 = v->flags & 0xFF
cmp/eq #35,r0 ! T = r0 == 0x23
bt.s 1f ! if (T) goto 1;
nop ! (branch delay slot)
PushVertex REG_V0
rts
nop ! (branch delay slot)
1:
add #4,r4 ! r4 = &src->x (skip over the flags word)
mov.l .VP_0,r2 ! r2 = &vp
! Load VIEWPORT registers
fmov.s @r4+, fr8 ! VIEWPORT_HWIDTH = src->x
fmov.s @r4+, fr9 ! VIEWPORT_HHEIGHT = src->y
fmov.s @r4+,fr10 ! VIEWPORT_X_PLUS_HWIDTH = src->z
add #16,r2 ! r2 = end of the 4 floats in vp (stores below pre-decrement)
fmov.s @r4+,fr11 ! VIEWPORT_Y_PLUS_HHEIGHT = src->u
! And store to vp global
! (written last field first, since each store pre-decrements r2)
fmov.s fr11,@-r2
fmov.s fr10,@-r2
fmov.s fr9,@-r2
rts
fmov.s fr8,@-r2 ! (branch delay slot) final store executes before returning
.align 4
! Literal pool: address of the vp global, loaded PC-relative above
.VP_0:
.long _vp
! Placeholder handlers for every quad case other than 'all 4 vertices visible'
! All of these labels share the single return below, so they currently
!  return to the caller without doing anything
_Case_0_0_0_1:
_Case_0_0_1_0:
_Case_0_0_1_1:
_Case_0_1_0_0:
_Case_0_1_0_1:
_Case_0_1_1_0:
_Case_0_1_1_1:
_Case_1_0_0_0:
_Case_1_0_0_1:
_Case_1_0_1_0:
_Case_1_0_1_1:
_Case_1_1_0_0:
_Case_1_1_0_1:
_Case_1_1_1_0:
rts
nop ! (branch delay slot)
! Handles a quad where all 4 vertices are visible (no clipping required)
! Each vertex is viewport transformed in place and then pushed, in the
!  order v1, v2, v0, v3
! INPUTS: REG_V0-REG_V3 (the 4 vertices of the quad), fr8-fr11 (viewport registers)
! CLOBBERS: fr0, fr4, fr5 (from ViewportTransform), plus whatever PushVertex clobbers
! NOTE(review): the last entry of the CASES dispatch table appears to reference
!  _Case1111 rather than this label - confirm which one is intended
_Case_1_1_1_1:
! Triangle strip: {1,2,0} {2,0,3}
!_glPerspectiveDivideVertex(v1);
ViewportTransform REG_V1
PushVertex REG_V1
!_glPerspectiveDivideVertex(v2);
ViewportTransform REG_V2
PushVertex REG_V2
!_glPerspectiveDivideVertex(v0);
ViewportTransform REG_V0
PushVertex REG_V0
!_glPerspectiveDivideVertex(v3);
ViewportTransform REG_V3
PushVertex REG_V3
rts
nop ! (branch delay slot)
.global _ProcessVertexList
.align 4
.type _ProcessVertexList,%function
_ProcessVertexList:
! STORE REGISTERS
mov.l r8,@-r15
mov.l r9,@-r15
! STORE CPU REGISTERS
mov.l r8,@-r15
mov.l r9,@-r15
mov.l r10,@-r15
mov.l r11,@-r15
mov.l r12,@-r15
mov.l r13,@-r15
mov.l r14,@-r15
sts.l pr,@-r15
mov r4,r14
mov r4,r13
mov.l .L33,r12
mov.l .L35,r11
mov.l .L34,r10
mov r5,r9
sts.l pr,@-r15
! STORE FPU REGISTERS
fmov.s fr8,@-r15
fmov.s fr9,@-r15
fmov.s fr10,@-r15
fmov.s fr11,@-r15
! VIEWPORT SETUP
mov.l .VP_1,r0 ! LS, r0 = &vp
fmov.s @r0+,fr8 ! LS, fr8 = vp.HWIDTH
fmov.s @r0+,fr9 ! LS, fr9 = vp.HHEIGHT
fmov.s @r0+,fr10 ! LS, fr10 = vp.X_PLUS_HWIDTH
fmov.s @r0+,fr11 ! LS, fr11 = vp.Y_PLUS_HHEIGHT
! REGISTER SETUP
mov r4,r14
mov r4,r13
!mov.l .CMD_MSK,r12
mov.l .PVR_EOL,r11
mov.l .PVR_VTX,r10
mov r5,r9
bra SUBMIT_LOOP
nop
mov r6,r8
DO_CMD:
mov.l .L37,r2
@ -86,8 +190,8 @@ DO_CMD:
SUBMIT_LOOP:
mov.l @r13,r0 ! FLAGS = CUR->flags
add #32,r14 ! NEXT += sizeof(Vertex)
mov r1,r2 ! TYPE = FLAGS
and r12,r2 ! TYPE = FLAGS & 0xFF000000
mov r0,r2 ! TYPE = FLAGS
and r11,r2 ! TYPE = FLAGS & 0xF0000000 (reuse PVR_CMD_VERTEX_EOL as type mask)
! Check for PVR_CMD_VERTEX
cmp/eq r10,r2 ! T = r2 == PVR_CMD_VERTEX
bt.s NEXT_ITER ! if (T) goto NEXT_ITER
@ -96,11 +200,11 @@ SUBMIT_LOOP:
cmp/eq r11,r2 ! T = r2 == PVR_CMD_VERTEX_EOL
bf.s DO_CMD ! if (!T) goto DO_CMD
! PVR_CMD_VERTEX_EOL case
extu.b r1,r1 ! EX, MASK = FLAGS & 0xFF (branch delay slot)
mov.l r12,@r13 ! LS, CUR->flags = TYPE
extu.b r0,r1 ! EX, MASK = FLAGS & 0xFF (branch delay slot)
mov.l r11,@r13 ! LS, CUR->flags = PVR_CMD_VERTEX_EOL
! Prepare and then jump to quad drawing function, based on quad clipflags
mova CASES,r0 ! LS, r0 = CASES
mova .CASES,r0 ! LS, r0 = CASES
mov r13,r7 ! MT, r7 = v3
shll2 r1 ! EX, MASK <<= 2
mov r13,r6 ! MT, r6 = v3
@ -116,7 +220,19 @@ NEXT_ITER:
bf.s SUBMIT_LOOP
mov r14,r13 ! CUR = NEXT
! RESTORE REGISTERS
! VIEWPORT SAVE
mov.l .VP_1,r0
add #16,r0
fmov.s fr11,@-r0
fmov.s fr10,@-r0
fmov.s fr9,@-r0
fmov.s fr8,@-r0
! RESTORE FPU REGISTERS
! The prologue pushes fr8, fr9, fr10, fr11 (in that order) using pre-decrement,
!  so they must be popped in the reverse order for each register to
!  receive the value it held on entry
fmov.s @r15+,fr11
fmov.s @r15+,fr10
fmov.s @r15+, fr9
fmov.s @r15+, fr8
! RESTORE CPU REGISTERS
lds.l @r15+,pr
mov.l @r15+,r14
mov.l @r15+,r13
@ -128,20 +244,20 @@ NEXT_ITER:
mov.l @r15+,r8
.align 4
.L33:
.long -16777216
.L34:
.long -536870912
.L35:
.long -268435456
.VP_1:
.long _vp
.PVR_VTX:
.long 0xE0000000
.PVR_EOL:
.long 0xF0000000
.L37:
.long _HandleCommand
! CASES table holds the functions to transfer a quad,
! based on the visibility clipflags of the 4 vertices
! e.g. CASES[15] = V0_VIS | V1_VIS | V2_VIS | V3_VIS (all 4 visible)
CASES:
.long 0 ! Should never happen
.CASES:
.long _arch_exit ! Should never happen
.long _Case_0_0_0_1
.long _Case_0_0_1_0
.long _Case_0_0_1_1
@ -156,4 +272,4 @@ CASES:
.long _Case_1_1_0_0
.long _Case_1_1_0_1
.long _Case_1_1_1_0
.long _Case_1_1_1_1
.long _Case1111

View File

@ -657,7 +657,7 @@ void Gfx_SetViewport(int x, int y, int w, int h) {
c.x = w * 0.5f; // hwidth
c.y = h * -0.5f; // hheight
c.z = x + w * 0.5f; // x_plus_hwidth
c.w = y + h * 0.5f; // y_plus_hheight
c.u = y + h * 0.5f; // y_plus_hheight
PushCommand(&c);
}

View File

@ -50,10 +50,10 @@ typedef struct {
} PolyList;
typedef struct {
float x_plus_hwidth;
float y_plus_hheight;
float hwidth; /* width * 0.5f */
float hheight; /* height * 0.5f */
float x_plus_hwidth;
float y_plus_hheight;
} Viewport;
extern Viewport VIEWPORTS[3];

View File

@ -386,7 +386,7 @@ static __attribute__((noinline)) void HandleCommand(Vertex* v) {
vp.hwidth = v->x;
vp.hheight = v->y;
vp.x_plus_hwidth = v->z;
vp.y_plus_hheight = v->w;
vp.y_plus_hheight = v->u;
}
void SceneListSubmit(Vertex* v3, int n, int type) {