mirror of https://github.com/ClassiCube/ClassiCube
Dreamcast: Further WIP on assembly code
This commit is contained in:
parent
67a47a0a9b
commit
b3abb0f054
|
|
@ -25,7 +25,7 @@ endif
|
|||
#---------------------------------------------------------------------------------
|
||||
# main targets
|
||||
#---------------------------------------------------------------------------------
|
||||
default: $(GLDC_LIB) $(BUILD_DIR) $(TARGET).cdi
|
||||
default: $(BUILD_DIR) $(TARGET).cdi
|
||||
|
||||
clean:
|
||||
rm $(TARGET).cdi $(TARGET).iso $(TARGET).elf $(TARGET)-scr.bin $(TARGET).bin $(OBJS)
|
||||
|
|
@ -37,7 +37,7 @@ $(BUILD_DIR):
|
|||
#---------------------------------------------------------------------------------
|
||||
# executable generation
|
||||
#---------------------------------------------------------------------------------
|
||||
$(TARGET).elf: $(OBJS)
|
||||
$(TARGET).elf: $(OBJS) $(GLDC_LIB)
|
||||
kos-cc $(LDFLAGS) $^ -o $@ $(LIBS)
|
||||
|
||||
$(TARGET).bin: $(TARGET).elf
|
||||
|
|
|
|||
|
|
@ -71,33 +71,33 @@ _ClipLine:
|
|||
add #4, r6 ! EX, OUT = &OUT->x
|
||||
fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT
|
||||
|
||||
fmov.s @r4+, fr2 ! LS, A = v1->x
|
||||
fmov.s @r5+, fr3 ! LS, B = v2->x
|
||||
fipr fv8, fv0 ! FE, LERP = A * t + B * invT
|
||||
fmov.s @r4+, fr2 ! LS, A1 = v1->x
|
||||
fmov.s @r5+, fr3 ! LS, B1 = v2->x
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
fmov.s @r4+, fr6 ! LS, A2 = v1->y
|
||||
fmov.s @r5+, fr7 ! LS, B2 = v2->y
|
||||
|
||||
fmov.s fr3, @r6 ! LS, OUT->x = LERP
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
|
||||
fmov.s @r4+, fr6 ! LS, A = v1->y
|
||||
fmov.s @r5+, fr7 ! LS, B = v2->y
|
||||
fipr fv8, fv4 ! FE, LERP = A * t + B * invT
|
||||
fipr fv8, fv4 ! FE, LERP(A2, B2)
|
||||
add #4, r4 ! EX, v1 += 4
|
||||
add #4, r5 ! EX, v2 += 4
|
||||
|
||||
fmov.s fr7, @r6 ! LS, OUT->y = LERP
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
|
||||
add #4, r4 ! EX, A = &v1->z
|
||||
add #4, r5 ! EX, B = &v2->z
|
||||
fmov.s fr1, @r6 ! LS, OUT->z = 0
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
|
||||
fmov.s @r4+, fr2 ! LS, A = v1->u
|
||||
fmov.s @r5+, fr3 ! LS, B = v2->u
|
||||
fipr fv8, fv0 ! FE, LERP = A * t + B * invT
|
||||
fmov.s @r4+, fr2 ! LS, A1 = v1->u
|
||||
fmov.s @r5+, fr3 ! LS, B1 = v2->u
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
fmov.s @r4+, fr6 ! LS, A2 = v1->v
|
||||
fmov.s @r5+, fr7 ! LS, B2 = v2->v
|
||||
|
||||
fmov.s fr3, @r6 ! LS, OUT->u = LERP
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
|
||||
fmov.s @r4+, fr6 ! LS, A = v1->v
|
||||
fmov.s @r5+, fr7 ! LS, B = v2->v
|
||||
fipr fv8, fv4 ! FE, lerp = A * t + B * invT
|
||||
fmov.s fr7, @r6 ! LS, OUT->v = lerp
|
||||
fipr fv8, fv4 ! FE, LERP(A2, B2)
|
||||
fmov.s fr7, @r6 ! LS, OUT->v = LERP
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
|
||||
mov.l @r4+,r0 ! LS, ACOLOR = v1->bgra
|
||||
|
|
@ -108,7 +108,7 @@ _ClipLine:
|
|||
extu.b r2,r3 ! EX, tmp = BCOLOR.b
|
||||
lds r3,fpul ! CO, FPUL = tmp
|
||||
float fpul,fr3 ! EX, fr3 = float(FPUL)
|
||||
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
ftrc fr3,fpul ! FE, FPUL = int(lerp)
|
||||
sts fpul,r3 ! CO, tmp = FPUL
|
||||
extu.b r3,r3 ! EX, tmp = (uint8)tmp
|
||||
|
|
@ -122,7 +122,7 @@ _ClipLine:
|
|||
extu.b r2,r3 ! EX, tmp = BCOLOR.g
|
||||
lds r3,fpul ! CO, FPUL = tmp
|
||||
float fpul,fr3 ! EX, fr3 = float(FPUL)
|
||||
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
ftrc fr3,fpul ! FE, FPUL = int(lerp)
|
||||
sts fpul,r3 ! CO, tmp = FPUL
|
||||
extu.b r3,r3 ! EX, tmp = (uint8)tmp
|
||||
|
|
@ -137,7 +137,7 @@ _ClipLine:
|
|||
extu.b r2,r3 ! EX, tmp = BCOLOR.b
|
||||
lds r3,fpul ! CO, FPUL = tmp
|
||||
float fpul,fr3 ! EX, fr3 = float(FPUL)
|
||||
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
ftrc fr3,fpul ! FE, FPUL = int(lerp)
|
||||
sts fpul,r3 ! CO, tmp = FPUL
|
||||
extu.b r3,r3 ! EX, tmp = (uint8)tmp
|
||||
|
|
@ -152,7 +152,7 @@ _ClipLine:
|
|||
extu.b r2,r3 ! EX, tmp = BCOLOR.r
|
||||
lds r3,fpul ! CO, FPUL = tmp
|
||||
float fpul,fr3 ! EX, fr3 = float(FPUL)
|
||||
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
ftrc fr3,fpul ! FE, FPUL = int(lerp)
|
||||
sts fpul,r3 ! CO, tmp = FPUL
|
||||
extu.b r3,r3 ! EX, tmp = (uint8)tmp
|
||||
|
|
@ -161,9 +161,9 @@ _ClipLine:
|
|||
or r3,r7 ! EX, OUTCOLOR.a |= tmp
|
||||
mov.l r7, @r6 ! LS, OUT->color = OUTCOLOR
|
||||
|
||||
fmov.s @r4+,fr2 ! LS, A = v1->w
|
||||
fmov.s @r5+,fr3 ! LS, B = v2->w
|
||||
fipr fv8, fv0 ! FE, lerp = A * t + B * invT
|
||||
fmov.s @r4+,fr2 ! LS, A1 = v1->w
|
||||
fmov.s @r5+,fr3 ! LS, B1 = v2->w
|
||||
fipr fv8, fv0 ! FE, LERP(A1, B1)
|
||||
add #4, r6 ! EX, OUT += 4
|
||||
rts ! CO, return after executing instruction in delay slot
|
||||
fmov.s fr3, @r6 ! LS, OUT->w = lerp
|
||||
|
|
|
|||
|
|
@ -2,10 +2,15 @@
|
|||
! r9 = num vertices left
|
||||
! r10 = PVR_CMD_VERTEX
|
||||
! r11 = PVR_CMD_VERTEX_EOL
|
||||
! r12 = 0xFF000000
|
||||
! r12 = ??????
|
||||
! r13 = cur vertex
|
||||
! r14 = next vertex (prefetch)
|
||||
|
||||
!fr8 = VIEWPORT_HWIDTH
|
||||
!fr9 = VIEWPORT_HHEIGHT
|
||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
#define REG_V0 r4
|
||||
#define REG_V1 r5
|
||||
#define REG_V2 r6
|
||||
|
|
@ -40,41 +45,140 @@
|
|||
.endm
|
||||
|
||||
|
||||
! Applies the perspective divide and viewport transform to a vertex, in place
! CLOBBERS: fr0, fr4, fr5
! INPUTS:   R    (register pointing at the start of the vertex)
!           fr8  = VIEWPORT_HWIDTH,        fr9  = VIEWPORT_HHEIGHT
!           fr10 = VIEWPORT_X_PLUS_HWIDTH, fr11 = VIEWPORT_Y_PLUS_HHEIGHT
! OUTPUTS:  vertex->x, vertex->y and vertex->z are overwritten;
!           R points at the start of the vertex again on exit
! TODO optimise greatly
.macro ViewportTransform R
! INVERSE W CALCULATION
    add     #28, \R         ! EX, \R = &vertex->w
    fmov.s  @\R,fr0         ! LS, fr0 = vertex->w
    fmul    fr0,fr0         ! FE, fr0 = w * w
    add     #-24, \R        ! EX, \R = &vertex->x
    fsrra   fr0             ! FE, fr0 = 1 / sqrt(w * w) -> F = 1 / |vertex->w| (approximation)

! TRANSFORM X
    fmov.s  @\R,fr4         ! LS, fr4 = vertex->x
    fmov    fr10,fr5        ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
    fmul    fr8,fr4         ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x
    fmac    fr0,fr4,fr5     ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
    fmov.s  fr5,@\R         ! LS, vertex->x = fr5 (must go through \R, not a fixed register)
    add     #4, \R          ! EX, \R = &vertex->y

! TRANSFORM Y
    fmov.s  @\R,fr4         ! LS, fr4 = vertex->y
    fmov    fr11,fr5        ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
    fmul    fr9,fr4         ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y
    fmac    fr0,fr4,fr5     ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
    fmov.s  fr5,@\R         ! LS, vertex->y = fr5
    add     #4, \R          ! EX, \R = &vertex->z

! ASSIGN Z
    fmov.s  fr0,@\R         ! LS, vertex->z = F (the inverse W computed above)
    add     #-12, \R        ! EX, \R -= 12 (back to start of vertex)
.endm
|
||||
|
||||
|
||||
! Processes a non-vertex entry: either a viewport update or a PowerVR GPU command
! CLOBBERS: r0, r2 (r3 is presumably clobbered inside PushVertex - not shown here)
! INPUTS:   r4 = entry to process (r8 = SQ global, presumably consumed by PushVertex)
! OUTPUTS:  viewport update: fr8-fr11 reloaded, vp global rewritten, r4 advanced by 20 bytes
!           anything else:   entry is handed to PushVertex
_HandleCommand1:
    mov.l   @REG_V0,r2      ! r2 = v->flags
    extu.b  r2,r0           ! r0 = v->flags & 0xFF
    cmp/eq  #35,r0          ! T  = (r0 == 0x23), presumably the viewport update command
    bt      1f              ! if (T) goto 1 (viewport update)

! Any other command is submitted as is
    PushVertex REG_V0
    rts
    nop                     ! branch delay slot

1:
! Viewport update: the four values follow the flags word
    mov.l   .VP_0,r2        ! r2 = &vp
    add     #4,r4           ! skip over v->flags
    add     #16,r2          ! r2 = end of the four vp floats (stored back to front below)
! Load VIEWPORT registers
    fmov.s  @r4+, fr8       ! VIEWPORT_HWIDTH         = src->x
    fmov.s  @r4+, fr9       ! VIEWPORT_HHEIGHT        = src->y
    fmov.s  @r4+,fr10       ! VIEWPORT_X_PLUS_HWIDTH  = src->z
    fmov.s  @r4+,fr11       ! VIEWPORT_Y_PLUS_HHEIGHT = src->u
! And mirror them into the vp global
    fmov.s  fr11,@-r2       ! 4th float of vp = fr11
    fmov.s  fr10,@-r2       ! 3rd float of vp = fr10
    fmov.s  fr9,@-r2        ! 2nd float of vp = fr9
    rts
    fmov.s  fr8,@-r2        ! 1st float of vp = fr8 (executes in the rts delay slot)

.align 4
.VP_0:
    .long   _vp
|
||||
|
||||
|
||||
! Quad cases other than "all four vertices visible".
! None of them is implemented yet: every label below shares one immediate
! return, so jumping to any of these cases submits nothing.
_Case_0_0_0_1:
_Case_0_0_1_0:
_Case_0_0_1_1:
_Case_0_1_0_0:
_Case_0_1_0_1:
_Case_0_1_1_0:
_Case_0_1_1_1:
_Case_1_0_0_0:
_Case_1_0_0_1:
_Case_1_0_1_0:
_Case_1_0_1_1:
_Case_1_1_0_0:
_Case_1_1_0_1:
_Case_1_1_1_0:
    rts                     ! return straight to the caller
    nop                     ! branch delay slot
|
||||
|
||||
! Quad case with clipflags 1_1_1_1 (presumably all four vertices visible).
! Each vertex gets the viewport transform and is then pushed, in the order
! v1, v2, v0, v3 - i.e. as the triangle strip {1,2,0} {2,0,3}.
_Case_1_1_1_1:
    ViewportTransform REG_V1    ! v1: perspective divide + viewport transform
    PushVertex        REG_V1

    ViewportTransform REG_V2    ! v2: perspective divide + viewport transform
    PushVertex        REG_V2

    ViewportTransform REG_V0    ! v0: perspective divide + viewport transform
    PushVertex        REG_V0

    ViewportTransform REG_V3    ! v3: perspective divide + viewport transform
    PushVertex        REG_V3

    rts                         ! return to the caller
    nop                         ! branch delay slot
|
||||
|
||||
|
||||
.global _ProcessVertexList
|
||||
.align 4
|
||||
.type _ProcessVertexList,%function
|
||||
_ProcessVertexList:
|
||||
! STORE REGISTERS
|
||||
mov.l r8,@-r15
|
||||
mov.l r9,@-r15
|
||||
! STORE CPU REGISTERS
|
||||
mov.l r8,@-r15
|
||||
mov.l r9,@-r15
|
||||
mov.l r10,@-r15
|
||||
mov.l r11,@-r15
|
||||
mov.l r12,@-r15
|
||||
mov.l r13,@-r15
|
||||
mov.l r14,@-r15
|
||||
sts.l pr,@-r15
|
||||
mov r4,r14
|
||||
mov r4,r13
|
||||
mov.l .L33,r12
|
||||
mov.l .L35,r11
|
||||
mov.l .L34,r10
|
||||
mov r5,r9
|
||||
sts.l pr,@-r15
|
||||
! STORE FPU REGISTERS
|
||||
fmov.s fr8,@-r15
|
||||
fmov.s fr9,@-r15
|
||||
fmov.s fr10,@-r15
|
||||
fmov.s fr11,@-r15
|
||||
! VIEWPORT SETUP
|
||||
mov.l .VP_1,r0 ! LS, r0 = &vp
|
||||
fmov.s @r0+,fr8 ! LS, fr8 = vp.HWIDTH
|
||||
fmov.s @r0+,fr9 ! LS, fr9 = vp.HHEIGHT
|
||||
fmov.s @r0+,fr10 ! LS, fr10 = vp.X_PLUS_HWIDTH
|
||||
fmov.s @r0+,fr11 ! LS, fr11 = vp.Y_PLUS_HHEIGHT
|
||||
! REGISTER SETUP
|
||||
mov r4,r14
|
||||
mov r4,r13
|
||||
!mov.l .CMD_MSK,r12
|
||||
mov.l .PVR_EOL,r11
|
||||
mov.l .PVR_VTX,r10
|
||||
mov r5,r9
|
||||
bra SUBMIT_LOOP
|
||||
nop
|
||||
mov r6,r8
|
||||
|
||||
DO_CMD:
|
||||
mov.l .L37,r2
|
||||
|
|
@ -86,8 +190,8 @@ DO_CMD:
|
|||
SUBMIT_LOOP:
|
||||
mov.l @r13,r0 ! FLAGS = CUR->flags
|
||||
add #32,r14 ! NEXT += sizeof(Vertex)
|
||||
mov r1,r2 ! TYPE = FLAGS
|
||||
and r12,r2 ! TYPE = FLAGS & 0xFF000000
|
||||
mov r0,r2 ! TYPE = FLAGS
|
||||
and r11,r2 ! TYPE = FLAGS & 0xF000000 (reuse PVR_CMD_VERTEX_EOL as type mask)
|
||||
! Check for PVR_CMD_VERTEX
|
||||
cmp/eq r10,r2 ! T = r2 == PVR_CMD_VERTEX
|
||||
bt.s NEXT_ITER ! if (T) goto NEXT_ITER
|
||||
|
|
@ -96,11 +200,11 @@ SUBMIT_LOOP:
|
|||
cmp/eq r11,r2 ! T = r2 == PVR_CMD_VERTEX_EOL
|
||||
bf.s DO_CMD ! if (!T) goto DO_CMD
|
||||
! PVR_CMD_VERTEX_EOL case
|
||||
extu.b r1,r1 ! EX, MASK = FLAGS & 0xFF (branch delay slot)
|
||||
mov.l r12,@r13 ! LS, CUR->flags = TYPE
|
||||
extu.b r0,r1 ! EX, MASK = FLAGS & 0xFF (branch delay slot)
|
||||
mov.l r11,@r13 ! LS, CUR->flags = PVR_CMD_VERTEX_EOL
|
||||
|
||||
! Prepare and then jump to quad drawing function, based on quad clipflags
|
||||
mova CASES,r0 ! LS, r0 = CASES
|
||||
mova .CASES,r0 ! LS, r0 = CASES
|
||||
mov r13,r7 ! MT, r7 = v3
|
||||
shll2 r1 ! EX, MASK <<= 2
|
||||
mov r13,r6 ! MT, r6 = v3
|
||||
|
|
@ -116,7 +220,19 @@ NEXT_ITER:
|
|||
bf.s SUBMIT_LOOP
|
||||
mov r14,r13 ! CUR = NEXT
|
||||
|
||||
! RESTORE REGISTERS
|
||||
! VIEWPORT SAVE
|
||||
mov.l .VP_1,r0
|
||||
add #16,r0
|
||||
fmov.s fr11,@-r0
|
||||
fmov.s fr10,@-r0
|
||||
fmov.s fr9,@-r0
|
||||
fmov.s fr8,@-r0
|
||||
! RESTORE FPU REGISTERS
|
||||
fmov.s @r15+, fr8
|
||||
fmov.s @r15+, fr9
|
||||
fmov.s @r15+,fr10
|
||||
fmov.s @r15+,fr11
|
||||
! RESTORE CPU REGISTERS
|
||||
lds.l @r15+,pr
|
||||
mov.l @r15+,r14
|
||||
mov.l @r15+,r13
|
||||
|
|
@ -128,20 +244,20 @@ NEXT_ITER:
|
|||
mov.l @r15+,r8
|
||||
|
||||
.align 4
|
||||
.L33:
|
||||
.long -16777216
|
||||
.L34:
|
||||
.long -536870912
|
||||
.L35:
|
||||
.long -268435456
|
||||
.VP_1:
|
||||
.long _vp
|
||||
.PVR_VTX:
|
||||
.long 0xE0000000
|
||||
.PVR_EOL:
|
||||
.long 0xF0000000
|
||||
.L37:
|
||||
.long _HandleCommand
|
||||
|
||||
! CASES table holds the functions to transfer a quad,
|
||||
! based on the visibility clipflags of the 4 vertices
|
||||
! e.g. CASES[15] = V0_VIS | V1_VIS | V2_VIS | V3_VIS (all 4 visible)
|
||||
CASES:
|
||||
.long 0 ! Should never happen
|
||||
.CASES:
|
||||
.long _arch_exit ! Should never happen
|
||||
.long _Case_0_0_0_1
|
||||
.long _Case_0_0_1_0
|
||||
.long _Case_0_0_1_1
|
||||
|
|
@ -156,4 +272,4 @@ CASES:
|
|||
.long _Case_1_1_0_0
|
||||
.long _Case_1_1_0_1
|
||||
.long _Case_1_1_1_0
|
||||
.long _Case_1_1_1_1
|
||||
.long _Case1111
|
||||
|
|
|
|||
|
|
@ -657,7 +657,7 @@ void Gfx_SetViewport(int x, int y, int w, int h) {
|
|||
c.x = w * 0.5f; // hwidth
|
||||
c.y = h * -0.5f; // hheight
|
||||
c.z = x + w * 0.5f; // x_plus_hwidth
|
||||
c.w = y + h * 0.5f; // y_plus_hheight
|
||||
c.u = y + h * 0.5f; // y_plus_hheight
|
||||
PushCommand(&c);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -50,10 +50,10 @@ typedef struct {
|
|||
} PolyList;
|
||||
|
||||
typedef struct {
|
||||
float x_plus_hwidth;
|
||||
float y_plus_hheight;
|
||||
float hwidth; /* width * 0.5f */
|
||||
float hheight; /* height * 0.5f */
|
||||
float x_plus_hwidth;
|
||||
float y_plus_hheight;
|
||||
} Viewport;
|
||||
|
||||
extern Viewport VIEWPORTS[3];
|
||||
|
|
|
|||
|
|
@ -386,7 +386,7 @@ static __attribute__((noinline)) void HandleCommand(Vertex* v) {
|
|||
vp.hwidth = v->x;
|
||||
vp.hheight = v->y;
|
||||
vp.x_plus_hwidth = v->z;
|
||||
vp.y_plus_hheight = v->w;
|
||||
vp.y_plus_hheight = v->u;
|
||||
}
|
||||
|
||||
void SceneListSubmit(Vertex* v3, int n, int type) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue