Dreamcast: Few minor assembly improvements

This commit is contained in:
UnknownShadow200 2024-07-07 17:43:23 +10:00
parent 8831f6a589
commit 06ac94db53
4 changed files with 77 additions and 80 deletions

View File

@ -20,12 +20,12 @@ jobs:
- uses: actions/checkout@v4
- name: Compile Dreamcast build
id: compile
shell: bash
run: |
apt-get update
apt-get -y install genisoimage
wget https://github.com/ClassiCube/rpi-compiling-stuff/raw/main/cdi4dc -O /opt/toolchains/dc/kos/utils/cdi4dc
chmod +x /opt/toolchains/dc/kos/utils/cdi4dc
source /opt/toolchains/dc/kos/environ.sh
export PATH=/opt/toolchains/dc/kos/utils/:$PATH
make dreamcast

View File

@ -2,10 +2,19 @@
! r1 CLOBBERS
! r2 CLOBBERS
! r3 CLOBBERS
! r7 CLOBBERS
! r4 = v1
! r5 = v2
! r6 = OUT
#define TM1 r1
#define TM2 r3
#define TM3 r7
#define IN1 r4
#define IN2 r5
#define OUT r6
! FR0 = 0
! FR1 = 0
! FR2 = A.1
@ -48,13 +57,13 @@
.align 4
.type _ClipLine,%function
_ClipLine:
mov r4, r1 ! MT, r1 = &v1
mov IN1, TM1 ! MT, tmp = &v1
fldi0 fr4 ! LS, fr4 = 0
add #12, r1 ! EX, r1 = &v1->z
add #12, TM1 ! EX, tmp = &v1->z
fmov.s @r1, fr2 ! LS, fr2 = v1->z
mov r5, r1 ! MT, r1 = &v2
mov IN2, TM1 ! MT, tmp = &v2
fldi0 fr5 ! LS, fr5 = 0
add #12, r1 ! EX, r1 = &v2->z
add #12, TM1 ! EX, tmp = &v2->z
fmov.s @r1,fr11 ! LS, fr11 = v2->z
fsub fr2,fr11 ! FE, fr11 = v2->z - v1->z
fldi0 fr8 ! LS, fr8 = 0
@ -65,46 +74,46 @@ _ClipLine:
fsrra fr11 ! FE, fr11 = 1 / abs(v2->z - v1->z)
fabs fr2 ! LS, fr2 = abs(v1->z)
fmul fr2,fr11 ! FE, fr11 = abs(v1->Z) / abs(v2->z - v1->z) --> t
add #4, r4 ! EX, A = &v1->x
add #4, IN1 ! EX, A = &v1->x
fldi1 fr10 ! LS, fr10 = 1
add #4, r5 ! EX, B = &v2->x
add #4, r6 ! EX, OUT = &OUT->x
add #4, IN2 ! EX, B = &v2->x
add #4, OUT ! EX, OUT = &OUT->x
fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT
fmov.s @r4+, fr2 ! LS, A1 = v1->x
fmov.s @r5+, fr3 ! LS, B1 = v2->x
fmov.s @IN1+, fr2 ! LS, A1 = v1->x
fmov.s @IN2+, fr3 ! LS, B1 = v2->x
fipr fv8, fv0 ! FE, LERP(A1, B1)
fmov.s @r4+, fr6 ! LS, A2 = v1->y
fmov.s @r5+, fr7 ! LS, B2 = v2->y
fmov.s @IN1+, fr6 ! LS, A2 = v1->y
fmov.s @IN2+, fr7 ! LS, B2 = v2->y
fmov.s fr3, @r6 ! LS, OUT->x = LERP
add #4, r6 ! EX, OUT += 4
fmov.s fr3,@OUT ! LS, OUT->x = LERP
add #4, OUT ! EX, OUT += 4
fipr fv8, fv4 ! FE, LERP(A2, B2)
add #4, r4 ! EX, v1 += 4
add #4, r5 ! EX, v2 += 4
add #4, IN1 ! EX, v1 += 4
add #4, IN2 ! EX, v2 += 4
fmov.s fr7, @r6 ! LS, OUT->y = LERP
add #4, r6 ! EX, OUT += 4
fmov.s fr1, @r6 ! LS, OUT->z = 0
add #4, r6 ! EX, OUT += 4
fmov.s fr7,@OUT ! LS, OUT->y = LERP
add #4, OUT ! EX, OUT += 4
fmov.s fr1,@OUT ! LS, OUT->z = 0
add #4, OUT ! EX, OUT += 4
fmov.s @r4+, fr2 ! LS, A1 = v1->u
fmov.s @r5+, fr3 ! LS, B1 = v2->u
fmov.s @IN1+, fr2 ! LS, A1 = v1->u
fmov.s @IN2+, fr3 ! LS, B1 = v2->u
fipr fv8, fv0 ! FE, LERP(A1, B1)
fmov.s @r4+, fr6 ! LS, A2 = v1->v
fmov.s @r5+, fr7 ! LS, B2 = v2->v
fmov.s @IN1+, fr6 ! LS, A2 = v1->v
fmov.s @IN2+, fr7 ! LS, B2 = v2->v
fmov.s fr3, @r6 ! LS, OUT->u = LERP
add #4, r6 ! EX, OUT += 4
fmov.s fr3,@OUT ! LS, OUT->u = LERP
add #4, OUT ! EX, OUT += 4
fipr fv8, fv4 ! FE, LERP(A2, B2)
fmov.s fr7, @r6 ! LS, OUT->v = LERP
add #4, r6 ! EX, OUT += 4
fmov.s fr7,@OUT ! LS, OUT->v = LERP
add #4, OUT ! EX, OUT += 4
mov.l @r4+,r0 ! LS, ACOLOR = v1->bgra
mov.l @IN1+,r0 ! LS, ACOLOR = v1->bgra
extu.b r0,r1 ! EX, tmp = ACOLOR.b
lds r1,fpul ! CO, FPUL = tmp
float fpul,fr2 ! EX, fr2 = float(FPUL)
mov.l @r5+,r2 ! LS, BCOLOR = v2->bgra
mov.l @IN2+,r2 ! LS, BCOLOR = v2->bgra
extu.b r2,r3 ! EX, tmp = BCOLOR.b
lds r3,fpul ! CO, FPUL = tmp
float fpul,fr3 ! EX, fr3 = float(FPUL)
@ -159,11 +168,11 @@ _ClipLine:
shll16 r3 ! EX, tmp <<= 16
shll8 r3 ! EX, tmp <<= 8
or r3,r7 ! EX, OUTCOLOR.a |= tmp
mov.l r7, @r6 ! LS, OUT->color = OUTCOLOR
mov.l r7,@OUT ! LS, OUT->color = OUTCOLOR
fmov.s @r4+,fr2 ! LS, A1 = v1->w
fmov.s @r5+,fr3 ! LS, B1 = v2->w
fmov.s @IN1+,fr2 ! LS, A1 = v1->w
fmov.s @IN2+,fr3 ! LS, B1 = v2->w
fipr fv8, fv0 ! FE, LERP(A1, B1)
add #4, r6 ! EX, OUT += 4
add #4, OUT ! EX, OUT += 4
rts ! CO, return after executing instruction in delay slot
fmov.s fr3, @r6 ! LS, OUT->w = lerp
fmov.s fr3,@OUT ! LS, OUT->w = lerp

View File

@ -2,7 +2,7 @@
! r9 = num vertices left
! r10 = PVR_CMD_VERTEX
! r11 = PVR_CMD_VERTEX_EOL
! r12 = ??????
! r12 = ClipLine function
! r13 = cur vertex
! r14 = next vertex (prefetch)
@ -80,39 +80,6 @@
.endm
! Handles a viewport update or PowerVR GPU command
! Reads the flags word of the command and dispatches on its low byte:
!   0x23      -> viewport update: the four floats following the flags word
!                are loaded into fr8-fr11 and copied into the _vp global
!   otherwise -> the command is handed to the PushVertex macro
! CLOBBERS: r0, r2, r3
!   NOTE(review): r3 is not written directly in this routine - presumably
!   it is used inside the PushVertex macro; confirm against the macro body
! INPUTS: r4, r8 (SQ global)
!   NOTE(review): the flags load goes through REG_V0 while the viewport path
!   uses r4 directly - presumably REG_V0 is an alias of r4; confirm
! OUTPUTS: r4,r8 altered, fr8-fr12
!   NOTE(review): only fr8-fr11 are written by the code visible here, and r8
!   is only touched (if at all) inside PushVertex; confirm the fr12/r8 claims
_HandleCommand:
mov.l @REG_V0,r2 ! r2 = v->flags
extu.b r2,r0 ! r0 = v->flags & 0xFF (cmp/eq with an immediate needs r0)
cmp/eq #35,r0 ! T = r0 == 0x23
bt.s 1f ! if (T) goto 1; (delayed branch, next instruction fills the slot)
nop ! delay slot of bt.s
! Not a viewport update: pass the command to PushVertex and return
PushVertex REG_V0
rts
nop ! delay slot of rts
! Viewport update
1:
add #4,r4 ! r4 = &src->x (skip the 4 byte flags word)
mov.l .VP_0,r2 ! r2 = &vp
! Load VIEWPORT registers
fmov.s @r4+, fr8 ! VIEWPORT_HWIDTH = src->x
fmov.s @r4+, fr9 ! VIEWPORT_HHEIGHT = src->y
fmov.s @r4+,fr10 ! VIEWPORT_X_PLUS_HWIDTH = src->z
add #16,r2 ! r2 = &vp + 16, so the pre-decrement stores below fill vp+12 down to vp+0
fmov.s @r4+,fr11 ! VIEWPORT_Y_PLUS_HHEIGHT = src->u (r4 has now advanced 20 bytes in total)
! And store to vp global
! Stores run in reverse order because @-r2 decrements r2 before each write
fmov.s fr11,@-r2 ! vp + 12 = VIEWPORT_Y_PLUS_HHEIGHT
fmov.s fr10,@-r2 ! vp + 8 = VIEWPORT_X_PLUS_HWIDTH
fmov.s fr9,@-r2 ! vp + 4 = VIEWPORT_HHEIGHT
rts
fmov.s fr8,@-r2 ! delay slot of rts: vp + 0 = VIEWPORT_HWIDTH
! Literal pool: address of the vp global, loaded PC-relative by mov.l above
.align 4
.VP_0:
.long _vp
_Case_0_0_0_1:
_Case_0_0_1_0:
_Case_0_0_1_1:
@ -173,19 +140,40 @@ _ProcessVertexList:
! REGISTER SETUP
mov r4,r14
mov r4,r13
!mov.l .CMD_MSK,r12
mov.l .PVR_EOL,r11
mov.l .PVR_VTX,r10
mov.l .CLIPFUNC,r12
mov.l .PVR_EOL, r11
mov.l .PVR_VTX, r10
mov r5,r9
bra SUBMIT_LOOP
mov r6,r8
! Handles a non-vertex command
DO_CMD:
mov.l .L37,r1
jsr @r1
mov r13,r4
mov r13,r4 ! r4 = CUR
mov r1,r0 ! r0 = MASK
cmp/eq #35,r0 ! T = MASK == 0x23
bt.s 9f ! if (T) goto 9;
nop
! PowerVR GPU command
PushVertex REG_V0
bra NEXT_ITER
nop
! Viewport update command
9:
add #4,r4
mov.l .VP_1,r2
! Load VIEWPORT registers
fmov.s @r4+, fr8 ! VIEWPORT_HWIDTH = src->x
fmov.s @r4+, fr9 ! VIEWPORT_HHEIGHT = src->y
fmov.s @r4+,fr10 ! VIEWPORT_X_PLUS_HWIDTH = src->z
add #16,r2
fmov.s @r4+,fr11 ! VIEWPORT_Y_PLUS_HHEIGHT = src->u
! And store to vp global
fmov.s fr11,@-r2
fmov.s fr10,@-r2
fmov.s fr9,@-r2
bra NEXT_ITER
fmov.s fr8,@-r2
SUBMIT_LOOP:
mov.l @r13,r0 ! FLAGS = CUR->flags
@ -250,8 +238,8 @@ NEXT_ITER:
.long 0xE0000000
.PVR_EOL:
.long 0xF0000000
.L37:
.long _HandleCommand
.CLIPFUNC:
.long _ClipLine
! CASES table holds the functions to transfer a quad,
! based on the visibility clipflags of the 4 vertices
@ -262,12 +250,12 @@ NEXT_ITER:
.long _Case_0_0_1_0
.long _Case_0_0_1_1
.long _Case_0_1_0_0
.long _Case_0_1_0_1
.long _arch_exit ! V0_VIS | V2_VIS, Should never happen
.long _Case_0_1_1_0
.long _Case_0_1_1_1
.long _Case_1_0_0_0
.long _Case_1_0_0_1
.long _Case_1_0_1_0
.long _arch_exit ! V1_VIS | V3_VIS, Should never happen
.long _Case_1_0_1_1
.long _Case_1_1_0_0
.long _Case_1_1_0_1

View File

@ -8,7 +8,7 @@
#define SQ_BASE_ADDRESS (void*) 0xe0000000
#define PREFETCH(addr) __builtin_prefetch((addr))
static Viewport vp;
Viewport vp;
GL_FORCE_INLINE float _glFastInvert(float x) {
return MATH_fsrra(x * x);