From 06ac94db53be991b9158c64be9b0f00db490a10e Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sun, 7 Jul 2024 17:43:23 +1000 Subject: [PATCH] Dreamcast: Few minor assembly improvements --- .github/workflows/build_dreamcast.yml | 2 +- misc/dreamcast/VertexClip.S | 77 +++++++++++++++------------ misc/dreamcast/VertexDraw.S | 76 +++++++++++--------------- third_party/gldc/src/sh4.c | 2 +- 4 files changed, 77 insertions(+), 80 deletions(-) diff --git a/.github/workflows/build_dreamcast.yml b/.github/workflows/build_dreamcast.yml index b6cb02dbc..a4603e786 100644 --- a/.github/workflows/build_dreamcast.yml +++ b/.github/workflows/build_dreamcast.yml @@ -20,12 +20,12 @@ jobs: - uses: actions/checkout@v4 - name: Compile Dreamcast build id: compile + shell: bash run: | apt-get update apt-get -y install genisoimage wget https://github.com/ClassiCube/rpi-compiling-stuff/raw/main/cdi4dc -O /opt/toolchains/dc/kos/utils/cdi4dc chmod +x /opt/toolchains/dc/kos/utils/cdi4dc - source /opt/toolchains/dc/kos/environ.sh export PATH=/opt/toolchains/dc/kos/utils/:$PATH make dreamcast diff --git a/misc/dreamcast/VertexClip.S b/misc/dreamcast/VertexClip.S index 02e5a20ba..19b58ba4d 100644 --- a/misc/dreamcast/VertexClip.S +++ b/misc/dreamcast/VertexClip.S @@ -2,10 +2,19 @@ ! r1 CLOBBERS ! r2 CLOBBERS ! r3 CLOBBERS +! r7 CLOBBERS ! r4 = v1 ! r5 = v2 ! r6 = OUT +#define TM1 r1 +#define TM2 r3 +#define TM3 r7 + +#define IN1 r4 +#define IN2 r5 +#define OUT r6 + ! FR0 = 0 ! FR1 = 0 ! FR2 = A.1 @@ -48,13 +57,13 @@ .align 4 .type _ClipLine,%function _ClipLine: - mov r4, r1 ! MT, r1 = &v1 + mov IN1, TM1 ! MT, tmp = &v1 fldi0 fr4 ! LS, fr4 = 0 - add #12, r1 ! EX, r1 = &v1->z + add #12, TM1 ! EX, tmp = &v1->z fmov.s @r1, fr2 ! LS, fr2 = v1->z - mov r5, r1 ! MT, r1 = &v2 + mov IN2, TM1 ! MT, tmp = &v2 fldi0 fr5 ! LS, fr5 = 0 - add #12, r1 ! EX, r1 = &v2->z + add #12, TM1 ! EX, tmp = &v2->z fmov.s @r1,fr11 ! LS, fr11 = v2->z fsub fr2,fr11 ! FE, fr11 = v2->z - v1->z fldi0 fr8 ! LS, fr8 = 0 @@ -65,46 +74,46 @@ _ClipLine: fsrra fr11 ! FE, fr11 = 1 / abs(v2->z - v1->z) fabs fr2 ! LS, fr2 = abs(v1->z) fmul fr2,fr11 ! FE, fr11 = abs(v1->Z) / abs(v2->z - v1->z) --> t - add #4, r4 ! EX, A = &v1->x + add #4, IN1 ! EX, A = &v1->x fldi1 fr10 ! LS, fr10 = 1 - add #4, r5 ! EX, B = &v2->x - add #4, r6 ! EX, OUT = &OUT->x + add #4, IN2 ! EX, B = &v2->x + add #4, OUT ! EX, OUT = &OUT->x fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT - fmov.s @r4+, fr2 ! LS, A1 = v1->x - fmov.s @r5+, fr3 ! LS, B1 = v2->x + fmov.s @IN1+, fr2 ! LS, A1 = v1->x + fmov.s @IN2+, fr3 ! LS, B1 = v2->x fipr fv8, fv0 ! FE, LERP(A1, B1) - fmov.s @r4+, fr6 ! LS, A2 = v1->y - fmov.s @r5+, fr7 ! LS, B2 = v2->y + fmov.s @IN1+, fr6 ! LS, A2 = v1->y + fmov.s @IN2+, fr7 ! LS, B2 = v2->y - fmov.s fr3, @r6 ! LS, OUT->x = LERP - add #4, r6 ! EX, OUT += 4 + fmov.s fr3,@OUT ! LS, OUT->x = LERP + add #4, OUT ! EX, OUT += 4 fipr fv8, fv4 ! FE, LERP(A2, B2) - add #4, r4 ! EX, v1 += 4 - add #4, r5 ! EX, v2 += 4 + add #4, IN1 ! EX, v1 += 4 + add #4, IN2 ! EX, v2 += 4 - fmov.s fr7, @r6 ! LS, OUT->y = LERP - add #4, r6 ! EX, OUT += 4 - fmov.s fr1, @r6 ! LS, OUT->z = 0 - add #4, r6 ! EX, OUT += 4 + fmov.s fr7,@OUT ! LS, OUT->y = LERP + add #4, OUT ! EX, OUT += 4 + fmov.s fr1,@OUT ! LS, OUT->z = 0 + add #4, OUT ! EX, OUT += 4 - fmov.s @r4+, fr2 ! LS, A1 = v1->u - fmov.s @r5+, fr3 ! LS, B1 = v2->u + fmov.s @IN1+, fr2 ! LS, A1 = v1->u + fmov.s @IN2+, fr3 ! LS, B1 = v2->u fipr fv8, fv0 ! FE, LERP(A1, B1) - fmov.s @r4+, fr6 ! LS, A2 = v1->v - fmov.s @r5+, fr7 ! LS, B2 = v2->v + fmov.s @IN1+, fr6 ! LS, A2 = v1->v + fmov.s @IN2+, fr7 ! LS, B2 = v2->v - fmov.s fr3, @r6 ! LS, OUT->u = LERP - add #4, r6 ! EX, OUT += 4 + fmov.s fr3,@OUT ! LS, OUT->u = LERP + add #4, OUT ! EX, OUT += 4 fipr fv8, fv4 ! FE, LERP(A2, B2) - fmov.s fr7, @r6 ! LS, OUT->v = LERP - add #4, r6 ! EX, OUT += 4 + fmov.s fr7,@OUT ! LS, OUT->v = LERP + add #4, OUT ! EX, OUT += 4 - mov.l @r4+,r0 ! LS, ACOLOR = v1->bgra + mov.l @IN1+,r0 ! LS, ACOLOR = v1->bgra extu.b r0,r1 ! EX, tmp = ACOLOR.b lds r1,fpul ! CO, FPUL = tmp float fpul,fr2 ! EX, fr2 = float(FPUL) - mov.l @r5+,r2 ! LS, BCOLOR = v2->bgra + mov.l @IN2+,r2 ! LS, BCOLOR = v2->bgra extu.b r2,r3 ! EX, tmp = BCOLOR.b lds r3,fpul ! CO, FPUL = tmp float fpul,fr3 ! EX, fr3 = float(FPUL) @@ -159,11 +168,11 @@ _ClipLine: shll16 r3 ! EX, tmp <<= 16 shll8 r3 ! EX, tmp <<= 8 or r3,r7 ! EX, OUTCOLOR.a |= tmp - mov.l r7, @r6 ! LS, OUT->color = OUTCOLOR + mov.l r7,@OUT ! LS, OUT->color = OUTCOLOR - fmov.s @r4+,fr2 ! LS, A1 = v1->w - fmov.s @r5+,fr3 ! LS, B1 = v2->w + fmov.s @IN1+,fr2 ! LS, A1 = v1->w + fmov.s @IN2+,fr3 ! LS, B1 = v2->w fipr fv8, fv0 ! FE, LERP(A1, B1) - add #4, r6 ! EX, OUT += 4 + add #4, OUT ! EX, OUT += 4 rts ! CO, return after executing instruction in delay slot - fmov.s fr3, @r6 ! LS, OUT->w = lerp + fmov.s fr3,@OUT ! LS, OUT->w = lerp diff --git a/misc/dreamcast/VertexDraw.S b/misc/dreamcast/VertexDraw.S index a707796de..4508afd2a 100644 --- a/misc/dreamcast/VertexDraw.S +++ b/misc/dreamcast/VertexDraw.S @@ -2,7 +2,7 @@ ! r9 = num vertices left ! r10 = PVR_CMD_VERTEX ! r11 = PVR_CMD_VERTEX_EOL -! r12 = ?????? +! r12 = ClipLine function ! r13 = cur vertex ! r14 = next vertex (prefetch) @@ -80,39 +80,6 @@ .endm -! Handles a viewport update or PowerVR GPU command -! CLOBBERS: r0, r2, r3 -! INPUTS: r4, r8 (SQ global) -! OUTPUTS: r4,r8 altered, fr8-fr12 -_HandleCommand: - mov.l @REG_V0,r2 ! r2 = v->flags - extu.b r2,r0 ! r2 = v->flags & 0xFF - cmp/eq #35,r0 ! T = r2 == 0x23 - bt.s 1f ! if (T) goto 1; - nop - PushVertex REG_V0 - rts - nop -1: - add #4,r4 - mov.l .VP_0,r2 - ! Load VIEWPORT registers - fmov.s @r4+, fr8 ! VIEWPORT_HWIDTH = src->x - fmov.s @r4+, fr9 ! VIEWPORT_HHEIGHT = src->y - fmov.s @r4+,fr10 ! VIEWPORT_X_PLUS_HWIDTH = src->z - add #16,r2 - fmov.s @r4+,fr11 ! VIEWPORT_Y_PLUS_HHEIGHT = src->u - ! And store to vp global - fmov.s fr11,@-r2 - fmov.s fr10,@-r2 - fmov.s fr9,@-r2 - rts - fmov.s fr8,@-r2 -.align 4 -.VP_0: - .long _vp - - _Case_0_0_0_1: _Case_0_0_1_0: _Case_0_0_1_1: @@ -173,19 +140,40 @@ _ProcessVertexList: ! REGISTER SETUP mov r4,r14 mov r4,r13 - !mov.l .CMD_MSK,r12 - mov.l .PVR_EOL,r11 - mov.l .PVR_VTX,r10 + mov.l .CLIPFUNC,r12 + mov.l .PVR_EOL, r11 + mov.l .PVR_VTX, r10 mov r5,r9 bra SUBMIT_LOOP mov r6,r8 +! Handles a non-vertex command DO_CMD: - mov.l .L37,r1 - jsr @r1 - mov r13,r4 + mov r13,r4 ! r4 = CUR + mov r1,r0 ! r0 = MASK + cmp/eq #35,r0 ! T = MASK == 0x23 + bt.s 9f ! if (T) goto 9; + nop +! PowerVR GPU command + PushVertex REG_V0 bra NEXT_ITER nop +! Viewport update command +9: + add #4,r4 + mov.l .VP_1,r2 + ! Load VIEWPORT registers + fmov.s @r4+, fr8 ! VIEWPORT_HWIDTH = src->x + fmov.s @r4+, fr9 ! VIEWPORT_HHEIGHT = src->y + fmov.s @r4+,fr10 ! VIEWPORT_X_PLUS_HWIDTH = src->z + add #16,r2 + fmov.s @r4+,fr11 ! VIEWPORT_Y_PLUS_HHEIGHT = src->u + ! And store to vp global + fmov.s fr11,@-r2 + fmov.s fr10,@-r2 + fmov.s fr9,@-r2 + bra NEXT_ITER + fmov.s fr8,@-r2 SUBMIT_LOOP: mov.l @r13,r0 ! FLAGS = CUR->flags @@ -250,8 +238,8 @@ NEXT_ITER: .long 0xE0000000 .PVR_EOL: .long 0xF0000000 -.L37: - .long _HandleCommand +.CLIPFUNC: + .long _ClipLine ! CASES table holds the functions to transfer a quad, ! based on the visibility clipflags of the 4 vertices @@ -262,12 +250,12 @@ NEXT_ITER: .long _Case_0_0_1_0 .long _Case_0_0_1_1 .long _Case_0_1_0_0 - .long _Case_0_1_0_1 + .long _arch_exit ! V0_VIS | V2_VIS, Should never happen .long _Case_0_1_1_0 .long _Case_0_1_1_1 .long _Case_1_0_0_0 .long _Case_1_0_0_1 - .long _Case_1_0_1_0 + .long _arch_exit ! V1_VIS | V3_VIS, Should never happen .long _Case_1_0_1_1 .long _Case_1_1_0_0 .long _Case_1_1_0_1 diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index fc1c46c8d..e19fa84c8 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -8,7 +8,7 @@ #define SQ_BASE_ADDRESS (void*) 0xe0000000 #define PREFETCH(addr) __builtin_prefetch((addr)) -static Viewport vp; +Viewport vp; GL_FORCE_INLINE float _glFastInvert(float x) { return MATH_fsrra(x * x);