From 8621415f501b9bbf9377027565491f3bccbe63f9 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Tue, 14 May 2024 17:52:02 +1000 Subject: [PATCH] Dreamcast: Move clipflag calculation into vertex transform code --- .gitignore | 2 + misc/dreamcast/DrawColouredQuads.S | 139 +++++++++--------- misc/dreamcast/DrawTexturedQuads.S | 196 ++++++++++++++------------ misc/dreamcast/ViewportTransform.S | 74 ++++++++++ src/Graphics_Dreamcast.c | 36 ++++- third_party/gldc/src/aligned_vector.h | 5 +- third_party/gldc/src/draw.c | 27 +--- third_party/gldc/src/sh4.c | 15 +- 8 files changed, 291 insertions(+), 203 deletions(-) diff --git a/.gitignore b/.gitignore index 48865746a..550acea84 100644 --- a/.gitignore +++ b/.gitignore @@ -97,6 +97,8 @@ CMakeCache.txt #GCC object files *.o +# Build dependency files +*.d # Roslyn cache directories *.ide/ diff --git a/misc/dreamcast/DrawColouredQuads.S b/misc/dreamcast/DrawColouredQuads.S index fb298bd00..715402afc 100644 --- a/misc/dreamcast/DrawColouredQuads.S +++ b/misc/dreamcast/DrawColouredQuads.S @@ -1,7 +1,7 @@ -!r0 = prefetch address -!r1 = GPU VERT command -!r2 = GPU EOS command -!r3 = colour +!r0 = clip flags +!r1 = GPU command +!r2 = temp +!r3 = prefetch address !r4 = src pointer ARG !r5 = dst pointer ARG !r6 = quads count ARG @@ -10,117 +10,124 @@ !fr5 = y !fr6 = z !fr7 = w -!fr9 = 0 -!fr10 = ? -!fr11 = ? +!fr9 = u (0.0) +!fr10 = v (0.0) +!fr11 = saved fr0 (for viewport transform) !fv4 = XYZW -.globl _DrawColouredQuads +#include "ViewportTransform.S" +.global _DrawColouredQuads .align 4 .type _DrawColouredQuads,%function -! See https://shared-ptr.com/sh_insns.html -! Although there is fmov.s @Rm+,FRn, there isn't fmov.s FRn,@Rm+ -! So have to use fmov.s FRn,@-Rm instead - _DrawColouredQuads: ! Setup - mov.l CMD_COL_VERT, r1 ! r1 = GPU VERT command - mov.l CMD_COL_EOS, r2 ! r2 = GPU EOS command - fldi0 fr9 ! fr9 = 0 - mov r4,r0 ! r0 = src - add #-32, r5 ! r5 -= sizeof(VERTEX) - nop ! align loop + fldi0 fr9 ! U = 0 + fldi0 fr10 ! V = 0 + mov r4,r3 ! r3 = src + add #-32, r5 ! r5 -= sizeof(VERTEX) + fmov fr0,fr11 ! fr11 = saved fr0 + nop ! loop align .TRANSFORM_QUAD: + mov.l CMD_COL_VERT, r1 ! r1 = GPU VERT command + ! TRANSFORM VERTEX 1 - add #16, r0 ! r0 += VERTEX_STRIDE + add #16, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) + pref @r3 ! PREFETCH r3 (next vertex) fmov @r4+, fr4 ! fr4 = src->x fmov @r4+, fr5 ! fr5 = src->y fmov @r4+, fr6 ! fr6 = src->z fldi1 fr7 ! fr7 = 1.0 ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - mov.l @r4+,r3 ! r3 = src->color - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr9,@-r5 ! dst->v = 0.0 - fmov.s fr9,@-r5 ! dst->u = 0.0 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT + fmov @r4+,fr8 ! C = src->color + ProcessVertex1 ! TRANSFORM VERTEX 2 - add #16, r0 ! r0 += VERTEX_STRIDE + add #16, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) + pref @r3 ! PREFETCH r3 (next vertex) fmov @r4+, fr4 ! fr4 = src->x fmov @r4+, fr5 ! fr5 = src->y fmov @r4+, fr6 ! fr6 = src->z fldi1 fr7 ! fr7 = 1.0 ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - mov.l @r4+,r3 ! r3 = src->color - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr9,@-r5 ! dst->v = 0.0 - fmov.s fr9,@-r5 ! dst->u = 0.0 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT + fmov @r4+,fr8 ! C = src->color + ProcessVertex2 ! TRANSFORM VERTEX 3 - add #16, r0 ! r0 += VERTEX_STRIDE + add #16, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) + pref @r3 ! PREFETCH r3 (next vertex) fmov @r4+, fr4 ! fr4 = src->x fmov @r4+, fr5 ! fr5 = src->y fmov @r4+, fr6 ! fr6 = src->z fldi1 fr7 ! fr7 = 1.0 ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - mov.l @r4+,r3 ! r3 = src->color - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr9,@-r5 ! dst->v = 0.0 - fmov.s fr9,@-r5 ! dst->u = 0.0 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT + fmov @r4+,fr8 ! C = src->color + ProcessVertex3 ! TRANSFORM VERTEX 4 - add #16, r0 ! r0 += VERTEX_STRIDE + add #16, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) + pref @r3 ! PREFETCH r3 (next vertex) fmov @r4+, fr4 ! fr4 = src->x fmov @r4+, fr5 ! fr5 = src->y fmov @r4+, fr6 ! fr6 = src->z fldi1 fr7 ! fr7 = 1.0 ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - mov.l @r4+,r3 ! r3 = src->color - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr9,@-r5 ! dst->v = 0.0 - fmov.s fr9,@-r5 ! dst->u = 0.0 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r2,@-r5 ! dst->flags = CMD_EOS + fmov @r4+,fr8 ! C = src->color + ProcessVertex4 CMD_COL_EOS - dt r6 ! r6--; T = 1 if r6 == 0, else 0 - bf .TRANSFORM_QUAD ! if T == 0 then goto TRANSFORM_QUAD +! CLIPFLAGS TESTING + cmp/eq #0,r0 ! T = r0 == 0 (all points invisible) + bt/s .NO_POINTS_VISIBLE ! if T goto NO_POINTS_VISIBLE + nop + bra .SOME_POINTS_VISIBLE + nop + +.NO_POINTS_VISIBLE: + bra .LOOP_END ! jump to loop end after executing instruction in delay slot + add #-128, r5 ! r5 -= 4 * sizeof(VERTEX), move back to 1 vertex before start of quad + +.SOME_POINTS_VISIBLE: + +.LOOP_END: + dt r6 ! r6--; T = r6 == 0 + bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD nop - rts ! return - nop + add #32, r5 ! r5 += sizeof(VERTEX) + mov r5,r0 ! r0 = r5 + rts ! return after executing instruction in delay slot + fmov fr11,fr0 ! fr0 = original fr0 .align 2 CMD_COL_VERT: .long 0xe0000000 CMD_COL_EOS: .long 0xf0000000 + +.global _VP_COL_HWIDTH +.type _VP_COL_HWIDTH,%object +.size _VP_COL_HWIDTH,4 +_VP_COL_HWIDTH: .long 0 + +.global _VP_COL_HHEIGHT +.type _VP_COL_HHEIGHT,%object +.size _VP_COL_HHEIGHT,4 +_VP_COL_HHEIGHT: .long 0 + +.global _VP_COL_X_PLUS_HWIDTH +.type _VP_COL_X_PLUS_HWIDTH,%object +.size _VP_COL_X_PLUS_HWIDTH,4 +_VP_COL_X_PLUS_HWIDTH: .long 0 + +.global _VP_COL_Y_PLUS_HHEIGHT +.type _VP_COL_Y_PLUS_HHEIGHT,%object +.size _VP_COL_Y_PLUS_HHEIGHT,4 +_VP_COL_Y_PLUS_HHEIGHT: .long 0 \ No newline at end of file diff --git a/misc/dreamcast/DrawTexturedQuads.S b/misc/dreamcast/DrawTexturedQuads.S index 113553ef1..4b48697f8 100644 --- a/misc/dreamcast/DrawTexturedQuads.S +++ b/misc/dreamcast/DrawTexturedQuads.S @@ -1,7 +1,7 @@ -!r0 = prefetch address -!r1 = GPU VERT command -!r2 = GPU EOS command -!r3 = colour +!r0 = clip flags +!r1 = GPU command +!r2 = temp +!r3 = prefetch address !r4 = src pointer ARG !r5 = dst pointer ARG !r6 = quads count ARG @@ -10,119 +10,131 @@ !fr5 = y !fr6 = z !fr7 = w -!fr9 = ? -!fr10 = u -!fr11 = v +!fr8 = c +!fr9 = u +!fr10 = v +!fr11 = saved fr0 (for viewport transform) !fv4 = XYZW -.globl _DrawTexturedQuads +#include "ViewportTransform.S" +.global _DrawTexturedQuads .align 4 .type _DrawTexturedQuads,%function _DrawTexturedQuads: ! Setup - mov.l CMD_TEX_VERT, r1 ! r1 = GPU VERT command - mov.l CMD_TEX_EOS, r2 ! r2 = GPU EOS command - mov r4,r0 ! r0 = src - add #-32, r5 ! r5 -= sizeof(VERTEX) + mov r4,r3 ! r3 = src + add #-32, r5 ! r5 -= sizeof(VERTEX) + fmov fr0,fr11 ! fr11 = saved fr0 + nop ! loop align .TRANSFORM_QUAD: -! TRANSFORM VERTEX 1 - add #24, r0 ! r0 += VERTEX_STRIDE - add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) - fmov @r4+, fr4 ! fr4 = src->x - fmov @r4+, fr5 ! fr5 = src->y - fmov @r4+, fr6 ! fr6 = src->z - fldi1 fr7 ! fr7 = 1.0 - ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) + mov.l CMD_TEX_VERT, r1 ! r1 = GPU VERT command - mov.l @r4+,r3 ! r3 = src->color - fmov @r4+,fr10! fr10 = src->u - fmov @r4+,fr11! fr11 = src->v - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr11,@-r5 ! dst->v = fr11 - fmov.s fr10,@-r5 ! dst->u = fr10 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT +! TRANSFORM VERTEX 1 + add #24, r3 ! r3 += VERTEX_STRIDE + add #64, r5 ! r5 += 2 * sizeof(VERTEX) + pref @r3 ! PREFETCH r3 (next vertex) + fmov @r4+, fr4 ! X = src->x + fmov @r4+, fr5 ! Y = src->y + fmov @r4+, fr6 ! Z = src->z + fldi1 fr7 ! W = 1.0 + ftrv xmtrx, fv4 ! TRANSFORM(XYZW) +! OUTPUT + CLIPFLAGS VERTEX 1 + fmov @r4+,fr8 ! C = src->color + fmov @r4+,fr9 ! U = src->u + fmov @r4+,fr10! V = src->v + ProcessVertex1 ! TRANSFORM VERTEX 2 - add #24, r0 ! r0 += VERTEX_STRIDE + add #24, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) - fmov @r4+, fr4 ! fr4 = src->x - fmov @r4+, fr5 ! fr5 = src->y - fmov @r4+, fr6 ! fr6 = src->z - fldi1 fr7 ! fr7 = 1.0 - ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - - mov.l @r4+,r3 ! r3 = src->color - fmov @r4+,fr10! fr10 = src->u - fmov @r4+,fr11! fr11 = src->v - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr11,@-r5 ! dst->v = fr11 - fmov.s fr10,@-r5 ! dst->u = fr10 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT + pref @r3 ! PREFETCH r3 (next vertex) + fmov @r4+, fr4 ! X = src->x + fmov @r4+, fr5 ! Y = src->y + fmov @r4+, fr6 ! Z = src->z + fldi1 fr7 ! W = 1.0 + ftrv xmtrx, fv4 ! TRANSFORM(XYZW) +! OUTPUT + CLIPFLAGS VERTEX 2 + fmov @r4+,fr8 ! C = src->color + fmov @r4+,fr9 ! U = src->u + fmov @r4+,fr10! V = src->v + ProcessVertex2 ! TRANSFORM VERTEX 3 - add #24, r0 ! r0 += VERTEX_STRIDE + add #24, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) - fmov @r4+, fr4 ! fr4 = src->x - fmov @r4+, fr5 ! fr5 = src->y - fmov @r4+, fr6 ! fr6 = src->z - fldi1 fr7 ! fr7 = 1.0 - ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) - - mov.l @r4+,r3 ! r3 = src->color - fmov @r4+,fr10! fr10 = src->u - fmov @r4+,fr11! fr11 = src->v - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr11,@-r5 ! dst->v = fr11 - fmov.s fr10,@-r5 ! dst->u = fr10 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r1,@-r5 ! dst->flags = CMD_VERT + pref @r3 ! PREFETCH r3 (next vertex) + fmov @r4+, fr4 ! X = src->x + fmov @r4+, fr5 ! Y = src->y + fmov @r4+, fr6 ! Z = src->z + fldi1 fr7 ! W = 1.0 + ftrv xmtrx, fv4 ! TRANSFORM(XYZW) +! OUTPUT + CLIPFLAGS VERTEX 3 + fmov @r4+,fr8 ! C = src->color + fmov @r4+,fr9 ! U = src->u + fmov @r4+,fr10! V = src->v + ProcessVertex3 ! TRANSFORM VERTEX 4 - add #24, r0 ! r0 += VERTEX_STRIDE + add #24, r3 ! r3 += VERTEX_STRIDE add #64, r5 ! r5 += 2 * sizeof(VERTEX) - pref @r0 ! PREFETCH r0 (next vertex) - fmov @r4+, fr4 ! fr4 = src->x - fmov @r4+, fr5 ! fr5 = src->y - fmov @r4+, fr6 ! fr6 = src->z - fldi1 fr7 ! fr7 = 1.0 - ftrv xmtrx, fv4 ! TRANSFORM(fr4..fr7) + pref @r3 ! PREFETCH r3 (next vertex) + fmov @r4+, fr4 ! X = src->x + fmov @r4+, fr5 ! Y = src->y + fmov @r4+, fr6 ! Z = src->z + fldi1 fr7 ! W = 1.0 + ftrv xmtrx, fv4 ! TRANSFORM(XYZW) +! OUTPUT + CLIPFLAGS VERTEX 4 + fmov @r4+,fr8 ! C = src->color + fmov @r4+,fr9 ! U = src->u + fmov @r4+,fr10! V = src->v + ProcessVertex4 CMD_TEX_EOS - mov.l @r4+,r3 ! r3 = src->color - fmov @r4+,fr10! fr10 = src->u - fmov @r4+,fr11! fr11 = src->v - fmov.s fr7,@-r5 ! dst->w = fr7 - mov.l r3,@-r5 ! dst->bgra = r3 - fmov.s fr11,@-r5 ! dst->v = fr11 - fmov.s fr10,@-r5 ! dst->u = fr10 - fmov.s fr6,@-r5 ! dst->z = fr6 - fmov.s fr5,@-r5 ! dst->y = fr5 - fmov.s fr4,@-r5 ! dst->x = fr4 - mov.l r2,@-r5 ! dst->flags = CMD_EOS +! CLIPFLAGS TESTING + cmp/eq #0,r0 ! T = r0 == 0 (all points invisible) + bt/s .NO_POINTS_VISIBLE ! if T goto NO_POINTS_VISIBLE + nop + bra .SOME_POINTS_VISIBLE + nop - dt r6 ! r6--; T = 1 if r6 == 0, else 0 - bf .TRANSFORM_QUAD ! if T == 0 then goto TRANSFORM_QUAD +.NO_POINTS_VISIBLE: + bra .LOOP_END ! jump to loop end after executing instruction in delay slot + add #-128, r5 ! r5 -= 4 * sizeof(VERTEX), move back to prior quad, so that this invisible quad gets overwritten in next iteration + +.SOME_POINTS_VISIBLE: + +.LOOP_END: + dt r6 ! r6--; T = r6 == 0 + bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD nop - rts ! return - nop + add #32, r5 ! r5 += sizeof(VERTEX) + mov r5,r0 ! r0 = r5 + rts ! return after executing instruction in delay slot + fmov fr11,fr0 ! fr0 = original fr0 .align 2 CMD_TEX_VERT: .long 0xe0000000 CMD_TEX_EOS: .long 0xf0000000 + +.global _VP_TEX_HWIDTH +.type _VP_TEX_HWIDTH,%object +.size _VP_TEX_HWIDTH,4 +_VP_TEX_HWIDTH: .long 0 + +.global _VP_TEX_HHEIGHT +.type _VP_TEX_HHEIGHT,%object +.size _VP_TEX_HHEIGHT,4 +_VP_TEX_HHEIGHT: .long 0 + +.global _VP_TEX_X_PLUS_HWIDTH +.type _VP_TEX_X_PLUS_HWIDTH,%object +.size _VP_TEX_X_PLUS_HWIDTH,4 +_VP_TEX_X_PLUS_HWIDTH: .long 0 + +.global _VP_TEX_Y_PLUS_HHEIGHT +.type _VP_TEX_Y_PLUS_HHEIGHT,%object +.size _VP_TEX_Y_PLUS_HHEIGHT,4 +_VP_TEX_Y_PLUS_HHEIGHT: .long 0 \ No newline at end of file diff --git a/misc/dreamcast/ViewportTransform.S b/misc/dreamcast/ViewportTransform.S index 514eda6c3..4656920d0 100644 --- a/misc/dreamcast/ViewportTransform.S +++ b/misc/dreamcast/ViewportTransform.S @@ -1,3 +1,77 @@ +! ========================================================= +! ========================= VERTEX OUTPUT ================= +! ========================================================= +! To take advantage of SH4 dual instruction processing, interleave +! the clipflag calculation and vertex output instructions +.macro ProcessVertex1 + fmov.s fr7,@-r5 ! dst->w = W + fmov.s fr8,@-r5 ! dst->c = C + fneg fr7 ! W = -W + fmov.s fr10,@-r5 ! dst->v = V + fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W) + fmov.s fr9,@-r5 ! dst->u = U + movt r0 ! CLIPFLAGS = T + fmov.s fr6,@-r5 ! dst->z = Z + fmov.s fr5,@-r5 ! dst->y = Y + fmov.s fr4,@-r5 ! dst->x = X + mov.l r1,@-r5 ! dst->flags = CMD_VERT +.endm + +.macro ProcessVertex2 + fmov.s fr7,@-r5 ! dst->w = W + fmov.s fr8,@-r5 ! dst->c = C + fneg fr7 ! W = -W + fmov.s fr10,@-r5 ! dst->v = V + fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W) + fmov.s fr9,@-r5 ! dst->u = U + movt r2 ! tmp = T + fmov.s fr6,@-r5 ! dst->z = Z + add r2,r2 ! tmp = tmp + tmp + fmov.s fr5,@-r5 ! dst->y = Y + or r2,r0 ! CLIPFLAGS |= tmp (T << 1) + fmov.s fr4,@-r5 ! dst->x = X + mov.l r1,@-r5 ! dst->flags = CMD_VERT +.endm + +.macro ProcessVertex3 + fmov.s fr7,@-r5 ! dst->w = W + fmov.s fr8,@-r5 ! dst->c = C + fneg fr7 ! W = -W + fmov.s fr10,@-r5 ! dst->v = V + fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W) + fmov.s fr9,@-r5 ! dst->u = U + movt r2 ! tmp = T + fmov.s fr6,@-r5 ! dst->z = Z + fmov.s fr5,@-r5 ! dst->y = Y + shll2 r2 ! tmp = tmp << 2 + fmov.s fr4,@-r5 ! dst->x = X + or r2,r0 ! CLIPFLAGS |= tmp (T << 2) + mov.l r1,@-r5 ! dst->flags = CMD_VERT +.endm + +.macro ProcessVertex4 eos_addr + fmov.s fr7,@-r5 ! dst->w = W + fmov.s fr8,@-r5 ! dst->c = C + fneg fr7 ! W = -W + fmov.s fr10,@-r5 ! dst->v = V + fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W) + fmov.s fr9,@-r5 ! dst->u = U + movt r2 ! tmp = T + fmov.s fr6,@-r5 ! dst->z = Z + shll2 r2 ! tmp = tmp << 2 + fmov.s fr5,@-r5 ! dst->y = Y + add r2,r2 ! tmp = (tmp << 2) + (tmp << 2) + fmov.s fr4,@-r5 ! dst->x = X + mov.l \eos_addr, r1 ! r1 = GPU EOS command + or r2,r0 ! CLIPFLAGS |= tmp (T << 3) + or r0,r1 ! r1 |= CLIPFLAGS + mov.l r1,@-r5 ! dst->flags = GPU EOS | CLIPFLAGS +.endm + + +! ========================================================= +! ====================== VIEWPORT TRANSFORM =============== +! ========================================================= !r2 = return addr !r0 = temp !r5 = dst pointer diff --git a/src/Graphics_Dreamcast.c b/src/Graphics_Dreamcast.c index 900fa7709..d7f82ebac 100644 --- a/src/Graphics_Dreamcast.c +++ b/src/Graphics_Dreamcast.c @@ -467,19 +467,34 @@ cc_bool Gfx_WarnIfNecessary(void) { *#########################################################################################################################*/ #define VB_PTR gfx_vertices static const void* VERTEX_PTR; +extern void apply_poly_header(PolyHeader* header, PolyList* activePolyList); -extern void DrawColouredQuads(const void* src, Vertex* dst, int numQuads); -extern void DrawTexturedQuads(const void* src, Vertex* dst, int numQuads); +extern Vertex* DrawColouredQuads(const void* src, Vertex* dst, int numQuads); +extern Vertex* DrawTexturedQuads(const void* src, Vertex* dst, int numQuads); void DrawQuads(int count) { if (!count) return; - Vertex* start = submitVertices(count); + PolyList* output = _glActivePolyList(); + AlignedVectorHeader* hdr = &output->vector.hdr; + + uint32_t header_required = (hdr->size == 0) || STATE_DIRTY; + // Reserve room for the vertices and header + Vertex* beg = aligned_vector_reserve(&output->vector, hdr->size + (header_required) + count); + + if (header_required) { + apply_poly_header((PolyHeader*)beg, output); + STATE_DIRTY = GL_FALSE; + beg++; + hdr->size += 1; + } + Vertex* end; if (TEXTURES_ENABLED) { - DrawTexturedQuads(VERTEX_PTR, start, count >> 2); + end = DrawTexturedQuads(VERTEX_PTR, beg, count >> 2); } else { - DrawColouredQuads(VERTEX_PTR, start, count >> 2); + end = DrawColouredQuads(VERTEX_PTR, beg, count >> 2); } + hdr->size += (end - beg); } void Gfx_SetVertexFormat(VertexFormat fmt) { @@ -569,6 +584,11 @@ void Gfx_OnWindowResize(void) { Gfx_SetViewport(0, 0, Game.Width, Game.Height); } +extern float VP_COL_HWIDTH, VP_TEX_HWIDTH; +extern float VP_COL_HHEIGHT, VP_TEX_HHEIGHT; +extern float VP_COL_X_PLUS_HWIDTH, VP_TEX_X_PLUS_HWIDTH; +extern float VP_COL_Y_PLUS_HHEIGHT, VP_TEX_Y_PLUS_HHEIGHT; + void Gfx_SetViewport(int x, int y, int w, int h) { if (x == 0 && y == 0 && w == Game.Width && h == Game.Height) { glDisable(GL_SCISSOR_TEST); @@ -578,5 +598,11 @@ void Gfx_SetViewport(int x, int y, int w, int h) { glViewport(x, y, w, h); glScissor (x, y, w, h); + + VP_COL_HWIDTH = VP_TEX_HWIDTH = w * 0.5f; + VP_COL_HHEIGHT = VP_TEX_HHEIGHT = h * -0.5f; + + VP_COL_X_PLUS_HWIDTH = VP_TEX_X_PLUS_HWIDTH = x + w * 0.5f; + VP_COL_Y_PLUS_HHEIGHT = VP_TEX_Y_PLUS_HHEIGHT = y + h * 0.5f; } #endif diff --git a/third_party/gldc/src/aligned_vector.h b/third_party/gldc/src/aligned_vector.h index 3367be00f..7b152dbe0 100644 --- a/third_party/gldc/src/aligned_vector.h +++ b/third_party/gldc/src/aligned_vector.h @@ -83,13 +83,12 @@ AV_FORCE_INLINE void* aligned_vector_at(const AlignedVector* vector, const uint3 AV_FORCE_INLINE void* aligned_vector_reserve(AlignedVector* vector, uint32_t element_count) { AlignedVectorHeader* hdr = &vector->hdr; + uint32_t original_byte_size = (hdr->size * AV_ELEMENT_SIZE); if(element_count < hdr->capacity) { - return aligned_vector_at(vector, element_count); + return vector->data + original_byte_size; } - uint32_t original_byte_size = (hdr->size * AV_ELEMENT_SIZE); - /* We overallocate so that we don't make small allocations during push backs */ element_count = ROUND_TO_CHUNK_SIZE(element_count); diff --git a/third_party/gldc/src/draw.c b/third_party/gldc/src/draw.c index da6efd8ae..417c6c99a 100644 --- a/third_party/gldc/src/draw.c +++ b/third_party/gldc/src/draw.c @@ -1,28 +1,3 @@ #include #include "private.h" -#include "platform.h" - -extern void apply_poly_header(PolyHeader* header, PolyList* activePolyList); - -GL_FORCE_INLINE Vertex* submitVertices(GLuint vertexCount) { - TRACE(); - PolyList* output = _glActivePolyList(); - uint32_t header_offset; - uint32_t start_offset; - - uint32_t vector_size = aligned_vector_size(&output->vector); - GLboolean header_required = (vector_size == 0) || STATE_DIRTY; - - header_offset = vector_size; - start_offset = header_offset + (header_required ? 1 : 0); - - /* Make room for the vertices and header */ - aligned_vector_extend(&output->vector, (header_required) + vertexCount); - gl_assert(header_offset < aligned_vector_size(&output->vector)); - - if (header_required) { - apply_poly_header(aligned_vector_at(&output->vector, header_offset), output); - STATE_DIRTY = GL_FALSE; - } - return aligned_vector_at(&output->vector, start_offset); -} +#include "platform.h" \ No newline at end of file diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index a921f8d01..9060c8bac 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -465,7 +465,7 @@ void SceneListSubmit(Vertex* v3, int n) { for(int i = 0; i < n; ++i, ++v3) { PREFETCH(v3 + 1); - switch(v3->flags) { + switch(v3->flags & 0xFF000000) { case GPU_CMD_VERTEX_EOL: break; case GPU_CMD_VERTEX: @@ -480,12 +480,8 @@ void SceneListSubmit(Vertex* v3, int n) { Vertex* const v1 = v3 - 2; Vertex* const v2 = v3 - 1; - visible_mask = ( - (v0->xyz[2] > -v0->w) << 0 | - (v1->xyz[2] > -v1->w) << 1 | - (v2->xyz[2] > -v2->w) << 2 | - (v3->xyz[2] > -v3->w) << 3 - ); + visible_mask = v3->flags & 0xFF; + v3->flags &= ~0xFF; // Stats gathering found that when testing a 64x64x64 sized world, at most // ~400-500 triangles needed clipping @@ -505,7 +501,7 @@ void SceneListSubmit(Vertex* v3, int n) { _glPerspectiveDivideVertex(v2); _glPushHeaderOrVertex(v2); - + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); @@ -514,9 +510,6 @@ void SceneListSubmit(Vertex* v3, int n) { } break; - case 0: // No vertices visible - break; - default: // Some vertices visible SubmitClipped(v0, v1, v2, v3, visible_mask); break;