From e493d806716f9f440dc55235da990826b398320a Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Sun, 23 Jun 2024 13:00:46 +1000 Subject: [PATCH] PS2: Draw coloured vertices in same way as textured ones --- misc/ps2/VertexTransform.S | 108 +++++++++++++++++++++++++++---------- src/Graphics_PS2.c | 69 +++--------------------- src/Platform_Dreamcast.c | 1 + 3 files changed, 87 insertions(+), 91 deletions(-) diff --git a/misc/ps2/VertexTransform.S b/misc/ps2/VertexTransform.S index b24dd70f2..8ae49a2fa 100644 --- a/misc/ps2/VertexTransform.S +++ b/misc/ps2/VertexTransform.S @@ -15,6 +15,8 @@ .type LoadClipScaleFactors,%function .global TransformTexturedQuad .type TransformTexturedQuad,%function +.global TransformColouredQuad +.type TransformColouredQuad,%function # Loads matrix into VU0 registers # $a0 = addresss of mvp @@ -36,6 +38,7 @@ LoadClipScaleFactors: .macro TransformVertex1 + # TRANSFORM VERTEX 1 lqc2 $vf10, 0x00($a2) # IN = tmp vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x @@ -47,6 +50,11 @@ LoadClipScaleFactors: .endm .macro TransformVertex2 + # STORE CLIP FLAGS 1 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x00($a3) # clip_flags[0] = t0 + + # TRANSFORM VERTEX 2 lqc2 $vf12, 0x00($a2) # IN = tmp vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x @@ -58,6 +66,11 @@ LoadClipScaleFactors: .endm .macro TransformVertex3 + # STORE CLIP FLAGS 2 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x04($a3) # clip_flags[1] = t0 + + # TRANSFORM VERTEX 3 lqc2 $vf14, 0x00($a2) # IN = tmp vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x @@ -69,6 +82,11 @@ LoadClipScaleFactors: .endm .macro TransformVertex4 + # STORE CLIP FLAGS 3 RESULT + cfc2 $t0, $18 
# t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x08($a3) # clip_flags[2] = t0 + + # TRANSFORM VERTEX 4 lqc2 $vf16, 0x00($a2) # IN = tmp vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x @@ -78,6 +96,25 @@ LoadClipScaleFactors: vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) .endm +.macro TransformFinish + # Vertex output + # dst[0] = V0 (done by TransformVertex1) + # dst[1] = V1 (done by TransformVertex2) + # dst[2] = V2 (done by TransformVertex3) + # dst[3] = V2 + # dst[4] = V3 + # dst[5] = V0 + sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2) + sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3) + sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0) + vnop # adjust for delay + + # STORE CLIP FLAGS 4 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x0C($a3) # clip_flags[3] = t0 +.endm + + # Transforms 4 vertices with size of 24 bytes # $a0 = addresss of src vertices # $a1 = addresss of dst vertices @@ -93,7 +130,6 @@ TransformTexturedQuad: sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x08($a0) # t0 = src[0].z sw $t0,0x08($a2) # tmp.z = t0 - # TRANSFORM VERTEX 1 TransformVertex1 # LOAD VERTEX 2 @@ -101,10 +137,6 @@ TransformTexturedQuad: sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x20($a0) # t0 = src[1].z sw $t0,0x08($a2) # tmp.z = t0 - # STORE CLIP FLAGS 1 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x00($a3) # clip_flags[0] = t0 - # TRANSFORM VERTEX 2 TransformVertex2 # LOAD VERTEX 3 @@ -112,10 +144,6 @@ TransformTexturedQuad: sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x38($a0) # t0 = src[2].z sw $t0,0x08($a2) # tmp.z = t0 - # STORE CLIP FLAGS 2 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x04($a3) # clip_flags[1] = t0 - # TRANSFORM VERTEX 3 TransformVertex3 # LOAD VERTEX 4 @@ -123,31 +151,53 @@ TransformTexturedQuad: sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x50($a0) # t0 = src[3].z sw $t0,0x08($a2) # tmp.z = t0 - # STORE CLIP FLAGS 3 
RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x08($a3) # clip_flags[2] = t0 - # TRANSFORM VERTEX 4 TransformVertex4 - # Desired output - # dst[0] = V0 - # dst[1] = V1 - # dst[2] = V2 - # dst[3] = V2 - # dst[4] = V3 - # dst[5] = V0 - sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2) - sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3) - sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0) - vnop # adjust for delay - # STORE CLIP FLAGS 4 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x0C($a3) # clip_flags[3] = t0 - + TransformFinish jr $ra nop -.align 4 +# Transforms 4 vertices with size of 16 bytes +# $a0 = address of src vertices +# $a1 = address of dst vertices +# $a2 = address of tmp vertex +# $a3 = address of clip flags +TransformColouredQuad: + # LOAD 1.0 into W + lw $t0,ONE_VALUE # t0 = 1.0f + sw $t0,0x0C($a2) # tmp.w = t0 + + # LOAD VERTEX 1 + ld $t0,0x00($a0) # t0 = src[0].x,y + sd $t0,0x00($a2) # tmp.x,y = t0 + lw $t0,0x08($a0) # t0 = src[0].z + sw $t0,0x08($a2) # tmp.z = t0 + TransformVertex1 + + # LOAD VERTEX 2 + ld $t0,0x10($a0) # t0 = src[1].x,y + sd $t0,0x00($a2) # tmp.x,y = t0 + lw $t0,0x18($a0) # t0 = src[1].z + sw $t0,0x08($a2) # tmp.z = t0 + TransformVertex2 + + # LOAD VERTEX 3 + ld $t0,0x20($a0) # t0 = src[2].x,y + sd $t0,0x00($a2) # tmp.x,y = t0 + lw $t0,0x28($a0) # t0 = src[2].z + sw $t0,0x08($a2) # tmp.z = t0 + TransformVertex3 + + # LOAD VERTEX 4 + ld $t0,0x30($a0) # t0 = src[3].x,y + sd $t0,0x00($a2) # tmp.x,y = t0 + lw $t0,0x38($a0) # t0 = src[3].z + sw $t0,0x08($a2) # tmp.z = t0 + TransformVertex4 + + TransformFinish + jr $ra + nop .global ONE_VALUE ONE_VALUE: .float 1.0 diff --git a/src/Graphics_PS2.c b/src/Graphics_PS2.c index 00887bf32..a4df0821b 100644 --- a/src/Graphics_PS2.c +++ b/src/Graphics_PS2.c @@ -521,53 +521,6 @@ void Gfx_SetVertexFormat(VertexFormat fmt) { formatDirty = true; } -static cc_bool NotClipped(VU0_VECTOR pos) { - // The code below clips to the viewport clip planes - // For e.g. 
X this is [2048 - vp_width / 2, 2048 + vp_width / 2] - // However the guard band itself ranges from 0 to 4096 - // To reduce need to clip, clip against guard band on X/Y axes instead - /*return - xAdj >= -pos.w && xAdj <= pos.w && - yAdj >= -pos.w && yAdj <= pos.w && - pos.z >= -pos.w && pos.z <= pos.w;*/ - - // Rescale clip planes to guard band extent: - // X/W * vp_hwidth <= vp_hwidth -- clipping against viewport - // X/W <= 1 - // X <= W - // X/W * vp_hwidth <= 2048 -- clipping against guard band - // X/W <= 2048 / vp_hwidth - // X * vp_hwidth / 2048 <= W - float xAdj = pos.x * (vp_hwidth/2048); - float yAdj = pos.y * (vp_hheight/2048); - - // X/W * vp_hwidth <= 2048 - // - - // Clip X/Y to INSIDE the guard band regions - return - xAdj > -pos.w && xAdj < pos.w && - yAdj > -pos.w && yAdj < pos.w && - pos.z >= -pos.w && pos.z <= pos.w; -} - -static void TransformVertex(void* raw, VU0_VECTOR* dst) { - Vec3* pos = raw; - - VU0_VECTOR coord; coord.x = pos->x; coord.y = pos->y; coord.z = pos->z; coord.w = 1.0f; - asm __volatile__ ( - "lqc2 $vf8, 0x00(%0) \n" // vf8 = coord - "vmulaw $ACC, $vf4, $vf0\n" // ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - "vmaddax $ACC, $vf1, $vf8\n" // ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * coord.x - "vmadday $ACC, $vf2, $vf8\n" // ACC[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * coord.y - "vmaddz $vf9, $vf3, $vf8\n" // vf9[xyzw] = ACC[xyzw] + mvp.row3[xyzw] * coord.z - "sqc2 $vf9, 0x00(%1) \n" // dst = vf9 - : - : "r" (&coord), "r" (dst) - : "memory" - ); -} - //#define VCopy(dst, src) dst.x = vp_hwidth * (1 + src.x / src.w); dst.y = vp_hheight * (1 - src.y / src.w); dst.z = src.z / src.w; dst.w = src.w; static xyz_t FinishVertex(VU0_VECTOR* src, float invW) { float x = vp_hwidth * (src->x * invW); @@ -640,6 +593,7 @@ static u64* DrawTexturedTriangle(u64* dw, VU0_VECTOR* coords, } extern void TransformTexturedQuad(void* src, VU0_VECTOR* dst, VU0_VECTOR* tmp, int* clip_flags); +extern void TransformColouredQuad(void* src, VU0_VECTOR* dst, 
VU0_VECTOR* tmp, int* clip_flags); static void DrawTexturedTriangles(int verticesCount, int startVertex) { struct VertexTextured* v = (struct VertexTextured*)gfx_vertices + startVertex; @@ -685,29 +639,20 @@ static void DrawColouredTriangles(int verticesCount, int startVertex) { u64* dw = (u64*)q; unsigned numVerts = 0; - VU0_VECTOR V[4]; + VU0_VECTOR V[6], tmp; + int clip[4]; for (int i = 0; i < verticesCount / 4; i++, v += 4) { - TransformVertex(v + 0, &V[0]); - TransformVertex(v + 1, &V[1]); - TransformVertex(v + 2, &V[2]); - TransformVertex(v + 3, &V[3]); + TransformColouredQuad(v, V, &tmp, clip); - // V0, V1, V2 - if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) { + if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) { dw = DrawColouredTriangle(dw, V, v + 0, v + 1, v + 2); numVerts += 3; } - - VU0_VECTOR v0 = V[0]; - V[0] = V[2]; - V[1] = V[3]; - V[2] = v0; - // V2, V3, V0 - if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) { - dw = DrawColouredTriangle(dw, V, v + 2, v + 3, v + 0); + if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) { + dw = DrawColouredTriangle(dw, V + 3, v + 2, v + 3, v + 0); numVerts += 3; } } diff --git a/src/Platform_Dreamcast.c b/src/Platform_Dreamcast.c index 962f701f5..efd590aa3 100644 --- a/src/Platform_Dreamcast.c +++ b/src/Platform_Dreamcast.c @@ -541,6 +541,7 @@ static void InitSDCard(void) { if (sd_blockdev_for_partition(0, &sd_dev, &partition_type)) { Platform_LogConst("Unable to find first partition on SD card"); return; } + Platform_Log1("Found SD card (partitioned using: %b)", &partition_type); if (fs_fat_init()) { Platform_LogConst("Failed to init FAT filesystem"); return;