Dreamcast: Fix not using second store queue

2025-09-09 15:28:21 -04:00 · 2025-05-31 20:18:58 +10:00 · 2025-05-31 20:18:58 +10:00 · f082d17ee4
commit f082d17ee4
parent ee2e521f5d
2 changed files with 259 additions and 264 deletions
--- a/misc/dreamcast/VertexClip2.S
+++ b/misc/dreamcast/VertexClip2.S
@ -23,17 +23,18 @@
 #define CL2 r5 // input colour 2
 #define CLO r7 // output colour

-! Calculates the near plane intersection point between two points:
+! Writes output vertex as the near plane intersection point between two points:
 !    float t  = fabsf(v1->z) / fabsf(v2->z - v1->z)
 !    float invt = 1.0f - t;
+!    // note: w = invt * v1->w + t * v2->w;, always ends up being zNear
 !    
-!    out->x = invt * v1->x + t * v2->x;
-!    out->y = invt * v1->y + t * v2->y;
-!    out->z = 0.0f; // clipped against near plane anyways (I.e Z/W = 0 --> Z = 0)
+!    out->c = type << 24
+!    out->x = (invt * v1->x + t * v2->x) * 1/zNear
+!    out->y = (invt * v1->y + t * v2->y) * 1/zNear
+!    out->w = 1/zNear  
 !    
 !    out->u = invt * v1->u + t * v2->u;
 !    out->v = invt * v1->v + t * v2->v;
-!    out->w = zNear // invt * v1->w + t * v2->w;, always ends up being zNear
 !    
 !    out->b = invt * v1->b + t * v2->b;
 !    out->g = invt * v1->g + t * v2->g;
@ -41,7 +42,7 @@
 !    out->a = invt * v1->a + t * v2->a;
 ! To optimise these calculations, FIPR is used:
 !   FIPR = FVm.x*FVn.x + FVm.y*FVn.x + FVm.z*FVn.z + FVm.w*FVn.w --> FVn.w
-! FIPR can be used to accomplish "vout->Q invt * v1->Q + t * v2->Q" by:
+! FIPR can be used to accomplish "vout->Q = invt * v1->Q + t * v2->Q" by:
 !   - assigning x/y components to 0 for both vectors
 !   - assigning t and invT to z/w of FVm vector
 !   - assigning v1 and v2  to z/w  of FVn vector
@ -51,74 +52,74 @@
 .global _ClipEdge
 .align 4
 _ClipEdge:
-	mov      IN1, TM1 ! MT, tmp  = &v1
+	add      #12, IN1 ! EX, IN1  = &v1->z
 	fldi0    fr4      ! LS, fr4  = 0
-	add      #12, TM1 ! EX, tmp  = &v1->z
-	fmov.s  @TM1, fr2 ! LS, fr2  = v1->z
-	mov      IN2, TM1 ! MT, tmp  = &v2
+	fmov.s  @IN1, fr2 ! LS, fr2  = v1->z
+	add      #12, IN2 ! EX, IN2  = &v2->z
 	fldi0    fr5      ! LS, fr5  = 0
-	add      #12, TM1 ! EX, tmp   = &v2->z
-	fmov.s  @TM1,fr11 ! LS, fr11 = v2->z
+	fmov.s  @IN2,fr11 ! LS, fr11 = v2->z
 	fsub     fr2,fr11 ! FE, fr11 = v2->z - v1->z
 	fldi0    fr8      ! LS, fr8  = 0
-	mov.l  TYP,@OUT   ! LS, OUT->cmd = TYPE
+	shll16   TYP      ! EX, TYP <<= 16
 	fmul    fr11,fr11 ! FE, fr11 = (v2->z - v1->z) * (v2->z * v1->z)
 	fldi0    fr9      ! LS, fr9  = 0
 	fldi0    fr0      ! LS, fr0  = 0
 	fldi0    fr1      ! LS, fr1  = 0
 	fsrra   fr11      ! FE, fr11 = 1 / abs(v2->z - v1->z)
+	shll8    TYP      ! EX, TYP <<= 8
 	fabs     fr2      ! LS, fr2  = abs(v1->z)
+	mov.l  TYP,@OUT   ! LS, dst->cmd = TYPE
 	fmul     fr2,fr11 ! FE, fr11 = abs(v1->Z) / abs(v2->z - v1->z)  --> t
-	add       #4, IN1 ! EX, v1   += 4
+	add      #-8, IN1 ! EX, IN1  = &v1->x
 	fldi1   fr10      ! LS, fr10 = 1
-	add       #4, IN2 ! EX, v2   += 4
-	add       #4, OUT ! EX, OUT  += 4
-	fsub    fr11,fr10 ! FE, fr10 = 1.0 - t  --> invT
+	add      #-8, IN2 ! EX, IN2  = &v2->x
+	add       #4, OUT ! EX, OUT  = &dst->x
+	fsub    fr11,fr10 ! FE, fr10 = 1.0 - t  --> invT

 ! Load X components	
-	fmov.s @IN1+, fr2 ! LS, A1 = v1->x, v1 += 4
-	fmov.s @IN2+, fr3 ! LS, B1 = v2->x, v2 += 4
+	fmov.s @IN1+, fr2 ! LS, A1 = v1->x, IN1 = &v1->y
+	fmov.s @IN2+, fr3 ! LS, B1 = v2->x, IN2 = &v2->y

 ! Start interpolating X
 	fipr     fv8, fv0 ! FE, LERP(A1, B1)
 ! Load Y components
-	fmov.s @IN1+, fr6 ! LS, A2 = v1->y, v1 += 4
-	fmov.s @IN2+, fr7 ! LS, B2 = v2->y, v2 += 4
+	fmov.s  @IN1, fr6 ! LS, A2 = v1->y
+	fmov.s  @IN2, fr7 ! LS, B2 = v2->y
 ! Load W
 	mov.l _NEAR_CLIP_W,TM1 ! tmp = zNear
 	lds 	 TM1,fpul ! LS, FPUL = zNear
 	fsts     fpul,fr2 ! LS, fr2  = FPUL
 ! Store interpolated X
 	fmul	 fr2,fr3  ! EX, fr7 = LERP * invW
-	fmov.s   fr3,@OUT ! LS, OUT->x = LERP * invW
-	add       #4, OUT ! EX, OUT += 4
+	fmov.s   fr3,@OUT ! LS, dst->x = LERP * invW
+	add       #4, OUT ! EX, OUT = &dst->y

 ! Start interpolating Y
 	fipr     fv8, fv4 ! FE, LERP(A2, B2)
 ! Skip Z of input vertices
-	add       #4, IN1 ! EX, v1 += 4
-	add       #4, IN2 ! EX, v2 += 4
+	add       #8, IN1 ! EX, IN1 = &v1->u
+	add       #8, IN2 ! EX, IN2 = &v2->u

 ! Store interpolated Y
 	fmul	 fr2,fr7  ! EX, fr7 = LERP * invW
 	fmov.s   fr7,@OUT ! LS, OUT->y = LERP * invW
-	add       #4, OUT ! EX, OUT += 4
+	add       #4, OUT ! EX, OUT = &dst->w
 ! Store W
 	fmov.s   fr2,@OUT ! LS, OUT->w = 1/zNear
-	add       #4, OUT ! EX, OUT += 4
+	add       #4, OUT ! EX, OUT = &dst->u
 	
 ! Load U components
-	fmov.s @IN1+, fr2 ! LS, A1 = v1->u, v1 += 4
-	fmov.s @IN2+, fr3 ! LS, B1 = v2->u, v2 += 4
+	fmov.s @IN1+, fr2 ! LS, A1 = v1->u, IN1 = &v1->v
+	fmov.s @IN2+, fr3 ! LS, B1 = v2->u, IN2 = &v2->v

 ! Start interpolating U
 	fipr     fv8, fv0 ! FE, LERP(A1, B1)
 ! Load V components
-	fmov.s @IN1+, fr6 ! LS, A2 = v1->v, v1 += 4
-	fmov.s @IN2+, fr7 ! LS, B2 = v2->v, v2 += 4
+	fmov.s @IN1+, fr6 ! LS, A2 = v1->v, IN1 = &v1->bgra
+	fmov.s @IN2+, fr7 ! LS, B2 = v2->v, IN2 = &v2->bgra
 ! Store interpolated U
-	fmov.s   fr3,@OUT ! LS, OUT->u = LERP
-	add       #4, OUT ! EX, OUT += 4
+	fmov.s   fr3,@OUT ! LS, dst->u = LERP
+	add       #4, OUT ! EX, OUT = &dst->v

 ! Start interpolating V
 	fipr     fv8, fv4 ! FE, LERP(A2, B2)
@ -127,8 +128,8 @@ _ClipEdge:
 	mov.l  @IN2,CL2   ! LS, BCOLOR = v2->bgra
 	cmp/eq  CL1,CL2   ! MT, T = ACOLOR == BCOLOR
 ! Store V
-	fmov.s   fr7,@OUT ! LS, OUT->v = LERP
-	add       #4, OUT ! EX, OUT += 4
+	fmov.s   fr7,@OUT ! LS, dst->v = LERP
+	add       #4, OUT ! EX, OUT = &dst->bgra
 	
 ! Bypass RGBA interpolation if unnecessary
 	bt.s    1f        ! BR, if (T) goto 1;
--- a/third_party/gldc/sh4.c
+++ b/third_party/gldc/sh4.c
@ -2,9 +2,6 @@
 #include <dc/pvr.h>
 #include "gldc.h"

-#define PREFETCH(addr) __builtin_prefetch((addr))
-static volatile uint32_t* sq;
-
 // calculates 1/sqrt(x)
 static GLDC_FORCE_INLINE float sh4_fsrra(float x) {
  asm volatile ("fsrra %[value]\n"
@ -15,251 +12,248 @@ static GLDC_FORCE_INLINE float sh4_fsrra(float x) {
  return x;
 }

-static GLDC_FORCE_INLINE void PushVertex(Vertex* v) {
-    volatile Vertex* dst = (Vertex*)(sq);
+static GLDC_FORCE_INLINE void PushVertex(Vertex* v, volatile Vertex* dst) {
 	float ww   = v->w * v->w;
-    dst->flags = v->flags;
-    float f    = sh4_fsrra(ww); // 1/sqrt(w^2) ~ 1/w
-    // Convert to NDC (viewport already applied)
-    float x    = v->x * f;
-    float y    = v->y * f;
+	dst->flags = v->flags;
+	float f	= sh4_fsrra(ww); // 1/sqrt(w^2) ~ 1/w
+	// Convert to NDC (viewport already applied)
+	float x	= v->x * f;
+	float y	= v->y * f;

-    dst->x     = x;
-    dst->y     = y;
-    dst->z     = f;
-    dst->u     = v->u;
-    dst->v     = v->v;
-    dst->bgra  = v->bgra;
-    __asm__("pref @%0" : : "r"(dst));
-    dst++;
+	dst->x	 = x;
+	dst->y	 = y;
+	dst->z	 = f; // z slot receives 1/w, not the input z
+	dst->u	 = v->u;
+	dst->v	 = v->v;
+	dst->bgra  = v->bgra;
+	__asm__("pref @%0" : : "r"(dst)); // SH-4: pref on a store queue address starts the burst write-out of that queue
 }

-static inline void PushCommand(Vertex* v)  {
-    uint32_t* s = (uint32_t*)v;
-    sq[0] = *(s++);
-    sq[1] = *(s++);
-    sq[2] = *(s++);
-    sq[3] = *(s++);
-    sq[4] = *(s++);
-    sq[5] = *(s++);
-    sq[6] = *(s++);
-    sq[7] = *(s++);
-    __asm__("pref @%0" : : "r"(sq));
-    sq += 8;
+static inline void PushCommand(Vertex* v, volatile Vertex* dst)  {
+	uint32_t* s = (uint32_t*)v;
+	volatile uint32_t* sq = (volatile uint32_t*)dst; // destination viewed as eight 32-bit words
+
+	sq[0] = *(s++);
+	sq[1] = *(s++);
+	sq[2] = *(s++);
+	sq[3] = *(s++);
+	sq[4] = *(s++);
+	sq[5] = *(s++);
+	sq[6] = *(s++);
+	sq[7] = *(s++);
+	__asm__("pref @%0" : : "r"(sq)); // SH-4: pref on a store queue address starts the burst write-out of that queue
 }

-extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, volatile void* vout, int type);
+extern void ClipEdge(Vertex* const v1, Vertex* const v2, volatile Vertex* vout, char type);

 #define V0_VIS (1 << 0)
 #define V1_VIS (1 << 1)
 #define V2_VIS (1 << 2)
 #define V3_VIS (1 << 3)

-
-// https://casual-effects.com/research/McGuire2011Clipping/clip.glsl
-static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_t visible_mask) {
-    switch(visible_mask) {
-    case V0_VIS:
-        //          v0
-        //         / |
-        //       /   |
-        // .....A....B...
-        //    /      |
-        //  v3--v2---v1
-        PushVertex(v0);
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX);     // B
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // A
-    	break;
-
-    case V1_VIS:
-        //          v1
-        //         / |
-        //       /   |
-        // ....A.....B...
-        //    /      |
-        //  v0--v3---v2
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX);     // A
-        PushVertex(v1);                           // v1
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-
-    case V2_VIS:
-        //          v2
-        //         / |
-        //       /   |
-        // ....A.....B...
-        //    /      |
-        //  v1--v0---v3
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX);     // A
-        PushVertex(v2);                           // v2
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // B
-		break;
-
-    case V3_VIS:
-        //          v3
-        //         / |
-        //       /   |
-        // ....A.....B...
-        //    /      |
-        //  v2--v1---v0
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX);     // B
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // A
-        PushVertex(v3);                           // v3
-    	break;
-
-    case V0_VIS | V1_VIS:
-        //    v0-----------v1
-        //      \           |
-        //   ....B..........A...
-        //         \        |
-        //          v3-----v2
-        PushVertex(v1);                           // v1
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX);     // A
-        PushVertex(v0);                           // v0
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-
-    // case V0_VIS | V2_VIS: degenerate case that should never happen
-    case V0_VIS | V3_VIS:
-        //    v3-----------v0
-        //      \           |
-        //   ....B..........A...
-        //         \        |
-        //          v2-----v1
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX);     // A
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX);     // B
-        PushVertex(v0);                           // v0
-        PushVertex(v3);                           // v3
-    	break;
-
-    case V1_VIS | V2_VIS:
-        //    v1-----------v2
-        //      \           |
-        //   ....B..........A...
-        //         \        |
-        //          v0-----v3
-        PushVertex(v1);                           // v1
-        PushVertex(v2);                           // v2
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX);     // B
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // A
-    	break;
-
-    // case V1_VIS | V3_VIS: degenerate case that should never happen
-    case V2_VIS | V3_VIS:
-        //    v2-----------v3
-        //      \           |
-        //   ....B..........A...
-        //         \        |
-        //          v1-----v0
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX);     // B
-        PushVertex(v2);                           // v2
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX);     // A
-        PushVertex(v3);                           // v3
-    	break;
-
-    case V0_VIS | V1_VIS | V2_VIS:
-        //        --v1--
-        //    v0--      --v2
-        //      \        |
-        //   .....B.....A...
-        //          \   |
-        //            v3
-        // v1,v2,v0  v2,v0,A  v0,A,B
-        PushVertex(v1);                           // v1
-        PushVertex(v2);                           // v2
-        PushVertex(v0);                           // v0
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX);     // A
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-
-    case V0_VIS | V1_VIS | V3_VIS:
-        //        --v0--
-        //    v3--      --v1
-        //      \        |
-        //   .....B.....A...
-        //          \   |
-        //            v2
-        // v0,v1,v3  v1,v3,A  v3,A,B
-        v3->flags = PVR_CMD_VERTEX;
-        PushVertex(v0);                           // v0
-        PushVertex(v1);                           // v1
-        PushVertex(v3);                           // v3
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX);     // A
-        ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-
-    case V0_VIS | V2_VIS | V3_VIS:
-        //        --v3--
-        //    v2--      --v0
-        //      \        |
-        //   .....B.....A...
-        //          \   |
-        //            v1
-        // v3,v0,v2  v0,v2,A  v2,A,B
-        v3->flags = PVR_CMD_VERTEX;
-        PushVertex(v3);                           // v3
-        PushVertex(v0);                           // v0
-        PushVertex(v2);                           // v2
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX);     // A
-        ClipEdge(v1, v2, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-
-    case V1_VIS | V2_VIS | V3_VIS:
-        //        --v2--
-        //    v1--      --v3
-        //      \        |
-        //   .....B.....A...
-        //          \   |
-        //            v0
-        // v2,v3,v1  v3,v1,A  v1,A,B
-        v3->flags = PVR_CMD_VERTEX;
-        PushVertex(v2);                           // v2
-        PushVertex(v3);                           // v3
-        PushVertex(v1);                           // v1
-        ClipEdge(v3, v0, sq, PVR_CMD_VERTEX);     // A
-        ClipEdge(v0, v1, sq, PVR_CMD_VERTEX_EOL); // B
-    	break;
-    }
-}
+#define TYPE_VTX 0xE0 // PVR vertex, data
+#define TYPE_EOS 0xF0 // PVR vertex, end of strip

 extern void ProcessVertexList(Vertex* v3, int n, void* sq_addr);

 void SceneListSubmit(Vertex* v3, int n) {
-	sq = (uint32_t*)MEM_AREA_SQ_BASE;
+	volatile Vertex* dst = (volatile Vertex*)MEM_AREA_SQ_BASE;

-    for (int i = 0; i < n; i++, v3++) 
+	for (int i = 0; i < n; i++, v3++) 
 	{
-        PREFETCH(v3 + 1);
-        switch(v3->flags & 0xFF000000) {
-        case PVR_CMD_VERTEX_EOL:
-            break;
-        case PVR_CMD_VERTEX:
-            continue;
-        default:
-            PushCommand(v3);
-            continue;
-        };
+		// Prefetch next vertex into the data cache
+		__builtin_prefetch(v3 + 1);
+
+		switch(v3->flags & 0xFF000000) {
+		case PVR_CMD_VERTEX_EOL:
+			break;
+		case PVR_CMD_VERTEX:
+			continue;
+		default:
+			PushCommand(v3, dst++);
+			continue;
+		};

 		// Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2}  {2, 3, 0}]
-        Vertex* const v0 = v3 - 3;
-        Vertex* const v1 = v3 - 2;
-        Vertex* const v2 = v3 - 1;
-        uint8_t visible_mask = v3->flags & 0xFF;
+		Vertex* const v0 = v3 - 3;
+		Vertex* const v1 = v3 - 2;
+		Vertex* const v2 = v3 - 1;
+		uint8_t mask = v3->flags & 0xFF;

-        switch(visible_mask) {
-        case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible
-        {
-            // Triangle strip: {1,2,0} {2,0,3}
-            PushVertex(v1);
-            PushVertex(v2);
-            PushVertex(v0);
-            PushVertex(v3);
-        }
-        break;
-        
-        default: // Some vertices visible
-            SubmitClipped(v0, v1, v2, v3, visible_mask);
-            break;
-        }
-    }
+		// Check if all vertices visible
+		if (__builtin_expect(mask == (V0_VIS | V1_VIS | V2_VIS | V3_VIS), 1)) {
+			// Triangle strip: {1,2,0} {2,0,3}
+			PushVertex(v1, dst++);
+			PushVertex(v2, dst++);
+			PushVertex(v0, dst++);
+			PushVertex(v3, dst++);
+			continue;
+		}
+
+
+		// Only some vertices visible
+		// https://casual-effects.com/research/McGuire2011Clipping/clip.glsl
+		switch(mask) {
+		case V0_VIS:
+			//		  v0
+			//		 / |
+			//	   /   |
+			// .....A....B...
+			//	/	  |
+			//  v3--v2---v1
+			PushVertex(v0,   dst++);           // v0
+			ClipEdge(v0, v1, dst++, TYPE_VTX); // B
+			ClipEdge(v3, v0, dst++, TYPE_EOS); // A
+			break;
+
+		case V1_VIS:
+			//		  v1
+			//		 / |
+			//	   /   |
+			// ....A.....B...
+			//	/	  |
+			//  v0--v3---v2
+			ClipEdge(v0, v1, dst++, TYPE_VTX); // A
+			PushVertex(v1,   dst++);           // v1
+			ClipEdge(v1, v2, dst++, TYPE_EOS); // B
+			break;
+
+		case V2_VIS:
+			//		  v2
+			//		 / |
+			//	   /   |
+			// ....A.....B...
+			//	/	  |
+			//  v1--v0---v3
+			ClipEdge(v1, v2, dst++, TYPE_VTX); // A
+			PushVertex(v2,   dst++);           // v2
+			ClipEdge(v2, v3, dst++, TYPE_EOS); // B
+			break;
+
+		case V3_VIS:
+			//		  v3
+			//		 / |
+			//	   /   |
+			// ....A.....B...
+			//	/	  |
+			//  v2--v1---v0
+			ClipEdge(v3, v0, dst++, TYPE_VTX); // B
+			ClipEdge(v2, v3, dst++, TYPE_VTX); // A (not EOS: v3 below already carries the EOL flag and ends the strip)
+			PushVertex(v3,   dst++);           // v3
+			break;
+
+		case V0_VIS | V1_VIS:
+			//	v0-----------v1
+			//	  \		   |
+			//   ....B..........A...
+			//		 \		|
+			//		  v3-----v2
+			PushVertex(v1,   dst++);           // v1
+			ClipEdge(v1, v2, dst++, TYPE_VTX); // A
+			PushVertex(v0,   dst++);           // v0
+			ClipEdge(v3, v0, dst++, TYPE_EOS); // B
+			break;
+
+		// case V0_VIS | V2_VIS: degenerate case that should never happen
+		case V0_VIS | V3_VIS:
+			//	v3-----------v0
+			//	  \		   |
+			//   ....B..........A...
+			//		 \		|
+			//		  v2-----v1
+			ClipEdge(v0, v1, dst++, TYPE_VTX); // A
+			ClipEdge(v2, v3, dst++, TYPE_VTX); // B
+			PushVertex(v0,   dst++);           // v0
+			PushVertex(v3,   dst++);           // v3
+			break;
+
+		case V1_VIS | V2_VIS:
+			//	v1-----------v2
+			//	  \		   |
+			//   ....B..........A...
+			//		 \		|
+			//		  v0-----v3
+			PushVertex(v1,   dst++);           // v1
+			PushVertex(v2,   dst++);           // v2
+			ClipEdge(v0, v1, dst++, TYPE_VTX); // B
+			ClipEdge(v2, v3, dst++, TYPE_EOS); // A
+			break;
+
+		// case V1_VIS | V3_VIS: degenerate case that should never happen
+		case V2_VIS | V3_VIS:
+			//	v2-----------v3
+			//	  \		   |
+			//   ....B..........A...
+			//		 \		|
+			//		  v1-----v0
+			ClipEdge(v1, v2, dst++, TYPE_VTX); // B
+			PushVertex(v2,   dst++);           // v2
+			ClipEdge(v3, v0, dst++, TYPE_VTX); // A
+			PushVertex(v3,   dst++);           // v3
+			break;
+
+		case V0_VIS | V1_VIS | V2_VIS:
+			//		--v1--
+			//	v0--	  --v2
+			//	  \		|
+			//   .....B.....A...
+			//		  \   |
+			//			v3
+			// v1,v2,v0  v2,v0,A  v0,A,B
+			PushVertex(v1,   dst++);           // v1
+			PushVertex(v2,   dst++);           // v2
+			PushVertex(v0,   dst++);           // v0
+			ClipEdge(v2, v3, dst++, TYPE_VTX); // A
+			ClipEdge(v3, v0, dst++, TYPE_EOS); // B
+			break;
+
+		case V0_VIS | V1_VIS | V3_VIS:
+			//		--v0--
+			//	v3--	  --v1
+			//	  \		|
+			//   .....B.....A...
+			//		  \   |
+			//			v2
+			// v0,v1,v3  v1,v3,A  v3,A,B
+			v3->flags = PVR_CMD_VERTEX;
+			PushVertex(v0,   dst++);           // v0
+			PushVertex(v1,   dst++);           // v1
+			PushVertex(v3,   dst++);           // v3
+			ClipEdge(v1, v2, dst++, TYPE_VTX); // A
+			ClipEdge(v2, v3, dst++, TYPE_EOS); // B
+			break;
+
+		case V0_VIS | V2_VIS | V3_VIS:
+			//		--v3--
+			//	v2--	  --v0
+			//	  \		|
+			//   .....B.....A...
+			//		  \   |
+			//			v1
+			// v3,v0,v2  v0,v2,A  v2,A,B
+			v3->flags = PVR_CMD_VERTEX;
+			PushVertex(v3,   dst++);           // v3
+			PushVertex(v0,   dst++);           // v0
+			PushVertex(v2,   dst++);           // v2
+			ClipEdge(v0, v1, dst++, TYPE_VTX); // A
+			ClipEdge(v1, v2, dst++, TYPE_EOS); // B
+			break;
+
+		case V1_VIS | V2_VIS | V3_VIS:
+			//		--v2--
+			//	v1--	  --v3
+			//	  \		|
+			//   .....B.....A...
+			//		  \   |
+			//			v0
+			// v2,v3,v1  v3,v1,A  v1,A,B
+			v3->flags = PVR_CMD_VERTEX;
+			PushVertex(v2,   dst++);           // v2
+			PushVertex(v3,   dst++);           // v3
+			PushVertex(v1,   dst++);           // v1
+			ClipEdge(v3, v0, dst++, TYPE_VTX); // A
+			ClipEdge(v0, v1, dst++, TYPE_EOS); // B
+			break;
+		}
+	}
 }