diff --git a/third_party/gldc/src/draw.c b/third_party/gldc/src/draw.c index f5c7f49cf..b63e1ecc6 100644 --- a/third_party/gldc/src/draw.c +++ b/third_party/gldc/src/draw.c @@ -75,22 +75,13 @@ static void generateQuads(SubmissionTarget* target, const GLsizei first, const G *((Float2*)it->uv) = F2ZERO; } - src += stride; + src += stride; + it->flags = GPU_CMD_VERTEX; it++; } - - // Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}] - PREFETCH(dst); // TODO: more prefetching? - memcpy_vertex(dst + 5, dst + 0); dst[5].flags = GPU_CMD_VERTEX_EOL; - memcpy_vertex(dst + 4, dst + 3); dst[4].flags = GPU_CMD_VERTEX; - memcpy_vertex(dst + 3, dst + 2); dst[3].flags = GPU_CMD_VERTEX; - - dst[2].flags = GPU_CMD_VERTEX_EOL; - dst[1].flags = GPU_CMD_VERTEX; - dst[0].flags = GPU_CMD_VERTEX; - // TODO copy straight to dst?? - - dst += 6; + + dst[3].flags = GPU_CMD_VERTEX_EOL; + dst += 4; } } @@ -131,7 +122,7 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) { TRACE(); if (!count) return; - submitVertices(count * 6 / 4); // quads -> triangles + submitVertices(count); generateQuads(&SUBMISSION_TARGET, first, count); } diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index 29ab58cd9..6cb2fd562 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -39,13 +39,13 @@ GL_FORCE_INLINE float _glFastInvert(float x) { return MATH_fsrra(x * x); } -GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { +GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) { TRACE(); const float f = _glFastInvert(vertex->w); /* Convert to NDC and apply viewport */ - vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; + vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; /* Orthographic projections need to use invZ otherwise we lose @@ -120,15 +120,191 @@ static volatile uint32_t* QACR = (uint32_t*) 0xFF000038; #define V0_VIS (1 << 0) #define V1_VIS (1 << 1) 
#define V2_VIS (1 << 2) +#define V3_VIS (1 << 3) -void SceneListSubmit(Vertex* v2, int n) { +static void SubmitTriangle(Vertex* v0, Vertex* v1, Vertex* v2, uint8_t visible_mask) { + Vertex __attribute__((aligned(32))) scratch[4]; + + switch(visible_mask) { + case V0_VIS | V1_VIS | V2_VIS: // All vertices visible + { + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(v1); + _glPushHeaderOrVertex(v1); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + } + break; + case V0_VIS: // First vertex was visible + { + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } + break; + case V1_VIS: // Second vertex was visible + { + /* Second vertex was visible. In this case we need to create a triangle and produce + two new vertices: 1-2, and 2-3. 
*/ + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + } + break; + case V0_VIS | V1_VIS: // First and second vertex were visible + { + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glClipEdge(v1, v2, a); + a->flags = GPU_CMD_VERTEX_EOL; + + _glPerspectiveDivideVertex(c); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(c); + _glPushHeaderOrVertex(a); + } + break; + case V2_VIS: // Third vertex was visible + { + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + + _glClipEdge(v2, v0, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + _glPerspectiveDivideVertex(c); + _glPushHeaderOrVertex(c); + } + break; + case V0_VIS | V2_VIS: // First and third vertex were visible + { + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v2); + c->flags = GPU_CMD_VERTEX; + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v1, v2, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(v0); + _glPushHeaderOrVertex(v0); + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c); + _glPushHeaderOrVertex(c); + 
_glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + + c->flags = GPU_CMD_VERTEX_EOL; + _glPushHeaderOrVertex(c); + } + break; + case V1_VIS | V2_VIS: // Second and third vertex were visible + { + Vertex* a = &scratch[0]; + Vertex* b = &scratch[1]; + Vertex* c = &scratch[2]; + + memcpy_vertex(c, v1); + + _glClipEdge(v0, v1, a); + a->flags = GPU_CMD_VERTEX; + + _glClipEdge(v2, v0, b); + b->flags = GPU_CMD_VERTEX; + + _glPerspectiveDivideVertex(a); + _glPushHeaderOrVertex(a); + + _glPerspectiveDivideVertex(c); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(b); + _glPushHeaderOrVertex(b); + _glPushHeaderOrVertex(c); + + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + } + break; + } +} + +extern int PASSED, CLIPPED, SKIPPED; + +void SceneListSubmit(Vertex* v3, int n) { TRACE(); - /* You need at least a header, and 3 vertices to render anything */ if(n < 4) return; - const float h = vid_mode->height; - PVR_SET(SPAN_SORT_CFG, 0x0); //Set PVR DMA registers @@ -150,198 +326,93 @@ void SceneListSubmit(Vertex* v2, int n) { sq = SQ_BASE_ADDRESS; - for(int i = 0; i < n; ++i, ++v2) { - PREFETCH(v2 + 1); - switch(v2->flags) { + for(int i = 0; i < n; ++i, ++v3) { + PREFETCH(v3 + 1); + switch(v3->flags) { case GPU_CMD_VERTEX_EOL: break; case GPU_CMD_VERTEX: continue; default: - _glPushHeaderOrVertex(v2); + _glPushHeaderOrVertex(v3); continue; }; - Vertex* const v0 = v2 - 2; - Vertex* const v1 = v2 - 1; + // Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}] + Vertex* const v0 = v3 - 3; + Vertex* const v1 = v3 - 2; + Vertex* const v2 = v3 - 1; visible_mask = ( (v0->xyz[2] > -v0->w) << 0 | (v1->xyz[2] > -v1->w) << 1 | - (v2->xyz[2] > -v2->w) << 2 + (v2->xyz[2] > -v2->w) << 2 | + (v3->xyz[2] > -v3->w) << 3 ); - Vertex __attribute__((aligned(32))) scratch[4]; + + // Stats gathering found that when testing a 64x64x64 sized world, at most + // ~400-500 triangles needed clipping + // ~13% of the triangles in a frame needed clipping (percentage 
increased when fewer triangles overall) + // Based on this, the decision was made to optimise for rendering quads that + // were either entirely visible or entirely culled, at the expense of making + // partially visible quads a bit slower due to needing to be split into two triangles first + // Performance measuring indicated that overall FPS improved from this change + // of switching to processing 1 quad instead of 2 triangles though + + if (visible_mask == 15) PASSED += 2; + else if (visible_mask == 0) SKIPPED += 2; + else CLIPPED += 2; switch(visible_mask) { - case V0_VIS | V1_VIS | V2_VIS: /* All vertices visible */ + case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible { - _glPerspectiveDivideVertex(v0, h); + _glPerspectiveDivideVertex(v0); _glPushHeaderOrVertex(v0); - _glPerspectiveDivideVertex(v1, h); + _glPerspectiveDivideVertex(v1); _glPushHeaderOrVertex(v1); - _glPerspectiveDivideVertex(v2, h); + v2->flags = GPU_CMD_VERTEX_EOL; + _glPerspectiveDivideVertex(v2); + _glPushHeaderOrVertex(v2); + + + v2->flags = GPU_CMD_VERTEX; _glPushHeaderOrVertex(v2); - } - break; - case V0_VIS: /* First vertex was visible */ - { - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(v0, h); + v3->flags = GPU_CMD_VERTEX; + _glPerspectiveDivideVertex(v3); + _glPushHeaderOrVertex(v3); + + v0->flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(v0); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); } break; - case V1_VIS: /* Second vertex was visible */ - { - /* Second vertex was visible. In self case we need to create a triangle and produce - two new vertices: 1-2, and 2-3. 
*/ - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v1); - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - } + + case 0: // No vertices visible break; - case V0_VIS | V1_VIS: /* First and second vertex were visible */ + + default: // Some vertices visible { - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v1); - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - _glClipEdge(v1, v2, a); - a->flags = GPU_CMD_VERTEX_EOL; - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(c); - _glPushHeaderOrVertex(a); - } - break; - case V2_VIS: /* Third vertex was visible */ - { - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - - _glClipEdge(v2, v0, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - } - break; - case V0_VIS | V2_VIS: /* First and third vertex were visible */ - { - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - - memcpy_vertex(c, v2); - c->flags = GPU_CMD_VERTEX; - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v1, v2, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(v0, h); - _glPushHeaderOrVertex(v0); - - 
_glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - - c->flags = GPU_CMD_VERTEX_EOL; - _glPushHeaderOrVertex(c); - } - break; - case V1_VIS | V2_VIS: /* Second and third vertex were visible */ - { - Vertex* a = &scratch[0]; - Vertex* b = &scratch[1]; - Vertex* c = &scratch[2]; - Vertex* d = &scratch[3]; - - memcpy_vertex(c, v1); - memcpy_vertex(d, v2); - - _glClipEdge(v0, v1, a); - a->flags = GPU_CMD_VERTEX; - - _glClipEdge(v2, v0, b); - b->flags = GPU_CMD_VERTEX; - - _glPerspectiveDivideVertex(a, h); - _glPushHeaderOrVertex(a); - - _glPerspectiveDivideVertex(c, h); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(b, h); - _glPushHeaderOrVertex(b); - _glPushHeaderOrVertex(c); - - _glPerspectiveDivideVertex(d, h); - _glPushHeaderOrVertex(d); + // vertices are modified in SubmitTriangle, so need to copy them + Vertex __attribute__((aligned(32))) scratch[4]; + Vertex* a0 = &scratch[0]; + Vertex* a2 = &scratch[1]; + memcpy_vertex(a0, v0); + memcpy_vertex(a2, v2); + + visible_mask &= (V0_VIS | V1_VIS | V2_VIS); + v2->flags = GPU_CMD_VERTEX_EOL; + SubmitTriangle(v0, v1, v2, visible_mask); + + visible_mask = ( + (a2->xyz[2] > -v2->w) << 0 | + (v3->xyz[2] > -v3->w) << 1 | + (a0->xyz[2] > -a0->w) << 2 + ); + v3->flags = GPU_CMD_VERTEX; + a0->flags = GPU_CMD_VERTEX_EOL; + SubmitTriangle(a2, v3, a0, visible_mask); } break; }