Dreamcast: Slightly boost performance by attempting to perform polygon perspective division and clipping as a quad instead of 2 triangles when possible

This commit is contained in:
UnknownShadow200 2023-11-24 08:00:55 +11:00
parent 064be092e8
commit 48f0cb7b1a
2 changed files with 248 additions and 186 deletions

View File

@ -76,21 +76,12 @@ static void generateQuads(SubmissionTarget* target, const GLsizei first, const G
} }
src += stride; src += stride;
it->flags = GPU_CMD_VERTEX;
it++; it++;
} }
// Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}] dst[3].flags = GPU_CMD_VERTEX_EOL;
PREFETCH(dst); // TODO: more prefetching? dst += 4;
memcpy_vertex(dst + 5, dst + 0); dst[5].flags = GPU_CMD_VERTEX_EOL;
memcpy_vertex(dst + 4, dst + 3); dst[4].flags = GPU_CMD_VERTEX;
memcpy_vertex(dst + 3, dst + 2); dst[3].flags = GPU_CMD_VERTEX;
dst[2].flags = GPU_CMD_VERTEX_EOL;
dst[1].flags = GPU_CMD_VERTEX;
dst[0].flags = GPU_CMD_VERTEX;
// TODO copy straight to dst??
dst += 6;
} }
} }
@ -131,7 +122,7 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) {
TRACE(); TRACE();
if (!count) return; if (!count) return;
submitVertices(count * 6 / 4); // quads -> triangles submitVertices(count);
generateQuads(&SUBMISSION_TARGET, first, count); generateQuads(&SUBMISSION_TARGET, first, count);
} }

View File

@ -39,13 +39,13 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
return MATH_fsrra(x * x); return MATH_fsrra(x * x);
} }
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) { GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
TRACE(); TRACE();
const float f = _glFastInvert(vertex->w); const float f = _glFastInvert(vertex->w);
/* Convert to NDC and apply viewport */ /* Convert to NDC and apply viewport */
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320; vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240; vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
/* Orthographic projections need to use invZ otherwise we lose /* Orthographic projections need to use invZ otherwise we lose
@ -120,15 +120,191 @@ static volatile uint32_t* QACR = (uint32_t*) 0xFF000038;
#define V0_VIS (1 << 0) #define V0_VIS (1 << 0)
#define V1_VIS (1 << 1) #define V1_VIS (1 << 1)
#define V2_VIS (1 << 2) #define V2_VIS (1 << 2)
#define V3_VIS (1 << 3)
/*
 * SubmitTriangle: emits the triangle (v0, v1, v2), clipping it first when only
 * some of its vertices are flagged visible.
 *
 *   v0, v1, v2    - triangle vertices. Some cases perspective-divide these in place,
 *                   so the caller must not expect them to be unchanged afterwards.
 *   visible_mask  - combination of V0_VIS / V1_VIS / V2_VIS, one bit per visible vertex.
 *
 * Every emitted vertex goes through _glPerspectiveDivideVertex and then _glPushHeaderOrVertex.
 * New vertices produced by _glClipEdge are written into a local scratch array.
 * There is no case for a mask of 0 and no default case, so nothing is emitted when
 * no vertex is visible.
 *
 * NOTE(review): _glClipEdge(x, y, out) presumably writes the point where edge x-y crosses
 * the clip plane into out - its definition is not visible here, confirm.
 * NOTE(review): memcpy_vertex(dst, src) is assumed to copy the whole vertex including
 * its flags - confirm.
 */
void SceneListSubmit(Vertex* v2, int n) { static void SubmitTriangle(Vertex* v0, Vertex* v1, Vertex* v2, uint8_t visible_mask) {
// Temporary storage for clipped/copied vertices; only entries 0..2 are used below
Vertex __attribute__((aligned(32))) scratch[4];
switch(visible_mask) {
case V0_VIS | V1_VIS | V2_VIS: // All vertices visible
{
// No clipping needed: divide and push each vertex as is.
// Flags are not touched here, so they must already have been set by the caller.
_glPerspectiveDivideVertex(v0);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1);
_glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2);
_glPushHeaderOrVertex(v2);
}
break;
case V0_VIS: // First vertex was visible
{
// Emits v0, a, b where a lies on edge v0-v1 and b lies on edge v2-v0
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
// Both edges are clipped before v0 is divided, since the divide overwrites v0 in place
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v0);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case V1_VIS: // Second vertex was visible
{
/* Second vertex was visible. In this case we need to create a triangle and produce
two new vertices: one on edge v0-v1 and one on edge v1-v2. */
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
// Work on a copy of v1, so v1 itself is not modified by the divide below
memcpy_vertex(c, v1);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX_EOL;
// Emits a, c, b
_glPerspectiveDivideVertex(a);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case V0_VIS | V1_VIS: // First and second vertex were visible
{
// Two new vertices are needed: b on edge v2-v0 and a on edge v1-v2.
// Emits five vertices in the order v0, c, b, c, a
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
// c is a copy of v1, so v1 itself is not modified by the divide below
memcpy_vertex(c, v1);
// Edge v2-v0 is clipped before v0 is divided in place
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0);
_glPushHeaderOrVertex(v0);
// Uses the unmodified v1 and v2
_glClipEdge(v1, v2, a);
a->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(c);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a);
// c was already divided above, so it is pushed a second time without another divide
// NOTE(review): presumably repeated so the strip covers v0-c-b and b-c-a - confirm
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case V2_VIS: // Third vertex was visible
{
// Emits a, b, c where a lies on edge v2-v0, b lies on edge v1-v2 and c is a copy of v2
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
// c's flags are not assigned in this case, so they are whatever was copied from v2
// NOTE(review): as c is the last vertex pushed, v2 is presumably expected to already
// be flagged GPU_CMD_VERTEX_EOL by the caller - confirm
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c);
_glPushHeaderOrVertex(c);
}
break;
case V0_VIS | V2_VIS: // First and third vertex were visible
{
// Two new vertices are needed: a on edge v0-v1 and b on edge v1-v2.
// Emits five vertices in the order v0, a, c, b, c
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
// c is a copy of v2; its flags are overridden because it is first pushed mid-sequence
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
// Edge v0-v1 is clipped before v0 is divided in place
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
// c is pushed again (already divided) as the final vertex, now flagged as end of list
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case V1_VIS | V2_VIS: // Second and third vertex were visible
{
// Two new vertices are needed: a on edge v0-v1 and b on edge v2-v0.
// Emits five vertices in the order a, c, b, c, v2
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
// c is a copy of v1, so v1 itself is not modified by the divide below
memcpy_vertex(c, v1);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b);
_glPushHeaderOrVertex(b);
// c was already divided above, so it is pushed a second time without another divide
_glPushHeaderOrVertex(c);
// v2 is divided in place (not copied) and pushed last with whatever flags the caller gave it
_glPerspectiveDivideVertex(v2);
_glPushHeaderOrVertex(v2);
}
break;
}
}
extern int PASSED, CLIPPED, SKIPPED;
void SceneListSubmit(Vertex* v3, int n) {
TRACE(); TRACE();
/* You need at least a header, and 3 vertices to render anything */ /* You need at least a header, and 3 vertices to render anything */
if(n < 4) return; if(n < 4) return;
const float h = vid_mode->height;
PVR_SET(SPAN_SORT_CFG, 0x0); PVR_SET(SPAN_SORT_CFG, 0x0);
//Set PVR DMA registers //Set PVR DMA registers
@ -150,198 +326,93 @@ void SceneListSubmit(Vertex* v2, int n) {
sq = SQ_BASE_ADDRESS; sq = SQ_BASE_ADDRESS;
for(int i = 0; i < n; ++i, ++v2) { for(int i = 0; i < n; ++i, ++v3) {
PREFETCH(v2 + 1); PREFETCH(v3 + 1);
switch(v2->flags) { switch(v3->flags) {
case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX_EOL:
break; break;
case GPU_CMD_VERTEX: case GPU_CMD_VERTEX:
continue; continue;
default: default:
_glPushHeaderOrVertex(v2); _glPushHeaderOrVertex(v3);
continue; continue;
}; };
Vertex* const v0 = v2 - 2; // Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}]
Vertex* const v1 = v2 - 1; Vertex* const v0 = v3 - 3;
Vertex* const v1 = v3 - 2;
Vertex* const v2 = v3 - 1;
visible_mask = ( visible_mask = (
(v0->xyz[2] > -v0->w) << 0 | (v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 | (v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 (v2->xyz[2] > -v2->w) << 2 |
(v3->xyz[2] > -v3->w) << 3
); );
Vertex __attribute__((aligned(32))) scratch[4];
// Stats gathering found that when testing a 64x64x64 sized world, at most
// ~400-500 triangles needed clipping
// ~13% of the triangles in a frame needed clipping (percentage increased when fewer triangles overall)
// Based on this, the decision was made to optimise for rendering quads that
// were either entirely visible or entirely culled, at the expense of making
// partially visible quads a bit slower due to needing to be split into two triangles first
// Performance measuring indicated that overall FPS still improved from this change
// of switching to try to process 1 quad instead of 2 triangles
if (visible_mask == 15) PASSED += 2;
else if (visible_mask == 0) SKIPPED += 2;
else CLIPPED += 2;
switch(visible_mask) { switch(visible_mask) {
case V0_VIS | V1_VIS | V2_VIS: /* All vertices visible */ case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible
{ {
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0);
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(v1, h); _glPerspectiveDivideVertex(v1);
_glPushHeaderOrVertex(v1); _glPushHeaderOrVertex(v1);
_glPerspectiveDivideVertex(v2, h); v2->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(v2);
_glPushHeaderOrVertex(v2); _glPushHeaderOrVertex(v2);
}
break;
case V0_VIS: /* First vertex was visible */
{
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b); v2->flags = GPU_CMD_VERTEX;
b->flags = GPU_CMD_VERTEX_EOL; _glPushHeaderOrVertex(v2);
_glPerspectiveDivideVertex(v0, h); v3->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v3);
_glPushHeaderOrVertex(v3);
v0->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
} }
break; break;
case V1_VIS: /* Second vertex was visible */
{
/* Second vertex was visible. In self case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v1); case 0: // No vertices visible
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
}
break; break;
case V0_VIS | V1_VIS: /* First and second vertex were visible */
default: // Some vertices visible
{ {
Vertex* a = &scratch[0]; // vertices are modified in SubmitTriangle, so need to copy them
Vertex* b = &scratch[1]; Vertex __attribute__((aligned(32))) scratch[4];
Vertex* c = &scratch[2]; Vertex* a0 = &scratch[0];
Vertex* a2 = &scratch[1];
memcpy_vertex(a0, v0);
memcpy_vertex(a2, v2);
memcpy_vertex(c, v1); visible_mask &= (V0_VIS | V1_VIS | V2_VIS);
v2->flags = GPU_CMD_VERTEX_EOL;
SubmitTriangle(v0, v1, v2, visible_mask);
_glClipEdge(v2, v0, b); visible_mask = (
b->flags = GPU_CMD_VERTEX; (a2->xyz[2] > -v2->w) << 0 |
(v3->xyz[2] > -v3->w) << 1 |
_glPerspectiveDivideVertex(v0, h); (a0->xyz[2] > -a0->w) << 2
_glPushHeaderOrVertex(v0); );
v3->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, a); a0->flags = GPU_CMD_VERTEX_EOL;
a->flags = GPU_CMD_VERTEX_EOL; SubmitTriangle(a2, v3, a0, visible_mask);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(c);
_glPushHeaderOrVertex(a);
}
break;
case V2_VIS: /* Third vertex was visible */
{
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
_glClipEdge(v2, v0, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
}
break;
case V0_VIS | V2_VIS: /* First and third vertex were visible */
{
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
c->flags = GPU_CMD_VERTEX_EOL;
_glPushHeaderOrVertex(c);
}
break;
case V1_VIS | V2_VIS: /* Second and third vertex were visible */
{
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
Vertex* d = &scratch[3];
memcpy_vertex(c, v1);
memcpy_vertex(d, v2);
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(d, h);
_glPushHeaderOrVertex(d);
} }
break; break;
} }