mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-09 07:18:34 -04:00
Dreamcast: Slightly boost performance by attempting to perform polygon perspective division and clipping as a quad instead of 2 triangles when possible
This commit is contained in:
parent
064be092e8
commit
48f0cb7b1a
21
third_party/gldc/src/draw.c
vendored
21
third_party/gldc/src/draw.c
vendored
@ -75,22 +75,13 @@ static void generateQuads(SubmissionTarget* target, const GLsizei first, const G
|
||||
*((Float2*)it->uv) = F2ZERO;
|
||||
}
|
||||
|
||||
src += stride;
|
||||
src += stride;
|
||||
it->flags = GPU_CMD_VERTEX;
|
||||
it++;
|
||||
}
|
||||
|
||||
// Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}]
|
||||
PREFETCH(dst); // TODO: more prefetching?
|
||||
memcpy_vertex(dst + 5, dst + 0); dst[5].flags = GPU_CMD_VERTEX_EOL;
|
||||
memcpy_vertex(dst + 4, dst + 3); dst[4].flags = GPU_CMD_VERTEX;
|
||||
memcpy_vertex(dst + 3, dst + 2); dst[3].flags = GPU_CMD_VERTEX;
|
||||
|
||||
dst[2].flags = GPU_CMD_VERTEX_EOL;
|
||||
dst[1].flags = GPU_CMD_VERTEX;
|
||||
dst[0].flags = GPU_CMD_VERTEX;
|
||||
// TODO copy straight to dst??
|
||||
|
||||
dst += 6;
|
||||
|
||||
dst[3].flags = GPU_CMD_VERTEX_EOL;
|
||||
dst += 4;
|
||||
}
|
||||
}
|
||||
|
||||
@ -131,7 +122,7 @@ void APIENTRY glDrawArrays(GLenum mode, GLint first, GLsizei count) {
|
||||
TRACE();
|
||||
if (!count) return;
|
||||
|
||||
submitVertices(count * 6 / 4); // quads -> triangles
|
||||
submitVertices(count);
|
||||
generateQuads(&SUBMISSION_TARGET, first, count);
|
||||
}
|
||||
|
||||
|
413
third_party/gldc/src/sh4.c
vendored
413
third_party/gldc/src/sh4.c
vendored
@ -39,13 +39,13 @@ GL_FORCE_INLINE float _glFastInvert(float x) {
|
||||
return MATH_fsrra(x * x);
|
||||
}
|
||||
|
||||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex, const float h) {
|
||||
GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) {
|
||||
TRACE();
|
||||
|
||||
const float f = _glFastInvert(vertex->w);
|
||||
|
||||
/* Convert to NDC and apply viewport */
|
||||
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
|
||||
vertex->xyz[0] = (vertex->xyz[0] * f * 320) + 320;
|
||||
vertex->xyz[1] = (vertex->xyz[1] * f * -240) + 240;
|
||||
|
||||
/* Orthographic projections need to use invZ otherwise we lose
|
||||
@ -120,15 +120,191 @@ static volatile uint32_t* QACR = (uint32_t*) 0xFF000038;
|
||||
#define V0_VIS (1 << 0)
|
||||
#define V1_VIS (1 << 1)
|
||||
#define V2_VIS (1 << 2)
|
||||
#define V3_VIS (1 << 3)
|
||||
|
||||
void SceneListSubmit(Vertex* v2, int n) {
|
||||
static void SubmitTriangle(Vertex* v0, Vertex* v1, Vertex* v2, uint8_t visible_mask) {
|
||||
Vertex __attribute__((aligned(32))) scratch[4];
|
||||
|
||||
switch(visible_mask) {
|
||||
case V0_VIS | V1_VIS | V2_VIS: // All vertices visible
|
||||
{
|
||||
_glPerspectiveDivideVertex(v0);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(v1);
|
||||
_glPushHeaderOrVertex(v1);
|
||||
|
||||
_glPerspectiveDivideVertex(v2);
|
||||
_glPushHeaderOrVertex(v2);
|
||||
}
|
||||
break;
|
||||
case V0_VIS: // First vertex was visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(v0);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case V1_VIS: // Second vertex was visible
|
||||
{
|
||||
/* Second vertex was visible. In this case we need to create a triangle and produce
|
||||
two new vertices: 1-2, and 2-3. */
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case V0_VIS | V1_VIS: // First and second vertex were visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(v1, v2, a);
|
||||
a->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(c);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
break;
|
||||
case V2_VIS: // Third vertex was visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
|
||||
_glClipEdge(v2, v0, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(c);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case V0_VIS | V2_VIS: // First and third vertex were visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
c->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case V1_VIS | V2_VIS: // Second and third vertex were visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(v2);
|
||||
_glPushHeaderOrVertex(v2);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
extern int PASSED, CLIPPED, SKIPPED;
|
||||
|
||||
void SceneListSubmit(Vertex* v3, int n) {
|
||||
TRACE();
|
||||
|
||||
/* You need at least a header, and 3 vertices to render anything */
|
||||
if(n < 4) return;
|
||||
|
||||
const float h = vid_mode->height;
|
||||
|
||||
PVR_SET(SPAN_SORT_CFG, 0x0);
|
||||
|
||||
//Set PVR DMA registers
|
||||
@ -150,198 +326,93 @@ void SceneListSubmit(Vertex* v2, int n) {
|
||||
|
||||
sq = SQ_BASE_ADDRESS;
|
||||
|
||||
for(int i = 0; i < n; ++i, ++v2) {
|
||||
PREFETCH(v2 + 1);
|
||||
switch(v2->flags) {
|
||||
for(int i = 0; i < n; ++i, ++v3) {
|
||||
PREFETCH(v3 + 1);
|
||||
switch(v3->flags) {
|
||||
case GPU_CMD_VERTEX_EOL:
|
||||
break;
|
||||
case GPU_CMD_VERTEX:
|
||||
continue;
|
||||
default:
|
||||
_glPushHeaderOrVertex(v2);
|
||||
_glPushHeaderOrVertex(v3);
|
||||
continue;
|
||||
};
|
||||
|
||||
Vertex* const v0 = v2 - 2;
|
||||
Vertex* const v1 = v2 - 1;
|
||||
// Quads [0, 1, 2, 3] -> Triangles [{0, 1, 2} {2, 3, 0}]
|
||||
Vertex* const v0 = v3 - 3;
|
||||
Vertex* const v1 = v3 - 2;
|
||||
Vertex* const v2 = v3 - 1;
|
||||
|
||||
visible_mask = (
|
||||
(v0->xyz[2] > -v0->w) << 0 |
|
||||
(v1->xyz[2] > -v1->w) << 1 |
|
||||
(v2->xyz[2] > -v2->w) << 2
|
||||
(v2->xyz[2] > -v2->w) << 2 |
|
||||
(v3->xyz[2] > -v3->w) << 3
|
||||
);
|
||||
Vertex __attribute__((aligned(32))) scratch[4];
|
||||
|
||||
// Stats gathering found that when testing a 64x64x64 sized world, at most
|
||||
// ~400-500 triangles needed clipping
|
||||
// ~13% of the triangles in a frame needed clipping (percentage increased when fewer triangles overall)
|
||||
// Based on this, the decision was made to optimise for rendering quads that
|
||||
// were either entirely visible or entirely culled, at the expense of making
|
||||
// partially visible quads a bit slower due to needing to be split into two triangles first
|
||||
// Performance measuring indicated that overall FPS improved from this change
|
||||
// of trying to process 1 quad instead of 2 triangles though
|
||||
|
||||
if (visible_mask == 15) PASSED += 2;
|
||||
else if (visible_mask == 0) SKIPPED += 2;
|
||||
else CLIPPED += 2;
|
||||
|
||||
switch(visible_mask) {
|
||||
case V0_VIS | V1_VIS | V2_VIS: /* All vertices visible */
|
||||
case V0_VIS | V1_VIS | V2_VIS | V3_VIS: // All vertices visible
|
||||
{
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPerspectiveDivideVertex(v0);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(v1, h);
|
||||
_glPerspectiveDivideVertex(v1);
|
||||
_glPushHeaderOrVertex(v1);
|
||||
|
||||
_glPerspectiveDivideVertex(v2, h);
|
||||
v2->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPerspectiveDivideVertex(v2);
|
||||
_glPushHeaderOrVertex(v2);
|
||||
|
||||
|
||||
v2->flags = GPU_CMD_VERTEX;
|
||||
_glPushHeaderOrVertex(v2);
|
||||
}
|
||||
break;
|
||||
case V0_VIS: /* First vertex was visible */
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
v3->flags = GPU_CMD_VERTEX;
|
||||
_glPerspectiveDivideVertex(v3);
|
||||
_glPushHeaderOrVertex(v3);
|
||||
|
||||
v0->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
break;
|
||||
case V1_VIS: /* Second vertex was visible */
|
||||
{
|
||||
/* Second vertex was visible. In this case we need to create a triangle and produce
|
||||
two new vertices: 1-2, and 2-3. */
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
}
|
||||
|
||||
case 0: // No vertices visible
|
||||
break;
|
||||
case V0_VIS | V1_VIS: /* First and second vertex were visible */
|
||||
|
||||
default: // Some vertices visible
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glClipEdge(v1, v2, a);
|
||||
a->flags = GPU_CMD_VERTEX_EOL;
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPushHeaderOrVertex(a);
|
||||
}
|
||||
break;
|
||||
case V2_VIS: /* Third vertex was visible */
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
|
||||
_glClipEdge(v2, v0, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case V0_VIS | V2_VIS: /* First and third vertex were visible */
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
|
||||
memcpy_vertex(c, v2);
|
||||
c->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v1, v2, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(v0, h);
|
||||
_glPushHeaderOrVertex(v0);
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
|
||||
c->flags = GPU_CMD_VERTEX_EOL;
|
||||
_glPushHeaderOrVertex(c);
|
||||
}
|
||||
break;
|
||||
case V1_VIS | V2_VIS: /* Second and third vertex were visible */
|
||||
{
|
||||
Vertex* a = &scratch[0];
|
||||
Vertex* b = &scratch[1];
|
||||
Vertex* c = &scratch[2];
|
||||
Vertex* d = &scratch[3];
|
||||
|
||||
memcpy_vertex(c, v1);
|
||||
memcpy_vertex(d, v2);
|
||||
|
||||
_glClipEdge(v0, v1, a);
|
||||
a->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glClipEdge(v2, v0, b);
|
||||
b->flags = GPU_CMD_VERTEX;
|
||||
|
||||
_glPerspectiveDivideVertex(a, h);
|
||||
_glPushHeaderOrVertex(a);
|
||||
|
||||
_glPerspectiveDivideVertex(c, h);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(b, h);
|
||||
_glPushHeaderOrVertex(b);
|
||||
_glPushHeaderOrVertex(c);
|
||||
|
||||
_glPerspectiveDivideVertex(d, h);
|
||||
_glPushHeaderOrVertex(d);
|
||||
// vertices are modified in SubmitTriangle, so need to copy them
|
||||
Vertex __attribute__((aligned(32))) scratch[4];
|
||||
Vertex* a0 = &scratch[0];
|
||||
Vertex* a2 = &scratch[1];
|
||||
memcpy_vertex(a0, v0);
|
||||
memcpy_vertex(a2, v2);
|
||||
|
||||
visible_mask &= (V0_VIS | V1_VIS | V2_VIS);
|
||||
v2->flags = GPU_CMD_VERTEX_EOL;
|
||||
SubmitTriangle(v0, v1, v2, visible_mask);
|
||||
|
||||
visible_mask = (
|
||||
(a2->xyz[2] > -v2->w) << 0 |
|
||||
(v3->xyz[2] > -v3->w) << 1 |
|
||||
(a0->xyz[2] > -a0->w) << 2
|
||||
);
|
||||
v3->flags = GPU_CMD_VERTEX;
|
||||
a0->flags = GPU_CMD_VERTEX_EOL;
|
||||
SubmitTriangle(a2, v3, a0, visible_mask);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user