Dreamcast: Squeeze a bit more performance out of the system

This commit is contained in:
UnknownShadow200 2023-11-23 21:57:37 +11:00
parent e57d0e2575
commit 064be092e8
3 changed files with 75 additions and 167 deletions

View File

@ -1,7 +1,6 @@
#include "platform.h" #include "platform.h"
#include "sh4.h" #include "sh4.h"
#define CLIP_DEBUG 0 #define CLIP_DEBUG 0
#define PVR_VERTEX_BUF_SIZE 2560 * 256 #define PVR_VERTEX_BUF_SIZE 2560 * 256
@ -118,13 +117,15 @@ static volatile uint32_t* PVR_LMMODE0 = (uint32_t*) 0xA05F6884;
static volatile uint32_t* PVR_LMMODE1 = (uint32_t*) 0xA05F6888; static volatile uint32_t* PVR_LMMODE1 = (uint32_t*) 0xA05F6888;
static volatile uint32_t* QACR = (uint32_t*) 0xFF000038; static volatile uint32_t* QACR = (uint32_t*) 0xFF000038;
#define V0_VIS (1 << 0)
#define V1_VIS (1 << 1)
#define V2_VIS (1 << 2)
void SceneListSubmit(Vertex* v2, int n) { void SceneListSubmit(Vertex* v2, int n) {
TRACE(); TRACE();
/* You need at least a header, and 3 vertices to render anything */ /* You need at least a header, and 3 vertices to render anything */
if(n < 4) { if(n < 4) return;
return;
}
const float h = vid_mode->height; const float h = vid_mode->height;
@ -146,7 +147,6 @@ void SceneListSubmit(Vertex* v2, int n) {
fprintf(stderr, "----\n"); fprintf(stderr, "----\n");
#endif #endif
uint8_t visible_mask = 0; uint8_t visible_mask = 0;
uint8_t counter = 0;
sq = SQ_BASE_ADDRESS; sq = SQ_BASE_ADDRESS;
@ -154,20 +154,11 @@ void SceneListSubmit(Vertex* v2, int n) {
PREFETCH(v2 + 1); PREFETCH(v2 + 1);
switch(v2->flags) { switch(v2->flags) {
case GPU_CMD_VERTEX_EOL: case GPU_CMD_VERTEX_EOL:
if(counter < 2) {
continue;
}
counter = 0;
break; break;
case GPU_CMD_VERTEX: case GPU_CMD_VERTEX:
++counter;
if(counter < 3) {
continue; continue;
}
break;
default: default:
_glPushHeaderOrVertex(v2); _glPushHeaderOrVertex(v2);
counter = 0;
continue; continue;
}; };
@ -177,12 +168,12 @@ void SceneListSubmit(Vertex* v2, int n) {
visible_mask = ( visible_mask = (
(v0->xyz[2] > -v0->w) << 0 | (v0->xyz[2] > -v0->w) << 0 |
(v1->xyz[2] > -v1->w) << 1 | (v1->xyz[2] > -v1->w) << 1 |
(v2->xyz[2] > -v2->w) << 2 | (v2->xyz[2] > -v2->w) << 2
(counter == 0) << 3
); );
Vertex __attribute__((aligned(32))) scratch[4];
switch(visible_mask) { switch(visible_mask) {
case 15: /* All visible, but final vertex in strip */ case V0_VIS | V1_VIS | V2_VIS: /* All vertices visible */
{ {
_glPerspectiveDivideVertex(v0, h); _glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
@ -194,15 +185,8 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(v2); _glPushHeaderOrVertex(v2);
} }
break; break;
case 7: case V0_VIS: /* First vertex was visible */
/* All visible, push the first vertex and move on */
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
break;
case 9:
/* First vertex was visible, last in strip */
{ {
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
@ -222,36 +206,10 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(b); _glPushHeaderOrVertex(b);
} }
break; break;
case 1: case V1_VIS: /* Second vertex was visible */
/* First vertex was visible, but not last in strip */
{ {
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v2, v0, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(b);
}
break;
case 10:
case 2:
/* Second vertex was visible. In this case we need to create a triangle and produce /* Second vertex was visible. In this case we need to create a triangle and produce
two new vertices: 1-2, and 2-3. */ two new vertices: 1-2, and 2-3. */
{
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2]; Vertex* c = &scratch[2];
@ -262,7 +220,7 @@ void SceneListSubmit(Vertex* v2, int n) {
a->flags = GPU_CMD_VERTEX; a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b); _glClipEdge(v1, v2, b);
b->flags = v2->flags; b->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(a, h); _glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a); _glPushHeaderOrVertex(a);
@ -274,10 +232,8 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(b); _glPushHeaderOrVertex(b);
} }
break; break;
case 11: case V0_VIS | V1_VIS: /* First and second vertex were visible */
case 3: /* First and second vertex were visible */
{ {
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2]; Vertex* c = &scratch[2];
@ -291,7 +247,7 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(v0); _glPushHeaderOrVertex(v0);
_glClipEdge(v1, v2, a); _glClipEdge(v1, v2, a);
a->flags = v2->flags; a->flags = GPU_CMD_VERTEX_EOL;
_glPerspectiveDivideVertex(c, h); _glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(c);
@ -304,11 +260,8 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(a); _glPushHeaderOrVertex(a);
} }
break; break;
case 12: case V2_VIS: /* Third vertex was visible */
case 4:
/* Third vertex was visible. */
{ {
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2]; Vertex* c = &scratch[2];
@ -324,10 +277,6 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPerspectiveDivideVertex(a, h); _glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a); _glPushHeaderOrVertex(a);
if(counter % 2 == 1) {
_glPushHeaderOrVertex(a);
}
_glPerspectiveDivideVertex(b, h); _glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b); _glPushHeaderOrVertex(b);
@ -335,9 +284,8 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(c);
} }
break; break;
case 13: case V0_VIS | V2_VIS: /* First and third vertex were visible */
{ {
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2]; Vertex* c = &scratch[2];
@ -366,39 +314,8 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(c); _glPushHeaderOrVertex(c);
} }
break; break;
case 5: /* First and third vertex were visible */ case V1_VIS | V2_VIS: /* Second and third vertex were visible */
{ {
Vertex __attribute__((aligned(32))) scratch[3];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
Vertex* c = &scratch[2];
memcpy_vertex(c, v2);
c->flags = GPU_CMD_VERTEX;
_glClipEdge(v0, v1, a);
a->flags = GPU_CMD_VERTEX;
_glClipEdge(v1, v2, b);
b->flags = GPU_CMD_VERTEX;
_glPerspectiveDivideVertex(v0, h);
_glPushHeaderOrVertex(v0);
_glPerspectiveDivideVertex(a, h);
_glPushHeaderOrVertex(a);
_glPerspectiveDivideVertex(c, h);
_glPushHeaderOrVertex(c);
_glPerspectiveDivideVertex(b, h);
_glPushHeaderOrVertex(b);
_glPushHeaderOrVertex(c);
}
break;
case 14:
case 6: /* Second and third vertex were visible */
{
Vertex __attribute__((aligned(32))) scratch[4];
Vertex* a = &scratch[0]; Vertex* a = &scratch[0];
Vertex* b = &scratch[1]; Vertex* b = &scratch[1];
Vertex* c = &scratch[2]; Vertex* c = &scratch[2];
@ -427,9 +344,6 @@ void SceneListSubmit(Vertex* v2, int n) {
_glPushHeaderOrVertex(d); _glPushHeaderOrVertex(d);
} }
break; break;
case 8:
default:
break;
} }
} }

View File

@ -62,8 +62,6 @@ GL_FORCE_INLINE void* memcpy_fast(void *dest, const void *src, size_t len) {
#define MEMCPY4(dst, src, bytes) memcpy_fast(dst, src, bytes) #define MEMCPY4(dst, src, bytes) memcpy_fast(dst, src, bytes)
#define MEMSET4(dst, v, size) memset4((dst), (v), (size))
GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) { GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* oxyz, float* ow) {
register float __x __asm__("fr12") = (xyz[0]); register float __x __asm__("fr12") = (xyz[0]);
register float __y __asm__("fr13") = (xyz[1]); register float __y __asm__("fr13") = (xyz[1]);
@ -85,10 +83,6 @@ GL_FORCE_INLINE void TransformVertex(const float* xyz, const float* w, float* ox
void InitGPU(_Bool autosort, _Bool fsaa); void InitGPU(_Bool autosort, _Bool fsaa);
static inline size_t GPUMemoryAvailable() {
return pvr_mem_available();
}
static inline void* GPUMemoryAlloc(size_t size) { static inline void* GPUMemoryAlloc(size_t size) {
return pvr_mem_malloc(size); return pvr_mem_malloc(size);
} }

View File

@ -58,7 +58,7 @@ GLubyte _glInitTextures() {
_glInitializeTextureObject(default_tex, 0); _glInitializeTextureObject(default_tex, 0);
TEXTURE_ACTIVE = default_tex; TEXTURE_ACTIVE = default_tex;
size_t vram_free = GPUMemoryAvailable(); size_t vram_free = pvr_mem_available();
YALLOC_SIZE = vram_free - PVR_MEM_BUFFER_SIZE; /* Take all but 64kb VRAM */ YALLOC_SIZE = vram_free - PVR_MEM_BUFFER_SIZE; /* Take all but 64kb VRAM */
YALLOC_BASE = GPUMemoryAlloc(YALLOC_SIZE); YALLOC_BASE = GPUMemoryAlloc(YALLOC_SIZE);