Optimise SoftGPU backend a bit

On a test Linux system, FPS went from 31 to 33
This commit is contained in:
UnknownShadow200 2024-06-04 17:20:00 +10:00
parent f50afc5c24
commit 4f732af28b
2 changed files with 60 additions and 59 deletions

View File

@ -59,10 +59,3 @@ jobs:
with: with:
SOURCE_FILE: 'misc/ps1/build/template.cue' SOURCE_FILE: 'misc/ps1/build/template.cue'
DEST_NAME: 'ClassiCube-PS1.cue' DEST_NAME: 'ClassiCube-PS1.cue'
- uses: ./.github/actions/notify_success
if: ${{ always() && steps.compile.outcome == 'success' }}
with:
DESTINATION_URL: '${{ secrets.NOTIFY_URL }}'
WORKFLOW_NAME: 'ps1'

View File

@ -5,10 +5,10 @@
#include "Window.h" #include "Window.h"
static cc_bool faceCulling; static cc_bool faceCulling;
static int width, height; static int fb_width, fb_height;
static struct Bitmap fb_bmp; static struct Bitmap fb_bmp;
static float vp_hwidth, vp_hheight; static float vp_hwidth, vp_hheight;
static int sc_maxX, sc_maxY; static int fb_maxX, fb_maxY;
static PackedCol* colorBuffer; static PackedCol* colorBuffer;
static PackedCol clearColor; static PackedCol clearColor;
@ -63,6 +63,7 @@ typedef struct CCTexture {
static CCTexture* curTexture; static CCTexture* curTexture;
static BitmapCol* curTexPixels; static BitmapCol* curTexPixels;
static int curTexWidth, curTexHeight; static int curTexWidth, curTexHeight;
static int texWidthMask, texHeightMask;
void Gfx_BindTexture(GfxResourceID texId) { void Gfx_BindTexture(GfxResourceID texId) {
if (!texId) texId = white_square; if (!texId) texId = white_square;
@ -72,6 +73,9 @@ void Gfx_BindTexture(GfxResourceID texId) {
curTexPixels = tex->pixels; curTexPixels = tex->pixels;
curTexWidth = tex->width; curTexWidth = tex->width;
curTexHeight = tex->height; curTexHeight = tex->height;
texWidthMask = (1 << Math_ilog2(tex->width)) - 1;
texHeightMask = (1 << Math_ilog2(tex->height)) - 1;
} }
void Gfx_DeleteTexture(GfxResourceID* texId) { void Gfx_DeleteTexture(GfxResourceID* texId) {
@ -123,7 +127,7 @@ static void SetAlphaBlend(cc_bool enabled) {
void Gfx_SetAlphaArgBlend(cc_bool enabled) { } void Gfx_SetAlphaArgBlend(cc_bool enabled) { }
void Gfx_ClearBuffers(GfxBuffers buffers) { void Gfx_ClearBuffers(GfxBuffers buffers) {
int i, size = width * height; int i, size = fb_width * fb_height;
if (buffers & GFX_BUFFER_COLOR) { if (buffers & GFX_BUFFER_COLOR) {
for (i = 0; i < size; i++) colorBuffer[i] = clearColor; for (i = 0; i < size; i++) colorBuffer[i] = clearColor;
@ -275,35 +279,39 @@ void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, f
/*########################################################################################################################* /*########################################################################################################################*
*---------------------------------------------------------Rendering-------------------------------------------------------* *---------------------------------------------------------Rendering-------------------------------------------------------*
*#########################################################################################################################*/ *#########################################################################################################################*/
typedef struct Vector4 { float x, y, z, w; } Vector4;
typedef struct Vector3 { float x, y, z; } Vector3; typedef struct Vector3 { float x, y, z; } Vector3;
typedef struct Vector2 { float x, y; } Vector2; typedef struct Vector2 { float x, y; } Vector2;
typedef struct Vertex_ {
float x, y, z, w;
float u, v;
PackedCol c;
} Vertex;
static void TransformVertex(int index, Vector4* frag, Vector2* uv, PackedCol* color) { static void TransformVertex(int index, Vertex* vertex) {
// TODO: avoid the multiply, just add down in DrawTriangles // TODO: avoid the multiply, just add down in DrawTriangles
char* ptr = (char*)gfx_vertices + index * gfx_stride; char* ptr = (char*)gfx_vertices + index * gfx_stride;
Vector3* pos = (Vector3*)ptr; Vector3* pos = (Vector3*)ptr;
Vector4 coord; struct Vec4 coord;
coord.x = pos->x * mvp.row1.x + pos->y * mvp.row2.x + pos->z * mvp.row3.x + mvp.row4.x; coord.x = pos->x * mvp.row1.x + pos->y * mvp.row2.x + pos->z * mvp.row3.x + mvp.row4.x;
coord.y = pos->x * mvp.row1.y + pos->y * mvp.row2.y + pos->z * mvp.row3.y + mvp.row4.y; coord.y = pos->x * mvp.row1.y + pos->y * mvp.row2.y + pos->z * mvp.row3.y + mvp.row4.y;
coord.z = pos->x * mvp.row1.z + pos->y * mvp.row2.z + pos->z * mvp.row3.z + mvp.row4.z; coord.z = pos->x * mvp.row1.z + pos->y * mvp.row2.z + pos->z * mvp.row3.z + mvp.row4.z;
coord.w = pos->x * mvp.row1.w + pos->y * mvp.row2.w + pos->z * mvp.row3.w + mvp.row4.w; coord.w = pos->x * mvp.row1.w + pos->y * mvp.row2.w + pos->z * mvp.row3.w + mvp.row4.w;
float invW = 1.0f / coord.w; float invW = 1.0f / coord.w;
frag->x = vp_hwidth * (1 + coord.x * invW); vertex->x = vp_hwidth * (1 + coord.x * invW);
frag->y = vp_hheight * (1 - coord.y * invW); vertex->y = vp_hheight * (1 - coord.y * invW);
frag->z = coord.z * invW; vertex->z = coord.z * invW;
frag->w = invW; vertex->w = invW;
if (gfx_format != VERTEX_FORMAT_TEXTURED) { if (gfx_format != VERTEX_FORMAT_TEXTURED) {
struct VertexColoured* v = (struct VertexColoured*)ptr; struct VertexColoured* v = (struct VertexColoured*)ptr;
*color = v->Col; vertex->c = v->Col;
} else { } else {
struct VertexTextured* v = (struct VertexTextured*)ptr; struct VertexTextured* v = (struct VertexTextured*)ptr;
*color = v->Col; vertex->u = (v->U + texOffsetX) * invW;
uv->x = (v->U + texOffsetX) * invW; vertex->v = (v->V + texOffsetY) * invW;
uv->y = (v->V + texOffsetY) * invW; vertex->c = v->Col;
} }
} }
@ -319,11 +327,10 @@ static int MultiplyColours(PackedCol vColor, BitmapCol tColor) {
return PackedCol_Make(r, g, b, a); return PackedCol_Make(r, g, b, a);
} }
static void DrawTriangle(Vector4 frag1, Vector4 frag2, Vector4 frag3, static void DrawTriangle(Vertex* frag1, Vertex* frag2, Vertex* frag3) {
Vector2 uv1, Vector2 uv2, Vector2 uv3, PackedCol color) { int x1 = (int)frag1->x, y1 = (int)frag1->y;
int x1 = (int)frag1.x, y1 = (int)frag1.y; int x2 = (int)frag2->x, y2 = (int)frag2->y;
int x2 = (int)frag2.x, y2 = (int)frag2.y; int x3 = (int)frag3->x, y3 = (int)frag3->y;
int x3 = (int)frag3.x, y3 = (int)frag3.y;
int minX = min(x1, min(x2, x3)); int minX = min(x1, min(x2, x3));
int minY = min(y1, min(y2, y3)); int minY = min(y1, min(y2, y3));
int maxX = max(x1, max(x2, x3)); int maxX = max(x1, max(x2, x3));
@ -337,18 +344,24 @@ static void DrawTriangle(Vector4 frag1, Vector4 frag2, Vector4 frag3,
} }
// Reject triangles completely outside // Reject triangles completely outside
if (minX < 0 && maxX < 0 || minX >= width && maxX >= width ) return; if ((minX < 0 && maxX < 0) || (minX > fb_maxX && maxX > fb_maxX)) return;
if (minY < 0 && maxY < 0 || minY >= height && maxY >= height) return; if ((minY < 0 && maxY < 0) || (minY > fb_maxY && maxY > fb_maxY)) return;
// Perform scissoring // Perform scissoring
minX = max(minX, 0); maxX = min(maxX, sc_maxX); minX = max(minX, 0); maxX = min(maxX, fb_maxX);
minY = max(minY, 0); maxY = min(maxY, sc_maxY); minY = max(minY, 0); maxY = min(maxY, fb_maxY);
// NOTE: W in frag variables below is actually 1/W // NOTE: W in frag variables below is actually 1/W
float factor = 1.0f / ((y2 - y3) * (x1 - x3) + (x3 - x2) * (y1 - y3)); float factor = 1.0f / ((y2 - y3) * (x1 - x3) + (x3 - x2) * (y1 - y3));
float w1 = frag1->w, w2 = frag2->w, w3 = frag3->w;
// TODO proper clipping // TODO proper clipping
if (frag1.w <= 0 || frag2.w <= 0 || frag3.w <= 0) return; if (w1 <= 0 || w2 <= 0 || w3 <= 0) return;
float z1 = frag1->z, z2 = frag2->z, z3 = frag3->z;
float u1 = frag1->u, u2 = frag2->u, u3 = frag3->u;
float v1 = frag1->v, v2 = frag2->v, v3 = frag3->v;
PackedCol color = frag1->c;
for (int y = minY; y <= maxY; y++) { for (int y = minY; y <= maxY; y++) {
float yy = y + 0.5f; float yy = y + 0.5f;
@ -362,9 +375,9 @@ static void DrawTriangle(Vector4 frag1, Vector4 frag2, Vector4 frag3,
float ic2 = 1.0f - ic0 - ic1; float ic2 = 1.0f - ic0 - ic1;
if (ic2 < 0 || ic2 > 1) continue; if (ic2 < 0 || ic2 > 1) continue;
int index = y * width + x; int index = y * fb_width + x;
float w = 1 / (ic0 * frag1.w + ic1 * frag2.w + ic2 * frag3.w); float w = 1 / (ic0 * w1 + ic1 * w2 + ic2 * w3);
float z = (ic0 * frag1.z + ic1 * frag2.z + ic2 * frag3.z) * w; float z = (ic0 * z1 + ic1 * z2 + ic2 * z3) * w;
if (depthTest && (z < 0 || z > depthBuffer[index])) continue; if (depthTest && (z < 0 || z > depthBuffer[index])) continue;
if (!colWrite) { if (!colWrite) {
@ -374,10 +387,10 @@ static void DrawTriangle(Vector4 frag1, Vector4 frag2, Vector4 frag3,
PackedCol fragColor = color; PackedCol fragColor = color;
if (gfx_format == VERTEX_FORMAT_TEXTURED) { if (gfx_format == VERTEX_FORMAT_TEXTURED) {
float u = (ic0 * uv1.x + ic1 * uv2.x + ic2 * uv3.x) * w; float u = (ic0 * u1 + ic1 * u2 + ic2 * u3) * w;
float v = (ic0 * uv1.y + ic1 * uv2.y + ic2 * uv3.y) * w; float v = (ic0 * v1 + ic1 * v2 + ic2 * v3) * w;
int texX = ((int)(Math_AbsF(u - Math_Floor(u)) * curTexWidth )) % curTexWidth; // TODO avoid slow % int texX = ((int)(Math_AbsF(u - Math_Floor(u)) * curTexWidth )) & texWidthMask;
int texY = ((int)(Math_AbsF(v - Math_Floor(v)) * curTexHeight)) % curTexHeight; int texY = ((int)(Math_AbsF(v - Math_Floor(v)) * curTexHeight)) & texHeightMask;
int texIndex = texY * curTexWidth + texX; int texIndex = texY * curTexWidth + texX;
fragColor = MultiplyColours(fragColor, curTexPixels[texIndex]); fragColor = MultiplyColours(fragColor, curTexPixels[texIndex]);
@ -407,24 +420,19 @@ static void DrawTriangle(Vector4 frag1, Vector4 frag2, Vector4 frag3,
} }
void DrawQuads(int startVertex, int verticesCount) { void DrawQuads(int startVertex, int verticesCount) {
Vector4 frag[4]; Vertex vertices[4];
Vector2 uv[4];
PackedCol color[4];
int j = startVertex; int j = startVertex;
// 4 vertices = 1 quad = 2 triangles // 4 vertices = 1 quad = 2 triangles
for (int i = 0; i < verticesCount / 4; i++, j += 4) for (int i = 0; i < verticesCount / 4; i++, j += 4)
{ {
TransformVertex(j + 0, &frag[0], &uv[0], &color[0]); TransformVertex(j + 0, &vertices[0]);
TransformVertex(j + 1, &frag[1], &uv[1], &color[1]); TransformVertex(j + 1, &vertices[1]);
TransformVertex(j + 2, &frag[2], &uv[2], &color[2]); TransformVertex(j + 2, &vertices[2]);
TransformVertex(j + 3, &frag[3], &uv[3], &color[3]); TransformVertex(j + 3, &vertices[3]);
DrawTriangle(frag[0], frag[1], frag[2], DrawTriangle(&vertices[0], &vertices[1], &vertices[2]);
uv[0], uv[1], uv[2], color[0]); DrawTriangle(&vertices[2], &vertices[3], &vertices[0]);
DrawTriangle(frag[2], frag[3], frag[0],
uv[2], uv[3], uv[0], color[2]);
} }
} }
@ -453,7 +461,7 @@ void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex) {
*#########################################################################################################################*/ *#########################################################################################################################*/
cc_result Gfx_TakeScreenshot(struct Stream* output) { cc_result Gfx_TakeScreenshot(struct Stream* output) {
struct Bitmap bmp; struct Bitmap bmp;
Bitmap_Init(bmp, width, height, colorBuffer); Bitmap_Init(bmp, fb_width, fb_height, colorBuffer);
return Png_Encode(&bmp, output, NULL, false, NULL); return Png_Encode(&bmp, output, NULL, false, NULL);
} }
@ -464,7 +472,7 @@ cc_bool Gfx_WarnIfNecessary(void) {
void Gfx_BeginFrame(void) { } void Gfx_BeginFrame(void) { }
void Gfx_EndFrame(void) { void Gfx_EndFrame(void) {
Rect2D r = { 0, 0, width, height }; Rect2D r = { 0, 0, fb_width, fb_height };
Window_DrawFramebuffer(r, &fb_bmp); Window_DrawFramebuffer(r, &fb_bmp);
} }
@ -476,17 +484,17 @@ void Gfx_SetFpsLimit(cc_bool vsync, float minFrameMs) {
void Gfx_OnWindowResize(void) { void Gfx_OnWindowResize(void) {
if (depthBuffer) DestroyBuffers(); if (depthBuffer) DestroyBuffers();
fb_bmp.width = width = Game.Width; fb_bmp.width = fb_width = Game.Width;
fb_bmp.height = height = Game.Height; fb_bmp.height = fb_height = Game.Height;
vp_hwidth = width / 2.0f; vp_hwidth = fb_width / 2.0f;
vp_hheight = height / 2.0f; vp_hheight = fb_height / 2.0f;
sc_maxX = width - 1; fb_maxX = fb_width - 1;
sc_maxY = height - 1; fb_maxY = fb_height - 1;
Window_AllocFramebuffer(&fb_bmp); Window_AllocFramebuffer(&fb_bmp);
depthBuffer = Mem_Alloc(width * height, 4, "depth buffer"); depthBuffer = Mem_Alloc(fb_width * fb_height, 4, "depth buffer");
colorBuffer = fb_bmp.scan0; colorBuffer = fb_bmp.scan0;
} }