PS1: Minorly optimise T&L to save a few cycles

This commit is contained in:
UnknownShadow200 2025-05-31 22:32:05 +10:00
parent 4286c2d0c1
commit 72eb16bc7b
2 changed files with 52 additions and 33 deletions

View File

@ -23,13 +23,16 @@
// Issues GTE command word 0x00180001 through cop2, preceded by two nops
// NOTE(review): the nops presumably give preceding GTE register loads time to complete - confirm against GTE documentation
#define GTE_Exec_RTPS() __asm__ volatile ("nop; nop; cop2 0x00180001;")
// GTE_Store_XYn: stores cop2 data register $12/$13/$14 to the 32-bit word at (dst + off)
// e.g. expands to "swc2 $14, 8(%0);"
#define GTE_Store_XY0(dst, off) __asm__ volatile ("swc2 $12, " #off "(%0);" :: "r" (dst) : "memory" )
#define GTE_Store_XY1(dst, off) __asm__ volatile ("swc2 $13, " #off "(%0);" :: "r" (dst) : "memory" )
#define GTE_Store_XY2(dst, off) __asm__ volatile ("swc2 $14, " #off "(%0);" :: "r" (dst) : "memory" )
// off is passed as an "i" (immediate) operand, so it must be a compile time constant (e.g. offsetof(...))
#define GTE_Store_XY0(dst, off) __asm__ volatile ("swc2 $12, %1(%0);" :: "r" (dst), "i" (off) : "memory" )
#define GTE_Store_XY1(dst, off) __asm__ volatile ("swc2 $13, %1(%0);" :: "r" (dst), "i" (off) : "memory" )
#define GTE_Store_XY2(dst, off) __asm__ volatile ("swc2 $14, %1(%0);" :: "r" (dst), "i" (off) : "memory" )
// GTE_Load_XYZn: loads the word at (src + 0) and the word at (src + 4) into a pair of cop2 data registers
// NOTE(review): the load macros declare neither a "memory" clobber nor a memory operand, so the
//   compiler is not told that they read from *src - confirm this is safe for every caller
#define GTE_Load_XYZ0(src) __asm__ volatile ("lwc2 $0, 0(%0);" "lwc2 $1, 4(%0);" :: "r" (src) : )
#define GTE_Load_XYZ1(src) __asm__ volatile ("lwc2 $2, 0(%0);" "lwc2 $3, 4(%0);" :: "r" (src) : )
#define GTE_Load_XYZ2(src) __asm__ volatile ("lwc2 $4, 0(%0);" "lwc2 $5, 4(%0);" :: "r" (src) : )
// Split variants of the above: GTE_Load_XYn reads the word at (src + off), GTE_Load__Zn reads the word at (src + off + 4)
// off must be a compile time constant, as it is encoded directly into the lwc2 instruction
#define GTE_Load_XY0(src, off) __asm__ volatile ("lwc2 $0, 0 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load__Z0(src, off) __asm__ volatile ("lwc2 $1, 4 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load_XY1(src, off) __asm__ volatile ("lwc2 $2, 0 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load__Z1(src, off) __asm__ volatile ("lwc2 $3, 4 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load_XY2(src, off) __asm__ volatile ("lwc2 $4, 0 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load__Z2(src, off) __asm__ volatile ("lwc2 $5, 4 + %1(%0);" :: "r" (src), "i" (off) : )
#define GTE_Load_RotMatrix(mat) __asm__ volatile ( \
"lw $t0, 0(%0);\n" \
@ -134,18 +137,18 @@ enum gp1_cmd_display_mode {
// === GP0 POLYGON COMMANDS ===
#define POLY_CODE_F4 (GP0_CMD_POLYGON | POLY_CMD_QUAD)
#define POLY_LEN_F4 5
// Untextured quad polygon packet (POLY_CODE_F4), POLY_LEN_F4 words after the tag
// The four x,y pairs sit at byte offsets 8, 12, 16 and 20; the 3D draw code
//   writes them directly from the GTE using GTE_Store_XY* with these offsets
struct PSX_POLY_F4 {
typedef struct psx_poly_F4 {
uint32_t tag;
uint32_t rgbc; // r0, g0, b0, code;
int16_t x0, y0;
int16_t x1, y1;
int16_t x2, y2;
int16_t x3, y3;
};
} psx_poly_F4;
#define POLY_CODE_FT4 (GP0_CMD_POLYGON | POLY_CMD_QUAD | POLY_CMD_TEXTURED)
#define POLY_LEN_FT4 9
struct PSX_POLY_FT4 {
typedef struct psx_poly_FT4 {
uint32_t tag;
uint32_t rgbc; // r0, g0, b0, code;
uint16_t x0, y0;
@ -160,4 +163,4 @@ struct PSX_POLY_FT4 {
int16_t x3, y3;
uint8_t u3, v3;
uint16_t pad1;
};
} psx_poly_FT4;

View File

@ -818,6 +818,9 @@ void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, f
/*########################################################################################################################*
*---------------------------------------------------------Rendering-------------------------------------------------------*
*#########################################################################################################################*/
// Size in bytes of one vertex of each format
// Used as N * VERTEX_xxx_SIZE to form the constant byte offset of the Nth vertex for the GTE_Load_* macros
#define VERTEX_TEX_SIZE sizeof(struct PS1VertexTextured)
#define VERTEX_COL_SIZE sizeof(struct PS1VertexColoured)
void Gfx_SetVertexFormat(VertexFormat fmt) {
gfx_format = fmt;
gfx_stride = strideSizes[fmt];
@ -829,7 +832,7 @@ void Gfx_DrawVb_Lines(int verticesCount) {
static void DrawColouredQuads2D(int verticesCount, int startVertex) {
struct PS1VertexColoured* v = (struct PS1VertexColoured*)gfx_vertices + startVertex;
struct PSX_POLY_F4* poly = next_packet;
psx_poly_F4* poly = next_packet;
cc_uint8* max = next_packet_end - sizeof(*poly);
for (int i = 0; i < verticesCount; i += 4, v += 4)
@ -861,7 +864,7 @@ static void DrawTexturedQuads2D(int verticesCount, int startVertex) {
int uShift = curTex->u_shift, vShift = curTex->v_shift;
int tpage = curTex->tpage, clut = curTex->clut;
struct PSX_POLY_FT4* poly = next_packet;
psx_poly_FT4* poly = next_packet;
cc_uint8* max = next_packet_end - sizeof(*poly);
for (int i = 0; i < verticesCount; i += 4, v += 4)
@ -903,7 +906,7 @@ static void DrawColouredQuads3D(int verticesCount, int startVertex) {
struct PS1VertexColoured* v = (struct PS1VertexColoured*)gfx_vertices + startVertex;
uint32_t* ot = cur_buffer->ot;
struct PSX_POLY_F4* poly = next_packet;
psx_poly_F4* poly = next_packet;
cc_uint8* max = next_packet_end - sizeof(*poly);
for (int i = 0; i < verticesCount; i += 4, v += 4)
@ -914,9 +917,13 @@ static void DrawColouredQuads3D(int verticesCount, int startVertex) {
struct PS1VertexColoured* v3 = &v[3];
if ((cc_uint8*)poly > max) break;
GTE_Load_XYZ0(&v0->vx);
GTE_Load_XYZ1(&v1->vx);
GTE_Load_XYZ2(&v3->vx);
GTE_Load_XY0(v, 0 * VERTEX_COL_SIZE); // GTE_XY0 = v0->xy
GTE_Load__Z0(v, 0 * VERTEX_COL_SIZE); // GTE__Z0 = v0->z
GTE_Load_XY1(v, 1 * VERTEX_COL_SIZE); // GTE_XY1 = v1->xy
GTE_Load__Z1(v, 1 * VERTEX_COL_SIZE); // GTE__Z1 = v1->z
GTE_Load_XY2(v, 3 * VERTEX_COL_SIZE); // GTE_XY2 = v3->xy
GTE_Load__Z2(v, 3 * VERTEX_COL_SIZE); // GTE__Z2 = v3->z
GTE_Exec_RTPT(); // 23 cycles
setlen(poly, POLY_LEN_F4);
poly->rgbc = v0->rgbc;
@ -926,14 +933,16 @@ static void DrawColouredQuads3D(int verticesCount, int startVertex) {
int p; GTE_Get_OTZ(p);
if (p == 0 || (p >> 2) > OT_LENGTH) continue;
GTE_Store_XY0(poly, 8); // &poly->x0
GTE_Store_XY1(poly, 12); // &poly->x1
GTE_Store_XY2(poly, 16); // &poly->x2
GTE_Store_XY0(poly, offsetof(psx_poly_F4, x0));
GTE_Store_XY1(poly, offsetof(psx_poly_F4, x1));
GTE_Store_XY2(poly, offsetof(psx_poly_F4, x2));
GTE_Load_XY0(v, 2 * VERTEX_COL_SIZE); // GTE_XY0 = v2->xy
GTE_Load__Z0(v, 2 * VERTEX_COL_SIZE); // GTE__Z0 = v2->z
GTE_Load_XYZ0(&v2->vx);
GTE_Exec_RTPS(); // 15 cycles
addPrim(&ot[p >> 2], poly);
GTE_Store_XY2(poly, 20); // &poly->x3
GTE_Store_XY2(poly, offsetof(psx_poly_F4, x3));
poly++;
}
@ -948,9 +957,10 @@ static void DrawTexturedQuads3D(int verticesCount, int startVertex) {
int vShift = curTex->v_shift;
int tpage = curTex->tpage, clut = curTex->clut;
int bmode = blend_mode;
uint32_t* ot = cur_buffer->ot;
struct PSX_POLY_FT4* poly = next_packet;
psx_poly_FT4* poly = next_packet;
cc_uint8* max = next_packet_end - sizeof(*poly);
for (int i = 0; i < verticesCount; i += 4, v += 4)
@ -961,12 +971,17 @@ static void DrawTexturedQuads3D(int verticesCount, int startVertex) {
struct PS1VertexTextured* v3 = &v[3];
if ((cc_uint8*)poly > max) break;
GTE_Load_XYZ0(&v0->vx);
GTE_Load_XYZ1(&v1->vx);
GTE_Load_XYZ2(&v3->vx);
GTE_Load_XY0(v, 0 * VERTEX_TEX_SIZE); // GTE_XY0 = v0->xy
GTE_Load__Z0(v, 0 * VERTEX_TEX_SIZE); // GTE__Z0 = v0->z
GTE_Load_XY1(v, 1 * VERTEX_TEX_SIZE); // GTE_XY1 = v1->xy
GTE_Load__Z1(v, 1 * VERTEX_TEX_SIZE); // GTE__Z1 = v1->z
GTE_Load_XY2(v, 3 * VERTEX_TEX_SIZE); // GTE_XY2 = v3->xy
GTE_Load__Z2(v, 3 * VERTEX_TEX_SIZE); // GTE__Z2 = v3->z
GTE_Exec_RTPT(); // 23 cycles
setlen(poly, POLY_LEN_FT4);
poly->rgbc = v0->rgbc | blend_mode;
poly->rgbc = v0->rgbc | bmode;
poly->u0 = (v1->u >> uShift) + uOffset;
// Check for backface culling
GTE_Exec_NCLIP(); // 8 cycles
@ -980,16 +995,17 @@ static void DrawTexturedQuads3D(int verticesCount, int startVertex) {
int p; GTE_Get_OTZ(p);
if (p == 0 || (p >> 2) > OT_LENGTH) continue;
GTE_Store_XY0(poly, 8); // &poly->x0
GTE_Store_XY1(poly, 16); // &poly->x1
GTE_Store_XY2(poly, 24); // &poly->x2
GTE_Store_XY0(poly, offsetof(psx_poly_FT4, x0));
GTE_Store_XY1(poly, offsetof(psx_poly_FT4, x1));
GTE_Store_XY2(poly, offsetof(psx_poly_FT4, x2));
GTE_Load_XY0(v, 2 * VERTEX_TEX_SIZE); // GTE_XY0 = v2->xy
GTE_Load__Z0(v, 2 * VERTEX_TEX_SIZE); // GTE__Z0 = v2->z
GTE_Load_XYZ0(&v2->vx);
GTE_Exec_RTPS(); // 15 cycles
addPrim(&ot[p >> 2], poly);
GTE_Store_XY2(poly, 32); // &poly->x3
GTE_Store_XY2(poly, offsetof(psx_poly_FT4, x3));
poly->u0 = (v1->u >> uShift) + uOffset;
poly->v0 = (v1->v >> vShift) + vOffset;
poly->u1 = (v0->u >> uShift) + uOffset;
poly->v1 = (v0->v >> vShift) + vOffset;
@ -1053,7 +1069,7 @@ static void SendDrawCommands(RenderBuffer* buf) {
}
void Gfx_EndFrame(void) {
if ((cc_uint8*)next_packet >= next_packet_end - sizeof(struct PSX_POLY_FT4)) {
if ((cc_uint8*)next_packet >= next_packet_end - sizeof(psx_poly_FT4)) {
Platform_LogConst("OUT OF VERTEX RAM");
}
WaitUntilFinished();