Dreamcast: Optimise clipping slightly

This commit is contained in:
UnknownShadow200 2025-05-31 18:50:37 +10:00
parent 36785ad6db
commit ee2e521f5d
4 changed files with 140 additions and 160 deletions

View File

@ -33,7 +33,7 @@
!
! out->u = invt * v1->u + t * v2->u;
! out->v = invt * v1->v + t * v2->v;
! out->w = invt * v1->w + t * v2->w;
! out->w = 1 / zNear // invt * v1->w + t * v2->w always ends up being zNear; its reciprocal (NEAR_CLIP_W) is what gets stored
!
! out->b = invt * v1->b + t * v2->b;
! out->g = invt * v1->g + t * v2->g;
@ -61,7 +61,7 @@ _ClipEdge:
fmov.s @TM1,fr11 ! LS, fr11 = v2->z
fsub fr2,fr11 ! FE, fr11 = v2->z - v1->z
fldi0 fr8 ! LS, fr8 = 0
mov.l TYP,@OUT ! LS, OUT->flags = TYPE
mov.l TYP,@OUT ! LS, OUT->cmd = TYPE
fmul fr11,fr11 ! FE, fr11 = (v2->z - v1->z) * (v2->z - v1->z)
fldi0 fr9 ! LS, fr9 = 0
fldi0 fr0 ! LS, fr0 = 0
@ -74,117 +74,137 @@ _ClipEdge:
add #4, IN2 ! EX, v2 += 4
add #4, OUT ! EX, OUT += 4
fsub fr11,fr10 ! FE, fr10 = 1.0 - t --> invT
! Load X components
fmov.s @IN1+, fr2 ! LS, A1 = v1->x, v1 += 4
fmov.s @IN2+, fr3 ! LS, B1 = v2->x, v2 += 4
! Start interpolating X
fipr fv8, fv0 ! FE, LERP(A1, B1)
! Load Y components
fmov.s @IN1+, fr6 ! LS, A2 = v1->y, v1 += 4
fmov.s @IN2+, fr7 ! LS, B2 = v2->y, v2 += 4
fmov.s fr3,@OUT ! LS, OUT->x = LERP
! Load W
mov.l _NEAR_CLIP_W,TM1 ! LS, tmp = NEAR_CLIP_W (set to 1 / zNear by Gfx_CalcPerspectiveMatrix)
lds TM1,fpul ! LS, FPUL = 1 / zNear
fsts fpul,fr2 ! LS, fr2 = invW = 1 / zNear
! Store interpolated X
fmul fr2,fr3 ! FE, fr3 = LERP * invW
fmov.s fr3,@OUT ! LS, OUT->x = LERP * invW
add #4, OUT ! EX, OUT += 4
! Start interpolating Y
fipr fv8, fv4 ! FE, LERP(A2, B2)
! Skip Z of input vertices
add #4, IN1 ! EX, v1 += 4
add #4, IN2 ! EX, v2 += 4
fmov.s fr7,@OUT ! LS, OUT->y = LERP
! Store interpolated Y
fmul fr2,fr7 ! FE, fr7 = LERP * invW
fmov.s fr7,@OUT ! LS, OUT->y = LERP * invW
add #4, OUT ! EX, OUT += 4
fmov.s fr1,@OUT ! LS, OUT->z = 0
! Store W
fmov.s fr2,@OUT ! LS, OUT->w = 1/zNear
add #4, OUT ! EX, OUT += 4
! Load U components
fmov.s @IN1+, fr2 ! LS, A1 = v1->u, v1 += 4
fmov.s @IN2+, fr3 ! LS, B1 = v2->u, v2 += 4
! Start interpolating U
fipr fv8, fv0 ! FE, LERP(A1, B1)
! Load V components
fmov.s @IN1+, fr6 ! LS, A2 = v1->v, v1 += 4
fmov.s @IN2+, fr7 ! LS, B2 = v2->v, v2 += 4
! Store interpolated U
fmov.s fr3,@OUT ! LS, OUT->u = LERP
add #4, OUT ! EX, OUT += 4
! Start interpolating V
fipr fv8, fv4 ! FE, LERP(A2, B2)
add #4, IN1 ! EX, v1 += 4
add #4, IN2 ! EX, v2 += 4
fmov.s @IN1,fr2 ! LS, A1 = v1->w
fmov.s @IN2,fr3 ! LS, B1 = v2->w
fmov.s fr7,@OUT ! LS, OUT->v = LERP
add #8, OUT ! EX, OUT += 8
fipr fv8, fv0 ! FE, LERP(A1, B1)
add #-4, IN1 ! EX, v1 -= 4
add #-4, IN2 ! EX, v2 -= 4
fmov.s fr3,@OUT ! LS, OUT->w = lerp
add #-4, OUT ! EX, OUT -= 4
! Load colours and check if equal
mov.l @IN1,CL1 ! LS, ACOLOR = v1->bgra
mov.l @IN2,CL2 ! LS, BCOLOR = v2->bgra
! Bypass RGBA interpolation if unnecessary
cmp/eq CL1,CL2 ! MT, T = ACOLOR == BCOLOR
! Store V
fmov.s fr7,@OUT ! LS, OUT->v = LERP
add #4, OUT ! EX, OUT += 4
! Bypass RGBA interpolation if unnecessary
bt.s 1f ! BR, if (T) goto 1;
mov CL1,CLO ! MT, OUTCOLOR = ACOLOR (branch delay instruction)
! Interpolate B
extu.b CL1,TM1 ! EX, val = ACOLOR.b
lds TM1,fpul ! CO, FPUL = val
float fpul,fr2 ! EX, fr2 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr2 ! FE, fr2 = float(FPUL)
extu.b CL2,TM1 ! EX, val = BCOLOR.b
lds TM1,fpul ! CO, FPUL = val
float fpul,fr3 ! EX, fr3 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr3 ! FE, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, LERP(A1, B1)
shlr8 CL1 ! EX, ACOLOR >>= 8
ftrc fr3,fpul ! FE, FPUL = int(lerp)
shlr8 CL2 ! EX, BCOLOR >>= 8
sts fpul,TM2 ! CO, tmp = FPUL
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,TM2 ! LS, tmp = FPUL
! Interpolate G
extu.b CL1,TM1 ! EX, val = ACOLOR.g
lds TM1,fpul ! CO, FPUL = val
float fpul,fr2 ! EX, fr2 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr2 ! FE, fr2 = float(FPUL)
extu.b CL2,TM1 ! EX, val = BCOLOR.g
lds TM1,fpul ! CO, FPUL = val
float fpul,fr3 ! EX, fr3 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr3 ! FE, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, LERP(A1, B1)
shlr8 CL1 ! EX, ACOLOR >>= 8
ftrc fr3,fpul ! FE, FPUL = int(lerp)
extu.b TM2,TM2 ! EX, tmp = (uint8)tmp
mov TM2,CLO ! MT, OUTCOLOR.b = tmp
shlr8 CL2 ! EX, BCOLOR >>= 8
sts fpul,TM2 ! CO, tmp = FPUL
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,TM2 ! LS, tmp = FPUL
! Interpolate R
extu.b CL1,TM1 ! EX, val = ACOLOR.r
lds TM1,fpul ! CO, FPUL = val
float fpul,fr2 ! EX, fr2 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr2 ! FE, fr2 = float(FPUL)
extu.b CL2,TM1 ! EX, val = BCOLOR.r
lds TM1,fpul ! CO, FPUL = val
float fpul,fr3 ! EX, fr3 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr3 ! FE, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, LERP(A1, B1)
shlr8 CL1 ! EX, ACOLOR >>= 8
ftrc fr3,fpul ! FE, FPUL = int(lerp)
extu.b TM2,TM2 ! EX, tmp = (uint8)tmp
shll8 TM2 ! EX, tmp <<= 8
or TM2,CLO ! EX, OUTCOLOR.g |= tmp
shlr8 CL2 ! EX, BCOLOR >>= 8
sts fpul,TM2 ! CO, tmp = FPUL
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,TM2 ! LS, tmp = FPUL
! Interpolate A
extu.b CL1,TM1 ! EX, val = ACOLOR.a
lds TM1,fpul ! CO, FPUL = val
float fpul,fr2 ! EX, fr2 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr2 ! FE, fr2 = float(FPUL)
extu.b CL2,TM1 ! EX, val = BCOLOR.a
lds TM1,fpul ! CO, FPUL = val
float fpul,fr3 ! EX, fr3 = float(FPUL)
lds TM1,fpul ! LS, FPUL = val
float fpul,fr3 ! FE, fr3 = float(FPUL)
fipr fv8, fv0 ! FE, LERP(A1, B1)
ftrc fr3,fpul ! FE, FPUL = int(lerp)
extu.b TM2,TM2 ! EX, tmp = (uint8)tmp
shll16 TM2 ! EX, tmp <<= 16
or TM2,CLO ! EX, OUTCOLOR.r |= tmp
sts fpul,TM2 ! CO, tmp = FPUL
ftrc fr3,fpul ! FE, FPUL = int(lerp)
sts fpul,TM2 ! LS, tmp = FPUL
extu.b TM2,TM2 ! EX, tmp = (uint8)tmp
shll16 TM2 ! EX, tmp <<= 16
shll8 TM2 ! EX, tmp <<= 8
or TM2,CLO ! EX, OUTCOLOR.a |= tmp
1:
rts ! CO, return after executing instruction in delay slot
mov.l CLO,@OUT ! LS, OUT->color = OUTCOLOR
add #-24, OUT ! EX, OUT -= 24
rts ! CO, return after executing instruction in delay slot
pref @OUT ! LS, trigger store queue flush
.size _ClipEdge, .-_ClipEdge
.type _ClipEdge, %function
.align 4
_NEAR_CLIP_W:
.float 0
.global _NEAR_CLIP_W

View File

@ -361,8 +361,11 @@ void Gfx_CalcOrthoMatrix(struct Matrix* matrix, float width, float height, float
}
static float Cotangent(float x) { return Math_CosF(x) / Math_SinF(x); }
extern float NEAR_CLIP_W;
void Gfx_CalcPerspectiveMatrix(struct Matrix* matrix, float fov, float aspect, float zFar) {
float zNear = 0.1f;
NEAR_CLIP_W = 1.0f / zNear;
/* Source https://learn.microsoft.com/en-us/windows/win32/direct3d9/d3dxmatrixperspectivefovrh */
float c = Cotangent(0.5f * fov);

View File

@ -926,14 +926,14 @@ static void DrawColouredQuads3D(int verticesCount, int startVertex) {
int p; GTE_Get_OTZ(p);
if (p == 0 || (p >> 2) > OT_LENGTH) continue;
GTE_Store_XY0(&poly->x0, 0);
GTE_Store_XY1(&poly->x1, 0);
GTE_Store_XY2(&poly->x2, 0);
GTE_Store_XY0(poly, 8); // &poly->x0
GTE_Store_XY1(poly, 12); // &poly->x1
GTE_Store_XY2(poly, 16); // &poly->x2
GTE_Load_XYZ0(&v2->vx);
GTE_Exec_RTPS(); // 15 cycles
addPrim(&ot[p >> 2], poly);
GTE_Store_XY2(&poly->x3, 0);
GTE_Store_XY2(poly, 20); // &poly->x3
poly++;
}
@ -980,14 +980,14 @@ static void DrawTexturedQuads3D(int verticesCount, int startVertex) {
int p; GTE_Get_OTZ(p);
if (p == 0 || (p >> 2) > OT_LENGTH) continue;
GTE_Store_XY0(&poly->x0, 0);
GTE_Store_XY1(&poly->x1, 0);
GTE_Store_XY2(&poly->x2, 0);
GTE_Load_XYZ0(&v2->vx);
GTE_Store_XY0(poly, 8); // &poly->x0
GTE_Store_XY1(poly, 16); // &poly->x1
GTE_Store_XY2(poly, 24); // &poly->x2
GTE_Load_XYZ0(&v2->vx);
GTE_Exec_RTPS(); // 15 cycles
addPrim(&ot[p >> 2], poly);
GTE_Store_XY2(&poly->x3, 0);
GTE_Store_XY2(poly, 32); // &poly->x3
poly->u0 = (v1->u >> uShift) + uOffset;
poly->v0 = (v1->v >> vShift) + vOffset;

165
third_party/gldc/sh4.c vendored
View File

@ -48,7 +48,7 @@ static inline void PushCommand(Vertex* v) {
sq += 8;
}
extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout, int type);
extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, volatile void* vout, int type);
#define V0_VIS (1 << 0)
#define V1_VIS (1 << 1)
@ -58,137 +58,106 @@ extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vou
// https://casual-effects.com/research/McGuire2011Clipping/clip.glsl
static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_t visible_mask) {
Vertex __attribute__((aligned(32))) scratch[2];
Vertex* a = &scratch[0];
Vertex* b = &scratch[1];
switch(visible_mask) {
case V0_VIS:
{
// v0
// / |
// / |
// .....A....B...
// / |
// v3--v2---v1
ClipEdge(v3, v0, a, PVR_CMD_VERTEX_EOL);
ClipEdge(v0, v1, b, PVR_CMD_VERTEX);
PushVertex(v0);
PushVertex(b);
PushVertex(a);
}
break;
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX); // B
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // A
break;
case V1_VIS:
{
// v1
// / |
// / |
// ....A.....B...
// / |
// v0--v3---v2
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
ClipEdge(v1, v2, b, PVR_CMD_VERTEX_EOL);
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX); // A
PushVertex(v1); // v1
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(a);
PushVertex(v1);
PushVertex(b);
} break;
case V2_VIS:
{
// v2
// / |
// / |
// ....A.....B...
// / |
// v1--v0---v3
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
ClipEdge(v2, v3, b, PVR_CMD_VERTEX_EOL);
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX); // A
PushVertex(v2); // v2
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(a);
PushVertex(v2);
PushVertex(b);
} break;
case V3_VIS:
{
// v3
// / |
// / |
// ....A.....B...
// / |
// v2--v1---v0
ClipEdge(v2, v3, a, PVR_CMD_VERTEX);
ClipEdge(v3, v0, b, PVR_CMD_VERTEX);
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX); // B
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // A
PushVertex(v3); // v3
break;
PushVertex(b);
PushVertex(a);
PushVertex(v3);
}
break;
case V0_VIS | V1_VIS:
{
// v0-----------v1
// \ |
// ....B..........A...
// \ |
// v3-----v2
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
ClipEdge(v3, v0, b, PVR_CMD_VERTEX_EOL);
PushVertex(v1); // v1
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX); // A
PushVertex(v0); // v0
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(v1);
PushVertex(a);
PushVertex(v0);
PushVertex(b);
} break;
// case V0_VIS | V2_VIS: degenerate case that should never happen
case V0_VIS | V3_VIS:
{
// v3-----------v0
// \ |
// ....B..........A...
// \ |
// v2-----v1
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
ClipEdge(v2, v3, b, PVR_CMD_VERTEX);
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX); // A
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX); // B
PushVertex(v0); // v0
PushVertex(v3); // v3
break;
PushVertex(a);
PushVertex(b);
PushVertex(v0);
PushVertex(v3);
} break;
case V1_VIS | V2_VIS:
{
// v1-----------v2
// \ |
// ....B..........A...
// \ |
// v0-----v3
ClipEdge(v2, v3, a, PVR_CMD_VERTEX_EOL);
ClipEdge(v0, v1, b, PVR_CMD_VERTEX);
PushVertex(v1); // v1
PushVertex(v2); // v2
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX); // B
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // A
break;
PushVertex(v1);
PushVertex(v2);
PushVertex(b);
PushVertex(a);
} break;
// case V1_VIS | V3_VIS: degenerate case that should never happen
case V2_VIS | V3_VIS:
{
// v2-----------v3
// \ |
// ....B..........A...
// \ |
// v1-----v0
ClipEdge(v3, v0, a, PVR_CMD_VERTEX);
ClipEdge(v1, v2, b, PVR_CMD_VERTEX);
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX); // B
PushVertex(v2); // v2
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX); // A
PushVertex(v3); // v3
break;
PushVertex(b);
PushVertex(v2);
PushVertex(a);
PushVertex(v3);
} break;
case V0_VIS | V1_VIS | V2_VIS:
{
// --v1--
// v0-- --v2
// \ |
@ -196,17 +165,14 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
// \ |
// v3
// v1,v2,v0 v2,v0,A v0,A,B
ClipEdge(v2, v3, a, PVR_CMD_VERTEX);
ClipEdge(v3, v0, b, PVR_CMD_VERTEX_EOL);
PushVertex(v1); // v1
PushVertex(v2); // v2
PushVertex(v0); // v0
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX); // A
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(v1);
PushVertex(v2);
PushVertex(v0);
PushVertex(a);
PushVertex(b);
} break;
case V0_VIS | V1_VIS | V3_VIS:
{
// --v0--
// v3-- --v1
// \ |
@ -214,18 +180,15 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
// \ |
// v2
// v0,v1,v3 v1,v3,A v3,A,B
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
ClipEdge(v2, v3, b, PVR_CMD_VERTEX_EOL);
v3->flags = PVR_CMD_VERTEX;
PushVertex(v0); // v0
PushVertex(v1); // v1
PushVertex(v3); // v3
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX); // A
ClipEdge(v2, v3, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(v0);
PushVertex(v1);
PushVertex(v3);
PushVertex(a);
PushVertex(b);
} break;
case V0_VIS | V2_VIS | V3_VIS:
{
// --v3--
// v2-- --v0
// \ |
@ -233,18 +196,15 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
// \ |
// v1
// v3,v0,v2 v0,v2,A v2,A,B
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
ClipEdge(v1, v2, b, PVR_CMD_VERTEX_EOL);
v3->flags = PVR_CMD_VERTEX;
PushVertex(v3); // v3
PushVertex(v0); // v0
PushVertex(v2); // v2
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX); // A
ClipEdge(v1, v2, sq, PVR_CMD_VERTEX_EOL); // B
break;
PushVertex(v3);
PushVertex(v0);
PushVertex(v2);
PushVertex(a);
PushVertex(b);
} break;
case V1_VIS | V2_VIS | V3_VIS:
{
// --v2--
// v1-- --v3
// \ |
@ -252,16 +212,13 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
// \ |
// v0
// v2,v3,v1 v3,v1,A v1,A,B
ClipEdge(v3, v0, a, PVR_CMD_VERTEX);
ClipEdge(v0, v1, b, PVR_CMD_VERTEX_EOL);
v3->flags = PVR_CMD_VERTEX;
PushVertex(v2);
PushVertex(v3);
PushVertex(v1);
PushVertex(a);
PushVertex(b);
} break;
PushVertex(v2); // v2
PushVertex(v3); // v3
PushVertex(v1); // v1
ClipEdge(v3, v0, sq, PVR_CMD_VERTEX); // A
ClipEdge(v0, v1, sq, PVR_CMD_VERTEX_EOL); // B
break;
}
}