mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-12 17:17:09 -04:00
PS2: Optimise vertex clipping
This commit is contained in:
parent
bd223eb457
commit
a36ea8380c
@ -34,6 +34,50 @@ LoadClipScaleFactors:
|
||||
jr $ra
|
||||
nop
|
||||
|
||||
|
||||
# TransformVertex1: transforms the vertex staged at 0x00($a2) using the MVP rows
# in $vf1-$vf4 (see per-line comments), stores the result to 0x00($a1) (dst[0])
# and leaves it in $vf11. A copy scaled by $vf5 is then passed to vclipw so the
# clip flags for this vertex are appended. $vf10 is used as scratch.
.macro TransformVertex1
|
||||
lqc2 $vf10, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0)
|
||||
vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST
|
||||
vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
# TransformVertex2: transforms the vertex staged at 0x00($a2) using the MVP rows
# in $vf1-$vf4 (see per-line comments), stores the result to 0x10($a1) (dst[1])
# and leaves it in $vf13. A copy scaled by $vf5 is then passed to vclipw so the
# clip flags for this vertex are appended. $vf12 is used as scratch.
.macro TransformVertex2
|
||||
lqc2 $vf12, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1)
|
||||
vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST
|
||||
vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
# TransformVertex3: transforms the vertex staged at 0x00($a2) using the MVP rows
# in $vf1-$vf4 (see per-line comments), stores the result to 0x20($a1) (dst[2])
# and leaves it in $vf15. A copy scaled by $vf5 is then passed to vclipw so the
# clip flags for this vertex are appended. $vf14 is used as scratch.
.macro TransformVertex3
|
||||
lqc2 $vf14, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2)
|
||||
vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST
|
||||
vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
# TransformVertex4: transforms the vertex staged at 0x00($a2) using the MVP rows
# in $vf1-$vf4 (see per-line comments) and leaves the result in $vf17. A copy
# scaled by $vf5 is then passed to vclipw so the clip flags for this vertex are
# appended. $vf16 is used as scratch.
# NOTE: unlike TransformVertex1-3, this macro contains no sqc2 store; the caller
# is responsible for writing $vf17 to the destination.
.macro TransformVertex4
|
||||
lqc2 $vf16, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST
|
||||
vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
# Transforms 4 vertices with size of 24 bytes
|
||||
# $a0 = address of src vertices
|
||||
# $a1 = address of dst vertices
|
||||
@ -49,69 +93,41 @@ TransformTexturedQuad:
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x08($a0) # t0 = src[0].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
|
||||
# TRANSFORM VERTEX 1
|
||||
lqc2 $vf10, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0)
|
||||
#vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
TransformVertex1
|
||||
|
||||
# LOAD VERTEX 2
|
||||
ld $t0,0x18($a0) # t0 = src[1].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x20($a0) # t0 = src[1].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x00($a3) # clip_flags[0] = t0
|
||||
|
||||
# STORE CLIP FLAGS 1 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x00($a3) # clip_flags[0] = t0
|
||||
# TRANSFORM VERTEX 2
|
||||
lqc2 $vf12, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1)
|
||||
#vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
TransformVertex2
|
||||
|
||||
# LOAD VERTEX 3
|
||||
ld $t0,0x30($a0) # t0 = src[2].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x38($a0) # t0 = src[2].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x04($a3) # clip_flags[1] = t0
|
||||
|
||||
# STORE CLIP FLAGS 2 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x04($a3) # clip_flags[1] = t0
|
||||
# TRANSFORM VERTEX 3
|
||||
lqc2 $vf14, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2)
|
||||
#vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
TransformVertex3
|
||||
|
||||
# LOAD VERTEX 4
|
||||
ld $t0,0x48($a0) # t0 = src[3].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x50($a0) # t0 = src[3].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x08($a3) # clip_flags[2] = t0
|
||||
|
||||
# STORE CLIP FLAGS 3 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x08($a3) # clip_flags[2] = t0
|
||||
# TRANSFORM VERTEX 4
|
||||
lqc2 $vf16, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
#vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
TransformVertex4
|
||||
|
||||
# Desired output
|
||||
# dst[0] = V0
|
||||
@ -123,9 +139,11 @@ TransformTexturedQuad:
|
||||
sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2)
|
||||
sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3)
|
||||
sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0)
|
||||
#vnop # adjust for delay
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x0C($a3) # clip_flags[3] = t0
|
||||
vnop # adjust for delay
|
||||
# STORE CLIP FLAGS 4 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x0C($a3) # clip_flags[3] = t0
|
||||
|
||||
jr $ra
|
||||
nop
|
||||
|
||||
|
@ -22,6 +22,14 @@ static float vp_hwidth, vp_hheight;
|
||||
static int vp_originX, vp_originY;
|
||||
static cc_bool stateDirty, formatDirty;
|
||||
|
||||
typedef struct Matrix VU0_MATRIX __attribute__((aligned(16)));
|
||||
typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16)));
|
||||
|
||||
static VU0_MATRIX mvp;
|
||||
static VU0_VECTOR clip_scale;
|
||||
extern void LoadMvpMatrix(VU0_MATRIX* matrix);
|
||||
extern void LoadClipScaleFactors(VU0_VECTOR* scale);
|
||||
|
||||
// double buffering
|
||||
static packet_t* packets[2];
|
||||
static packet_t* current;
|
||||
@ -431,11 +439,6 @@ void Gfx_DeleteDynamicVb(GfxResourceID* vb) { Gfx_DeleteVb(vb); }
|
||||
*---------------------------------------------------------Matrices--------------------------------------------------------*
|
||||
*#########################################################################################################################*/
|
||||
static struct Matrix _view, _proj;
|
||||
typedef struct Matrix VU0_MATRIX __attribute__((aligned(16)));
|
||||
typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16)));
|
||||
|
||||
static VU0_MATRIX mvp;
|
||||
extern void LoadMvpMatrix(VU0_MATRIX* matrix);
|
||||
|
||||
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||
if (type == MATRIX_VIEW) _view = *matrix;
|
||||
@ -629,14 +632,12 @@ static void DrawTexturedTriangles(int verticesCount, int startVertex) {
|
||||
{
|
||||
TransformTexturedQuad(v, V, &tmp, clip);
|
||||
|
||||
//if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) {
|
||||
if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) {
|
||||
if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) {
|
||||
dw = DrawTexturedTriangle(dw, V, v + 0, v + 1, v + 2);
|
||||
numVerts += 3;
|
||||
}
|
||||
|
||||
//if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) {
|
||||
if (NotClipped(V[3]) && NotClipped(V[4]) && NotClipped(V[5])) {
|
||||
if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) {
|
||||
dw = DrawTexturedTriangle(dw, V + 3, v + 2, v + 3, v + 0);
|
||||
numVerts += 3;
|
||||
}
|
||||
@ -710,6 +711,7 @@ static void DrawTriangles(int verticesCount, int startVertex) {
|
||||
q = dma_tag + 1;
|
||||
Platform_LogConst("Too much geometry!!!");
|
||||
}
|
||||
LoadClipScaleFactors(&clip_scale);
|
||||
|
||||
while (verticesCount)
|
||||
{
|
||||
@ -810,7 +812,6 @@ void Gfx_OnWindowResize(void) {
|
||||
Gfx_SetScissor( 0, 0, Game.Width, Game.Height);
|
||||
}
|
||||
|
||||
extern void LoadClipScaleFactors(VU0_VECTOR* scale);
|
||||
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||
vp_hwidth = w / 2;
|
||||
vp_hheight = h / 2;
|
||||
@ -833,13 +834,13 @@ void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||
// X/W * vp_hwidth <= 2048 -- clipping against guard band
|
||||
// X/W <= 2048 / vp_hwidth
|
||||
// X * vp_hwidth / 2048 <= W
|
||||
VU0_VECTOR scale;
|
||||
scale.x = vp_hwidth / 2048.0f;
|
||||
scale.y = vp_hheight / 2048.0f;
|
||||
scale.z = 1.0f;
|
||||
scale.w = 1.0f;
|
||||
|
||||
LoadClipScaleFactors(&scale);
|
||||
clip_scale.x = vp_hwidth / 2048.0f;
|
||||
clip_scale.y = vp_hheight / 2048.0f;
|
||||
clip_scale.z = 1.0f;
|
||||
clip_scale.w = 1.0f;
|
||||
|
||||
LoadClipScaleFactors(&clip_scale);
|
||||
}
|
||||
|
||||
void Gfx_SetScissor(int x, int y, int w, int h) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user