diff --git a/misc/ps2/VertexTransform.S b/misc/ps2/VertexTransform.S index ca0add2d1..b24dd70f2 100644 --- a/misc/ps2/VertexTransform.S +++ b/misc/ps2/VertexTransform.S @@ -34,6 +34,50 @@ LoadClipScaleFactors: jr $ra nop + +.macro TransformVertex1 + lqc2 $vf10, 0x00($a2) # IN = tmp + vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0) + vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST + vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) +.endm + +.macro TransformVertex2 + lqc2 $vf12, 0x00($a2) # IN = tmp + vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1) + vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST + vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) +.endm + +.macro TransformVertex3 + lqc2 $vf14, 0x00($a2) # IN = tmp + vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2) + vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST + vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) +.endm + +.macro TransformVertex4 + lqc2 $vf16, 0x00($a2) # IN = tmp + vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST + vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) +.endm + # Transforms 4 vertices with size of 24 bytes # $a0 = addresss of src vertices # $a1 = addresss of dst vertices @@ -49,69 +93,41 @@ TransformTexturedQuad: sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x08($a0) # t0 = src[0].z sw $t0,0x08($a2) # tmp.z = t0 - # TRANSFORM VERTEX 1 - lqc2 $vf10, 0x00($a2) # IN = tmp - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0) - #vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST - #vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + TransformVertex1 # LOAD VERTEX 2 ld $t0,0x18($a0) # t0 = src[1].x,y sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x20($a0) # t0 = src[1].z sw $t0,0x08($a2) # tmp.z = t0 - #cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - #sw $t0,0x00($a3) # clip_flags[0] = t0 - + # STORE CLIP FLAGS 1 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x00($a3) # clip_flags[0] = t0 # TRANSFORM VERTEX 2 - lqc2 $vf12, 0x00($a2) # IN = tmp - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1) - #vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST - #vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + TransformVertex2 # LOAD VERTEX 3 ld $t0,0x30($a0) # t0 = src[2].x,y sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x38($a0) # t0 = src[2].z sw $t0,0x08($a2) # tmp.z = t0 - #cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - #sw $t0,0x04($a3) # clip_flags[1] = t0 - + # STORE CLIP FLAGS 2 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x04($a3) # clip_flags[1] = t0 # TRANSFORM VERTEX 3 - lqc2 $vf14, 0x00($a2) # IN = tmp - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2) - #vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST - #vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + TransformVertex3 # LOAD VERTEX 4 ld $t0,0x48($a0) # t0 = src[3].x,y sd $t0,0x00($a2) # tmp.x,y = t0 lw $t0,0x50($a0) # t0 = src[3].z sw $t0,0x08($a2) # tmp.z = t0 - #cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - #sw $t0,0x08($a3) # clip_flags[2] = t0 - + # STORE CLIP FLAGS 3 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x08($a3) # clip_flags[2] = t0 # TRANSFORM VERTEX 4 - lqc2 $vf16, 0x00($a2) # IN = tmp - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - #vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST - #vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + TransformVertex4 # Desired output # dst[0] = V0 @@ -123,9 +139,11 @@ TransformTexturedQuad: sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2) sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3) sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0) - #vnop # adjust for delay - #cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - #sw $t0,0x0C($a3) # clip_flags[3] = t0 + vnop # adjust for delay + # STORE CLIP FLAGS 4 RESULT + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x0C($a3) # clip_flags[3] = t0 + jr $ra nop diff --git a/src/Graphics_PS2.c b/src/Graphics_PS2.c index b235340f5..2348a31ec 100644 --- a/src/Graphics_PS2.c +++ b/src/Graphics_PS2.c @@ -22,6 +22,14 @@ static float vp_hwidth, vp_hheight; static int vp_originX, vp_originY; static cc_bool stateDirty, formatDirty; +typedef struct Matrix VU0_MATRIX __attribute__((aligned(16))); +typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16))); + +static VU0_MATRIX mvp; +static VU0_VECTOR clip_scale; +extern void LoadMvpMatrix(VU0_MATRIX* matrix); +extern void LoadClipScaleFactors(VU0_VECTOR* scale); + // double buffering static packet_t* packets[2]; static packet_t* current; @@ -431,11 +439,6 @@ void Gfx_DeleteDynamicVb(GfxResourceID* vb) { Gfx_DeleteVb(vb); } *---------------------------------------------------------Matrices--------------------------------------------------------* *#########################################################################################################################*/ static struct Matrix _view, _proj; -typedef struct Matrix VU0_MATRIX __attribute__((aligned(16))); -typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16))); - -static VU0_MATRIX mvp; -extern void LoadMvpMatrix(VU0_MATRIX* matrix); void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) { if (type == MATRIX_VIEW) _view = *matrix; @@ -629,14 +632,12 @@ static void DrawTexturedTriangles(int verticesCount, int startVertex) { { TransformTexturedQuad(v, V, &tmp, clip); - //if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) { - if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) { + if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) { dw = DrawTexturedTriangle(dw, V, v + 0, v + 1, v + 2); numVerts += 3; } - //if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) { - if (NotClipped(V[3]) && NotClipped(V[4]) && NotClipped(V[5])) { + if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) { dw = DrawTexturedTriangle(dw, V + 3, v + 2, v + 3, v + 0); numVerts += 3; } @@ -710,6 +711,7 @@ static void DrawTriangles(int verticesCount, int startVertex) { q = dma_tag + 1; Platform_LogConst("Too much geometry!!!"); } + LoadClipScaleFactors(&clip_scale); while (verticesCount) { @@ -810,7 +812,6 @@ void Gfx_OnWindowResize(void) { Gfx_SetScissor( 0, 0, Game.Width, Game.Height); } -extern void LoadClipScaleFactors(VU0_VECTOR* scale); void Gfx_SetViewport(int x, int y, int w, int h) { vp_hwidth = w / 2; vp_hheight = h / 2; @@ -833,13 +834,13 @@ void Gfx_SetViewport(int x, int y, int w, int h) { // X/W * vp_hwidth <= 2048 -- clipping against guard band // X/W <= 2048 / vp_hwidth // X * vp_hwidth / 2048 <= W - VU0_VECTOR scale; - scale.x = vp_hwidth / 2048.0f; - scale.y = vp_hheight / 2048.0f; - scale.z = 1.0f; - scale.w = 1.0f; - LoadClipScaleFactors(&scale); + clip_scale.x = vp_hwidth / 2048.0f; + clip_scale.y = vp_hheight / 2048.0f; + clip_scale.z = 1.0f; + clip_scale.w = 1.0f; + + LoadClipScaleFactors(&clip_scale); } void Gfx_SetScissor(int x, int y, int w, int h) {