diff --git a/misc/ps2/DrawColoured.S b/misc/ps2/DrawColoured.S new file mode 100644 index 000000000..e69de29bb diff --git a/misc/ps2/VertexTransform.S b/misc/ps2/VertexTransform.S index 68630fc1e..3cd6bd79f 100644 --- a/misc/ps2/VertexTransform.S +++ b/misc/ps2/VertexTransform.S @@ -1,12 +1,3 @@ -# REGISTER USAGE -# vf0 = hardware coded to (0,0,0,1) -# vf1 = mvp.row1 -# vf2 = mvp.row2 -# vf3 = mvp.row3 -# vf4 = mvp.row4 -# vf5 = clipping scale adjustments to match guardbands -# vf6 = viewport origin -# vf7 = viewport scale # NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for .macro FUNC name @@ -19,98 +10,115 @@ # (i.e. instruction after branches/jumps are always unconditionally executed) .set noreorder +# global registers + #define V0001 $vf0 // hardware coded to (0,0,0,1) + #define MVP1 $vf1 // mvp.row1 + #define MVP2 $vf2 // mvp.row2 + #define MVP3 $vf3 // mvp.row3 + #define MVP4 $vf4 // mvp.row4 + #define CL_F $vf5 // clipping scale adjustments to match guardbands + #define VP_O $vf6 // viewport origin + #define VP_S $vf7 // viewport scale + +# transform temp registers + #define POSCL $vf10 // TRANSFORMED(POS_[1234]) * CLIP_PLANES_ADJUST + #define POS_1 $vf11 // vertex 1 position + #define POS_2 $vf12 // vertex 2 position + #define POS_3 $vf13 // vertex 3 position + #define POS_4 $vf14 // vertex 4 position + .align 4 # Loads matrix into VU0 registers # $a0 = addresss of mvp FUNC LoadMvpMatrix - lqc2 $vf1, 0x00($a0) # vf1 = mvp.row1 - lqc2 $vf2, 0x10($a0) # vf2 = mvp.row2 - lqc2 $vf3, 0x20($a0) # vf3 = mvp.row3 + lqc2 MVP1, 0x00($a0) # vf1 = mvp.row1 + lqc2 MVP2, 0x10($a0) # vf2 = mvp.row2 + lqc2 MVP3, 0x20($a0) # vf3 = mvp.row3 jr $ra - lqc2 $vf4, 0x30($a0) # vf4 = mvp.row4 + lqc2 MVP4, 0x30($a0) # vf4 = mvp.row4 # Loads clipping scaling factors into VU0 registers # $a0 = addresss of factors FUNC LoadClipScaleFactors jr $ra - lqc2 $vf5, 0x00($a0) # vf5 = factors + lqc2 CL_F, 0x00($a0) # Loads viewport origin into VU0 registers # $a0 = addresss of origin FUNC LoadViewportOrigin jr $ra - lqc2 $vf6, 0x00($a0) # vf6 = origin + lqc2 VP_O, 0x00($a0) # Loads viewport scale into VU0 registers # $a0 = addresss of scale FUNC LoadViewportScale jr $ra - lqc2 $vf7, 0x00($a0) # vf7 = scale + lqc2 VP_S, 0x00($a0) .macro TransformVertex1 # LOAD VERTEX 1 - lqc2 $vf10, 0x00($a2) # IN = tmp + lqc2 POS_1, 0x00($a2) # IN = tmp # TRANSFORM VERTEX 1 - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0) - vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST + vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, MVP1, POS_1 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, MVP2, POS_1 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz POS_1, MVP3, POS_1 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 POS_1, 0x00($a1) # dst[0] = TRANSFORMED(V0) + vmul POSCL, POS_1, CL_F # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST # BEGIN CLIP FLAGS CALCULATION VERTEX 1 - vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) .endm .macro TransformVertex2 # LOAD VERTEX 2 - lqc2 $vf12, 0x00($a2) # IN = tmp + lqc2 POS_2, 0x00($a2) # IN = tmp # TRANSFORM VERTEX 2 - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1) - vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST + vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, MVP1, POS_2 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, MVP2, POS_2 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz POS_2, MVP3, POS_2 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 POS_2, 0x10($a1) # dst[1] = TRANSFORMED(V1) + vmul POSCL, POS_2, CL_F # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST # STORE CLIP FLAGS VERTEX 1 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x00($a3) # clip_flags[0] = t0 + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x00($a3) # clip_flags[0] = t0 # BEGIN CLIP FLAGS CALCULATION VERTEX 2 - vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) .endm .macro TransformVertex3 # LOAD VERTEX 3 - lqc2 $vf14, 0x00($a2) # IN = tmp + lqc2 POS_3, 0x00($a2) # IN = tmp # TRANSFORM VERTEX 3 - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2) - vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST + vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, MVP1, POS_3 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, MVP2, POS_3 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz POS_3, MVP3, POS_3 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + sqc2 POS_3, 0x20($a1) # dst[2] = TRANSFORMED(V2) + vmul POSCL, POS_3, CL_F # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST # STORE CLIP FLAGS VERTEX 2 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x04($a3) # clip_flags[1] = t0 + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x04($a3) # clip_flags[1] = t0 # BEGIN CLIP FLAGS CALCULATION VERTEX 3 - vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) .endm .macro TransformVertex4 # LOAD VERTEX 4 - lqc2 $vf16, 0x00($a2) # IN = tmp + lqc2 POS_4, 0x00($a2) # IN = tmp # TRANSFORM VERTEX 4 - vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) - vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x - vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y - vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z - vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST + vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1) + vmaddax $ACC, MVP1, POS_4 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x + vmadday $ACC, MVP2, POS_4 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y + vmaddz POS_4, MVP3, POS_4 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z + vmul POSCL, POS_4, CL_F # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST # STORE CLIP FLAGS VERTEX 3 RESULT - cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] - sw $t0,0x08($a3) # clip_flags[2] = t0 + cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS] + sw $t0,0x08($a3) # clip_flags[2] = t0 # BEGIN CLIP FLAGS CALCULATION VERTEX 4 - vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) + vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w)) .endm .macro TransformFinish @@ -121,9 +129,9 @@ FUNC LoadViewportScale # dst[3] = V2 # dst[4] = V3 # dst[5] = V0 - sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2) - sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3) - sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0) + sqc2 POS_3, 0x30($a1) # dst[3] = TRANSFORMED(V2) + sqc2 POS_4, 0x40($a1) # dst[4] = TRANSFORMED(V3) + sqc2 POS_1, 0x50($a1) # dst[5] = TRANSFORMED(V0) vnop # adjust for delay # STORE CLIP FLAGS 4 RESULT @@ -211,8 +219,8 @@ FUNC TransformColouredQuad FUNC ViewportTransform lqc2 $vf16, 0x00($a0) # IN = src vmulw $vf17, $vf16, $vf16 # TMP = IN[xyzw] * IN.w (inverse W) - vmul $vf18, $vf17, $vf7 # TMP = TMP * viewport_scale - vadd $vf19, $vf18, $vf6 # TMP = TMP + viewport_origin + vmul $vf18, $vf17, VP_S # TMP = TMP * viewport_scale + vadd $vf19, $vf18, VP_O # TMP = TMP + viewport_origin vftoi0 $vf19, $vf19 # TMP = int(TMP) jr $ra sqc2 $vf19, 0x00($a1) # dst = TMP