mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-17 19:45:23 -04:00
PS2: Tidy up assembly
This commit is contained in:
parent
751e44d399
commit
e4cbe64172
0
misc/ps2/DrawColoured.S
Normal file
0
misc/ps2/DrawColoured.S
Normal file
@ -1,12 +1,3 @@
|
||||
# REGISTER USAGE
|
||||
# vf0 = hardware coded to (0,0,0,1)
|
||||
# vf1 = mvp.row1
|
||||
# vf2 = mvp.row2
|
||||
# vf3 = mvp.row3
|
||||
# vf4 = mvp.row4
|
||||
# vf5 = clipping scale adjustments to match guardbands
|
||||
# vf6 = viewport origin
|
||||
# vf7 = viewport scale
|
||||
# NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for
|
||||
|
||||
.macro FUNC name
|
||||
@ -19,98 +10,115 @@
|
||||
# (i.e. instructions after branches/jumps are always unconditionally executed)
|
||||
.set noreorder
|
||||
|
||||
# global registers
|
||||
#define V0001 $vf0 // hardware coded to (0,0,0,1)
|
||||
#define MVP1 $vf1 // mvp.row1
|
||||
#define MVP2 $vf2 // mvp.row2
|
||||
#define MVP3 $vf3 // mvp.row3
|
||||
#define MVP4 $vf4 // mvp.row4
|
||||
#define CL_F $vf5 // clipping scale adjustments to match guardbands
|
||||
#define VP_O $vf6 // viewport origin
|
||||
#define VP_S $vf7 // viewport scale
|
||||
|
||||
# transform temp registers
|
||||
#define POSCL $vf10 // TRANSFORMED(POS_[1234]) * CLIP_PLANES_ADJUST
|
||||
#define POS_1 $vf11 // vertex 1 position
|
||||
#define POS_2 $vf12 // vertex 2 position
|
||||
#define POS_3 $vf13 // vertex 3 position
|
||||
#define POS_4 $vf14 // vertex 4 position
|
||||
|
||||
|
||||
.align 4
|
||||
# Loads matrix into VU0 registers
|
||||
# $a0 = address of mvp
|
||||
FUNC LoadMvpMatrix
|
||||
lqc2 $vf1, 0x00($a0) # vf1 = mvp.row1
|
||||
lqc2 $vf2, 0x10($a0) # vf2 = mvp.row2
|
||||
lqc2 $vf3, 0x20($a0) # vf3 = mvp.row3
|
||||
lqc2 MVP1, 0x00($a0) # vf1 = mvp.row1
|
||||
lqc2 MVP2, 0x10($a0) # vf2 = mvp.row2
|
||||
lqc2 MVP3, 0x20($a0) # vf3 = mvp.row3
|
||||
jr $ra
|
||||
lqc2 $vf4, 0x30($a0) # vf4 = mvp.row4
|
||||
lqc2 MVP4, 0x30($a0) # vf4 = mvp.row4
|
||||
|
||||
# Loads clipping scaling factors into VU0 registers
|
||||
# $a0 = address of factors
|
||||
FUNC LoadClipScaleFactors
|
||||
jr $ra
|
||||
lqc2 $vf5, 0x00($a0) # vf5 = factors
|
||||
lqc2 CL_F, 0x00($a0)
|
||||
|
||||
# Loads viewport origin into VU0 registers
|
||||
# $a0 = address of origin
|
||||
FUNC LoadViewportOrigin
|
||||
jr $ra
|
||||
lqc2 $vf6, 0x00($a0) # vf6 = origin
|
||||
lqc2 VP_O, 0x00($a0)
|
||||
|
||||
# Loads viewport scale into VU0 registers
|
||||
# $a0 = address of scale
|
||||
FUNC LoadViewportScale
|
||||
jr $ra
|
||||
lqc2 $vf7, 0x00($a0) # vf7 = scale
|
||||
lqc2 VP_S, 0x00($a0)
|
||||
|
||||
|
||||
.macro TransformVertex1
|
||||
# LOAD VERTEX 1
|
||||
lqc2 $vf10, 0x00($a2) # IN = tmp
|
||||
lqc2 POS_1, 0x00($a2) # IN = tmp
|
||||
# TRANSFORM VERTEX 1
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0)
|
||||
vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST
|
||||
vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, MVP1, POS_1 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, MVP2, POS_1 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz POS_1, MVP3, POS_1 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 POS_1, 0x00($a1) # dst[0] = TRANSFORMED(V0)
|
||||
vmul POSCL, POS_1, CL_F # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST
|
||||
# BEGIN CLIP FLAGS CALCULATION VERTEX 1
|
||||
vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
.macro TransformVertex2
|
||||
# LOAD VERTEX 2
|
||||
lqc2 $vf12, 0x00($a2) # IN = tmp
|
||||
lqc2 POS_2, 0x00($a2) # IN = tmp
|
||||
# TRANSFORM VERTEX 2
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1)
|
||||
vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST
|
||||
vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, MVP1, POS_2 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, MVP2, POS_2 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz POS_2, MVP3, POS_2 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 POS_2, 0x10($a1) # dst[1] = TRANSFORMED(V1)
|
||||
vmul POSCL, POS_2, CL_F # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST
|
||||
# STORE CLIP FLAGS VERTEX 1 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x00($a3) # clip_flags[0] = t0
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x00($a3) # clip_flags[0] = t0
|
||||
# BEGIN CLIP FLAGS CALCULATION VERTEX 2
|
||||
vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
.macro TransformVertex3
|
||||
# LOAD VERTEX 3
|
||||
lqc2 $vf14, 0x00($a2) # IN = tmp
|
||||
lqc2 POS_3, 0x00($a2) # IN = tmp
|
||||
# TRANSFORM VERTEX 3
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2)
|
||||
vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST
|
||||
vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, MVP1, POS_3 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, MVP2, POS_3 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz POS_3, MVP3, POS_3 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
sqc2 POS_3, 0x20($a1) # dst[2] = TRANSFORMED(V2)
|
||||
vmul POSCL, POS_3, CL_F # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST
|
||||
# STORE CLIP FLAGS VERTEX 2 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x04($a3) # clip_flags[1] = t0
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x04($a3) # clip_flags[1] = t0
|
||||
# BEGIN CLIP FLAGS CALCULATION VERTEX 3
|
||||
vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
.macro TransformVertex4
|
||||
# LOAD VERTEX 4
|
||||
lqc2 $vf16, 0x00($a2) # IN = tmp
|
||||
lqc2 POS_4, 0x00($a2) # IN = tmp
|
||||
# TRANSFORM VERTEX 4
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST
|
||||
vmulaw $ACC, MVP4, V0001 # ACC[xyzw] = mvp.row3[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, MVP1, POS_4 # ACC[xyzw] = ACC[xyzw] + mvp.row0[xyzw] * IN.x
|
||||
vmadday $ACC, MVP2, POS_4 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.y
|
||||
vmaddz POS_4, MVP3, POS_4 # OUT[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.z
|
||||
vmul POSCL, POS_4, CL_F # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST
|
||||
# STORE CLIP FLAGS VERTEX 3 RESULT
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x08($a3) # clip_flags[2] = t0
|
||||
cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
sw $t0,0x08($a3) # clip_flags[2] = t0
|
||||
# BEGIN CLIP FLAGS CALCULATION VERTEX 4
|
||||
vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
vclipw.xyz POSCL, POSCL # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
.endm
|
||||
|
||||
.macro TransformFinish
|
||||
@ -121,9 +129,9 @@ FUNC LoadViewportScale
|
||||
# dst[3] = V2
|
||||
# dst[4] = V3
|
||||
# dst[5] = V0
|
||||
sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2)
|
||||
sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3)
|
||||
sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0)
|
||||
sqc2 POS_3, 0x30($a1) # dst[3] = TRANSFORMED(V2)
|
||||
sqc2 POS_4, 0x40($a1) # dst[4] = TRANSFORMED(V3)
|
||||
sqc2 POS_1, 0x50($a1) # dst[5] = TRANSFORMED(V0)
|
||||
vnop # adjust for delay
|
||||
|
||||
# STORE CLIP FLAGS 4 RESULT
|
||||
@ -211,8 +219,8 @@ FUNC TransformColouredQuad
|
||||
FUNC ViewportTransform
|
||||
lqc2 $vf16, 0x00($a0) # IN = src
|
||||
vmulw $vf17, $vf16, $vf16 # TMP = IN[xyzw] * IN.w (inverse W)
|
||||
vmul $vf18, $vf17, $vf7 # TMP = TMP * viewport_scale
|
||||
vadd $vf19, $vf18, $vf6 # TMP = TMP + viewport_origin
|
||||
vmul $vf18, $vf17, VP_S # TMP = TMP * viewport_scale
|
||||
vadd $vf19, $vf18, VP_O # TMP = TMP + viewport_origin
|
||||
vftoi0 $vf19, $vf19 # TMP = int(TMP)
|
||||
jr $ra
|
||||
sqc2 $vf19, 0x00($a1) # dst = TMP
|
||||
|
Loading…
x
Reference in New Issue
Block a user