diff --git a/misc/ps2/VertexTransform.S b/misc/ps2/VertexTransform.S
index ef449335f..1bec93bfc 100644
--- a/misc/ps2/VertexTransform.S
+++ b/misc/ps2/VertexTransform.S
@@ -9,57 +9,44 @@
 # vf7 = viewport scale
 # NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for
 
+.macro FUNC name
+	.global \name            # export the symbol so other objects can link to it
+	.type   \name,%function  # mark the symbol as a function in the symbol table
+	\name:
+.endm
+
+# MIPS has branch delay slots: the instruction after a branch/jump always executes.
+# noreorder keeps the assembler from reordering code or filling those slots itself.
+.set noreorder
+
+
 .align 4
-
-.global LoadMvpMatrix
-.type   LoadMvpMatrix,%function
-.global LoadClipScaleFactors
-.type   LoadClipScaleFactors,%function
-.global LoadViewportOrigin
-.type   LoadViewportOrigin,%function
-.global LoadViewportScale
-.type   LoadViewportScale,%function
-
-.global TransformTexturedQuad
-.type   TransformTexturedQuad,%function
-.global TransformColouredQuad
-.type   TransformColouredQuad,%function
-.global ViewportTransform
-.type   ViewportTransform,%function
-
 # Loads matrix into VU0 registers
 #	$a0 = addresss of mvp
-LoadMvpMatrix:
+FUNC LoadMvpMatrix
 	lqc2 	$vf1, 0x00($a0) # vf1 = mvp.row1
 	lqc2 	$vf2, 0x10($a0) # vf2 = mvp.row2
 	lqc2 	$vf3, 0x20($a0) # vf3 = mvp.row3
-	lqc2 	$vf4, 0x30($a0) # vf4 = mvp.row4
 	jr		$ra
-	nop
-
+	lqc2 	$vf4, 0x30($a0) # vf4 = mvp.row4 (executes in the jr delay slot)
 
 # Loads clipping scaling factors into VU0 registers
 #	$a0 = addresss of factors
-LoadClipScaleFactors:
+FUNC LoadClipScaleFactors
-	lqc2 	$vf5, 0x00($a0) # vf5 = factors
 	jr		$ra
-	nop
-
+	lqc2 	$vf5, 0x00($a0) # vf5 = factors (executes in the jr delay slot)
 
 # Loads viewport origin into VU0 registers
 #	$a0 = addresss of origin
-LoadViewportOrigin:
-	lqc2 	$vf6, 0x00($a0) # vf6 = origin
+FUNC LoadViewportOrigin
 	jr		$ra
-	nop
-
+	lqc2 	$vf6, 0x00($a0) # vf6 = origin (executes in the jr delay slot)
 
 # Loads viewport scale into VU0 registers
 #	$a0 = addresss of scale
-LoadViewportScale:
-	lqc2 	$vf7, 0x00($a0) # vf7 = scale
+FUNC LoadViewportScale
 	jr		$ra
-	nop
+	lqc2 	$vf7, 0x00($a0) # vf7 = scale (executes in the jr delay slot)
 
 
 .macro TransformVertex1
@@ -141,6 +128,7 @@ LoadViewportScale:
 
 	# STORE CLIP FLAGS 4 RESULT
 	cfc2	$t0, $18	  	   # t0 = VP0_REGS[CLIP_FLAGS]
+	jr		$ra                # return to caller; the sw below executes in the delay slot
 	sw		$t0,0x0C($a3)      # clip_flags[3] = t0
 .endm
 
@@ -150,7 +138,7 @@ LoadViewportScale:
 #	$a1 = addresss of dst  vertices
 #   $a2 = address of  tmp  vertex
 #   $a3 = address of clip flags
-TransformTexturedQuad:
+FUNC TransformTexturedQuad
 	# LOAD 1.0 into W
 	lw		$t0,ONE_VALUE # t0 = 1.0f
 	sw		$t0,0x0C($a2) # tmp.w = f5
@@ -184,15 +172,13 @@ TransformTexturedQuad:
 	TransformVertex4
 
 	TransformFinish
-	jr		$ra
-	nop
 
 # Transforms 4 vertices with size of 16 bytes
 #	$a0 = addresss of src  vertices
 #	$a1 = addresss of dst  vertices
 #   $a2 = address of  tmp  vertex
 #   $a3 = address of clip flags
-TransformColouredQuad:
+FUNC TransformColouredQuad
 	# LOAD 1.0 into W
 	lw		$t0,ONE_VALUE # t0 = 1.0f
 	sw		$t0,0x0C($a2) # tmp.w = f5
@@ -226,8 +212,6 @@ TransformColouredQuad:
 	TransformVertex4
 
 	TransformFinish
-	jr		$ra
-	nop
 
 .global ONE_VALUE
 ONE_VALUE:  .float 1.0
@@ -235,12 +219,11 @@ ONE_VALUE:  .float 1.0
 
 #	$a0 = addresss of src
 #	$a1 = addresss of dst
-ViewportTransform:
+FUNC ViewportTransform
 	lqc2	$vf16, 0x00($a0)    # IN = src
 	vmulw	$vf17, $vf16, $vf16 # TMP = IN[xyzw] * IN.w (inverse W)
 	vmul	$vf18, $vf17, $vf7  # TMP = TMP * viewport_scale
 	vadd	$vf19, $vf18, $vf6  # TMP = TMP + viewport_origin
 	vftoi0  $vf19, $vf19	    # TMP = int(TMP)
-	sqc2	$vf19, 0x00($a1)    # dst = TMP
 	jr		$ra
-	nop
+	sqc2	$vf19, 0x00($a1)    # dst = TMP (executes in the jr delay slot)