mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-10 16:03:15 -04:00
Dreamcast: Optimise vertex transform by a couple of cycles
This commit is contained in:
parent
74f3c424e8
commit
6153ff8c8a
@ -7,13 +7,10 @@ _DrawColouredQuads:
|
|||||||
! Setup
|
! Setup
|
||||||
fldi0 fr1 ! U = 0
|
fldi0 fr1 ! U = 0
|
||||||
fldi0 fr2 ! V = 0
|
fldi0 fr2 ! V = 0
|
||||||
mov r4,r3 ! r3 = src
|
TransformSetup
|
||||||
add #-32, r5 ! r5 -= sizeof(VERTEX)
|
|
||||||
ViewportTransformSetup _VP_COL_HWIDTH
|
ViewportTransformSetup _VP_COL_HWIDTH
|
||||||
|
|
||||||
.TRANSFORM_QUAD:
|
.TRANSFORM_QUAD:
|
||||||
mov.l CMD_COL_VERT, r1 ! r1 = GPU VERT command
|
|
||||||
|
|
||||||
LoadColouredVertex
|
LoadColouredVertex
|
||||||
ProcessVertex1
|
ProcessVertex1
|
||||||
|
|
||||||
@ -24,7 +21,7 @@ _DrawColouredQuads:
|
|||||||
ProcessVertex3
|
ProcessVertex3
|
||||||
|
|
||||||
LoadColouredVertex
|
LoadColouredVertex
|
||||||
ProcessVertex4 CMD_COL_EOS
|
ProcessVertex4
|
||||||
|
|
||||||
! CLIPFLAGS TESTING
|
! CLIPFLAGS TESTING
|
||||||
cmp/eq #0,r0 ! T = r0 == 0 (all points invisible)
|
cmp/eq #0,r0 ! T = r0 == 0 (all points invisible)
|
||||||
@ -44,13 +41,9 @@ _DrawColouredQuads:
|
|||||||
bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
||||||
nop
|
nop
|
||||||
|
|
||||||
add #32, r5 ! r5 += sizeof(VERTEX)
|
TransformEnd
|
||||||
rts ! return after executing instruction in delay slot
|
|
||||||
mov r5,r0 ! r0 = r5
|
|
||||||
|
|
||||||
.align 2
|
.align 4
|
||||||
CMD_COL_VERT: .long 0xe0000000
|
|
||||||
CMD_COL_EOS: .long 0xf0000000
|
|
||||||
|
|
||||||
.global _VP_COL_HWIDTH
|
.global _VP_COL_HWIDTH
|
||||||
_VP_COL_HWIDTH: .long 0
|
_VP_COL_HWIDTH: .long 0
|
||||||
|
@ -5,13 +5,10 @@
|
|||||||
|
|
||||||
_DrawTexturedQuads:
|
_DrawTexturedQuads:
|
||||||
! Setup
|
! Setup
|
||||||
mov r4,r3 ! r3 = src
|
TransformSetup
|
||||||
add #-32, r5 ! r5 -= sizeof(VERTEX)
|
|
||||||
ViewportTransformSetup _VP_TEX_HWIDTH
|
ViewportTransformSetup _VP_TEX_HWIDTH
|
||||||
|
|
||||||
.TRANSFORM_QUAD:
|
.TRANSFORM_QUAD:
|
||||||
mov.l CMD_TEX_VERT, r1 ! r1 = GPU VERT command
|
|
||||||
|
|
||||||
LoadTexturedVertex
|
LoadTexturedVertex
|
||||||
ProcessVertex1
|
ProcessVertex1
|
||||||
|
|
||||||
@ -22,7 +19,7 @@ _DrawTexturedQuads:
|
|||||||
ProcessVertex3
|
ProcessVertex3
|
||||||
|
|
||||||
LoadTexturedVertex
|
LoadTexturedVertex
|
||||||
ProcessVertex4 CMD_TEX_EOS
|
ProcessVertex4
|
||||||
|
|
||||||
! CLIPFLAGS TESTING
|
! CLIPFLAGS TESTING
|
||||||
cmp/eq #0,r0 ! T = r0 == 0 (all points invisible)
|
cmp/eq #0,r0 ! T = r0 == 0 (all points invisible)
|
||||||
@ -42,13 +39,9 @@ _DrawTexturedQuads:
|
|||||||
bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD
|
||||||
nop
|
nop
|
||||||
|
|
||||||
add #32, r5 ! r5 += sizeof(VERTEX)
|
TransformEnd
|
||||||
rts ! return after executing instruction in delay slot
|
|
||||||
mov r5,r0 ! r0 = r5
|
|
||||||
|
|
||||||
.align 2
|
.align 4
|
||||||
CMD_TEX_VERT: .long 0xe0000000
|
|
||||||
CMD_TEX_EOS: .long 0xf0000000
|
|
||||||
|
|
||||||
.global _VP_TEX_HWIDTH
|
.global _VP_TEX_HWIDTH
|
||||||
_VP_TEX_HWIDTH: .long 0
|
_VP_TEX_HWIDTH: .long 0
|
||||||
|
@ -3,8 +3,8 @@
|
|||||||
! =========================================================
|
! =========================================================
|
||||||
! The SH4 can dual issue (i.e. parallel execution) two instructions
|
! The SH4 can dual issue (i.e. parallel execution) two instructions
|
||||||
! as long as the groups of the two instructions are different:
|
! as long as the groups of the two instructions are different:
|
||||||
! * LS - most APU and FPU register load/stores
|
! * LS - most ALU and FPU register load/stores
|
||||||
! * EX - most APU arithmetic instructions
|
! * EX - most ALU arithmetic instructions
|
||||||
! * MT - TST, CMP, NOP, MOV Rm,Rn
|
! * MT - TST, CMP, NOP, MOV Rm,Rn
|
||||||
! * FE - most FPU arithmetic instructions
|
! * FE - most FPU arithmetic instructions
|
||||||
! * CO - other instructions (NOTE: Cannot be executed in parallel)
|
! * CO - other instructions (NOTE: Cannot be executed in parallel)
|
||||||
@ -14,7 +14,6 @@
|
|||||||
! * Latency: Interval between the issue of an instruction and the generation of its result (completion)
|
! * Latency: Interval between the issue of an instruction and the generation of its result (completion)
|
||||||
! * Latency is also the interval between the execution of two instructions with an interdependent relationship.
|
! * Latency is also the interval between the execution of two instructions with an interdependent relationship.
|
||||||
! (although different cases may either increase or decrease Latency)
|
! (although different cases may either increase or decrease Latency)
|
||||||
!
|
|
||||||
|
|
||||||
|
|
||||||
! =========================================================
|
! =========================================================
|
||||||
@ -38,6 +37,8 @@
|
|||||||
!r5 = dst pointer ARG
|
!r5 = dst pointer ARG
|
||||||
!r6 = quads count ARG
|
!r6 = quads count ARG
|
||||||
!r7 = ?
|
!r7 = ?
|
||||||
|
!r10 = PVR_CMD_VERTEX
|
||||||
|
!r11 = PVR_CMD_VERTEX_EOL
|
||||||
|
|
||||||
!fr0 = temp
|
!fr0 = temp
|
||||||
!fr1 = u
|
!fr1 = u
|
||||||
@ -55,6 +56,34 @@
|
|||||||
!fv4 = XYZW
|
!fv4 = XYZW
|
||||||
|
|
||||||
|
|
||||||
|
! =========================================================
|
||||||
|
! ========================= TRANSFORM SETUP ===============
|
||||||
|
! =========================================================
|
||||||
|
.macro TransformSetup
|
||||||
|
mov r4,r3 ! MT, r3 = src
|
||||||
|
mov.l r10, @-r15 ! LS, push(r10)
|
||||||
|
add #-32, r5 ! EX, r5 -= sizeof(VERTEX)
|
||||||
|
mov.l r11, @-r15 ! LS, push(r11)
|
||||||
|
mov #0xE0, r10 ! EX, r10 = 0xFF FF FF E0 (imm8 is sign-extended)
|
||||||
|
pref @r3 ! LS, PREFETCH r3 (first vertex)
|
||||||
|
shll16 r10 ! EX, r10 = 0xFF E0 00 00
|
||||||
|
shll8 r10 ! EX, r10 = 0xE0 00 00 00 (PVR_CMD_VERTEX)
|
||||||
|
mov #0xF0, r11 ! EX, r11 = 0xFF FF FF F0 (imm8 is sign-extended)
|
||||||
|
shll16 r11 ! EX, r11 = 0xFF F0 00 00
|
||||||
|
shll8 r11 ! EX, r11 = 0xF0 00 00 00 (PVR_CMD_VERTEX_EOL)
|
||||||
|
nop ! MT, align to even boundary
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro TransformEnd
|
||||||
|
mov.l @r15+, r11 ! LS, pop(r11)
|
||||||
|
mov.l @r15+, r10 ! LS, pop(r10)
|
||||||
|
|
||||||
|
add #32, r5 ! EX, r5 += sizeof(VERTEX)
|
||||||
|
rts ! CO, return after executing instruction in delay slot
|
||||||
|
mov r5,r0 ! MT, r0 = r5
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
! =========================================================
|
! =========================================================
|
||||||
! ========================= VERTEX LOADING ================
|
! ========================= VERTEX LOADING ================
|
||||||
! =========================================================
|
! =========================================================
|
||||||
@ -108,7 +137,7 @@
|
|||||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro ProcessVertex2
|
.macro ProcessVertex2
|
||||||
@ -124,7 +153,7 @@
|
|||||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 1)
|
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 1)
|
||||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro ProcessVertex3
|
.macro ProcessVertex3
|
||||||
@ -140,11 +169,12 @@
|
|||||||
shll2 r2 ! EX, tmp = tmp << 2
|
shll2 r2 ! EX, tmp = tmp << 2
|
||||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 2)
|
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 2)
|
||||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro ProcessVertex4 eos_addr
|
.macro ProcessVertex4
|
||||||
fmov.s fr7,@-r5 ! LS, dst->w = W
|
fmov.s fr7,@-r5 ! LS, dst->w = W
|
||||||
|
or r11,r0 ! EX, CLIPFLAGS |= PVR_CMD_VERTEX_EOL
|
||||||
fmov.s fr3,@-r5 ! LS, dst->c = C
|
fmov.s fr3,@-r5 ! LS, dst->c = C
|
||||||
fneg fr7 ! LS, W = -W
|
fneg fr7 ! LS, W = -W
|
||||||
fmov.s fr2,@-r5 ! LS, dst->v = V
|
fmov.s fr2,@-r5 ! LS, dst->v = V
|
||||||
@ -154,12 +184,10 @@
|
|||||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||||
shll2 r2 ! EX, tmp = tmp << 2
|
shll2 r2 ! EX, tmp = tmp << 2
|
||||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||||
add r2,r2 ! EX, tmp = (tmp << 2) + (tmp << 2)
|
add r2,r2 ! EX, tmp = (tmp << 2) + (tmp << 2) (T << 3)
|
||||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||||
mov.l \eos_addr, r1 ! LS, r1 = GPU EOS command
|
|
||||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 3)
|
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 3)
|
||||||
or r0,r1 ! EX, r1 |= CLIPFLAGS
|
mov.l r0,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX_EOL | CLIPFLAGS
|
||||||
mov.l r1,@-r5 ! LS, dst->flags = GPU EOS | CLIPFLAGS
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
@ -156,7 +156,7 @@ Although the regular linux compilation flags will work fine, to take full advan
|
|||||||
|
|
||||||
## Compiling - macOS
|
## Compiling - macOS
|
||||||
|
|
||||||
```cc -fno-math-errno *.c interop_cocoa.m -o ClassiCube -framework Cocoa -framework OpenGL -framework IOKit -lobjc```
|
```cc -fno-math-errno *.c Window_cocoa.m -o ClassiCube -framework Cocoa -framework OpenGL -framework IOKit -lobjc```
|
||||||
|
|
||||||
Note: You may need to install Xcode before you can compile ClassiCube
|
Note: You may need to install Xcode before you can compile ClassiCube
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user