diff --git a/misc/dreamcast/DrawColouredQuads.S b/misc/dreamcast/DrawColouredQuads.S index 074605a1d..44c37be0e 100644 --- a/misc/dreamcast/DrawColouredQuads.S +++ b/misc/dreamcast/DrawColouredQuads.S @@ -7,13 +7,10 @@ _DrawColouredQuads: ! Setup fldi0 fr1 ! U = 0 fldi0 fr2 ! V = 0 - mov r4,r3 ! r3 = src - add #-32, r5 ! r5 -= sizeof(VERTEX) + TransformSetup ViewportTransformSetup _VP_COL_HWIDTH .TRANSFORM_QUAD: - mov.l CMD_COL_VERT, r1 ! r1 = GPU VERT command - LoadColouredVertex ProcessVertex1 @@ -24,7 +21,7 @@ _DrawColouredQuads: ProcessVertex3 LoadColouredVertex - ProcessVertex4 CMD_COL_EOS + ProcessVertex4 ! CLIPFLAGS TESTING cmp/eq #0,r0 ! T = r0 == 0 (all points invisible) @@ -44,13 +41,9 @@ _DrawColouredQuads: bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD nop - add #32, r5 ! r5 += sizeof(VERTEX) - rts ! return after executing instruction in delay slot - mov r5,r0 ! r0 = r5 + TransformEnd -.align 2 -CMD_COL_VERT: .long 0xe0000000 -CMD_COL_EOS: .long 0xf0000000 +.align 4 .global _VP_COL_HWIDTH _VP_COL_HWIDTH: .long 0 diff --git a/misc/dreamcast/DrawTexturedQuads.S b/misc/dreamcast/DrawTexturedQuads.S index 5ce16bd81..6de802512 100644 --- a/misc/dreamcast/DrawTexturedQuads.S +++ b/misc/dreamcast/DrawTexturedQuads.S @@ -5,13 +5,10 @@ _DrawTexturedQuads: ! Setup - mov r4,r3 ! r3 = src - add #-32, r5 ! r5 -= sizeof(VERTEX) + TransformSetup ViewportTransformSetup _VP_TEX_HWIDTH .TRANSFORM_QUAD: - mov.l CMD_TEX_VERT, r1 ! r1 = GPU VERT command - LoadTexturedVertex ProcessVertex1 @@ -22,7 +19,7 @@ _DrawTexturedQuads: ProcessVertex3 LoadTexturedVertex - ProcessVertex4 CMD_TEX_EOS + ProcessVertex4 ! CLIPFLAGS TESTING cmp/eq #0,r0 ! T = r0 == 0 (all points invisible) @@ -42,13 +39,9 @@ _DrawTexturedQuads: bf .TRANSFORM_QUAD ! if !T then goto TRANSFORM_QUAD nop - add #32, r5 ! r5 += sizeof(VERTEX) - rts ! return after executing instruction in delay slot - mov r5,r0 ! 
r0 = r5 + TransformEnd -.align 2 -CMD_TEX_VERT: .long 0xe0000000 -CMD_TEX_EOS: .long 0xf0000000 +.align 4 .global _VP_TEX_HWIDTH _VP_TEX_HWIDTH: .long 0 diff --git a/misc/dreamcast/ViewportTransform.S b/misc/dreamcast/ViewportTransform.S index 66db8263d..4804bcf13 100644 --- a/misc/dreamcast/ViewportTransform.S +++ b/misc/dreamcast/ViewportTransform.S @@ -3,8 +3,8 @@ ! ========================================================= ! The SH4 can dual issue (i.e. parallel execution) two instructions ! as long as the groups of the two instructions are different: -! * LS - most APU and FPU register load/stores -! * EX - most APU arithmetic instructions +! * LS - most ALU and FPU register load/stores +! * EX - most ALU arithmetic instructions ! * MT - TST, CMP, NOP, MOV Rm,Rn ! * FE - most FPU arithmetic instructions ! * CO - other instructions (NOTE: Cannot be exeucted in parallel) @@ -14,7 +14,6 @@ ! * Latency: Interval between the issue of an instruction and the generation of its result (completion) ! * Latency is also the interval between the execution of two instructions with an interdependent relationship. ! (although different cases may either increase or decrease Latency) -! ! ========================================================= @@ -38,6 +37,8 @@ !r5 = dst pointer ARG !r6 = quads count ARG !r7 = ? +!r10 = PVR_CMD_VERTEX +!r11 = PVR_CMD_VERTEX_EOL !fr0 = temp !fr1 = u @@ -55,6 +56,34 @@ !fv4 = XYZW +! ========================================================= +! ========================= TRANSFORM SETUP =============== +! ========================================================= +.macro TransformSetup + mov r4,r3 ! MT, r3 = src + mov.l r10, @-r15 ! LS, push(r10) + add #-32, r5 ! EX, r5 -= sizeof(VERTEX) + mov.l r11, @-r15 ! LS, push(r11) + mov #0xE0, r10 ! EX, r10 = 0xFF FF FF E0 (MOV #imm,Rn sign extends the 8 bit immediate) + pref @r3 ! LS, PREFETCH r3 (first vertex) + shll16 r10 ! EX, r10 = 0xFF E0 00 00 + shll8 r10 ! EX, r10 = 0xE0 00 00 00 (PVR_CMD_VERTEX) + mov #0xF0, r11 ! 
EX, r11 = 0xFF FF FF F0 (sign extended, upper bits are shifted out below) + shll16 r11 ! EX, r11 = 0xFF F0 00 00 + shll8 r11 ! EX, r11 = 0xF0 00 00 00 (PVR_CMD_VERTEX_EOL) + nop ! MT, align to even boundary +.endm + +.macro TransformEnd + mov.l @r15+, r11 ! LS, pop(r11) + mov.l @r15+, r10 ! LS, pop(r10) + + add #32, r5 ! EX, r5 += sizeof(VERTEX) + rts ! CO, return after executing instruction in delay slot + mov r5,r0 ! MT, r0 = r5 +.endm + + ! ========================================================= ! ========================= VERTEX LOADING ================ ! ========================================================= @@ -108,7 +137,7 @@ fmov.s fr6,@-r5 ! LS, dst->z = Z fmov.s fr5,@-r5 ! LS, dst->y = Y fmov.s fr4,@-r5 ! LS, dst->x = X - mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT + mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX .endm .macro ProcessVertex2 @@ -124,7 +153,7 @@ fmov.s fr5,@-r5 ! LS, dst->y = Y or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 1) fmov.s fr4,@-r5 ! LS, dst->x = X - mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT + mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX .endm .macro ProcessVertex3 @@ -140,11 +169,11 @@ shll2 r2 ! EX, tmp = tmp << 2 fmov.s fr4,@-r5 ! LS, dst->x = X or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 2) - mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT + mov.l r10,@-r5 ! LS, dst->flags = PVR_CMD_VERTEX .endm -.macro ProcessVertex4 eos_addr +.macro ProcessVertex4 fmov.s fr7,@-r5 ! LS, dst->w = W fmov.s fr3,@-r5 ! LS, dst->c = C fneg fr7 ! LS, W = -W fmov.s fr2,@-r5 ! LS, dst->v = V @@ -154,12 +183,12 @@ fmov.s fr6,@-r5 ! LS, dst->z = Z shll2 r2 ! EX, tmp = tmp << 2 fmov.s fr5,@-r5 ! LS, dst->y = Y - add r2,r2 ! EX, tmp = (tmp << 2) + (tmp << 2) + add r2,r2 ! EX, tmp = (tmp << 2) + (tmp << 2) (T << 3) fmov.s fr4,@-r5 ! LS, dst->x = X - mov.l \eos_addr, r1 ! LS, r1 = GPU EOS command + mov r11,r1 ! MT, r1 = PVR_CMD_VERTEX_EOL (built in scratch r1 so r0 stays pure CLIPFLAGS for the cmp/eq tests that follow this macro) or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 3) or r0,r1 ! EX, r1 |= CLIPFLAGS - mov.l r1,@-r5 ! LS, dst->flags = GPU EOS | CLIPFLAGS + mov.l r1,@-r5 ! 
LS, dst->flags = PVR_CMD_VERTEX_EOL | CLIPFLAGS .endm diff --git a/readme.md b/readme.md index b356d740e..8af899d1e 100644 --- a/readme.md +++ b/readme.md @@ -156,7 +156,7 @@ Although the regular linux compiliation flags will work fine, to take full advan ## Compiling - macOS -```cc -fno-math-errno *.c interop_cocoa.m -o ClassiCube -framework Cocoa -framework OpenGL -framework IOKit -lobjc``` +```cc -fno-math-errno *.c Window_cocoa.m -o ClassiCube -framework Cocoa -framework OpenGL -framework IOKit -lobjc``` Note: You may need to install Xcode before you can compile ClassiCube