mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-11 08:36:38 -04:00
Dreamcast: Tidy up the assembly a bit
This commit is contained in:
parent
7f37bb8ddc
commit
e6d19d7ca1
2
.github/workflows/build_mac32.yml
vendored
2
.github/workflows/build_mac32.yml
vendored
@ -13,7 +13,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: ${{ inputs.WEBHOOK_URL != '' }}
|
||||
if: ${{ secrets.GHCR_ACCESS_KEY != '' }}
|
||||
runs-on: ubuntu-latest
|
||||
container:
|
||||
image: ghcr.io/classicube/minimal-osxcross:latest
|
||||
|
@ -1,27 +1,3 @@
|
||||
!r0 = clip flags
|
||||
!r1 = GPU command
|
||||
!r2 = temp
|
||||
!r3 = prefetch address
|
||||
!r4 = src pointer ARG
|
||||
!r5 = dst pointer ARG
|
||||
!r6 = quads count ARG
|
||||
!r7 = ?
|
||||
|
||||
!fr0 = temp
|
||||
!fr1 = u (0.0)
|
||||
!fr2 = v (0.0)
|
||||
!fr3 = c
|
||||
!fr4 = x
|
||||
!fr5 = y
|
||||
!fr6 = z
|
||||
!fr7 = w
|
||||
!fr8 = VIEWPORT_HWIDTH
|
||||
!fr9 = VIEWPORT_HHEIGHT
|
||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
!fv4 = XYZW
|
||||
|
||||
#include "ViewportTransform.S"
|
||||
.global _DrawColouredQuads
|
||||
.align 4
|
||||
@ -90,4 +66,4 @@ _VP_COL_X_PLUS_HWIDTH: .long 0
|
||||
|
||||
.global _VP_COL_Y_PLUS_HHEIGHT
|
||||
.type _VP_COL_Y_PLUS_HHEIGHT,%object
|
||||
_VP_COL_Y_PLUS_HHEIGHT: .long 0
|
||||
_VP_COL_Y_PLUS_HHEIGHT: .long 0
|
||||
|
@ -1,27 +1,3 @@
|
||||
!r0 = clip flags
|
||||
!r1 = GPU command
|
||||
!r2 = temp
|
||||
!r3 = prefetch address
|
||||
!r4 = src pointer ARG
|
||||
!r5 = dst pointer ARG
|
||||
!r6 = quads count ARG
|
||||
!r7 = ?
|
||||
|
||||
!fr0 = temp
|
||||
!fr1 = u
|
||||
!fr2 = v
|
||||
!fr3 = c
|
||||
!fr4 = x
|
||||
!fr5 = y
|
||||
!fr6 = z
|
||||
!fr7 = w
|
||||
!fr8 = VIEWPORT_HWIDTH
|
||||
!fr9 = VIEWPORT_HHEIGHT
|
||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
!fv4 = XYZW
|
||||
|
||||
#include "ViewportTransform.S"
|
||||
.global _DrawTexturedQuads
|
||||
.align 4
|
||||
@ -88,4 +64,4 @@ _VP_TEX_X_PLUS_HWIDTH: .long 0
|
||||
|
||||
.global _VP_TEX_Y_PLUS_HHEIGHT
|
||||
.type _VP_TEX_Y_PLUS_HHEIGHT,%object
|
||||
_VP_TEX_Y_PLUS_HHEIGHT: .long 0
|
||||
_VP_TEX_Y_PLUS_HHEIGHT: .long 0
|
||||
|
@ -1,108 +1,170 @@
|
||||
! =========================================================
|
||||
! ======================== PROCESSOR INFO =================
|
||||
! =========================================================
|
||||
! The SH4 can dual issue (i.e. parallel execution) instructions
|
||||
! as long as the groups of the two instructions are different:
|
||||
! * LS - most APU and FPU register load/stores
|
||||
! * EX - most APU arithmetic instructions
|
||||
! * MT - TST, CMP, NOP, MOV Rm,Rn
|
||||
! * FE - most FPU arithmetic instructions
|
||||
! * CO - other instructions
|
||||
|
||||
! The following general aspects of instructions are important to note per the SH4 manual:
|
||||
! * Issue rate: Interval between the issue of an instruction and that of the next instruction
|
||||
! * Latency: Interval between the issue of an instruction and the generation of its result (completion)
|
||||
! * Latency is also the interval between the execution of two instructions with an interdependent relationship.
|
||||
! (although different cases may either increase or decrease Latency)
|
||||
!
|
||||
! The instructions have the following latencies
|
||||
! * FADD.S/FMUL.S/FSUB.S/FMAC.S - 3/4
|
||||
! * FTRV - 5/8
|
||||
! * SHL/SHR - 1
|
||||
! * ADD/SUB/OR/XOR - 1
|
||||
|
||||
|
||||
! =========================================================
|
||||
! ======================== REGISTER USAGES ================
|
||||
! =========================================================
|
||||
! SH4 C ABI:
|
||||
! - R0 to R3 are return values (can be overwritten)
|
||||
! - R4 to R7 are input arguments (can be overwritten)
|
||||
! - R8 to R13 are non-volatile (must be restored at end)
|
||||
! - R14 is the frame pointer (must be restored at end)
|
||||
! - R15 is the stack pointer (must be restored at end)
|
||||
! - FR0 to FR3 are return values (can be overwritten)
|
||||
! - FR4 to FR11 are input arguments (can be overwritten)
|
||||
! - FR12 to FR13 are non-volatile (must be restored at end)
|
||||
|
||||
!r0 = clip flags
|
||||
!r1 = GPU command
|
||||
!r2 = temp
|
||||
!r3 = prefetch address
|
||||
!r4 = src pointer ARG
|
||||
!r5 = dst pointer ARG
|
||||
!r6 = quads count ARG
|
||||
!r7 = ?
|
||||
|
||||
!fr0 = temp
|
||||
!fr1 = u
|
||||
!fr2 = v
|
||||
!fr3 = c
|
||||
!fr4 = x
|
||||
!fr5 = y
|
||||
!fr6 = z
|
||||
!fr7 = w
|
||||
!fr8 = VIEWPORT_HWIDTH
|
||||
!fr9 = VIEWPORT_HHEIGHT
|
||||
!fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
!fv4 = XYZW
|
||||
|
||||
|
||||
! =========================================================
|
||||
! ========================= VERTEX LOADING ================
|
||||
! =========================================================
|
||||
.macro LoadColouredVertex
|
||||
! PREPARE NEXT VERTEX
|
||||
add #16, r3 ! r3 += VERTEX_STRIDE
|
||||
pref @r3 ! PREFETCH r3 (next vertex)
|
||||
add #64, r5 ! r5 += 2 * sizeof(VERTEX)
|
||||
add #16, r3 ! EX, r3 += VERTEX_STRIDE
|
||||
pref @r3 ! LS, PREFETCH r3 (next vertex)
|
||||
add #64, r5 ! EX, r5 += 2 * sizeof(VERTEX)
|
||||
! LOAD XYZ
|
||||
fmov @r4+, fr4 ! X = src->x
|
||||
fmov @r4+, fr5 ! Y = src->y
|
||||
fmov @r4+, fr6 ! Z = src->z
|
||||
fldi1 fr7 ! W = 1.0
|
||||
fmov @r4+, fr4 ! LS, X = src->x
|
||||
fmov @r4+, fr5 ! LS, Y = src->y
|
||||
fmov @r4+, fr6 ! LS, Z = src->z
|
||||
fldi1 fr7 ! LS, W = 1.0
|
||||
! TRANSFORM VERTEX
|
||||
ftrv xmtrx, fv4 ! TRANSFORM(XYZW)
|
||||
ftrv xmtrx, fv4 ! FE, TRANSFORM(XYZW)
|
||||
! LOAD ATTRIBUTES
|
||||
fmov @r4+,fr3 ! C = src->color
|
||||
fmov @r4+,fr3 ! LS, C = src->color
|
||||
.endm
|
||||
|
||||
.macro LoadTexturedVertex
|
||||
! PREPARE NEXT VERTEX
|
||||
add #24, r3 ! r3 += VERTEX_STRIDE
|
||||
pref @r3 ! PREFETCH r3 (next vertex)
|
||||
add #64, r5 ! r5 += 2 * sizeof(VERTEX)
|
||||
add #24, r3 ! EX, r3 += VERTEX_STRIDE
|
||||
pref @r3 ! LS, PREFETCH r3 (next vertex)
|
||||
add #64, r5 ! EX, r5 += 2 * sizeof(VERTEX)
|
||||
! LOAD XYZ
|
||||
fmov @r4+, fr4 ! X = src->x
|
||||
fmov @r4+, fr5 ! Y = src->y
|
||||
fmov @r4+, fr6 ! Z = src->z
|
||||
fldi1 fr7 ! W = 1.0
|
||||
fmov @r4+, fr4 ! LS, X = src->x
|
||||
fmov @r4+, fr5 ! LS, Y = src->y
|
||||
fmov @r4+, fr6 ! LS, Z = src->z
|
||||
fldi1 fr7 ! LS, W = 1.0
|
||||
! TRANSFORM VERTEX
|
||||
ftrv xmtrx, fv4 ! TRANSFORM(XYZW)
|
||||
ftrv xmtrx, fv4 ! FE, TRANSFORM(XYZW)
|
||||
! LOAD ATTRIBUTES
|
||||
fmov @r4+,fr3 ! C = src->color
|
||||
fmov @r4+,fr1 ! U = src->u
|
||||
fmov @r4+,fr2 ! V = src->v
|
||||
fmov @r4+,fr3 ! LS, C = src->color
|
||||
fmov @r4+,fr1 ! LS, U = src->u
|
||||
fmov @r4+,fr2 ! LS, V = src->v
|
||||
.endm
|
||||
|
||||
! =========================================================
|
||||
! ========================= VERTEX OUTPUT =================
|
||||
! =========================================================
|
||||
! To take advantage of SH4 dual instruction processing, interleave
|
||||
! the clipflag calculation and vertex output instructions
|
||||
! To take advantage of SH4 dual instruction processing,
|
||||
! clipflag calculation and vertex output are interleaved
|
||||
.macro ProcessVertex1
|
||||
fmov.s fr7,@-r5 ! dst->w = W
|
||||
fmov.s fr3,@-r5 ! dst->c = C
|
||||
fneg fr7 ! W = -W
|
||||
fmov.s fr2,@-r5 ! dst->v = V
|
||||
fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! dst->u = U
|
||||
movt r0 ! CLIPFLAGS = T
|
||||
fmov.s fr6,@-r5 ! dst->z = Z
|
||||
fmov.s fr5,@-r5 ! dst->y = Y
|
||||
fmov.s fr4,@-r5 ! dst->x = X
|
||||
mov.l r1,@-r5 ! dst->flags = CMD_VERT
|
||||
fmov.s fr7,@-r5 ! LS, dst->w = W
|
||||
fmov.s fr3,@-r5 ! LS, dst->c = C
|
||||
fneg fr7 ! LS, W = -W
|
||||
fmov.s fr2,@-r5 ! LS, dst->v = V
|
||||
fcmp/gt fr7,fr6 ! CO, T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! LS, dst->u = U
|
||||
movt r0 ! EX, CLIPFLAGS = T
|
||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
||||
.endm
|
||||
|
||||
.macro ProcessVertex2
|
||||
fmov.s fr7,@-r5 ! dst->w = W
|
||||
fmov.s fr3,@-r5 ! dst->c = C
|
||||
fneg fr7 ! W = -W
|
||||
fmov.s fr2,@-r5 ! dst->v = V
|
||||
fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! dst->u = U
|
||||
movt r2 ! tmp = T
|
||||
fmov.s fr6,@-r5 ! dst->z = Z
|
||||
add r2,r2 ! tmp = tmp + tmp
|
||||
fmov.s fr5,@-r5 ! dst->y = Y
|
||||
or r2,r0 ! CLIPFLAGS |= tmp (T << 1)
|
||||
fmov.s fr4,@-r5 ! dst->x = X
|
||||
mov.l r1,@-r5 ! dst->flags = CMD_VERT
|
||||
fmov.s fr7,@-r5 ! LS, dst->w = W
|
||||
fmov.s fr3,@-r5 ! LS, dst->c = C
|
||||
fneg fr7 ! LS, W = -W
|
||||
fmov.s fr2,@-r5 ! LS, dst->v = V
|
||||
fcmp/gt fr7,fr6 ! CO, T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! LS, dst->u = U
|
||||
movt r2 ! EX, tmp = T
|
||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||
add r2,r2 ! EX, tmp = tmp + tmp
|
||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 1)
|
||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
||||
.endm
|
||||
|
||||
.macro ProcessVertex3
|
||||
fmov.s fr7,@-r5 ! dst->w = W
|
||||
fmov.s fr3,@-r5 ! dst->c = C
|
||||
fneg fr7 ! W = -W
|
||||
fmov.s fr2,@-r5 ! dst->v = V
|
||||
fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! dst->u = U
|
||||
movt r2 ! tmp = T
|
||||
fmov.s fr6,@-r5 ! dst->z = Z
|
||||
fmov.s fr5,@-r5 ! dst->y = Y
|
||||
shll2 r2 ! tmp = tmp << 2
|
||||
fmov.s fr4,@-r5 ! dst->x = X
|
||||
or r2,r0 ! CLIPFLAGS |= tmp (T << 2)
|
||||
mov.l r1,@-r5 ! dst->flags = CMD_VERT
|
||||
fmov.s fr7,@-r5 ! LS, dst->w = W
|
||||
fmov.s fr3,@-r5 ! LS, dst->c = C
|
||||
fneg fr7 ! LS, W = -W
|
||||
fmov.s fr2,@-r5 ! LS, dst->v = V
|
||||
fcmp/gt fr7,fr6 ! CO, T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! LS, dst->u = U
|
||||
movt r2 ! EX, tmp = T
|
||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||
shll2 r2 ! EX, tmp = tmp << 2
|
||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 2)
|
||||
mov.l r1,@-r5 ! LS, dst->flags = CMD_VERT
|
||||
.endm
|
||||
|
||||
.macro ProcessVertex4 eos_addr
|
||||
fmov.s fr7,@-r5 ! dst->w = W
|
||||
fmov.s fr3,@-r5 ! dst->c = C
|
||||
fneg fr7 ! W = -W
|
||||
fmov.s fr2,@-r5 ! dst->v = V
|
||||
fcmp/gt fr7,fr6 ! T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! dst->u = U
|
||||
movt r2 ! tmp = T
|
||||
fmov.s fr6,@-r5 ! dst->z = Z
|
||||
shll2 r2 ! tmp = tmp << 2
|
||||
fmov.s fr5,@-r5 ! dst->y = Y
|
||||
add r2,r2 ! tmp = (tmp << 2) + (tmp << 2)
|
||||
fmov.s fr4,@-r5 ! dst->x = X
|
||||
mov.l \eos_addr, r1 ! r1 = GPU EOS command
|
||||
or r2,r0 ! CLIPFLAGS |= tmp (T << 3)
|
||||
or r0,r1 ! r1 |= CLIPFLAGS
|
||||
mov.l r1,@-r5 ! dst->flags = GPU EOS | CLIPFLAGS
|
||||
fmov.s fr7,@-r5 ! LS, dst->w = W
|
||||
fmov.s fr3,@-r5 ! LS, dst->c = C
|
||||
fneg fr7 ! LS, W = -W
|
||||
fmov.s fr2,@-r5 ! LS, dst->v = V
|
||||
fcmp/gt fr7,fr6 ! CO, T = Z > W (i.e. Z > -W)
|
||||
fmov.s fr1,@-r5 ! LS, dst->u = U
|
||||
movt r2 ! EX, tmp = T
|
||||
fmov.s fr6,@-r5 ! LS, dst->z = Z
|
||||
shll2 r2 ! EX, tmp = tmp << 2
|
||||
fmov.s fr5,@-r5 ! LS, dst->y = Y
|
||||
add r2,r2 ! EX, tmp = (tmp << 2) + (tmp << 2)
|
||||
fmov.s fr4,@-r5 ! LS, dst->x = X
|
||||
mov.l \eos_addr, r1 ! LS, r1 = GPU EOS command
|
||||
or r2,r0 ! EX, CLIPFLAGS |= tmp (T << 3)
|
||||
or r0,r1 ! EX, r1 |= CLIPFLAGS
|
||||
mov.l r1,@-r5 ! LS, dst->flags = GPU EOS | CLIPFLAGS
|
||||
.endm
|
||||
|
||||
|
||||
@ -123,39 +185,39 @@
|
||||
!fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
|
||||
.macro ViewportTransformSetup viewport_addr
|
||||
mova \viewport_addr, r0
|
||||
fmov.s @r0+,fr8 ! fr8 = VIEWPORT_HWIDTH
|
||||
fmov.s @r0+,fr9 ! fr9 = VIEWPORT_HHEIGHT
|
||||
fmov.s @r0+,fr10 ! fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmov.s @r0+,fr11 ! fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
nop ! align to even instructions
|
||||
mova \viewport_addr, r0 ! EX, r0 = &VIEWPORT
|
||||
fmov.s @r0+,fr8 ! LS, fr8 = VIEWPORT_HWIDTH
|
||||
fmov.s @r0+,fr9 ! LS, fr9 = VIEWPORT_HHEIGHT
|
||||
fmov.s @r0+,fr10 ! LS, fr10 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmov.s @r0+,fr11 ! LS, fr11 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
nop ! MT, align to even instructions
|
||||
.endm
|
||||
|
||||
.macro ViewportTransformVertex
|
||||
! INVERSE W CALCULATION
|
||||
add #28, r5 ! r5 = &vertex->w
|
||||
fmov.s @r5,fr0 ! fr0 = vertex->w
|
||||
fmul fr0,fr0 ! fr0 = fr0 * fr0
|
||||
add #-24, r5 ! r5 = &vertex->x
|
||||
fsrra fr0 ! fr0 = 1 / sqrt(fr0) -> 1 / vertex->w
|
||||
add #28, r5 ! EX, r5 = &vertex->w
|
||||
fmov.s @r5,fr0 ! LS, fr0 = vertex->w
|
||||
fmul fr0,fr0 ! FE, fr0 = fr0 * fr0
|
||||
add #-24, r5 ! EX, r5 = &vertex->x
|
||||
fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w
|
||||
|
||||
! TRANSFORM X
|
||||
fmov.s @r5,fr4 ! fr4 = vertex->x
|
||||
fmov fr10,fr5 ! fr5 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmul fr8,fr4 ! fr4 = VIEWPORT_HWIDTH * vertex->x
|
||||
fmac fr0,fr4,fr5 ! fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
||||
fmov.s fr5,@r5 ! vertex->x = fr5
|
||||
add #4, r5 ! r5 = &vertex->y
|
||||
fmov.s @r5,fr4 ! LS, fr4 = vertex->x
|
||||
fmov fr10,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH
|
||||
fmul fr8,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth
|
||||
fmov.s fr5,@r5 ! LS, vertex->x = fr5
|
||||
add #4, r5 ! EX, r5 = &vertex->y
|
||||
|
||||
! TRANSFORM Y
|
||||
fmov.s @r5,fr4 ! fr4 = vertex->y
|
||||
fmov fr11,fr5 ! fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
fmul fr9,fr4 ! fr4 = VIEWPORT_HHEIGHT * vertex->y
|
||||
fmac fr0,fr4,fr5 ! fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
||||
fmov.s fr5,@r5 ! vertex->y = fr5
|
||||
add #4, r5 ! r5 = &vertex->z
|
||||
fmov.s @r5,fr4 ! LS, fr4 = vertex->y
|
||||
fmov fr11,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT
|
||||
fmul fr9,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y
|
||||
fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight
|
||||
fmov.s fr5,@r5 ! LS, vertex->y = fr5
|
||||
add #4, r5 ! EX, r5 = &vertex->z
|
||||
|
||||
! ASSIGN Z
|
||||
fmov.s fr0,@r5 ! vertex->z = fr0
|
||||
add #20, r5 ! r5 += 20 (points to start of next vertex)
|
||||
.endm
|
||||
fmov.s fr0,@r5 ! LS, vertex->z = fr0
|
||||
add #20, r5 ! EX, r5 += 20 (points to start of next vertex)
|
||||
.endm
|
||||
|
Loading…
x
Reference in New Issue
Block a user