mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-09 07:18:34 -04:00
N64: Use optimised async triangle drawing (down to 7.8 ms on RSP)
This commit is contained in:
parent
d547f6e0a5
commit
6658154f2d
@ -32,6 +32,8 @@
|
|||||||
#define V2_OFFSET 2 * SCREEN_VTX_SIZE
|
#define V2_OFFSET 2 * SCREEN_VTX_SIZE
|
||||||
#define V3_OFFSET 3 * SCREEN_VTX_SIZE
|
#define V3_OFFSET 3 * SCREEN_VTX_SIZE
|
||||||
|
|
||||||
|
#define MAX_TRI_CMD_SIZE 0xB0
|
||||||
|
|
||||||
.macro compressClipCodes
|
.macro compressClipCodes
|
||||||
andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags
|
andi t2, t0, 0x707 // Isolate X/Y/Z clipping flags
|
||||||
srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags
|
srl t1, t2, 5 // Shift hi flags to be aligned next to lo flags
|
||||||
@ -76,7 +78,12 @@ CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR
|
|||||||
.bss
|
.bss
|
||||||
.align 3
|
.align 3
|
||||||
|
|
||||||
VERTEX_CACHE: .dcb.b SCREEN_VTX_SIZE * 4
|
VERTEX_CACHE: .ds.b SCREEN_VTX_SIZE * 4
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
// Enough for all 10 triangle commands in the worst-case clipped quad scenario
|
||||||
|
TRI_CMD_BUFFER: .ds.b (MAX_TRI_CMD_SIZE * 10)
|
||||||
|
|
||||||
|
|
||||||
.text
|
.text
|
||||||
|
|
||||||
@ -211,8 +218,10 @@ GL_CalcScreenSpace:
|
|||||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
||||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
||||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
||||||
|
|
||||||
|
li t0, 0x3F
|
||||||
jr ra
|
jr ra
|
||||||
sb zero, SCREEN_VTX_PADDING(dst)
|
sb t0, SCREEN_VTX_PADDING(dst)
|
||||||
|
|
||||||
#undef dst
|
#undef dst
|
||||||
#undef vcspos_f
|
#undef vcspos_f
|
||||||
@ -226,46 +235,6 @@ GL_CalcScreenSpace:
|
|||||||
|
|
||||||
.endfunc
|
.endfunc
|
||||||
|
|
||||||
################################################################
|
|
||||||
# GL_CalcClipCodes
|
|
||||||
#
|
|
||||||
# Args:
|
|
||||||
# s3 = Destination vertex address
|
|
||||||
# $v02 = Clip space position (fractional part)
|
|
||||||
# $v03 = Clip space position (integer part)
|
|
||||||
#
|
|
||||||
################################################################
|
|
||||||
.func GL_CalcClipCodes
|
|
||||||
GL_CalcClipCodes:
|
|
||||||
#define dst s3
|
|
||||||
#define vcspos_f $v02
|
|
||||||
#define vcspos_i $v03
|
|
||||||
#define vguard_f $v27
|
|
||||||
#define vguard_i $v28
|
|
||||||
#define v___ $v29
|
|
||||||
#define w e3
|
|
||||||
|
|
||||||
vmudn vguard_f, vcspos_f, vguardscale
|
|
||||||
vmadh vguard_i, vcspos_i, vguardscale
|
|
||||||
|
|
||||||
vch v___, vguard_i, vguard_i.w
|
|
||||||
vcl v___, vguard_f, vguard_f.w
|
|
||||||
|
|
||||||
cfc2 t0, COP2_CTRL_VCC
|
|
||||||
compressClipCodes
|
|
||||||
jr ra
|
|
||||||
sb t2, SCREEN_VTX_CLIP_CODE(dst)
|
|
||||||
|
|
||||||
#undef dst
|
|
||||||
#undef vcspos_i
|
|
||||||
#undef vcspos_f
|
|
||||||
#undef vguard_i
|
|
||||||
#undef vguard_f
|
|
||||||
#undef v___
|
|
||||||
#undef w
|
|
||||||
|
|
||||||
.endfunc
|
|
||||||
|
|
||||||
################################################################
|
################################################################
|
||||||
# GL_TnL
|
# GL_TnL
|
||||||
#
|
#
|
||||||
@ -321,20 +290,21 @@ GL_TnL:
|
|||||||
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
||||||
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
||||||
vadd vscreenpos_i, vviewoff
|
vadd vscreenpos_i, vviewoff
|
||||||
|
li t0, 0x3F
|
||||||
|
|
||||||
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
||||||
ssv vcspos_i.w, SCREEN_VTX_W+0 ,vtx1
|
ssv vcspos_i.w, SCREEN_VTX_W+0 ,vtx1
|
||||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,vtx1
|
ssv vcspos_f.w, SCREEN_VTX_W+2 ,vtx1
|
||||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,vtx1
|
ssv vinvw_i.w, SCREEN_VTX_INVW+0,vtx1
|
||||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,vtx1
|
ssv vinvw_f.w, SCREEN_VTX_INVW+2,vtx1
|
||||||
sb zero, SCREEN_VTX_PADDING(vtx1)
|
sb t0, SCREEN_VTX_PADDING(vtx1)
|
||||||
|
|
||||||
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
||||||
ssv vcspos_i.W, SCREEN_VTX_W+0 ,vtx2
|
ssv vcspos_i.W, SCREEN_VTX_W+0 ,vtx2
|
||||||
ssv vcspos_f.W, SCREEN_VTX_W+2 ,vtx2
|
ssv vcspos_f.W, SCREEN_VTX_W+2 ,vtx2
|
||||||
ssv vinvw_i.W, SCREEN_VTX_INVW+0,vtx2
|
ssv vinvw_i.W, SCREEN_VTX_INVW+0,vtx2
|
||||||
ssv vinvw_f.W, SCREEN_VTX_INVW+2,vtx2
|
ssv vinvw_f.W, SCREEN_VTX_INVW+2,vtx2
|
||||||
sb zero, SCREEN_VTX_PADDING(vtx2)
|
sb t0, SCREEN_VTX_PADDING(vtx2)
|
||||||
|
|
||||||
cfc2 t0, COP2_CTRL_VCC
|
cfc2 t0, COP2_CTRL_VCC
|
||||||
compressClipCodes
|
compressClipCodes
|
||||||
@ -608,6 +578,7 @@ GPUCmd_DrawQuad:
|
|||||||
jal GL_ClipTriangle
|
jal GL_ClipTriangle
|
||||||
move s2, zero
|
move s2, zero
|
||||||
|
|
||||||
|
li s3, %lo(TRI_CMD_BUFFER)
|
||||||
beqz v1, gl_draw_triangle_end
|
beqz v1, gl_draw_triangle_end
|
||||||
addi s2, -6
|
addi s2, -6
|
||||||
lhu s5, 0(s1)
|
lhu s5, 0(s1)
|
||||||
@ -622,17 +593,16 @@ gl_draw_single_triangle:
|
|||||||
addi vtx3, SCREEN_VTX_X
|
addi vtx3, SCREEN_VTX_X
|
||||||
|
|
||||||
lhu a0, %lo(GL_TRI_CMD)
|
lhu a0, %lo(GL_TRI_CMD)
|
||||||
|
jal RDPQ_Triangle_Send_Async
|
||||||
lh v0, %lo(GL_TRI_CULL)
|
lh v0, %lo(GL_TRI_CULL)
|
||||||
jal RDPQ_Triangle
|
|
||||||
li s3, %lo(RDPQ_CMD_STAGING)
|
|
||||||
|
|
||||||
jal RDPQ_Send
|
|
||||||
li s4, %lo(RDPQ_CMD_STAGING)
|
|
||||||
|
|
||||||
blt s1, s2, gl_draw_clipped_triangles_loop
|
blt s1, s2, gl_draw_clipped_triangles_loop
|
||||||
addi s1, 2
|
addi s1, 2
|
||||||
|
|
||||||
gl_draw_triangle_end:
|
gl_draw_triangle_end:
|
||||||
|
jal RDPQ_Triangle_Send_End
|
||||||
|
nop
|
||||||
|
|
||||||
j RSPQ_Loop
|
j RSPQ_Loop
|
||||||
nop
|
nop
|
||||||
.endfunc
|
.endfunc
|
||||||
@ -642,28 +612,26 @@ gl_draw_triangle_end:
|
|||||||
################################################################
|
################################################################
|
||||||
.func DrawQuadForRDP
|
.func DrawQuadForRDP
|
||||||
DrawQuadForRDP:
|
DrawQuadForRDP:
|
||||||
lhu a0, %lo(GL_TRI_CMD)
|
li s3, %lo(TRI_CMD_BUFFER)
|
||||||
lh v0, %lo(GL_TRI_CULL)
|
|
||||||
li a1, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X
|
li a1, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X
|
||||||
li a2, %lo(VERTEX_CACHE) + V1_OFFSET + SCREEN_VTX_X
|
li a2, %lo(VERTEX_CACHE) + V1_OFFSET + SCREEN_VTX_X
|
||||||
li a3, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X
|
li a3, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X
|
||||||
jal RDPQ_Triangle
|
|
||||||
li s3, %lo(RDPQ_CMD_STAGING)
|
|
||||||
|
|
||||||
jal RDPQ_Send
|
|
||||||
li s4, %lo(RDPQ_CMD_STAGING)
|
|
||||||
|
|
||||||
lhu a0, %lo(GL_TRI_CMD)
|
|
||||||
lh v0, %lo(GL_TRI_CULL)
|
lh v0, %lo(GL_TRI_CULL)
|
||||||
|
jal RDPQ_Triangle_Send_Async
|
||||||
|
lhu a0, %lo(GL_TRI_CMD)
|
||||||
|
|
||||||
li a1, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X
|
li a1, %lo(VERTEX_CACHE) + V2_OFFSET + SCREEN_VTX_X
|
||||||
li a2, %lo(VERTEX_CACHE) + V3_OFFSET + SCREEN_VTX_X
|
li a2, %lo(VERTEX_CACHE) + V3_OFFSET + SCREEN_VTX_X
|
||||||
li a3, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X
|
li a3, %lo(VERTEX_CACHE) + V0_OFFSET + SCREEN_VTX_X
|
||||||
jal RDPQ_Triangle
|
lh v0, %lo(GL_TRI_CULL)
|
||||||
li s3, %lo(RDPQ_CMD_STAGING)
|
jal RDPQ_Triangle_Send_Async
|
||||||
|
lhu a0, %lo(GL_TRI_CMD)
|
||||||
|
|
||||||
jal RDPQ_Send
|
jal RDPQ_Triangle_Send_End
|
||||||
li s4, %lo(RDPQ_CMD_STAGING)
|
nop
|
||||||
|
|
||||||
|
RDPQ_Triangle_Cull:
|
||||||
|
RDPQ_Triangle_Clip:
|
||||||
j RSPQ_Loop
|
j RSPQ_Loop
|
||||||
nop
|
nop
|
||||||
.endfunc
|
.endfunc
|
||||||
|
Loading…
x
Reference in New Issue
Block a user