mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-07 14:23:29 -04:00
N64: Save a few cycles here and there (down to 7.5 ms)
This commit is contained in:
parent
21b4fe79aa
commit
06cb87f773
@ -166,7 +166,7 @@ static int F2I(float value, int scale) {
|
|||||||
e = (raw.i & FLT_EXPONENT_MASK) >> FLT_EXPONENT_SHIFT;
|
e = (raw.i & FLT_EXPONENT_MASK) >> FLT_EXPONENT_SHIFT;
|
||||||
|
|
||||||
// Ignore denormal, infinity, or large exponents
|
// Ignore denormal, infinity, or large exponents
|
||||||
if (e <= 0 || e >= 160) return 0;
|
if (e <= 0 || e >= 146) return 0;
|
||||||
|
|
||||||
return value * scale;
|
return value * scale;
|
||||||
}
|
}
|
||||||
|
@ -123,8 +123,9 @@ GPUCmd_PushRDP:
|
|||||||
|
|
||||||
.func GPUCmd_MatrixLoad
|
.func GPUCmd_MatrixLoad
|
||||||
GPUCmd_MatrixLoad:
|
GPUCmd_MatrixLoad:
|
||||||
#define src t0
|
#define src s6
|
||||||
#define dst t1
|
#define dst s7
|
||||||
|
|
||||||
#define vmat0_i $v02
|
#define vmat0_i $v02
|
||||||
#define vmat1_i $v03
|
#define vmat1_i $v03
|
||||||
#define vmat2_i $v04
|
#define vmat2_i $v04
|
||||||
@ -180,14 +181,14 @@ GPUCmd_MatrixLoad:
|
|||||||
# GL_CalcScreenSpace
|
# GL_CalcScreenSpace
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# s3 = Destination vertex address
|
# a0 = Destination vertex address
|
||||||
# $v02 = Clip space position (fractional part)
|
# $v02 = Clip space position (fractional part)
|
||||||
# $v03 = Clip space position (integer part)
|
# $v03 = Clip space position (integer part)
|
||||||
#
|
#
|
||||||
################################################################
|
################################################################
|
||||||
.func GL_CalcScreenSpace
|
.func GL_CalcScreenSpace
|
||||||
GL_CalcScreenSpace:
|
GL_CalcScreenSpace:
|
||||||
#define dst s3
|
#define dst a0
|
||||||
#define vcspos_f $v02
|
#define vcspos_f $v02
|
||||||
#define vcspos_i $v03
|
#define vcspos_i $v03
|
||||||
#define vinvw_f $v23
|
#define vinvw_f $v23
|
||||||
@ -197,6 +198,13 @@ GL_CalcScreenSpace:
|
|||||||
#define v___ $v29
|
#define v___ $v29
|
||||||
#define w e3
|
#define w e3
|
||||||
|
|
||||||
|
ldv vcspos_i, SCREEN_VTX_CS_POSi, dst
|
||||||
|
ldv vcspos_f, SCREEN_VTX_CS_POSf, dst
|
||||||
|
|
||||||
|
li t0, %lo(GL_VIEWPORT_SCALE)
|
||||||
|
ldv vviewscale.e0, 0, t0
|
||||||
|
ldv vviewoff.e0, 8, t0
|
||||||
|
|
||||||
# Calculate 32-bit inverse W
|
# Calculate 32-bit inverse W
|
||||||
# TODO: NR?
|
# TODO: NR?
|
||||||
vrcph vinvw_i.w, vcspos_i.w
|
vrcph vinvw_i.w, vcspos_i.w
|
||||||
@ -208,17 +216,17 @@ GL_CalcScreenSpace:
|
|||||||
vmadn vscreenpos_f, vcspos_f, vinvw_i.w
|
vmadn vscreenpos_f, vcspos_f, vinvw_i.w
|
||||||
vmadh vscreenpos_i, vcspos_i, vinvw_i.w
|
vmadh vscreenpos_i, vcspos_i, vinvw_i.w
|
||||||
|
|
||||||
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
li t0, 0x3F
|
||||||
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
vmudn v___, vscreenpos_f, vviewscale
|
||||||
vadd vscreenpos_i, vviewoff
|
vmadh v___, vscreenpos_i, vviewscale
|
||||||
|
vmadh vscreenpos_i, vviewoff, K1
|
||||||
|
|
||||||
sdv vscreenpos_i, SCREEN_VTX_X ,dst
|
|
||||||
ssv vcspos_i.w, SCREEN_VTX_W+0 ,dst
|
ssv vcspos_i.w, SCREEN_VTX_W+0 ,dst
|
||||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
||||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
||||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
||||||
|
sdv vscreenpos_i, SCREEN_VTX_X ,dst
|
||||||
|
|
||||||
li t0, 0x3F
|
|
||||||
jr ra
|
jr ra
|
||||||
sb t0, SCREEN_VTX_PADDING(dst)
|
sb t0, SCREEN_VTX_PADDING(dst)
|
||||||
|
|
||||||
@ -238,14 +246,14 @@ GL_CalcScreenSpace:
|
|||||||
# GL_TnL
|
# GL_TnL
|
||||||
#
|
#
|
||||||
# Args:
|
# Args:
|
||||||
# a1 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
|
||||||
# a2 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
# a2 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||||
|
# a3 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||||
#
|
#
|
||||||
################################################################
|
################################################################
|
||||||
.func GL_TnL
|
.func GL_TnL
|
||||||
GL_TnL:
|
GL_TnL:
|
||||||
#define vtx1 a1
|
#define vtx1 a2
|
||||||
#define vtx2 a2
|
#define vtx2 a3
|
||||||
#define w e3
|
#define w e3
|
||||||
#define W e7
|
#define W e7
|
||||||
|
|
||||||
@ -260,12 +268,15 @@ GL_TnL:
|
|||||||
#define vscreenpos_i $v27
|
#define vscreenpos_i $v27
|
||||||
#define vscreenpos_f $v28
|
#define vscreenpos_f $v28
|
||||||
|
|
||||||
|
//emux_trace_start
|
||||||
|
|
||||||
ldv vcspos_i.e0, SCREEN_VTX_CS_POSi,vtx1
|
ldv vcspos_i.e0, SCREEN_VTX_CS_POSi,vtx1
|
||||||
ldv vcspos_i.e4, SCREEN_VTX_CS_POSi,vtx2
|
ldv vcspos_i.e4, SCREEN_VTX_CS_POSi,vtx2
|
||||||
ldv vcspos_f.e0, SCREEN_VTX_CS_POSf,vtx1
|
ldv vcspos_f.e0, SCREEN_VTX_CS_POSf,vtx1
|
||||||
ldv vcspos_f.e4, SCREEN_VTX_CS_POSf,vtx2
|
ldv vcspos_f.e4, SCREEN_VTX_CS_POSf,vtx2
|
||||||
|
li t1, 0x3F
|
||||||
|
|
||||||
# Calculate 32-bit inverse W
|
// Calculate 32-bit inverse W for vertex 1
|
||||||
vrcph vinvw_i.w, vcspos_i.w
|
vrcph vinvw_i.w, vcspos_i.w
|
||||||
vrcpl vinvw_f.w, vcspos_f.w
|
vrcpl vinvw_f.w, vcspos_f.w
|
||||||
vrcph vinvw_i.w, vzero.e0
|
vrcph vinvw_i.w, vzero.e0
|
||||||
@ -273,11 +284,16 @@ GL_TnL:
|
|||||||
vmudn vguard_f, vcspos_f, vguardscale
|
vmudn vguard_f, vcspos_f, vguardscale
|
||||||
vmadh vguard_i, vcspos_i, vguardscale
|
vmadh vguard_i, vcspos_i, vguardscale
|
||||||
|
|
||||||
# Calculate 32-bit inverse W
|
// Calculate 32-bit inverse W for vertex 2
|
||||||
vrcph vinvw_i.W, vcspos_i.W
|
vrcph vinvw_i.W, vcspos_i.W
|
||||||
vrcpl vinvw_f.W, vcspos_f.W
|
vrcpl vinvw_f.W, vcspos_f.W
|
||||||
vrcph vinvw_i.W, vzero.e0
|
vrcph vinvw_i.W, vzero.e0
|
||||||
|
|
||||||
|
ssv vcspos_i.w, SCREEN_VTX_W+0, vtx1
|
||||||
|
ssv vcspos_f.w, SCREEN_VTX_W+2, vtx1
|
||||||
|
ssv vcspos_i.W, SCREEN_VTX_W+0, vtx2
|
||||||
|
ssv vcspos_f.W, SCREEN_VTX_W+2, vtx2
|
||||||
|
|
||||||
vmudl v___, vcspos_f, vinvw_f.wwwwWWWW
|
vmudl v___, vcspos_f, vinvw_f.wwwwWWWW
|
||||||
vmadm v___, vcspos_i, vinvw_f.wwwwWWWW
|
vmadm v___, vcspos_i, vinvw_f.wwwwWWWW
|
||||||
vmadn vscreenpos_f, vcspos_f, vinvw_i.wwwwWWWW
|
vmadn vscreenpos_f, vcspos_f, vinvw_i.wwwwWWWW
|
||||||
@ -286,34 +302,31 @@ GL_TnL:
|
|||||||
vch v___, vguard_i, vguard_i.wwwwWWWW
|
vch v___, vguard_i, vguard_i.wwwwWWWW
|
||||||
vcl v___, vguard_f, vguard_f.wwwwWWWW
|
vcl v___, vguard_f, vguard_f.wwwwWWWW
|
||||||
|
|
||||||
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
vmudn v___, vscreenpos_f, vviewscale
|
||||||
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
vmadh v___, vscreenpos_i, vviewscale
|
||||||
vadd vscreenpos_i, vviewoff
|
vmadh vscreenpos_i, vviewoff, K1
|
||||||
li t0, 0x3F
|
|
||||||
|
|
||||||
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
|
||||||
ssv vcspos_i.w, SCREEN_VTX_W+0 ,vtx1
|
|
||||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,vtx1
|
|
||||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,vtx1
|
ssv vinvw_i.w, SCREEN_VTX_INVW+0,vtx1
|
||||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,vtx1
|
ssv vinvw_f.w, SCREEN_VTX_INVW+2,vtx1
|
||||||
sb t0, SCREEN_VTX_PADDING(vtx1)
|
|
||||||
|
|
||||||
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
|
||||||
ssv vcspos_i.W, SCREEN_VTX_W+0 ,vtx2
|
|
||||||
ssv vcspos_f.W, SCREEN_VTX_W+2 ,vtx2
|
|
||||||
ssv vinvw_i.W, SCREEN_VTX_INVW+0,vtx2
|
ssv vinvw_i.W, SCREEN_VTX_INVW+0,vtx2
|
||||||
ssv vinvw_f.W, SCREEN_VTX_INVW+2,vtx2
|
ssv vinvw_f.W, SCREEN_VTX_INVW+2,vtx2
|
||||||
sb t0, SCREEN_VTX_PADDING(vtx2)
|
cfc2 t0, COP2_CTRL_VCC
|
||||||
|
|
||||||
cfc2 t0, COP2_CTRL_VCC
|
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
||||||
compressClipCodes
|
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
||||||
|
sb t1, SCREEN_VTX_PADDING(vtx1)
|
||||||
|
sb t1, SCREEN_VTX_PADDING(vtx2)
|
||||||
|
|
||||||
|
compressClipCodes # TODO move to overlap with vector ops
|
||||||
sb t2, SCREEN_VTX_CLIP_CODE(vtx1)
|
sb t2, SCREEN_VTX_CLIP_CODE(vtx1)
|
||||||
|
|
||||||
srl t0, t0, 4
|
srl t0, t0, 4
|
||||||
compressClipCodes
|
compressClipCodes # TODO move to overlap with vector ops
|
||||||
|
//emux_trace_stop
|
||||||
jr ra
|
jr ra
|
||||||
sb t2, SCREEN_VTX_CLIP_CODE(vtx2)
|
sb t2, SCREEN_VTX_CLIP_CODE(vtx2)
|
||||||
|
|
||||||
|
|
||||||
#undef vinvw_f
|
#undef vinvw_f
|
||||||
#undef vinvw_i
|
#undef vinvw_i
|
||||||
#undef vscreenpos_i
|
#undef vscreenpos_i
|
||||||
@ -334,6 +347,10 @@ GL_TnL:
|
|||||||
.align 3
|
.align 3
|
||||||
.func GPUCmd_DrawQuad
|
.func GPUCmd_DrawQuad
|
||||||
GPUCmd_DrawQuad:
|
GPUCmd_DrawQuad:
|
||||||
|
#define vtx_ptr a0
|
||||||
|
#define mtx_ptr s2
|
||||||
|
#define src_ptr s3
|
||||||
|
|
||||||
#define v___ $v01
|
#define v___ $v01
|
||||||
|
|
||||||
#define vst_i $v12
|
#define vst_i $v12
|
||||||
@ -361,9 +378,7 @@ GPUCmd_DrawQuad:
|
|||||||
#define v1_cflags t2
|
#define v1_cflags t2
|
||||||
#define v2_cflags t3
|
#define v2_cflags t3
|
||||||
#define v3_cflags t4
|
#define v3_cflags t4
|
||||||
#define mtx_ptr t5 // t5 is also used by GL_ClipTriangle
|
// t5 is used by GL_ClipTriangle
|
||||||
#define vtx_ptr t6
|
|
||||||
#define src_ptr t7
|
|
||||||
|
|
||||||
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||||
li vtx_ptr, %lo(VERTEX_CACHE)
|
li vtx_ptr, %lo(VERTEX_CACHE)
|
||||||
@ -380,7 +395,6 @@ GPUCmd_DrawQuad:
|
|||||||
lqv vmtx1_f, 0x50,mtx_ptr
|
lqv vmtx1_f, 0x50,mtx_ptr
|
||||||
lqv vmtx2_f, 0x60,mtx_ptr
|
lqv vmtx2_f, 0x60,mtx_ptr
|
||||||
lqv vmtx3_f, 0x70,mtx_ptr
|
lqv vmtx3_f, 0x70,mtx_ptr
|
||||||
#undef mtx_ptr
|
|
||||||
|
|
||||||
// ########################
|
// ########################
|
||||||
// Vertex 0 and 1 transform
|
// Vertex 0 and 1 transform
|
||||||
@ -405,10 +419,10 @@ GPUCmd_DrawQuad:
|
|||||||
vmudm vcspos_i, vcspos_i, K2048
|
vmudm vcspos_i, vcspos_i, K2048
|
||||||
vmadl vcspos_f, vcspos_f, K2048
|
vmadl vcspos_f, vcspos_f, K2048
|
||||||
|
|
||||||
li tmp, %lo(GL_STATE_TEX_SIZE)
|
li t6, %lo(GL_STATE_TEX_SIZE)
|
||||||
lqv vtexsize, 0x00, tmp
|
lqv vtexsize, 0x00, t6
|
||||||
slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx_ptr
|
slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx_ptr
|
||||||
lqv vtexoffset, 0x10, tmp
|
lqv vtexoffset, 0x10, t6
|
||||||
slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx_ptr
|
slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx_ptr
|
||||||
|
|
||||||
// Calculate and store clipping flags against CS.W.
|
// Calculate and store clipping flags against CS.W.
|
||||||
@ -527,13 +541,13 @@ GPUCmd_DrawQuad:
|
|||||||
ldv vguardscale.e0, 0, t0
|
ldv vguardscale.e0, 0, t0
|
||||||
ldv vguardscale.e4, 0, t0
|
ldv vguardscale.e4, 0, t0
|
||||||
|
|
||||||
li a1, %lo(VERTEX_CACHE) + V0_OFFSET
|
li a2, %lo(VERTEX_CACHE) + V0_OFFSET
|
||||||
jal GL_TnL
|
jal GL_TnL
|
||||||
li a2, %lo(VERTEX_CACHE) + V1_OFFSET
|
li a3, %lo(VERTEX_CACHE) + V1_OFFSET
|
||||||
|
|
||||||
li a1, %lo(VERTEX_CACHE) + V2_OFFSET
|
li a2, %lo(VERTEX_CACHE) + V2_OFFSET
|
||||||
jal GL_TnL
|
jal GL_TnL
|
||||||
li a2, %lo(VERTEX_CACHE) + V3_OFFSET
|
li a3, %lo(VERTEX_CACHE) + V3_OFFSET
|
||||||
|
|
||||||
// ########################
|
// ########################
|
||||||
// Guardband check
|
// Guardband check
|
||||||
@ -580,11 +594,19 @@ GPUCmd_DrawQuad:
|
|||||||
beqz v1, gl_draw_triangle_end
|
beqz v1, gl_draw_triangle_end
|
||||||
addi s2, -6
|
addi s2, -6
|
||||||
lhu s5, 0(s1)
|
lhu s5, 0(s1)
|
||||||
|
|
||||||
|
jal GL_CalcScreenSpace
|
||||||
|
lhu a0, 0(s1)
|
||||||
|
jal GL_CalcScreenSpace
|
||||||
|
lhu a0, 2(s1)
|
||||||
|
|
||||||
gl_draw_clipped_triangles_loop:
|
gl_draw_clipped_triangles_loop:
|
||||||
move vtx1, s5
|
move vtx1, s5
|
||||||
lhu vtx2, 2(s1)
|
lhu vtx2, 2(s1)
|
||||||
lhu vtx3, 4(s1)
|
lhu vtx3, 4(s1)
|
||||||
# TODO do VP transform here
|
|
||||||
|
jal GL_CalcScreenSpace
|
||||||
|
move a0, vtx3
|
||||||
|
|
||||||
gl_draw_single_triangle:
|
gl_draw_single_triangle:
|
||||||
addi vtx1, SCREEN_VTX_X
|
addi vtx1, SCREEN_VTX_X
|
||||||
|
@ -18,7 +18,6 @@ CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20
|
|||||||
|
|
||||||
.section .bss.gl_clipping
|
.section .bss.gl_clipping
|
||||||
|
|
||||||
.align 4
|
|
||||||
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||||
CLIP_CACHE_END:
|
CLIP_CACHE_END:
|
||||||
|
|
||||||
@ -342,32 +341,11 @@ gl_clip_plane_loop_end:
|
|||||||
blt plane_flag, (1<<CLIPPING_PLANE_COUNT), gl_clip_plane_loop
|
blt plane_flag, (1<<CLIPPING_PLANE_COUNT), gl_clip_plane_loop
|
||||||
addi plane, CLIPPING_PLANE_SIZE
|
addi plane, CLIPPING_PLANE_SIZE
|
||||||
|
|
||||||
#define cache_vtx s3
|
|
||||||
#define cache_end s5
|
|
||||||
|
|
||||||
# Calculate screen space values for new vertices (in the clip cache)
|
|
||||||
# TODO: maybe iterate over out_list instead
|
|
||||||
li cache_vtx, %lo(CLIP_CACHE)
|
|
||||||
li cache_end, %lo(CLIP_CACHE_END) - SCREEN_VTX_SIZE
|
|
||||||
gl_clip_finalize_loop:
|
|
||||||
lbu t0, SCREEN_VTX_PADDING(cache_vtx)
|
|
||||||
neg t0
|
|
||||||
|
|
||||||
# Only calculate screen space values if the vertex is actually used
|
|
||||||
ldv vint_i, SCREEN_VTX_CS_POSi,cache_vtx
|
|
||||||
bltzal t0, GL_CalcScreenSpace
|
|
||||||
ldv vint_f, SCREEN_VTX_CS_POSf,cache_vtx
|
|
||||||
|
|
||||||
blt cache_vtx, cache_end, gl_clip_finalize_loop
|
|
||||||
addi cache_vtx, SCREEN_VTX_SIZE
|
|
||||||
|
|
||||||
gl_clip_return:
|
gl_clip_return:
|
||||||
# Done!
|
# Done!
|
||||||
jr ra2
|
jr ra2
|
||||||
add s2, out_list, out_count
|
add s2, out_list, out_count
|
||||||
|
|
||||||
#undef cache_vtx
|
|
||||||
#undef cache_end
|
|
||||||
#undef clip_flags
|
#undef clip_flags
|
||||||
#undef plane_flag
|
#undef plane_flag
|
||||||
#undef in_count
|
#undef in_count
|
||||||
|
Loading…
x
Reference in New Issue
Block a user