mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-08-03 10:47:39 -04:00
N64: Save a few cycles here and there (down to 7.5 ms)
This commit is contained in:
parent
21b4fe79aa
commit
06cb87f773
@ -166,7 +166,7 @@ static int F2I(float value, int scale) {
|
||||
e = (raw.i & FLT_EXPONENT_MASK) >> FLT_EXPONENT_SHIFT;
|
||||
|
||||
// Ignore denormal, infinity, or large exponents
|
||||
if (e <= 0 || e >= 160) return 0;
|
||||
if (e <= 0 || e >= 146) return 0;
|
||||
|
||||
return value * scale;
|
||||
}
|
||||
|
@ -123,8 +123,9 @@ GPUCmd_PushRDP:
|
||||
|
||||
.func GPUCmd_MatrixLoad
|
||||
GPUCmd_MatrixLoad:
|
||||
#define src t0
|
||||
#define dst t1
|
||||
#define src s6
|
||||
#define dst s7
|
||||
|
||||
#define vmat0_i $v02
|
||||
#define vmat1_i $v03
|
||||
#define vmat2_i $v04
|
||||
@ -180,14 +181,14 @@ GPUCmd_MatrixLoad:
|
||||
# GL_CalcScreenSpace
|
||||
#
|
||||
# Args:
|
||||
# s3 = Destination vertex address
|
||||
# a0 = Destination vertex address
|
||||
# $v02 = Clip space position (fractional part)
|
||||
# $v03 = Clip space position (integer part)
|
||||
#
|
||||
################################################################
|
||||
.func GL_CalcScreenSpace
|
||||
GL_CalcScreenSpace:
|
||||
#define dst s3
|
||||
#define dst a0
|
||||
#define vcspos_f $v02
|
||||
#define vcspos_i $v03
|
||||
#define vinvw_f $v23
|
||||
@ -197,6 +198,13 @@ GL_CalcScreenSpace:
|
||||
#define v___ $v29
|
||||
#define w e3
|
||||
|
||||
ldv vcspos_i, SCREEN_VTX_CS_POSi, dst
|
||||
ldv vcspos_f, SCREEN_VTX_CS_POSf, dst
|
||||
|
||||
li t0, %lo(GL_VIEWPORT_SCALE)
|
||||
ldv vviewscale.e0, 0, t0
|
||||
ldv vviewoff.e0, 8, t0
|
||||
|
||||
# Calculate 32-bit inverse W
|
||||
# TODO: NR?
|
||||
vrcph vinvw_i.w, vcspos_i.w
|
||||
@ -208,17 +216,17 @@ GL_CalcScreenSpace:
|
||||
vmadn vscreenpos_f, vcspos_f, vinvw_i.w
|
||||
vmadh vscreenpos_i, vcspos_i, vinvw_i.w
|
||||
|
||||
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
||||
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
||||
vadd vscreenpos_i, vviewoff
|
||||
li t0, 0x3F
|
||||
vmudn v___, vscreenpos_f, vviewscale
|
||||
vmadh v___, vscreenpos_i, vviewscale
|
||||
vmadh vscreenpos_i, vviewoff, K1
|
||||
|
||||
sdv vscreenpos_i, SCREEN_VTX_X ,dst
|
||||
ssv vcspos_i.w, SCREEN_VTX_W+0 ,dst
|
||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
||||
sdv vscreenpos_i, SCREEN_VTX_X ,dst
|
||||
|
||||
li t0, 0x3F
|
||||
jr ra
|
||||
sb t0, SCREEN_VTX_PADDING(dst)
|
||||
|
||||
@ -238,14 +246,14 @@ GL_CalcScreenSpace:
|
||||
# GL_TnL
|
||||
#
|
||||
# Args:
|
||||
# a1 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||
# a2 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||
# a3 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||
#
|
||||
################################################################
|
||||
.func GL_TnL
|
||||
GL_TnL:
|
||||
#define vtx1 a1
|
||||
#define vtx2 a2
|
||||
#define vtx1 a2
|
||||
#define vtx2 a3
|
||||
#define w e3
|
||||
#define W e7
|
||||
|
||||
@ -260,12 +268,15 @@ GL_TnL:
|
||||
#define vscreenpos_i $v27
|
||||
#define vscreenpos_f $v28
|
||||
|
||||
//emux_trace_start
|
||||
|
||||
ldv vcspos_i.e0, SCREEN_VTX_CS_POSi,vtx1
|
||||
ldv vcspos_i.e4, SCREEN_VTX_CS_POSi,vtx2
|
||||
ldv vcspos_f.e0, SCREEN_VTX_CS_POSf,vtx1
|
||||
ldv vcspos_f.e4, SCREEN_VTX_CS_POSf,vtx2
|
||||
li t1, 0x3F
|
||||
|
||||
# Calculate 32-bit inverse W
|
||||
// Calculate 32-bit inverse W for vertex 1
|
||||
vrcph vinvw_i.w, vcspos_i.w
|
||||
vrcpl vinvw_f.w, vcspos_f.w
|
||||
vrcph vinvw_i.w, vzero.e0
|
||||
@ -273,11 +284,16 @@ GL_TnL:
|
||||
vmudn vguard_f, vcspos_f, vguardscale
|
||||
vmadh vguard_i, vcspos_i, vguardscale
|
||||
|
||||
# Calculate 32-bit inverse W
|
||||
// Calculate 32-bit inverse W for vertex 2
|
||||
vrcph vinvw_i.W, vcspos_i.W
|
||||
vrcpl vinvw_f.W, vcspos_f.W
|
||||
vrcph vinvw_i.W, vzero.e0
|
||||
|
||||
ssv vcspos_i.w, SCREEN_VTX_W+0, vtx1
|
||||
ssv vcspos_f.w, SCREEN_VTX_W+2, vtx1
|
||||
ssv vcspos_i.W, SCREEN_VTX_W+0, vtx2
|
||||
ssv vcspos_f.W, SCREEN_VTX_W+2, vtx2
|
||||
|
||||
vmudl v___, vcspos_f, vinvw_f.wwwwWWWW
|
||||
vmadm v___, vcspos_i, vinvw_f.wwwwWWWW
|
||||
vmadn vscreenpos_f, vcspos_f, vinvw_i.wwwwWWWW
|
||||
@ -286,34 +302,31 @@ GL_TnL:
|
||||
vch v___, vguard_i, vguard_i.wwwwWWWW
|
||||
vcl v___, vguard_f, vguard_f.wwwwWWWW
|
||||
|
||||
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
||||
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
||||
vadd vscreenpos_i, vviewoff
|
||||
li t0, 0x3F
|
||||
vmudn v___, vscreenpos_f, vviewscale
|
||||
vmadh v___, vscreenpos_i, vviewscale
|
||||
vmadh vscreenpos_i, vviewoff, K1
|
||||
|
||||
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
||||
ssv vcspos_i.w, SCREEN_VTX_W+0 ,vtx1
|
||||
ssv vcspos_f.w, SCREEN_VTX_W+2 ,vtx1
|
||||
ssv vinvw_i.w, SCREEN_VTX_INVW+0,vtx1
|
||||
ssv vinvw_f.w, SCREEN_VTX_INVW+2,vtx1
|
||||
sb t0, SCREEN_VTX_PADDING(vtx1)
|
||||
|
||||
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
||||
ssv vcspos_i.W, SCREEN_VTX_W+0 ,vtx2
|
||||
ssv vcspos_f.W, SCREEN_VTX_W+2 ,vtx2
|
||||
ssv vinvw_i.W, SCREEN_VTX_INVW+0,vtx2
|
||||
ssv vinvw_f.W, SCREEN_VTX_INVW+2,vtx2
|
||||
sb t0, SCREEN_VTX_PADDING(vtx2)
|
||||
cfc2 t0, COP2_CTRL_VCC
|
||||
|
||||
cfc2 t0, COP2_CTRL_VCC
|
||||
compressClipCodes
|
||||
sdv vscreenpos_i.e0, SCREEN_VTX_X ,vtx1
|
||||
sdv vscreenpos_i.e4, SCREEN_VTX_X ,vtx2
|
||||
sb t1, SCREEN_VTX_PADDING(vtx1)
|
||||
sb t1, SCREEN_VTX_PADDING(vtx2)
|
||||
|
||||
compressClipCodes # TODO move to overlap with vector ops
|
||||
sb t2, SCREEN_VTX_CLIP_CODE(vtx1)
|
||||
|
||||
srl t0, t0, 4
|
||||
compressClipCodes
|
||||
compressClipCodes # TODO move to overlap with vector ops
|
||||
//emux_trace_stop
|
||||
jr ra
|
||||
sb t2, SCREEN_VTX_CLIP_CODE(vtx2)
|
||||
|
||||
|
||||
#undef vinvw_f
|
||||
#undef vinvw_i
|
||||
#undef vscreenpos_i
|
||||
@ -334,6 +347,10 @@ GL_TnL:
|
||||
.align 3
|
||||
.func GPUCmd_DrawQuad
|
||||
GPUCmd_DrawQuad:
|
||||
#define vtx_ptr a0
|
||||
#define mtx_ptr s2
|
||||
#define src_ptr s3
|
||||
|
||||
#define v___ $v01
|
||||
|
||||
#define vst_i $v12
|
||||
@ -361,9 +378,7 @@ GPUCmd_DrawQuad:
|
||||
#define v1_cflags t2
|
||||
#define v2_cflags t3
|
||||
#define v3_cflags t4
|
||||
#define mtx_ptr t5 // t5 is also used by GL_ClipTriangle
|
||||
#define vtx_ptr t6
|
||||
#define src_ptr t7
|
||||
// t5 is used by GL_ClipTriangle
|
||||
|
||||
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||
li vtx_ptr, %lo(VERTEX_CACHE)
|
||||
@ -380,7 +395,6 @@ GPUCmd_DrawQuad:
|
||||
lqv vmtx1_f, 0x50,mtx_ptr
|
||||
lqv vmtx2_f, 0x60,mtx_ptr
|
||||
lqv vmtx3_f, 0x70,mtx_ptr
|
||||
#undef mtx_ptr
|
||||
|
||||
// ########################
|
||||
// Vertex 0 and 1 transform
|
||||
@ -405,10 +419,10 @@ GPUCmd_DrawQuad:
|
||||
vmudm vcspos_i, vcspos_i, K2048
|
||||
vmadl vcspos_f, vcspos_f, K2048
|
||||
|
||||
li tmp, %lo(GL_STATE_TEX_SIZE)
|
||||
lqv vtexsize, 0x00, tmp
|
||||
li t6, %lo(GL_STATE_TEX_SIZE)
|
||||
lqv vtexsize, 0x00, t6
|
||||
slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx_ptr
|
||||
lqv vtexoffset, 0x10, tmp
|
||||
lqv vtexoffset, 0x10, t6
|
||||
slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx_ptr
|
||||
|
||||
// Calculate and store clipping flags against CS.W.
|
||||
@ -527,13 +541,13 @@ GPUCmd_DrawQuad:
|
||||
ldv vguardscale.e0, 0, t0
|
||||
ldv vguardscale.e4, 0, t0
|
||||
|
||||
li a1, %lo(VERTEX_CACHE) + V0_OFFSET
|
||||
li a2, %lo(VERTEX_CACHE) + V0_OFFSET
|
||||
jal GL_TnL
|
||||
li a2, %lo(VERTEX_CACHE) + V1_OFFSET
|
||||
li a3, %lo(VERTEX_CACHE) + V1_OFFSET
|
||||
|
||||
li a1, %lo(VERTEX_CACHE) + V2_OFFSET
|
||||
li a2, %lo(VERTEX_CACHE) + V2_OFFSET
|
||||
jal GL_TnL
|
||||
li a2, %lo(VERTEX_CACHE) + V3_OFFSET
|
||||
li a3, %lo(VERTEX_CACHE) + V3_OFFSET
|
||||
|
||||
// ########################
|
||||
// Guardband check
|
||||
@ -580,11 +594,19 @@ GPUCmd_DrawQuad:
|
||||
beqz v1, gl_draw_triangle_end
|
||||
addi s2, -6
|
||||
lhu s5, 0(s1)
|
||||
|
||||
jal GL_CalcScreenSpace
|
||||
lhu a0, 0(s1)
|
||||
jal GL_CalcScreenSpace
|
||||
lhu a0, 2(s1)
|
||||
|
||||
gl_draw_clipped_triangles_loop:
|
||||
move vtx1, s5
|
||||
lhu vtx2, 2(s1)
|
||||
lhu vtx3, 4(s1)
|
||||
# TODO do VP transform here
|
||||
|
||||
jal GL_CalcScreenSpace
|
||||
move a0, vtx3
|
||||
|
||||
gl_draw_single_triangle:
|
||||
addi vtx1, SCREEN_VTX_X
|
||||
|
@ -18,7 +18,6 @@ CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20
|
||||
|
||||
.section .bss.gl_clipping
|
||||
|
||||
.align 4
|
||||
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||
CLIP_CACHE_END:
|
||||
|
||||
@ -342,32 +341,11 @@ gl_clip_plane_loop_end:
|
||||
blt plane_flag, (1<<CLIPPING_PLANE_COUNT), gl_clip_plane_loop
|
||||
addi plane, CLIPPING_PLANE_SIZE
|
||||
|
||||
#define cache_vtx s3
|
||||
#define cache_end s5
|
||||
|
||||
# Calculate screen space values for new vertices (in the clip cache)
|
||||
# TODO: maybe iterate over out_list instead
|
||||
li cache_vtx, %lo(CLIP_CACHE)
|
||||
li cache_end, %lo(CLIP_CACHE_END) - SCREEN_VTX_SIZE
|
||||
gl_clip_finalize_loop:
|
||||
lbu t0, SCREEN_VTX_PADDING(cache_vtx)
|
||||
neg t0
|
||||
|
||||
# Only calculate screen space values if the vertex is actually used
|
||||
ldv vint_i, SCREEN_VTX_CS_POSi,cache_vtx
|
||||
bltzal t0, GL_CalcScreenSpace
|
||||
ldv vint_f, SCREEN_VTX_CS_POSf,cache_vtx
|
||||
|
||||
blt cache_vtx, cache_end, gl_clip_finalize_loop
|
||||
addi cache_vtx, SCREEN_VTX_SIZE
|
||||
|
||||
gl_clip_return:
|
||||
# Done!
|
||||
jr ra2
|
||||
add s2, out_list, out_count
|
||||
|
||||
#undef cache_vtx
|
||||
#undef cache_end
|
||||
#undef clip_flags
|
||||
#undef plane_flag
|
||||
#undef in_count
|
||||
|
Loading…
x
Reference in New Issue
Block a user