mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-08-04 03:07:07 -04:00
Optimise vertex upload
This commit is contained in:
parent
b73d03b199
commit
db9b359b8b
@ -34,7 +34,7 @@ enum {
|
|||||||
GPU_CMD_SET_LONG = 0x3,
|
GPU_CMD_SET_LONG = 0x3,
|
||||||
|
|
||||||
GPU_CMD_DRAW_QUAD = 0x4,
|
GPU_CMD_DRAW_QUAD = 0x4,
|
||||||
GPU_CMD_UPLOAD_VTX = 0x5,
|
GPU_CMD_UPLOAD_QUAD = 0x5,
|
||||||
GPU_CMD_MATRIX_LOAD = 0x6,
|
GPU_CMD_MATRIX_LOAD = 0x6,
|
||||||
|
|
||||||
GPU_CMD_PUSH_RDP = 0x7,
|
GPU_CMD_PUSH_RDP = 0x7,
|
||||||
@ -156,41 +156,40 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
|
|||||||
rspq_write_arg(s, v2 | (v1 << 16));
|
rspq_write_arg(s, v2 | (v1 << 16));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void upload_vertex(uint32_t index, uint8_t cache_index)
|
static void upload_vertex(rspq_write_t* s, uint32_t index)
|
||||||
{
|
{
|
||||||
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 5);
|
|
||||||
rspq_write_arg(&s, cache_index * PRIM_VTX_SIZE);
|
|
||||||
char* ptr = gpu_pointer + index * gpu_stride;
|
char* ptr = gpu_pointer + index * gpu_stride;
|
||||||
|
|
||||||
float* vtx = (float*)(ptr + 0);
|
float* vtx = (float*)(ptr + 0);
|
||||||
put_word(&s, vtx[0] * (1<<VTX_SHIFT),
|
put_word(s, vtx[0] * (1<<VTX_SHIFT),
|
||||||
vtx[1] * (1<<VTX_SHIFT));
|
vtx[1] * (1<<VTX_SHIFT));
|
||||||
put_word(&s, vtx[2] * (1<<VTX_SHIFT),
|
put_word(s, vtx[2] * (1<<VTX_SHIFT),
|
||||||
1.0f * (1<<VTX_SHIFT));
|
1.0f * (1<<VTX_SHIFT));
|
||||||
|
|
||||||
uint32_t* col = (uint32_t*)(ptr + 12); // TODO put_byte ?
|
uint32_t* col = (uint32_t*)(ptr + 12);
|
||||||
rspq_write_arg(&s, *col);
|
rspq_write_arg(s, *col);
|
||||||
|
|
||||||
if (gpu_texturing) {
|
if (gpu_texturing) {
|
||||||
float* tex = (float*)(ptr + 16);
|
float* tex = (float*)(ptr + 16);
|
||||||
put_word(&s, tex[0] * (1<<TEX_SHIFT),
|
put_word(s, tex[0] * (1<<TEX_SHIFT),
|
||||||
tex[1] * (1<<TEX_SHIFT));
|
tex[1] * (1<<TEX_SHIFT));
|
||||||
} else {
|
} else {
|
||||||
put_word(&s, 0,
|
put_word(s, 0,
|
||||||
0);
|
0);
|
||||||
}
|
}
|
||||||
rspq_write_end(&s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void gpuDrawArrays(uint32_t first, uint32_t count)
|
static void gpuDrawArrays(uint32_t first, uint32_t count)
|
||||||
{
|
{
|
||||||
for (uint32_t i = 0; i < count; i++)
|
for (uint32_t i = 0; i < count; i += 4)
|
||||||
{
|
{
|
||||||
uint8_t cache_index = i & 3;
|
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_QUAD, 17);
|
||||||
upload_vertex(first + i, cache_index);
|
rspq_write_arg(&s, 0);
|
||||||
|
for (uint32_t j = 0; j < 4; j++)
|
||||||
// Last vertex of quad?
|
{
|
||||||
if ((i & 3) != 3) continue;
|
upload_vertex(&s, first + i + j);
|
||||||
|
}
|
||||||
|
rspq_write_end(&s);
|
||||||
|
|
||||||
// We pass -1 because the triangle can be clipped and split into multiple
|
// We pass -1 because the triangle can be clipped and split into multiple
|
||||||
// triangles.
|
// triangles.
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
|
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
|
||||||
|
|
||||||
RSPQ_DefineCommand GPUCmd_DrawQuad, 4 # 0x4
|
RSPQ_DefineCommand GPUCmd_DrawQuad, 4 # 0x4
|
||||||
RSPQ_DefineCommand GPUCmd_UploadVertex, 20 # 0x5
|
RSPQ_DefineCommand GPUCmd_UploadQuad, 68 # 0x5
|
||||||
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6
|
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6
|
||||||
|
|
||||||
RSPQ_DefineCommand GPUCmd_PushRDP, 12 # 0x7
|
RSPQ_DefineCommand GPUCmd_PushRDP, 12 # 0x7
|
||||||
@ -38,15 +38,9 @@ VERTEX_CACHE: .dcb.b PRIM_VTX_SIZE * VERTEX_CACHE_SIZE
|
|||||||
RSPQ_EndSavedState
|
RSPQ_EndSavedState
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
|
||||||
|
|
||||||
CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR
|
CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR
|
||||||
DRAW_TRI_RA: .word 0
|
DRAW_TRI_RA: .word 0
|
||||||
|
|
||||||
#define CLIPPING_PLANE_COUNT 6
|
|
||||||
#define CLIPPING_CACHE_SIZE 9
|
|
||||||
#define CLIPPING_PLANE_SIZE 8
|
|
||||||
|
|
||||||
#define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit)
|
#define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit)
|
||||||
#define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit)
|
#define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit)
|
||||||
#define SCREEN_VTX_X 16
|
#define SCREEN_VTX_X 16
|
||||||
@ -131,23 +125,13 @@ GPUCmd_MatrixLoad:
|
|||||||
#undef dst
|
#undef dst
|
||||||
.endfunc
|
.endfunc
|
||||||
|
|
||||||
########################################
|
|
||||||
# GPUCmd_UploadVertex
|
|
||||||
#
|
|
||||||
# Arguments:
|
|
||||||
# * 0x00 (a0): offset within VERTEX_CACHE
|
|
||||||
# * 0x04 (a1): object space X, Y (16-bit)
|
|
||||||
# * 0x08 (a2): object space Z, W (16-bit)
|
|
||||||
# * 0x0C (a3): RGBA (8-bit each one)
|
|
||||||
# * 0x10: S, T (16-bit)
|
|
||||||
#
|
|
||||||
########################################
|
|
||||||
.align 3
|
.align 3
|
||||||
.func GPUCmd_UploadVertex
|
.func GPUCmd_UploadQuad
|
||||||
GPUCmd_UploadVertex:
|
GPUCmd_UploadQuad:
|
||||||
#define vtx a0
|
#define vtx a0
|
||||||
#define mtx_ptr s0
|
#define mtx_ptr s0
|
||||||
#define src_ptr s4
|
#define src_ptr s4
|
||||||
|
#define vcount s3
|
||||||
|
|
||||||
#define v___ $v01
|
#define v___ $v01
|
||||||
|
|
||||||
@ -171,17 +155,10 @@ GPUCmd_UploadVertex:
|
|||||||
#define z e2
|
#define z e2
|
||||||
#define w e3
|
#define w e3
|
||||||
|
|
||||||
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 16
|
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||||
|
li vtx, %lo(VERTEX_CACHE)
|
||||||
|
li vcount, 4
|
||||||
|
|
||||||
ldv vpos, 0, src_ptr # Load X, Y, Z, W
|
|
||||||
llv vcol, 8, src_ptr # Load RGBA
|
|
||||||
llv vtex, 12, src_ptr # Load U, V
|
|
||||||
|
|
||||||
addi vtx, %lo(VERTEX_CACHE)
|
|
||||||
slv vcol, SCREEN_VTX_RGBA, vtx
|
|
||||||
slv vtex, SCREEN_VTX_S_T, vtx
|
|
||||||
|
|
||||||
# == matrix multiply ==
|
|
||||||
li mtx_ptr, %lo(GL_MATRIX_MVP)
|
li mtx_ptr, %lo(GL_MATRIX_MVP)
|
||||||
ldv vmtx0_i.e0, 0x00,mtx_ptr
|
ldv vmtx0_i.e0, 0x00,mtx_ptr
|
||||||
ldv vmtx1_i.e0, 0x08,mtx_ptr
|
ldv vmtx1_i.e0, 0x08,mtx_ptr
|
||||||
@ -192,6 +169,12 @@ GPUCmd_UploadVertex:
|
|||||||
ldv vmtx2_f.e0, 0x30,mtx_ptr
|
ldv vmtx2_f.e0, 0x30,mtx_ptr
|
||||||
ldv vmtx3_f.e0, 0x38,mtx_ptr
|
ldv vmtx3_f.e0, 0x38,mtx_ptr
|
||||||
|
|
||||||
|
upload_vertex:
|
||||||
|
ldv vpos, 0, src_ptr # Load X, Y, Z, W
|
||||||
|
llv vcol, 8, src_ptr # Load RGBA
|
||||||
|
llv vtex, 12, src_ptr # Load U, V
|
||||||
|
|
||||||
|
# matrix multiply
|
||||||
vmudn v___, vmtx0_f, vpos.h0
|
vmudn v___, vmtx0_f, vpos.h0
|
||||||
vmadh v___, vmtx0_i, vpos.h0
|
vmadh v___, vmtx0_i, vpos.h0
|
||||||
vmadn v___, vmtx1_f, vpos.h1
|
vmadn v___, vmtx1_f, vpos.h1
|
||||||
@ -201,12 +184,17 @@ GPUCmd_UploadVertex:
|
|||||||
vmadn v___, vmtx3_f, vpos.h3
|
vmadn v___, vmtx3_f, vpos.h3
|
||||||
vmadh vcspos_i, vmtx3_i, vpos.h3
|
vmadh vcspos_i, vmtx3_i, vpos.h3
|
||||||
vmadn vcspos_f, vzero, vzero
|
vmadn vcspos_f, vzero, vzero
|
||||||
# == end matrix multiply ==
|
|
||||||
|
slv vcol, SCREEN_VTX_RGBA, vtx
|
||||||
|
slv vtex, SCREEN_VTX_S_T, vtx
|
||||||
|
|
||||||
# 32-bit right shift by 5, to keep the clip space coordinates unscaled
|
# 32-bit right shift by 5, to keep the clip space coordinates unscaled
|
||||||
vmudm vcspos_i, vcspos_i, vshift8.e4
|
vmudm vcspos_i, vcspos_i, vshift8.e4
|
||||||
vmadl vcspos_f, vcspos_f, vshift8.e4
|
vmadl vcspos_f, vcspos_f, vshift8.e4
|
||||||
|
|
||||||
|
addi vcount, -1
|
||||||
|
addi src_ptr, 16
|
||||||
|
|
||||||
sdv vcspos_i, SCREEN_VTX_CS_POSi,vtx
|
sdv vcspos_i, SCREEN_VTX_CS_POSi,vtx
|
||||||
sdv vcspos_f, SCREEN_VTX_CS_POSf,vtx
|
sdv vcspos_f, SCREEN_VTX_CS_POSf,vtx
|
||||||
|
|
||||||
@ -221,9 +209,13 @@ GPUCmd_UploadVertex:
|
|||||||
srl t1, t0, 5
|
srl t1, t0, 5
|
||||||
andi t0, 0x7
|
andi t0, 0x7
|
||||||
or t0, t1
|
or t0, t1
|
||||||
jr ra
|
|
||||||
sb t0, PRIM_VTX_TRCODE(vtx)
|
sb t0, PRIM_VTX_TRCODE(vtx)
|
||||||
|
|
||||||
|
bnez vcount, upload_vertex
|
||||||
|
addi vtx, PRIM_VTX_SIZE
|
||||||
|
|
||||||
|
jr ra
|
||||||
|
nop
|
||||||
#undef src_ptr
|
#undef src_ptr
|
||||||
#undef vtx
|
#undef vtx
|
||||||
|
|
||||||
|
@ -1,3 +1,6 @@
|
|||||||
|
#define CLIPPING_PLANE_COUNT 6
|
||||||
|
#define CLIPPING_CACHE_SIZE 9
|
||||||
|
#define CLIPPING_PLANE_SIZE 8
|
||||||
|
|
||||||
.section .data.gl_clipping
|
.section .data.gl_clipping
|
||||||
|
|
||||||
@ -10,6 +13,9 @@ CLIP_PLANES:
|
|||||||
.half 0, 1, 0, -GUARD_BAND_FACTOR
|
.half 0, 1, 0, -GUARD_BAND_FACTOR
|
||||||
.half 0, 0, 1, -1
|
.half 0, 0, 1, -1
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
||||||
|
|
||||||
.section .bss.gl_clipping
|
.section .bss.gl_clipping
|
||||||
|
|
||||||
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||||
|
Loading…
x
Reference in New Issue
Block a user