mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-08-03 18:57:27 -04:00
Optimise vertex upload
This commit is contained in:
parent
b73d03b199
commit
db9b359b8b
@ -34,7 +34,7 @@ enum {
|
||||
GPU_CMD_SET_LONG = 0x3,
|
||||
|
||||
GPU_CMD_DRAW_QUAD = 0x4,
|
||||
GPU_CMD_UPLOAD_VTX = 0x5,
|
||||
GPU_CMD_UPLOAD_QUAD = 0x5,
|
||||
GPU_CMD_MATRIX_LOAD = 0x6,
|
||||
|
||||
GPU_CMD_PUSH_RDP = 0x7,
|
||||
@ -156,41 +156,40 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
|
||||
rspq_write_arg(s, v2 | (v1 << 16));
|
||||
}
|
||||
|
||||
static void upload_vertex(uint32_t index, uint8_t cache_index)
|
||||
static void upload_vertex(rspq_write_t* s, uint32_t index)
|
||||
{
|
||||
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 5);
|
||||
rspq_write_arg(&s, cache_index * PRIM_VTX_SIZE);
|
||||
char* ptr = gpu_pointer + index * gpu_stride;
|
||||
|
||||
float* vtx = (float*)(ptr + 0);
|
||||
put_word(&s, vtx[0] * (1<<VTX_SHIFT),
|
||||
put_word(s, vtx[0] * (1<<VTX_SHIFT),
|
||||
vtx[1] * (1<<VTX_SHIFT));
|
||||
put_word(&s, vtx[2] * (1<<VTX_SHIFT),
|
||||
put_word(s, vtx[2] * (1<<VTX_SHIFT),
|
||||
1.0f * (1<<VTX_SHIFT));
|
||||
|
||||
uint32_t* col = (uint32_t*)(ptr + 12); // TODO put_byte ?
|
||||
rspq_write_arg(&s, *col);
|
||||
uint32_t* col = (uint32_t*)(ptr + 12);
|
||||
rspq_write_arg(s, *col);
|
||||
|
||||
if (gpu_texturing) {
|
||||
float* tex = (float*)(ptr + 16);
|
||||
put_word(&s, tex[0] * (1<<TEX_SHIFT),
|
||||
put_word(s, tex[0] * (1<<TEX_SHIFT),
|
||||
tex[1] * (1<<TEX_SHIFT));
|
||||
} else {
|
||||
put_word(&s, 0,
|
||||
put_word(s, 0,
|
||||
0);
|
||||
}
|
||||
rspq_write_end(&s);
|
||||
}
|
||||
|
||||
static void gpuDrawArrays(uint32_t first, uint32_t count)
|
||||
{
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
for (uint32_t i = 0; i < count; i += 4)
|
||||
{
|
||||
uint8_t cache_index = i & 3;
|
||||
upload_vertex(first + i, cache_index);
|
||||
|
||||
// Last vertex of quad?
|
||||
if ((i & 3) != 3) continue;
|
||||
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_QUAD, 17);
|
||||
rspq_write_arg(&s, 0);
|
||||
for (uint32_t j = 0; j < 4; j++)
|
||||
{
|
||||
upload_vertex(&s, first + i + j);
|
||||
}
|
||||
rspq_write_end(&s);
|
||||
|
||||
// We pass -1 because the triangle can be clipped and split into multiple
|
||||
// triangles.
|
||||
|
@ -10,7 +10,7 @@
|
||||
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
|
||||
|
||||
RSPQ_DefineCommand GPUCmd_DrawQuad, 4 # 0x4
|
||||
RSPQ_DefineCommand GPUCmd_UploadVertex, 20 # 0x5
|
||||
RSPQ_DefineCommand GPUCmd_UploadQuad, 68 # 0x5
|
||||
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6
|
||||
|
||||
RSPQ_DefineCommand GPUCmd_PushRDP, 12 # 0x7
|
||||
@ -38,15 +38,9 @@ VERTEX_CACHE: .dcb.b PRIM_VTX_SIZE * VERTEX_CACHE_SIZE
|
||||
RSPQ_EndSavedState
|
||||
|
||||
.align 4
|
||||
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
||||
|
||||
CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR
|
||||
DRAW_TRI_RA: .word 0
|
||||
|
||||
#define CLIPPING_PLANE_COUNT 6
|
||||
#define CLIPPING_CACHE_SIZE 9
|
||||
#define CLIPPING_PLANE_SIZE 8
|
||||
|
||||
#define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit)
|
||||
#define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit)
|
||||
#define SCREEN_VTX_X 16
|
||||
@ -131,23 +125,13 @@ GPUCmd_MatrixLoad:
|
||||
#undef dst
|
||||
.endfunc
|
||||
|
||||
########################################
|
||||
# GPUCmd_UploadVertex
|
||||
#
|
||||
# Arguments:
|
||||
# * 0x00 (a0): offset within VERTEX_CACHE
|
||||
# * 0x04 (a1): object space X, Y (16-bit)
|
||||
# * 0x08 (a2): object space Z, W (16-bit)
|
||||
# * 0x0C (a3): RGBA (8-bit each one)
|
||||
# * 0x10: S, T (16-bit)
|
||||
#
|
||||
########################################
|
||||
.align 3
|
||||
.func GPUCmd_UploadVertex
|
||||
GPUCmd_UploadVertex:
|
||||
.func GPUCmd_UploadQuad
|
||||
GPUCmd_UploadQuad:
|
||||
#define vtx a0
|
||||
#define mtx_ptr s0
|
||||
#define src_ptr s4
|
||||
#define vcount s3
|
||||
|
||||
#define v___ $v01
|
||||
|
||||
@ -171,17 +155,10 @@ GPUCmd_UploadVertex:
|
||||
#define z e2
|
||||
#define w e3
|
||||
|
||||
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 16
|
||||
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||
li vtx, %lo(VERTEX_CACHE)
|
||||
li vcount, 4
|
||||
|
||||
ldv vpos, 0, src_ptr # Load X, Y, Z, W
|
||||
llv vcol, 8, src_ptr # Load RGBA
|
||||
llv vtex, 12, src_ptr # Load U, V
|
||||
|
||||
addi vtx, %lo(VERTEX_CACHE)
|
||||
slv vcol, SCREEN_VTX_RGBA, vtx
|
||||
slv vtex, SCREEN_VTX_S_T, vtx
|
||||
|
||||
# == matrix multiply ==
|
||||
li mtx_ptr, %lo(GL_MATRIX_MVP)
|
||||
ldv vmtx0_i.e0, 0x00,mtx_ptr
|
||||
ldv vmtx1_i.e0, 0x08,mtx_ptr
|
||||
@ -192,6 +169,12 @@ GPUCmd_UploadVertex:
|
||||
ldv vmtx2_f.e0, 0x30,mtx_ptr
|
||||
ldv vmtx3_f.e0, 0x38,mtx_ptr
|
||||
|
||||
upload_vertex:
|
||||
ldv vpos, 0, src_ptr # Load X, Y, Z, W
|
||||
llv vcol, 8, src_ptr # Load RGBA
|
||||
llv vtex, 12, src_ptr # Load U, V
|
||||
|
||||
# matrix multiply
|
||||
vmudn v___, vmtx0_f, vpos.h0
|
||||
vmadh v___, vmtx0_i, vpos.h0
|
||||
vmadn v___, vmtx1_f, vpos.h1
|
||||
@ -201,12 +184,17 @@ GPUCmd_UploadVertex:
|
||||
vmadn v___, vmtx3_f, vpos.h3
|
||||
vmadh vcspos_i, vmtx3_i, vpos.h3
|
||||
vmadn vcspos_f, vzero, vzero
|
||||
# == end matrix multiply ==
|
||||
|
||||
slv vcol, SCREEN_VTX_RGBA, vtx
|
||||
slv vtex, SCREEN_VTX_S_T, vtx
|
||||
|
||||
# 32-bit right shift by 5, to keep the clip space coordinates unscaled
|
||||
vmudm vcspos_i, vcspos_i, vshift8.e4
|
||||
vmadl vcspos_f, vcspos_f, vshift8.e4
|
||||
|
||||
addi vcount, -1
|
||||
addi src_ptr, 16
|
||||
|
||||
sdv vcspos_i, SCREEN_VTX_CS_POSi,vtx
|
||||
sdv vcspos_f, SCREEN_VTX_CS_POSf,vtx
|
||||
|
||||
@ -221,9 +209,13 @@ GPUCmd_UploadVertex:
|
||||
srl t1, t0, 5
|
||||
andi t0, 0x7
|
||||
or t0, t1
|
||||
jr ra
|
||||
sb t0, PRIM_VTX_TRCODE(vtx)
|
||||
|
||||
bnez vcount, upload_vertex
|
||||
addi vtx, PRIM_VTX_SIZE
|
||||
|
||||
jr ra
|
||||
nop
|
||||
#undef src_ptr
|
||||
#undef vtx
|
||||
|
||||
|
@ -1,3 +1,6 @@
|
||||
#define CLIPPING_PLANE_COUNT 6
|
||||
#define CLIPPING_CACHE_SIZE 9
|
||||
#define CLIPPING_PLANE_SIZE 8
|
||||
|
||||
.section .data.gl_clipping
|
||||
|
||||
@ -10,6 +13,9 @@ CLIP_PLANES:
|
||||
.half 0, 1, 0, -GUARD_BAND_FACTOR
|
||||
.half 0, 0, 1, -1
|
||||
|
||||
.align 4
|
||||
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
||||
|
||||
.section .bss.gl_clipping
|
||||
|
||||
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||
|
Loading…
x
Reference in New Issue
Block a user