Simplify RGBA calculation, saving 4 bytes per vertex upload command

This commit is contained in:
UnknownShadow200 2025-04-30 21:10:29 +10:00
parent 34474b32aa
commit 06e09c3019
3 changed files with 17 additions and 26 deletions

View File

@ -32,14 +32,11 @@
#define PRIM_VTX_Y 18 // Object space position (16-bit)
#define PRIM_VTX_Z 20 // Object space position (16-bit)
#define PRIM_VTX_W 22 // Object space position (16-bit)
#define PRIM_VTX_R 24
#define PRIM_VTX_G 26
#define PRIM_VTX_B 28
#define PRIM_VTX_A 30
#define PRIM_VTX_RGBA 24
// 28,29,30,31 pad
#define PRIM_VTX_TEX_S 32
#define PRIM_VTX_TEX_T 34
#define PRIM_VTX_TEX_R 36
#define PRIM_VTX_TEX_Q 38
// 36,37,38,39 pad
#define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w)
#define PRIM_VTX_SIZE 42

View File

@ -157,7 +157,7 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
static void upload_vertex(uint32_t index, uint8_t cache_index)
{
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 6);
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 5);
rspq_write_arg(&s, cache_index * PRIM_VTX_SIZE);
char* ptr = gpu_pointer + index * gpu_stride;
@ -167,11 +167,8 @@ static void upload_vertex(uint32_t index, uint8_t cache_index)
put_word(&s, vtx[2] * (1<<VTX_SHIFT),
1.0f * (1<<VTX_SHIFT));
uint8_t* col = (uint8_t*)(ptr + 12); // TODO put_byte ?
put_word(&s, col[0] << 7,
col[1] << 7);
put_word(&s, col[2] << 7,
col[3] << 7);
uint32_t* col = (uint32_t*)(ptr + 12); // TODO put_byte ?
rspq_write_arg(&s, *col);
if (gpu_texturing) {
float* tex = (float*)(ptr + 16);

View File

@ -10,7 +10,7 @@
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
RSPQ_DefineCommand GPUCmd_DrawTriangle, 8 # 0x4
RSPQ_DefineCommand GPUCmd_UploadVertex, 24 # 0x5
RSPQ_DefineCommand GPUCmd_UploadVertex, 20 # 0x5
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6
RSPQ_EndOverlayHeader
@ -131,12 +131,12 @@ GPUCmd_UploadVertex:
sub cmd_ptr, rspq_cmd_size
ldv vpos, 0, cmd_ptr # Load X, Y, Z, W
ldv vcol, 8, cmd_ptr # Load R, G, B, A
llv vtex, 16, cmd_ptr # Load U, V
llv vcol, 8, cmd_ptr # Load RGBA
llv vtex, 12, cmd_ptr # Load U, V
addi vtx, %lo(VERTEX_CACHE)
sdv vpos, PRIM_VTX_X ,vtx
sdv vcol, PRIM_VTX_R ,vtx
slv vcol, PRIM_VTX_RGBA ,vtx
slv vtex, PRIM_VTX_TEX_S ,vtx
# == matrix multiply ==
@ -333,20 +333,16 @@ GL_TnL:
#define tmp_ptr s2
#define vtx s3
#define s e0
move ra2, ra
#define v___ $v01
#define vrgba $v04
ldv vrgba.e0, PRIM_VTX_R, vtx # R + G + B + A
ldv vrgba.e4, PRIM_VTX_R, vtx # R + G + B + A
#define vtexsize $v06
#define vtexoffset $v07
#define vst $v08
move ra2, ra
llv vst, PRIM_VTX_TEX_S,vtx # S + T
suv vrgba, SCREEN_VTX_RGBA,vtx
llv vrgba, PRIM_VTX_RGBA, vtx # RGBA
llv vst, PRIM_VTX_TEX_S,vtx # S + T
li s1, %lo(GL_STATE_TEX_SIZE)
llv vtexsize.s, 0,s1
@ -375,7 +371,9 @@ GL_TnL:
#undef vst_f
#undef q
lbu t0, PRIM_VTX_TRCODE(vtx)
lbu t0, PRIM_VTX_TRCODE(vtx)
slv vrgba, SCREEN_VTX_RGBA,vtx
slv vst, SCREEN_VTX_S,vtx
#define vcspos_f $v02
#define vcspos_i $v03
@ -385,10 +383,9 @@ GL_TnL:
# Mark this vertex as having T&L applied
ori t0, 0x80
sb t0, PRIM_VTX_TRCODE(vtx)
jal GL_CalcScreenSpace
slv vst.s, SCREEN_VTX_S,vtx
sb t0, PRIM_VTX_TRCODE(vtx)
j GL_CalcClipCodes
move ra, ra2