Simplify RGBA calculation, saving 4 bytes per vertex upload command

This commit is contained in:
UnknownShadow200 2025-04-30 21:10:29 +10:00
parent 34474b32aa
commit 06e09c3019
3 changed files with 17 additions and 26 deletions

View File

@ -32,14 +32,11 @@
#define PRIM_VTX_Y 18 // Object space position (16-bit) #define PRIM_VTX_Y 18 // Object space position (16-bit)
#define PRIM_VTX_Z 20 // Object space position (16-bit) #define PRIM_VTX_Z 20 // Object space position (16-bit)
#define PRIM_VTX_W 22 // Object space position (16-bit) #define PRIM_VTX_W 22 // Object space position (16-bit)
#define PRIM_VTX_R 24 #define PRIM_VTX_RGBA 24
#define PRIM_VTX_G 26 // 28,29,30,31 pad
#define PRIM_VTX_B 28
#define PRIM_VTX_A 30
#define PRIM_VTX_TEX_S 32 #define PRIM_VTX_TEX_S 32
#define PRIM_VTX_TEX_T 34 #define PRIM_VTX_TEX_T 34
#define PRIM_VTX_TEX_R 36 //36,37,38,39 pad
#define PRIM_VTX_TEX_Q 38
#define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w) #define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w)
#define PRIM_VTX_SIZE 42 #define PRIM_VTX_SIZE 42

View File

@ -157,7 +157,7 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
static void upload_vertex(uint32_t index, uint8_t cache_index) static void upload_vertex(uint32_t index, uint8_t cache_index)
{ {
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 6); rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 5);
rspq_write_arg(&s, cache_index * PRIM_VTX_SIZE); rspq_write_arg(&s, cache_index * PRIM_VTX_SIZE);
char* ptr = gpu_pointer + index * gpu_stride; char* ptr = gpu_pointer + index * gpu_stride;
@ -167,11 +167,8 @@ static void upload_vertex(uint32_t index, uint8_t cache_index)
put_word(&s, vtx[2] * (1<<VTX_SHIFT), put_word(&s, vtx[2] * (1<<VTX_SHIFT),
1.0f * (1<<VTX_SHIFT)); 1.0f * (1<<VTX_SHIFT));
uint8_t* col = (uint8_t*)(ptr + 12); // TODO put_byte ? uint32_t* col = (uint32_t*)(ptr + 12); // TODO put_byte ?
put_word(&s, col[0] << 7, rspq_write_arg(&s, *col);
col[1] << 7);
put_word(&s, col[2] << 7,
col[3] << 7);
if (gpu_texturing) { if (gpu_texturing) {
float* tex = (float*)(ptr + 16); float* tex = (float*)(ptr + 16);

View File

@ -10,7 +10,7 @@
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3 RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
RSPQ_DefineCommand GPUCmd_DrawTriangle, 8 # 0x4 RSPQ_DefineCommand GPUCmd_DrawTriangle, 8 # 0x4
RSPQ_DefineCommand GPUCmd_UploadVertex, 24 # 0x5 RSPQ_DefineCommand GPUCmd_UploadVertex, 20 # 0x5
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6 RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x6
RSPQ_EndOverlayHeader RSPQ_EndOverlayHeader
@ -131,12 +131,12 @@ GPUCmd_UploadVertex:
sub cmd_ptr, rspq_cmd_size sub cmd_ptr, rspq_cmd_size
ldv vpos, 0, cmd_ptr # Load X, Y, Z, W ldv vpos, 0, cmd_ptr # Load X, Y, Z, W
ldv vcol, 8, cmd_ptr # Load R, G, B, A llv vcol, 8, cmd_ptr # Load RGBA
llv vtex, 16, cmd_ptr # Load U, V llv vtex, 12, cmd_ptr # Load U, V
addi vtx, %lo(VERTEX_CACHE) addi vtx, %lo(VERTEX_CACHE)
sdv vpos, PRIM_VTX_X ,vtx sdv vpos, PRIM_VTX_X ,vtx
sdv vcol, PRIM_VTX_R ,vtx slv vcol, PRIM_VTX_RGBA ,vtx
slv vtex, PRIM_VTX_TEX_S ,vtx slv vtex, PRIM_VTX_TEX_S ,vtx
# == matrix multiply == # == matrix multiply ==
@ -333,20 +333,16 @@ GL_TnL:
#define tmp_ptr s2 #define tmp_ptr s2
#define vtx s3 #define vtx s3
#define s e0 #define s e0
move ra2, ra
#define v___ $v01 #define v___ $v01
#define vrgba $v04 #define vrgba $v04
ldv vrgba.e0, PRIM_VTX_R, vtx # R + G + B + A
ldv vrgba.e4, PRIM_VTX_R, vtx # R + G + B + A
#define vtexsize $v06 #define vtexsize $v06
#define vtexoffset $v07 #define vtexoffset $v07
#define vst $v08 #define vst $v08
move ra2, ra
llv vst, PRIM_VTX_TEX_S,vtx # S + T llv vrgba, PRIM_VTX_RGBA, vtx # RGBA
suv vrgba, SCREEN_VTX_RGBA,vtx llv vst, PRIM_VTX_TEX_S,vtx # S + T
li s1, %lo(GL_STATE_TEX_SIZE) li s1, %lo(GL_STATE_TEX_SIZE)
llv vtexsize.s, 0,s1 llv vtexsize.s, 0,s1
@ -375,7 +371,9 @@ GL_TnL:
#undef vst_f #undef vst_f
#undef q #undef q
lbu t0, PRIM_VTX_TRCODE(vtx) lbu t0, PRIM_VTX_TRCODE(vtx)
slv vrgba, SCREEN_VTX_RGBA,vtx
slv vst, SCREEN_VTX_S,vtx
#define vcspos_f $v02 #define vcspos_f $v02
#define vcspos_i $v03 #define vcspos_i $v03
@ -385,10 +383,9 @@ GL_TnL:
# Mark this vertex as having T&L applied # Mark this vertex as having T&L applied
ori t0, 0x80 ori t0, 0x80
sb t0, PRIM_VTX_TRCODE(vtx)
jal GL_CalcScreenSpace jal GL_CalcScreenSpace
slv vst.s, SCREEN_VTX_S,vtx sb t0, PRIM_VTX_TRCODE(vtx)
j GL_CalcClipCodes j GL_CalcClipCodes
move ra, ra2 move ra, ra2