N64: WIP on in-place vertex list

This commit is contained in:
UnknownShadow200 2025-07-21 20:48:17 +10:00
parent 6658154f2d
commit 21b4fe79aa
4 changed files with 108 additions and 47 deletions

View File

@ -144,38 +144,87 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
rspq_write_arg(s, v2 | (v1 << 16));
}
static void upload_vertex(rspq_write_t* s, uint32_t index)
{
char* ptr = gpu_pointer + index * gpu_stride;
float* vtx = (float*)(ptr + 0);
put_word(s, vtx[0] * (1<<VTX_SHIFT),
vtx[1] * (1<<VTX_SHIFT));
put_word(s, vtx[2] * (1<<VTX_SHIFT),
1.0f * (1<<VTX_SHIFT));
uint32_t* col = (uint32_t*)(ptr + 12);
rspq_write_arg(s, *col);
if (gpu_texturing) {
float* tex = (float*)(ptr + 16);
put_word(s, tex[0] * (1<<TEX_SHIFT),
tex[1] * (1<<TEX_SHIFT));
} else {
put_word(s, 0,
0);
}
/* Fixed-point vertex record that the convert_*_vertices helpers write in place */
struct rsp_vertex {
    /* position components */
    uint16_t x;
    uint16_t y;
    uint16_t z;
    uint16_t w; // w ignored
    /* packed vertex colour */
    uint32_t rgba;
    /* texture coordinates */
    uint16_t u;
    uint16_t v;
};
#define FLT_EXPONENT_BIAS  127
#define FLT_EXPONENT_SHIFT 23
#define FLT_EXPONENT_MASK  0x7F800000

/*
 * Converts (value * scale) to an integer, returning 0 whenever the
 * conversion cannot be performed safely.
 *
 * value: the float to convert
 * scale: multiplier applied before conversion (callers pass 1 << shift)
 *
 * Returns 0 when:
 *  - the biased exponent field of value is 0 (zero or denormal)
 *  - the biased exponent field of value is >= 160, i.e. infinity, NaN,
 *    or a magnitude of at least 2^(160 - FLT_EXPONENT_BIAS) = 2^33
 *  - the scaled product does not fit in a 32-bit int
 * Otherwise returns the product truncated towards zero.
 */
static int F2I(float value, int scale) {
    union IntAndFloat raw;
    float scaled;
    int e;

    raw.f = value;
    e = (raw.i & FLT_EXPONENT_MASK) >> FLT_EXPONENT_SHIFT;

    // Ignore denormal, infinity, or large exponents
    if (e <= 0 || e >= 160) return 0;

    scaled = value * scale;
    // The exponent test above only bounds the input; the scaled product can
    // still exceed the range of int, and converting such a float to int is
    // undefined behaviour. Treat it like any other oversized input.
    // NOTE: the bounds below assume int is 32 bits wide
    if (scaled >= 2147483648.0f || scaled < -2147483648.0f) return 0;

    return (int)scaled;
}
/*
 * Rewrites 'count' textured vertices stored at 'vb' as struct rsp_vertex
 * records, reusing the same memory for the output.
 * Positions are scaled by 1<<VTX_SHIFT and texture coordinates by 1<<TEX_SHIFT;
 * the 'w' member of each output record is not written.
 */
static void convert_textured_vertices(GfxResourceID vb, int count) {
    struct VertexTextured* in = (struct VertexTextured*)vb;
    struct rsp_vertex* out    = (struct rsp_vertex*)vb;
    int idx;

    for (idx = 0; idx < count; idx++)
    {
        // 'in' and 'out' address the same buffer, so take a complete copy of
        // the source vertex before any output member is stored
        struct VertexTextured cur = in[idx];
        struct rsp_vertex* o      = &out[idx];

        o->x    = F2I(cur.x, 1<<VTX_SHIFT);
        o->y    = F2I(cur.y, 1<<VTX_SHIFT);
        o->z    = F2I(cur.z, 1<<VTX_SHIFT);
        o->u    = F2I(cur.U, 1<<TEX_SHIFT);
        o->v    = F2I(cur.V, 1<<TEX_SHIFT);
        o->rgba = cur.Col;
    }
}
/*
 * Rewrites 'count' coloured vertices stored at 'vb' as struct rsp_vertex
 * records, reusing the same memory for the output.
 * Positions are scaled by 1<<VTX_SHIFT and texture coordinates are set to 0;
 * the 'w' member of each output record is not written.
 */
static void convert_coloured_vertices(GfxResourceID vb, int count) {
    struct VertexColoured* in = (struct VertexColoured*)vb;
    struct rsp_vertex* out    = (struct rsp_vertex*)vb;
    int idx;

    for (idx = 0; idx < count; idx++)
    {
        // 'in' and 'out' address the same buffer, so take a complete copy of
        // the source vertex before any output member is stored
        struct VertexColoured cur = in[idx];
        struct rsp_vertex* o      = &out[idx];

        o->x    = F2I(cur.x, 1<<VTX_SHIFT);
        o->y    = F2I(cur.y, 1<<VTX_SHIFT);
        o->z    = F2I(cur.z, 1<<VTX_SHIFT);
        o->u    = 0;
        o->v    = 0;
        o->rgba = cur.Col;
    }
}
static void gpuDrawArrays(uint32_t first, uint32_t count)
{
uint32_t* ptr = (uint32_t*)(gpu_pointer + first * sizeof(struct rsp_vertex));
for (uint32_t i = 0; i < count; i += 4)
{
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_DRAW_QUAD, 17);
rspq_write_arg(&s, 0); // padding
for (uint32_t j = 0; j < 4; j++)
{
upload_vertex(&s, first + i + j);
rspq_write_arg(&s, *ptr++);
rspq_write_arg(&s, *ptr++);
rspq_write_arg(&s, *ptr++);
rspq_write_arg(&s, *ptr++);
}
rspq_write_end(&s);
}

View File

@ -123,9 +123,8 @@ GPUCmd_PushRDP:
.func GPUCmd_MatrixLoad
GPUCmd_MatrixLoad:
#define src s6
#define dst s7
#define src t0
#define dst t1
#define vmat0_i $v02
#define vmat1_i $v03
#define vmat2_i $v04
@ -239,14 +238,14 @@ GL_CalcScreenSpace:
# GL_TnL
#
# Args:
# s2 = address of the vertex in DMEM (usually within VERTEX_CACHE)
# s3 = address of the vertex in DMEM (usually within VERTEX_CACHE)
# a1 = address of the first vertex in DMEM (usually within VERTEX_CACHE)
# a2 = address of the second vertex in DMEM (usually within VERTEX_CACHE)
#
################################################################
.func GL_TnL
GL_TnL:
#define vtx1 s2
#define vtx2 s3
#define vtx1 a1
#define vtx2 a2
#define w e3
#define W e7
@ -335,10 +334,6 @@ GL_TnL:
.align 3
.func GPUCmd_DrawQuad
GPUCmd_DrawQuad:
#define vtx_ptr a0
#define mtx_ptr s0
#define src_ptr s4
#define v___ $v01
#define vst_i $v12
@ -366,7 +361,9 @@ GPUCmd_DrawQuad:
#define v1_cflags t2
#define v2_cflags t3
#define v3_cflags t4
// t5 is used by GL_ClipTriangle
#define mtx_ptr t5 // t5 is also used by GL_ClipTriangle
#define vtx_ptr t6
#define src_ptr t7
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
li vtx_ptr, %lo(VERTEX_CACHE)
@ -383,6 +380,7 @@ GPUCmd_DrawQuad:
lqv vmtx1_f, 0x50,mtx_ptr
lqv vmtx2_f, 0x60,mtx_ptr
lqv vmtx3_f, 0x70,mtx_ptr
#undef mtx_ptr
// ########################
// Vertex 0 and 1 transform
@ -407,10 +405,10 @@ GPUCmd_DrawQuad:
vmudm vcspos_i, vcspos_i, K2048
vmadl vcspos_f, vcspos_f, K2048
li t6, %lo(GL_STATE_TEX_SIZE)
lqv vtexsize, 0x00, t6
li tmp, %lo(GL_STATE_TEX_SIZE)
lqv vtexsize, 0x00, tmp
slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx_ptr
lqv vtexoffset, 0x10, t6
lqv vtexoffset, 0x10, tmp
slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx_ptr
// Calculate and store clipping flags against CS.W.
@ -529,13 +527,13 @@ GPUCmd_DrawQuad:
ldv vguardscale.e0, 0, t0
ldv vguardscale.e4, 0, t0
li s2, %lo(VERTEX_CACHE) + V0_OFFSET
li a1, %lo(VERTEX_CACHE) + V0_OFFSET
jal GL_TnL
li s3, %lo(VERTEX_CACHE) + V1_OFFSET
li a2, %lo(VERTEX_CACHE) + V1_OFFSET
li s2, %lo(VERTEX_CACHE) + V2_OFFSET
li a1, %lo(VERTEX_CACHE) + V2_OFFSET
jal GL_TnL
li s3, %lo(VERTEX_CACHE) + V3_OFFSET
li a2, %lo(VERTEX_CACHE) + V3_OFFSET
// ########################
// Guardband check
@ -586,6 +584,7 @@ gl_draw_clipped_triangles_loop:
move vtx1, s5
lhu vtx2, 2(s1)
lhu vtx3, 4(s1)
# TODO do VP transform here
gl_draw_single_triangle:
addi vtx1, SCREEN_VTX_X

View File

@ -18,6 +18,7 @@ CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20
.section .bss.gl_clipping
.align 4
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
CLIP_CACHE_END:
@ -50,10 +51,10 @@ GL_ClipTriangle:
#define out_list s1
#define plane s2
#define intersection s3
#define cur_ptr s4
#define prev_ptr s5
#define cur_vtx s6
#define prev_vtx s7
#define cur_ptr a0
#define prev_ptr a1
#define cur_vtx a2
#define prev_vtx a3
#define p0 k0
#define p1 k1
#define vtx1 a1
@ -99,6 +100,11 @@ GL_ClipTriangle:
sh vtx4, 6(out_list)
li out_count, 4*2
#undef vtx1
#undef vtx2
#undef vtx3
#undef vtx4
li plane, %lo(CLIP_PLANES)
li plane_flag, 1
@ -377,9 +383,6 @@ gl_clip_return:
#undef prev_vtx
#undef p0
#undef p1
#undef vtx1
#undef vtx2
#undef vtx3
#undef vplane
#undef vpos_i
#undef vpos_f

View File

@ -356,7 +356,7 @@ struct VertexBuffer {
};
static struct VertexBuffer* gfx_vb;
static int vb_size;
static int vb_count, vb_fmt;
static void VB_ClearCache(struct VertexBuffer* vb) {
for (int i = 0; i < MAX_CACHED_CALLS; i++)
@ -430,13 +430,23 @@ void Gfx_DeleteVb(GfxResourceID* vb) {
}
/*
 * Records the format and vertex count of the buffer being locked and returns
 * a pointer to its vertex storage for the caller to fill.
 * Gfx_UnlockVb later reads vb_count and vb_fmt.
 */
void* Gfx_LockVb(GfxResourceID vb, VertexFormat fmt, int count) {
    struct VertexBuffer* buffer = (struct VertexBuffer*)vb;

    vb_size  = count * strideSizes[fmt];
    vb_count = count;
    vb_fmt   = fmt;

    return buffer->vertices;
}
/*
 * Finishes an update of a vertex buffer: clears the buffer's cached calls,
 * makes it the current buffer, converts the vertices in place to
 * struct rsp_vertex form, and writes back/invalidates the data cache
 * over the converted range.
 * Uses the count and format recorded by the preceding Gfx_LockVb call.
 */
void Gfx_UnlockVb(GfxResourceID vb) {
    void* data;

    VB_ClearCache(vb); // data may have changed
    gfx_vb = vb;
    data   = ((struct VertexBuffer*)vb)->vertices;

    // Any format other than coloured is handled as textured
    if (vb_fmt != VERTEX_FORMAT_COLOURED) {
        convert_textured_vertices(data, vb_count);
    } else {
        convert_coloured_vertices(data, vb_count);
    }

    // Both formats produce vb_count rsp_vertex records in the same memory
    data_cache_hit_writeback_invalidate(data, vb_count * sizeof(struct rsp_vertex));
}