diff --git a/misc/n64/gpu.c b/misc/n64/gpu.c index b1611871b..a62f406d1 100644 --- a/misc/n64/gpu.c +++ b/misc/n64/gpu.c @@ -144,38 +144,87 @@ static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2) rspq_write_arg(s, v2 | (v1 << 16)); } -static void upload_vertex(rspq_write_t* s, uint32_t index) -{ - char* ptr = gpu_pointer + index * gpu_stride; - float* vtx = (float*)(ptr + 0); - put_word(s, vtx[0] * (1<> FLT_EXPONENT_SHIFT; + + // Ignore denormal, infinity, or large exponents + if (e <= 0 || e >= 160) return 0; + + return value * scale; } +static void convert_textured_vertices(GfxResourceID vb, int count) { + struct VertexTextured* src = (struct VertexTextured*)vb; + struct rsp_vertex* dst = (struct rsp_vertex*)vb; + + for (int i = 0; i < count; i++, src++, dst++) + { + float x = src->x, y = src->y, z = src->z; + float u = src->U, v = src->V; + PackedCol rgba = src->Col; + + dst->x = F2I(x, 1<y = F2I(y, 1<z = F2I(z, 1<u = F2I(u, 1<v = F2I(v, 1<rgba = rgba; + } +} + +static void convert_coloured_vertices(GfxResourceID vb, int count) { + struct VertexColoured* src = (struct VertexColoured*)vb; + struct rsp_vertex* dst = (struct rsp_vertex*)vb; + + for (int i = 0; i < count; i++, src++, dst++) + { + float x = src->x, y = src->y, z = src->z; + PackedCol rgba = src->Col; + + dst->x = F2I(x, 1<y = F2I(y, 1<z = F2I(z, 1<u = 0; + dst->v = 0; + dst->rgba = rgba; + } +} + + static void gpuDrawArrays(uint32_t first, uint32_t count) { + uint32_t* ptr = (uint32_t*)(gpu_pointer + first * sizeof(struct rsp_vertex)); for (uint32_t i = 0; i < count; i += 4) { rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_DRAW_QUAD, 17); rspq_write_arg(&s, 0); // padding + for (uint32_t j = 0; j < 4; j++) { - upload_vertex(&s, first + i + j); + rspq_write_arg(&s, *ptr++); + rspq_write_arg(&s, *ptr++); + rspq_write_arg(&s, *ptr++); + rspq_write_arg(&s, *ptr++); } rspq_write_end(&s); } diff --git a/misc/n64/rsp_gpu.S b/misc/n64/rsp_gpu.S index 89cbe9d0e..d969e422c 100644 --- 
a/misc/n64/rsp_gpu.S +++ b/misc/n64/rsp_gpu.S @@ -123,9 +123,8 @@ GPUCmd_PushRDP: .func GPUCmd_MatrixLoad GPUCmd_MatrixLoad: - #define src s6 - #define dst s7 - + #define src t0 + #define dst t1 #define vmat0_i $v02 #define vmat1_i $v03 #define vmat2_i $v04 @@ -239,14 +238,14 @@ GL_CalcScreenSpace: # GL_TnL # # Args: - # s2 = address of the vertex in DMEM (usually within VERTEX_CACHE) - # s3 = address of the vertex in DMEM (usually within VERTEX_CACHE) + # a1 = address of the first vertex of the pair in DMEM (usually within VERTEX_CACHE) + # a2 = address of the second vertex of the pair in DMEM (usually within VERTEX_CACHE) # ################################################################ .func GL_TnL GL_TnL: - #define vtx1 s2 - #define vtx2 s3 + #define vtx1 a1 + #define vtx2 a2 #define w e3 #define W e7 @@ -335,10 +334,6 @@ GL_TnL: .align 3 .func GPUCmd_DrawQuad GPUCmd_DrawQuad: - #define vtx_ptr a0 - #define mtx_ptr s0 - #define src_ptr s4 - #define v___ $v01 #define vst_i $v12 @@ -366,7 +361,9 @@ GPUCmd_DrawQuad: #define v1_cflags t2 #define v2_cflags t3 #define v3_cflags t4 - // t5 is used by GL_ClipTriangle + #define mtx_ptr t5 // t5 is also used by GL_ClipTriangle + #define vtx_ptr t6 + #define src_ptr t7 addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64 li vtx_ptr, %lo(VERTEX_CACHE) @@ -383,6 +380,7 @@ GPUCmd_DrawQuad: lqv vmtx1_f, 0x50,mtx_ptr lqv vmtx2_f, 0x60,mtx_ptr lqv vmtx3_f, 0x70,mtx_ptr + #undef mtx_ptr // ######################## // Vertex 0 and 1 transform @@ -407,10 +405,10 @@ GPUCmd_DrawQuad: vmudm vcspos_i, vcspos_i, K2048 vmadl vcspos_f, vcspos_f, K2048 - li t6, %lo(GL_STATE_TEX_SIZE) - lqv vtexsize, 0x00, t6 + li tmp, %lo(GL_STATE_TEX_SIZE) + lqv vtexsize, 0x00, tmp slv vcol.e0, SCREEN_VTX_RGBA + V0_OFFSET, vtx_ptr - lqv vtexoffset, 0x10, t6 + lqv vtexoffset, 0x10, tmp slv vcol.e2, SCREEN_VTX_RGBA + V1_OFFSET, vtx_ptr // Calculate and store clipping flags against CS.W. 
@@ -529,13 +527,13 @@ GPUCmd_DrawQuad: ldv vguardscale.e0, 0, t0 ldv vguardscale.e4, 0, t0 - li s2, %lo(VERTEX_CACHE) + V0_OFFSET + li a1, %lo(VERTEX_CACHE) + V0_OFFSET jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V1_OFFSET + li a2, %lo(VERTEX_CACHE) + V1_OFFSET - li s2, %lo(VERTEX_CACHE) + V2_OFFSET + li a1, %lo(VERTEX_CACHE) + V2_OFFSET jal GL_TnL - li s3, %lo(VERTEX_CACHE) + V3_OFFSET + li a2, %lo(VERTEX_CACHE) + V3_OFFSET // ######################## // Guardband check @@ -586,6 +584,7 @@ gl_draw_clipped_triangles_loop: move vtx1, s5 lhu vtx2, 2(s1) lhu vtx3, 4(s1) + # TODO do VP transform here gl_draw_single_triangle: addi vtx1, SCREEN_VTX_X diff --git a/misc/n64/rsp_gpu_clipping.inc b/misc/n64/rsp_gpu_clipping.inc index 31bebf8d4..c7b11d41f 100644 --- a/misc/n64/rsp_gpu_clipping.inc +++ b/misc/n64/rsp_gpu_clipping.inc @@ -18,6 +18,7 @@ CACHE_OFFSETS: .half 2,4,6,8, 10,12,14,16, 18,20 .section .bss.gl_clipping + .align 4 CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE CLIP_CACHE_END: @@ -50,10 +51,10 @@ GL_ClipTriangle: #define out_list s1 #define plane s2 #define intersection s3 - #define cur_ptr s4 - #define prev_ptr s5 - #define cur_vtx s6 - #define prev_vtx s7 + #define cur_ptr a0 + #define prev_ptr a1 + #define cur_vtx a2 + #define prev_vtx a3 #define p0 k0 #define p1 k1 #define vtx1 a1 @@ -99,6 +100,11 @@ GL_ClipTriangle: sh vtx4, 6(out_list) li out_count, 4*2 +#undef vtx1 +#undef vtx2 +#undef vtx3 +#undef vtx4 + li plane, %lo(CLIP_PLANES) li plane_flag, 1 @@ -377,9 +383,6 @@ gl_clip_return: #undef prev_vtx #undef p0 #undef p1 - #undef vtx1 - #undef vtx2 - #undef vtx3 #undef vplane #undef vpos_i #undef vpos_f diff --git a/src/Graphics_N64.c b/src/Graphics_N64.c index 5e7267670..3ce56d26c 100644 --- a/src/Graphics_N64.c +++ b/src/Graphics_N64.c @@ -356,7 +356,7 @@ struct VertexBuffer { }; static struct VertexBuffer* gfx_vb; -static int vb_size; +static int vb_count, vb_fmt; static void VB_ClearCache(struct VertexBuffer* vb) { for (int i = 0; i < 
MAX_CACHED_CALLS; i++) @@ -430,13 +430,23 @@ void Gfx_DeleteVb(GfxResourceID* vb) { } void* Gfx_LockVb(GfxResourceID vb, VertexFormat fmt, int count) { - vb_size = count * strideSizes[fmt]; + vb_count = count; + vb_fmt = fmt; return ((struct VertexBuffer*)vb)->vertices; } void Gfx_UnlockVb(GfxResourceID vb) { VB_ClearCache(vb); // data may have changed gfx_vb = vb; + + void* ptr = ((struct VertexBuffer*)vb)->vertices; + if (vb_fmt == VERTEX_FORMAT_COLOURED) { + convert_coloured_vertices(ptr, vb_count); + data_cache_hit_writeback_invalidate(ptr, vb_count * sizeof(struct rsp_vertex)); + } else { + convert_textured_vertices(ptr, vb_count); + data_cache_hit_writeback_invalidate(ptr, vb_count * sizeof(struct rsp_vertex)); + } }