diff --git a/misc/n64/Makefile b/misc/n64/Makefile index f8fcbfe9e..c243c55fd 100644 --- a/misc/n64/Makefile +++ b/misc/n64/Makefile @@ -1,23 +1,28 @@ BUILD_DIR = build-n64 -SOURCE_DIR = src +SOURCE_DIR = misc/n64 N64_ROM_TITLE = "ClassiCube" N64_ROM_RTC = true TARGET = ClassiCube-n64 -N64_MKDFS_ROOT = "misc/n64" +N64_MKDFS_ROOT = "misc/n64/files" CFILES := $(notdir $(wildcard src/*.c)) -OFILES := $(CFILES:.c=.o) +OFILES := $(CFILES:.c=.o) rsp_gpu.o OBJS := $(addprefix $(BUILD_DIR)/,$(OFILES)) CFLAGS := -Wno-error=missing-braces -Wno-error=strict-aliasing -Wno-error=incompatible-pointer-types default: $(TARGET).z64 +$(BUILD_DIR)/%.o: src/%.c + @mkdir -p $(dir $@) + @echo " [CC] $<" + $(CC) -c $(CFLAGS) -o $@ $< + include $(N64_INST)/include/n64.mk $(TARGET).z64: N64_ROM_TITLE = "ClassiCube" $(TARGET).z64: $(BUILD_DIR)/filesystem.dfs -$(BUILD_DIR)/filesystem.dfs: misc/n64/default.zip +$(BUILD_DIR)/filesystem.dfs: misc/n64/files/default.zip $(BUILD_DIR)/ClassiCube-n64.elf: $(OBJS) diff --git a/misc/n64/default.zip b/misc/n64/files/default.zip similarity index 100% rename from misc/n64/default.zip rename to misc/n64/files/default.zip diff --git a/misc/n64/gpu.c b/misc/n64/gpu.c new file mode 100644 index 000000000..b65fd63e1 --- /dev/null +++ b/misc/n64/gpu.c @@ -0,0 +1,242 @@ +#include "rspq.h" +#include "rdpq.h" +#include "rdpq_rect.h" +#include "rdpq_mode.h" +#include "rdpq_debug.h" +#include "display.h" + +// This is a severely cutdown version of libdragon's OpenGL implementation +#define VTX_SHIFT 5 +#define TEX_SHIFT 8 + +static uint32_t gpup_id; +//DEFINE_RSP_UCODE(rsp_gpu); +extern uint8_t _binary_build_n64_rsp_gpu_text_bin_start[]; +extern uint8_t _binary_build_n64_rsp_gpu_data_bin_start[]; +extern uint8_t _binary_build_n64_rsp_gpu_meta_bin_start[]; +extern uint8_t _binary_build_n64_rsp_gpu_text_bin_end[0]; +extern uint8_t _binary_build_n64_rsp_gpu_data_bin_end[0]; +extern uint8_t _binary_build_n64_rsp_gpu_meta_bin_end[0]; + +static rsp_ucode_t rsp_gpu = 
(rsp_ucode_t){ + .code = _binary_build_n64_rsp_gpu_text_bin_start, + .code_end = _binary_build_n64_rsp_gpu_text_bin_end, + .data = _binary_build_n64_rsp_gpu_data_bin_start, + .data_end = _binary_build_n64_rsp_gpu_data_bin_end, + .meta = _binary_build_n64_rsp_gpu_meta_bin_start, + .meta_end = _binary_build_n64_rsp_gpu_meta_bin_end, + .name = "rsp_gpu" +}; + +enum { + GPU_CMD_SET_BYTE = 0x0, + GPU_CMD_SET_SHORT = 0x1, + GPU_CMD_SET_WORD = 0x2, + GPU_CMD_SET_LONG = 0x3, + + GPU_CMD_DRAW_QUAD = 0x4, + GPU_CMD_MATRIX_LOAD = 0x5, + + GPU_CMD_PUSH_RDP = 0x6, +}; + +typedef struct { + int16_t mvp_matrix_i[4][4]; + uint16_t mvp_matrix_f[4][4]; + int16_t vp_scale[4]; + int16_t vp_offset[4]; + uint16_t tex_size[2]; + uint16_t tex_offset[2]; + uint16_t tri_cmd; + uint16_t tri_cull; +} __attribute__((aligned(8), packed)) gpu_state; + +__attribute__((always_inline)) +static inline void gpu_set_byte(uint32_t offset, uint8_t value) +{ + rspq_write(gpup_id, GPU_CMD_SET_BYTE, offset, value); +} + +__attribute__((always_inline)) +static inline void gpu_set_short(uint32_t offset, uint16_t value) +{ + rspq_write(gpup_id, GPU_CMD_SET_SHORT, offset, value); +} + +__attribute__((always_inline)) +static inline void gpu_set_word(uint32_t offset, uint32_t value) +{ + rspq_write(gpup_id, GPU_CMD_SET_WORD, offset, value); +} + +__attribute__((always_inline)) +static inline void gpu_set_long(uint32_t offset, uint64_t value) +{ + rspq_write(gpup_id, GPU_CMD_SET_LONG, offset, value >> 32, value & 0xFFFFFFFF); +} + +#define RDP_CMD_SYNC_PIPE 0xE7000000 +#define RDP_CMD_SET_BLEND_COLOR 0xF9000000 + +__attribute__((always_inline)) +static inline void gpu_push_rdp(uint32_t a1, uint64_t a2) +{ + rdpq_write(2, gpup_id, GPU_CMD_PUSH_RDP, 0, a1, a2); +} + + +static float gpu_vp_scale[3]; +static float gpu_vp_offset[3]; +static bool gpu_texturing; +static void* gpu_pointer; +static int gpu_stride; + +#define GPU_ATTR_Z (1 << 8) +#define GPU_ATTR_TEX (1 << 9) +#define GPU_ATTR_SHADE (1 << 10) +#define 
GPU_ATTR_EDGE (1 << 11) +static bool gpu_attr_z, gpu_attr_tex; + +static void gpuUpdateFormat(void) +{ + uint16_t cmd = 0xC000 | GPU_ATTR_SHADE | GPU_ATTR_EDGE; + + if (gpu_attr_z) cmd |= GPU_ATTR_Z; + if (gpu_attr_tex) cmd |= GPU_ATTR_TEX; + + gpu_set_short(offsetof(gpu_state, tri_cmd), cmd); +} + +static void gpuSetTexSize(uint16_t width, uint16_t height) /* writes one word at tex_size[0]: width in the upper 16 bits, height in the lower 16 bits */ +{ + gpu_set_word(offsetof(gpu_state, tex_size[0]), ((uint32_t)width << 16) | height); /* widen first: a uint16_t promotes to signed int, and shifting a value >= 0x8000 by 16 would overflow it */ +} + + +static inline void write_shorts(rspq_write_t *w, const uint16_t *s, uint32_t count) +{ + for (uint32_t i = 0; i < count; i += 2) + { + uint32_t packed = ((uint32_t)s[i] << 16) | (uint32_t)s[i+1]; + rspq_write_arg(w, packed); + } +} + +static inline void gpu_matrix_write(rspq_write_t* w, const float* m) +{ + uint16_t integer[16]; + uint16_t fraction[16]; + + for (uint32_t i = 0; i < 16; i++) + { + int32_t fixed = m[i] * (1<<16); + integer[i] = (uint16_t)((fixed & 0xFFFF0000) >> 16); + fraction[i] = (uint16_t)(fixed & 0x0000FFFF); + } + + write_shorts(w, integer, 16); + write_shorts(w, fraction, 16); +} + +static void gpuLoadMatrix(const float* m) +{ + rspq_write_t w = rspq_write_begin(gpup_id, GPU_CMD_MATRIX_LOAD, 17); + rspq_write_arg(&w, 0); // padding + gpu_matrix_write(&w, m); + rspq_write_end(&w); +} + +static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2) /* packs v1 (upper 16 bits) and v2 (lower 16 bits) into a single 32-bit argument */ +{ + rspq_write_arg(s, (uint32_t)v2 | ((uint32_t)v1 << 16)); /* widened to uint32_t before shifting, same as write_shorts, so v1 >= 0x8000 is well defined */ +} + +static void upload_vertex(rspq_write_t* s, uint32_t index) +{ + char* ptr = gpu_pointer + index * gpu_stride; + + float* vtx = (float*)(ptr + 0); + put_word(s, vtx[0] * (1< +#include +#define MATRIX_SIZE 64 +#define GUARD_BAND_FACTOR 2 + + .data + + RSPQ_BeginOverlayHeader + RSPQ_DefineCommand GPUCmd_SetByte, 8 # 0x0 + RSPQ_DefineCommand GPUCmd_SetShort, 8 # 0x1 + RSPQ_DefineCommand GPUCmd_SetWord, 8 # 0x2 + RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3 + + RSPQ_DefineCommand GPUCmd_DrawQuad, 68 # 0x4 + RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x5 + + RSPQ_DefineCommand GPUCmd_PushRDP, 12 # 0x6 + 
RSPQ_EndOverlayHeader + + .align 4 +BANNER0: .ascii " RSP OpenGL T&L " +BANNER1: .ascii "Rasky & Snacchus" + + RSPQ_BeginSavedState + +GL_STATE: + # This is the GL state that is also used by the pipeline. + GL_MATRIX_MVP: .ds.b MATRIX_SIZE + GL_VIEWPORT_SCALE: .half 0,0,0,0 + GL_VIEWPORT_OFFSET: .half 0,0,0,0 + GL_STATE_TEX_SIZE: .half 0,0 + GL_STATE_TEX_OFFSET: .half 0,0 + GL_TRI_CMD: .half 0 + GL_TRI_CULL: .half 0 + + RSPQ_EndSavedState + + .align 4 +CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR +DRAW_TRI_RA: .word 0 + +#define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit) +#define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit) +#define SCREEN_VTX_X 16 +#define SCREEN_VTX_Y 18 +#define SCREEN_VTX_Z 20 +#define SCREEN_VTX_CLIP_CODE 22 +#define SCREEN_VTX_PADDING 23 +#define SCREEN_VTX_RGBA 24 +#define SCREEN_VTX_S_T 28 // 28 S, 30 T +#define SCREEN_VTX_W 32 // FIXME: this is duplicated in CS_POS +#define SCREEN_VTX_INVW 36 // 32-bit +#define SCREEN_VTX_SIZE 40 + + .bss + .align 3 +#define VERTEX_CACHE_SIZE 4 +//0-39 same as screenvtx +#define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w) +#define PRIM_VTX_SIZE 42 + +VERTEX_CACHE: .dcb.b PRIM_VTX_SIZE * VERTEX_CACHE_SIZE + + .text + + .func GPUCmd_SetByte +GPUCmd_SetByte: + jr ra + sb a1, %lo(GL_STATE)(a0) + .endfunc + + .func GPUCmd_SetShort +GPUCmd_SetShort: + jr ra + sh a1, %lo(GL_STATE)(a0) + .endfunc + + .func GPUCmd_SetWord +GPUCmd_SetWord: + jr ra + sw a1, %lo(GL_STATE) + 0(a0) + .endfunc + + .func GPUCmd_SetLong +GPUCmd_SetLong: + sw a2, %lo(GL_STATE) + 4(a0) + jr ra + sw a1, %lo(GL_STATE) + 0(a0) + .endfunc + + + .func GPUCmd_PushRDP +GPUCmd_PushRDP: + # RDP command is expected in a0 and a1 + move a0, a1 + move a1, a2 + + jal_and_j RDPQ_Write8, RDPQ_Finalize + .endfunc + + + .func GPUCmd_MatrixLoad +GPUCmd_MatrixLoad: + #define src s6 + #define dst s7 + + #define vrhs01_i $v02 + #define vrhs01_f $v03 + #define vrhs23_i $v04 + #define vrhs23_f $v05 + + addi 
src, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64 + addi dst, zero, %lo(GL_MATRIX_MVP) + + # Load the matrix from command parameters (misaligned) + lqv vrhs01_i, 0x00,src + lrv vrhs01_i, 0x10,src + lqv vrhs23_i, 0x10,src + lrv vrhs23_i, 0x20,src + lqv vrhs01_f, 0x20,src + lrv vrhs01_f, 0x30,src + lqv vrhs23_f, 0x30,src + lrv vrhs23_f, 0x40,src + + sqv vrhs01_i, 0x00,dst + sqv vrhs23_i, 0x10,dst + sqv vrhs01_f, 0x20,dst + jr ra + sqv vrhs23_f, 0x30,dst + +#undef src +#undef dst + .endfunc + + .align 3 + .func GPUCmd_DrawQuad +GPUCmd_DrawQuad: + #define vtx a0 + #define mtx_ptr s0 + #define src_ptr s4 + #define vcount s3 + + #define v___ $v01 + + #define vmtx0_i $v16 // m00 m01 m02 m03 + #define vmtx0_f $v17 + #define vmtx1_i $v18 // m10 m11 m12 m13 + #define vmtx1_f $v19 + #define vmtx2_i $v20 // m20 m21 m22 m23 + #define vmtx2_f $v21 + #define vmtx3_i $v22 // m30 m31 m32 m33 + #define vmtx3_f $v23 + + #define vpos $v24 + #define vcol $v25 + #define vtex $v26 + #define vcspos_i $v28 + #define vcspos_f $v29 + + #define x e0 + #define y e1 + #define z e2 + #define w e3 + + addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64 + li vtx, %lo(VERTEX_CACHE) + li vcount, 4 + + li mtx_ptr, %lo(GL_MATRIX_MVP) + ldv vmtx0_i.e0, 0x00,mtx_ptr + ldv vmtx1_i.e0, 0x08,mtx_ptr + ldv vmtx2_i.e0, 0x10,mtx_ptr + ldv vmtx3_i.e0, 0x18,mtx_ptr + ldv vmtx0_f.e0, 0x20,mtx_ptr + ldv vmtx1_f.e0, 0x28,mtx_ptr + ldv vmtx2_f.e0, 0x30,mtx_ptr + ldv vmtx3_f.e0, 0x38,mtx_ptr + +upload_vertex: + ldv vpos, 0, src_ptr # Load X, Y, Z, W + llv vcol, 8, src_ptr # Load RGBA + llv vtex, 12, src_ptr # Load U, V + + # matrix multiply + vmudn v___, vmtx0_f, vpos.h0 + vmadh v___, vmtx0_i, vpos.h0 + vmadn v___, vmtx1_f, vpos.h1 + vmadh v___, vmtx1_i, vpos.h1 + vmadn v___, vmtx2_f, vpos.h2 + vmadh v___, vmtx2_i, vpos.h2 + vmadn v___, vmtx3_f, vpos.h3 + vmadh vcspos_i, vmtx3_i, vpos.h3 + vmadn vcspos_f, vzero, vzero + + slv vcol, SCREEN_VTX_RGBA, vtx + slv vtex, SCREEN_VTX_S_T, vtx + + # 32-bit right 
shift by 5, to keep the clip space coordinates unscaled + vmudm vcspos_i, vcspos_i, vshift8.e4 + vmadl vcspos_f, vcspos_f, vshift8.e4 + + addi vcount, -1 + addi src_ptr, 16 + + sdv vcspos_i, SCREEN_VTX_CS_POSi,vtx + sdv vcspos_f, SCREEN_VTX_CS_POSf,vtx + + # Calculate and store clipping flags against CS.W. + # These will be used for trivial rejections. + vch v___, vcspos_i, vcspos_i.w + vcl v___, vcspos_f, vcspos_f.w + cfc2 t0, COP2_CTRL_VCC + andi t0, 0x707 # Isolate X/Y/Z flags + + # Compress flags to 8 bit + srl t1, t0, 5 + andi t0, 0x7 + or t0, t1 + sb t0, PRIM_VTX_TRCODE(vtx) + + bnez vcount, upload_vertex + addi vtx, PRIM_VTX_SIZE + + + # now do the actual drawing + li a1, %lo(VERTEX_CACHE) + 0*PRIM_VTX_SIZE + li a2, %lo(VERTEX_CACHE) + 1*PRIM_VTX_SIZE + jal GPUCmd_DrawTriangle + li a3, %lo(VERTEX_CACHE) + 2*PRIM_VTX_SIZE + + li a1, %lo(VERTEX_CACHE) + 0*PRIM_VTX_SIZE + li a2, %lo(VERTEX_CACHE) + 2*PRIM_VTX_SIZE + jal GPUCmd_DrawTriangle + li a3, %lo(VERTEX_CACHE) + 3*PRIM_VTX_SIZE + + j RSPQ_Loop + nop + #undef src_ptr + #undef vtx + + #undef x + #undef y + #undef z + #undef w + + #undef v___ + + #undef vmtx0_i + #undef vmtx0_f + #undef vmtx1_i + #undef vmtx1_f + #undef vmtx2_i + #undef vmtx2_f + #undef vmtx3_i + #undef vmtx3_f + + #undef vpos + #undef vcspos_i + #undef vcspos_f + + .endfunc + + ################################################################ + # GL_CalcScreenSpace + # + # Args: + # s3 = Destination vertex address + # $v02 = Clip space position (fractional part) + # $v03 = Clip space position (integer part) + # + ################################################################ + .func GL_CalcScreenSpace +GL_CalcScreenSpace: + #define dst s3 + #define vcspos_f $v02 + #define vcspos_i $v03 + #define vinvw_f $v23 + #define vinvw_i $v24 + #define vviewscale $v25 + #define vviewoff $v26 + #define vscreenpos_i $v27 + #define vscreenpos_f $v28 + #define v___ $v29 + #define w e3 + + # Calculate 32-bit inverse W + # TODO: NR? 
+ vrcph vinvw_i.w, vcspos_i.w + vrcpl vinvw_f.w, vcspos_f.w + vrcph vinvw_i.w, vzero.e0 + + # Calculate screenspace coords + li t0, %lo(GL_VIEWPORT_SCALE) + ldv vviewscale, 0,t0 + ldv vviewoff, 8,t0 + + vmudl v___, vcspos_f, vinvw_f.w + vmadm v___, vcspos_i, vinvw_f.w + vmadn vscreenpos_f, vcspos_f, vinvw_i.w + vmadh vscreenpos_i, vcspos_i, vinvw_i.w + + vmudn vscreenpos_f, vscreenpos_f, vviewscale + vmadh vscreenpos_i, vscreenpos_i, vviewscale + vadd vscreenpos_i, vviewoff + + sdv vscreenpos_i, SCREEN_VTX_X ,dst + ssv vcspos_i.w, SCREEN_VTX_W+0 ,dst + ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst + ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst + ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst + jr ra + sb zero, SCREEN_VTX_PADDING(dst) + + #undef dst + #undef vcspos_f + #undef vcspos_i + #undef vinvw_f + #undef vinvw_i + #undef vviewscale + #undef vviewoff + #undef vscreenpos_i + #undef vscreenpos_f + #undef v___ + #undef w + + .endfunc + + ################################################################ + # GL_CalcClipCodes + # + # Args: + # s3 = Destination vertex address + # $v02 = Clip space position (fractional part) + # $v03 = Clip space position (integer part) + # + ################################################################ + .func GL_CalcClipCodes +GL_CalcClipCodes: + #define dst s3 + #define vcspos_f $v02 + #define vcspos_i $v03 + #define vguard_f $v27 + #define vguard_i $v28 + #define v___ $v29 + #define w e3 + + li t0, %lo(CLIP_CODE_FACTORS) + ldv vguard_i, 0,t0 + + vmudn vguard_f, vcspos_f, vguard_i + vmadh vguard_i, vcspos_i, vguard_i + + vch v___, vguard_i, vguard_i.w + vcl v___, vguard_f, vguard_f.w + cfc2 t0, COP2_CTRL_VCC + andi t0, 0x707 + srl t1, t0, 5 + andi t0, 0x7 + or t0, t1 + jr ra + sb t0, SCREEN_VTX_CLIP_CODE(dst) + + #undef dst + #undef vcspos_i + #undef vcspos_f + #undef vguard_i + #undef vguard_f + #undef v___ + #undef w + + .endfunc + + ################################################################ + # GL_TnL + # + # Args: + # s3 = address of the vertex in 
DMEM (usually within VERTEX_CACHE) + # + ################################################################ + .func GL_TnL +GL_TnL: + #define vtx s3 + + #define v___ $v01 + #define vcspos_f $v02 + #define vcspos_i $v03 + #define vtexsize $v06 + #define vtexoffset $v07 + #define vst $v08 + #define vst_i $v28 + #define vst_f $v29 + move ra2, ra + + llv vst, SCREEN_VTX_S_T, vtx # S + T + + li t0, %lo(GL_STATE_TEX_SIZE) + llv vtexsize, 0,t0 + llv vtexoffset, 4,t0 + + # Scale texcoord by texsize and subtract offset (to correct for bilinear sampling if active) + #vmudn v___, vst, vtexsize + # vmadh vst, vtexoffset, K1 + + #vmudn v___, vst, vtexsize + #vmadh vst, vtexoffset, K1 + #vmudl vst, vst, vtexsize + + vmudh v___, vst, vtexsize + vsar vst_i, COP2_ACC_HI + vsar vst_f, COP2_ACC_MD + + vmudl vst_f, vst_f, K8192 + vmadm vst_i, vst_i, K8192 + vmadn vst, vzero, vzero + + #undef vst_i + #undef vst_f + + lbu t0, PRIM_VTX_TRCODE(vtx) + slv vst, SCREEN_VTX_S_T, vtx + + ldv vcspos_f, SCREEN_VTX_CS_POSf,vtx + ldv vcspos_i, SCREEN_VTX_CS_POSi,vtx + + # Mark this vertex as having T&L applied + ori t0, 0x80 + + jal GL_CalcScreenSpace + sb t0, PRIM_VTX_TRCODE(vtx) + + j GL_CalcClipCodes + move ra, ra2 + + #undef vcspos_f + #undef vcspos_i + #undef vtexsize + #undef vtexoffset + + #undef vtx + + #undef v___ + #undef vrgba + #undef vst + #undef s + + .endfunc + + + .func GPUCmd_DrawTriangle +GPUCmd_DrawTriangle: + #define vtx1 a1 + #define vtx2 a2 + #define vtx3 a3 + #define trcode1 t6 + #define trcode2 t7 + #define trcode3 t8 + sw ra, %lo(DRAW_TRI_RA) # TODO find a register for this + + # Trivial reject: if all the vertices are out of the same plane (at least one), + # the triangle is out of the viewport. + # NOTE: This deliberately uses lb instead of lbu so the sign bit is extended. + # The MSB of each TR-code is a bit flag that is set if the vertex has already + # had T&L applied once. 
+ lb trcode1, PRIM_VTX_TRCODE(vtx1) + lb trcode2, PRIM_VTX_TRCODE(vtx2) + lb trcode3, PRIM_VTX_TRCODE(vtx3) + and t0, trcode1, trcode2 + and t0, trcode3 + andi t0, 0x3F + bnez t0, JrRa + nop + + # Perform T&L for each vertex if we haven't already + bgezal trcode1, GL_TnL + move s3, vtx1 + + bgezal trcode2, GL_TnL + move s3, vtx2 + + bgezal trcode3, GL_TnL + move s3, vtx3 + + lbu t0, SCREEN_VTX_CLIP_CODE(vtx1) + lbu t1, SCREEN_VTX_CLIP_CODE(vtx2) + lbu t2, SCREEN_VTX_CLIP_CODE(vtx3) + or t5, t0, t1 + or t5, t2 + + move s1, zero + beqz t5, gl_draw_single_triangle + move s2, zero + + jal GL_ClipTriangle + nop + + beqz v1, gl_draw_triangle_end + addi s2, -6 + lhu s5, 0(s1) +gl_draw_clipped_triangles_loop: + move vtx1, s5 + lhu vtx2, 2(s1) + lhu vtx3, 4(s1) + +gl_draw_single_triangle: + addi vtx1, SCREEN_VTX_X + addi vtx2, SCREEN_VTX_X + addi vtx3, SCREEN_VTX_X + + lhu a0, %lo(GL_TRI_CMD) + lh v0, %lo(GL_TRI_CULL) + jal RDPQ_Triangle + li s3, %lo(RDPQ_CMD_STAGING) + + jal RDPQ_Send + li s4, %lo(RDPQ_CMD_STAGING) + + blt s1, s2, gl_draw_clipped_triangles_loop + addi s1, 2 + +gl_draw_triangle_end: + lw ra, %lo(DRAW_TRI_RA) + jr ra + nop + + #undef vtx1 + #undef vtx2 + #undef vtx3 + .endfunc + +#include "rsp_gpu_clipping.inc" +#include diff --git a/misc/n64/rsp_gpu_clipping.inc b/misc/n64/rsp_gpu_clipping.inc new file mode 100644 index 000000000..f406650fb --- /dev/null +++ b/misc/n64/rsp_gpu_clipping.inc @@ -0,0 +1,380 @@ +#define CLIPPING_PLANE_COUNT 6 +#define CLIPPING_CACHE_SIZE 9 +#define CLIPPING_PLANE_SIZE 8 + + .section .data.gl_clipping + + .align 4 +CLIP_PLANES: + .half 1, 0, 0, GUARD_BAND_FACTOR + .half 0, 1, 0, GUARD_BAND_FACTOR + .half 0, 0, 1, 1 + .half 1, 0, 0, -GUARD_BAND_FACTOR + .half 0, 1, 0, -GUARD_BAND_FACTOR + .half 0, 0, 1, -1 + + .align 4 +CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18 + + .section .bss.gl_clipping + +CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE +CLIP_CACHE_END: + +CLIP_LISTS: + CLIP_LIST0: .dcb.w CLIPPING_CACHE_SIZE + 
CLIP_LIST1: .dcb.w CLIPPING_CACHE_SIZE + + + .section .text.gl_clipping + + ################################################################ + # GL_ClipTriangle + # Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm + # https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm + # Args: + # a1-a3 = Vertices + # t5 = OR'd clip flags of the triangle's vertices + # Returns: + # s1 = Pointer to list of output vertices + # s2 = Pointer to end of list + ################################################################ + .func GL_ClipTriangle +GL_ClipTriangle: + #define out_count v1 + #define clip_flags t5 + #define plane_flag t6 + #define in_count t7 + #define in_end t8 + #define in_list s0 + #define out_list s1 + #define plane s2 + #define intersection s3 + #define cur_ptr s4 + #define prev_ptr s5 + #define cur_vtx s6 + #define prev_vtx s7 + #define p0 k0 + #define p1 k1 + #define vtx1 a1 + #define vtx2 a2 + #define vtx3 a3 + + #define vplane $v01 + #define vint_f $v02 + #define vint_i $v03 + #define vdot_i $v04 + #define vdot_f $v05 + #define vdiff_i $v06 + #define vdiff_f $v07 + #define va_i $v08 + #define va_f $v09 + #define vpos_i $v10 + #define vpos_f $v11 + #define vattr0 $v12 + #define vattr1 $v13 + #define voff0 $v14 + #define voff1 $v15 + #define vcache0 $v16 + #define vcache1 $v17 + #define v__ $v29 + + move ra2, ra + + # Init in_list as empty + li in_list, %lo(CLIP_LIST0) + move in_count, zero + + # Put three original vertices in the out_list + # (So after the initial swap they will be in the in_list) + li out_list, %lo(CLIP_LIST1) + sh vtx1, 0(out_list) + sh vtx2, 2(out_list) + sh vtx3, 4(out_list) + li out_count, 3*2 + + li plane, %lo(CLIP_PLANES) + li plane_flag, 1 + + # Load cache offsets + li t0, %lo(CACHE_OFFSETS) + vxor voff1, voff1 + lqv voff0, 0,t0 + lsv voff1, 16,t0 + + # Temporarily use the RDP staging area as a map of which cache slots are used + # Init to zero + li t0, %lo(RDPQ_CMD_STAGING) + sqv vzero, 
0,t0 + sqv vzero, 16,t0 + + # Iterate over the 6 clipping planes +gl_clip_plane_loop: + and t0, clip_flags, plane_flag + beqz t0, gl_clip_plane_loop_end + move t1, in_list + + # Swap in and out lists + + # If the out list is empty from the last iteration, + # the triangle has no visible points and we are done + beqz out_count, gl_clip_return + move in_list, out_list + move out_list, t1 + move in_count, out_count + move out_count, zero + + # Iterate over the edges of the polygon in the input list + # The current edge is between cur_vtx and prev_vtx + move cur_ptr, in_list + add in_end, in_list, in_count + # Init the "previous" vertex to the last in the list for the wrap-around + addi prev_ptr, in_end, -2 + +gl_clip_edge_loop: + #define cur_flag t3 + #define prev_flag t4 + + # Check which side of the plane the two vertices are on + lhu cur_vtx, 0(cur_ptr) + lhu prev_vtx, 0(prev_ptr) + lbu cur_flag, SCREEN_VTX_CLIP_CODE(cur_vtx) + lbu prev_flag, SCREEN_VTX_CLIP_CODE(prev_vtx) + and cur_flag, plane_flag + and prev_flag, plane_flag + + # If they are on opposite sides, there is an intersection + xor t0, cur_flag, prev_flag + beqz t0, gl_clip_no_intersection + move p0, cur_vtx + + # Swap the two points if necessary to make intersection calculation consistent + # This will make sure p0 is always inside and p1 is always outside + bnez prev_flag, gl_clip_no_swap + move p1, prev_vtx + xor p0, p0, p1 + xor p1, p0, p1 + xor p0, p0, p1 + + #undef prev_flag + +gl_clip_no_swap: + # Calculate intersection of the line segment and the plane + + li t0, %lo(RDPQ_CMD_STAGING) + lqv vcache0, 0,t0 + lqv vcache1, 16,t0 + + # Repeat plane coefficients twice + ldv vplane.e0, 0,plane + ldv vplane.e4, 0,plane + + # vpos: x0 y0 z0 w0 x1 y1 z1 w1 + ldv vpos_i.e0, SCREEN_VTX_CS_POSi,p0 + ldv vpos_f.e0, SCREEN_VTX_CS_POSf,p0 + ldv vpos_i.e4, SCREEN_VTX_CS_POSi,p1 + ldv vpos_f.e4, SCREEN_VTX_CS_POSf,p1 + + # vint: x1 y1 z1 w1 + ldv vint_i.e0, SCREEN_VTX_CS_POSi,p1 + ldv vint_f.e0, 
SCREEN_VTX_CS_POSf,p1 + + # vattr0: r0 g0 b0 a0 s0 t0 + luv vattr0.e0, SCREEN_VTX_RGBA ,p0 + llv vattr0.e4, SCREEN_VTX_S_T ,p0 + + # vattr1: r1 g1 b1 a1 s1 t1 + luv vattr1.e0, SCREEN_VTX_RGBA ,p1 + llv vattr1.e4, SCREEN_VTX_S_T ,p1 + + # Find first free slot in clip cache + + # Add the values from the "used slots map" to the cache offsets + # After this, each lane will contain the offset of its corresponding cache slot, + # but only if the slot is not used. If it is used, it will contain some large value. + vaddc vcache0, voff0 + vaddc vcache1, voff1 + + # Look for the smallest value, which will end up in vcache.e0 + # Because used slots are marked as large values, they will never be found. + vlt vcache0, vcache0.q1 + vlt vcache0, vcache0.h2 + vlt vcache0, vcache0.e4 + vlt vcache0, vcache1.e0 + + mfc2 t0, vcache0.e0 + + # Mark slot as used by storing some large value (careful of overflows!) + li t1, 0xFF + sh t1, %lo(RDPQ_CMD_STAGING)-2(t0) + + # t0 is the index multiplied by 2 + # intersection = t0 * 20 = t0 * 16 + t0 * 4 + sll intersection, t0, 4 + sll t1, t0, 2 + add intersection, t1 + + # CAUTION: intersection might point to the same address as either p0 or p1, + # because one of them is the previous point, which could have been marked unused + # in the previous iteration. As long as we don't access p0 or p1 after writing to + # intersection, this is fine. 
+ addi intersection, %lo(CLIP_CACHE) - SCREEN_VTX_SIZE + + # Store the cache offset in unused memory (used later when finding the cache slot to mark as unused) + sb t0, SCREEN_VTX_PADDING(intersection) + + # Compute dot products of both positions with the clip plane + # vdot.e0: d0 = dot(p0, plane) + # vdot.e4: d1 = dot(p1, plane) + vmudn vdot_f, vpos_f, vplane + vmadh vdot_i, vpos_i, vplane + vaddc vdot_f, vdot_f.q1 + vadd vdot_i, vdot_i.q1 + vaddc vdot_f, vdot_f.h2 + vadd vdot_i, vdot_i.h2 + + # d0 - d1 + vsubc vdiff_f, vdot_f, vdot_f.e4 + vsub vdiff_i, vdot_i, vdot_i.e4 + + # 1 / (d0 - d1) + vrcph v__.e0, vdiff_i.e0 + vrcpl va_f.e0, vdiff_f.e0 + vrcph va_i.e0, vzero.e0 + + # a = d0 / (d0 - d1) + vmudl v__, va_f, vdot_f.e0 + vmadm v__, va_i, vdot_f.e0 + vmadn va_f, va_f, vdot_i.e0 + + # Prepare 0x7FFF in va_i.e0 + vsubc va_i, vshift8, K1 + + # a = min(a, 1) + vge v__, va_f, vzero + vmrg va_f, va_f, va_i.e0 + + # Account for right shift introduced by vrcp + vmudn va_f, va_f, K2 + + # p1 - p0 + vsubc vint_f, vpos_f + vsub vint_i, vpos_i + # attr1 - attr0 + vsubc vattr1, vattr0 + + # Result of linear interpolation: + # p0 + a * (p1 - p0) + vmudl v__, vint_f, va_f.e0 + vmadm v__, vint_i, va_f.e0 + vmadn vint_f, vpos_f, K1 + vmadh vint_i, vpos_i, K1 + + # a * (attr1 - attr0) + vmudm vattr1, vattr1, va_f.e0 + + # attr0 + a * (attr1 - attr0) + vaddc vattr0, vattr1 + + # Store results + sdv vint_i.e0, SCREEN_VTX_CS_POSi,intersection + sdv vint_f.e0, SCREEN_VTX_CS_POSf,intersection + suv vattr0.e0, SCREEN_VTX_RGBA ,intersection + jal GL_CalcClipCodes + slv vattr0.e4, SCREEN_VTX_S_T ,intersection + + # Add intersection to the output list + add t0, out_list, out_count + sh intersection, 0(t0) + addi out_count, 2 + +gl_clip_no_intersection: + # If cur_vtx is inside, add it to the output list + bnez cur_flag, gl_clip_no_current + add t0, out_list, out_count + sh cur_vtx, 0(t0) + b gl_clip_edge_loop_end + addi out_count, 2 + + #undef cur_flag + +gl_clip_no_current: + # Check 
if the vertex is stored in the clip cache + lbu t0, SCREEN_VTX_PADDING(cur_vtx) + beqz t0, gl_clip_edge_loop_end + # Reset the padding field to zero, so the screen space values won't be recalculated below + sb zero, SCREEN_VTX_PADDING(cur_vtx) + # If so, mark it as unused + sh zero, %lo(RDPQ_CMD_STAGING)-2(t0) + +gl_clip_edge_loop_end: + # Advance to the next edge + addi cur_ptr, 2 + blt cur_ptr, in_end, gl_clip_edge_loop + addi prev_ptr, cur_ptr, -2 + +gl_clip_plane_loop_end: + # Advance to the next clipping plane + sll plane_flag, 1 + blt plane_flag, (1< -#include -#include #include - -typedef void (*GL_SetupVBFunc)(void); -static GL_SetupVBFunc gfx_setupVBFunc; - +#include +#include "../misc/n64/gpu.c" /*########################################################################################################################* *---------------------------------------------------------General---------------------------------------------------------* *#########################################################################################################################*/ static surface_t zbuffer; +static GfxResourceID white_square; void Gfx_Create(void) { - gl_init(); + rspq_init(); + //rspq_profile_start(); + rdpq_init(); //rdpq_debug_start(); // TODO debug //rdpq_debug_log(true); + + rdpq_set_mode_standard(); + __rdpq_mode_change_som(SOM_TEXTURE_PERSP, SOM_TEXTURE_PERSP); + __rdpq_mode_change_som(SOM_ZMODE_MASK, SOM_ZMODE_OPAQUE); + rdpq_mode_dithering(DITHER_SQUARE_SQUARE); + + gpu_init(); + + // Set alpha compare threshold + gpu_push_rdp(RDP_CMD_SYNC_PIPE, 0); + gpu_push_rdp(RDP_CMD_SET_BLEND_COLOR, (0 << 24) | (0 << 16) | (0 << 8) | 127); + zbuffer = surface_alloc(FMT_RGBA16, display_get_width(), display_get_height()); Gfx.MaxTexWidth = 256; @@ -36,6 +47,9 @@ void Gfx_Create(void) { Gfx.SupportsNonPowTwoTextures = true; Gfx_RestoreState(); + + Gfx_SetFaceCulling(false); + Gfx_SetViewport(0, 0, Game.Width, Game.Height); } cc_bool Gfx_TryRestoreContext(void) { 
@@ -44,11 +58,9 @@ cc_bool Gfx_TryRestoreContext(void) { void Gfx_Free(void) { Gfx_FreeState(); - gl_close(); + gpu_close(); } -#define gl_Toggle(cap) if (enabled) { glEnable(cap); } else { glDisable(cap); } - /*########################################################################################################################* *-----------------------------------------------------------Misc----------------------------------------------------------* @@ -73,21 +85,17 @@ void Gfx_SetVSync(cc_bool vsync) { void Gfx_OnWindowResize(void) { } void Gfx_SetViewport(int x, int y, int w, int h) { - glViewport(x, Game.Height - h - y, w, h); -} -void Gfx_SetScissor (int x, int y, int w, int h) { - cc_bool enabled = x != 0 || y != 0 || w != Game.Width || h != Game.Height; - if (enabled) { glEnable(GL_SCISSOR_TEST); } else { glDisable(GL_SCISSOR_TEST); } - - glScissor(x, Game.Height - h - y, w, h); + gpuViewport(x, y, w, h); } +void Gfx_SetScissor(int x, int y, int w, int h) { + rdpq_set_scissor(x, y, x + w, y + h); +} void Gfx_BeginFrame(void) { surface_t* disp = display_get(); rdpq_attach(disp, &zbuffer); - gl_context_begin(); Platform_LogConst("GFX ctx beg"); } @@ -113,9 +121,11 @@ void Gfx_ClearColor(PackedCol color) { void Gfx_EndFrame(void) { Platform_LogConst("GFX ctx end"); - gl_context_end(); rdpq_detach_show(); -//Platform_LogConst("GFX END"); + //Platform_LogConst("GFX END"); + + //rspq_profile_dump(); + //rspq_profile_next_frame(); } @@ -124,14 +134,32 @@ void Gfx_EndFrame(void) { *#########################################################################################################################*/ typedef struct CCTexture { surface_t surface; - GLuint textureID; + rspq_block_t* upload_block; } CCTexture; +void Gfx_BindTexture(GfxResourceID texId) { + if (!texId) texId = white_square; + CCTexture* tex = (CCTexture*)texId; + + rspq_block_run(tex->upload_block); + gpuSetTexSize(tex->surface.width, tex->surface.height); +} + #define ALIGNUP8(size) (((size) + 7) 
& ~0x07) // A8 B8 G8 R8 > A1 B5 G5 B5 #define To16BitPixel(src) \ - ((src & 0x80) >> 7) | ((src & 0xF800) >> 10) | ((src & 0xF80000) >> 13) | ((src & 0xF8000000) >> 16); + ((src & 0x80) >> 7) | ((src & 0xF800) >> 10) | ((src & 0xF80000) >> 13) | ((src & 0xF8000000) >> 16); + +static void UploadTexture(CCTexture* tex, rdpq_texparms_t* params) { + rspq_block_begin(); + + rdpq_tex_multi_begin(); + rdpq_tex_upload(TILE0, &tex->surface, params); + rdpq_tex_multi_end(); + + tex->upload_block = rspq_block_end(); +} GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) { cc_bool bit16 = flags & TEXTURE_FLAG_LOWRES; @@ -141,15 +169,8 @@ GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, if (pitch * bmp->height > 4096) return 0; CCTexture* tex = Mem_Alloc(1, sizeof(CCTexture), "texture"); - - glGenTextures(1, &tex->textureID); - glBindTexture(GL_TEXTURE_2D, tex->textureID); - // NOTE: Enabling these fixes textures, but seems to break on cen64 - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, mipmaps ? GL_LINEAR : GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, mipmaps ? GL_LINEAR : GL_NEAREST); - - tex->surface = surface_alloc(bit16 ? FMT_RGBA16 : FMT_RGBA32, bmp->width, bmp->height); - surface_t* fb = &tex->surface; + tex->surface = surface_alloc(bit16 ? FMT_RGBA16 : FMT_RGBA32, bmp->width, bmp->height); + surface_t* fb = &tex->surface; if (bit16) { cc_uint32* src = (cc_uint32*)bmp->scan0; @@ -172,33 +193,17 @@ GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, bmp, rowWidth * BITMAPCOLOR_SIZE); } - rdpq_texparms_t params = { .s.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 1 : REPEAT_INFINITE, .t.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 
1 : REPEAT_INFINITE, }; - - // rdpq_tex_upload(TILE0, &tex->surface, &params); - glSurfaceTexImageN64(GL_TEXTURE_2D, 0, fb, &params); + UploadTexture(tex, &params); return tex; } -void Gfx_BindTexture(GfxResourceID texId) { - CCTexture* tex = (CCTexture*)texId; - GLuint glID = tex ? tex->textureID : 0; - //Platform_Log1("BIND: %i", &glID); - - //rdpq_debug_log(true); - glBindTexture(GL_TEXTURE_2D, glID); - // rdpq_debug_log(false); -} - void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, int rowWidth, cc_bool mipmaps) { - // TODO: Just memcpying doesn't actually work. maybe due to glSurfaceTexImageN64 caching the RSQ upload block? - // TODO: Is there a more optimised approach than just calling glSurfaceTexImageN64 CCTexture* tex = (CCTexture*)texId; - surface_t* fb = &tex->surface; cc_uint32* src = (cc_uint32*)part->scan0 + x; cc_uint8* dst = (cc_uint8*)fb->buffer + (x * 4) + (y * fb->stride); @@ -210,21 +215,22 @@ void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, i part->width * 4); } - - glBindTexture(GL_TEXTURE_2D, tex->textureID); rdpq_texparms_t params = (rdpq_texparms_t){ .s.repeats = REPEAT_INFINITE, .t.repeats = REPEAT_INFINITE, }; - // rdpq_tex_upload(TILE0, &tex->surface, &params); - glSurfaceTexImageN64(GL_TEXTURE_2D, 0, fb, &params); + + rdpq_call_deferred((void (*)(void*))rspq_block_free, tex->upload_block); + UploadTexture(tex, &params); } void Gfx_DeleteTexture(GfxResourceID* texId) { CCTexture* tex = (CCTexture*)(*texId); if (!tex) return; - glDeleteTextures(1, &tex->textureID); + if (tex->upload_block) rdpq_call_deferred((void (*)(void*))rspq_block_free, tex->upload_block); + surface_free(&tex->surface); + Mem_Free(tex); *texId = NULL; } @@ -236,29 +242,46 @@ void Gfx_DisableMipmaps(void) { } /*########################################################################################################################* *-----------------------------------------------------State 
management----------------------------------------------------* *#########################################################################################################################*/ -void Gfx_SetFaceCulling(cc_bool enabled) { gl_Toggle(GL_CULL_FACE); } -static void SetAlphaBlend(cc_bool enabled) { gl_Toggle(GL_BLEND); } -void Gfx_SetAlphaArgBlend(cc_bool enabled) { } - -static void SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a) { - //glColorMask(r, g, b, a); TODO +void Gfx_SetFaceCulling(cc_bool enabled) { + gpuSetCullFace(enabled); } -void Gfx_SetDepthWrite(cc_bool enabled) { glDepthMask(enabled); } -void Gfx_SetDepthTest(cc_bool enabled) { gl_Toggle(GL_DEPTH_TEST); } +static void SetAlphaBlend(cc_bool enabled) { + rdpq_mode_blender(enabled ? RDPQ_BLENDER_MULTIPLY : 0); + __rdpq_mode_change_som(SOM_ZMODE_MASK, enabled ? SOM_ZMODE_TRANSPARENT : SOM_ZMODE_OPAQUE); +} + +void Gfx_SetAlphaArgBlend(cc_bool enabled) { } + +static void SetAlphaTest(cc_bool enabled) { + __rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, enabled ? SOM_ALPHACOMPARE_THRESHOLD : 0); +} + +static void SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a) { + //gpuColorMask(r, g, b, a); TODO +} + +void Gfx_SetDepthWrite(cc_bool enabled) { + __rdpq_mode_change_som(SOM_Z_WRITE, enabled ? SOM_Z_WRITE : 0); +} + +void Gfx_SetDepthTest(cc_bool enabled) { + __rdpq_mode_change_som(SOM_Z_COMPARE, enabled ? 
SOM_Z_COMPARE : 0); + + gpu_attr_z = enabled; + gpuUpdateFormat(); +} static void Gfx_FreeState(void) { FreeDefaultResources(); } static void Gfx_RestoreState(void) { InitDefaultResources(); - glEnableClientState(GL_VERTEX_ARRAY); - glEnableClientState(GL_COLOR_ARRAY); gfx_format = -1; - - glHint(GL_FOG_HINT, GL_NICEST); - glAlphaFunc(GL_GREATER, 0.5f); - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glDepthFunc(GL_LESS); - //glEnable(GL_RDPQ_TEXTURING_N64); + + // 1x1 dummy white texture + struct Bitmap bmp; + BitmapCol pixels[1] = { BITMAPCOLOR_WHITE }; + Bitmap_Init(bmp, 1, 1, pixels); + white_square = Gfx_CreateTexture(&bmp, 0, false); } cc_bool Gfx_WarnIfNecessary(void) { return false; } @@ -348,8 +371,8 @@ static rspq_block_t* VB_GetCached(struct VertexBuffer* vb, int offset, int count if (vb->cache.blocks[i]) continue; rspq_block_begin(); - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, offset, count); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(offset, count); rspq_block_t* block = rspq_block_end(); vb->cache.blocks[i] = block; @@ -435,80 +458,64 @@ void Gfx_SetFogEnd(float value) { void Gfx_SetFogMode(FogFunc func) { } -static void SetAlphaTest(cc_bool enabled) { - if (enabled) { glEnable(GL_ALPHA_TEST); } else { glDisable(GL_ALPHA_TEST); } -} - void Gfx_DepthOnlyRendering(cc_bool depthOnly) { depthOnlyRendering = depthOnly; // TODO: Better approach? maybe using glBlendFunc instead? 
cc_bool enabled = !depthOnly; + //SetColorWrite(enabled & gfx_colorMask[0], enabled & gfx_colorMask[1], // enabled & gfx_colorMask[2], enabled & gfx_colorMask[3]); - if (enabled) { glEnable(GL_TEXTURE_2D); } else { glDisable(GL_TEXTURE_2D); } + gpu_attr_tex = enabled; + gpuUpdateFormat(); } /*########################################################################################################################* *---------------------------------------------------------Matrices--------------------------------------------------------* *#########################################################################################################################*/ -static GLenum matrix_modes[3] = { GL_PROJECTION, GL_MODELVIEW, GL_TEXTURE }; -static int lastMatrix; +static struct Matrix _view, _proj; void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) { - if (type != lastMatrix) { lastMatrix = type; glMatrixMode(matrix_modes[type]); } + if (type == MATRIX_VIEW) _view = *matrix; + if (type == MATRIX_PROJ) _proj = *matrix; - if (matrix == &Matrix_Identity) { - glLoadIdentity(); - } else { - glLoadMatrixf((const float*)matrix); - } + struct Matrix mvp __attribute__((aligned(64))); + Matrix_Mul(&mvp, &_view, &_proj); + gpuLoadMatrix((const float*)&mvp); } void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp) { - Gfx_LoadMatrix(MATRIX_VIEW, view); - Gfx_LoadMatrix(MATRIX_PROJ, proj); + _proj = *proj; + _view = *view; + Matrix_Mul(mvp, view, proj); + gpuLoadMatrix((const float*)mvp); } -static struct Matrix texMatrix = Matrix_IdentityValue; void Gfx_EnableTextureOffset(float x, float y) { - texMatrix.row4.x = x; texMatrix.row4.y = y; - Gfx_LoadMatrix(2, &texMatrix); + // TODO } -void Gfx_DisableTextureOffset(void) { Gfx_LoadMatrix(2, &Matrix_Identity); } +void Gfx_DisableTextureOffset(void) { } /*########################################################################################################################* 
*--------------------------------------------------------Rendering--------------------------------------------------------* *#########################################################################################################################*/ -static void GL_SetupVbColoured(void) { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 0)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 12)); -} - -static void GL_SetupVbTextured(void) { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 0)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12)); - glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16)); -} - void Gfx_SetVertexFormat(VertexFormat fmt) { if (fmt == gfx_format) return; gfx_format = fmt; gfx_stride = strideSizes[fmt]; + gpu_stride = gfx_stride; if (fmt == VERTEX_FORMAT_TEXTURED) { - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glEnable(GL_TEXTURE_2D); - - gfx_setupVBFunc = GL_SetupVbTextured; + rdpq_mode_combiner(RDPQ_COMBINER_TEX_SHADE); } else { - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - glDisable(GL_TEXTURE_2D); - - gfx_setupVBFunc = GL_SetupVbColoured; + rdpq_mode_combiner(RDPQ_COMBINER_SHADE); } + + gpu_texturing = fmt == VERTEX_FORMAT_TEXTURED; + gpu_attr_tex = gpu_texturing; + gpuUpdateFormat(); } void Gfx_DrawVb_Lines(int verticesCount) { @@ -520,8 +527,8 @@ void Gfx_DrawVb_IndexedTris_Range(int verticesCount, int startVertex, DrawHints if (block) { rspq_block_run(block); } else { - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, startVertex, verticesCount); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(startVertex, verticesCount); } } @@ -531,8 +538,8 @@ void Gfx_DrawVb_IndexedTris(int verticesCount) { if (block) { rspq_block_run(block); } else { - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, 0, verticesCount); + gpu_pointer = gfx_vb->vertices; + 
gpuDrawArrays(0, verticesCount); } } @@ -543,10 +550,8 @@ void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex) { if (block) { rspq_block_run(block); } else { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12)); - glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16)); - glDrawArrays(GL_QUADS, startVertex, verticesCount); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(startVertex, verticesCount); } } #endif