diff --git a/misc/n64/gpu.c b/misc/n64/gpu.c index d84943dab..a496eb173 100644 --- a/misc/n64/gpu.c +++ b/misc/n64/gpu.c @@ -1,4 +1,3 @@ -#include "GL/gl.h" #include "rspq.h" #include "rdpq.h" #include "rdpq_rect.h" @@ -6,14 +5,11 @@ #include "rdpq_debug.h" #include "display.h" #include "rdp.h" -#include -#include -#include #include "gl_constants.h" // This is a severely cutdown version of libdragon's OpenGL implementation -static uint32_t glp_id; +static uint32_t gpup_id; //DEFINE_RSP_UCODE(rsp_gpu); extern uint8_t _binary_build_n64_rsp_gpu_text_bin_start[]; extern uint8_t _binary_build_n64_rsp_gpu_data_bin_start[]; @@ -46,23 +42,16 @@ enum { GPU_CMD_PRE_INIT_PIPE = 0x8, }; -enum { - ATTRIB_VERTEX, - ATTRIB_COLOR, - ATTRIB_TEXCOORD, - ATTRIB_COUNT -}; - typedef struct { - GLfloat scale[3]; - GLfloat offset[3]; -} gl_viewport_t; + float scale[3]; + float offset[3]; +} gpu_viewport_t; typedef struct { int16_t i[4][4]; uint16_t f[4][4]; -} gl_matrix_srv_t; -_Static_assert(sizeof(gl_matrix_srv_t) == MATRIX_SIZE, "Matrix size does not match"); +} gpu_matrix_srv_t; +_Static_assert(sizeof(gpu_matrix_srv_t) == MATRIX_SIZE, "Matrix size does not match"); typedef struct { rspq_write_t w; @@ -71,16 +60,10 @@ typedef struct { uint32_t word; }; uint32_t buffer_head; -} gl_cmd_stream_t; +} gpu_cmd_stream_t; typedef struct { - GLsizei stride; - const GLvoid *pointer; - bool enabled; -} gl_array_t; - -typedef struct { - gl_matrix_srv_t mvp_matrix; + gpu_matrix_srv_t mvp_matrix; int16_t viewport_scale[4]; int16_t viewport_offset[4]; uint32_t flags; @@ -88,134 +71,111 @@ typedef struct { uint16_t tex_offset[2]; uint16_t tri_cmd; uint16_t tri_cull; -} __attribute__((aligned(8), packed)) gl_server_state_t; +} __attribute__((aligned(8), packed)) gpu_state; -static inline const void *gl_get_attrib_element(const gl_array_t *src, uint32_t index) +static inline gpu_cmd_stream_t gpu_cmd_stream_begin(uint32_t ovl_id, uint32_t cmd_id, int size) { - return src->pointer + index * src->stride; -} - -static inline gl_cmd_stream_t gl_cmd_stream_begin(uint32_t ovl_id, uint32_t cmd_id, int size) -{ - return (gl_cmd_stream_t) { + return (gpu_cmd_stream_t) { .w = rspq_write_begin(ovl_id, cmd_id, size), .buffer_head = 2, }; } -static inline void gl_cmd_stream_commit(gl_cmd_stream_t *s) +static inline void gpu_cmd_stream_commit(gpu_cmd_stream_t *s) { rspq_write_arg(&s->w, s->word); s->buffer_head = 0; s->word = 0; } -static inline void gl_cmd_stream_put_half(gl_cmd_stream_t *s, uint16_t v) +static inline void gpu_cmd_stream_put_half(gpu_cmd_stream_t *s, uint16_t v) { s->bytes[s->buffer_head++] = v >> 8; s->bytes[s->buffer_head++] = v & 0xFF; if (s->buffer_head == sizeof(uint32_t)) { - gl_cmd_stream_commit(s); + gpu_cmd_stream_commit(s); } } -static inline void gl_cmd_stream_end(gl_cmd_stream_t *s) +static inline void gpu_cmd_stream_end(gpu_cmd_stream_t *s) { if (s->buffer_head > 0) { - gl_cmd_stream_commit(s); + gpu_cmd_stream_commit(s); } rspq_write_end(&s->w); } __attribute__((always_inline)) -static inline void gl_set_flag_raw(uint32_t offset, uint32_t flag, bool value) +static inline void gpu_set_flag_raw(uint32_t offset, uint32_t flag, bool value) { - rspq_write(glp_id, GPU_CMD_SET_FLAG, offset | value, value ? flag : ~flag); + rspq_write(gpup_id, GPU_CMD_SET_FLAG, offset | value, value ? flag : ~flag); } __attribute__((always_inline)) -static inline void gl_set_flag(uint32_t flag, bool value) +static inline void gpu_set_flag(uint32_t flag, bool value) { - gl_set_flag_raw(offsetof(gl_server_state_t, flags), flag, value); + gpu_set_flag_raw(offsetof(gpu_state, flags), flag, value); } __attribute__((always_inline)) -static inline void gl_set_byte(uint32_t offset, uint8_t value) +static inline void gpu_set_byte(uint32_t offset, uint8_t value) { - rspq_write(glp_id, GPU_CMD_SET_BYTE, offset, value); + rspq_write(gpup_id, GPU_CMD_SET_BYTE, offset, value); } __attribute__((always_inline)) -static inline void gl_set_short(uint32_t offset, uint16_t value) +static inline void gpu_set_short(uint32_t offset, uint16_t value) { - rspq_write(glp_id, GPU_CMD_SET_SHORT, offset, value); + rspq_write(gpup_id, GPU_CMD_SET_SHORT, offset, value); } __attribute__((always_inline)) -static inline void gl_set_word(uint32_t offset, uint32_t value) +static inline void gpu_set_word(uint32_t offset, uint32_t value) { - rspq_write(glp_id, GPU_CMD_SET_WORD, offset, value); + rspq_write(gpup_id, GPU_CMD_SET_WORD, offset, value); } __attribute__((always_inline)) -static inline void gl_set_long(uint32_t offset, uint64_t value) +static inline void gpu_set_long(uint32_t offset, uint64_t value) { - rspq_write(glp_id, GPU_CMD_SET_LONG, offset, value >> 32, value & 0xFFFFFFFF); + rspq_write(gpup_id, GPU_CMD_SET_LONG, offset, value >> 32, value & 0xFFFFFFFF); } -static inline void glpipe_draw_triangle(int i0, int i1, int i2) +static inline void gpupipe_draw_triangle(int i0, int i1, int i2) { // We pass -1 because the triangle can be clipped and split into multiple // triangles. - rdpq_write(-1, glp_id, GPU_CMD_DRAW_TRI, + rdpq_write(-1, gpup_id, GPU_CMD_DRAW_TRI, (i0*PRIM_VTX_SIZE), ((i1*PRIM_VTX_SIZE)<<16) | (i2*PRIM_VTX_SIZE) ); } -static gl_viewport_t state_viewport; -static gl_array_t state_arrays[ATTRIB_COUNT]; +static gpu_viewport_t state_viewport; +static bool gpu_texturing; +static void* gpu_pointer; +static int gpu_stride; -void gl_init() -{ - glp_id = rspq_overlay_register(&rsp_gpu); - glDepthRange(0, 1); -} - -void gl_close() -{ - rspq_wait(); - rspq_overlay_unregister(glp_id); -} - -void gl_set_flag2(GLenum target, bool value) +#define GPU_ATTR_Z 0 +#define GPU_ATTR_TEX 1 +static void gpuSetFlag(int target, bool value) { switch (target) { - case GL_DEPTH_TEST: - gl_set_flag(FLAG_DEPTH_TEST, value); + case GPU_ATTR_Z: + gpu_set_flag(FLAG_DEPTH_TEST, value); break; - case GL_TEXTURE_2D: - gl_set_flag(FLAG_TEXTURE_ACTIVE, value); + case GPU_ATTR_TEX: + gpu_set_flag(FLAG_TEXTURE_ACTIVE, value); break; } } -void glEnable(GLenum target) +static void gpuSetTexSize(uint16_t width, uint16_t height) { - gl_set_flag2(target, true); -} - -void glDisable(GLenum target) -{ - gl_set_flag2(target, false); -} - -void glTexSizeN64(uint16_t width, uint16_t height) -{ - gl_set_word(offsetof(gl_server_state_t, tex_size[0]), (width << 16) | height); + gpu_set_word(offsetof(gpu_state, tex_size[0]), (width << 16) | height); } @@ -228,7 +188,7 @@ static inline void write_shorts(rspq_write_t *w, const uint16_t *s, uint32_t cou } } -static inline void gl_matrix_write(rspq_write_t *w, const GLfloat *m) +static inline void gpu_matrix_write(rspq_write_t* w, const float* m) { uint16_t integer[16]; uint16_t fraction[16]; @@ -244,125 +204,80 @@ static inline void gl_matrix_write(rspq_write_t *w, const GLfloat *m) write_shorts(w, fraction, 16); } -void glLoadMatrixf(const GLfloat *m) +static void gpuLoadMatrix(const float* m) { - rspq_write_t w = rspq_write_begin(glp_id, GPU_CMD_MATRIX_LOAD, 17); + rspq_write_t w = rspq_write_begin(gpup_id, GPU_CMD_MATRIX_LOAD, 17); rspq_write_arg(&w, false); // no multiply - gl_matrix_write(&w, m); + gpu_matrix_write(&w, m); rspq_write_end(&w); } -static void upload_vertex(const gl_array_t *arrays, uint32_t index, uint8_t cache_index) +static void upload_vertex(uint32_t index, uint8_t cache_index) { - gl_cmd_stream_t s = gl_cmd_stream_begin(glp_id, GPU_CMD_UPLOAD_VTX, 6); - gl_cmd_stream_put_half(&s, cache_index * PRIM_VTX_SIZE); + gpu_cmd_stream_t s = gpu_cmd_stream_begin(gpup_id, GPU_CMD_UPLOAD_VTX, 6); + gpu_cmd_stream_put_half(&s, cache_index * PRIM_VTX_SIZE); + char* ptr = gpu_pointer + index * gpu_stride; - const float* vtx = gl_get_attrib_element(&arrays[ATTRIB_VERTEX], index); - gl_cmd_stream_put_half(&s, vtx[0] * (1<stride = stride; - array->pointer = pointer; -} - -void glTexCoordPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *pointer) -{ - gl_array_t *array = &state_arrays[ATTRIB_TEXCOORD]; - array->stride = stride; - array->pointer = pointer; -} - -void glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid *pointer) -{ - gl_array_t *array = &state_arrays[ATTRIB_COLOR]; - array->stride = stride; - array->pointer = pointer; -} - -void gl_set_array_enabled(int array_type, bool enabled) -{ - state_arrays[array_type].enabled = enabled; -} - -void glEnableClientState(GLenum array) -{ - gl_set_array_enabled(gl_array_type_from_enum(array), true); -} - -void glDisableClientState(GLenum array) -{ - gl_set_array_enabled(gl_array_type_from_enum(array), false); -} - -void glDrawArrays(GLenum mode, GLint first, GLsizei count) -{ - rspq_write(glp_id, GPU_CMD_PRE_INIT_PIPE); - gl_rsp_draw_arrays(first, count); -} - -void glDepthRange(GLclampd n, GLclampd f) +static void gpuDepthRange(float n, float f) { state_viewport.scale[2] = (f - n) * 0.5f; state_viewport.offset[2] = n + (f - n) * 0.5f; - gl_set_short( - offsetof(gl_server_state_t, viewport_scale) + sizeof(int16_t) * 2, + gpu_set_short( + offsetof(gpu_state, viewport_scale) + sizeof(int16_t) * 2, state_viewport.scale[2] * 4); - gl_set_short( - offsetof(gl_server_state_t, viewport_offset) + sizeof(int16_t) * 2, + gpu_set_short( + offsetof(gpu_state, viewport_offset) + sizeof(int16_t) * 2, state_viewport.offset[2] * 4); } -void glViewport(GLint x, GLint y, GLsizei w, GLsizei h) +static void gpuViewport(int x, int y, int w, int h) { state_viewport.scale[0] = w * 0.5f; state_viewport.scale[1] = h * -0.5f; @@ -382,17 +297,29 @@ void glViewport(GLint x, GLint y, GLsizei w, GLsizei h) uint16_t offset_y = state_viewport.offset[1] * SCREEN_XY_SCALE; uint16_t offset_z = state_viewport.offset[2] * SCREEN_Z_SCALE; - gl_set_long( - offsetof(gl_server_state_t, viewport_scale), + gpu_set_long( + offsetof(gpu_state, viewport_scale), ((uint64_t)scale_x << 48) | ((uint64_t)scale_y << 32) | ((uint64_t)scale_z << 16)); - gl_set_long( - offsetof(gl_server_state_t, viewport_offset), + gpu_set_long( + offsetof(gpu_state, viewport_offset), ((uint64_t)offset_x << 48) | ((uint64_t)offset_y << 32) | ((uint64_t)offset_z << 16)); } -void glCullFace(GLenum mode) +static void gpuSetCullFace(bool enabled) { // 1 = cull backfaces // 2 = don't cull - gl_set_short(offsetof(gl_server_state_t, tri_cull), mode ? 1 : 2); + gpu_set_short(offsetof(gpu_state, tri_cull), enabled ? 1 : 2); +} + +void gpu_init() +{ + gpup_id = rspq_overlay_register(&rsp_gpu); + gpuDepthRange(0, 1); +} + +void gpu_close() +{ + rspq_wait(); + rspq_overlay_unregister(gpup_id); } diff --git a/src/Graphics_N64.c b/src/Graphics_N64.c index 2331f86ec..7c7c207ca 100644 --- a/src/Graphics_N64.c +++ b/src/Graphics_N64.c @@ -9,10 +9,6 @@ #include #include "../misc/n64/gpu.c" -typedef void (*GL_SetupVBFunc)(void); -static GL_SetupVBFunc gfx_setupVBFunc; - - /*########################################################################################################################* *---------------------------------------------------------General---------------------------------------------------------* *#########################################################################################################################*/ @@ -34,7 +30,7 @@ void Gfx_Create(void) { // Set alpha compare threshold rdpq_set_blend_color(RGBA32(0,0,0, 127)); - gl_init(); + gpu_init(); zbuffer = surface_alloc(FMT_RGBA16, display_get_width(), display_get_height()); Gfx.MaxTexWidth = 256; @@ -60,11 +56,9 @@ cc_bool Gfx_TryRestoreContext(void) { void Gfx_Free(void) { Gfx_FreeState(); - gl_close(); + gpu_close(); } -#define gl_Toggle(cap) if (enabled) { glEnable(cap); } else { glDisable(cap); } - /*########################################################################################################################* *-----------------------------------------------------------Misc----------------------------------------------------------* @@ -89,7 +83,7 @@ void Gfx_SetVSync(cc_bool vsync) { void Gfx_OnWindowResize(void) { } void Gfx_SetViewport(int x, int y, int w, int h) { - glViewport(x, y, w, h); + gpuViewport(x, y, w, h); } void Gfx_SetScissor(int x, int y, int w, int h) { @@ -146,7 +140,7 @@ void Gfx_BindTexture(GfxResourceID texId) { CCTexture* tex = (CCTexture*)texId; rspq_block_run(tex->upload_block); - glTexSizeN64(tex->surface.width, tex->surface.height); + gpuSetTexSize(tex->surface.width, tex->surface.height); } #define ALIGNUP8(size) (((size) + 7) & ~0x07) @@ -207,10 +201,7 @@ GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, } void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, int rowWidth, cc_bool mipmaps) { - // TODO: Just memcpying doesn't actually work. maybe due to glSurfaceTexImageN64 caching the RSQ upload block? - // TODO: Is there a more optimised approach than just calling glSurfaceTexImageN64 CCTexture* tex = (CCTexture*)texId; - surface_t* fb = &tex->surface; cc_uint32* src = (cc_uint32*)part->scan0 + x; cc_uint8* dst = (cc_uint8*)fb->buffer + (x * 4) + (y * fb->stride); @@ -250,7 +241,7 @@ void Gfx_DisableMipmaps(void) { } *-----------------------------------------------------State management----------------------------------------------------* *#########################################################################################################################*/ void Gfx_SetFaceCulling(cc_bool enabled) { - glCullFace(enabled ? GL_BACK : 0); + gpuSetCullFace(enabled); } static void SetAlphaBlend(cc_bool enabled) { @@ -265,7 +256,7 @@ static void SetAlphaTest(cc_bool enabled) { } static void SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a) { - //glColorMask(r, g, b, a); TODO + //gpuColorMask(r, g, b, a); TODO } void Gfx_SetDepthWrite(cc_bool enabled) { @@ -275,14 +266,12 @@ void Gfx_SetDepthWrite(cc_bool enabled) { void Gfx_SetDepthTest(cc_bool enabled) { __rdpq_mode_change_som(SOM_Z_COMPARE, enabled ? SOM_Z_COMPARE : 0); - gl_Toggle(GL_DEPTH_TEST); + gpuSetFlag(GPU_ATTR_Z, enabled); } static void Gfx_FreeState(void) { FreeDefaultResources(); } static void Gfx_RestoreState(void) { InitDefaultResources(); - glEnableClientState(GL_VERTEX_ARRAY); - glEnableClientState(GL_COLOR_ARRAY); gfx_format = -1; // 1x1 dummy white texture @@ -379,8 +368,8 @@ static rspq_block_t* VB_GetCached(struct VertexBuffer* vb, int offset, int count if (vb->cache.blocks[i]) continue; rspq_block_begin(); - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, offset, count); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(offset, count); rspq_block_t* block = rspq_block_end(); vb->cache.blocks[i] = block; @@ -469,9 +458,10 @@ void Gfx_SetFogMode(FogFunc func) { void Gfx_DepthOnlyRendering(cc_bool depthOnly) { depthOnlyRendering = depthOnly; // TODO: Better approach? maybe using glBlendFunc instead? cc_bool enabled = !depthOnly; + //SetColorWrite(enabled & gfx_colorMask[0], enabled & gfx_colorMask[1], // enabled & gfx_colorMask[2], enabled & gfx_colorMask[3]); - if (enabled) { glEnable(GL_TEXTURE_2D); } else { glDisable(GL_TEXTURE_2D); } + gpuSetFlag(GPU_ATTR_TEX, enabled); } @@ -486,7 +476,7 @@ void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) { struct Matrix mvp __attribute__((aligned(64))); Matrix_Mul(&mvp, &_view, &_proj); - glLoadMatrixf((const float*)&mvp); + gpuLoadMatrix((const float*)&mvp); } void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp) { @@ -494,7 +484,7 @@ void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Ma _view = *view; Matrix_Mul(mvp, view, proj); - glLoadMatrixf((const float*)mvp); + gpuLoadMatrix((const float*)mvp); } void Gfx_EnableTextureOffset(float x, float y) { @@ -507,35 +497,20 @@ void Gfx_DisableTextureOffset(void) { } /*########################################################################################################################* *--------------------------------------------------------Rendering--------------------------------------------------------* *#########################################################################################################################*/ -static void GL_SetupVbColoured(void) { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 0)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 12)); -} - -static void GL_SetupVbTextured(void) { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 0)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12)); - glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16)); -} - void Gfx_SetVertexFormat(VertexFormat fmt) { if (fmt == gfx_format) return; gfx_format = fmt; gfx_stride = strideSizes[fmt]; + gpu_stride = gfx_stride; if (fmt == VERTEX_FORMAT_TEXTURED) { - glEnableClientState(GL_TEXTURE_COORD_ARRAY); - glEnable(GL_TEXTURE_2D); - - gfx_setupVBFunc = GL_SetupVbTextured; rdpq_mode_combiner(RDPQ_COMBINER_TEX_SHADE); } else { - glDisableClientState(GL_TEXTURE_COORD_ARRAY); - glDisable(GL_TEXTURE_2D); - - gfx_setupVBFunc = GL_SetupVbColoured; rdpq_mode_combiner(RDPQ_COMBINER_SHADE); } + + gpu_texturing = fmt == VERTEX_FORMAT_TEXTURED; + gpuSetFlag(GPU_ATTR_TEX, gpu_texturing); } void Gfx_DrawVb_Lines(int verticesCount) { @@ -547,8 +522,8 @@ void Gfx_DrawVb_IndexedTris_Range(int verticesCount, int startVertex, DrawHints if (block) { rspq_block_run(block); } else { - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, startVertex, verticesCount); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(startVertex, verticesCount); } } @@ -558,8 +533,8 @@ void Gfx_DrawVb_IndexedTris(int verticesCount) { if (block) { rspq_block_run(block); } else { - gfx_setupVBFunc(); - glDrawArrays(GL_QUADS, 0, verticesCount); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(0, verticesCount); } } @@ -570,10 +545,8 @@ void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex) { if (block) { rspq_block_run(block); } else { - glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices)); - glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12)); - glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16)); - glDrawArrays(GL_QUADS, startVertex, verticesCount); + gpu_pointer = gfx_vb->vertices; + gpuDrawArrays(startVertex, verticesCount); } } #endif