mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-10 16:03:15 -04:00
Merge pull request #1356 from ClassiCube/N64Optim
Optimise GPU backend for N64
This commit is contained in:
commit
8b36940221
@ -1,23 +1,28 @@
|
|||||||
BUILD_DIR = build-n64
|
BUILD_DIR = build-n64
|
||||||
SOURCE_DIR = src
|
SOURCE_DIR = misc/n64
|
||||||
N64_ROM_TITLE = "ClassiCube"
|
N64_ROM_TITLE = "ClassiCube"
|
||||||
N64_ROM_RTC = true
|
N64_ROM_RTC = true
|
||||||
TARGET = ClassiCube-n64
|
TARGET = ClassiCube-n64
|
||||||
N64_MKDFS_ROOT = "misc/n64"
|
N64_MKDFS_ROOT = "misc/n64/files"
|
||||||
|
|
||||||
CFILES := $(notdir $(wildcard src/*.c))
|
CFILES := $(notdir $(wildcard src/*.c))
|
||||||
OFILES := $(CFILES:.c=.o)
|
OFILES := $(CFILES:.c=.o) rsp_gpu.o
|
||||||
OBJS := $(addprefix $(BUILD_DIR)/,$(OFILES))
|
OBJS := $(addprefix $(BUILD_DIR)/,$(OFILES))
|
||||||
CFLAGS := -Wno-error=missing-braces -Wno-error=strict-aliasing -Wno-error=incompatible-pointer-types
|
CFLAGS := -Wno-error=missing-braces -Wno-error=strict-aliasing -Wno-error=incompatible-pointer-types
|
||||||
|
|
||||||
default: $(TARGET).z64
|
default: $(TARGET).z64
|
||||||
|
|
||||||
|
$(BUILD_DIR)/%.o: src/%.c
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
@echo " [CC] $<"
|
||||||
|
$(CC) -c $(CFLAGS) -o $@ $<
|
||||||
|
|
||||||
include $(N64_INST)/include/n64.mk
|
include $(N64_INST)/include/n64.mk
|
||||||
|
|
||||||
$(TARGET).z64: N64_ROM_TITLE = "ClassiCube"
|
$(TARGET).z64: N64_ROM_TITLE = "ClassiCube"
|
||||||
$(TARGET).z64: $(BUILD_DIR)/filesystem.dfs
|
$(TARGET).z64: $(BUILD_DIR)/filesystem.dfs
|
||||||
|
|
||||||
$(BUILD_DIR)/filesystem.dfs: misc/n64/default.zip
|
$(BUILD_DIR)/filesystem.dfs: misc/n64/files/default.zip
|
||||||
|
|
||||||
$(BUILD_DIR)/ClassiCube-n64.elf: $(OBJS)
|
$(BUILD_DIR)/ClassiCube-n64.elf: $(OBJS)
|
||||||
|
|
||||||
|
242
misc/n64/gpu.c
Normal file
242
misc/n64/gpu.c
Normal file
@ -0,0 +1,242 @@
|
|||||||
|
#include "rspq.h"
|
||||||
|
#include "rdpq.h"
|
||||||
|
#include "rdpq_rect.h"
|
||||||
|
#include "rdpq_mode.h"
|
||||||
|
#include "rdpq_debug.h"
|
||||||
|
#include "display.h"
|
||||||
|
|
||||||
|
// This is a severely cutdown version of libdragon's OpenGL implementation
|
||||||
|
#define VTX_SHIFT 5
|
||||||
|
#define TEX_SHIFT 8
|
||||||
|
|
||||||
|
static uint32_t gpup_id;
|
||||||
|
//DEFINE_RSP_UCODE(rsp_gpu);
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_text_bin_start[];
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_data_bin_start[];
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_meta_bin_start[];
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_text_bin_end[0];
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_data_bin_end[0];
|
||||||
|
extern uint8_t _binary_build_n64_rsp_gpu_meta_bin_end[0];
|
||||||
|
|
||||||
|
static rsp_ucode_t rsp_gpu = (rsp_ucode_t){
|
||||||
|
.code = _binary_build_n64_rsp_gpu_text_bin_start,
|
||||||
|
.code_end = _binary_build_n64_rsp_gpu_text_bin_end,
|
||||||
|
.data = _binary_build_n64_rsp_gpu_data_bin_start,
|
||||||
|
.data_end = _binary_build_n64_rsp_gpu_data_bin_end,
|
||||||
|
.meta = _binary_build_n64_rsp_gpu_meta_bin_start,
|
||||||
|
.meta_end = _binary_build_n64_rsp_gpu_meta_bin_end,
|
||||||
|
.name = "rsp_gpu"
|
||||||
|
};
|
||||||
|
|
||||||
|
// Command IDs understood by the rsp_gpu overlay. The values follow the
// order of the RSPQ_DefineCommand entries in rsp_gpu.S (0x0 .. 0x6), so
// the two lists have to be kept in sync.
enum {
    // Store a value of the given width at an offset into the RSP-side state
    GPU_CMD_SET_BYTE    = 0x0,
    GPU_CMD_SET_SHORT   = 0x1,
    GPU_CMD_SET_WORD    = 0x2,
    GPU_CMD_SET_LONG    = 0x3,

    // Geometry and transform commands
    GPU_CMD_DRAW_QUAD   = 0x4,
    GPU_CMD_MATRIX_LOAD = 0x5,

    // Forward a command to the RDP
    GPU_CMD_PUSH_RDP    = 0x6,
};
|
||||||
|
|
||||||
|
// CPU-side description of the state block kept by the rsp_gpu overlay.
// The fields appear in the same order and with the same sizes as the
// GL_STATE labels in rsp_gpu.S; the struct is only used with offsetof()
// in this file to address individual fields in the gpu_set_* commands.
typedef struct {
    int16_t  mvp_matrix_i[4][4]; // GL_MATRIX_MVP, first 32 bytes (integer halves)
    uint16_t mvp_matrix_f[4][4]; // GL_MATRIX_MVP, last 32 bytes (fraction halves)
    int16_t  vp_scale[4];        // GL_VIEWPORT_SCALE
    int16_t  vp_offset[4];       // GL_VIEWPORT_OFFSET
    uint16_t tex_size[2];        // GL_STATE_TEX_SIZE
    uint16_t tex_offset[2];      // GL_STATE_TEX_OFFSET
    uint16_t tri_cmd;            // GL_TRI_CMD
    uint16_t tri_cull;           // GL_TRI_CULL
} __attribute__((aligned(8), packed)) gpu_state;
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static inline void gpu_set_byte(uint32_t offset, uint8_t value)
|
||||||
|
{
|
||||||
|
rspq_write(gpup_id, GPU_CMD_SET_BYTE, offset, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static inline void gpu_set_short(uint32_t offset, uint16_t value)
|
||||||
|
{
|
||||||
|
rspq_write(gpup_id, GPU_CMD_SET_SHORT, offset, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static inline void gpu_set_word(uint32_t offset, uint32_t value)
|
||||||
|
{
|
||||||
|
rspq_write(gpup_id, GPU_CMD_SET_WORD, offset, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static inline void gpu_set_long(uint32_t offset, uint64_t value)
|
||||||
|
{
|
||||||
|
rspq_write(gpup_id, GPU_CMD_SET_LONG, offset, value >> 32, value & 0xFFFFFFFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define RDP_CMD_SYNC_PIPE 0xE7000000
|
||||||
|
#define RDP_CMD_SET_BLEND_COLOR 0xF9000000
|
||||||
|
|
||||||
|
__attribute__((always_inline))
|
||||||
|
static inline void gpu_push_rdp(uint32_t a1, uint64_t a2)
|
||||||
|
{
|
||||||
|
rdpq_write(2, gpup_id, GPU_CMD_PUSH_RDP, 0, a1, a2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static float gpu_vp_scale[3];
|
||||||
|
static float gpu_vp_offset[3];
|
||||||
|
static bool gpu_texturing;
|
||||||
|
static void* gpu_pointer;
|
||||||
|
static int gpu_stride;
|
||||||
|
|
||||||
|
#define GPU_ATTR_Z (1 << 8)
|
||||||
|
#define GPU_ATTR_TEX (1 << 9)
|
||||||
|
#define GPU_ATTR_SHADE (1 << 10)
|
||||||
|
#define GPU_ATTR_EDGE (1 << 11)
|
||||||
|
static bool gpu_attr_z, gpu_attr_tex;
|
||||||
|
|
||||||
|
static void gpuUpdateFormat(void)
|
||||||
|
{
|
||||||
|
uint16_t cmd = 0xC000 | GPU_ATTR_SHADE | GPU_ATTR_EDGE;
|
||||||
|
|
||||||
|
if (gpu_attr_z) cmd |= GPU_ATTR_Z;
|
||||||
|
if (gpu_attr_tex) cmd |= GPU_ATTR_TEX;
|
||||||
|
|
||||||
|
gpu_set_short(offsetof(gpu_state, tri_cmd), cmd);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpuSetTexSize(uint16_t width, uint16_t height)
|
||||||
|
{
|
||||||
|
gpu_set_word(offsetof(gpu_state, tex_size[0]), (width << 16) | height);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Writes `count` 16-bit values from `s` as command arguments, two per
// 32-bit word: s[i] in the upper half and s[i+1] in the lower half.
// If `count` is odd, the lower half of the final word is written as 0
// rather than reading one element past the end of `s`.
static inline void write_shorts(rspq_write_t *w, const uint16_t *s, uint32_t count)
{
    for (uint32_t i = 0; i < count; i += 2)
    {
        uint32_t hi = s[i];
        // i < count holds here, so count - i cannot wrap
        uint32_t lo = (count - i > 1) ? s[i + 1] : 0;

        rspq_write_arg(w, (hi << 16) | lo);
    }
}
|
||||||
|
|
||||||
|
// Converts a 16-element float matrix to 16.16 fixed point and writes it
// as command arguments: first the 16 upper (integer) halves, then the
// 16 lower (fraction) halves.
static inline void gpu_matrix_write(rspq_write_t* w, const float* m)
{
    enum { MATRIX_ELEMS = 16 };
    uint16_t hi_parts[MATRIX_ELEMS];
    uint16_t lo_parts[MATRIX_ELEMS];

    for (uint32_t idx = 0; idx < MATRIX_ELEMS; idx++)
    {
        int32_t  fixed = m[idx] * (1<<16);
        uint32_t bits  = (uint32_t)fixed;

        hi_parts[idx] = (uint16_t)(bits >> 16);
        lo_parts[idx] = (uint16_t)(bits & 0xFFFF);
    }

    write_shorts(w, hi_parts, MATRIX_ELEMS);
    write_shorts(w, lo_parts, MATRIX_ELEMS);
}
|
||||||
|
|
||||||
|
static void gpuLoadMatrix(const float* m)
|
||||||
|
{
|
||||||
|
rspq_write_t w = rspq_write_begin(gpup_id, GPU_CMD_MATRIX_LOAD, 17);
|
||||||
|
rspq_write_arg(&w, 0); // padding
|
||||||
|
gpu_matrix_write(&w, m);
|
||||||
|
rspq_write_end(&w);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Writes one 32-bit command argument made of two 16-bit values:
// v1 in the upper half, v2 in the lower half.
static inline void put_word(rspq_write_t* s, uint16_t v1, uint16_t v2)
{
    // Widen before shifting: v1 promotes to (signed) int, and shifting a
    // value with bit 15 set left by 16 would overflow it. Negative
    // fixed-point coordinates always have bit 15 set.
    uint32_t packed = ((uint32_t)v1 << 16) | (uint32_t)v2;

    rspq_write_arg(s, packed);
}
|
||||||
|
|
||||||
|
// Converts a float to 16-bit fixed point with `shift` fractional bits.
// The value goes through a signed 32-bit integer first so that negative
// inputs wrap to their two's complement bit pattern; converting a
// negative float straight to an unsigned type is undefined in C.
static inline uint16_t gpu_float_to_fixed16(float value, int shift)
{
    return (uint16_t)(int32_t)(value * (1 << shift));
}

// Appends vertex number `index` of the current vertex array
// (gpu_pointer / gpu_stride) to the command being written, as 4 words:
//   word 0: X, Y   (fixed point, VTX_SHIFT fractional bits)
//   word 1: Z, W   (W is the constant 1.0)
//   word 2: colour, copied unchanged from byte offset 12
//   word 3: U, V   (fixed point, TEX_SHIFT fractional bits), or 0, 0
//           when gpu_texturing is off
// The vertex is read as three floats at offset 0, a 32-bit colour at
// offset 12 and, when texturing, two floats at offset 16.
static void upload_vertex(rspq_write_t* s, uint32_t index)
{
    // Cast before the arithmetic; arithmetic on void* is a GNU extension
    const char* ptr = (const char*)gpu_pointer + index * gpu_stride;

    const float* vtx = (const float*)(ptr + 0);
    put_word(s, gpu_float_to_fixed16(vtx[0], VTX_SHIFT),
                gpu_float_to_fixed16(vtx[1], VTX_SHIFT));
    put_word(s, gpu_float_to_fixed16(vtx[2], VTX_SHIFT),
                gpu_float_to_fixed16(1.0f,   VTX_SHIFT));

    const uint32_t* col = (const uint32_t*)(ptr + 12);
    rspq_write_arg(s, *col);

    if (gpu_texturing) {
        const float* tex = (const float*)(ptr + 16);
        put_word(s, gpu_float_to_fixed16(tex[0], TEX_SHIFT),
                    gpu_float_to_fixed16(tex[1], TEX_SHIFT));
    } else {
        put_word(s, 0, 0);
    }
}
|
||||||
|
|
||||||
|
static void gpuDrawArrays(uint32_t first, uint32_t count)
|
||||||
|
{
|
||||||
|
for (uint32_t i = 0; i < count; i += 4)
|
||||||
|
{
|
||||||
|
rspq_write_t s = rspq_write_begin(gpup_id, GPU_CMD_DRAW_QUAD, 17);
|
||||||
|
rspq_write_arg(&s, 0); // padding
|
||||||
|
for (uint32_t j = 0; j < 4; j++)
|
||||||
|
{
|
||||||
|
upload_vertex(&s, first + i + j);
|
||||||
|
}
|
||||||
|
rspq_write_end(&s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpuDepthRange(float n, float f)
|
||||||
|
{
|
||||||
|
gpu_vp_scale[2] = (f - n) * 0.5f;
|
||||||
|
gpu_vp_offset[2] = n + (f - n) * 0.5f;
|
||||||
|
|
||||||
|
gpu_set_short(offsetof(gpu_state, vp_scale[2]), gpu_vp_scale[2] * 4);
|
||||||
|
gpu_set_short(offsetof(gpu_state, vp_offset[2]), gpu_vp_offset[2] * 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpuViewport(int x, int y, int w, int h)
|
||||||
|
{
|
||||||
|
gpu_vp_scale[0] = w * 0.5f;
|
||||||
|
gpu_vp_scale[1] = h * -0.5f;
|
||||||
|
gpu_vp_offset[0] = x + w * 0.5f;
|
||||||
|
gpu_vp_offset[1] = y + h * 0.5f;
|
||||||
|
|
||||||
|
// Screen coordinates are s13.2
|
||||||
|
#define SCREEN_XY_SCALE 4.0f
|
||||||
|
#define SCREEN_Z_SCALE 32767.0f
|
||||||
|
|
||||||
|
// * 2.0f to compensate for RSP reciprocal missing 1 bit
|
||||||
|
uint16_t scale_x = gpu_vp_scale[0] * SCREEN_XY_SCALE * 2.0f;
|
||||||
|
uint16_t scale_y = gpu_vp_scale[1] * SCREEN_XY_SCALE * 2.0f;
|
||||||
|
uint16_t scale_z = gpu_vp_scale[2] * SCREEN_Z_SCALE * 2.0f;
|
||||||
|
|
||||||
|
uint16_t offset_x = gpu_vp_offset[0] * SCREEN_XY_SCALE;
|
||||||
|
uint16_t offset_y = gpu_vp_offset[1] * SCREEN_XY_SCALE;
|
||||||
|
uint16_t offset_z = gpu_vp_offset[2] * SCREEN_Z_SCALE;
|
||||||
|
|
||||||
|
gpu_set_long(
|
||||||
|
offsetof(gpu_state, vp_scale),
|
||||||
|
((uint64_t)scale_x << 48) | ((uint64_t)scale_y << 32) | ((uint64_t)scale_z << 16));
|
||||||
|
gpu_set_long(
|
||||||
|
offsetof(gpu_state, vp_offset),
|
||||||
|
((uint64_t)offset_x << 48) | ((uint64_t)offset_y << 32) | ((uint64_t)offset_z << 16));
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpuSetCullFace(bool enabled) {
|
||||||
|
// 1 = cull backfaces
|
||||||
|
// 2 = don't cull
|
||||||
|
gpu_set_short(offsetof(gpu_state, tri_cull), enabled ? 1 : 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpu_init() {
|
||||||
|
gpup_id = rspq_overlay_register(&rsp_gpu);
|
||||||
|
gpuDepthRange(0, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void gpu_close() {
|
||||||
|
rspq_wait();
|
||||||
|
rspq_overlay_unregister(gpup_id);
|
||||||
|
}
|
531
misc/n64/rsp_gpu.S
Normal file
531
misc/n64/rsp_gpu.S
Normal file
@ -0,0 +1,531 @@
|
|||||||
|
#include <rsp_queue.inc>
|
||||||
|
#include <rdpq_macros.h>
|
||||||
|
#define MATRIX_SIZE 64
|
||||||
|
#define GUARD_BAND_FACTOR 2
|
||||||
|
|
||||||
|
.data
|
||||||
|
|
||||||
|
RSPQ_BeginOverlayHeader
|
||||||
|
RSPQ_DefineCommand GPUCmd_SetByte, 8 # 0x0
|
||||||
|
RSPQ_DefineCommand GPUCmd_SetShort, 8 # 0x1
|
||||||
|
RSPQ_DefineCommand GPUCmd_SetWord, 8 # 0x2
|
||||||
|
RSPQ_DefineCommand GPUCmd_SetLong, 12 # 0x3
|
||||||
|
|
||||||
|
RSPQ_DefineCommand GPUCmd_DrawQuad, 68 # 0x4
|
||||||
|
RSPQ_DefineCommand GPUCmd_MatrixLoad, 68 # 0x5
|
||||||
|
|
||||||
|
RSPQ_DefineCommand GPUCmd_PushRDP, 12 # 0x6
|
||||||
|
RSPQ_EndOverlayHeader
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
BANNER0: .ascii " RSP OpenGL T&L "
|
||||||
|
BANNER1: .ascii "Rasky & Snacchus"
|
||||||
|
|
||||||
|
RSPQ_BeginSavedState
|
||||||
|
|
||||||
|
GL_STATE:
|
||||||
|
# This is the GL state that is also used by the pipeline.
|
||||||
|
GL_MATRIX_MVP: .ds.b MATRIX_SIZE
|
||||||
|
GL_VIEWPORT_SCALE: .half 0,0,0,0
|
||||||
|
GL_VIEWPORT_OFFSET: .half 0,0,0,0
|
||||||
|
GL_STATE_TEX_SIZE: .half 0,0
|
||||||
|
GL_STATE_TEX_OFFSET: .half 0,0
|
||||||
|
GL_TRI_CMD: .half 0
|
||||||
|
GL_TRI_CULL: .half 0
|
||||||
|
|
||||||
|
RSPQ_EndSavedState
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
CLIP_CODE_FACTORS: .half 1, 1, GUARD_BAND_FACTOR, GUARD_BAND_FACTOR
|
||||||
|
DRAW_TRI_RA: .word 0
|
||||||
|
|
||||||
|
#define SCREEN_VTX_CS_POSi 0 // X, Y, Z, W (all 32-bit)
|
||||||
|
#define SCREEN_VTX_CS_POSf 8 // X, Y, Z, W (all 32-bit)
|
||||||
|
#define SCREEN_VTX_X 16
|
||||||
|
#define SCREEN_VTX_Y 18
|
||||||
|
#define SCREEN_VTX_Z 20
|
||||||
|
#define SCREEN_VTX_CLIP_CODE 22
|
||||||
|
#define SCREEN_VTX_PADDING 23
|
||||||
|
#define SCREEN_VTX_RGBA 24
|
||||||
|
#define SCREEN_VTX_S_T 28 // 28 S, 30 T
|
||||||
|
#define SCREEN_VTX_W 32 // FIXME: this is duplicated in CS_POS
|
||||||
|
#define SCREEN_VTX_INVW 36 // 32-bit
|
||||||
|
#define SCREEN_VTX_SIZE 40
|
||||||
|
|
||||||
|
.bss
|
||||||
|
.align 3
|
||||||
|
#define VERTEX_CACHE_SIZE 4
|
||||||
|
//0-39 same as screenvtx
|
||||||
|
#define PRIM_VTX_TRCODE 40 // trivial-reject clipping flags (against -w/+w)
|
||||||
|
#define PRIM_VTX_SIZE 42
|
||||||
|
|
||||||
|
VERTEX_CACHE: .dcb.b PRIM_VTX_SIZE * VERTEX_CACHE_SIZE
|
||||||
|
|
||||||
|
.text
|
||||||
|
|
||||||
|
.func GPUCmd_SetByte
|
||||||
|
GPUCmd_SetByte:
|
||||||
|
jr ra
|
||||||
|
sb a1, %lo(GL_STATE)(a0)
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
.func GPUCmd_SetShort
|
||||||
|
GPUCmd_SetShort:
|
||||||
|
jr ra
|
||||||
|
sh a1, %lo(GL_STATE)(a0)
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
.func GPUCmd_SetWord
|
||||||
|
GPUCmd_SetWord:
|
||||||
|
jr ra
|
||||||
|
sw a1, %lo(GL_STATE) + 0(a0)
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
.func GPUCmd_SetLong
|
||||||
|
GPUCmd_SetLong:
|
||||||
|
sw a2, %lo(GL_STATE) + 4(a0)
|
||||||
|
jr ra
|
||||||
|
sw a1, %lo(GL_STATE) + 0(a0)
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
|
||||||
|
.func GPUCmd_PushRDP
|
||||||
|
GPUCmd_PushRDP:
|
||||||
|
# RDP command is expected in a0 and a1
|
||||||
|
move a0, a1
|
||||||
|
move a1, a2
|
||||||
|
|
||||||
|
jal_and_j RDPQ_Write8, RDPQ_Finalize
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
|
||||||
|
.func GPUCmd_MatrixLoad
|
||||||
|
GPUCmd_MatrixLoad:
|
||||||
|
#define src s6
|
||||||
|
#define dst s7
|
||||||
|
|
||||||
|
#define vrhs01_i $v02
|
||||||
|
#define vrhs01_f $v03
|
||||||
|
#define vrhs23_i $v04
|
||||||
|
#define vrhs23_f $v05
|
||||||
|
|
||||||
|
addi src, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||||
|
addi dst, zero, %lo(GL_MATRIX_MVP)
|
||||||
|
|
||||||
|
# Load the matrix from command parameters (misaligned)
|
||||||
|
lqv vrhs01_i, 0x00,src
|
||||||
|
lrv vrhs01_i, 0x10,src
|
||||||
|
lqv vrhs23_i, 0x10,src
|
||||||
|
lrv vrhs23_i, 0x20,src
|
||||||
|
lqv vrhs01_f, 0x20,src
|
||||||
|
lrv vrhs01_f, 0x30,src
|
||||||
|
lqv vrhs23_f, 0x30,src
|
||||||
|
lrv vrhs23_f, 0x40,src
|
||||||
|
|
||||||
|
sqv vrhs01_i, 0x00,dst
|
||||||
|
sqv vrhs23_i, 0x10,dst
|
||||||
|
sqv vrhs01_f, 0x20,dst
|
||||||
|
jr ra
|
||||||
|
sqv vrhs23_f, 0x30,dst
|
||||||
|
|
||||||
|
#undef src
|
||||||
|
#undef dst
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
.align 3
|
||||||
|
.func GPUCmd_DrawQuad
|
||||||
|
GPUCmd_DrawQuad:
|
||||||
|
#define vtx a0
|
||||||
|
#define mtx_ptr s0
|
||||||
|
#define src_ptr s4
|
||||||
|
#define vcount s3
|
||||||
|
|
||||||
|
#define v___ $v01
|
||||||
|
|
||||||
|
#define vmtx0_i $v16 // m00 m01 m02 m03
|
||||||
|
#define vmtx0_f $v17
|
||||||
|
#define vmtx1_i $v18 // m10 m11 m12 m13
|
||||||
|
#define vmtx1_f $v19
|
||||||
|
#define vmtx2_i $v20 // m20 m21 m22 m23
|
||||||
|
#define vmtx2_f $v21
|
||||||
|
#define vmtx3_i $v22 // m30 m31 m32 m03
|
||||||
|
#define vmtx3_f $v23
|
||||||
|
|
||||||
|
#define vpos $v24
|
||||||
|
#define vcol $v25
|
||||||
|
#define vtex $v26
|
||||||
|
#define vcspos_i $v28
|
||||||
|
#define vcspos_f $v29
|
||||||
|
|
||||||
|
#define x e0
|
||||||
|
#define y e1
|
||||||
|
#define z e2
|
||||||
|
#define w e3
|
||||||
|
|
||||||
|
addi src_ptr, rspq_dmem_buf_ptr, %lo(RSPQ_DMEM_BUFFER) - 64
|
||||||
|
li vtx, %lo(VERTEX_CACHE)
|
||||||
|
li vcount, 4
|
||||||
|
|
||||||
|
li mtx_ptr, %lo(GL_MATRIX_MVP)
|
||||||
|
ldv vmtx0_i.e0, 0x00,mtx_ptr
|
||||||
|
ldv vmtx1_i.e0, 0x08,mtx_ptr
|
||||||
|
ldv vmtx2_i.e0, 0x10,mtx_ptr
|
||||||
|
ldv vmtx3_i.e0, 0x18,mtx_ptr
|
||||||
|
ldv vmtx0_f.e0, 0x20,mtx_ptr
|
||||||
|
ldv vmtx1_f.e0, 0x28,mtx_ptr
|
||||||
|
ldv vmtx2_f.e0, 0x30,mtx_ptr
|
||||||
|
ldv vmtx3_f.e0, 0x38,mtx_ptr
|
||||||
|
|
||||||
|
upload_vertex:
|
||||||
|
ldv vpos, 0, src_ptr # Load X, Y, Z, W
|
||||||
|
llv vcol, 8, src_ptr # Load RGBA
|
||||||
|
llv vtex, 12, src_ptr # Load U, V
|
||||||
|
|
||||||
|
# matrix multiply
|
||||||
|
vmudn v___, vmtx0_f, vpos.h0
|
||||||
|
vmadh v___, vmtx0_i, vpos.h0
|
||||||
|
vmadn v___, vmtx1_f, vpos.h1
|
||||||
|
vmadh v___, vmtx1_i, vpos.h1
|
||||||
|
vmadn v___, vmtx2_f, vpos.h2
|
||||||
|
vmadh v___, vmtx2_i, vpos.h2
|
||||||
|
vmadn v___, vmtx3_f, vpos.h3
|
||||||
|
vmadh vcspos_i, vmtx3_i, vpos.h3
|
||||||
|
vmadn vcspos_f, vzero, vzero
|
||||||
|
|
||||||
|
slv vcol, SCREEN_VTX_RGBA, vtx
|
||||||
|
slv vtex, SCREEN_VTX_S_T, vtx
|
||||||
|
|
||||||
|
# 32-bit right shift by 5, to keep the clip space coordinates unscaled
|
||||||
|
vmudm vcspos_i, vcspos_i, vshift8.e4
|
||||||
|
vmadl vcspos_f, vcspos_f, vshift8.e4
|
||||||
|
|
||||||
|
addi vcount, -1
|
||||||
|
addi src_ptr, 16
|
||||||
|
|
||||||
|
sdv vcspos_i, SCREEN_VTX_CS_POSi,vtx
|
||||||
|
sdv vcspos_f, SCREEN_VTX_CS_POSf,vtx
|
||||||
|
|
||||||
|
# Calculate and store clipping flags against CS.W.
|
||||||
|
# These will be used for trivial rejections.
|
||||||
|
vch v___, vcspos_i, vcspos_i.w
|
||||||
|
vcl v___, vcspos_f, vcspos_f.w
|
||||||
|
cfc2 t0, COP2_CTRL_VCC
|
||||||
|
andi t0, 0x707 # Isolate X/Y/Z flags
|
||||||
|
|
||||||
|
# Compress flags to 8 bit
|
||||||
|
srl t1, t0, 5
|
||||||
|
andi t0, 0x7
|
||||||
|
or t0, t1
|
||||||
|
sb t0, PRIM_VTX_TRCODE(vtx)
|
||||||
|
|
||||||
|
bnez vcount, upload_vertex
|
||||||
|
addi vtx, PRIM_VTX_SIZE
|
||||||
|
|
||||||
|
|
||||||
|
# now do the actual drawing
|
||||||
|
li a1, %lo(VERTEX_CACHE) + 0*PRIM_VTX_SIZE
|
||||||
|
li a2, %lo(VERTEX_CACHE) + 1*PRIM_VTX_SIZE
|
||||||
|
jal GPUCmd_DrawTriangle
|
||||||
|
li a3, %lo(VERTEX_CACHE) + 2*PRIM_VTX_SIZE
|
||||||
|
|
||||||
|
li a1, %lo(VERTEX_CACHE) + 0*PRIM_VTX_SIZE
|
||||||
|
li a2, %lo(VERTEX_CACHE) + 2*PRIM_VTX_SIZE
|
||||||
|
jal GPUCmd_DrawTriangle
|
||||||
|
li a3, %lo(VERTEX_CACHE) + 3*PRIM_VTX_SIZE
|
||||||
|
|
||||||
|
j RSPQ_Loop
|
||||||
|
nop
|
||||||
|
#undef src_ptr
|
||||||
|
#undef vtx
|
||||||
|
|
||||||
|
#undef x
|
||||||
|
#undef y
|
||||||
|
#undef z
|
||||||
|
#undef w
|
||||||
|
|
||||||
|
#undef v___
|
||||||
|
|
||||||
|
#undef vmtx0_i
|
||||||
|
#undef vmtx0_f
|
||||||
|
#undef vmtx1_i
|
||||||
|
#undef vmtx1_f
|
||||||
|
#undef vmtx2_i
|
||||||
|
#undef vmtx2_f
|
||||||
|
#undef vmtx3_i
|
||||||
|
#undef vmtx3_f
|
||||||
|
|
||||||
|
#undef vpos
|
||||||
|
#undef vcspos_i
|
||||||
|
#undef vcspos_f
|
||||||
|
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# GL_CalcScreenSpace
|
||||||
|
#
|
||||||
|
# Args:
|
||||||
|
# s3 = Destination vertex address
|
||||||
|
# $v02 = Clip space position (fractional part)
|
||||||
|
# $v03 = Clip space position (integer part)
|
||||||
|
#
|
||||||
|
################################################################
|
||||||
|
.func GL_CalcScreenSpace
|
||||||
|
GL_CalcScreenSpace:
|
||||||
|
#define dst s3
|
||||||
|
#define vcspos_f $v02
|
||||||
|
#define vcspos_i $v03
|
||||||
|
#define vinvw_f $v23
|
||||||
|
#define vinvw_i $v24
|
||||||
|
#define vviewscale $v25
|
||||||
|
#define vviewoff $v26
|
||||||
|
#define vscreenpos_i $v27
|
||||||
|
#define vscreenpos_f $v28
|
||||||
|
#define v___ $v29
|
||||||
|
#define w e3
|
||||||
|
|
||||||
|
# Calculate 32-bit inverse W
|
||||||
|
# TODO: NR?
|
||||||
|
vrcph vinvw_i.w, vcspos_i.w
|
||||||
|
vrcpl vinvw_f.w, vcspos_f.w
|
||||||
|
vrcph vinvw_i.w, vzero.e0
|
||||||
|
|
||||||
|
# Calculate screenspace coords
|
||||||
|
li t0, %lo(GL_VIEWPORT_SCALE)
|
||||||
|
ldv vviewscale, 0,t0
|
||||||
|
ldv vviewoff, 8,t0
|
||||||
|
|
||||||
|
vmudl v___, vcspos_f, vinvw_f.w
|
||||||
|
vmadm v___, vcspos_i, vinvw_f.w
|
||||||
|
vmadn vscreenpos_f, vcspos_f, vinvw_i.w
|
||||||
|
vmadh vscreenpos_i, vcspos_i, vinvw_i.w
|
||||||
|
|
||||||
|
vmudn vscreenpos_f, vscreenpos_f, vviewscale
|
||||||
|
vmadh vscreenpos_i, vscreenpos_i, vviewscale
|
||||||
|
vadd vscreenpos_i, vviewoff
|
||||||
|
|
||||||
|
sdv vscreenpos_i, SCREEN_VTX_X ,dst
|
||||||
|
ssv vcspos_i.w, SCREEN_VTX_W+0 ,dst
|
||||||
|
ssv vcspos_f.w, SCREEN_VTX_W+2 ,dst
|
||||||
|
ssv vinvw_i.w, SCREEN_VTX_INVW+0,dst
|
||||||
|
ssv vinvw_f.w, SCREEN_VTX_INVW+2,dst
|
||||||
|
jr ra
|
||||||
|
sb zero, SCREEN_VTX_PADDING(dst)
|
||||||
|
|
||||||
|
#undef dst
|
||||||
|
#undef vcspos_f
|
||||||
|
#undef vcspos_i
|
||||||
|
#undef vinvw_f
|
||||||
|
#undef vinvw_i
|
||||||
|
#undef vviewscale
|
||||||
|
#undef vviewoff
|
||||||
|
#undef vscreenpos_i
|
||||||
|
#undef vscreenpos_f
|
||||||
|
#undef v___
|
||||||
|
#undef w
|
||||||
|
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# GL_CalcClipCodes
|
||||||
|
#
|
||||||
|
# Args:
|
||||||
|
# s3 = Destination vertex address
|
||||||
|
# $v02 = Clip space position (fractional part)
|
||||||
|
# $v03 = Clip space position (integer part)
|
||||||
|
#
|
||||||
|
################################################################
|
||||||
|
.func GL_CalcClipCodes
|
||||||
|
GL_CalcClipCodes:
|
||||||
|
#define dst s3
|
||||||
|
#define vcspos_f $v02
|
||||||
|
#define vcspos_i $v03
|
||||||
|
#define vguard_f $v27
|
||||||
|
#define vguard_i $v28
|
||||||
|
#define v___ $v29
|
||||||
|
#define w e3
|
||||||
|
|
||||||
|
li t0, %lo(CLIP_CODE_FACTORS)
|
||||||
|
ldv vguard_i, 0,t0
|
||||||
|
|
||||||
|
vmudn vguard_f, vcspos_f, vguard_i
|
||||||
|
vmadh vguard_i, vcspos_i, vguard_i
|
||||||
|
|
||||||
|
vch v___, vguard_i, vguard_i.w
|
||||||
|
vcl v___, vguard_f, vguard_f.w
|
||||||
|
cfc2 t0, COP2_CTRL_VCC
|
||||||
|
andi t0, 0x707
|
||||||
|
srl t1, t0, 5
|
||||||
|
andi t0, 0x7
|
||||||
|
or t0, t1
|
||||||
|
jr ra
|
||||||
|
sb t0, SCREEN_VTX_CLIP_CODE(dst)
|
||||||
|
|
||||||
|
#undef dst
|
||||||
|
#undef vcspos_i
|
||||||
|
#undef vcspos_f
|
||||||
|
#undef vguard_i
|
||||||
|
#undef vguard_f
|
||||||
|
#undef v___
|
||||||
|
#undef w
|
||||||
|
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# GL_TnL
|
||||||
|
#
|
||||||
|
# Args:
|
||||||
|
# s3 = address of the vertex in DMEM (usually within VERTEX_CACHE)
|
||||||
|
#
|
||||||
|
################################################################
|
||||||
|
.func GL_TnL
|
||||||
|
GL_TnL:
|
||||||
|
#define vtx s3
|
||||||
|
|
||||||
|
#define v___ $v01
|
||||||
|
#define vcspos_f $v02
|
||||||
|
#define vcspos_i $v03
|
||||||
|
#define vtexsize $v06
|
||||||
|
#define vtexoffset $v07
|
||||||
|
#define vst $v08
|
||||||
|
#define vst_i $v28
|
||||||
|
#define vst_f $v29
|
||||||
|
move ra2, ra
|
||||||
|
|
||||||
|
llv vst, SCREEN_VTX_S_T, vtx # S + T
|
||||||
|
|
||||||
|
li t0, %lo(GL_STATE_TEX_SIZE)
|
||||||
|
llv vtexsize, 0,t0
|
||||||
|
llv vtexoffset, 4,t0
|
||||||
|
|
||||||
|
# Scale texcoord by texsize and subtract offset (to correct for bilinear sampling if active)
|
||||||
|
#vmudn v___, vst, vtexsize
|
||||||
|
# vmadh vst, vtexoffset, K1
|
||||||
|
|
||||||
|
#vmudn v___, vst, vtexsize
|
||||||
|
#vmadh vst, vtexoffset, K1
|
||||||
|
#vmudl vst, vst, vtexsize
|
||||||
|
|
||||||
|
vmudh v___, vst, vtexsize
|
||||||
|
vsar vst_i, COP2_ACC_HI
|
||||||
|
vsar vst_f, COP2_ACC_MD
|
||||||
|
|
||||||
|
vmudl vst_f, vst_f, K8192
|
||||||
|
vmadm vst_i, vst_i, K8192
|
||||||
|
vmadn vst, vzero, vzero
|
||||||
|
|
||||||
|
#undef vst_i
|
||||||
|
#undef vst_f
|
||||||
|
|
||||||
|
lbu t0, PRIM_VTX_TRCODE(vtx)
|
||||||
|
slv vst, SCREEN_VTX_S_T, vtx
|
||||||
|
|
||||||
|
ldv vcspos_f, SCREEN_VTX_CS_POSf,vtx
|
||||||
|
ldv vcspos_i, SCREEN_VTX_CS_POSi,vtx
|
||||||
|
|
||||||
|
# Mark this vertex as having T&L applied
|
||||||
|
ori t0, 0x80
|
||||||
|
|
||||||
|
jal GL_CalcScreenSpace
|
||||||
|
sb t0, PRIM_VTX_TRCODE(vtx)
|
||||||
|
|
||||||
|
j GL_CalcClipCodes
|
||||||
|
move ra, ra2
|
||||||
|
|
||||||
|
#undef vcspos_f
|
||||||
|
#undef vcspos_i
|
||||||
|
#undef vtexsize
|
||||||
|
#undef vtexoffset
|
||||||
|
|
||||||
|
#undef vtx
|
||||||
|
|
||||||
|
#undef v___
|
||||||
|
#undef vrgba
|
||||||
|
#undef vst
|
||||||
|
#undef s
|
||||||
|
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
|
||||||
|
.func GPUCmd_DrawTriangle
|
||||||
|
GPUCmd_DrawTriangle:
|
||||||
|
#define vtx1 a1
|
||||||
|
#define vtx2 a2
|
||||||
|
#define vtx3 a3
|
||||||
|
#define trcode1 t6
|
||||||
|
#define trcode2 t7
|
||||||
|
#define trcode3 t8
|
||||||
|
sw ra, %lo(DRAW_TRI_RA) # TODO find a register for this
|
||||||
|
|
||||||
|
# Trivial reject: if all the vertices are out of the same plane (at least one),
|
||||||
|
# the triangle is out of the viewport.
|
||||||
|
# NOTE: This deliberately uses lb instead of lbu so the sign bit is extended.
|
||||||
|
# The MSB of each TR-code is a bit flag that is set if the vertex has already
|
||||||
|
# had T&L applied once.
|
||||||
|
lb trcode1, PRIM_VTX_TRCODE(vtx1)
|
||||||
|
lb trcode2, PRIM_VTX_TRCODE(vtx2)
|
||||||
|
lb trcode3, PRIM_VTX_TRCODE(vtx3)
|
||||||
|
and t0, trcode1, trcode2
|
||||||
|
and t0, trcode3
|
||||||
|
andi t0, 0x3F
|
||||||
|
bnez t0, JrRa
|
||||||
|
nop
|
||||||
|
|
||||||
|
# Perform T&L for each vertex if we haven't already
|
||||||
|
bgezal trcode1, GL_TnL
|
||||||
|
move s3, vtx1
|
||||||
|
|
||||||
|
bgezal trcode2, GL_TnL
|
||||||
|
move s3, vtx2
|
||||||
|
|
||||||
|
bgezal trcode3, GL_TnL
|
||||||
|
move s3, vtx3
|
||||||
|
|
||||||
|
lbu t0, SCREEN_VTX_CLIP_CODE(vtx1)
|
||||||
|
lbu t1, SCREEN_VTX_CLIP_CODE(vtx2)
|
||||||
|
lbu t2, SCREEN_VTX_CLIP_CODE(vtx3)
|
||||||
|
or t5, t0, t1
|
||||||
|
or t5, t2
|
||||||
|
|
||||||
|
move s1, zero
|
||||||
|
beqz t5, gl_draw_single_triangle
|
||||||
|
move s2, zero
|
||||||
|
|
||||||
|
jal GL_ClipTriangle
|
||||||
|
nop
|
||||||
|
|
||||||
|
beqz v1, gl_draw_triangle_end
|
||||||
|
addi s2, -6
|
||||||
|
lhu s5, 0(s1)
|
||||||
|
gl_draw_clipped_triangles_loop:
|
||||||
|
move vtx1, s5
|
||||||
|
lhu vtx2, 2(s1)
|
||||||
|
lhu vtx3, 4(s1)
|
||||||
|
|
||||||
|
gl_draw_single_triangle:
|
||||||
|
addi vtx1, SCREEN_VTX_X
|
||||||
|
addi vtx2, SCREEN_VTX_X
|
||||||
|
addi vtx3, SCREEN_VTX_X
|
||||||
|
|
||||||
|
lhu a0, %lo(GL_TRI_CMD)
|
||||||
|
lh v0, %lo(GL_TRI_CULL)
|
||||||
|
jal RDPQ_Triangle
|
||||||
|
li s3, %lo(RDPQ_CMD_STAGING)
|
||||||
|
|
||||||
|
jal RDPQ_Send
|
||||||
|
li s4, %lo(RDPQ_CMD_STAGING)
|
||||||
|
|
||||||
|
blt s1, s2, gl_draw_clipped_triangles_loop
|
||||||
|
addi s1, 2
|
||||||
|
|
||||||
|
gl_draw_triangle_end:
|
||||||
|
lw ra, %lo(DRAW_TRI_RA)
|
||||||
|
jr ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
#undef vtx1
|
||||||
|
#undef vtx2
|
||||||
|
#undef vtx3
|
||||||
|
.endfunc
|
||||||
|
|
||||||
|
#include "rsp_gpu_clipping.inc"
|
||||||
|
#include <rsp_rdpq.inc>
|
380
misc/n64/rsp_gpu_clipping.inc
Normal file
380
misc/n64/rsp_gpu_clipping.inc
Normal file
@ -0,0 +1,380 @@
|
|||||||
|
#define CLIPPING_PLANE_COUNT 6
|
||||||
|
#define CLIPPING_CACHE_SIZE 9
|
||||||
|
#define CLIPPING_PLANE_SIZE 8
|
||||||
|
|
||||||
|
.section .data.gl_clipping
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
CLIP_PLANES:
|
||||||
|
.half 1, 0, 0, GUARD_BAND_FACTOR
|
||||||
|
.half 0, 1, 0, GUARD_BAND_FACTOR
|
||||||
|
.half 0, 0, 1, 1
|
||||||
|
.half 1, 0, 0, -GUARD_BAND_FACTOR
|
||||||
|
.half 0, 1, 0, -GUARD_BAND_FACTOR
|
||||||
|
.half 0, 0, 1, -1
|
||||||
|
|
||||||
|
.align 4
|
||||||
|
CACHE_OFFSETS: .half 2,4,6,8,10,12,14,16,18
|
||||||
|
|
||||||
|
.section .bss.gl_clipping
|
||||||
|
|
||||||
|
CLIP_CACHE: .dcb.b SCREEN_VTX_SIZE * CLIPPING_CACHE_SIZE
|
||||||
|
CLIP_CACHE_END:
|
||||||
|
|
||||||
|
CLIP_LISTS:
|
||||||
|
CLIP_LIST0: .dcb.w CLIPPING_CACHE_SIZE
|
||||||
|
CLIP_LIST1: .dcb.w CLIPPING_CACHE_SIZE
|
||||||
|
|
||||||
|
|
||||||
|
.section .text.gl_clipping
|
||||||
|
|
||||||
|
################################################################
|
||||||
|
# GL_ClipTriangle
|
||||||
|
# Clip a triangle against the view-frustum by using the Sutherland-Hodgman algorithm
|
||||||
|
# https://en.wikipedia.org/wiki/Sutherland%E2%80%93Hodgman_algorithm
|
||||||
|
# Args:
|
||||||
|
# a1-a3 = Vertices
|
||||||
|
# t5 = OR'd clip flags of the triangle's vertices
|
||||||
|
# Returns:
|
||||||
|
# s1 = Pointer to list of output vertices
|
||||||
|
# s2 = Pointer to end of list
|
||||||
|
################################################################
|
||||||
|
.func GL_ClipTriangle
|
||||||
|
GL_ClipTriangle:
|
||||||
|
#define out_count v1
|
||||||
|
#define clip_flags t5
|
||||||
|
#define plane_flag t6
|
||||||
|
#define in_count t7
|
||||||
|
#define in_end t8
|
||||||
|
#define in_list s0
|
||||||
|
#define out_list s1
|
||||||
|
#define plane s2
|
||||||
|
#define intersection s3
|
||||||
|
#define cur_ptr s4
|
||||||
|
#define prev_ptr s5
|
||||||
|
#define cur_vtx s6
|
||||||
|
#define prev_vtx s7
|
||||||
|
#define p0 k0
|
||||||
|
#define p1 k1
|
||||||
|
#define vtx1 a1
|
||||||
|
#define vtx2 a2
|
||||||
|
#define vtx3 a3
|
||||||
|
|
||||||
|
#define vplane $v01
|
||||||
|
#define vint_f $v02
|
||||||
|
#define vint_i $v03
|
||||||
|
#define vdot_i $v04
|
||||||
|
#define vdot_f $v05
|
||||||
|
#define vdiff_i $v06
|
||||||
|
#define vdiff_f $v07
|
||||||
|
#define va_i $v08
|
||||||
|
#define va_f $v09
|
||||||
|
#define vpos_i $v10
|
||||||
|
#define vpos_f $v11
|
||||||
|
#define vattr0 $v12
|
||||||
|
#define vattr1 $v13
|
||||||
|
#define voff0 $v14
|
||||||
|
#define voff1 $v15
|
||||||
|
#define vcache0 $v16
|
||||||
|
#define vcache1 $v17
|
||||||
|
#define v__ $v29
|
||||||
|
|
||||||
|
move ra2, ra
|
||||||
|
|
||||||
|
# Init in_list as empty
|
||||||
|
li in_list, %lo(CLIP_LIST0)
|
||||||
|
move in_count, zero
|
||||||
|
|
||||||
|
# Put three original vertices in the out_list
|
||||||
|
# (So after the initial swap they will be in the in_list)
|
||||||
|
li out_list, %lo(CLIP_LIST1)
|
||||||
|
sh vtx1, 0(out_list)
|
||||||
|
sh vtx2, 2(out_list)
|
||||||
|
sh vtx3, 4(out_list)
|
||||||
|
li out_count, 3*2
|
||||||
|
|
||||||
|
li plane, %lo(CLIP_PLANES)
|
||||||
|
li plane_flag, 1
|
||||||
|
|
||||||
|
# Load cache offsets
|
||||||
|
li t0, %lo(CACHE_OFFSETS)
|
||||||
|
vxor voff1, voff1
|
||||||
|
lqv voff0, 0,t0
|
||||||
|
lsv voff1, 16,t0
|
||||||
|
|
||||||
|
# Temporarily use the RDP staging area as a map of which cache slots are used
|
||||||
|
# Init to zero
|
||||||
|
li t0, %lo(RDPQ_CMD_STAGING)
|
||||||
|
sqv vzero, 0,t0
|
||||||
|
sqv vzero, 16,t0
|
||||||
|
|
||||||
|
# Iterate over the 6 clipping planes
|
||||||
|
gl_clip_plane_loop:
|
||||||
|
and t0, clip_flags, plane_flag
|
||||||
|
beqz t0, gl_clip_plane_loop_end
|
||||||
|
move t1, in_list
|
||||||
|
|
||||||
|
# Swap in and out lists
|
||||||
|
|
||||||
|
# If the out list is empty from the last iteration,
|
||||||
|
# the triangle has no visible points and we are done
|
||||||
|
beqz out_count, gl_clip_return
|
||||||
|
move in_list, out_list
|
||||||
|
move out_list, t1
|
||||||
|
move in_count, out_count
|
||||||
|
move out_count, zero
|
||||||
|
|
||||||
|
# Iterate over the edges of the polygon in the input list
|
||||||
|
# The current edge is between cur_vtx and prev_vtx
|
||||||
|
move cur_ptr, in_list
|
||||||
|
add in_end, in_list, in_count
|
||||||
|
# Init the "previous" vertex to the last in the list for the wrap-around
|
||||||
|
addi prev_ptr, in_end, -2
|
||||||
|
|
||||||
|
gl_clip_edge_loop:
|
||||||
|
#define cur_flag t3
|
||||||
|
#define prev_flag t4
|
||||||
|
|
||||||
|
# Check which side of the plane the two vertices are on
|
||||||
|
lhu cur_vtx, 0(cur_ptr)
|
||||||
|
lhu prev_vtx, 0(prev_ptr)
|
||||||
|
lbu cur_flag, SCREEN_VTX_CLIP_CODE(cur_vtx)
|
||||||
|
lbu prev_flag, SCREEN_VTX_CLIP_CODE(prev_vtx)
|
||||||
|
and cur_flag, plane_flag
|
||||||
|
and prev_flag, plane_flag
|
||||||
|
|
||||||
|
# If they are on opposite sides, there is an intersection
|
||||||
|
xor t0, cur_flag, prev_flag
|
||||||
|
beqz t0, gl_clip_no_intersection
|
||||||
|
move p0, cur_vtx
|
||||||
|
|
||||||
|
# Swap the two points if necessary to make intersection calculation consistent
|
||||||
|
# This will make sure p0 is always inside and p1 is always outside
|
||||||
|
bnez prev_flag, gl_clip_no_swap
|
||||||
|
move p1, prev_vtx
|
||||||
|
xor p0, p0, p1
|
||||||
|
xor p1, p0, p1
|
||||||
|
xor p0, p0, p1
|
||||||
|
|
||||||
|
#undef prev_flag
|
||||||
|
|
||||||
|
gl_clip_no_swap:
|
||||||
|
# Calculate intersection of the line segment and the plane
|
||||||
|
|
||||||
|
li t0, %lo(RDPQ_CMD_STAGING)
|
||||||
|
lqv vcache0, 0,t0
|
||||||
|
lqv vcache1, 16,t0
|
||||||
|
|
||||||
|
# Repeat plane coefficients twice
|
||||||
|
ldv vplane.e0, 0,plane
|
||||||
|
ldv vplane.e4, 0,plane
|
||||||
|
|
||||||
|
# vpos: x0 y0 z0 w0 x1 y1 z1 w1
|
||||||
|
ldv vpos_i.e0, SCREEN_VTX_CS_POSi,p0
|
||||||
|
ldv vpos_f.e0, SCREEN_VTX_CS_POSf,p0
|
||||||
|
ldv vpos_i.e4, SCREEN_VTX_CS_POSi,p1
|
||||||
|
ldv vpos_f.e4, SCREEN_VTX_CS_POSf,p1
|
||||||
|
|
||||||
|
# vint: x1 y1 z1 w1
|
||||||
|
ldv vint_i.e0, SCREEN_VTX_CS_POSi,p1
|
||||||
|
ldv vint_f.e0, SCREEN_VTX_CS_POSf,p1
|
||||||
|
|
||||||
|
# vattr0: r0 g0 b0 a0 s0 t0
|
||||||
|
luv vattr0.e0, SCREEN_VTX_RGBA ,p0
|
||||||
|
llv vattr0.e4, SCREEN_VTX_S_T ,p0
|
||||||
|
|
||||||
|
# vattr1: r1 g1 b1 a1 s1 t1
|
||||||
|
luv vattr1.e0, SCREEN_VTX_RGBA ,p1
|
||||||
|
llv vattr1.e4, SCREEN_VTX_S_T ,p1
|
||||||
|
|
||||||
|
# Find first free slot in clip cache
|
||||||
|
|
||||||
|
# Add the values from the "used slots map" to the cache offsets
|
||||||
|
# After this, each lane will contain the offset of its corresponding cache slot,
|
||||||
|
# but only if the slot is not used. If it is used, it will contain some large value.
|
||||||
|
vaddc vcache0, voff0
|
||||||
|
vaddc vcache1, voff1
|
||||||
|
|
||||||
|
# Look for the smallest value, which will end up in vcache0.e0
|
||||||
|
# Because used slots are marked as large values, they will never be found.
|
||||||
|
vlt vcache0, vcache0.q1
|
||||||
|
vlt vcache0, vcache0.h2
|
||||||
|
vlt vcache0, vcache0.e4
|
||||||
|
vlt vcache0, vcache1.e0
|
||||||
|
|
||||||
|
mfc2 t0, vcache0.e0
|
||||||
|
|
||||||
|
# Mark slot as used by storing some large value (careful of overflows!)
|
||||||
|
li t1, 0xFF
|
||||||
|
sh t1, %lo(RDPQ_CMD_STAGING)-2(t0)
|
||||||
|
|
||||||
|
# t0 is the index multiplied by 2
|
||||||
|
# intersection = t0 * 20 = t0 * 16 + t0 * 4
|
||||||
|
sll intersection, t0, 4
|
||||||
|
sll t1, t0, 2
|
||||||
|
add intersection, t1
|
||||||
|
|
||||||
|
# CAUTION: intersection might point to the same address as either p0 or p1,
|
||||||
|
# because one of them is the previous point, which could have been marked unused
|
||||||
|
# in the previous iteration. As long as we don't access p0 or p1 after writing to
|
||||||
|
# intersection, this is fine.
|
||||||
|
addi intersection, %lo(CLIP_CACHE) - SCREEN_VTX_SIZE
|
||||||
|
|
||||||
|
# Store the cache offset in unused memory (used later when finding the cache slot to mark as unused)
|
||||||
|
sb t0, SCREEN_VTX_PADDING(intersection)
|
||||||
|
|
||||||
|
# Compute dot products of both positions with the clip plane
|
||||||
|
# vdot.e0: d0 = dot(p0, plane)
|
||||||
|
# vdot.e4: d1 = dot(p1, plane)
|
||||||
|
vmudn vdot_f, vpos_f, vplane
|
||||||
|
vmadh vdot_i, vpos_i, vplane
|
||||||
|
vaddc vdot_f, vdot_f.q1
|
||||||
|
vadd vdot_i, vdot_i.q1
|
||||||
|
vaddc vdot_f, vdot_f.h2
|
||||||
|
vadd vdot_i, vdot_i.h2
|
||||||
|
|
||||||
|
# d0 - d1
|
||||||
|
vsubc vdiff_f, vdot_f, vdot_f.e4
|
||||||
|
vsub vdiff_i, vdot_i, vdot_i.e4
|
||||||
|
|
||||||
|
# 1 / (d0 - d1)
|
||||||
|
vrcph v__.e0, vdiff_i.e0
|
||||||
|
vrcpl va_f.e0, vdiff_f.e0
|
||||||
|
vrcph va_i.e0, vzero.e0
|
||||||
|
|
||||||
|
# a = d0 / (d0 - d1)
|
||||||
|
vmudl v__, va_f, vdot_f.e0
|
||||||
|
vmadm v__, va_i, vdot_f.e0
|
||||||
|
vmadn va_f, va_f, vdot_i.e0
|
||||||
|
|
||||||
|
# Prepare 0x7FFF in va_i.e0
|
||||||
|
vsubc va_i, vshift8, K1
|
||||||
|
|
||||||
|
# a = min(a, 1)
|
||||||
|
vge v__, va_f, vzero
|
||||||
|
vmrg va_f, va_f, va_i.e0
|
||||||
|
|
||||||
|
# Account for right shift introduced by vrcp
|
||||||
|
vmudn va_f, va_f, K2
|
||||||
|
|
||||||
|
# p1 - p0
|
||||||
|
vsubc vint_f, vpos_f
|
||||||
|
vsub vint_i, vpos_i
|
||||||
|
# attr1 - attr0
|
||||||
|
vsubc vattr1, vattr0
|
||||||
|
|
||||||
|
# Result of linear interpolation:
|
||||||
|
# p0 + a * (p1 - p0)
|
||||||
|
vmudl v__, vint_f, va_f.e0
|
||||||
|
vmadm v__, vint_i, va_f.e0
|
||||||
|
vmadn vint_f, vpos_f, K1
|
||||||
|
vmadh vint_i, vpos_i, K1
|
||||||
|
|
||||||
|
# a * (attr1 - attr0)
|
||||||
|
vmudm vattr1, vattr1, va_f.e0
|
||||||
|
|
||||||
|
# attr0 + a * (attr1 - attr0)
|
||||||
|
vaddc vattr0, vattr1
|
||||||
|
|
||||||
|
# Store results
|
||||||
|
sdv vint_i.e0, SCREEN_VTX_CS_POSi,intersection
|
||||||
|
sdv vint_f.e0, SCREEN_VTX_CS_POSf,intersection
|
||||||
|
suv vattr0.e0, SCREEN_VTX_RGBA ,intersection
|
||||||
|
jal GL_CalcClipCodes
|
||||||
|
slv vattr0.e4, SCREEN_VTX_S_T ,intersection
|
||||||
|
|
||||||
|
# Add intersection to the output list
|
||||||
|
add t0, out_list, out_count
|
||||||
|
sh intersection, 0(t0)
|
||||||
|
addi out_count, 2
|
||||||
|
|
||||||
|
gl_clip_no_intersection:
|
||||||
|
# If cur_vtx is inside, add it to the output list
|
||||||
|
bnez cur_flag, gl_clip_no_current
|
||||||
|
add t0, out_list, out_count
|
||||||
|
sh cur_vtx, 0(t0)
|
||||||
|
b gl_clip_edge_loop_end
|
||||||
|
addi out_count, 2
|
||||||
|
|
||||||
|
#undef cur_flag
|
||||||
|
|
||||||
|
gl_clip_no_current:
|
||||||
|
# Check if the vertex is stored in the clip cache
|
||||||
|
lbu t0, SCREEN_VTX_PADDING(cur_vtx)
|
||||||
|
beqz t0, gl_clip_edge_loop_end
|
||||||
|
# Reset the padding field to zero, so the screen space values won't be recalculated below
|
||||||
|
sb zero, SCREEN_VTX_PADDING(cur_vtx)
|
||||||
|
# If so, mark it as unused
|
||||||
|
sh zero, %lo(RDPQ_CMD_STAGING)-2(t0)
|
||||||
|
|
||||||
|
gl_clip_edge_loop_end:
|
||||||
|
# Advance to the next edge
|
||||||
|
addi cur_ptr, 2
|
||||||
|
blt cur_ptr, in_end, gl_clip_edge_loop
|
||||||
|
addi prev_ptr, cur_ptr, -2
|
||||||
|
|
||||||
|
gl_clip_plane_loop_end:
|
||||||
|
# Advance to the next clipping plane
|
||||||
|
sll plane_flag, 1
|
||||||
|
blt plane_flag, (1<<CLIPPING_PLANE_COUNT), gl_clip_plane_loop
|
||||||
|
addi plane, CLIPPING_PLANE_SIZE
|
||||||
|
|
||||||
|
#define cache_vtx s3
|
||||||
|
#define cache_end s5
|
||||||
|
|
||||||
|
# Calculate screen space values for new vertices (in the clip cache)
|
||||||
|
# TODO: maybe iterate over out_list instead
|
||||||
|
li cache_vtx, %lo(CLIP_CACHE)
|
||||||
|
li cache_end, %lo(CLIP_CACHE_END) - SCREEN_VTX_SIZE
|
||||||
|
gl_clip_finalize_loop:
|
||||||
|
lbu t0, SCREEN_VTX_PADDING(cache_vtx)
|
||||||
|
neg t0
|
||||||
|
|
||||||
|
# Only calculate screen space values if the vertex is actually used
|
||||||
|
ldv vint_i, SCREEN_VTX_CS_POSi,cache_vtx
|
||||||
|
bltzal t0, GL_CalcScreenSpace
|
||||||
|
ldv vint_f, SCREEN_VTX_CS_POSf,cache_vtx
|
||||||
|
|
||||||
|
blt cache_vtx, cache_end, gl_clip_finalize_loop
|
||||||
|
addi cache_vtx, SCREEN_VTX_SIZE
|
||||||
|
|
||||||
|
gl_clip_return:
|
||||||
|
# Done!
|
||||||
|
jr ra2
|
||||||
|
add s2, out_list, out_count
|
||||||
|
|
||||||
|
#undef cache_vtx
|
||||||
|
#undef cache_end
|
||||||
|
#undef clip_flags
|
||||||
|
#undef plane_flag
|
||||||
|
#undef in_count
|
||||||
|
#undef out_count
|
||||||
|
#undef in_end
|
||||||
|
#undef intersection
|
||||||
|
#undef in_list
|
||||||
|
#undef out_list
|
||||||
|
#undef plane
|
||||||
|
#undef cur_ptr
|
||||||
|
#undef prev_ptr
|
||||||
|
#undef cur_vtx
|
||||||
|
#undef prev_vtx
|
||||||
|
#undef p0
|
||||||
|
#undef p1
|
||||||
|
#undef vtx1
|
||||||
|
#undef vtx2
|
||||||
|
#undef vtx3
|
||||||
|
#undef vplane
|
||||||
|
#undef vpos_i
|
||||||
|
#undef vpos_f
|
||||||
|
#undef vdot_i
|
||||||
|
#undef vdot_f
|
||||||
|
#undef vdiff_i
|
||||||
|
#undef vdiff_f
|
||||||
|
#undef va_f
|
||||||
|
#undef vint_i
|
||||||
|
#undef vint_f
|
||||||
|
#undef vattr0
|
||||||
|
#undef vattr1
|
||||||
|
#undef v__
|
||||||
|
|
||||||
|
.endfunc
|
@ -5,23 +5,34 @@
|
|||||||
#include "Logger.h"
|
#include "Logger.h"
|
||||||
#include "Window.h"
|
#include "Window.h"
|
||||||
#include <libdragon.h>
|
#include <libdragon.h>
|
||||||
#include <GL/gl.h>
|
|
||||||
#include <GL/gl_integration.h>
|
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
#include <rspq_profile.h>
|
||||||
typedef void (*GL_SetupVBFunc)(void);
|
#include "../misc/n64/gpu.c"
|
||||||
static GL_SetupVBFunc gfx_setupVBFunc;
|
|
||||||
|
|
||||||
|
|
||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*---------------------------------------------------------General---------------------------------------------------------*
|
*---------------------------------------------------------General---------------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
static surface_t zbuffer;
|
static surface_t zbuffer;
|
||||||
|
static GfxResourceID white_square;
|
||||||
|
|
||||||
void Gfx_Create(void) {
|
void Gfx_Create(void) {
|
||||||
gl_init();
|
rspq_init();
|
||||||
|
//rspq_profile_start();
|
||||||
|
rdpq_init();
|
||||||
//rdpq_debug_start(); // TODO debug
|
//rdpq_debug_start(); // TODO debug
|
||||||
//rdpq_debug_log(true);
|
//rdpq_debug_log(true);
|
||||||
|
|
||||||
|
rdpq_set_mode_standard();
|
||||||
|
__rdpq_mode_change_som(SOM_TEXTURE_PERSP, SOM_TEXTURE_PERSP);
|
||||||
|
__rdpq_mode_change_som(SOM_ZMODE_MASK, SOM_ZMODE_OPAQUE);
|
||||||
|
rdpq_mode_dithering(DITHER_SQUARE_SQUARE);
|
||||||
|
|
||||||
|
gpu_init();
|
||||||
|
|
||||||
|
// Set alpha compare threshold
|
||||||
|
gpu_push_rdp(RDP_CMD_SYNC_PIPE, 0);
|
||||||
|
gpu_push_rdp(RDP_CMD_SET_BLEND_COLOR, (0 << 24) | (0 << 16) | (0 << 8) | 127);
|
||||||
|
|
||||||
zbuffer = surface_alloc(FMT_RGBA16, display_get_width(), display_get_height());
|
zbuffer = surface_alloc(FMT_RGBA16, display_get_width(), display_get_height());
|
||||||
|
|
||||||
Gfx.MaxTexWidth = 256;
|
Gfx.MaxTexWidth = 256;
|
||||||
@ -36,6 +47,9 @@ void Gfx_Create(void) {
|
|||||||
|
|
||||||
Gfx.SupportsNonPowTwoTextures = true;
|
Gfx.SupportsNonPowTwoTextures = true;
|
||||||
Gfx_RestoreState();
|
Gfx_RestoreState();
|
||||||
|
|
||||||
|
Gfx_SetFaceCulling(false);
|
||||||
|
Gfx_SetViewport(0, 0, Game.Width, Game.Height);
|
||||||
}
|
}
|
||||||
|
|
||||||
cc_bool Gfx_TryRestoreContext(void) {
|
cc_bool Gfx_TryRestoreContext(void) {
|
||||||
@ -44,11 +58,9 @@ cc_bool Gfx_TryRestoreContext(void) {
|
|||||||
|
|
||||||
void Gfx_Free(void) {
|
void Gfx_Free(void) {
|
||||||
Gfx_FreeState();
|
Gfx_FreeState();
|
||||||
gl_close();
|
gpu_close();
|
||||||
}
|
}
|
||||||
|
|
||||||
#define gl_Toggle(cap) if (enabled) { glEnable(cap); } else { glDisable(cap); }
|
|
||||||
|
|
||||||
|
|
||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*-----------------------------------------------------------Misc----------------------------------------------------------*
|
*-----------------------------------------------------------Misc----------------------------------------------------------*
|
||||||
@ -73,21 +85,17 @@ void Gfx_SetVSync(cc_bool vsync) {
|
|||||||
void Gfx_OnWindowResize(void) { }
|
void Gfx_OnWindowResize(void) { }
|
||||||
|
|
||||||
void Gfx_SetViewport(int x, int y, int w, int h) {
|
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||||
glViewport(x, Game.Height - h - y, w, h);
|
gpuViewport(x, y, w, h);
|
||||||
}
|
|
||||||
void Gfx_SetScissor (int x, int y, int w, int h) {
|
|
||||||
cc_bool enabled = x != 0 || y != 0 || w != Game.Width || h != Game.Height;
|
|
||||||
if (enabled) { glEnable(GL_SCISSOR_TEST); } else { glDisable(GL_SCISSOR_TEST); }
|
|
||||||
|
|
||||||
glScissor(x, Game.Height - h - y, w, h);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Gfx_SetScissor(int x, int y, int w, int h) {
|
||||||
|
rdpq_set_scissor(x, y, x + w, y + h);
|
||||||
|
}
|
||||||
|
|
||||||
void Gfx_BeginFrame(void) {
|
void Gfx_BeginFrame(void) {
|
||||||
surface_t* disp = display_get();
|
surface_t* disp = display_get();
|
||||||
rdpq_attach(disp, &zbuffer);
|
rdpq_attach(disp, &zbuffer);
|
||||||
|
|
||||||
gl_context_begin();
|
|
||||||
Platform_LogConst("GFX ctx beg");
|
Platform_LogConst("GFX ctx beg");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,9 +121,11 @@ void Gfx_ClearColor(PackedCol color) {
|
|||||||
|
|
||||||
void Gfx_EndFrame(void) {
|
void Gfx_EndFrame(void) {
|
||||||
Platform_LogConst("GFX ctx end");
|
Platform_LogConst("GFX ctx end");
|
||||||
gl_context_end();
|
|
||||||
rdpq_detach_show();
|
rdpq_detach_show();
|
||||||
//Platform_LogConst("GFX END");
|
//Platform_LogConst("GFX END");
|
||||||
|
|
||||||
|
//rspq_profile_dump();
|
||||||
|
//rspq_profile_next_frame();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -124,15 +134,33 @@ void Gfx_EndFrame(void) {
|
|||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
typedef struct CCTexture {
|
typedef struct CCTexture {
|
||||||
surface_t surface;
|
surface_t surface;
|
||||||
GLuint textureID;
|
rspq_block_t* upload_block;
|
||||||
} CCTexture;
|
} CCTexture;
|
||||||
|
|
||||||
|
void Gfx_BindTexture(GfxResourceID texId) {
|
||||||
|
if (!texId) texId = white_square;
|
||||||
|
CCTexture* tex = (CCTexture*)texId;
|
||||||
|
|
||||||
|
rspq_block_run(tex->upload_block);
|
||||||
|
gpuSetTexSize(tex->surface.width, tex->surface.height);
|
||||||
|
}
|
||||||
|
|
||||||
#define ALIGNUP8(size) (((size) + 7) & ~0x07)
|
#define ALIGNUP8(size) (((size) + 7) & ~0x07)
|
||||||
|
|
||||||
// A8 B8 G8 R8 > A1 B5 G5 R5
|
// A8 B8 G8 R8 > A1 B5 G5 R5
|
||||||
#define To16BitPixel(src) \
|
#define To16BitPixel(src) \
|
||||||
((src & 0x80) >> 7) | ((src & 0xF800) >> 10) | ((src & 0xF80000) >> 13) | ((src & 0xF8000000) >> 16);
|
((src & 0x80) >> 7) | ((src & 0xF800) >> 10) | ((src & 0xF80000) >> 13) | ((src & 0xF8000000) >> 16);
|
||||||
|
|
||||||
|
static void UploadTexture(CCTexture* tex, rdpq_texparms_t* params) {
|
||||||
|
rspq_block_begin();
|
||||||
|
|
||||||
|
rdpq_tex_multi_begin();
|
||||||
|
rdpq_tex_upload(TILE0, &tex->surface, params);
|
||||||
|
rdpq_tex_multi_end();
|
||||||
|
|
||||||
|
tex->upload_block = rspq_block_end();
|
||||||
|
}
|
||||||
|
|
||||||
GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) {
|
GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) {
|
||||||
cc_bool bit16 = flags & TEXTURE_FLAG_LOWRES;
|
cc_bool bit16 = flags & TEXTURE_FLAG_LOWRES;
|
||||||
// rows are actually 8 byte aligned in TMEM https://github.com/DragonMinded/libdragon/blob/f360fa1bb1fb3ff3d98f4ab58692d40c828636c9/src/rdpq/rdpq_tex.c#L132
|
// rows are actually 8 byte aligned in TMEM https://github.com/DragonMinded/libdragon/blob/f360fa1bb1fb3ff3d98f4ab58692d40c828636c9/src/rdpq/rdpq_tex.c#L132
|
||||||
@ -141,13 +169,6 @@ GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags,
|
|||||||
if (pitch * bmp->height > 4096) return 0;
|
if (pitch * bmp->height > 4096) return 0;
|
||||||
|
|
||||||
CCTexture* tex = Mem_Alloc(1, sizeof(CCTexture), "texture");
|
CCTexture* tex = Mem_Alloc(1, sizeof(CCTexture), "texture");
|
||||||
|
|
||||||
glGenTextures(1, &tex->textureID);
|
|
||||||
glBindTexture(GL_TEXTURE_2D, tex->textureID);
|
|
||||||
// NOTE: Enabling these fixes textures, but seems to break on cen64
|
|
||||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, mipmaps ? GL_LINEAR : GL_NEAREST);
|
|
||||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, mipmaps ? GL_LINEAR : GL_NEAREST);
|
|
||||||
|
|
||||||
tex->surface = surface_alloc(bit16 ? FMT_RGBA16 : FMT_RGBA32, bmp->width, bmp->height);
|
tex->surface = surface_alloc(bit16 ? FMT_RGBA16 : FMT_RGBA32, bmp->width, bmp->height);
|
||||||
surface_t* fb = &tex->surface;
|
surface_t* fb = &tex->surface;
|
||||||
|
|
||||||
@ -172,33 +193,17 @@ GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags,
|
|||||||
bmp, rowWidth * BITMAPCOLOR_SIZE);
|
bmp, rowWidth * BITMAPCOLOR_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
rdpq_texparms_t params =
|
rdpq_texparms_t params =
|
||||||
{
|
{
|
||||||
.s.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 1 : REPEAT_INFINITE,
|
.s.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 1 : REPEAT_INFINITE,
|
||||||
.t.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 1 : REPEAT_INFINITE,
|
.t.repeats = (flags & TEXTURE_FLAG_NONPOW2) ? 1 : REPEAT_INFINITE,
|
||||||
};
|
};
|
||||||
|
UploadTexture(tex, ¶ms);
|
||||||
// rdpq_tex_upload(TILE0, &tex->surface, ¶ms);
|
|
||||||
glSurfaceTexImageN64(GL_TEXTURE_2D, 0, fb, ¶ms);
|
|
||||||
return tex;
|
return tex;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_BindTexture(GfxResourceID texId) {
|
|
||||||
CCTexture* tex = (CCTexture*)texId;
|
|
||||||
GLuint glID = tex ? tex->textureID : 0;
|
|
||||||
//Platform_Log1("BIND: %i", &glID);
|
|
||||||
|
|
||||||
//rdpq_debug_log(true);
|
|
||||||
glBindTexture(GL_TEXTURE_2D, glID);
|
|
||||||
// rdpq_debug_log(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, int rowWidth, cc_bool mipmaps) {
|
void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, int rowWidth, cc_bool mipmaps) {
|
||||||
// TODO: Just memcpying doesn't actually work. maybe due to glSurfaceTexImageN64 caching the RSQ upload block?
|
|
||||||
// TODO: Is there a more optimised approach than just calling glSurfaceTexImageN64
|
|
||||||
CCTexture* tex = (CCTexture*)texId;
|
CCTexture* tex = (CCTexture*)texId;
|
||||||
|
|
||||||
surface_t* fb = &tex->surface;
|
surface_t* fb = &tex->surface;
|
||||||
cc_uint32* src = (cc_uint32*)part->scan0 + x;
|
cc_uint32* src = (cc_uint32*)part->scan0 + x;
|
||||||
cc_uint8* dst = (cc_uint8*)fb->buffer + (x * 4) + (y * fb->stride);
|
cc_uint8* dst = (cc_uint8*)fb->buffer + (x * 4) + (y * fb->stride);
|
||||||
@ -210,21 +215,22 @@ void Gfx_UpdateTexture(GfxResourceID texId, int x, int y, struct Bitmap* part, i
|
|||||||
part->width * 4);
|
part->width * 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
glBindTexture(GL_TEXTURE_2D, tex->textureID);
|
|
||||||
rdpq_texparms_t params = (rdpq_texparms_t){
|
rdpq_texparms_t params = (rdpq_texparms_t){
|
||||||
.s.repeats = REPEAT_INFINITE,
|
.s.repeats = REPEAT_INFINITE,
|
||||||
.t.repeats = REPEAT_INFINITE,
|
.t.repeats = REPEAT_INFINITE,
|
||||||
};
|
};
|
||||||
// rdpq_tex_upload(TILE0, &tex->surface, ¶ms);
|
|
||||||
glSurfaceTexImageN64(GL_TEXTURE_2D, 0, fb, ¶ms);
|
rdpq_call_deferred((void (*)(void*))rspq_block_free, tex->upload_block);
|
||||||
|
UploadTexture(tex, ¶ms);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_DeleteTexture(GfxResourceID* texId) {
|
void Gfx_DeleteTexture(GfxResourceID* texId) {
|
||||||
CCTexture* tex = (CCTexture*)(*texId);
|
CCTexture* tex = (CCTexture*)(*texId);
|
||||||
if (!tex) return;
|
if (!tex) return;
|
||||||
|
|
||||||
glDeleteTextures(1, &tex->textureID);
|
if (tex->upload_block) rdpq_call_deferred((void (*)(void*))rspq_block_free, tex->upload_block);
|
||||||
|
surface_free(&tex->surface);
|
||||||
|
|
||||||
Mem_Free(tex);
|
Mem_Free(tex);
|
||||||
*texId = NULL;
|
*texId = NULL;
|
||||||
}
|
}
|
||||||
@ -236,29 +242,46 @@ void Gfx_DisableMipmaps(void) { }
|
|||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*-----------------------------------------------------State management----------------------------------------------------*
|
*-----------------------------------------------------State management----------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
void Gfx_SetFaceCulling(cc_bool enabled) { gl_Toggle(GL_CULL_FACE); }
|
void Gfx_SetFaceCulling(cc_bool enabled) {
|
||||||
static void SetAlphaBlend(cc_bool enabled) { gl_Toggle(GL_BLEND); }
|
gpuSetCullFace(enabled);
|
||||||
void Gfx_SetAlphaArgBlend(cc_bool enabled) { }
|
|
||||||
|
|
||||||
static void SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a) {
|
|
||||||
//glColorMask(r, g, b, a); TODO
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_SetDepthWrite(cc_bool enabled) { glDepthMask(enabled); }
|
static void SetAlphaBlend(cc_bool enabled) {
|
||||||
void Gfx_SetDepthTest(cc_bool enabled) { gl_Toggle(GL_DEPTH_TEST); }
|
rdpq_mode_blender(enabled ? RDPQ_BLENDER_MULTIPLY : 0);
|
||||||
|
__rdpq_mode_change_som(SOM_ZMODE_MASK, enabled ? SOM_ZMODE_TRANSPARENT : SOM_ZMODE_OPAQUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Gfx_SetAlphaArgBlend(cc_bool enabled) { }
|
||||||
|
|
||||||
|
static void SetAlphaTest(cc_bool enabled) {
|
||||||
|
__rdpq_mode_change_som(SOM_ALPHACOMPARE_MASK, enabled ? SOM_ALPHACOMPARE_THRESHOLD : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void SetColorWrite(cc_bool r, cc_bool g, cc_bool b, cc_bool a) {
|
||||||
|
//gpuColorMask(r, g, b, a); TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
void Gfx_SetDepthWrite(cc_bool enabled) {
|
||||||
|
__rdpq_mode_change_som(SOM_Z_WRITE, enabled ? SOM_Z_WRITE : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Gfx_SetDepthTest(cc_bool enabled) {
|
||||||
|
__rdpq_mode_change_som(SOM_Z_COMPARE, enabled ? SOM_Z_COMPARE : 0);
|
||||||
|
|
||||||
|
gpu_attr_z = enabled;
|
||||||
|
gpuUpdateFormat();
|
||||||
|
}
|
||||||
|
|
||||||
static void Gfx_FreeState(void) { FreeDefaultResources(); }
|
static void Gfx_FreeState(void) { FreeDefaultResources(); }
|
||||||
static void Gfx_RestoreState(void) {
|
static void Gfx_RestoreState(void) {
|
||||||
InitDefaultResources();
|
InitDefaultResources();
|
||||||
glEnableClientState(GL_VERTEX_ARRAY);
|
|
||||||
glEnableClientState(GL_COLOR_ARRAY);
|
|
||||||
gfx_format = -1;
|
gfx_format = -1;
|
||||||
|
|
||||||
glHint(GL_FOG_HINT, GL_NICEST);
|
// 1x1 dummy white texture
|
||||||
glAlphaFunc(GL_GREATER, 0.5f);
|
struct Bitmap bmp;
|
||||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);
|
BitmapCol pixels[1] = { BITMAPCOLOR_WHITE };
|
||||||
glDepthFunc(GL_LESS);
|
Bitmap_Init(bmp, 1, 1, pixels);
|
||||||
//glEnable(GL_RDPQ_TEXTURING_N64);
|
white_square = Gfx_CreateTexture(&bmp, 0, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
cc_bool Gfx_WarnIfNecessary(void) { return false; }
|
cc_bool Gfx_WarnIfNecessary(void) { return false; }
|
||||||
@ -348,8 +371,8 @@ static rspq_block_t* VB_GetCached(struct VertexBuffer* vb, int offset, int count
|
|||||||
if (vb->cache.blocks[i]) continue;
|
if (vb->cache.blocks[i]) continue;
|
||||||
|
|
||||||
rspq_block_begin();
|
rspq_block_begin();
|
||||||
gfx_setupVBFunc();
|
gpu_pointer = gfx_vb->vertices;
|
||||||
glDrawArrays(GL_QUADS, offset, count);
|
gpuDrawArrays(offset, count);
|
||||||
rspq_block_t* block = rspq_block_end();
|
rspq_block_t* block = rspq_block_end();
|
||||||
|
|
||||||
vb->cache.blocks[i] = block;
|
vb->cache.blocks[i] = block;
|
||||||
@ -435,80 +458,64 @@ void Gfx_SetFogEnd(float value) {
|
|||||||
void Gfx_SetFogMode(FogFunc func) {
|
void Gfx_SetFogMode(FogFunc func) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetAlphaTest(cc_bool enabled) {
|
|
||||||
if (enabled) { glEnable(GL_ALPHA_TEST); } else { glDisable(GL_ALPHA_TEST); }
|
|
||||||
}
|
|
||||||
|
|
||||||
void Gfx_DepthOnlyRendering(cc_bool depthOnly) {
|
void Gfx_DepthOnlyRendering(cc_bool depthOnly) {
|
||||||
depthOnlyRendering = depthOnly; // TODO: Better approach? maybe using glBlendFunc instead?
|
depthOnlyRendering = depthOnly; // TODO: Better approach? maybe using glBlendFunc instead?
|
||||||
cc_bool enabled = !depthOnly;
|
cc_bool enabled = !depthOnly;
|
||||||
|
|
||||||
//SetColorWrite(enabled & gfx_colorMask[0], enabled & gfx_colorMask[1],
|
//SetColorWrite(enabled & gfx_colorMask[0], enabled & gfx_colorMask[1],
|
||||||
// enabled & gfx_colorMask[2], enabled & gfx_colorMask[3]);
|
// enabled & gfx_colorMask[2], enabled & gfx_colorMask[3]);
|
||||||
if (enabled) { glEnable(GL_TEXTURE_2D); } else { glDisable(GL_TEXTURE_2D); }
|
gpu_attr_tex = enabled;
|
||||||
|
gpuUpdateFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*---------------------------------------------------------Matrices--------------------------------------------------------*
|
*---------------------------------------------------------Matrices--------------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
static GLenum matrix_modes[3] = { GL_PROJECTION, GL_MODELVIEW, GL_TEXTURE };
|
static struct Matrix _view, _proj;
|
||||||
static int lastMatrix;
|
|
||||||
|
|
||||||
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||||
if (type != lastMatrix) { lastMatrix = type; glMatrixMode(matrix_modes[type]); }
|
if (type == MATRIX_VIEW) _view = *matrix;
|
||||||
|
if (type == MATRIX_PROJ) _proj = *matrix;
|
||||||
|
|
||||||
if (matrix == &Matrix_Identity) {
|
struct Matrix mvp __attribute__((aligned(64)));
|
||||||
glLoadIdentity();
|
Matrix_Mul(&mvp, &_view, &_proj);
|
||||||
} else {
|
gpuLoadMatrix((const float*)&mvp);
|
||||||
glLoadMatrixf((const float*)matrix);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp) {
|
void Gfx_LoadMVP(const struct Matrix* view, const struct Matrix* proj, struct Matrix* mvp) {
|
||||||
Gfx_LoadMatrix(MATRIX_VIEW, view);
|
_proj = *proj;
|
||||||
Gfx_LoadMatrix(MATRIX_PROJ, proj);
|
_view = *view;
|
||||||
|
|
||||||
Matrix_Mul(mvp, view, proj);
|
Matrix_Mul(mvp, view, proj);
|
||||||
|
gpuLoadMatrix((const float*)mvp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct Matrix texMatrix = Matrix_IdentityValue;
|
|
||||||
void Gfx_EnableTextureOffset(float x, float y) {
|
void Gfx_EnableTextureOffset(float x, float y) {
|
||||||
texMatrix.row4.x = x; texMatrix.row4.y = y;
|
// TODO
|
||||||
Gfx_LoadMatrix(2, &texMatrix);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_DisableTextureOffset(void) { Gfx_LoadMatrix(2, &Matrix_Identity); }
|
void Gfx_DisableTextureOffset(void) { }
|
||||||
|
|
||||||
|
|
||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*--------------------------------------------------------Rendering--------------------------------------------------------*
|
*--------------------------------------------------------Rendering--------------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
static void GL_SetupVbColoured(void) {
|
|
||||||
glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 0));
|
|
||||||
glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_COLOURED, (void*)(gfx_vb->vertices + 12));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void GL_SetupVbTextured(void) {
|
|
||||||
glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 0));
|
|
||||||
glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12));
|
|
||||||
glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16));
|
|
||||||
}
|
|
||||||
|
|
||||||
void Gfx_SetVertexFormat(VertexFormat fmt) {
|
void Gfx_SetVertexFormat(VertexFormat fmt) {
|
||||||
if (fmt == gfx_format) return;
|
if (fmt == gfx_format) return;
|
||||||
gfx_format = fmt;
|
gfx_format = fmt;
|
||||||
gfx_stride = strideSizes[fmt];
|
gfx_stride = strideSizes[fmt];
|
||||||
|
gpu_stride = gfx_stride;
|
||||||
|
|
||||||
if (fmt == VERTEX_FORMAT_TEXTURED) {
|
if (fmt == VERTEX_FORMAT_TEXTURED) {
|
||||||
glEnableClientState(GL_TEXTURE_COORD_ARRAY);
|
rdpq_mode_combiner(RDPQ_COMBINER_TEX_SHADE);
|
||||||
glEnable(GL_TEXTURE_2D);
|
|
||||||
|
|
||||||
gfx_setupVBFunc = GL_SetupVbTextured;
|
|
||||||
} else {
|
} else {
|
||||||
glDisableClientState(GL_TEXTURE_COORD_ARRAY);
|
rdpq_mode_combiner(RDPQ_COMBINER_SHADE);
|
||||||
glDisable(GL_TEXTURE_2D);
|
|
||||||
|
|
||||||
gfx_setupVBFunc = GL_SetupVbColoured;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
gpu_texturing = fmt == VERTEX_FORMAT_TEXTURED;
|
||||||
|
gpu_attr_tex = gpu_texturing;
|
||||||
|
gpuUpdateFormat();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_DrawVb_Lines(int verticesCount) {
|
void Gfx_DrawVb_Lines(int verticesCount) {
|
||||||
@ -520,8 +527,8 @@ void Gfx_DrawVb_IndexedTris_Range(int verticesCount, int startVertex, DrawHints
|
|||||||
if (block) {
|
if (block) {
|
||||||
rspq_block_run(block);
|
rspq_block_run(block);
|
||||||
} else {
|
} else {
|
||||||
gfx_setupVBFunc();
|
gpu_pointer = gfx_vb->vertices;
|
||||||
glDrawArrays(GL_QUADS, startVertex, verticesCount);
|
gpuDrawArrays(startVertex, verticesCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -531,8 +538,8 @@ void Gfx_DrawVb_IndexedTris(int verticesCount) {
|
|||||||
if (block) {
|
if (block) {
|
||||||
rspq_block_run(block);
|
rspq_block_run(block);
|
||||||
} else {
|
} else {
|
||||||
gfx_setupVBFunc();
|
gpu_pointer = gfx_vb->vertices;
|
||||||
glDrawArrays(GL_QUADS, 0, verticesCount);
|
gpuDrawArrays(0, verticesCount);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -543,10 +550,8 @@ void Gfx_DrawIndexedTris_T2fC4b(int verticesCount, int startVertex) {
|
|||||||
if (block) {
|
if (block) {
|
||||||
rspq_block_run(block);
|
rspq_block_run(block);
|
||||||
} else {
|
} else {
|
||||||
glVertexPointer(3, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices));
|
gpu_pointer = gfx_vb->vertices;
|
||||||
glColorPointer(4, GL_UNSIGNED_BYTE, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 12));
|
gpuDrawArrays(startVertex, verticesCount);
|
||||||
glTexCoordPointer(2, GL_FLOAT, SIZEOF_VERTEX_TEXTURED, (void*)(gfx_vb->vertices + 16));
|
|
||||||
glDrawArrays(GL_QUADS, startVertex, verticesCount);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
Loading…
x
Reference in New Issue
Block a user