mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-09 15:28:21 -04:00
PS2: Slightly optimise vertex transform
This commit is contained in:
parent
90643e8077
commit
bd223eb457
@ -1,4 +1,4 @@
|
||||
#include "ViewportTransform.S"
|
||||
#include "VertexTransform.S"
|
||||
.global _DrawColouredQuads
|
||||
.align 4
|
||||
.type _DrawColouredQuads,%function
|
||||
|
@ -1,4 +1,4 @@
|
||||
#include "ViewportTransform.S"
|
||||
#include "VertexTransform.S"
|
||||
.global _DrawTexturedQuads
|
||||
.align 4
|
||||
.type _DrawTexturedQuads,%function
|
||||
|
@ -77,7 +77,7 @@ $(BUILD_DIR)/%.o: third_party/bearssl/src/%.c
|
||||
kos-cc $(CFLAGS) -c $< -o $@
|
||||
|
||||
$(BUILD_DIR)/%.o: misc/dreamcast/%.S
|
||||
kos-cc -c $< -o $@
|
||||
kos-cc $(DEPFLAGS) -c $< -o $@
|
||||
|
||||
# Dependency tracking
|
||||
$(DEPFILES):
|
||||
|
@ -1,10 +1,17 @@
|
||||
ifeq ($(strip $(PS2SDK)),)
|
||||
$(error "PS2SDK must be set in your environment")
|
||||
endif
|
||||
SOURCE_DIRS := src misc/ps2
|
||||
BUILD_DIR = build-ps2
|
||||
|
||||
CFILES := $(wildcard src/*.c)
|
||||
OBJS := $(patsubst src/%.c, $(BUILD_DIR)/%.o, $(CFILES))
|
||||
S_FILES := $(foreach dir,$(SOURCE_DIRS),$(wildcard $(dir)/*.S))
|
||||
C_FILES := $(foreach dir,$(SOURCE_DIRS),$(wildcard $(dir)/*.c))
|
||||
OBJS := $(addprefix $(BUILD_DIR)/, $(notdir $(C_FILES:%.c=%.o) $(S_FILES:%.S=%.o)))
|
||||
|
||||
# Dependency tracking
|
||||
DEPFLAGS = -MT $@ -MMD -MP -MF $(BUILD_DIR)/$*.d
|
||||
DEPFILES := $(OBJS:%.o=%.d)
|
||||
|
||||
IOP_MODS:= DEV9_irx.o NETMAN_irx.o SMAP_irx.o USBD_irx.o BDM_irx.o BDMFS_FATFS_irx.o USBMASS_BD_irx.o USBHDFSD_irx.o USBMOUSE_irx.o USBKBD_irx.o
|
||||
|
||||
EE_BIN = ClassiCube-ps2.elf
|
||||
@ -23,6 +30,8 @@ clean:
|
||||
|
||||
$(BUILD_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
include $(PS2SDK)/samples/Makefile.pref
|
||||
|
||||
# Networking IRX modules
|
||||
$(BUILD_DIR)/DEV9_irx.c: $(PS2SDK)/iop/irx/ps2dev9.irx
|
||||
@ -58,17 +67,27 @@ $(BUILD_DIR)/USBKBD_irx.c: $(PS2SDK)/iop/irx/ps2kbd.irx
|
||||
bin2c $< $@ USBKBD_irx
|
||||
|
||||
|
||||
include $(PS2SDK)/samples/Makefile.pref
|
||||
#---------------------------------------------------------------------------------
|
||||
# executable generation
|
||||
#---------------------------------------------------------------------------------
|
||||
$(EE_BIN): $(EE_OBJS)
|
||||
$(EE_CC) -T$(EE_LINKFILE) -O2 -o $(EE_BIN) $(EE_OBJS) $(EE_LDFLAGS) $(EE_LIBS)
|
||||
|
||||
|
||||
#---------------------------------------------------------------------------------
|
||||
# object generation
|
||||
#---------------------------------------------------------------------------------
|
||||
$(BUILD_DIR)/%.o: src/%.c
|
||||
$(EE_CC) $(EE_CFLAGS) $(EE_INCS) -c $< -o $@
|
||||
$(EE_CC) $(DEPFLAGS) $(EE_CFLAGS) $(EE_INCS) -c $< -o $@
|
||||
|
||||
$(BUILD_DIR)/%.o: $(BUILD_DIR)/%.c # IOP modules
|
||||
$(EE_CC) $(EE_CFLAGS) $(EE_INCS) -c $< -o $@
|
||||
|
||||
$(BUILD_DIR)/%.o: %.S
|
||||
$(EE_CC) $(EE_CFLAGS) $(EE_INCS) -c $< -o $@
|
||||
$(BUILD_DIR)/%.o: misc/ps2/%.S
|
||||
$(EE_CC) $(DEPFLAGS) $(EE_CFLAGS) $(EE_INCS) -c $< -o $@
|
||||
|
||||
$(EE_BIN): $(EE_OBJS)
|
||||
$(EE_CC) -T$(EE_LINKFILE) -O2 -o $(EE_BIN) $(EE_OBJS) $(EE_LDFLAGS) $(EE_LIBS)
|
||||
# Dependency tracking
|
||||
$(DEPFILES):
|
||||
|
||||
include $(wildcard $(DEPFILES))
|
||||
|
||||
|
135
misc/ps2/VertexTransform.S
Normal file
135
misc/ps2/VertexTransform.S
Normal file
@ -0,0 +1,135 @@
|
||||
# REGISTER USAGE
|
||||
# vf0 = hardwired to (0,0,0,1)
|
||||
# vf1 = mvp.row1
|
||||
# vf2 = mvp.row2
|
||||
# vf3 = mvp.row3
|
||||
# vf4 = mvp.row4
|
||||
# vf5 = clipping scale adjustments to match guardbands
|
||||
# NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for
|
||||
|
||||
.align 4
|
||||
|
||||
.global LoadMvpMatrix
|
||||
.type LoadMvpMatrix,%function
|
||||
.global LoadClipScaleFactors
|
||||
.type LoadClipScaleFactors,%function
|
||||
.global TransformTexturedQuad
|
||||
.type TransformTexturedQuad,%function
|
||||
|
||||
# Loads the MVP matrix into VU0 registers vf1-vf4 (one 128 bit row per register)
# C prototype: void LoadMvpMatrix(VU0_MATRIX* matrix)
|
||||
# $a0 = address of mvp (read as four quadwords at offsets 0x00, 0x10, 0x20, 0x30)
|
||||
LoadMvpMatrix:
|
||||
lqc2 $vf1, 0x00($a0) # vf1 = mvp.row1
|
||||
lqc2 $vf2, 0x10($a0) # vf2 = mvp.row2
|
||||
lqc2 $vf3, 0x20($a0) # vf3 = mvp.row3
|
||||
lqc2 $vf4, 0x30($a0) # vf4 = mvp.row4
|
||||
jr $ra # return to caller
|
||||
nop
|
||||
|
||||
|
||||
# Loads clipping scaling factors into VU0 register vf5
# C prototype: void LoadClipScaleFactors(VU0_VECTOR* scale)
|
||||
# $a0 = address of factors (read as a single quadword)
|
||||
LoadClipScaleFactors:
|
||||
lqc2 $vf5, 0x00($a0) # vf5 = factors
|
||||
jr $ra # return to caller
|
||||
nop
|
||||
|
||||
# Transforms the 4 vertices of a quad by the MVP matrix held in vf1-vf4
# and stores the results as two triangles (six 16 byte vectors) in dst
# Source vertices are read with a stride of 24 bytes, and only x,y,z are used
# C prototype: void TransformTexturedQuad(void* src, VU0_VECTOR* dst, VU0_VECTOR* tmp, int* clip_flags)
# Clobbers: $t0, vf10-vf17, ACC
|
||||
# $a0 = address of src vertices
|
||||
# $a1 = address of dst vertices (written at offsets 0x00-0x50)
|
||||
# $a2 = address of tmp vertex (scratch quadword used to stage x,y,z,1 for lqc2)
|
||||
# $a3 = address of clip flags (not written at present, as the clipping code is commented out)
|
||||
TransformTexturedQuad:
|
||||
# LOAD 1.0 into W
|
||||
lw $t0,ONE_VALUE # t0 = 1.0f
|
||||
sw $t0,0x0C($a2) # tmp.w = 1.0f (stays set while x,y,z are overwritten per vertex)
|
||||
|
||||
# LOAD VERTEX 1
|
||||
ld $t0,0x00($a0) # t0 = src[0].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x08($a0) # t0 = src[0].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
|
||||
# TRANSFORM VERTEX 1
|
||||
lqc2 $vf10, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row4[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf10 # ACC[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.y
|
||||
vmaddz $vf11, $vf3, $vf10 # OUT[xyzw] = ACC[xyzw] + mvp.row3[xyzw] * IN.z
|
||||
sqc2 $vf11, 0x00($a1) # dst[0] = TRANSFORMED(V0)
|
||||
#vmul $vf10, $vf11, $vf5 # TMP = TRANSFORMED(V0) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf10, $vf10 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
|
||||
# LOAD VERTEX 2
|
||||
ld $t0,0x18($a0) # t0 = src[1].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x20($a0) # t0 = src[1].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x00($a3) # clip_flags[0] = t0
|
||||
|
||||
# TRANSFORM VERTEX 2
|
||||
lqc2 $vf12, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row4[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf12 # ACC[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.y
|
||||
vmaddz $vf13, $vf3, $vf12 # OUT[xyzw] = ACC[xyzw] + mvp.row3[xyzw] * IN.z
|
||||
sqc2 $vf13, 0x10($a1) # dst[1] = TRANSFORMED(V1)
|
||||
#vmul $vf12, $vf13, $vf5 # TMP = TRANSFORMED(V1) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf12, $vf12 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
|
||||
# LOAD VERTEX 3
|
||||
ld $t0,0x30($a0) # t0 = src[2].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x38($a0) # t0 = src[2].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x04($a3) # clip_flags[1] = t0
|
||||
|
||||
# TRANSFORM VERTEX 3
|
||||
lqc2 $vf14, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row4[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf14 # ACC[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.y
|
||||
vmaddz $vf15, $vf3, $vf14 # OUT[xyzw] = ACC[xyzw] + mvp.row3[xyzw] * IN.z
|
||||
sqc2 $vf15, 0x20($a1) # dst[2] = TRANSFORMED(V2)
|
||||
#vmul $vf14, $vf15, $vf5 # TMP = TRANSFORMED(V2) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf14, $vf14 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
|
||||
# LOAD VERTEX 4
|
||||
ld $t0,0x48($a0) # t0 = src[3].x,y
|
||||
sd $t0,0x00($a2) # tmp.x,y = t0
|
||||
lw $t0,0x50($a0) # t0 = src[3].z
|
||||
sw $t0,0x08($a2) # tmp.z = t0
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x08($a3) # clip_flags[2] = t0
|
||||
|
||||
# TRANSFORM VERTEX 4
|
||||
lqc2 $vf16, 0x00($a2) # IN = tmp
|
||||
vmulaw $ACC, $vf4, $vf0 # ACC[xyzw] = mvp.row4[xyzw] * 1.0f; (vf0.w is 1)
|
||||
vmaddax $ACC, $vf1, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row1[xyzw] * IN.x
|
||||
vmadday $ACC, $vf2, $vf16 # ACC[xyzw] = ACC[xyzw] + mvp.row2[xyzw] * IN.y
|
||||
vmaddz $vf17, $vf3, $vf16 # OUT[xyzw] = ACC[xyzw] + mvp.row3[xyzw] * IN.z
|
||||
#vmul $vf16, $vf17, $vf5 # TMP = TRANSFORMED(V3) * CLIP_PLANES_ADJUST
|
||||
#vclipw.xyz $vf16, $vf16 # CLIP_FLAGS.append(CLIP(TMP.xyz, TMP.w))
|
||||
|
||||
# Desired output (two triangles: V0,V1,V2 and V2,V3,V0)
|
||||
# dst[0] = V0
|
||||
# dst[1] = V1
|
||||
# dst[2] = V2
|
||||
# dst[3] = V2
|
||||
# dst[4] = V3
|
||||
# dst[5] = V0
|
||||
# dst[0], dst[1] and dst[2] were already stored above, only the second triangle remains
|
||||
sqc2 $vf15, 0x30($a1) # dst[3] = TRANSFORMED(V2)
|
||||
sqc2 $vf17, 0x40($a1) # dst[4] = TRANSFORMED(V3)
|
||||
sqc2 $vf11, 0x50($a1) # dst[5] = TRANSFORMED(V0)
|
||||
#vnop # adjust for delay
|
||||
#cfc2 $t0, $18 # t0 = VP0_REGS[CLIP_FLAGS]
|
||||
#sw $t0,0x0C($a3) # clip_flags[3] = t0
|
||||
jr $ra # return to caller
|
||||
nop
|
||||
|
||||
.align 4
|
||||
|
||||
# Single precision 1.0 constant, read by TransformTexturedQuad to fill tmp.w
# NOTE(review): no section directive is visible here, so this presumably lands in the same section as the code above - confirm
.global ONE_VALUE
|
||||
ONE_VALUE: .float 1.0
|
@ -435,6 +435,7 @@ typedef struct Matrix VU0_MATRIX __attribute__((aligned(16)));
|
||||
typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16)));
|
||||
|
||||
static VU0_MATRIX mvp;
|
||||
extern void LoadMvpMatrix(VU0_MATRIX* matrix);
|
||||
|
||||
void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||
if (type == MATRIX_VIEW) _view = *matrix;
|
||||
@ -442,14 +443,7 @@ void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) {
|
||||
|
||||
Matrix_Mul(&mvp, &_view, &_proj);
|
||||
// TODO
|
||||
asm __volatile__(
|
||||
"lqc2 $vf1, 0x00(%0) \n" // vf1 = mvp.row1
|
||||
"lqc2 $vf2, 0x10(%0) \n" // vf2 = mvp.row2
|
||||
"lqc2 $vf3, 0x20(%0) \n" // vf3 = mvp.row3
|
||||
"lqc2 $vf4, 0x30(%0) \n" // vf4 = mvp.row4
|
||||
:
|
||||
: "r" (&mvp)
|
||||
);
|
||||
LoadMvpMatrix(&mvp);
|
||||
}
|
||||
|
||||
void Gfx_LoadIdentityMatrix(MatrixType type) {
|
||||
@ -619,6 +613,8 @@ static u64* DrawTexturedTriangle(u64* dw, VU0_VECTOR* coords,
|
||||
return dw;
|
||||
}
|
||||
|
||||
extern void TransformTexturedQuad(void* src, VU0_VECTOR* dst, VU0_VECTOR* tmp, int* clip_flags);
|
||||
|
||||
static void DrawTexturedTriangles(int verticesCount, int startVertex) {
|
||||
struct VertexTextured* v = (struct VertexTextured*)gfx_vertices + startVertex;
|
||||
qword_t* base = q;
|
||||
@ -626,29 +622,22 @@ static void DrawTexturedTriangles(int verticesCount, int startVertex) {
|
||||
u64* dw = (u64*)q;
|
||||
|
||||
unsigned numVerts = 0;
|
||||
VU0_VECTOR V[4];
|
||||
VU0_VECTOR V[6], tmp;
|
||||
int clip[4];
|
||||
|
||||
for (int i = 0; i < verticesCount / 4; i++, v += 4)
|
||||
{
|
||||
TransformVertex(v + 0, &V[0]);
|
||||
TransformVertex(v + 1, &V[1]);
|
||||
TransformVertex(v + 2, &V[2]);
|
||||
TransformVertex(v + 3, &V[3]);
|
||||
TransformTexturedQuad(v, V, &tmp, clip);
|
||||
|
||||
// V0, V1, V2
|
||||
//if (((clip[0] | clip[1] | clip[2]) & 0x3F) == 0) {
|
||||
if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) {
|
||||
dw = DrawTexturedTriangle(dw, V, v + 0, v + 1, v + 2);
|
||||
numVerts += 3;
|
||||
}
|
||||
|
||||
VU0_VECTOR v0 = V[0];
|
||||
V[0] = V[2];
|
||||
V[1] = V[3];
|
||||
V[2] = v0;
|
||||
|
||||
// V2, V3, V0
|
||||
if (NotClipped(V[0]) && NotClipped(V[1]) && NotClipped(V[2])) {
|
||||
dw = DrawTexturedTriangle(dw, V, v + 2, v + 3, v + 0);
|
||||
//if (((clip[2] | clip[3] | clip[0]) & 0x3F) == 0) {
|
||||
if (NotClipped(V[3]) && NotClipped(V[4]) && NotClipped(V[5])) {
|
||||
dw = DrawTexturedTriangle(dw, V + 3, v + 2, v + 3, v + 0);
|
||||
numVerts += 3;
|
||||
}
|
||||
}
|
||||
@ -821,11 +810,36 @@ void Gfx_OnWindowResize(void) {
|
||||
Gfx_SetScissor( 0, 0, Game.Width, Game.Height);
|
||||
}
|
||||
|
||||
extern void LoadClipScaleFactors(VU0_VECTOR* scale);
|
||||
// Records the viewport half extents and origin from the given rectangle,
// then passes the X/Y guard band clip scale factors to LoadClipScaleFactors
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||
vp_hwidth = w / 2;
|
||||
vp_hheight = h / 2;
|
||||
vp_originX = ftoi4(2048 - (x / 2));
|
||||
vp_originY = -ftoi4(2048 - (y / 2));
|
||||
|
||||
// The code below clips to the viewport clip planes
|
||||
// For e.g. X this is [2048 - vp_hwidth, 2048 + vp_hwidth]
|
||||
// However the guard band itself ranges from 0 to 4096
|
||||
// To reduce need to clip, clip against guard band on X/Y axes instead
|
||||
/*return
|
||||
xAdj >= -pos.w && xAdj <= pos.w &&
|
||||
yAdj >= -pos.w && yAdj <= pos.w &&
|
||||
pos.z >= -pos.w && pos.z <= pos.w;*/
|
||||
|
||||
// Rescale clip planes to guard band extent:
|
||||
// X/W * vp_hwidth <= vp_hwidth -- clipping against viewport
|
||||
// X/W <= 1
|
||||
// X <= W
|
||||
// X/W * vp_hwidth <= 2048 -- clipping against guard band
|
||||
// X/W <= 2048 / vp_hwidth
|
||||
// X * vp_hwidth / 2048 <= W
|
||||
// So X and Y are multiplied by (half extent / 2048) before being compared against W
|
||||
VU0_VECTOR scale;
|
||||
scale.x = vp_hwidth / 2048.0f;
|
||||
scale.y = vp_hheight / 2048.0f;
|
||||
scale.z = 1.0f; // Z is left unscaled
|
||||
scale.w = 1.0f; // W is left unscaled
|
||||
|
||||
LoadClipScaleFactors(&scale);
|
||||
}
|
||||
|
||||
void Gfx_SetScissor(int x, int y, int w, int h) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user