mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-18 12:05:14 -04:00
PS2: Optimise vertex transform further
This commit is contained in:
parent
8767f31d6e
commit
b09a76ea7e
@ -5,6 +5,8 @@
|
|||||||
# vf3 = mvp.row3
|
# vf3 = mvp.row3
|
||||||
# vf4 = mvp.row4
|
# vf4 = mvp.row4
|
||||||
# vf5 = clipping scale adjustments to match guardbands
|
# vf5 = clipping scale adjustments to match guardbands
|
||||||
|
# vf6 = viewport origin
|
||||||
|
# vf7 = viewport scale
|
||||||
# NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for
|
# NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for
|
||||||
|
|
||||||
.align 4
|
.align 4
|
||||||
@ -13,10 +15,17 @@
|
|||||||
.type LoadMvpMatrix,%function
|
.type LoadMvpMatrix,%function
|
||||||
.global LoadClipScaleFactors
|
.global LoadClipScaleFactors
|
||||||
.type LoadClipScaleFactors,%function
|
.type LoadClipScaleFactors,%function
|
||||||
|
.global LoadViewportOrigin
|
||||||
|
.type LoadViewportOrigin,%function
|
||||||
|
.global LoadViewportScale
|
||||||
|
.type LoadViewportScale,%function
|
||||||
|
|
||||||
.global TransformTexturedQuad
|
.global TransformTexturedQuad
|
||||||
.type TransformTexturedQuad,%function
|
.type TransformTexturedQuad,%function
|
||||||
.global TransformColouredQuad
|
.global TransformColouredQuad
|
||||||
.type TransformColouredQuad,%function
|
.type TransformColouredQuad,%function
|
||||||
|
.global ViewportTransform
|
||||||
|
.type ViewportTransform,%function
|
||||||
|
|
||||||
# Loads matrix into VU0 registers
|
# Loads matrix into VU0 registers
|
||||||
# $a0 = addresss of mvp
|
# $a0 = addresss of mvp
|
||||||
@ -37,6 +46,22 @@ LoadClipScaleFactors:
|
|||||||
nop
|
nop
|
||||||
|
|
||||||
|
|
||||||
|
# Loads viewport origin into VU0 registers
|
||||||
|
# $a0 = addresss of origin
|
||||||
|
LoadViewportOrigin:
|
||||||
|
lqc2 $vf6, 0x00($a0) # vf6 = origin
|
||||||
|
jr $ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
|
||||||
|
# Loads viewport scale into VU0 registers
|
||||||
|
# $a0 = addresss of scale
|
||||||
|
LoadViewportScale:
|
||||||
|
lqc2 $vf7, 0x00($a0) # vf7 = scale
|
||||||
|
jr $ra
|
||||||
|
nop
|
||||||
|
|
||||||
|
|
||||||
.macro TransformVertex1
|
.macro TransformVertex1
|
||||||
# TRANSFORM VERTEX 1
|
# TRANSFORM VERTEX 1
|
||||||
lqc2 $vf10, 0x00($a2) # IN = tmp
|
lqc2 $vf10, 0x00($a2) # IN = tmp
|
||||||
@ -201,3 +226,16 @@ TransformColouredQuad:
|
|||||||
|
|
||||||
.global ONE_VALUE
|
.global ONE_VALUE
|
||||||
ONE_VALUE: .float 1.0
|
ONE_VALUE: .float 1.0
|
||||||
|
|
||||||
|
|
||||||
|
# $a0 = addresss of src
|
||||||
|
# $a1 = addresss of dst
|
||||||
|
ViewportTransform:
|
||||||
|
lqc2 $vf16, 0x00($a0) # IN = src
|
||||||
|
vmulw $vf17, $vf16, $vf16 # TMP = IN[xyzw] * IN.w (inverse W)
|
||||||
|
vmul $vf18, $vf17, $vf7 # TMP = TMP * viewport_scale
|
||||||
|
vadd $vf19, $vf18, $vf6 # TMP = TMP + viewport_origin
|
||||||
|
vftoi0 $vf19, $vf19 # TMP = int(TMP)
|
||||||
|
sqc2 $vf19, 0x00($a1) # dst = TMP
|
||||||
|
jr $ra
|
||||||
|
nop
|
||||||
|
@ -15,20 +15,21 @@
|
|||||||
#include <draw3d.h>
|
#include <draw3d.h>
|
||||||
#include <malloc.h>
|
#include <malloc.h>
|
||||||
|
|
||||||
|
typedef struct Matrix VU0_MATRIX __attribute__((aligned(16)));
|
||||||
|
typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16)));
|
||||||
|
typedef struct { int x, y, z, w; } VU0_IVECTOR __attribute__((aligned(16)));
|
||||||
|
|
||||||
static void* gfx_vertices;
|
static void* gfx_vertices;
|
||||||
extern framebuffer_t fb_colors[2];
|
extern framebuffer_t fb_colors[2];
|
||||||
extern zbuffer_t fb_depth;
|
extern zbuffer_t fb_depth;
|
||||||
static float vp_hwidth, vp_hheight;
|
static VU0_VECTOR vp_origin, vp_scale;
|
||||||
static float vp_originX, vp_originY;
|
|
||||||
static cc_bool stateDirty, formatDirty;
|
static cc_bool stateDirty, formatDirty;
|
||||||
|
|
||||||
typedef struct Matrix VU0_MATRIX __attribute__((aligned(16)));
|
|
||||||
typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16)));
|
|
||||||
|
|
||||||
static VU0_MATRIX mvp;
|
static VU0_MATRIX mvp;
|
||||||
static VU0_VECTOR clip_scale;
|
|
||||||
extern void LoadMvpMatrix(VU0_MATRIX* matrix);
|
extern void LoadMvpMatrix(VU0_MATRIX* matrix);
|
||||||
extern void LoadClipScaleFactors(VU0_VECTOR* scale);
|
extern void LoadClipScaleFactors(VU0_VECTOR* scale);
|
||||||
|
extern void LoadViewportOrigin(VU0_VECTOR* origin);
|
||||||
|
extern void LoadViewportScale(VU0_VECTOR* scale);
|
||||||
|
|
||||||
// double buffering
|
// double buffering
|
||||||
static packet_t* packets[2];
|
static packet_t* packets[2];
|
||||||
@ -521,18 +522,16 @@ void Gfx_SetVertexFormat(VertexFormat fmt) {
|
|||||||
formatDirty = true;
|
formatDirty = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
//#define VCopy(dst, src) dst.x = vp_hwidth * (1 + src.x / src.w); dst.y = vp_hheight * (1 - src.y / src.w); dst.z = src.z / src.w; dst.w = src.w;
|
extern void ViewportTransform(VU0_VECTOR* src, VU0_IVECTOR* dst);
|
||||||
static xyz_t FinishVertex(VU0_VECTOR* src, float invW) {
|
static xyz_t FinishVertex(VU0_VECTOR* src, float invW) {
|
||||||
float x = vp_hwidth * (src->x * invW);
|
src->w = invW;
|
||||||
float y = vp_hheight * (src->y * invW);
|
VU0_IVECTOR tmp;
|
||||||
float z = src->z * invW;
|
ViewportTransform(src, &tmp);
|
||||||
|
|
||||||
unsigned int maxZ = 1 << (32 - 1); // TODO: half this? or << 24 instead?
|
|
||||||
|
|
||||||
xyz_t xyz;
|
xyz_t xyz;
|
||||||
xyz.x = (short)(x * 16 + vp_originX);
|
xyz.x = (short)tmp.x;
|
||||||
xyz.y = (short)(y * -16 + vp_originY);
|
xyz.y = (short)tmp.y;
|
||||||
xyz.z = (unsigned int)((z + 1.0f) * maxZ);
|
xyz.z = tmp.z;
|
||||||
return xyz;
|
return xyz;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -740,11 +739,11 @@ cc_bool Gfx_WarnIfNecessary(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_BeginFrame(void) {
|
void Gfx_BeginFrame(void) {
|
||||||
Platform_LogConst("--- Frame ---");
|
//Platform_LogConst("--- Frame ---");
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_EndFrame(void) {
|
void Gfx_EndFrame(void) {
|
||||||
Platform_LogConst("--- EF1 ---");
|
//Platform_LogConst("--- EF1 ---");
|
||||||
// Double buffering
|
// Double buffering
|
||||||
graph_set_framebuffer_filtered(fb_colors[context].address,
|
graph_set_framebuffer_filtered(fb_colors[context].address,
|
||||||
fb_colors[context].width,
|
fb_colors[context].width,
|
||||||
@ -757,16 +756,16 @@ void Gfx_EndFrame(void) {
|
|||||||
DMATAG_END(dma_tag, (q - current->data) - 1, 0, 0, 0);
|
DMATAG_END(dma_tag, (q - current->data) - 1, 0, 0, 0);
|
||||||
dma_wait_fast();
|
dma_wait_fast();
|
||||||
dma_channel_send_chain(DMA_CHANNEL_GIF, current->data, q - current->data, 0, 0);
|
dma_channel_send_chain(DMA_CHANNEL_GIF, current->data, q - current->data, 0, 0);
|
||||||
Platform_LogConst("--- EF2 ---");
|
//Platform_LogConst("--- EF2 ---");
|
||||||
|
|
||||||
draw_wait_finish();
|
draw_wait_finish();
|
||||||
Platform_LogConst("--- EF3 ---");
|
//Platform_LogConst("--- EF3 ---");
|
||||||
|
|
||||||
if (gfx_vsync) graph_wait_vsync();
|
if (gfx_vsync) graph_wait_vsync();
|
||||||
|
|
||||||
context ^= 1;
|
context ^= 1;
|
||||||
UpdateContext();
|
UpdateContext();
|
||||||
Platform_LogConst("--- EF4 ---");
|
//Platform_LogConst("--- EF4 ---");
|
||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_SetVSync(cc_bool vsync) {
|
void Gfx_SetVSync(cc_bool vsync) {
|
||||||
@ -779,11 +778,21 @@ void Gfx_OnWindowResize(void) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Gfx_SetViewport(int x, int y, int w, int h) {
|
void Gfx_SetViewport(int x, int y, int w, int h) {
|
||||||
vp_hwidth = w / 2;
|
VU0_VECTOR clip_scale;
|
||||||
vp_hheight = h / 2;
|
unsigned int maxZ = 1 << (24 - 1); // TODO: half this? or << 24 instead?
|
||||||
vp_originX = ftoi4(2048 - (x / 2));
|
|
||||||
vp_originY = -ftoi4(2048 - (y / 2));
|
|
||||||
|
|
||||||
|
vp_origin.x = ftoi4(2048 - (x / 2));
|
||||||
|
vp_origin.y = -ftoi4(2048 - (y / 2));
|
||||||
|
vp_origin.z = maxZ / 2.0f;
|
||||||
|
LoadViewportOrigin(&vp_origin);
|
||||||
|
|
||||||
|
vp_scale.x = 16 * (w / 2);
|
||||||
|
vp_scale.y = -16 * (h / 2);
|
||||||
|
vp_scale.z = maxZ / 2.0f;
|
||||||
|
LoadViewportScale(&vp_scale);
|
||||||
|
|
||||||
|
float hwidth = w / 2;
|
||||||
|
float hheight = h / 2;
|
||||||
// The code below clips to the viewport clip planes
|
// The code below clips to the viewport clip planes
|
||||||
// For e.g. X this is [2048 - vp_width / 2, 2048 + vp_width / 2]
|
// For e.g. X this is [2048 - vp_width / 2, 2048 + vp_width / 2]
|
||||||
// However the guard band itself ranges from 0 to 4096
|
// However the guard band itself ranges from 0 to 4096
|
||||||
@ -801,8 +810,8 @@ void Gfx_SetViewport(int x, int y, int w, int h) {
|
|||||||
// X/W <= 2048 / vp_hwidth
|
// X/W <= 2048 / vp_hwidth
|
||||||
// X * vp_hwidth / 2048 <= W
|
// X * vp_hwidth / 2048 <= W
|
||||||
|
|
||||||
clip_scale.x = vp_hwidth / 2048.0f;
|
clip_scale.x = hwidth / 2048.0f;
|
||||||
clip_scale.y = vp_hheight / 2048.0f;
|
clip_scale.y = hheight / 2048.0f;
|
||||||
clip_scale.z = 1.0f;
|
clip_scale.z = 1.0f;
|
||||||
clip_scale.w = 1.0f;
|
clip_scale.w = 1.0f;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user