diff --git a/misc/ps2/VertexTransform.S b/misc/ps2/VertexTransform.S index 8ae49a2fa..b3448d3f8 100644 --- a/misc/ps2/VertexTransform.S +++ b/misc/ps2/VertexTransform.S @@ -5,6 +5,8 @@ # vf3 = mvp.row3 # vf4 = mvp.row4 # vf5 = clipping scale adjustments to match guardbands +# vf6 = viewport origin +# vf7 = viewport scale # NOTE: vclipw.xyz takes 4 cycles to produce result, which must be accounted for .align 4 @@ -13,10 +15,17 @@ .type LoadMvpMatrix,%function .global LoadClipScaleFactors .type LoadClipScaleFactors,%function +.global LoadViewportOrigin +.type LoadViewportOrigin,%function +.global LoadViewportScale +.type LoadViewportScale,%function + .global TransformTexturedQuad .type TransformTexturedQuad,%function .global TransformColouredQuad .type TransformColouredQuad,%function +.global ViewportTransform +.type ViewportTransform,%function # Loads matrix into VU0 registers # $a0 = addresss of mvp @@ -37,6 +46,22 @@ LoadClipScaleFactors: nop +# Loads viewport origin into VU0 registers +# $a0 = addresss of origin +LoadViewportOrigin: + lqc2 $vf6, 0x00($a0) # vf6 = origin + jr $ra + nop + + +# Loads viewport scale into VU0 registers +# $a0 = addresss of scale +LoadViewportScale: + lqc2 $vf7, 0x00($a0) # vf7 = scale + jr $ra + nop + + .macro TransformVertex1 # TRANSFORM VERTEX 1 lqc2 $vf10, 0x00($a2) # IN = tmp @@ -201,3 +226,16 @@ TransformColouredQuad: .global ONE_VALUE ONE_VALUE: .float 1.0 + + +# $a0 = addresss of src +# $a1 = addresss of dst +ViewportTransform: + lqc2 $vf16, 0x00($a0) # IN = src + vmulw $vf17, $vf16, $vf16 # TMP = IN[xyzw] * IN.w (inverse W) + vmul $vf18, $vf17, $vf7 # TMP = TMP * viewport_scale + vadd $vf19, $vf18, $vf6 # TMP = TMP + viewport_origin + vftoi0 $vf19, $vf19 # TMP = int(TMP) + sqc2 $vf19, 0x00($a1) # dst = TMP + jr $ra + nop diff --git a/src/Graphics_PS2.c b/src/Graphics_PS2.c index b97c29821..26f9d549b 100644 --- a/src/Graphics_PS2.c +++ b/src/Graphics_PS2.c @@ -15,20 +15,21 @@ #include #include +typedef struct Matrix VU0_MATRIX __attribute__((aligned(16))); +typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16))); +typedef struct { int x, y, z, w; } VU0_IVECTOR __attribute__((aligned(16))); + static void* gfx_vertices; extern framebuffer_t fb_colors[2]; extern zbuffer_t fb_depth; -static float vp_hwidth, vp_hheight; -static float vp_originX, vp_originY; +static VU0_VECTOR vp_origin, vp_scale; static cc_bool stateDirty, formatDirty; -typedef struct Matrix VU0_MATRIX __attribute__((aligned(16))); -typedef struct Vec4 VU0_VECTOR __attribute__((aligned(16))); - static VU0_MATRIX mvp; -static VU0_VECTOR clip_scale; extern void LoadMvpMatrix(VU0_MATRIX* matrix); extern void LoadClipScaleFactors(VU0_VECTOR* scale); +extern void LoadViewportOrigin(VU0_VECTOR* origin); +extern void LoadViewportScale(VU0_VECTOR* scale); // double buffering static packet_t* packets[2]; @@ -521,18 +522,16 @@ void Gfx_SetVertexFormat(VertexFormat fmt) { formatDirty = true; } -//#define VCopy(dst, src) dst.x = vp_hwidth * (1 + src.x / src.w); dst.y = vp_hheight * (1 - src.y / src.w); dst.z = src.z / src.w; dst.w = src.w; +extern void ViewportTransform(VU0_VECTOR* src, VU0_IVECTOR* dst); static xyz_t FinishVertex(VU0_VECTOR* src, float invW) { - float x = vp_hwidth * (src->x * invW); - float y = vp_hheight * (src->y * invW); - float z = src->z * invW; - - unsigned int maxZ = 1 << (32 - 1); // TODO: half this? or << 24 instead? - + src->w = invW; + VU0_IVECTOR tmp; + ViewportTransform(src, &tmp); + xyz_t xyz; - xyz.x = (short)(x * 16 + vp_originX); - xyz.y = (short)(y * -16 + vp_originY); - xyz.z = (unsigned int)((z + 1.0f) * maxZ); + xyz.x = (short)tmp.x; + xyz.y = (short)tmp.y; + xyz.z = tmp.z; return xyz; } @@ -740,11 +739,11 @@ cc_bool Gfx_WarnIfNecessary(void) { } void Gfx_BeginFrame(void) { - Platform_LogConst("--- Frame ---"); + //Platform_LogConst("--- Frame ---"); } void Gfx_EndFrame(void) { - Platform_LogConst("--- EF1 ---"); + //Platform_LogConst("--- EF1 ---"); // Double buffering graph_set_framebuffer_filtered(fb_colors[context].address, fb_colors[context].width, @@ -757,16 +756,16 @@ void Gfx_EndFrame(void) { DMATAG_END(dma_tag, (q - current->data) - 1, 0, 0, 0); dma_wait_fast(); dma_channel_send_chain(DMA_CHANNEL_GIF, current->data, q - current->data, 0, 0); - Platform_LogConst("--- EF2 ---"); + //Platform_LogConst("--- EF2 ---"); draw_wait_finish(); - Platform_LogConst("--- EF3 ---"); + //Platform_LogConst("--- EF3 ---"); if (gfx_vsync) graph_wait_vsync(); context ^= 1; UpdateContext(); - Platform_LogConst("--- EF4 ---"); + //Platform_LogConst("--- EF4 ---"); } void Gfx_SetVSync(cc_bool vsync) { @@ -779,11 +778,21 @@ void Gfx_OnWindowResize(void) { } void Gfx_SetViewport(int x, int y, int w, int h) { - vp_hwidth = w / 2; - vp_hheight = h / 2; - vp_originX = ftoi4(2048 - (x / 2)); - vp_originY = -ftoi4(2048 - (y / 2)); + VU0_VECTOR clip_scale; + unsigned int maxZ = 1 << (24 - 1); // TODO: half this? or << 24 instead? + vp_origin.x = ftoi4(2048 - (x / 2)); + vp_origin.y = -ftoi4(2048 - (y / 2)); + vp_origin.z = maxZ / 2.0f; + LoadViewportOrigin(&vp_origin); + + vp_scale.x = 16 * (w / 2); + vp_scale.y = -16 * (h / 2); + vp_scale.z = maxZ / 2.0f; + LoadViewportScale(&vp_scale); + + float hwidth = w / 2; + float hheight = h / 2; // The code below clips to the viewport clip planes // For e.g. X this is [2048 - vp_width / 2, 2048 + vp_width / 2] // However the guard band itself ranges from 0 to 4096 @@ -801,8 +810,8 @@ void Gfx_SetViewport(int x, int y, int w, int h) { // X/W <= 2048 / vp_hwidth // X * vp_hwidth / 2048 <= W - clip_scale.x = vp_hwidth / 2048.0f; - clip_scale.y = vp_hheight / 2048.0f; + clip_scale.x = hwidth / 2048.0f; + clip_scale.y = hheight / 2048.0f; clip_scale.z = 1.0f; clip_scale.w = 1.0f;