diff --git a/misc/dreamcast/VertexDraw.S b/misc/dreamcast/VertexDraw.S index 63a8e4d75..f9ced88fc 100644 --- a/misc/dreamcast/VertexDraw.S +++ b/misc/dreamcast/VertexDraw.S @@ -6,16 +6,6 @@ ! r13 = cur vertex ! r14 = next vertex (prefetch) -!fr12 = VIEWPORT_HWIDTH -!fr13 = VIEWPORT_HHEIGHT -!fr14 = VIEWPORT_X_PLUS_HWIDTH -!fr15 = VIEWPORT_Y_PLUS_HHEIGHT - -#define F_HW fr12 -#define F_HH fr13 -#define F_XP fr14 -#define F_YP fr15 - #define R_VTX r10 #define R_EOL r11 #define REG_CMD_VTX r10 @@ -51,42 +41,8 @@ add #32,r8 ! EX, SQ += 32 .endm - -! Pushes a vertex to the store queue -! CLOBBERS: fr0, fr4, fr5 -! INPUTS: R (vertex) -! OUTPUTS: -! TODO optimise greatly -.macro ViewportTransform R -! INVERSE W CALCULATION - add #28, \R ! EX, \R = &vertex->w - fmov.s @\R,fr0 ! LS, fr0 = vertex->w - fmul fr0,fr0 ! FE, fr0 = fr0 * fr0 - add #-24, \R ! EX, \R = &vertex->x - fsrra fr0 ! FE, fr0 = 1 / sqrt(fr0) -> 1 / vertex->w - -! TRANSFORM X - fmov.s @\R,fr4 ! LS, fr4 = vertex->x - fmov F_XP,fr5 ! LS, fr5 = VIEWPORT_X_PLUS_HWIDTH - fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * vertex->x - fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth - fmov.s fr5,@\R ! LS, vertex->x = fr5 - add #4, \R ! EX, \R = &vertex->y - -! TRANSFORM Y - fmov.s @\R,fr4 ! LS, fr4 = vertex->y - fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT - fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * vertex->y - fmac fr0,fr4,fr5 ! FE, fr5 = fr0 * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight - fmov.s fr5,@\R ! LS, vertex->y = fr5 - add #4, \R ! EX, \R = &vertex->z - -! ASSIGN Z - fmov.s fr0,@\R ! LS, vertex->z = fr0 - add #-12, \R ! EX, \R -= 12 (back to start of vertex) -.endm - ! Transforms then pushes a vertex to the store queue +! NOTE: vertices are assumed to have already been viewport transformed ! CLOBBERS: r2, fr0, fr4, fr5 ! INPUTS: R (vertex), r8 (SQ global) ! OUTPUTS: R, r8 altered @@ -109,24 +65,20 @@ ! TRANSFORM X fmov.s @\R,fr4 ! LS, fr4 = SRC->x - fmov F_XP,fr5 ! 
LS, fr5 = VIEWPORT_X_PLUS_HWIDTH - fmul F_HW,fr4 ! FE, fr4 = VIEWPORT_HWIDTH * SRC->x + fmul fr0,fr4 ! FE, fr4 = invW * SRC->x mov.l @(20,\R),r2 ! LS, tmp = SRC->bgra mov.l r2,@(20,r8) ! LS, SRC->bgra = tmp - fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (X * F * hwidth) + x_plus_hwidth add #4, \R ! EX, SRC += 4 - fmov.s fr5,@r8 ! LS, DST->x = fr5 + fmov.s fr4,@r8 ! LS, DST->x = fr4 ! TRANSFORM Y fmov.s @\R,fr4 ! LS, fr4 = SRC->y - fmov F_YP,fr5 ! LS, fr5 = VIEWPORT_Y_PLUS_HHEIGHT - fmul F_HH,fr4 ! FE, fr4 = VIEWPORT_HHEIGHT * SRC->y add #8, r8 ! EX, DST += 8 + fmul fr0,fr4 ! FE, fr4 = invW * SRC->y fmov.s fr0,@r8 ! LS, DST->z = invW - fmac fr0,fr4,fr5 ! FE, fr5 = invW * fr4 + fr5 -- (Y * F * hheight) + y_plus_hheight add #-4, r8 ! EX, DST -= 4 add #-8, \R ! EX, src -= 8 (back to start of vertex) - fmov.s fr5,@r8 ! LS, DST->y = fr5 + fmov.s fr4,@r8 ! LS, DST->y = fr4 add #-8,r8 ! EX, DST -= 8 (back to start of vertex) pref @r8 ! LS, Trigger SQ @@ -561,17 +513,6 @@ _ProcessVertexList: mov.l r13,@-r15 mov.l r14,@-r15 sts.l pr,@-r15 -! STORE FPU REGISTERS - fmov.s F_HW,@-r15 - fmov.s F_HH,@-r15 - fmov.s F_XP,@-r15 - fmov.s F_YP,@-r15 -! VIEWPORT SETUP - mov.l .VP_1,r0 ! LS, &vp - fmov.s @r0+,F_HW ! LS, vp.HWIDTH - fmov.s @r0+,F_HH ! LS, vp.HHEIGHT - fmov.s @r0+,F_XP ! LS, vp.X_PLUS_HWIDTH - fmov.s @r0+,F_YP ! LS, vp.Y_PLUS_HHEIGHT ! REGISTER SETUP mov r4,r14 mov r4,r13 @@ -583,33 +524,11 @@ _ProcessVertexList: bra SUBMIT_LOOP add #-64,r15 -! Handles a non-vertex command +! Submits a PowerVR GPU command DO_CMD: - mov r13,r4 ! r4 = CUR - mov r1,r0 ! r0 = MASK - cmp/eq #35,r0 ! T = MASK == 0x23 - bt.s 9f ! if (T) goto 9; - nop -! PowerVR GPU command PushVertex REG_V0 bra NEXT_ITER nop -! Viewport update command -9: - add #4,r4 - mov.l .VP_1,r2 - ! Load VIEWPORT registers - fmov.s @r4+,F_HW ! VIEWPORT_HWIDTH = src->x - fmov.s @r4+,F_HH ! VIEWPORT_HHEIGHT = src->y - fmov.s @r4+,F_XP ! VIEWPORT_X_PLUS_HWIDTH = src->z - add #16,r2 - fmov.s @r4+,F_YP ! 
VIEWPORT_Y_PLUS_HHEIGHT = src->u - ! And store to vp global - fmov.s F_YP,@-r2 - fmov.s F_XP,@-r2 - fmov.s F_HH,@-r2 - bra NEXT_ITER - fmov.s F_HW,@-r2 SUBMIT_LOOP: mov.l @r13,r0 ! FLAGS = CUR->flags @@ -645,18 +564,6 @@ NEXT_ITER: mov r14,r13 ! CUR = NEXT add #64,r15 -! VIEWPORT SAVE - mov.l .VP_1,r0 - add #16,r0 - fmov.s F_YP,@-r0 - fmov.s F_XP,@-r0 - fmov.s F_HH,@-r0 - fmov.s F_HW,@-r0 -! RESTORE FPU REGISTERS - fmov.s @r15+,F_YP - fmov.s @r15+,F_XP - fmov.s @r15+,F_HH - fmov.s @r15+,F_HW ! RESTORE CPU REGISTERS lds.l @r15+,pr mov.l @r15+,r14 diff --git a/misc/dreamcast/VertexTransform.S b/misc/dreamcast/VertexTransform.S index 1a36ca965..877798b04 100644 --- a/misc/dreamcast/VertexTransform.S +++ b/misc/dreamcast/VertexTransform.S @@ -48,11 +48,6 @@ !fr5 = y !fr6 = z !fr7 = w -!fr8 = VIEWPORT_HWIDTH -!fr9 = VIEWPORT_HHEIGHT -!fr10 = VIEWPORT_X_PLUS_HWIDTH -!fr11 = VIEWPORT_Y_PLUS_HHEIGHT - !fv4 = XYZW diff --git a/src/Graphics_Dreamcast.c b/src/Graphics_Dreamcast.c index 80b9281da..a2abec4ca 100644 --- a/src/Graphics_Dreamcast.c +++ b/src/Graphics_Dreamcast.c @@ -448,11 +448,21 @@ static matrix_t __attribute__((aligned(32))) _proj, _view; static float textureOffsetX, textureOffsetY; static int textureOffset; +static float vp_scaleX, vp_scaleY, vp_offsetX, vp_offsetY; +static matrix_t __attribute__((aligned(32))) mat_vp; + void Gfx_LoadMatrix(MatrixType type, const struct Matrix* matrix) { if (type == MATRIX_PROJ) memcpy(&_proj, matrix, sizeof(struct Matrix)); if (type == MATRIX_VIEW) memcpy(&_view, matrix, sizeof(struct Matrix)); - - mat_load( &_proj); + + memcpy(&mat_vp, &Matrix_Identity, sizeof(struct Matrix)); + mat_vp[0][0] = vp_scaleX; + mat_vp[1][1] = vp_scaleY; + mat_vp[3][0] = vp_offsetX; + mat_vp[3][1] = vp_offsetY; + + mat_load(&mat_vp); + mat_apply(&_proj); mat_apply(&_view); } @@ -645,13 +655,10 @@ static void PushCommand(void* cmd) { } void Gfx_SetViewport(int x, int y, int w, int h) { - Vertex c; - c.flags = PVR_CMD_USERCLIP | 0x23; - c.x = w * 
0.5f; // hwidth - c.y = h * -0.5f; // hheight - c.z = x + w * 0.5f; // x_plus_hwidth - c.u = y + h * 0.5f; // y_plus_hheight - PushCommand(&c); + vp_scaleX = w * 0.5f; // hwidth + vp_scaleY = h * -0.5f; // hheight + vp_offsetX = x + w * 0.5f; // x_plus_hwidth + vp_offsetY = y + h * 0.5f; // y_plus_hheight } void Gfx_SetScissor(int x, int y, int w, int h) { diff --git a/third_party/gldc/src/gldc.h b/third_party/gldc/src/gldc.h index 6d462c18e..8dc9f6e63 100644 --- a/third_party/gldc/src/gldc.h +++ b/third_party/gldc/src/gldc.h @@ -44,15 +44,6 @@ typedef struct { #define GL_FORCE_INLINE static __attribute__((always_inline)) inline -typedef struct { - float hwidth; /* width * 0.5f */ - float hheight; /* height * 0.5f */ - float x_plus_hwidth; - float y_plus_hheight; -} Viewport; - -extern Viewport VIEWPORTS[3]; - typedef struct { //0 GLuint index; diff --git a/third_party/gldc/src/sh4.c b/third_party/gldc/src/sh4.c index 4c27eab27..9cbae7b4e 100644 --- a/third_party/gldc/src/sh4.c +++ b/third_party/gldc/src/sh4.c @@ -8,7 +8,6 @@ #define SQ_BASE_ADDRESS (void*) 0xe0000000 #define PREFETCH(addr) __builtin_prefetch((addr)) -Viewport vp; GL_FORCE_INLINE float _glFastInvert(float x) { return MATH_fsrra(x * x); @@ -17,10 +16,10 @@ GL_FORCE_INLINE float _glFastInvert(float x) { GL_FORCE_INLINE void _glPerspectiveDivideVertex(Vertex* vertex) { const float f = _glFastInvert(vertex->w); - /* Convert to NDC and apply viewport */ - vertex->x = (vertex->x * f * vp.hwidth) + vp.x_plus_hwidth; - vertex->y = (vertex->y * f * vp.hheight) + vp.y_plus_hheight; + /* Perspective divide only (viewport transform is already applied to the vertex) */ + vertex->x = vertex->x * f; + vertex->y = vertex->y * f; vertex->z = f; } @@ -377,21 +376,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_ } } -static __attribute__((noinline)) void HandleCommand(Vertex* v) { - if ((v->flags & 0xFF) != 0x23) { - _glPushHeaderOrVertex(v); - return; - } - - vp.hwidth = 
v->x; - vp.hheight = v->y; - vp.x_plus_hwidth = v->z; - vp.y_plus_hheight = v->u; -} - extern void ProcessVertexList(Vertex* v3, int n, void* sq_addr); void SceneListSubmit(Vertex* v3, int n, int type) { - vp = VIEWPORTS[type]; PVR_SET(SPAN_SORT_CFG, 0x0); //Set PVR DMA registers @@ -414,7 +400,7 @@ void SceneListSubmit(Vertex* v3, int n, int type) { case PVR_CMD_VERTEX: continue; default: - HandleCommand(v3); + _glPushHeaderOrVertex(v3); continue; }; @@ -460,5 +446,4 @@ void SceneListSubmit(Vertex* v3, int n, int type) { } _glFlushBuffer(); - VIEWPORTS[type] = vp; } diff --git a/third_party/gldc/src/state.c b/third_party/gldc/src/state.c index c9068a63f..7481ed92e 100644 --- a/third_party/gldc/src/state.c +++ b/third_party/gldc/src/state.c @@ -26,7 +26,6 @@ GLboolean AUTOSORT_ENABLED; AlignedVector OP_LIST; AlignedVector PT_LIST; AlignedVector TR_LIST; -Viewport VIEWPORTS[3]; void glKosInit() { _glInitTextures();