diff --git a/src/AudioBackend.c b/src/AudioBackend.c index 71bb1ba62..1f45a13c0 100644 --- a/src/AudioBackend.c +++ b/src/AudioBackend.c @@ -1435,7 +1435,7 @@ cc_result Audio_AllocChunks(cc_uint32 size, struct AudioChunk* chunks, int numCh void* dst = memalign(32, size * numChunks); if (!dst) return ERR_OUT_OF_MEMORY; totalSize += size * numChunks; - Platform_Log3("ALLOC: %i X %i (%i)", &size, &numChunks, &totalSize); + //Platform_Log3("ALLOC: %i X %i (%i)", &size, &numChunks, &totalSize); for (int i = 0; i < numChunks; i++) { diff --git a/src/Graphics_Dreamcast.c b/src/Graphics_Dreamcast.c index 1085bc4c5..58d4d7190 100644 --- a/src/Graphics_Dreamcast.c +++ b/src/Graphics_Dreamcast.c @@ -18,7 +18,7 @@ static cc_bool renderingDisabled; /*########################################################################################################################* *---------------------------------------------------------General---------------------------------------------------------* *#########################################################################################################################*/ -static int InitPowerVR(void) { +static void InitPowerVR(void) { cc_bool autosort = false; // Turn off auto sorting to match traditional GPU behaviour cc_bool fsaa = false; AUTOSORT_ENABLED = autosort; @@ -507,7 +507,7 @@ cc_bool Gfx_WarnIfNecessary(void) { /*########################################################################################################################* *----------------------------------------------------------Drawing--------------------------------------------------------* *#########################################################################################################################*/ -extern void apply_poly_header(pvr_poly_hdr_t* header, PolyList* activePolyList); +extern void apply_poly_header(pvr_poly_hdr_t* header, int list_type); extern Vertex* DrawColouredQuads(const void* src, Vertex* dst, int numQuads); extern Vertex* DrawTexturedQuads(const void* src, Vertex* dst, int numQuads); @@ -522,7 +522,7 @@ void DrawQuads(int count, void* src) { Vertex* beg = aligned_vector_reserve(&output->vector, vec->size + (header_required) + count); if (header_required) { - apply_poly_header((pvr_poly_hdr_t*)beg, output); + apply_poly_header((pvr_poly_hdr_t*)beg, output->list_type); STATE_DIRTY = GL_FALSE; beg++; vec->size += 1; @@ -632,9 +632,13 @@ void Gfx_SetViewport(int x, int y, int w, int h) { } STATE_DIRTY = GL_TRUE; - glViewport(x, y, w, h); glScissor (x, y, w, h); + VIEWPORT.hwidth = w * 0.5f; + VIEWPORT.hheight = h * -0.5f; + VIEWPORT.x_plus_hwidth = x + w * 0.5f; + VIEWPORT.y_plus_hheight = y + h * 0.5f; + VP_COL_HWIDTH = VP_TEX_HWIDTH = w * 0.5f; VP_COL_HHEIGHT = VP_TEX_HHEIGHT = h * -0.5f; diff --git a/src/Platform_PSP.c b/src/Platform_PSP.c index 605113f38..3313953ab 100644 --- a/src/Platform_PSP.c +++ b/src/Platform_PSP.c @@ -327,7 +327,7 @@ cc_result Socket_Create(cc_socket* s, cc_sockaddr* addr, cc_bool nonblocking) { cc_result Socket_Connect(cc_socket s, cc_sockaddr* addr) { struct sockaddr* raw = (struct sockaddr*)addr->data; - int res = sceNetInetConnect(*s, raw, addr->size); + int res = sceNetInetConnect(s, raw, addr->size); return res < 0 ? sceNetInetGetErrno() : 0; } diff --git a/third_party/gldc/src/aligned_vector.h b/third_party/gldc/src/aligned_vector.h index 30dd4ae7e..9ed0f6bf8 100644 --- a/third_party/gldc/src/aligned_vector.h +++ b/third_party/gldc/src/aligned_vector.h @@ -112,14 +112,3 @@ AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_ return ret; } -AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){ - vector->size = 0; -} - -AV_FORCE_INLINE void aligned_vector_init(AlignedVector* vector) { - /* Now initialize the header*/ - vector->size = 0; - vector->capacity = 0; - vector->data = NULL; -} - diff --git a/third_party/gldc/src/flush.c b/third_party/gldc/src/flush.c deleted file mode 100644 index 5c274136c..000000000 --- a/third_party/gldc/src/flush.c +++ /dev/null @@ -1,52 +0,0 @@ -#include -#include "gldc.h" - -PolyList OP_LIST; -PolyList PT_LIST; -PolyList TR_LIST; - -void glKosInit() { - _glInitContext(); - _glInitTextures(); - - OP_LIST.list_type = PVR_LIST_OP_POLY; - PT_LIST.list_type = PVR_LIST_PT_POLY; - TR_LIST.list_type = PVR_LIST_TR_POLY; - - aligned_vector_init(&OP_LIST.vector); - aligned_vector_init(&PT_LIST.vector); - aligned_vector_init(&TR_LIST.vector); - - aligned_vector_reserve(&OP_LIST.vector, 1024 * 3); - aligned_vector_reserve(&PT_LIST.vector, 512 * 3); - aligned_vector_reserve(&TR_LIST.vector, 1024 * 3); -} - - -void glKosSwapBuffers() { - _glApplyScissor(true); - - pvr_scene_begin(); - if(OP_LIST.vector.size > 2) { - pvr_list_begin(PVR_LIST_OP_POLY); - SceneListSubmit((Vertex*)OP_LIST.vector.data, OP_LIST.vector.size); - pvr_list_finish(); - } - - if(PT_LIST.vector.size > 2) { - pvr_list_begin(PVR_LIST_PT_POLY); - SceneListSubmit((Vertex*)PT_LIST.vector.data, PT_LIST.vector.size); - pvr_list_finish(); - } - - if(TR_LIST.vector.size > 2) { - pvr_list_begin(PVR_LIST_TR_POLY); - SceneListSubmit((Vertex*)TR_LIST.vector.data, TR_LIST.vector.size); - pvr_list_finish(); - } - pvr_scene_finish(); - - OP_LIST.vector.size = 0; - PT_LIST.vector.size = 0; - TR_LIST.vector.size = 0; -} diff --git a/third_party/gldc/src/gldc.h b/third_party/gldc/src/gldc.h index 1c9846a6e..2e2894573 100644 --- a/third_party/gldc/src/gldc.h +++ b/third_party/gldc/src/gldc.h @@ -2,15 +2,10 @@ #define PRIVATE_H #include -#include -#include -#include #include "aligned_vector.h" #define MAX_TEXTURE_COUNT 768 - -#define GL_SCISSOR_TEST 0x0008 #define GL_NEAREST 0x2600 #define GL_LINEAR 0x2601 #define GL_OUT_OF_MEMORY 0x0505 @@ -33,7 +28,6 @@ void gldcBindTexture(GLuint texture); int gldcAllocTexture(int w, int h, int format); void gldcGetTexture(void** data, int* width, int* height); -void glViewport(int x, int y, int width, int height); void glScissor( int x, int y, int width, int height); void glKosInit(); @@ -106,8 +100,6 @@ typedef struct { } __attribute__((aligned(32))) TextureObject; -void _glInitContext(); -void _glInitSubmissionTarget(); void _glInitTextures(); extern TextureObject* TEXTURE_ACTIVE; @@ -152,10 +144,6 @@ void _glApplyScissor(int force); extern GLboolean STATE_DIRTY; -#define MIN(a,b) (((a)<(b))?(a):(b)) -#define MAX(a,b) (((a)>(b))?(a):(b)) -#define CLAMP( X, _MIN, _MAX ) ( (X)<(_MIN) ? (_MIN) : ((X)>(_MAX) ? (_MAX) : (X)) ) - void SceneListSubmit(Vertex* v2, int n); static inline int DimensionFlag(int w) { diff --git a/third_party/gldc/src/sh4_math.h b/third_party/gldc/src/sh4_math.h index c12c30d99..34359f6d2 100644 --- a/third_party/gldc/src/sh4_math.h +++ b/third_party/gldc/src/sh4_math.h @@ -133,9 +133,6 @@ static const ALL_FLOATS_STRUCT MATH_identity_matrix = {1.0f, 0.0f, 0.0f, 0.0f, 0 // a*b+c float MATH_fmac(float a, float b, float c) - // a*b-c - float MATH_fmac_Dec(float a, float b, float c) - // fminf() - return the min of two floats // This doesn't check for NaN float MATH_Fast_Fminf(float a, float b) @@ -205,19 +202,6 @@ static inline __attribute__((always_inline)) float MATH_fmac(float a, float b, f return c; } -// a*b-c -static inline __attribute__((always_inline)) float MATH_fmac_Dec(float a, float b, float c) -{ - asm volatile ("fneg %[floatc]\n\t" - "fmac fr0, %[floatb], %[floatc]\n" - : [floatc] "+&f" (c) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed - : "w" (a), [floatb] "f" (b) // inputs - : // no clobbers - ); - - return c; -} - // Fast fminf() - return the min of two floats // This doesn't check for NaN static inline __attribute__((always_inline)) float MATH_Fast_Fminf(float a, float b) @@ -833,18 +817,6 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad( // Sum of Squares (w^2 + x^2 + y^2 + z^2) float MATH_Sum_of_Squares(float w, float x, float y, float z) - // Cross product with bonus multiply (vec X vec = orthogonal vec, with an extra a*b=c) - RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b) - - // Cross product (vec X vec = orthogonal vec) - RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3) - - // Outer product (vec (X) vec = 4x4 matrix) - void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4) - - // Matrix transform (4x4 matrix * 4x1 vec = 4x1 vec) - RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4) - // 4x4 Matrix transpose (XMTRX^T) void MATH_Matrix_Transpose(void) @@ -953,434 +925,6 @@ static inline __attribute__((always_inline)) float MATH_Sum_of_Squares(float w, return __z; } -// Cross product: vec X vec = orthogonal vec -// _ _ _ _ _ _ -// | x1 | | y1 | | z1 | -// | x2 | X | y2 | = | z2 | -// |_ x3 _| |_ y3 _| |_ z3 _| -// -// With bonus multiply: -// -// a * b = c -// -// IMPORTANT USAGE INFORMATION (cross product): -// -// Return vector struct maps as below to the above diagram: -// -// typedef struct { -// float z1; -// float z2; -// float z3; -// float z4; // c is stored in z4, and c = a*b if using 'with mult' version (else c = 0) -// } RETURN_VECTOR_STRUCT; -// -// For people familiar with the unit vector notation, z1 == 'i', z2 == 'j', -// and z3 == 'k'. -// -// The cross product matrix will also be stored in XMTRX after this, so calling -// MATH_Matrix_Transform() on a vector after using this function will do a cross -// product with the same x1-x3 values and a multiply with the same 'a' value -// as used in this function. In this a situation, 'a' will be multiplied with -// the x4 parameter of MATH_Matrix_Transform(). a = 0 if not using the 'with mult' -// version of the cross product function. -// -// For reference, XMTRX will look like this: -// -// [ 0 -x3 x2 0 ] -// [ x3 0 -x1 0 ] -// [ -x2 x1 0 0 ] -// [ 0 0 0 a ] (<-- a = 0 if not using 'with mult') -// -// Similarly to how the sine and cosine functions use fsca and return 2 floats, -// the cross product functions actually return 4 floats. The first 3 are the -// cross product output, and the 4th is a*b. The SH4 only multiplies 4x4 -// matrices with 4x1 vectors, which is why the output is like that--but it means -// we also get a bonus float multiplication while we do our cross product! -// - -// Please do not call this function directly (notice the weird syntax); call -// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead. -static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product_with_Mult(float x3, float a, float y3, float b, float x2, float x1, float y1, float y2) -{ - // FR4-FR11 are the regs that are passed in, in that order. - // Just need to make sure GCC doesn't modify anything, and these register vars do that job. - - // Temporary variables are necessary per GCC to avoid clobbering: - // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - - float tx1 = x1; - float tx2 = x2; - float tx3 = x3; - float ta = a; - - float ty1 = y1; - float ty2 = y2; - float ty3 = y3; - float tb = b; - - register float __x1 __asm__("fr9") = tx1; // need to negate (need to move to fr6, then negate fr9) - register float __x2 __asm__("fr8") = tx2; // in place for matrix (need to move to fr2 then negate fr2) - register float __x3 __asm__("fr4") = tx3; // need to negate (move to fr1 first, then negate fr4) - register float __a __asm__("fr5") = ta; - - register float __y1 __asm__("fr10") = ty1; - register float __y2 __asm__("fr11") = ty2; - register float __y3 __asm__("fr6") = ty3; - register float __b __asm__("fr7") = tb; - - register float __z1 __asm__("fr0") = 0.0f; // z1 - register float __z2 __asm__("fr1") = 0.0f; // z2 (not moving x3 here yet since a double 0 is needed) - register float __z3 __asm__("fr2") = tx2; // z3 (this handles putting x2 in fr2) - register float __c __asm__("fr3") = 0.0f; // c - - // This actually does a matrix transform to do the cross product. - // It's this: - // _ _ _ _ - // [ 0 -x3 x2 0 ] | y1 | | -x3y2 + x2y3 | - // [ x3 0 -x1 0 ] | y2 | = | x3y1 - x1y3 | - // [ -x2 x1 0 0 ] | y3 | | -x2y1 + x1y2 | - // [ 0 0 0 a ] |_ b _| |_ c _| - // - - asm volatile ( - // set up back bank's FV0 - "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) - - // Save FR12-FR15, which are supposed to be preserved across functions calls. - // This stops them from getting clobbered and saves 4 stack pushes (memory accesses). - "fmov DR12, XD12\n\t" - "fmov DR14, XD14\n\t" - - "fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1) - "fmov DR6, XD2\n\t" // fmov 'y3' and 'b' from FR6, FR7 into position (XF2, XF3) - - // pair move zeros for some speed in setting up front bank for matrix - "fmov DR0, DR10\n\t" // clear FR10, FR11 - "fmov DR0, DR12\n\t" // clear FR12, FR13 - "fschg\n\t" // switch back to single moves - // prepare front bank for XMTRX - "fmov FR5, FR15\n\t" // fmov 'a' into position - "fmov FR0, FR14\n\t" // clear out FR14 - "fmov FR0, FR7\n\t" // clear out FR7 - "fmov FR0, FR5\n\t" // clear out FR5 - - "fneg FR2\n\t" // set up 'x2' - "fmov FR9, FR6\n\t" // set up 'x1' - "fneg FR9\n\t" - "fmov FR4, FR1\n\t" // set up 'x3' - "fneg FR4\n\t" - // flip banks and matrix multiply - "frchg\n\t" - "ftrv XMTRX, FV0\n" - : "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0) - : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__a), "f" (__b) // inputs - : // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved) - ); - - RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c}; - return output; -} - -// Please do not call this function directly (notice the weird syntax); call -// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead. -static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product(float x3, float zero, float x1, float y3, float x2, float x1_2, float y1, float y2) -{ - // FR4-FR11 are the regs that are passed in, in that order. - // Just need to make sure GCC doesn't modify anything, and these register vars do that job. - - // Temporary variables are necessary per GCC to avoid clobbering: - // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - - float tx1 = x1; - float tx2 = x2; - float tx3 = x3; - float tx1_2 = x1_2; - - float ty1 = y1; - float ty2 = y2; - float ty3 = y3; - float tzero = zero; - - register float __x1 __asm__("fr6") = tx1; // in place - register float __x2 __asm__("fr8") = tx2; // in place (fmov to fr2, negate fr2) - register float __x3 __asm__("fr4") = tx3; // need to negate (fmov to fr1, negate fr4) - - register float __zero __asm__("fr5") = tzero; // in place - register float __x1_2 __asm__("fr9") = tx1_2; // need to negate - - register float __y1 __asm__("fr10") = ty1; - register float __y2 __asm__("fr11") = ty2; - // no __y3 needed in this function - - register float __z1 __asm__("fr0") = tzero; // z1 - register float __z2 __asm__("fr1") = tzero; // z2 - register float __z3 __asm__("fr2") = ty3; // z3 - register float __c __asm__("fr3") = tzero; // c - - // This actually does a matrix transform to do the cross product. - // It's this: - // _ _ _ _ - // [ 0 -x3 x2 0 ] | y1 | | -x3y2 + x2y3 | - // [ x3 0 -x1 0 ] | y2 | = | x3y1 - x1y3 | - // [ -x2 x1 0 0 ] | y3 | | -x2y1 + x1y2 | - // [ 0 0 0 0 ] |_ 0 _| |_ 0 _| - // - - asm volatile ( - // zero out FR7. For some reason, if this is done in C after __z3 is set: - // register float __y3 __asm__("fr7") = tzero; - // then GCC will emit a spurious stack push (pushing FR12). So just zero it here. - "fmov FR5, FR7\n\t" - // set up back bank's FV0 - "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) - - // Save FR12-FR15, which are supposed to be preserved across functions calls. - // This stops them from getting clobbered and saves 4 stack pushes (memory accesses). - "fmov DR12, XD12\n\t" - "fmov DR14, XD14\n\t" - - "fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1) - "fmov DR2, XD2\n\t" // fmov 'y3' and '0' from FR2, FR3 into position (XF2, XF3) - - // pair move zeros for some speed in setting up front bank for matrix - "fmov DR0, DR10\n\t" // clear FR10, FR11 - "fmov DR0, DR12\n\t" // clear FR12, FR13 - "fmov DR0, DR14\n\t" // clear FR14, FR15 - "fschg\n\t" // switch back to single moves - // prepare front bank for XMTRX - "fneg FR9\n\t" // set up 'x1' - "fmov FR8, FR2\n\t" // set up 'x2' - "fneg FR2\n\t" - "fmov FR4, FR1\n\t" // set up 'x3' - "fneg FR4\n\t" - // flip banks and matrix multiply - "frchg\n\t" - "ftrv XMTRX, FV0\n" - : "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0) - : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__zero), "f" (__x1_2) // inputs - : "fr7" // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved) - ); - - RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c}; - return output; -} - -//------------------------------------------------------------------------------ -// Functions that wrap the xMATH_do_Cross_Product[_with_Mult]() functions to make -// it easier to organize parameters -//------------------------------------------------------------------------------ - -// Cross product with a bonus float multiply (c = a * b) -static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b) -{ - return xMATH_do_Cross_Product_with_Mult(x3, a, y3, b, x2, x1, y1, y2); -} - -// Plain cross product; does not use the bonus float multiply (c = 0 and a in the cross product matrix will be 0) -// This is a tiny bit faster than 'with_mult' (about 2 cycles faster) -static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3) -{ - return xMATH_do_Cross_Product(x3, 0.0f, x1, y3, x2, x1, y1, y2); -} - -// Outer product: vec (X) vec = matrix -// _ _ -// | x1 | -// | x2 | (X) [ y1 y2 y3 y4 ] = 4x4 matrix -// | x3 | -// |_ x4 _| -// -// This returns the floats in the back bank (XF0-15), which are inaccessible -// outside of using frchg or paired-move fmov. It's meant to set up a matrix for -// use with other matrix functions. GCC also does not touch the XFn bank. -// This will also wipe out anything stored in the float registers, as it uses the -// whole FPU register file (all 32 of the float registers). -static inline __attribute__((always_inline)) void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4) -{ - // FR4-FR11 are the regs that are passed in, in that order. - // Just need to make sure GCC doesn't modify anything, and these register vars do that job. - - // Temporary variables are necessary per GCC to avoid clobbering: - // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - - float tx1 = x1; - float tx2 = x2; - float tx3 = x3; - float tx4 = x4; - - float ty1 = y1; - float ty2 = y2; - float ty3 = y3; - float ty4 = y4; - - // vector FV4 - register float __x1 __asm__("fr4") = tx1; - register float __x2 __asm__("fr5") = tx2; - register float __x3 __asm__("fr6") = tx3; - register float __x4 __asm__("fr7") = tx4; - - // vector FV8 - register float __y1 __asm__("fr8") = ty1; - register float __y2 __asm__("fr9") = ty2; - register float __y3 __asm__("fr10") = ty3; // in place already - register float __y4 __asm__("fr11") = ty4; - - // This actually does a 4x4 matrix multiply to do the outer product. - // It's this: - // - // [ x1 x1 x1 x1 ] [ y1 0 0 0 ] [ x1y1 x1y2 x1y3 x1y4 ] - // [ x2 x2 x2 x2 ] [ 0 y2 0 0 ] = [ x2y1 x2y2 x2y3 x2y4 ] - // [ x3 x3 x3 x3 ] [ 0 0 y3 0 ] [ x3y1 x3y2 x3y3 x3y4 ] - // [ x4 x4 x4 x4 ] [ 0 0 0 y4 ] [ x4y1 x4y2 x4y3 x4y4 ] - // - - asm volatile ( - // zero out unoccupied front floats to make a double 0 in DR2 - "fldi0 FR2\n\t" - "fmov FR2, FR3\n\t" - "fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs) - // fmov 'x1' and 'x2' from FR4, FR5 into position (XF0,4,8,12, XF1,5,9,13) - "fmov DR4, XD0\n\t" - "fmov DR4, XD4\n\t" - "fmov DR4, XD8\n\t" - "fmov DR4, XD12\n\t" - // fmov 'x3' and 'x4' from FR6, FR7 into position (XF2,6,10,14, XF3,7,11,15) - "fmov DR6, XD2\n\t" - "fmov DR6, XD6\n\t" - "fmov DR6, XD10\n\t" - "fmov DR6, XD14\n\t" - // set up front floats (y1-y4) - "fmov DR8, DR0\n\t" - "fmov DR8, DR4\n\t" - "fmov DR10, DR14\n\t" - // finish zeroing out front floats - "fmov DR2, DR6\n\t" - "fmov DR2, DR8\n\t" - "fmov DR2, DR12\n\t" - "fschg\n\t" // switch back to single-move mode - // zero out remaining values and matrix multiply 4x4 - "fmov FR2, FR1\n\t" - "ftrv XMTRX, FV0\n\t" - - "fmov FR6, FR4\n\t" - "ftrv XMTRX, FV4\n\t" - - "fmov FR8, FR11\n\t" - "ftrv XMTRX, FV8\n\t" - - "fmov FR12, FR14\n\t" - "ftrv XMTRX, FV12\n\t" - // Save output in XF regs - "frchg\n" - : // no outputs - : "f" (__x1), "f" (__x2), "f" (__x3), "f" (__x4), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__y4) // inputs - : "fr0", "fr1", "fr2", "fr3", "fr12", "fr13", "fr14", "fr15" // clobbers, can't avoid it - ); - // GCC will restore FR12-FR15 from the stack after this, so we really can't keep the output in the front bank. -} - -// Matrix transform: matrix * vector = vector -// _ _ _ _ -// [ ----------- ] | x1 | | z1 | -// [ ---XMTRX--- ] | x2 | = | z2 | -// [ ----------- ] | x3 | | z3 | -// [ ----------- ] |_ x4 _| |_ z4 _| -// -// IMPORTANT USAGE INFORMATION (matrix transform): -// -// Return vector struct maps 1:1 to the above diagram: -// -// typedef struct { -// float z1; -// float z2; -// float z3; -// float z4; -// } RETURN_VECTOR_STRUCT; -// -// Similarly to how the sine and cosine functions use fsca and return 2 floats, -// the matrix transform function actually returns 4 floats. The SH4 only multiplies -// 4x4 matrices with 4x1 vectors, which is why the output is like that. -// -// Multiply a matrix stored in the back bank (XMTRX) with an input vector -static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4) -{ - // The floats comprising FV4 are the regs that are passed in. - // Just need to make sure GCC doesn't modify anything, and these register vars do that job. - - // Temporary variables are necessary per GCC to avoid clobbering: - // https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - - float tx1 = x1; - float tx2 = x2; - float tx3 = x3; - float tx4 = x4; - - // output vector FV0 - register float __z1 __asm__("fr0") = tx1; - register float __z2 __asm__("fr1") = tx2; - register float __z3 __asm__("fr2") = tx3; - register float __z4 __asm__("fr3") = tx4; - - asm volatile ("ftrv XMTRX, FV0\n\t" - // have to do this to obey SH4 calling convention--output returned in FV0 - : "+w" (__z1), "+f" (__z2), "+f" (__z3), "+f" (__z4) // outputs, "+" means r/w - : // no inputs - : // no clobbers - ); - - RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __z4}; - return output; -} - -// Matrix Transpose -// -// This does a matrix transpose on the matrix in XMTRX, which swaps rows with -// columns as follows (math notation is [XMTRX]^T): -// -// [ a b c d ] T [ a e i m ] -// [ e f g h ] = [ b f j n ] -// [ i j k l ] [ c g k o ] -// [ m n o p ] [ d h l p ] -// -// PLEASE NOTE: It is faster to avoid the need for a transpose altogether by -// structuring matrices and vectors accordingly. -static inline __attribute__((always_inline)) void MATH_Matrix_Transpose(void) -{ - asm volatile ( - "frchg\n\t" // fmov for singles only works on front bank - // FR0, FR5, FR10, and FR15 are already in place - // swap FR1 and FR4 - "flds FR1, FPUL\n\t" - "fmov FR4, FR1\n\t" - "fsts FPUL, FR4\n\t" - // swap FR2 and FR8 - "flds FR2, FPUL\n\t" - "fmov FR8, FR2\n\t" - "fsts FPUL, FR8\n\t" - // swap FR3 and FR12 - "flds FR3, FPUL\n\t" - "fmov FR12, FR3\n\t" - "fsts FPUL, FR12\n\t" - // swap FR6 and FR9 - "flds FR6, FPUL\n\t" - "fmov FR9, FR6\n\t" - "fsts FPUL, FR9\n\t" - // swap FR7 and FR13 - "flds FR7, FPUL\n\t" - "fmov FR13, FR7\n\t" - "fsts FPUL, FR13\n\t" - // swap FR11 and FR14 - "flds FR11, FPUL\n\t" - "fmov FR14, FR11\n\t" - "fsts FPUL, FR14\n\t" - // restore XMTRX to back bank - "frchg\n" - : // no outputs - : // no inputs - : "fpul" // clobbers - ); -} - // Matrix product: matrix * matrix = matrix // // These use the whole dang floating point unit. @@ -1582,73 +1126,14 @@ static inline __attribute__((always_inline)) ALL_FLOATS_STRUCT * MATH_Store_XMTR // /* - //------------------------------------------------------------------------------ - // Commonly useful functions - //------------------------------------------------------------------------------ - - // Returns 1 if point 't' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not - int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty) - //------------------------------------------------------------------------------ // Interpolation //------------------------------------------------------------------------------ // Linear interpolation float MATH_Lerp(float a, float b, float t) - - // Speherical interpolation ('theta' in fsca units) - float MATH_Slerp(float a, float b, float t, float theta) - - //------------------------------------------------------------------------------ - // Fast Sinc functions (unnormalized, sin(x)/x version) - //------------------------------------------------------------------------------ - // Just pass in MATH_pi * x for normalized versions :) - - // Sinc function (fsca units) - float MATH_Fast_Sincf(float x) - - // Sinc function (degrees) - float MATH_Fast_Sincf_Deg(float x) - - // Sinc function (rads) - float MATH_Fast_Sincf_Rad(float x) - */ -//------------------------------------------------------------------------------ -// Commonly useful functions -//------------------------------------------------------------------------------ - -// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not -// Determines triangle center using barycentric coordinate transformation -// Adapted from: https://stackoverflow.com/questions/2049582/how-to-determine-if-a-point-is-in-a-2d-triangle -// Specifically the answer by user 'adreasdr' in addition to the comment by user 'urraka' on the answer from user 'Andreas Brinck' -// -// The notation here assumes v0x is the x-component of v0, v0y is the y-component of v0, etc. -// -static inline __attribute__((always_inline)) int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty) -{ - float sdot = MATH_fipr(v0y, -v0x, v2y - v0y, v0x - v2x, v2x, v2y, ptx, pty); - float tdot = MATH_fipr(v0x, -v0y, v0y - v1y, v1x - v0x, v1y, v1x, ptx, pty); - - float areadot = MATH_fipr(-v1y, v0y, v0x, v1x, v2x, -v1x + v2x, v1y - v2y, v2y); - - // 'areadot' could be negative depending on the winding of the triangle - if(areadot < 0.0f) - { - sdot *= -1.0f; - tdot *= -1.0f; - areadot *= -1.0f; - } - - if( (sdot > 0.0f) && (tdot > 0.0f) && (areadot > (sdot + tdot)) ) - { - return 1; - } - - return 0; -} - //------------------------------------------------------------------------------ // Interpolation //------------------------------------------------------------------------------ @@ -1659,123 +1144,6 @@ static inline __attribute__((always_inline)) float MATH_Lerp(float a, float b, f return MATH_fmac(t, (b-a), a); } -// Speherical interpolation ('theta' in fsca units) -static inline __attribute__((always_inline)) float MATH_Slerp(float a, float b, float t, float theta) -{ - // a is an element of v0, b is an element of v1 - // v = ( v0 * sin(theta - t * theta) + v1 * sin(t * theta) ) / sin(theta) - // by using sine/cosine identities and properties, this can be optimized to: - // v = v0 * cos(-t * theta) + ( v0 * ( cos(theta) * sin(-t * theta) ) - sin(-t * theta) * v1 ) / sin(theta) - // which only requires two calls to fsca. - // Specifically, sin(a + b) = sin(a)cos(b) + cos(a)sin(b) & sin(-a) = -sin(a) - - // MATH_fsca_* functions return reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part) - // This could be made even faster by using MATH_fsca_Int() with 'theta' and 't' as unsigned ints - -#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION - - RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(theta); - float sine_value_theta = sine_cosine.sine; - float cosine_value_theta = sine_cosine.cosine; - - RETURN_FSCA_STRUCT sine_cosine2 = MATH_fsca_Float(-t * theta); - float sine_value_minus_t_theta = sine_cosine2.sine; - float cosine_value_minus_t_theta = sine_cosine2.cosine; - -#else - - _Complex float sine_cosine = MATH_fsca_Float(theta); - float sine_value_theta = __real__ sine_cosine; - float cosine_value_theta = __imag__ sine_cosine; - - _Complex float sine_cosine2 = MATH_fsca_Float(-t * theta); - float sine_value_minus_t_theta = __real__ sine_cosine2; - float cosine_value_minus_t_theta = __imag__ sine_cosine2; - -#endif - - float numer = a * cosine_value_theta * sine_value_minus_t_theta - sine_value_minus_t_theta * b; - float output_float = a * cosine_value_minus_t_theta + MATH_Fast_Divide(numer, sine_value_theta); - - return output_float; -} - -//------------------------------------------------------------------------------ -// Fast Sinc (unnormalized, sin(x)/x version) -//------------------------------------------------------------------------------ -// -// Just pass in MATH_pi * x for normalized versions :) -// - -// Sinc function (fsca units) -static inline __attribute__((always_inline)) float MATH_Fast_Sincf(float x) -{ - if(x == 0.0f) - { - return 1.0f; - } - -#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION - - RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(x); - float sine_value = sine_cosine.sine; - -#else - - _Complex float sine_cosine = MATH_fsca_Float(x); - float sine_value = __real__ sine_cosine; - -#endif - - return MATH_Fast_Divide(sine_value, x); -} - -// Sinc function (degrees) -static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Deg(float x) -{ - if(x == 0.0f) - { - return 1.0f; - } - -#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION - - RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Deg(x); - float sine_value = sine_cosine.sine; - -#else - - _Complex float sine_cosine = MATH_fsca_Float_Deg(x); - float sine_value = __real__ sine_cosine; - -#endif - - return MATH_Fast_Divide(sine_value, x); -} - -// Sinc function (rads) -static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Rad(float x) -{ - if(x == 0.0f) - { - return 1.0f; - } - -#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION - - RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Rad(x); - float sine_value = sine_cosine.sine; - -#else - - _Complex float sine_cosine = MATH_fsca_Float_Rad(x); - float sine_value = __real__ sine_cosine; - -#endif - - return MATH_Fast_Divide(sine_value, x); -} - //============================================================================== // Miscellaneous Snippets //============================================================================== diff --git a/third_party/gldc/src/state.c b/third_party/gldc/src/state.c index 74e77248a..c9d6588a4 100644 --- a/third_party/gldc/src/state.c +++ b/third_party/gldc/src/state.c @@ -1,8 +1,14 @@ #include #include #include +#include +#include #include "gldc.h" +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define CLAMP( X, _MIN, _MAX ) ( (X)<(_MIN) ? (_MIN) : ((X)>(_MAX) ? (_MAX) : (X)) ) + GLboolean STATE_DIRTY = GL_TRUE; GLboolean DEPTH_TEST_ENABLED = GL_FALSE; @@ -21,37 +27,67 @@ GLboolean BLEND_ENABLED = GL_FALSE; GLboolean TEXTURES_ENABLED = GL_FALSE; GLboolean AUTOSORT_ENABLED = GL_FALSE; +PolyList OP_LIST; +PolyList PT_LIST; +PolyList TR_LIST; +Viewport VIEWPORT; + static struct { int x; int y; int width; int height; GLboolean applied; -} scissor_rect = {0, 0, 640, 480, false}; +} scissor_rect; -void _glInitContext() { - scissor_rect.x = 0; - scissor_rect.y = 0; +void glKosInit() { scissor_rect.width = vid_mode->width; scissor_rect.height = vid_mode->height; + _glInitTextures(); + + OP_LIST.list_type = PVR_LIST_OP_POLY; + PT_LIST.list_type = PVR_LIST_PT_POLY; + TR_LIST.list_type = PVR_LIST_TR_POLY; + + aligned_vector_reserve(&OP_LIST.vector, 1024 * 3); + aligned_vector_reserve(&PT_LIST.vector, 512 * 3); + aligned_vector_reserve(&TR_LIST.vector, 1024 * 3); +} + +void glKosSwapBuffers() { + _glApplyScissor(true); + + pvr_scene_begin(); + if (OP_LIST.vector.size > 2) { + pvr_list_begin(PVR_LIST_OP_POLY); + SceneListSubmit((Vertex*)OP_LIST.vector.data, OP_LIST.vector.size); + pvr_list_finish(); + } + + if (PT_LIST.vector.size > 2) { + pvr_list_begin(PVR_LIST_PT_POLY); + SceneListSubmit((Vertex*)PT_LIST.vector.data, PT_LIST.vector.size); + pvr_list_finish(); + } + + if (TR_LIST.vector.size > 2) { + pvr_list_begin(PVR_LIST_TR_POLY); + SceneListSubmit((Vertex*)TR_LIST.vector.data, TR_LIST.vector.size); + pvr_list_finish(); + } + pvr_scene_finish(); + + OP_LIST.vector.size = 0; + PT_LIST.vector.size = 0; + TR_LIST.vector.size = 0; } void glScissor(int x, int y, int width, int height) { - - if(scissor_rect.x == x && - scissor_rect.y == y && - scissor_rect.width == width && - scissor_rect.height == height) { - return; - } - scissor_rect.x = x; scissor_rect.y = y; scissor_rect.width = width; scissor_rect.height = height; scissor_rect.applied = false; - STATE_DIRTY = GL_TRUE; // FIXME: do we need this? - _glApplyScissor(false); } @@ -87,27 +123,27 @@ void _glApplyScissor(int force) { PVRTileClipCommand c; - int miny, maxx, maxy; - + int sx, sy, ex, ey; int scissor_width = MAX(MIN(scissor_rect.width, vid_mode->width), 0); int scissor_height = MAX(MIN(scissor_rect.height, vid_mode->height), 0); /* force the origin to the lower left-hand corner of the screen */ - miny = (vid_mode->height - scissor_height) - scissor_rect.y; - maxx = (scissor_width + scissor_rect.x); - maxy = (scissor_height + miny); + sx = scissor_rect.x; + sy = (vid_mode->height - scissor_height) - scissor_rect.y; + ex = sx + scissor_width; + ey = sy + scissor_height; /* load command structure while mapping screen coords to TA tiles */ c.flags = PVR_CMD_USERCLIP; c.d1 = c.d2 = c.d3 = 0; - uint16_t vw = vid_mode->width >> 5; + uint16_t vw = vid_mode->width >> 5; uint16_t vh = vid_mode->height >> 5; - c.sx = CLAMP(scissor_rect.x >> 5, 0, vw); - c.sy = CLAMP(miny >> 5, 0, vh); - c.ex = CLAMP((maxx >> 5) - 1, 0, vw); - c.ey = CLAMP((maxy >> 5) - 1, 0, vh); + c.sx = CLAMP(sx >> 5, 0, vw); + c.sy = CLAMP(sy >> 5, 0, vh); + c.ex = CLAMP((ex >> 5) - 1, 0, vw); + c.ey = CLAMP((ey >> 5) - 1, 0, vh); aligned_vector_push_back(&OP_LIST.vector, &c, 1); aligned_vector_push_back(&PT_LIST.vector, &c, 1); @@ -116,30 +152,16 @@ void _glApplyScissor(int force) { scissor_rect.applied = true; } -Viewport VIEWPORT; -/* Set the GL viewport */ -void glViewport(int x, int y, int width, int height) { - VIEWPORT.hwidth = width * 0.5f; - VIEWPORT.hheight = height * -0.5f; - VIEWPORT.x_plus_hwidth = x + width * 0.5f; - VIEWPORT.y_plus_hheight = y + height * 0.5f; -} - - -void apply_poly_header(pvr_poly_hdr_t* dst, PolyList* activePolyList) { - const TextureObject *tx1 = TEXTURE_ACTIVE; +void apply_poly_header(pvr_poly_hdr_t* dst, int list_type) { + TextureObject* tx1 = TEXTURE_ACTIVE; uint32_t txr_base; - TRACE(); - - int list_type = activePolyList->list_type; int gen_color_clamp = PVR_CLRCLAMP_DISABLE; int gen_culling = CULLING_ENABLED ? PVR_CULLING_CW : PVR_CULLING_SMALL; int depth_comp = DEPTH_TEST_ENABLED ? PVR_DEPTHCMP_GEQUAL : PVR_DEPTHCMP_ALWAYS; int depth_write = DEPTH_MASK_ENABLED ? PVR_DEPTHWRITE_ENABLE : PVR_DEPTHWRITE_DISABLE; - int gen_shading = SHADE_MODEL; int gen_clip_mode = SCISSOR_TEST_ENABLED ? PVR_USERCLIP_INSIDE : PVR_USERCLIP_DISABLE; int gen_fog_type = FOG_ENABLED ? PVR_FOG_TABLE : PVR_FOG_DISABLE; @@ -179,7 +201,7 @@ void apply_poly_header(pvr_poly_hdr_t* dst, PolyList* activePolyList) { /* Or in the list type, shading type, color and UV formats */ dst->cmd |= (list_type << PVR_TA_CMD_TYPE_SHIFT) & PVR_TA_CMD_TYPE_MASK; dst->cmd |= (PVR_CLRFMT_ARGBPACKED << PVR_TA_CMD_CLRFMT_SHIFT) & PVR_TA_CMD_CLRFMT_MASK; - dst->cmd |= (gen_shading << PVR_TA_CMD_SHADE_SHIFT) & PVR_TA_CMD_SHADE_MASK; + dst->cmd |= (SHADE_MODEL << PVR_TA_CMD_SHADE_SHIFT) & PVR_TA_CMD_SHADE_MASK; dst->cmd |= (PVR_UVFMT_32BIT << PVR_TA_CMD_UVFMT_SHIFT) & PVR_TA_CMD_UVFMT_MASK; dst->cmd |= (gen_clip_mode << PVR_TA_CMD_USERCLIP_SHIFT) & PVR_TA_CMD_USERCLIP_MASK; diff --git a/third_party/gldc/src/texture.c b/third_party/gldc/src/texture.c index 27a5f9975..7f32a4355 100644 --- a/third_party/gldc/src/texture.c +++ b/third_party/gldc/src/texture.c @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include "gldc.h" #include "yalloc/yalloc.h"