mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-15 10:35:11 -04:00
Dreamcast: simplify code a bit more
This commit is contained in:
parent
e91ce96900
commit
655850e81a
@ -1435,7 +1435,7 @@ cc_result Audio_AllocChunks(cc_uint32 size, struct AudioChunk* chunks, int numCh
|
|||||||
void* dst = memalign(32, size * numChunks);
|
void* dst = memalign(32, size * numChunks);
|
||||||
if (!dst) return ERR_OUT_OF_MEMORY;
|
if (!dst) return ERR_OUT_OF_MEMORY;
|
||||||
totalSize += size * numChunks;
|
totalSize += size * numChunks;
|
||||||
Platform_Log3("ALLOC: %i X %i (%i)", &size, &numChunks, &totalSize);
|
//Platform_Log3("ALLOC: %i X %i (%i)", &size, &numChunks, &totalSize);
|
||||||
|
|
||||||
for (int i = 0; i < numChunks; i++)
|
for (int i = 0; i < numChunks; i++)
|
||||||
{
|
{
|
||||||
|
@ -18,7 +18,7 @@ static cc_bool renderingDisabled;
|
|||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*---------------------------------------------------------General---------------------------------------------------------*
|
*---------------------------------------------------------General---------------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
static int InitPowerVR(void) {
|
static void InitPowerVR(void) {
|
||||||
cc_bool autosort = false; // Turn off auto sorting to match traditional GPU behaviour
|
cc_bool autosort = false; // Turn off auto sorting to match traditional GPU behaviour
|
||||||
cc_bool fsaa = false;
|
cc_bool fsaa = false;
|
||||||
AUTOSORT_ENABLED = autosort;
|
AUTOSORT_ENABLED = autosort;
|
||||||
@ -507,7 +507,7 @@ cc_bool Gfx_WarnIfNecessary(void) {
|
|||||||
/*########################################################################################################################*
|
/*########################################################################################################################*
|
||||||
*----------------------------------------------------------Drawing--------------------------------------------------------*
|
*----------------------------------------------------------Drawing--------------------------------------------------------*
|
||||||
*#########################################################################################################################*/
|
*#########################################################################################################################*/
|
||||||
extern void apply_poly_header(pvr_poly_hdr_t* header, PolyList* activePolyList);
|
extern void apply_poly_header(pvr_poly_hdr_t* header, int list_type);
|
||||||
|
|
||||||
extern Vertex* DrawColouredQuads(const void* src, Vertex* dst, int numQuads);
|
extern Vertex* DrawColouredQuads(const void* src, Vertex* dst, int numQuads);
|
||||||
extern Vertex* DrawTexturedQuads(const void* src, Vertex* dst, int numQuads);
|
extern Vertex* DrawTexturedQuads(const void* src, Vertex* dst, int numQuads);
|
||||||
@ -522,7 +522,7 @@ void DrawQuads(int count, void* src) {
|
|||||||
Vertex* beg = aligned_vector_reserve(&output->vector, vec->size + (header_required) + count);
|
Vertex* beg = aligned_vector_reserve(&output->vector, vec->size + (header_required) + count);
|
||||||
|
|
||||||
if (header_required) {
|
if (header_required) {
|
||||||
apply_poly_header((pvr_poly_hdr_t*)beg, output);
|
apply_poly_header((pvr_poly_hdr_t*)beg, output->list_type);
|
||||||
STATE_DIRTY = GL_FALSE;
|
STATE_DIRTY = GL_FALSE;
|
||||||
beg++;
|
beg++;
|
||||||
vec->size += 1;
|
vec->size += 1;
|
||||||
@ -632,9 +632,13 @@ void Gfx_SetViewport(int x, int y, int w, int h) {
|
|||||||
}
|
}
|
||||||
STATE_DIRTY = GL_TRUE;
|
STATE_DIRTY = GL_TRUE;
|
||||||
|
|
||||||
glViewport(x, y, w, h);
|
|
||||||
glScissor (x, y, w, h);
|
glScissor (x, y, w, h);
|
||||||
|
|
||||||
|
VIEWPORT.hwidth = w * 0.5f;
|
||||||
|
VIEWPORT.hheight = h * -0.5f;
|
||||||
|
VIEWPORT.x_plus_hwidth = x + w * 0.5f;
|
||||||
|
VIEWPORT.y_plus_hheight = y + h * 0.5f;
|
||||||
|
|
||||||
VP_COL_HWIDTH = VP_TEX_HWIDTH = w * 0.5f;
|
VP_COL_HWIDTH = VP_TEX_HWIDTH = w * 0.5f;
|
||||||
VP_COL_HHEIGHT = VP_TEX_HHEIGHT = h * -0.5f;
|
VP_COL_HHEIGHT = VP_TEX_HHEIGHT = h * -0.5f;
|
||||||
|
|
||||||
|
@ -327,7 +327,7 @@ cc_result Socket_Create(cc_socket* s, cc_sockaddr* addr, cc_bool nonblocking) {
|
|||||||
cc_result Socket_Connect(cc_socket s, cc_sockaddr* addr) {
|
cc_result Socket_Connect(cc_socket s, cc_sockaddr* addr) {
|
||||||
struct sockaddr* raw = (struct sockaddr*)addr->data;
|
struct sockaddr* raw = (struct sockaddr*)addr->data;
|
||||||
|
|
||||||
int res = sceNetInetConnect(*s, raw, addr->size);
|
int res = sceNetInetConnect(s, raw, addr->size);
|
||||||
return res < 0 ? sceNetInetGetErrno() : 0;
|
return res < 0 ? sceNetInetGetErrno() : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
11
third_party/gldc/src/aligned_vector.h
vendored
11
third_party/gldc/src/aligned_vector.h
vendored
@ -112,14 +112,3 @@ AV_FORCE_INLINE void* aligned_vector_extend(AlignedVector* vector, const uint32_
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
AV_FORCE_INLINE void aligned_vector_clear(AlignedVector* vector){
|
|
||||||
vector->size = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
AV_FORCE_INLINE void aligned_vector_init(AlignedVector* vector) {
|
|
||||||
/* Now initialize the header*/
|
|
||||||
vector->size = 0;
|
|
||||||
vector->capacity = 0;
|
|
||||||
vector->data = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
52
third_party/gldc/src/flush.c
vendored
52
third_party/gldc/src/flush.c
vendored
@ -1,52 +0,0 @@
|
|||||||
#include <stdbool.h>
|
|
||||||
#include "gldc.h"
|
|
||||||
|
|
||||||
PolyList OP_LIST;
|
|
||||||
PolyList PT_LIST;
|
|
||||||
PolyList TR_LIST;
|
|
||||||
|
|
||||||
void glKosInit() {
|
|
||||||
_glInitContext();
|
|
||||||
_glInitTextures();
|
|
||||||
|
|
||||||
OP_LIST.list_type = PVR_LIST_OP_POLY;
|
|
||||||
PT_LIST.list_type = PVR_LIST_PT_POLY;
|
|
||||||
TR_LIST.list_type = PVR_LIST_TR_POLY;
|
|
||||||
|
|
||||||
aligned_vector_init(&OP_LIST.vector);
|
|
||||||
aligned_vector_init(&PT_LIST.vector);
|
|
||||||
aligned_vector_init(&TR_LIST.vector);
|
|
||||||
|
|
||||||
aligned_vector_reserve(&OP_LIST.vector, 1024 * 3);
|
|
||||||
aligned_vector_reserve(&PT_LIST.vector, 512 * 3);
|
|
||||||
aligned_vector_reserve(&TR_LIST.vector, 1024 * 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void glKosSwapBuffers() {
|
|
||||||
_glApplyScissor(true);
|
|
||||||
|
|
||||||
pvr_scene_begin();
|
|
||||||
if(OP_LIST.vector.size > 2) {
|
|
||||||
pvr_list_begin(PVR_LIST_OP_POLY);
|
|
||||||
SceneListSubmit((Vertex*)OP_LIST.vector.data, OP_LIST.vector.size);
|
|
||||||
pvr_list_finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
if(PT_LIST.vector.size > 2) {
|
|
||||||
pvr_list_begin(PVR_LIST_PT_POLY);
|
|
||||||
SceneListSubmit((Vertex*)PT_LIST.vector.data, PT_LIST.vector.size);
|
|
||||||
pvr_list_finish();
|
|
||||||
}
|
|
||||||
|
|
||||||
if(TR_LIST.vector.size > 2) {
|
|
||||||
pvr_list_begin(PVR_LIST_TR_POLY);
|
|
||||||
SceneListSubmit((Vertex*)TR_LIST.vector.data, TR_LIST.vector.size);
|
|
||||||
pvr_list_finish();
|
|
||||||
}
|
|
||||||
pvr_scene_finish();
|
|
||||||
|
|
||||||
OP_LIST.vector.size = 0;
|
|
||||||
PT_LIST.vector.size = 0;
|
|
||||||
TR_LIST.vector.size = 0;
|
|
||||||
}
|
|
12
third_party/gldc/src/gldc.h
vendored
12
third_party/gldc/src/gldc.h
vendored
@ -2,15 +2,10 @@
|
|||||||
#define PRIVATE_H
|
#define PRIVATE_H
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdio.h>
|
|
||||||
#include <kos.h>
|
|
||||||
#include <dc/pvr.h>
|
|
||||||
#include "aligned_vector.h"
|
#include "aligned_vector.h"
|
||||||
|
|
||||||
#define MAX_TEXTURE_COUNT 768
|
#define MAX_TEXTURE_COUNT 768
|
||||||
|
|
||||||
|
|
||||||
#define GL_SCISSOR_TEST 0x0008
|
|
||||||
#define GL_NEAREST 0x2600
|
#define GL_NEAREST 0x2600
|
||||||
#define GL_LINEAR 0x2601
|
#define GL_LINEAR 0x2601
|
||||||
#define GL_OUT_OF_MEMORY 0x0505
|
#define GL_OUT_OF_MEMORY 0x0505
|
||||||
@ -33,7 +28,6 @@ void gldcBindTexture(GLuint texture);
|
|||||||
int gldcAllocTexture(int w, int h, int format);
|
int gldcAllocTexture(int w, int h, int format);
|
||||||
void gldcGetTexture(void** data, int* width, int* height);
|
void gldcGetTexture(void** data, int* width, int* height);
|
||||||
|
|
||||||
void glViewport(int x, int y, int width, int height);
|
|
||||||
void glScissor( int x, int y, int width, int height);
|
void glScissor( int x, int y, int width, int height);
|
||||||
|
|
||||||
void glKosInit();
|
void glKosInit();
|
||||||
@ -106,8 +100,6 @@ typedef struct {
|
|||||||
} __attribute__((aligned(32))) TextureObject;
|
} __attribute__((aligned(32))) TextureObject;
|
||||||
|
|
||||||
|
|
||||||
void _glInitContext();
|
|
||||||
void _glInitSubmissionTarget();
|
|
||||||
void _glInitTextures();
|
void _glInitTextures();
|
||||||
|
|
||||||
extern TextureObject* TEXTURE_ACTIVE;
|
extern TextureObject* TEXTURE_ACTIVE;
|
||||||
@ -152,10 +144,6 @@ void _glApplyScissor(int force);
|
|||||||
|
|
||||||
extern GLboolean STATE_DIRTY;
|
extern GLboolean STATE_DIRTY;
|
||||||
|
|
||||||
#define MIN(a,b) (((a)<(b))?(a):(b))
|
|
||||||
#define MAX(a,b) (((a)>(b))?(a):(b))
|
|
||||||
#define CLAMP( X, _MIN, _MAX ) ( (X)<(_MIN) ? (_MIN) : ((X)>(_MAX) ? (_MAX) : (X)) )
|
|
||||||
|
|
||||||
void SceneListSubmit(Vertex* v2, int n);
|
void SceneListSubmit(Vertex* v2, int n);
|
||||||
|
|
||||||
static inline int DimensionFlag(int w) {
|
static inline int DimensionFlag(int w) {
|
||||||
|
632
third_party/gldc/src/sh4_math.h
vendored
632
third_party/gldc/src/sh4_math.h
vendored
@ -133,9 +133,6 @@ static const ALL_FLOATS_STRUCT MATH_identity_matrix = {1.0f, 0.0f, 0.0f, 0.0f, 0
|
|||||||
// a*b+c
|
// a*b+c
|
||||||
float MATH_fmac(float a, float b, float c)
|
float MATH_fmac(float a, float b, float c)
|
||||||
|
|
||||||
// a*b-c
|
|
||||||
float MATH_fmac_Dec(float a, float b, float c)
|
|
||||||
|
|
||||||
// fminf() - return the min of two floats
|
// fminf() - return the min of two floats
|
||||||
// This doesn't check for NaN
|
// This doesn't check for NaN
|
||||||
float MATH_Fast_Fminf(float a, float b)
|
float MATH_Fast_Fminf(float a, float b)
|
||||||
@ -205,19 +202,6 @@ static inline __attribute__((always_inline)) float MATH_fmac(float a, float b, f
|
|||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
// a*b-c
|
|
||||||
static inline __attribute__((always_inline)) float MATH_fmac_Dec(float a, float b, float c)
|
|
||||||
{
|
|
||||||
asm volatile ("fneg %[floatc]\n\t"
|
|
||||||
"fmac fr0, %[floatb], %[floatc]\n"
|
|
||||||
: [floatc] "+&f" (c) // outputs, "+" means r/w, "&" means it's written to before all inputs are consumed
|
|
||||||
: "w" (a), [floatb] "f" (b) // inputs
|
|
||||||
: // no clobbers
|
|
||||||
);
|
|
||||||
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fast fminf() - return the min of two floats
|
// Fast fminf() - return the min of two floats
|
||||||
// This doesn't check for NaN
|
// This doesn't check for NaN
|
||||||
static inline __attribute__((always_inline)) float MATH_Fast_Fminf(float a, float b)
|
static inline __attribute__((always_inline)) float MATH_Fast_Fminf(float a, float b)
|
||||||
@ -833,18 +817,6 @@ static inline __attribute__((always_inline)) _Complex float MATH_fsca_Float_Rad(
|
|||||||
// Sum of Squares (w^2 + x^2 + y^2 + z^2)
|
// Sum of Squares (w^2 + x^2 + y^2 + z^2)
|
||||||
float MATH_Sum_of_Squares(float w, float x, float y, float z)
|
float MATH_Sum_of_Squares(float w, float x, float y, float z)
|
||||||
|
|
||||||
// Cross product with bonus multiply (vec X vec = orthogonal vec, with an extra a*b=c)
|
|
||||||
RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b)
|
|
||||||
|
|
||||||
// Cross product (vec X vec = orthogonal vec)
|
|
||||||
RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3)
|
|
||||||
|
|
||||||
// Outer product (vec (X) vec = 4x4 matrix)
|
|
||||||
void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
|
|
||||||
|
|
||||||
// Matrix transform (4x4 matrix * 4x1 vec = 4x1 vec)
|
|
||||||
RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4)
|
|
||||||
|
|
||||||
// 4x4 Matrix transpose (XMTRX^T)
|
// 4x4 Matrix transpose (XMTRX^T)
|
||||||
void MATH_Matrix_Transpose(void)
|
void MATH_Matrix_Transpose(void)
|
||||||
|
|
||||||
@ -953,434 +925,6 @@ static inline __attribute__((always_inline)) float MATH_Sum_of_Squares(float w,
|
|||||||
return __z;
|
return __z;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cross product: vec X vec = orthogonal vec
|
|
||||||
// _ _ _ _ _ _
|
|
||||||
// | x1 | | y1 | | z1 |
|
|
||||||
// | x2 | X | y2 | = | z2 |
|
|
||||||
// |_ x3 _| |_ y3 _| |_ z3 _|
|
|
||||||
//
|
|
||||||
// With bonus multiply:
|
|
||||||
//
|
|
||||||
// a * b = c
|
|
||||||
//
|
|
||||||
// IMPORTANT USAGE INFORMATION (cross product):
|
|
||||||
//
|
|
||||||
// Return vector struct maps as below to the above diagram:
|
|
||||||
//
|
|
||||||
// typedef struct {
|
|
||||||
// float z1;
|
|
||||||
// float z2;
|
|
||||||
// float z3;
|
|
||||||
// float z4; // c is stored in z4, and c = a*b if using 'with mult' version (else c = 0)
|
|
||||||
// } RETURN_VECTOR_STRUCT;
|
|
||||||
//
|
|
||||||
// For people familiar with the unit vector notation, z1 == 'i', z2 == 'j',
|
|
||||||
// and z3 == 'k'.
|
|
||||||
//
|
|
||||||
// The cross product matrix will also be stored in XMTRX after this, so calling
|
|
||||||
// MATH_Matrix_Transform() on a vector after using this function will do a cross
|
|
||||||
// product with the same x1-x3 values and a multiply with the same 'a' value
|
|
||||||
// as used in this function. In this a situation, 'a' will be multiplied with
|
|
||||||
// the x4 parameter of MATH_Matrix_Transform(). a = 0 if not using the 'with mult'
|
|
||||||
// version of the cross product function.
|
|
||||||
//
|
|
||||||
// For reference, XMTRX will look like this:
|
|
||||||
//
|
|
||||||
// [ 0 -x3 x2 0 ]
|
|
||||||
// [ x3 0 -x1 0 ]
|
|
||||||
// [ -x2 x1 0 0 ]
|
|
||||||
// [ 0 0 0 a ] (<-- a = 0 if not using 'with mult')
|
|
||||||
//
|
|
||||||
// Similarly to how the sine and cosine functions use fsca and return 2 floats,
|
|
||||||
// the cross product functions actually return 4 floats. The first 3 are the
|
|
||||||
// cross product output, and the 4th is a*b. The SH4 only multiplies 4x4
|
|
||||||
// matrices with 4x1 vectors, which is why the output is like that--but it means
|
|
||||||
// we also get a bonus float multiplication while we do our cross product!
|
|
||||||
//
|
|
||||||
|
|
||||||
// Please do not call this function directly (notice the weird syntax); call
|
|
||||||
// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead.
|
|
||||||
static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product_with_Mult(float x3, float a, float y3, float b, float x2, float x1, float y1, float y2)
|
|
||||||
{
|
|
||||||
// FR4-FR11 are the regs that are passed in, in that order.
|
|
||||||
// Just need to make sure GCC doesn't modify anything, and these register vars do that job.
|
|
||||||
|
|
||||||
// Temporary variables are necessary per GCC to avoid clobbering:
|
|
||||||
// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
|
|
||||||
|
|
||||||
float tx1 = x1;
|
|
||||||
float tx2 = x2;
|
|
||||||
float tx3 = x3;
|
|
||||||
float ta = a;
|
|
||||||
|
|
||||||
float ty1 = y1;
|
|
||||||
float ty2 = y2;
|
|
||||||
float ty3 = y3;
|
|
||||||
float tb = b;
|
|
||||||
|
|
||||||
register float __x1 __asm__("fr9") = tx1; // need to negate (need to move to fr6, then negate fr9)
|
|
||||||
register float __x2 __asm__("fr8") = tx2; // in place for matrix (need to move to fr2 then negate fr2)
|
|
||||||
register float __x3 __asm__("fr4") = tx3; // need to negate (move to fr1 first, then negate fr4)
|
|
||||||
register float __a __asm__("fr5") = ta;
|
|
||||||
|
|
||||||
register float __y1 __asm__("fr10") = ty1;
|
|
||||||
register float __y2 __asm__("fr11") = ty2;
|
|
||||||
register float __y3 __asm__("fr6") = ty3;
|
|
||||||
register float __b __asm__("fr7") = tb;
|
|
||||||
|
|
||||||
register float __z1 __asm__("fr0") = 0.0f; // z1
|
|
||||||
register float __z2 __asm__("fr1") = 0.0f; // z2 (not moving x3 here yet since a double 0 is needed)
|
|
||||||
register float __z3 __asm__("fr2") = tx2; // z3 (this handles putting x2 in fr2)
|
|
||||||
register float __c __asm__("fr3") = 0.0f; // c
|
|
||||||
|
|
||||||
// This actually does a matrix transform to do the cross product.
|
|
||||||
// It's this:
|
|
||||||
// _ _ _ _
|
|
||||||
// [ 0 -x3 x2 0 ] | y1 | | -x3y2 + x2y3 |
|
|
||||||
// [ x3 0 -x1 0 ] | y2 | = | x3y1 - x1y3 |
|
|
||||||
// [ -x2 x1 0 0 ] | y3 | | -x2y1 + x1y2 |
|
|
||||||
// [ 0 0 0 a ] |_ b _| |_ c _|
|
|
||||||
//
|
|
||||||
|
|
||||||
asm volatile (
|
|
||||||
// set up back bank's FV0
|
|
||||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
|
|
||||||
|
|
||||||
// Save FR12-FR15, which are supposed to be preserved across functions calls.
|
|
||||||
// This stops them from getting clobbered and saves 4 stack pushes (memory accesses).
|
|
||||||
"fmov DR12, XD12\n\t"
|
|
||||||
"fmov DR14, XD14\n\t"
|
|
||||||
|
|
||||||
"fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1)
|
|
||||||
"fmov DR6, XD2\n\t" // fmov 'y3' and 'b' from FR6, FR7 into position (XF2, XF3)
|
|
||||||
|
|
||||||
// pair move zeros for some speed in setting up front bank for matrix
|
|
||||||
"fmov DR0, DR10\n\t" // clear FR10, FR11
|
|
||||||
"fmov DR0, DR12\n\t" // clear FR12, FR13
|
|
||||||
"fschg\n\t" // switch back to single moves
|
|
||||||
// prepare front bank for XMTRX
|
|
||||||
"fmov FR5, FR15\n\t" // fmov 'a' into position
|
|
||||||
"fmov FR0, FR14\n\t" // clear out FR14
|
|
||||||
"fmov FR0, FR7\n\t" // clear out FR7
|
|
||||||
"fmov FR0, FR5\n\t" // clear out FR5
|
|
||||||
|
|
||||||
"fneg FR2\n\t" // set up 'x2'
|
|
||||||
"fmov FR9, FR6\n\t" // set up 'x1'
|
|
||||||
"fneg FR9\n\t"
|
|
||||||
"fmov FR4, FR1\n\t" // set up 'x3'
|
|
||||||
"fneg FR4\n\t"
|
|
||||||
// flip banks and matrix multiply
|
|
||||||
"frchg\n\t"
|
|
||||||
"ftrv XMTRX, FV0\n"
|
|
||||||
: "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0)
|
|
||||||
: "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__a), "f" (__b) // inputs
|
|
||||||
: // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved)
|
|
||||||
);
|
|
||||||
|
|
||||||
RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c};
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Please do not call this function directly (notice the weird syntax); call
|
|
||||||
// MATH_Cross_Product() or MATH_Cross_Product_with_Mult() instead.
|
|
||||||
static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT xMATH_do_Cross_Product(float x3, float zero, float x1, float y3, float x2, float x1_2, float y1, float y2)
|
|
||||||
{
|
|
||||||
// FR4-FR11 are the regs that are passed in, in that order.
|
|
||||||
// Just need to make sure GCC doesn't modify anything, and these register vars do that job.
|
|
||||||
|
|
||||||
// Temporary variables are necessary per GCC to avoid clobbering:
|
|
||||||
// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
|
|
||||||
|
|
||||||
float tx1 = x1;
|
|
||||||
float tx2 = x2;
|
|
||||||
float tx3 = x3;
|
|
||||||
float tx1_2 = x1_2;
|
|
||||||
|
|
||||||
float ty1 = y1;
|
|
||||||
float ty2 = y2;
|
|
||||||
float ty3 = y3;
|
|
||||||
float tzero = zero;
|
|
||||||
|
|
||||||
register float __x1 __asm__("fr6") = tx1; // in place
|
|
||||||
register float __x2 __asm__("fr8") = tx2; // in place (fmov to fr2, negate fr2)
|
|
||||||
register float __x3 __asm__("fr4") = tx3; // need to negate (fmov to fr1, negate fr4)
|
|
||||||
|
|
||||||
register float __zero __asm__("fr5") = tzero; // in place
|
|
||||||
register float __x1_2 __asm__("fr9") = tx1_2; // need to negate
|
|
||||||
|
|
||||||
register float __y1 __asm__("fr10") = ty1;
|
|
||||||
register float __y2 __asm__("fr11") = ty2;
|
|
||||||
// no __y3 needed in this function
|
|
||||||
|
|
||||||
register float __z1 __asm__("fr0") = tzero; // z1
|
|
||||||
register float __z2 __asm__("fr1") = tzero; // z2
|
|
||||||
register float __z3 __asm__("fr2") = ty3; // z3
|
|
||||||
register float __c __asm__("fr3") = tzero; // c
|
|
||||||
|
|
||||||
// This actually does a matrix transform to do the cross product.
|
|
||||||
// It's this:
|
|
||||||
// _ _ _ _
|
|
||||||
// [ 0 -x3 x2 0 ] | y1 | | -x3y2 + x2y3 |
|
|
||||||
// [ x3 0 -x1 0 ] | y2 | = | x3y1 - x1y3 |
|
|
||||||
// [ -x2 x1 0 0 ] | y3 | | -x2y1 + x1y2 |
|
|
||||||
// [ 0 0 0 0 ] |_ 0 _| |_ 0 _|
|
|
||||||
//
|
|
||||||
|
|
||||||
asm volatile (
|
|
||||||
// zero out FR7. For some reason, if this is done in C after __z3 is set:
|
|
||||||
// register float __y3 __asm__("fr7") = tzero;
|
|
||||||
// then GCC will emit a spurious stack push (pushing FR12). So just zero it here.
|
|
||||||
"fmov FR5, FR7\n\t"
|
|
||||||
// set up back bank's FV0
|
|
||||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
|
|
||||||
|
|
||||||
// Save FR12-FR15, which are supposed to be preserved across functions calls.
|
|
||||||
// This stops them from getting clobbered and saves 4 stack pushes (memory accesses).
|
|
||||||
"fmov DR12, XD12\n\t"
|
|
||||||
"fmov DR14, XD14\n\t"
|
|
||||||
|
|
||||||
"fmov DR10, XD0\n\t" // fmov 'y1' and 'y2' from FR10, FR11 into position (XF0, XF1)
|
|
||||||
"fmov DR2, XD2\n\t" // fmov 'y3' and '0' from FR2, FR3 into position (XF2, XF3)
|
|
||||||
|
|
||||||
// pair move zeros for some speed in setting up front bank for matrix
|
|
||||||
"fmov DR0, DR10\n\t" // clear FR10, FR11
|
|
||||||
"fmov DR0, DR12\n\t" // clear FR12, FR13
|
|
||||||
"fmov DR0, DR14\n\t" // clear FR14, FR15
|
|
||||||
"fschg\n\t" // switch back to single moves
|
|
||||||
// prepare front bank for XMTRX
|
|
||||||
"fneg FR9\n\t" // set up 'x1'
|
|
||||||
"fmov FR8, FR2\n\t" // set up 'x2'
|
|
||||||
"fneg FR2\n\t"
|
|
||||||
"fmov FR4, FR1\n\t" // set up 'x3'
|
|
||||||
"fneg FR4\n\t"
|
|
||||||
// flip banks and matrix multiply
|
|
||||||
"frchg\n\t"
|
|
||||||
"ftrv XMTRX, FV0\n"
|
|
||||||
: "+&w" (__z1), "+&f" (__z2), "+&f" (__z3), "+&f" (__c) // output (using FV0)
|
|
||||||
: "f" (__x1), "f" (__x2), "f" (__x3), "f" (__y1), "f" (__y2), "f" (__zero), "f" (__x1_2) // inputs
|
|
||||||
: "fr7" // clobbers (all of the float regs get clobbered, except for FR12-FR15 which were specially preserved)
|
|
||||||
);
|
|
||||||
|
|
||||||
RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __c};
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Functions that wrap the xMATH_do_Cross_Product[_with_Mult]() functions to make
|
|
||||||
// it easier to organize parameters
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// Cross product with a bonus float multiply (c = a * b)
|
|
||||||
static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product_with_Mult(float x1, float x2, float x3, float y1, float y2, float y3, float a, float b)
|
|
||||||
{
|
|
||||||
return xMATH_do_Cross_Product_with_Mult(x3, a, y3, b, x2, x1, y1, y2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Plain cross product; does not use the bonus float multiply (c = 0 and a in the cross product matrix will be 0)
|
|
||||||
// This is a tiny bit faster than 'with_mult' (about 2 cycles faster)
|
|
||||||
static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Cross_Product(float x1, float x2, float x3, float y1, float y2, float y3)
|
|
||||||
{
|
|
||||||
return xMATH_do_Cross_Product(x3, 0.0f, x1, y3, x2, x1, y1, y2);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Outer product: vec (X) vec = matrix
|
|
||||||
// _ _
|
|
||||||
// | x1 |
|
|
||||||
// | x2 | (X) [ y1 y2 y3 y4 ] = 4x4 matrix
|
|
||||||
// | x3 |
|
|
||||||
// |_ x4 _|
|
|
||||||
//
|
|
||||||
// This returns the floats in the back bank (XF0-15), which are inaccessible
|
|
||||||
// outside of using frchg or paired-move fmov. It's meant to set up a matrix for
|
|
||||||
// use with other matrix functions. GCC also does not touch the XFn bank.
|
|
||||||
// This will also wipe out anything stored in the float registers, as it uses the
|
|
||||||
// whole FPU register file (all 32 of the float registers).
|
|
||||||
static inline __attribute__((always_inline)) void MATH_Outer_Product(float x1, float x2, float x3, float x4, float y1, float y2, float y3, float y4)
|
|
||||||
{
|
|
||||||
// FR4-FR11 are the regs that are passed in, in that order.
|
|
||||||
// Just need to make sure GCC doesn't modify anything, and these register vars do that job.
|
|
||||||
|
|
||||||
// Temporary variables are necessary per GCC to avoid clobbering:
|
|
||||||
// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
|
|
||||||
|
|
||||||
float tx1 = x1;
|
|
||||||
float tx2 = x2;
|
|
||||||
float tx3 = x3;
|
|
||||||
float tx4 = x4;
|
|
||||||
|
|
||||||
float ty1 = y1;
|
|
||||||
float ty2 = y2;
|
|
||||||
float ty3 = y3;
|
|
||||||
float ty4 = y4;
|
|
||||||
|
|
||||||
// vector FV4
|
|
||||||
register float __x1 __asm__("fr4") = tx1;
|
|
||||||
register float __x2 __asm__("fr5") = tx2;
|
|
||||||
register float __x3 __asm__("fr6") = tx3;
|
|
||||||
register float __x4 __asm__("fr7") = tx4;
|
|
||||||
|
|
||||||
// vector FV8
|
|
||||||
register float __y1 __asm__("fr8") = ty1;
|
|
||||||
register float __y2 __asm__("fr9") = ty2;
|
|
||||||
register float __y3 __asm__("fr10") = ty3; // in place already
|
|
||||||
register float __y4 __asm__("fr11") = ty4;
|
|
||||||
|
|
||||||
// This actually does a 4x4 matrix multiply to do the outer product.
|
|
||||||
// It's this:
|
|
||||||
//
|
|
||||||
// [ x1 x1 x1 x1 ] [ y1 0 0 0 ] [ x1y1 x1y2 x1y3 x1y4 ]
|
|
||||||
// [ x2 x2 x2 x2 ] [ 0 y2 0 0 ] = [ x2y1 x2y2 x2y3 x2y4 ]
|
|
||||||
// [ x3 x3 x3 x3 ] [ 0 0 y3 0 ] [ x3y1 x3y2 x3y3 x3y4 ]
|
|
||||||
// [ x4 x4 x4 x4 ] [ 0 0 0 y4 ] [ x4y1 x4y2 x4y3 x4y4 ]
|
|
||||||
//
|
|
||||||
|
|
||||||
asm volatile (
|
|
||||||
// zero out unoccupied front floats to make a double 0 in DR2
|
|
||||||
"fldi0 FR2\n\t"
|
|
||||||
"fmov FR2, FR3\n\t"
|
|
||||||
"fschg\n\t" // switch fmov to paired moves (note: only paired moves can access XDn regs)
|
|
||||||
// fmov 'x1' and 'x2' from FR4, FR5 into position (XF0,4,8,12, XF1,5,9,13)
|
|
||||||
"fmov DR4, XD0\n\t"
|
|
||||||
"fmov DR4, XD4\n\t"
|
|
||||||
"fmov DR4, XD8\n\t"
|
|
||||||
"fmov DR4, XD12\n\t"
|
|
||||||
// fmov 'x3' and 'x4' from FR6, FR7 into position (XF2,6,10,14, XF3,7,11,15)
|
|
||||||
"fmov DR6, XD2\n\t"
|
|
||||||
"fmov DR6, XD6\n\t"
|
|
||||||
"fmov DR6, XD10\n\t"
|
|
||||||
"fmov DR6, XD14\n\t"
|
|
||||||
// set up front floats (y1-y4)
|
|
||||||
"fmov DR8, DR0\n\t"
|
|
||||||
"fmov DR8, DR4\n\t"
|
|
||||||
"fmov DR10, DR14\n\t"
|
|
||||||
// finish zeroing out front floats
|
|
||||||
"fmov DR2, DR6\n\t"
|
|
||||||
"fmov DR2, DR8\n\t"
|
|
||||||
"fmov DR2, DR12\n\t"
|
|
||||||
"fschg\n\t" // switch back to single-move mode
|
|
||||||
// zero out remaining values and matrix multiply 4x4
|
|
||||||
"fmov FR2, FR1\n\t"
|
|
||||||
"ftrv XMTRX, FV0\n\t"
|
|
||||||
|
|
||||||
"fmov FR6, FR4\n\t"
|
|
||||||
"ftrv XMTRX, FV4\n\t"
|
|
||||||
|
|
||||||
"fmov FR8, FR11\n\t"
|
|
||||||
"ftrv XMTRX, FV8\n\t"
|
|
||||||
|
|
||||||
"fmov FR12, FR14\n\t"
|
|
||||||
"ftrv XMTRX, FV12\n\t"
|
|
||||||
// Save output in XF regs
|
|
||||||
"frchg\n"
|
|
||||||
: // no outputs
|
|
||||||
: "f" (__x1), "f" (__x2), "f" (__x3), "f" (__x4), "f" (__y1), "f" (__y2), "f" (__y3), "f" (__y4) // inputs
|
|
||||||
: "fr0", "fr1", "fr2", "fr3", "fr12", "fr13", "fr14", "fr15" // clobbers, can't avoid it
|
|
||||||
);
|
|
||||||
// GCC will restore FR12-FR15 from the stack after this, so we really can't keep the output in the front bank.
|
|
||||||
}
|
|
||||||
|
|
||||||
// Matrix transform: matrix * vector = vector
|
|
||||||
// _ _ _ _
|
|
||||||
// [ ----------- ] | x1 | | z1 |
|
|
||||||
// [ ---XMTRX--- ] | x2 | = | z2 |
|
|
||||||
// [ ----------- ] | x3 | | z3 |
|
|
||||||
// [ ----------- ] |_ x4 _| |_ z4 _|
|
|
||||||
//
|
|
||||||
// IMPORTANT USAGE INFORMATION (matrix transform):
|
|
||||||
//
|
|
||||||
// Return vector struct maps 1:1 to the above diagram:
|
|
||||||
//
|
|
||||||
// typedef struct {
|
|
||||||
// float z1;
|
|
||||||
// float z2;
|
|
||||||
// float z3;
|
|
||||||
// float z4;
|
|
||||||
// } RETURN_VECTOR_STRUCT;
|
|
||||||
//
|
|
||||||
// Similarly to how the sine and cosine functions use fsca and return 2 floats,
|
|
||||||
// the matrix transform function actually returns 4 floats. The SH4 only multiplies
|
|
||||||
// 4x4 matrices with 4x1 vectors, which is why the output is like that.
|
|
||||||
//
|
|
||||||
// Multiply a matrix stored in the back bank (XMTRX) with an input vector
|
|
||||||
static inline __attribute__((always_inline)) RETURN_VECTOR_STRUCT MATH_Matrix_Transform(float x1, float x2, float x3, float x4)
|
|
||||||
{
|
|
||||||
// The floats comprising FV4 are the regs that are passed in.
|
|
||||||
// Just need to make sure GCC doesn't modify anything, and these register vars do that job.
|
|
||||||
|
|
||||||
// Temporary variables are necessary per GCC to avoid clobbering:
|
|
||||||
// https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
|
|
||||||
|
|
||||||
float tx1 = x1;
|
|
||||||
float tx2 = x2;
|
|
||||||
float tx3 = x3;
|
|
||||||
float tx4 = x4;
|
|
||||||
|
|
||||||
// output vector FV0
|
|
||||||
register float __z1 __asm__("fr0") = tx1;
|
|
||||||
register float __z2 __asm__("fr1") = tx2;
|
|
||||||
register float __z3 __asm__("fr2") = tx3;
|
|
||||||
register float __z4 __asm__("fr3") = tx4;
|
|
||||||
|
|
||||||
asm volatile ("ftrv XMTRX, FV0\n\t"
|
|
||||||
// have to do this to obey SH4 calling convention--output returned in FV0
|
|
||||||
: "+w" (__z1), "+f" (__z2), "+f" (__z3), "+f" (__z4) // outputs, "+" means r/w
|
|
||||||
: // no inputs
|
|
||||||
: // no clobbers
|
|
||||||
);
|
|
||||||
|
|
||||||
RETURN_VECTOR_STRUCT output = {__z1, __z2, __z3, __z4};
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Matrix Transpose
|
|
||||||
//
|
|
||||||
// This does a matrix transpose on the matrix in XMTRX, which swaps rows with
|
|
||||||
// columns as follows (math notation is [XMTRX]^T):
|
|
||||||
//
|
|
||||||
// [ a b c d ] T [ a e i m ]
|
|
||||||
// [ e f g h ] = [ b f j n ]
|
|
||||||
// [ i j k l ] [ c g k o ]
|
|
||||||
// [ m n o p ] [ d h l p ]
|
|
||||||
//
|
|
||||||
// PLEASE NOTE: It is faster to avoid the need for a transpose altogether by
|
|
||||||
// structuring matrices and vectors accordingly.
|
|
||||||
static inline __attribute__((always_inline)) void MATH_Matrix_Transpose(void)
|
|
||||||
{
|
|
||||||
asm volatile (
|
|
||||||
"frchg\n\t" // fmov for singles only works on front bank
|
|
||||||
// FR0, FR5, FR10, and FR15 are already in place
|
|
||||||
// swap FR1 and FR4
|
|
||||||
"flds FR1, FPUL\n\t"
|
|
||||||
"fmov FR4, FR1\n\t"
|
|
||||||
"fsts FPUL, FR4\n\t"
|
|
||||||
// swap FR2 and FR8
|
|
||||||
"flds FR2, FPUL\n\t"
|
|
||||||
"fmov FR8, FR2\n\t"
|
|
||||||
"fsts FPUL, FR8\n\t"
|
|
||||||
// swap FR3 and FR12
|
|
||||||
"flds FR3, FPUL\n\t"
|
|
||||||
"fmov FR12, FR3\n\t"
|
|
||||||
"fsts FPUL, FR12\n\t"
|
|
||||||
// swap FR6 and FR9
|
|
||||||
"flds FR6, FPUL\n\t"
|
|
||||||
"fmov FR9, FR6\n\t"
|
|
||||||
"fsts FPUL, FR9\n\t"
|
|
||||||
// swap FR7 and FR13
|
|
||||||
"flds FR7, FPUL\n\t"
|
|
||||||
"fmov FR13, FR7\n\t"
|
|
||||||
"fsts FPUL, FR13\n\t"
|
|
||||||
// swap FR11 and FR14
|
|
||||||
"flds FR11, FPUL\n\t"
|
|
||||||
"fmov FR14, FR11\n\t"
|
|
||||||
"fsts FPUL, FR14\n\t"
|
|
||||||
// restore XMTRX to back bank
|
|
||||||
"frchg\n"
|
|
||||||
: // no outputs
|
|
||||||
: // no inputs
|
|
||||||
: "fpul" // clobbers
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Matrix product: matrix * matrix = matrix
|
// Matrix product: matrix * matrix = matrix
|
||||||
//
|
//
|
||||||
// These use the whole dang floating point unit.
|
// These use the whole dang floating point unit.
|
||||||
@ -1582,73 +1126,14 @@ static inline __attribute__((always_inline)) ALL_FLOATS_STRUCT * MATH_Store_XMTR
|
|||||||
//
|
//
|
||||||
/*
|
/*
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Commonly useful functions
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
|
|
||||||
int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Interpolation
|
// Interpolation
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
|
|
||||||
// Linear interpolation
|
// Linear interpolation
|
||||||
float MATH_Lerp(float a, float b, float t)
|
float MATH_Lerp(float a, float b, float t)
|
||||||
|
|
||||||
// Spherical interpolation ('theta' in fsca units)
|
|
||||||
float MATH_Slerp(float a, float b, float t, float theta)
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Fast Sinc functions (unnormalized, sin(x)/x version)
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Just pass in MATH_pi * x for normalized versions :)
|
|
||||||
|
|
||||||
// Sinc function (fsca units)
|
|
||||||
float MATH_Fast_Sincf(float x)
|
|
||||||
|
|
||||||
// Sinc function (degrees)
|
|
||||||
float MATH_Fast_Sincf_Deg(float x)
|
|
||||||
|
|
||||||
// Sinc function (rads)
|
|
||||||
float MATH_Fast_Sincf_Rad(float x)
|
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Commonly useful functions
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
// Returns 1 if point 'pt' is inside triangle with vertices 'v0', 'v1', and 'v2', and 0 if not
|
|
||||||
// Determines whether the point lies inside the triangle using a barycentric coordinate transformation
|
|
||||||
// Adapted from: https://stackoverflow.com/questions/2049582/how-to-determine-if-a-point-is-in-a-2d-triangle
|
|
||||||
// Specifically the answer by user 'andreasdr' in addition to the comment by user 'urraka' on the answer from user 'Andreas Brinck'
|
|
||||||
//
|
|
||||||
// The notation here assumes v0x is the x-component of v0, v0y is the y-component of v0, etc.
|
|
||||||
//
|
|
||||||
static inline __attribute__((always_inline)) int MATH_Is_Point_In_Triangle(float v0x, float v0y, float v1x, float v1y, float v2x, float v2y, float ptx, float pty)
|
|
||||||
{
|
|
||||||
float sdot = MATH_fipr(v0y, -v0x, v2y - v0y, v0x - v2x, v2x, v2y, ptx, pty);
|
|
||||||
float tdot = MATH_fipr(v0x, -v0y, v0y - v1y, v1x - v0x, v1y, v1x, ptx, pty);
|
|
||||||
|
|
||||||
float areadot = MATH_fipr(-v1y, v0y, v0x, v1x, v2x, -v1x + v2x, v1y - v2y, v2y);
|
|
||||||
|
|
||||||
// 'areadot' could be negative depending on the winding of the triangle
|
|
||||||
if(areadot < 0.0f)
|
|
||||||
{
|
|
||||||
sdot *= -1.0f;
|
|
||||||
tdot *= -1.0f;
|
|
||||||
areadot *= -1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( (sdot > 0.0f) && (tdot > 0.0f) && (areadot > (sdot + tdot)) )
|
|
||||||
{
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
// Interpolation
|
// Interpolation
|
||||||
//------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------
|
||||||
@ -1659,123 +1144,6 @@ static inline __attribute__((always_inline)) float MATH_Lerp(float a, float b, f
|
|||||||
return MATH_fmac(t, (b-a), a);
|
return MATH_fmac(t, (b-a), a);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Spherical interpolation ('theta' in fsca units)
|
|
||||||
static inline __attribute__((always_inline)) float MATH_Slerp(float a, float b, float t, float theta)
|
|
||||||
{
|
|
||||||
// a is an element of v0, b is an element of v1
|
|
||||||
// v = ( v0 * sin(theta - t * theta) + v1 * sin(t * theta) ) / sin(theta)
|
|
||||||
// by using sine/cosine identities and properties, this can be optimized to:
|
|
||||||
// v = v0 * cos(-t * theta) + ( v0 * ( cos(theta) * sin(-t * theta) ) - sin(-t * theta) * v1 ) / sin(theta)
|
|
||||||
// which only requires two calls to fsca.
|
|
||||||
// Specifically, sin(a + b) = sin(a)cos(b) + cos(a)sin(b) & sin(-a) = -sin(a)
|
|
||||||
|
|
||||||
// MATH_fsca_* functions return reverse-ordered complex numbers for speed reasons (i.e. normally sine is the imaginary part)
|
|
||||||
// This could be made even faster by using MATH_fsca_Int() with 'theta' and 't' as unsigned ints
|
|
||||||
|
|
||||||
#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
|
|
||||||
|
|
||||||
RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(theta);
|
|
||||||
float sine_value_theta = sine_cosine.sine;
|
|
||||||
float cosine_value_theta = sine_cosine.cosine;
|
|
||||||
|
|
||||||
RETURN_FSCA_STRUCT sine_cosine2 = MATH_fsca_Float(-t * theta);
|
|
||||||
float sine_value_minus_t_theta = sine_cosine2.sine;
|
|
||||||
float cosine_value_minus_t_theta = sine_cosine2.cosine;
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
_Complex float sine_cosine = MATH_fsca_Float(theta);
|
|
||||||
float sine_value_theta = __real__ sine_cosine;
|
|
||||||
float cosine_value_theta = __imag__ sine_cosine;
|
|
||||||
|
|
||||||
_Complex float sine_cosine2 = MATH_fsca_Float(-t * theta);
|
|
||||||
float sine_value_minus_t_theta = __real__ sine_cosine2;
|
|
||||||
float cosine_value_minus_t_theta = __imag__ sine_cosine2;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
float numer = a * cosine_value_theta * sine_value_minus_t_theta - sine_value_minus_t_theta * b;
|
|
||||||
float output_float = a * cosine_value_minus_t_theta + MATH_Fast_Divide(numer, sine_value_theta);
|
|
||||||
|
|
||||||
return output_float;
|
|
||||||
}
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
// Fast Sinc (unnormalized, sin(x)/x version)
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Just pass in MATH_pi * x for normalized versions :)
|
|
||||||
//
|
|
||||||
|
|
||||||
// Sinc function (fsca units)
|
|
||||||
static inline __attribute__((always_inline)) float MATH_Fast_Sincf(float x)
|
|
||||||
{
|
|
||||||
if(x == 0.0f)
|
|
||||||
{
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
|
|
||||||
|
|
||||||
RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float(x);
|
|
||||||
float sine_value = sine_cosine.sine;
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
_Complex float sine_cosine = MATH_fsca_Float(x);
|
|
||||||
float sine_value = __real__ sine_cosine;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return MATH_Fast_Divide(sine_value, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sinc function (degrees)
|
|
||||||
static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Deg(float x)
|
|
||||||
{
|
|
||||||
if(x == 0.0f)
|
|
||||||
{
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
|
|
||||||
|
|
||||||
RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Deg(x);
|
|
||||||
float sine_value = sine_cosine.sine;
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
_Complex float sine_cosine = MATH_fsca_Float_Deg(x);
|
|
||||||
float sine_value = __real__ sine_cosine;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return MATH_Fast_Divide(sine_value, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sinc function (rads)
|
|
||||||
static inline __attribute__((always_inline)) float MATH_Fast_Sincf_Rad(float x)
|
|
||||||
{
|
|
||||||
if(x == 0.0f)
|
|
||||||
{
|
|
||||||
return 1.0f;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if __GNUC__ <= GNUC_FSCA_ERROR_VERSION
|
|
||||||
|
|
||||||
RETURN_FSCA_STRUCT sine_cosine = MATH_fsca_Float_Rad(x);
|
|
||||||
float sine_value = sine_cosine.sine;
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
_Complex float sine_cosine = MATH_fsca_Float_Rad(x);
|
|
||||||
float sine_value = __real__ sine_cosine;
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return MATH_Fast_Divide(sine_value, x);
|
|
||||||
}
|
|
||||||
|
|
||||||
//==============================================================================
|
//==============================================================================
|
||||||
// Miscellaneous Snippets
|
// Miscellaneous Snippets
|
||||||
//==============================================================================
|
//==============================================================================
|
||||||
|
104
third_party/gldc/src/state.c
vendored
104
third_party/gldc/src/state.c
vendored
@ -1,8 +1,14 @@
|
|||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <kos.h>
|
||||||
|
#include <dc/pvr.h>
|
||||||
#include "gldc.h"
|
#include "gldc.h"
|
||||||
|
|
||||||
|
#define MIN(a,b) (((a)<(b))?(a):(b))
|
||||||
|
#define MAX(a,b) (((a)>(b))?(a):(b))
|
||||||
|
#define CLAMP( X, _MIN, _MAX ) ( (X)<(_MIN) ? (_MIN) : ((X)>(_MAX) ? (_MAX) : (X)) )
|
||||||
|
|
||||||
GLboolean STATE_DIRTY = GL_TRUE;
|
GLboolean STATE_DIRTY = GL_TRUE;
|
||||||
|
|
||||||
GLboolean DEPTH_TEST_ENABLED = GL_FALSE;
|
GLboolean DEPTH_TEST_ENABLED = GL_FALSE;
|
||||||
@ -21,37 +27,67 @@ GLboolean BLEND_ENABLED = GL_FALSE;
|
|||||||
GLboolean TEXTURES_ENABLED = GL_FALSE;
|
GLboolean TEXTURES_ENABLED = GL_FALSE;
|
||||||
GLboolean AUTOSORT_ENABLED = GL_FALSE;
|
GLboolean AUTOSORT_ENABLED = GL_FALSE;
|
||||||
|
|
||||||
|
PolyList OP_LIST;
|
||||||
|
PolyList PT_LIST;
|
||||||
|
PolyList TR_LIST;
|
||||||
|
Viewport VIEWPORT;
|
||||||
|
|
||||||
static struct {
|
static struct {
|
||||||
int x;
|
int x;
|
||||||
int y;
|
int y;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
GLboolean applied;
|
GLboolean applied;
|
||||||
} scissor_rect = {0, 0, 640, 480, false};
|
} scissor_rect;
|
||||||
|
|
||||||
void _glInitContext() {
|
void glKosInit() {
|
||||||
scissor_rect.x = 0;
|
|
||||||
scissor_rect.y = 0;
|
|
||||||
scissor_rect.width = vid_mode->width;
|
scissor_rect.width = vid_mode->width;
|
||||||
scissor_rect.height = vid_mode->height;
|
scissor_rect.height = vid_mode->height;
|
||||||
|
_glInitTextures();
|
||||||
|
|
||||||
|
OP_LIST.list_type = PVR_LIST_OP_POLY;
|
||||||
|
PT_LIST.list_type = PVR_LIST_PT_POLY;
|
||||||
|
TR_LIST.list_type = PVR_LIST_TR_POLY;
|
||||||
|
|
||||||
|
aligned_vector_reserve(&OP_LIST.vector, 1024 * 3);
|
||||||
|
aligned_vector_reserve(&PT_LIST.vector, 512 * 3);
|
||||||
|
aligned_vector_reserve(&TR_LIST.vector, 1024 * 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
void glKosSwapBuffers() {
|
||||||
|
_glApplyScissor(true);
|
||||||
|
|
||||||
|
pvr_scene_begin();
|
||||||
|
if (OP_LIST.vector.size > 2) {
|
||||||
|
pvr_list_begin(PVR_LIST_OP_POLY);
|
||||||
|
SceneListSubmit((Vertex*)OP_LIST.vector.data, OP_LIST.vector.size);
|
||||||
|
pvr_list_finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PT_LIST.vector.size > 2) {
|
||||||
|
pvr_list_begin(PVR_LIST_PT_POLY);
|
||||||
|
SceneListSubmit((Vertex*)PT_LIST.vector.data, PT_LIST.vector.size);
|
||||||
|
pvr_list_finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TR_LIST.vector.size > 2) {
|
||||||
|
pvr_list_begin(PVR_LIST_TR_POLY);
|
||||||
|
SceneListSubmit((Vertex*)TR_LIST.vector.data, TR_LIST.vector.size);
|
||||||
|
pvr_list_finish();
|
||||||
|
}
|
||||||
|
pvr_scene_finish();
|
||||||
|
|
||||||
|
OP_LIST.vector.size = 0;
|
||||||
|
PT_LIST.vector.size = 0;
|
||||||
|
TR_LIST.vector.size = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void glScissor(int x, int y, int width, int height) {
|
void glScissor(int x, int y, int width, int height) {
|
||||||
|
|
||||||
if(scissor_rect.x == x &&
|
|
||||||
scissor_rect.y == y &&
|
|
||||||
scissor_rect.width == width &&
|
|
||||||
scissor_rect.height == height) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
scissor_rect.x = x;
|
scissor_rect.x = x;
|
||||||
scissor_rect.y = y;
|
scissor_rect.y = y;
|
||||||
scissor_rect.width = width;
|
scissor_rect.width = width;
|
||||||
scissor_rect.height = height;
|
scissor_rect.height = height;
|
||||||
scissor_rect.applied = false;
|
scissor_rect.applied = false;
|
||||||
STATE_DIRTY = GL_TRUE; // FIXME: do we need this?
|
|
||||||
|
|
||||||
_glApplyScissor(false);
|
_glApplyScissor(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -87,27 +123,27 @@ void _glApplyScissor(int force) {
|
|||||||
|
|
||||||
PVRTileClipCommand c;
|
PVRTileClipCommand c;
|
||||||
|
|
||||||
int miny, maxx, maxy;
|
int sx, sy, ex, ey;
|
||||||
|
|
||||||
int scissor_width = MAX(MIN(scissor_rect.width, vid_mode->width), 0);
|
int scissor_width = MAX(MIN(scissor_rect.width, vid_mode->width), 0);
|
||||||
int scissor_height = MAX(MIN(scissor_rect.height, vid_mode->height), 0);
|
int scissor_height = MAX(MIN(scissor_rect.height, vid_mode->height), 0);
|
||||||
|
|
||||||
/* force the origin to the lower left-hand corner of the screen */
|
/* force the origin to the lower left-hand corner of the screen */
|
||||||
miny = (vid_mode->height - scissor_height) - scissor_rect.y;
|
sx = scissor_rect.x;
|
||||||
maxx = (scissor_width + scissor_rect.x);
|
sy = (vid_mode->height - scissor_height) - scissor_rect.y;
|
||||||
maxy = (scissor_height + miny);
|
ex = sx + scissor_width;
|
||||||
|
ey = sy + scissor_height;
|
||||||
|
|
||||||
/* load command structure while mapping screen coords to TA tiles */
|
/* load command structure while mapping screen coords to TA tiles */
|
||||||
c.flags = PVR_CMD_USERCLIP;
|
c.flags = PVR_CMD_USERCLIP;
|
||||||
c.d1 = c.d2 = c.d3 = 0;
|
c.d1 = c.d2 = c.d3 = 0;
|
||||||
|
|
||||||
uint16_t vw = vid_mode->width >> 5;
|
uint16_t vw = vid_mode->width >> 5;
|
||||||
uint16_t vh = vid_mode->height >> 5;
|
uint16_t vh = vid_mode->height >> 5;
|
||||||
|
|
||||||
c.sx = CLAMP(scissor_rect.x >> 5, 0, vw);
|
c.sx = CLAMP(sx >> 5, 0, vw);
|
||||||
c.sy = CLAMP(miny >> 5, 0, vh);
|
c.sy = CLAMP(sy >> 5, 0, vh);
|
||||||
c.ex = CLAMP((maxx >> 5) - 1, 0, vw);
|
c.ex = CLAMP((ex >> 5) - 1, 0, vw);
|
||||||
c.ey = CLAMP((maxy >> 5) - 1, 0, vh);
|
c.ey = CLAMP((ey >> 5) - 1, 0, vh);
|
||||||
|
|
||||||
aligned_vector_push_back(&OP_LIST.vector, &c, 1);
|
aligned_vector_push_back(&OP_LIST.vector, &c, 1);
|
||||||
aligned_vector_push_back(&PT_LIST.vector, &c, 1);
|
aligned_vector_push_back(&PT_LIST.vector, &c, 1);
|
||||||
@ -116,30 +152,16 @@ void _glApplyScissor(int force) {
|
|||||||
scissor_rect.applied = true;
|
scissor_rect.applied = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
Viewport VIEWPORT;
|
|
||||||
|
|
||||||
/* Set the GL viewport */
|
void apply_poly_header(pvr_poly_hdr_t* dst, int list_type) {
|
||||||
void glViewport(int x, int y, int width, int height) {
|
TextureObject* tx1 = TEXTURE_ACTIVE;
|
||||||
VIEWPORT.hwidth = width * 0.5f;
|
|
||||||
VIEWPORT.hheight = height * -0.5f;
|
|
||||||
VIEWPORT.x_plus_hwidth = x + width * 0.5f;
|
|
||||||
VIEWPORT.y_plus_hheight = y + height * 0.5f;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void apply_poly_header(pvr_poly_hdr_t* dst, PolyList* activePolyList) {
|
|
||||||
const TextureObject *tx1 = TEXTURE_ACTIVE;
|
|
||||||
uint32_t txr_base;
|
uint32_t txr_base;
|
||||||
TRACE();
|
|
||||||
|
|
||||||
int list_type = activePolyList->list_type;
|
|
||||||
int gen_color_clamp = PVR_CLRCLAMP_DISABLE;
|
int gen_color_clamp = PVR_CLRCLAMP_DISABLE;
|
||||||
|
|
||||||
int gen_culling = CULLING_ENABLED ? PVR_CULLING_CW : PVR_CULLING_SMALL;
|
int gen_culling = CULLING_ENABLED ? PVR_CULLING_CW : PVR_CULLING_SMALL;
|
||||||
int depth_comp = DEPTH_TEST_ENABLED ? PVR_DEPTHCMP_GEQUAL : PVR_DEPTHCMP_ALWAYS;
|
int depth_comp = DEPTH_TEST_ENABLED ? PVR_DEPTHCMP_GEQUAL : PVR_DEPTHCMP_ALWAYS;
|
||||||
int depth_write = DEPTH_MASK_ENABLED ? PVR_DEPTHWRITE_ENABLE : PVR_DEPTHWRITE_DISABLE;
|
int depth_write = DEPTH_MASK_ENABLED ? PVR_DEPTHWRITE_ENABLE : PVR_DEPTHWRITE_DISABLE;
|
||||||
|
|
||||||
int gen_shading = SHADE_MODEL;
|
|
||||||
int gen_clip_mode = SCISSOR_TEST_ENABLED ? PVR_USERCLIP_INSIDE : PVR_USERCLIP_DISABLE;
|
int gen_clip_mode = SCISSOR_TEST_ENABLED ? PVR_USERCLIP_INSIDE : PVR_USERCLIP_DISABLE;
|
||||||
int gen_fog_type = FOG_ENABLED ? PVR_FOG_TABLE : PVR_FOG_DISABLE;
|
int gen_fog_type = FOG_ENABLED ? PVR_FOG_TABLE : PVR_FOG_DISABLE;
|
||||||
|
|
||||||
@ -179,7 +201,7 @@ void apply_poly_header(pvr_poly_hdr_t* dst, PolyList* activePolyList) {
|
|||||||
/* Or in the list type, shading type, color and UV formats */
|
/* Or in the list type, shading type, color and UV formats */
|
||||||
dst->cmd |= (list_type << PVR_TA_CMD_TYPE_SHIFT) & PVR_TA_CMD_TYPE_MASK;
|
dst->cmd |= (list_type << PVR_TA_CMD_TYPE_SHIFT) & PVR_TA_CMD_TYPE_MASK;
|
||||||
dst->cmd |= (PVR_CLRFMT_ARGBPACKED << PVR_TA_CMD_CLRFMT_SHIFT) & PVR_TA_CMD_CLRFMT_MASK;
|
dst->cmd |= (PVR_CLRFMT_ARGBPACKED << PVR_TA_CMD_CLRFMT_SHIFT) & PVR_TA_CMD_CLRFMT_MASK;
|
||||||
dst->cmd |= (gen_shading << PVR_TA_CMD_SHADE_SHIFT) & PVR_TA_CMD_SHADE_MASK;
|
dst->cmd |= (SHADE_MODEL << PVR_TA_CMD_SHADE_SHIFT) & PVR_TA_CMD_SHADE_MASK;
|
||||||
dst->cmd |= (PVR_UVFMT_32BIT << PVR_TA_CMD_UVFMT_SHIFT) & PVR_TA_CMD_UVFMT_MASK;
|
dst->cmd |= (PVR_UVFMT_32BIT << PVR_TA_CMD_UVFMT_SHIFT) & PVR_TA_CMD_UVFMT_MASK;
|
||||||
dst->cmd |= (gen_clip_mode << PVR_TA_CMD_USERCLIP_SHIFT) & PVR_TA_CMD_USERCLIP_MASK;
|
dst->cmd |= (gen_clip_mode << PVR_TA_CMD_USERCLIP_SHIFT) & PVR_TA_CMD_USERCLIP_MASK;
|
||||||
|
|
||||||
|
2
third_party/gldc/src/texture.c
vendored
2
third_party/gldc/src/texture.c
vendored
@ -2,6 +2,8 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <kos.h>
|
||||||
|
#include <dc/pvr.h>
|
||||||
|
|
||||||
#include "gldc.h"
|
#include "gldc.h"
|
||||||
#include "yalloc/yalloc.h"
|
#include "yalloc/yalloc.h"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user