mirror of
https://github.com/ClassiCube/ClassiCube.git
synced 2025-09-15 02:25:32 -04:00
Dreamcast: Minorly optimise T&L to save a cycle
This commit is contained in:
parent
35747957b7
commit
7bc1d6b70a
@ -11,9 +11,8 @@ CFLAGS := -g -DNDEBUG -O3 -fipa-pta -fno-pie -flto=auto -fomit-frame-pointer -fb
|
||||
DEPFLAGS = -MT $@ -MMD -MP -MF $(BUILD_DIR)/$*.d
|
||||
DEPFILES := $(OBJS:%.o=%.d)
|
||||
|
||||
GLDC_LIB = third_party/gldc/libGLdc.a
|
||||
LDFLAGS = -g
|
||||
LIBS = -lm $(GLDC_LIB) -lppp -lkosfat
|
||||
LIBS = -lm -lppp -lkosfat
|
||||
|
||||
ifeq ($(strip $(KOS_BASE)),)
|
||||
$(warning Please set KOS variables in your environment. For example:)
|
||||
@ -37,7 +36,7 @@ $(BUILD_DIR):
|
||||
#---------------------------------------------------------------------------------
|
||||
# executable generation
|
||||
#---------------------------------------------------------------------------------
|
||||
$(TARGET).elf: $(OBJS) $(GLDC_LIB)
|
||||
$(TARGET).elf: $(OBJS)
|
||||
kos-cc $(LDFLAGS) $^ -o $@ $(LIBS)
|
||||
|
||||
$(TARGET).bin: $(TARGET).elf
|
||||
@ -66,10 +65,6 @@ $(TARGET).cdi: $(TARGET).iso
|
||||
#---------------------------------------------------------------------------------
|
||||
# object generation
|
||||
#---------------------------------------------------------------------------------
|
||||
$(GLDC_LIB): FORCE
|
||||
$(MAKE) -C third_party/gldc
|
||||
FORCE: ;
|
||||
|
||||
$(BUILD_DIR)/%.o: src/%.c
|
||||
kos-cc $(CFLAGS) $(DEPFLAGS) -c $< -o $@
|
||||
|
||||
|
@ -11,13 +11,14 @@
|
||||
! FR10 = invT
|
||||
! FR11 = t
|
||||
|
||||
#define TM1 r1 // temp register 1
|
||||
#define TM2 r3 // temp register 2
|
||||
|
||||
! INPUT ARGUMENTS
|
||||
#define IN1 r4 // input vertex 1
|
||||
#define IN2 r5 // input vertex 2
|
||||
#define OUT r6 // output vertex
|
||||
#define TYP r7 // type/flags for output vertex
|
||||
|
||||
#define TM1 r1 // temp register 1
|
||||
#define TM2 r3 // temp register 2
|
||||
#define CL1 r4 // input colour 1
|
||||
#define CL2 r5 // input colour 2
|
||||
#define CLO r7 // output colour
|
||||
@ -60,6 +61,7 @@ _ClipEdge:
|
||||
fmov.s @TM1,fr11 ! LS, fr11 = v2->z
|
||||
fsub fr2,fr11 ! FE, fr11 = v2->z - v1->z
|
||||
fldi0 fr8 ! LS, fr8 = 0
|
||||
mov.l TYP,@OUT ! LS, OUT->flags = TYPE
|
||||
fmul fr11,fr11 ! FE, fr11 = (v2->z - v1->z) * (v2->z - v1->z)
|
||||
fldi0 fr9 ! LS, fr9 = 0
|
||||
fldi0 fr0 ! LS, fr0 = 0
|
||||
@ -118,6 +120,7 @@ _ClipEdge:
|
||||
cmp/eq CL1,CL2 ! MT, T = ACOLOR == BCOLOR
|
||||
bt.s 1f ! BR, if (T) goto 1;
|
||||
mov CL1,CLO ! MT, OUTCOLOR = ACOLOR (branch delay instruction)
|
||||
|
||||
! Interpolate B
|
||||
extu.b CL1,TM1 ! EX, val = ACOLOR.b
|
||||
lds TM1,fpul ! CO, FPUL = val
|
||||
@ -130,6 +133,7 @@ _ClipEdge:
|
||||
ftrc fr3,fpul ! FE, FPUL = int(lerp)
|
||||
shlr8 CL2 ! EX, BCOLOR >>= 8
|
||||
sts fpul,TM2 ! CO, tmp = FPUL
|
||||
|
||||
! Interpolate G
|
||||
extu.b CL1,TM1 ! EX, val = ACOLOR.g
|
||||
lds TM1,fpul ! CO, FPUL = val
|
||||
@ -144,6 +148,7 @@ _ClipEdge:
|
||||
mov TM2,CLO ! MT, OUTCOLOR.b = tmp
|
||||
shlr8 CL2 ! EX, BCOLOR >>= 8
|
||||
sts fpul,TM2 ! CO, tmp = FPUL
|
||||
|
||||
! Interpolate R
|
||||
extu.b CL1,TM1 ! EX, val = ACOLOR.r
|
||||
lds TM1,fpul ! CO, FPUL = val
|
||||
@ -160,6 +165,7 @@ _ClipEdge:
|
||||
shlr8 CL2 ! EX, BCOLOR >>= 8
|
||||
sts fpul,TM2 ! CO, tmp = FPUL
|
||||
|
||||
! Interpolate A
|
||||
extu.b CL1,TM1 ! EX, val = ACOLOR.a
|
||||
lds TM1,fpul ! CO, FPUL = val
|
||||
float fpul,fr2 ! EX, fr2 = float(FPUL)
|
||||
|
@ -9,8 +9,8 @@
|
||||
#include <kos.h>
|
||||
#include <dc/matrix.h>
|
||||
#include <dc/pvr.h>
|
||||
#include "../third_party/gldc/src/gldc.h"
|
||||
#include "../third_party/gldc/src/state.c"
|
||||
#include "../third_party/gldc/state.c"
|
||||
#include "../third_party/gldc/sh4.c"
|
||||
|
||||
static cc_bool renderingDisabled;
|
||||
static cc_bool stateDirty;
|
||||
|
20
third_party/gldc/Makefile
vendored
20
third_party/gldc/Makefile
vendored
@ -1,20 +0,0 @@
|
||||
OBJS := sh4.o
|
||||
|
||||
C_FLAGS = -O3 -DNDEBUG -mfsrra -mfsca -fno-math-errno -ffp-contract=fast -ffast-math -O3 -mpretend-cmove -fexpensive-optimizations -fomit-frame-pointer -finline-functions -ml -m4-single-only -ffunction-sections -fdata-sections -std=gnu99
|
||||
|
||||
C_DEFINES = -DDREAMCAST -DNDEBUG -D__DREAMCAST__ -D__arch_dreamcast -D_arch_dreamcast -D_arch_sub_pristine
|
||||
|
||||
TARGET := libGLdc.a
|
||||
|
||||
ifeq ($(strip $(KOS_BASE)),)
|
||||
$(error "Please set KOS variables in your environment.")
|
||||
endif
|
||||
|
||||
default: $(TARGET)
|
||||
|
||||
%.o: src/%.c
|
||||
kos-cc $(C_DEFINES) $(C_FLAGS) -c $< -o $@
|
||||
|
||||
$(TARGET): $(OBJS)
|
||||
kos-ar cr $@ $^
|
||||
kos-ranlib $@
|
66
third_party/gldc/README.md
vendored
66
third_party/gldc/README.md
vendored
@ -1,66 +0,0 @@
|
||||
This is a fork of GLdc optimised for the Dreamcast port of ClassiCube, and unfortunately is essentially useless for any other project
|
||||
|
||||
---
|
||||
|
||||
# GLdc
|
||||
|
||||
**Development of GLdc has moved to [Gitlab](https://gitlab.com/simulant/GLdc)**
|
||||
|
||||
This is a partial implementation of OpenGL 1.2 for the SEGA Dreamcast for use
|
||||
with the KallistiOS SDK.
|
||||
|
||||
It began as a fork of libGL by Josh Pearson but has undergone a large refactor
|
||||
which is essentially a rewrite.
|
||||
|
||||
The aim is to implement as much of OpenGL 1.2 as possible, and to add additional
|
||||
features via extensions.
|
||||
|
||||
Things left to (re)implement:
|
||||
|
||||
- Spotlights (Trivial)
|
||||
- Framebuffer extension (Trivial)
|
||||
- Texture Matrix (Trivial)
|
||||
|
||||
Things I'd like to do:
|
||||
|
||||
- Use a clean "gl.h"
|
||||
- Define an extension for modifier volumes
|
||||
- Add support for point sprites
|
||||
- Optimise, add unit tests for correctness
|
||||
|
||||
# Compiling
|
||||
|
||||
GLdc uses CMake for its build system; it currently ships with two "backends":
|
||||
|
||||
- kospvr - This is the hardware-accelerated Dreamcast backend
|
||||
- software - This is a stub software rasterizer used for testing and debugging
|
||||
|
||||
To compile a Dreamcast debug build, you'll want to do something like the following:
|
||||
|
||||
```
|
||||
mkdir dcbuild
|
||||
cd dcbuild
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" ..
|
||||
make
|
||||
```
|
||||
|
||||
For a release build, replace the cmake line with the following:
|
||||
```
|
||||
cmake -DCMAKE_TOOLCHAIN_FILE=../toolchains/Dreamcast.cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
|
||||
```
|
||||
|
||||
You will need KallistiOS compiled and configured (e.g. the KOS_BASE environment
|
||||
variable must be set)
|
||||
|
||||
To compile for PC:
|
||||
|
||||
```
|
||||
mkdir pcbuild
|
||||
cd pcbuild
|
||||
cmake -G "Unix Makefiles" ..
|
||||
make
|
||||
```
|
||||
|
||||
# Special Thanks!
|
||||
|
||||
- Massive shout out to Hayden Kowalchuk for diagnosing and fixing a large number of bugs while porting GL Quake to the Dreamcast. Absolute hero!
|
@ -2,9 +2,6 @@
|
||||
#define PRIVATE_H
|
||||
#include <stdint.h>
|
||||
|
||||
#define GLenum unsigned int
|
||||
#define GLboolean unsigned char
|
||||
|
||||
#define GLDC_FORCE_INLINE __attribute__((always_inline)) inline
|
||||
#define GLDC_NO_INLINE __attribute__((noinline))
|
||||
|
||||
@ -24,6 +21,6 @@ typedef struct {
|
||||
uint16_t height;
|
||||
} TextureObject;
|
||||
|
||||
void SceneListSubmit(Vertex* v2, int n);
|
||||
void GLDC_NO_INLINE SceneListSubmit(Vertex* v2, int n);
|
||||
|
||||
#endif // PRIVATE_H
|
102
third_party/gldc/src/sh4.c → third_party/gldc/sh4.c
vendored
102
third_party/gldc/src/sh4.c → third_party/gldc/sh4.c
vendored
@ -15,24 +15,21 @@ static GLDC_FORCE_INLINE float sh4_fsrra(float x) {
|
||||
return x;
|
||||
}
|
||||
|
||||
static GLDC_FORCE_INLINE float _glFastInvert(float x) {
|
||||
return sh4_fsrra(x * x);
|
||||
}
|
||||
|
||||
static GLDC_FORCE_INLINE void PushVertex(Vertex* v) {
|
||||
volatile Vertex* dst = (Vertex*)(sq);
|
||||
float f = _glFastInvert(v->w);
|
||||
// Convert to NDC (viewport already applied)
|
||||
float x = v->x * f;
|
||||
float y = v->y * f;
|
||||
|
||||
float ww = v->w * v->w;
|
||||
dst->flags = v->flags;
|
||||
dst->x = x;
|
||||
dst->y = y;
|
||||
dst->z = f;
|
||||
dst->u = v->u;
|
||||
dst->v = v->v;
|
||||
dst->bgra = v->bgra;
|
||||
float f = sh4_fsrra(ww); // 1/sqrt(w^2) ~ 1/w
|
||||
// Convert to NDC (viewport already applied)
|
||||
float x = v->x * f;
|
||||
float y = v->y * f;
|
||||
|
||||
dst->x = x;
|
||||
dst->y = y;
|
||||
dst->z = f;
|
||||
dst->u = v->u;
|
||||
dst->v = v->v;
|
||||
dst->bgra = v->bgra;
|
||||
__asm__("pref @%0" : : "r"(dst));
|
||||
dst++;
|
||||
}
|
||||
@ -51,7 +48,7 @@ static inline void PushCommand(Vertex* v) {
|
||||
sq += 8;
|
||||
}
|
||||
|
||||
extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout);
|
||||
extern void ClipEdge(const Vertex* const v1, const Vertex* const v2, Vertex* vout, int type);
|
||||
|
||||
#define V0_VIS (1 << 0)
|
||||
#define V1_VIS (1 << 1)
|
||||
@ -74,10 +71,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// .....A....B...
|
||||
// / |
|
||||
// v3--v2---v1
|
||||
ClipEdge(v3, v0, a);
|
||||
a->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v0, v1, b);
|
||||
b->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v3, v0, a, PVR_CMD_VERTEX_EOL);
|
||||
ClipEdge(v0, v1, b, PVR_CMD_VERTEX);
|
||||
|
||||
PushVertex(v0);
|
||||
PushVertex(b);
|
||||
@ -92,10 +87,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....A.....B...
|
||||
// / |
|
||||
// v0--v3---v2
|
||||
ClipEdge(v0, v1, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v1, v2, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v1, v2, b, PVR_CMD_VERTEX_EOL);
|
||||
|
||||
PushVertex(a);
|
||||
PushVertex(v1);
|
||||
@ -109,11 +102,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....A.....B...
|
||||
// / |
|
||||
// v1--v0---v3
|
||||
|
||||
ClipEdge(v1, v2, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v2, v3, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v2, v3, b, PVR_CMD_VERTEX_EOL);
|
||||
|
||||
PushVertex(a);
|
||||
PushVertex(v2);
|
||||
@ -127,10 +117,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....A.....B...
|
||||
// / |
|
||||
// v2--v1---v0
|
||||
ClipEdge(v2, v3, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v3, v0, b);
|
||||
b->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v2, v3, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v3, v0, b, PVR_CMD_VERTEX);
|
||||
|
||||
PushVertex(b);
|
||||
PushVertex(a);
|
||||
@ -144,10 +132,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....B..........A...
|
||||
// \ |
|
||||
// v3-----v2
|
||||
ClipEdge(v1, v2, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v3, v0, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v3, v0, b, PVR_CMD_VERTEX_EOL);
|
||||
|
||||
PushVertex(v1);
|
||||
PushVertex(a);
|
||||
@ -162,10 +148,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....B..........A...
|
||||
// \ |
|
||||
// v2-----v1
|
||||
ClipEdge(v0, v1, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v2, v3, b);
|
||||
b->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v2, v3, b, PVR_CMD_VERTEX);
|
||||
|
||||
PushVertex(a);
|
||||
PushVertex(b);
|
||||
@ -179,10 +163,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....B..........A...
|
||||
// \ |
|
||||
// v0-----v3
|
||||
ClipEdge(v2, v3, a);
|
||||
a->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v0, v1, b);
|
||||
b->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v2, v3, a, PVR_CMD_VERTEX_EOL);
|
||||
ClipEdge(v0, v1, b, PVR_CMD_VERTEX);
|
||||
|
||||
PushVertex(v1);
|
||||
PushVertex(v2);
|
||||
@ -197,10 +179,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// ....B..........A...
|
||||
// \ |
|
||||
// v1-----v0
|
||||
ClipEdge(v3, v0, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v1, v2, b);
|
||||
b->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v3, v0, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v1, v2, b, PVR_CMD_VERTEX);
|
||||
|
||||
PushVertex(b);
|
||||
PushVertex(v2);
|
||||
@ -216,10 +196,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// \ |
|
||||
// v3
|
||||
// v1,v2,v0 v2,v0,A v0,A,B
|
||||
ClipEdge(v2, v3, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v3, v0, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v2, v3, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v3, v0, b, PVR_CMD_VERTEX_EOL);
|
||||
|
||||
PushVertex(v1);
|
||||
PushVertex(v2);
|
||||
@ -236,10 +214,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// \ |
|
||||
// v2
|
||||
// v0,v1,v3 v1,v3,A v3,A,B
|
||||
ClipEdge(v1, v2, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v2, v3, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v1, v2, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v2, v3, b, PVR_CMD_VERTEX_EOL);
|
||||
v3->flags = PVR_CMD_VERTEX;
|
||||
|
||||
PushVertex(v0);
|
||||
@ -257,10 +233,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// \ |
|
||||
// v1
|
||||
// v3,v0,v2 v0,v2,A v2,A,B
|
||||
ClipEdge(v0, v1, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v1, v2, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v0, v1, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v1, v2, b, PVR_CMD_VERTEX_EOL);
|
||||
v3->flags = PVR_CMD_VERTEX;
|
||||
|
||||
PushVertex(v3);
|
||||
@ -278,10 +252,8 @@ static void SubmitClipped(Vertex* v0, Vertex* v1, Vertex* v2, Vertex* v3, uint8_
|
||||
// \ |
|
||||
// v0
|
||||
// v2,v3,v1 v3,v1,A v1,A,B
|
||||
ClipEdge(v3, v0, a);
|
||||
a->flags = PVR_CMD_VERTEX;
|
||||
ClipEdge(v0, v1, b);
|
||||
b->flags = PVR_CMD_VERTEX_EOL;
|
||||
ClipEdge(v3, v0, a, PVR_CMD_VERTEX);
|
||||
ClipEdge(v0, v1, b, PVR_CMD_VERTEX_EOL);
|
||||
v3->flags = PVR_CMD_VERTEX;
|
||||
|
||||
PushVertex(v2);
|
@ -3,21 +3,21 @@
|
||||
|
||||
static TextureObject* TEXTURE_ACTIVE;
|
||||
|
||||
static GLboolean DEPTH_TEST_ENABLED;
|
||||
static GLboolean DEPTH_MASK_ENABLED;
|
||||
static uint8_t DEPTH_TEST_ENABLED;
|
||||
static uint8_t DEPTH_MASK_ENABLED;
|
||||
|
||||
static GLboolean CULLING_ENABLED;
|
||||
static uint8_t CULLING_ENABLED;
|
||||
|
||||
static GLboolean FOG_ENABLED;
|
||||
static GLboolean ALPHA_TEST_ENABLED;
|
||||
static uint8_t FOG_ENABLED;
|
||||
static uint8_t ALPHA_TEST_ENABLED;
|
||||
|
||||
static GLboolean SCISSOR_TEST_ENABLED;
|
||||
static GLenum SHADE_MODEL = PVR_SHADE_GOURAUD;
|
||||
static uint8_t SCISSOR_TEST_ENABLED;
|
||||
static uint32_t SHADE_MODEL = PVR_SHADE_GOURAUD;
|
||||
|
||||
static GLboolean BLEND_ENABLED;
|
||||
static uint8_t BLEND_ENABLED;
|
||||
|
||||
static GLboolean TEXTURES_ENABLED;
|
||||
static GLboolean AUTOSORT_ENABLED;
|
||||
static uint8_t TEXTURES_ENABLED;
|
||||
static uint8_t AUTOSORT_ENABLED;
|
||||
|
||||
static inline int DimensionFlag(int w) {
|
||||
switch(w) {
|
Loading…
x
Reference in New Issue
Block a user