3DS: Try to use VRAM for textures when possible

This commit is contained in:
UnknownShadow200 2025-07-10 22:20:28 +10:00
parent 403e466ac2
commit 2741a33a15
15 changed files with 85 additions and 183 deletions

View File

@ -19,14 +19,18 @@ jobs:
image: ghcr.io/kos-builds/kos-dc:sha-20149ee-14.2.0
steps:
- uses: actions/checkout@v4
- name: Compile Dreamcast build
id: compile
- name: Install prerequisites
shell: bash
run: |
apt-get update
apt-get -y install genisoimage
wget https://github.com/ClassiCube/rpi-compiling-stuff/raw/main/cdi4dc -O /opt/toolchains/dc/kos/utils/cdi4dc
chmod +x /opt/toolchains/dc/kos/utils/cdi4dc
- name: Compile Dreamcast build
id: compile
shell: bash
run: |
export PATH=/opt/toolchains/dc/kos/utils/:$PATH
make dreamcast

View File

@ -19,11 +19,14 @@ jobs:
image: ghcr.io/volkertb/debian-djgpp
steps:
- uses: actions/checkout@v4
- name: Compile MS dos build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get -y install curl
- name: Compile MS dos build
id: compile
run: |
make CC=gcc dos RELEASE=1

View File

@ -19,11 +19,14 @@ jobs:
image: ghcr.io/dragonminded/libdragon:preview
steps:
- uses: actions/checkout@v4
- name: Compile N64 build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get -y install curl
- name: Compile N64 build
id: compile
run: |
REAL_DIR=`pwd`
cd /tmp
git clone -b opengl https://github.com/DragonMinded/libdragon.git --depth=1

View File

@ -19,11 +19,14 @@ jobs:
image: skylyrac/blocksds:dev-latest
steps:
- uses: actions/checkout@v4
- name: Compile NDS build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get -y install curl
- name: Compile NDS build
id: compile
run: |
make ds
export BUILD_DSI=1
make ds

View File

@ -19,11 +19,14 @@ jobs:
image: ghcr.io/classicube/minimal-psn00b:latest
steps:
- uses: actions/checkout@v4
- name: Compile PS1 build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get install -y curl
- name: Compile PS1 build
id: compile
run: |
export PSN00BSDK_ROOT=/usr/local/psnoob
make ps1

View File

@ -19,10 +19,13 @@ jobs:
image: ghcr.io/ps2dev/ps2sdk:latest
steps:
- uses: actions/checkout@v4
- name: Install prerequisites
run: |
apk add make mpc1 curl
- name: Compile PS2 build
id: compile
run: |
apk add make mpc1 curl
make ps2

View File

@ -19,11 +19,14 @@ jobs:
image: ghcr.io/classicube/minimal-psl1ght:latest
steps:
- uses: actions/checkout@v4
- name: Compile PS3 build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get install -y curl
- name: Compile PS3 build
id: compile
run: |
export PS3DEV=/usr/local/ps3dev
export PSL1GHT=/usr/local/ps3dev
export PATH=$PATH:$PS3DEV/bin

View File

@ -19,11 +19,13 @@ jobs:
image: pspdev/pspdev:latest
steps:
- uses: actions/checkout@v4
- name: Install prerequisites
run: |
apk add curl curl-dev
- name: Compile PSP build
id: compile
run: |
apk add curl curl-dev
export PSPSDK=$(psp-config --pspsdk-path)
make psp

View File

@ -19,11 +19,14 @@ jobs:
image: ijacquez/yaul:1.0.10
steps:
- uses: actions/checkout@v4
- name: Compile Saturn build
id: compile
- name: Install prerequisites
run: |
apt-get update
apt-get -y install curl
- name: Compile Saturn build
id: compile
run: |
make saturn
- uses: ./.github/actions/notify_failure

View File

@ -23,6 +23,7 @@ jobs:
run: Invoke-WebRequest https://nnp.nnchan.ru/dl/symbiansr1_gcce_workflow.zip -OutFile symbiansdk.zip
- name: Extract Symbian SDK
run: Expand-Archive symbiansdk.zip -DestinationPath .
- name: Compile Symbian build
id: compile
run: |

View File

@ -23,7 +23,6 @@ jobs:
id: compile
run: |
make wii
make clean
make gamecube
- name: Create Wii homebrew

View File

@ -19,10 +19,13 @@ jobs:
image: ghcr.io/xboxdev/nxdk:git-e955705a
steps:
- uses: actions/checkout@v4
- name: Install prerequisites
run: |
apk add curl curl-dev
- name: Compile Xbox build
id: compile
run: |
apk add curl curl-dev
eval $(/usr/src/nxdk/bin/activate -s)
make xbox

View File

@ -19,12 +19,15 @@ jobs:
image: free60/libxenon
steps:
- uses: actions/checkout@v4
- name: Compile 360 build
id: compile
- name: Install prerequisites
run: |
sed -i -e 's/archive.ubuntu.com\|security.ubuntu.com/old-releases.ubuntu.com/g' /etc/apt/sources.list
apt-get update
apt-get install -y curl
- name: Compile 360 build
id: compile
run: |
export DEVKITXENON=/usr/local/xenon
export PATH=$PATH:$DEVKITXENON/bin:$DEVKITXENON/usr/bin
make xbox360

View File

@ -159,6 +159,13 @@ static void InitCitro3D(void) {
C3D_RenderTargetColor(&bottomTarget, GPU_RB_RGBA8);
C3D_RenderTargetSetOutput(&bottomTarget, GFX_BOTTOM, GFX_LEFT, DISPLAY_TRANSFER_FLAGS);
// Allocate the right framebuffer (for stereoscopic 3D) in advance, even when it isn't actually used.
// Although this means a bit less VRAM is available for textures, it ensures that if the user later
// turns on stereoscopic 3D, we don't have to handle the case where there is insufficient VRAM for it.
C3D_RenderTargetInit(&topTargetRight, 240, 400);
C3D_RenderTargetColor(&topTargetRight, GPU_RB_RGBA8);
C3D_RenderTargetDepth(&topTargetRight, GPU_RB_DEPTH24);
gfxSetDoubleBuffering(GFX_TOP, true);
SetDefaultState();
AllocShaders();
@ -178,6 +185,7 @@ void Gfx_Create(void) {
Gfx.Created = true;
gfx_vsync = true;
Gfx.NonPowTwoTexturesSupport = GFX_NONPOW2_UPLOAD;
Gfx_RestoreState();
}
@ -239,9 +247,6 @@ void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view) {
Calc3DProjection(+1, proj);
if (!createdTopTargetRight) {
C3D_RenderTargetInit(&topTargetRight, 240, 400);
C3D_RenderTargetColor(&topTargetRight, GPU_RB_RGBA8);
C3D_RenderTargetDepth(&topTargetRight, GPU_RB_DEPTH24);
C3D_RenderTargetSetOutput(&topTargetRight, GFX_TOP, GFX_RIGHT, DISPLAY_TRANSFER_FLAGS);
createdTopTargetRight = true;
}
@ -292,7 +297,13 @@ static void GPUTexture_Unref(GfxResourceID* resource) {
}
// Releases the pixel storage owned by a GPU texture and then the GPUTexture itself.
//   tex - texture to destroy; must not be used after this call.
// The storage is released exactly once here. Also calling C3D_TexDelete on the
// same texture would free tex->texture.data a second time.
static void GPUTexture_Free(struct GPUTexture* tex) {
    void* addr = tex->texture.data;

    // Texture storage comes from either vramAlloc or linearAlloc,
    // so it must be handed back to the matching allocator
    if (addrIsVRAM(addr)) {
        vramFree(addr);
    } else {
        linearFree(addr);
    }
    Mem_Free(tex);
}
@ -330,10 +341,13 @@ static void GPUTextures_DeleteUnreferenced(void) {
/*########################################################################################################################*
*---------------------------------------------------------Textures--------------------------------------------------------*
*#########################################################################################################################*/
static bool CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height) {
static bool CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height, int vram) {
width = Math_NextPowOf2(width);
height = Math_NextPowOf2(height);
u32 size = width * height * 4;
//tex->data = p.onVram ? vramAlloc(total_size) : linearAlloc(total_size);
tex->data = linearAlloc(size);
tex->data = vram ? vramAlloc(size) : NULL;
if (!tex->data) tex->data = linearAlloc(size);
if (!tex->data) return false;
tex->width = width;
@ -351,29 +365,6 @@ static bool CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height) {
return true;
}
// Would move a texture's storage from linear memory into VRAM.
// Currently disabled: the function returns immediately, so nothing below runs.
static void TryTransferToVRAM(C3D_Tex* tex) {
    return;

    // NOTE: freeing the linear copy afterwards results in a broken texture. maybe no DMA?
    void* dst = vramAlloc(tex->size);
    if (dst == NULL) return;

    void* old = tex->data;
    C3D_SyncTextureCopy((u32*)old, 0, (u32*)dst, 0, tex->size, 8);
    linearFree(old);
    tex->data = dst;
}
/*static inline cc_uint32 CalcZOrder(cc_uint32 x, cc_uint32 y) {
// Simplified "Interleave bits by Binary Magic Numbers" from
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious
// TODO: Simplify to array lookup?
x = (x | (x << 2)) & 0x33;
x = (x | (x << 1)) & 0x55;
y = (y | (y << 2)) & 0x33;
y = (y | (y << 1)) & 0x55;
return x | (y << 1);
}*/
static inline cc_uint32 CalcZOrder(cc_uint32 a) {
// Simplified "Interleave bits by Binary Magic Numbers" from
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
@ -390,28 +381,29 @@ static inline cc_uint32 CalcZOrder(cc_uint32 a) {
// four 4x4 subtiles, which are in turn composed of four 2x2 subtiles
// Copies the pixels of a bitmap into a texture's tiled (Morton / Z-order) storage.
//   tex      - destination texture; tex->data receives the pixels
//   originX  - X offset (in pixels) in the texture where the bitmap is placed
//   originY  - Y offset (in pixels) in the texture where the bitmap is placed
//   bmp      - source bitmap (bmp->scan0 holds bmp->height rows of pixels)
//   rowWidth - distance between consecutive source rows, in pixels
// Rows are written vertically flipped (source row 0 lands on the last texture row).
// No bounds checking is performed on the destination.
static void ToMortonTexture(C3D_Tex* tex, int originX, int originY,
                            struct Bitmap* bmp, int rowWidth) {
    unsigned int mortonX, mortonY;
    unsigned int dstX, dstY, tileX, tileY;
    int src_w = bmp->width,  dst_w = tex->width;
    int src_h = bmp->height, dst_h = tex->height;

    cc_uint32* dst = tex->data;
    cc_uint32* src = bmp->scan0;

    for (int y = 0; y < src_h; y++)
    {
        dstY    = dst_h - 1 - (y + originY);
        tileY   = dstY & ~0x07;                    // first row of the 8x8 tile
        mortonY = CalcZOrder(dstY & 0x07) << 1;    // row within tile -> odd bits

        for (int x = 0; x < src_w; x++)
        {
            dstX    = x + originX;
            tileX   = dstX & ~0x07;                // first column of the 8x8 tile
            mortonX = CalcZOrder(dstX & 0x07);     // column within tile -> even bits

            // Each 8x8 tile occupies 64 consecutive pixels in the destination
            dst[(mortonX | mortonY) + (tileX * 8) + (tileY * dst_w)] = src[x];
        }
        // Advance to the next source row once, instead of recomputing y * rowWidth per pixel
        src += rowWidth;
    }
    // TODO flush data cache GSPGPU_FlushDataCache
}
@ -419,11 +411,11 @@ static void ToMortonTexture(C3D_Tex* tex, int originX, int originY,
// Creates a GPU texture from a bitmap.
//   bmp      - source pixels; the texture is sized from bmp->width / bmp->height
//   rowWidth - distance between consecutive source rows, in pixels
//   flags    - TEXTURE_FLAG_* bits; TEXTURE_FLAG_DYNAMIC keeps the texture out of VRAM
//   mipmaps  - not used by this implementation
// Returns the new texture, or NULL when native texture storage could not be allocated.
GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) {
    struct GPUTexture* tex = GPUTexture_Alloc();

    // Only textures that aren't flagged as dynamic are allowed to be placed in VRAM
    // NOTE(review): presumably because dynamic textures get rewritten by the CPU later - confirm
    int can_vram = !(flags & TEXTURE_FLAG_DYNAMIC);
    bool success = CreateNativeTexture(&tex->texture, bmp->width, bmp->height, can_vram);
    // NOTE(review): tex is not released on this path - confirm whether GPUTexture_Alloc tracks it
    if (!success) return NULL;

    ToMortonTexture(&tex->texture, 0, 0, bmp, rowWidth);
    return tex;
}

123
third_party/citro3d.c vendored
View File

@ -97,66 +97,7 @@ typedef struct
};
} C3D_Tex;
static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level);
static void C3D_TexGenerateMipmap(C3D_Tex* tex, GPU_TEXFACE face);
static void C3D_TexBind(int unitId, C3D_Tex* tex);
static void C3D_TexFlush(C3D_Tex* tex);
static void C3D_TexDelete(C3D_Tex* tex);
// Returns floor(log2(min(width, height))) - 3, i.e. the highest mipmap level
// for which the shorter side is still at least 8 pixels.
// The result is undefined if the shorter side is 0 (__builtin_clz(0)).
static inline int C3D_TexCalcMaxLevel(u32 width, u32 height)
{
    u32 shorter = height;
    if (width < height) shorter = width;

    int log2Floor = 31 - __builtin_clz(shorter);
    return log2Floor - 3; // avoid sizes smaller than 8
}
// Returns the size of a given mipmap level, where 'size' is the size of level 0.
// Each successive level is a quarter of the previous one (size / 4^level).
static inline u32 C3D_TexCalcLevelSize(u32 size, int level)
{
    int shift = 2 * level;
    return size >> shift;
}
// Returns the combined size of mipmap levels 0..maxLevel, where 'size' is the size of level 0.
static inline u32 C3D_TexCalcTotalSize(u32 size, int maxLevel)
{
    /*
    Geometric series with ratio r = 1/4 and n = maxLevel:
        S = s + s/4 + s/4^2 + ... + s/4^n
          = s (1 - (1/4)^(n+1)) / (1 - 1/4)
          = (4/3) (s - s/4^(n+1))
          = (4/3) (s - (s >> (2n+2)))
    and s >> (2n+2) is exactly the size of level n+1.
    */
    u32 beyondLast = C3D_TexCalcLevelSize(size, maxLevel + 1);
    u32 remaining  = size - beyondLast;
    return remaining * 4 / 3;
}
// Locates a mipmap level inside a texture image.
//   tex   - texture providing the level 0 size and maximum level
//   data  - start of the image data
//   level - mipmap level; a negative value refers to the whole image
//   size  - optional output: size of the requested level, or of all levels when level < 0
// Returns 'data' for level <= 0, otherwise 'data' advanced past all lower levels.
static inline void* C3D_TexGetImagePtr(C3D_Tex* tex, void* data, int level, u32* size)
{
    if (size)
    {
        if (level < 0)
            *size = C3D_TexCalcTotalSize(tex->size, tex->maxLevel);
        else
            *size = C3D_TexCalcLevelSize(tex->size, level);
    }

    if (level <= 0) return data;

    // Levels are stored back to back, so skip levels 0..level-1
    u32 skipped = C3D_TexCalcTotalSize(tex->size, level - 1);
    return (u8*)data + skipped;
}
// Same as C3D_TexGetImagePtr, using the texture's own data pointer as the image start.
static inline void* C3D_Tex2DGetImagePtr(C3D_Tex* tex, int level, u32* size)
{
    void* image = tex->data;
    return C3D_TexGetImagePtr(tex, image, level, size);
}
// Loads 'data' into mipmap level 0 of the texture's 2D face.
static inline void C3D_TexUpload(C3D_Tex* tex, const void* data)
{
    const int baseLevel = 0;
    C3D_TexLoadImage(tex, data, GPU_TEXFACE_2D, baseLevel);
}
static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset);
@ -257,7 +198,6 @@ typedef enum
static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt);
static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt);
static C3D_FrameBuf* C3D_GetFrameBuf(void);
static void C3D_SetFrameBuf(C3D_FrameBuf* fb);
static void C3D_FrameBufClear(C3D_FrameBuf* fb, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth);
static void C3D_FrameBufTransfer(C3D_FrameBuf* fb, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags);
@ -337,8 +277,6 @@ static inline void C3D_RenderTargetClear(C3D_RenderTarget* target, C3D_ClearBits
C3D_FrameBufClear(&target->frameBuf, clearBits, clearColor, clearDepth);
}
static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags);
@ -818,14 +756,6 @@ static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt)
return size*(2+depthFmtSizes[fmt]);
}
// Returns a pointer to the current context's framebuffer state.
// Also sets the C3DiF_FrameBuf bit in the context flags before returning.
static C3D_FrameBuf* C3D_GetFrameBuf(void)
{
    C3D_Context* context = C3Di_GetContext();
    C3D_FrameBuf* frameBuf = &context->fb;

    context->flags |= C3DiF_FrameBuf;
    return frameBuf;
}
static void C3D_SetFrameBuf(C3D_FrameBuf* fb)
{
C3D_Context* ctx = C3Di_GetContext();
@ -1205,32 +1135,6 @@ static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t scre
}
}
// Queues a single GX texture copy and starts the context's GX command queue.
// Used by C3D_SyncTextureCopy when no frame is currently in progress.
// The order of the statements below is significant - do not rearrange them.
static void C3Di_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
{
// NOTE(review): presumably drains any previously queued GX commands first - confirm
C3Di_WaitAndClearQueue(-1);
// NOTE(review): flag is consumed elsewhere (not visible here) - confirm who resets it
inSafeTransfer = true;
GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
gxCmdQueueRun(&C3Di_GetContext()->gxQueue);
}
// Performs a GX texture copy from inadr to outadr.
// Outside of a frame, the copy goes through C3Di_SafeTextureCopy and this function
// then waits on gspWaitForPPF. Inside a frame, the frame is split and the copy is queued.
static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags)
{
    if (!inFrame)
    {
        C3Di_SafeTextureCopy(inadr, indim, outadr, outdim, size, flags);
        gspWaitForPPF();
        return;
    }

    C3D_FrameSplit(0);
    GX_TextureCopy(inadr, indim, outadr, outdim, size, flags);
}
static void C3Di_TexEnvBind(int id, C3D_TexEnv* env)
{
@ -1241,18 +1145,6 @@ static void C3Di_TexEnvBind(int id, C3D_TexEnv* env)
// Copies image data into one mipmap level of a texture.
//   tex   - destination texture
//   data  - source pixels for the level
//   face  - not used by this implementation
//   level - mipmap level to fill (negative selects the whole image, see C3D_TexGetImagePtr)
// Destinations in VRAM are filled with a texture copy, all others with memcpy.
static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level)
{
    u32 levelSize = 0;
    void* dst = C3D_TexGetImagePtr(tex, tex->data, level, &levelSize);

    if (addrIsVRAM(dst))
    {
        C3D_SyncTextureCopy((u32*)data, 0, (u32*)dst, 0, levelSize, 8);
    }
    else
    {
        memcpy(dst, data, levelSize);
    }
}
static void C3D_TexBind(int unitId, C3D_Tex* tex)
{
C3D_Context* ctx = C3Di_GetContext();
@ -1261,21 +1153,6 @@ static void C3D_TexBind(int unitId, C3D_Tex* tex)
ctx->tex[unitId] = tex;
}
// Flushes the data cache over the whole texture image (all mipmap levels).
// Does nothing for textures whose data is located in VRAM.
static void C3D_TexFlush(C3D_Tex* tex)
{
    if (addrIsVRAM(tex->data)) return;

    u32 totalSize = C3D_TexCalcTotalSize(tex->size, tex->maxLevel);
    GSPGPU_FlushDataCache(tex->data, totalSize);
}
// Releases a texture's image data using the allocator that matches its address
// (vramFree for VRAM addresses, linearFree otherwise). The C3D_Tex itself is not freed.
static void C3D_TexDelete(C3D_Tex* tex)
{
    void* image = tex->data;

    if (!addrIsVRAM(image))
    {
        linearFree(image);
    }
    else
    {
        vramFree(image);
    }
}
static void C3Di_SetTex(int unit, C3D_Tex* tex)
{
u32 reg[10];