From 2741a33a155d6f041bd84a3085aa54870906fa32 Mon Sep 17 00:00:00 2001 From: UnknownShadow200 Date: Thu, 10 Jul 2025 22:20:28 +1000 Subject: [PATCH] 3DS: Try to use VRAM for textures when possible --- .github/workflows/build_dreamcast.yml | 8 +- .github/workflows/build_msdos.yml | 7 +- .github/workflows/build_n64.yml | 7 +- .github/workflows/build_nds.yml | 7 +- .github/workflows/build_ps1.yml | 7 +- .github/workflows/build_ps2.yml | 5 +- .github/workflows/build_ps3.yml | 7 +- .github/workflows/build_psp.yml | 6 +- .github/workflows/build_saturn.yml | 7 +- .github/workflows/build_symbian.yml | 1 + .github/workflows/build_wiigc.yml | 1 - .github/workflows/build_xbox.yml | 5 +- .github/workflows/build_xbox360.yml | 7 +- src/3ds/Graphics_3DS.c | 70 +++++++-------- third_party/citro3d.c | 123 -------------------------- 15 files changed, 85 insertions(+), 183 deletions(-) diff --git a/.github/workflows/build_dreamcast.yml b/.github/workflows/build_dreamcast.yml index 809630752..059030acf 100644 --- a/.github/workflows/build_dreamcast.yml +++ b/.github/workflows/build_dreamcast.yml @@ -19,14 +19,18 @@ jobs: image: ghcr.io/kos-builds/kos-dc:sha-20149ee-14.2.0 steps: - uses: actions/checkout@v4 - - name: Compile Dreamcast build - id: compile + - name: Install prerequisites shell: bash run: | apt-get update apt-get -y install genisoimage wget https://github.com/ClassiCube/rpi-compiling-stuff/raw/main/cdi4dc -O /opt/toolchains/dc/kos/utils/cdi4dc chmod +x /opt/toolchains/dc/kos/utils/cdi4dc + + - name: Compile Dreamcast build + id: compile + shell: bash + run: | export PATH=/opt/toolchains/dc/kos/utils/:$PATH make dreamcast diff --git a/.github/workflows/build_msdos.yml b/.github/workflows/build_msdos.yml index 4607d115e..a6a80e141 100644 --- a/.github/workflows/build_msdos.yml +++ b/.github/workflows/build_msdos.yml @@ -19,11 +19,14 @@ jobs: image: ghcr.io/volkertb/debian-djgpp steps: - uses: actions/checkout@v4 - - name: Compile MS dos build - id: compile + - name: Install 
prerequisites run: | apt-get update apt-get -y install curl + + - name: Compile MS dos build + id: compile + run: | make CC=gcc dos RELEASE=1 diff --git a/.github/workflows/build_n64.yml b/.github/workflows/build_n64.yml index 5678b3c28..524264208 100644 --- a/.github/workflows/build_n64.yml +++ b/.github/workflows/build_n64.yml @@ -19,11 +19,14 @@ jobs: image: ghcr.io/dragonminded/libdragon:preview steps: - uses: actions/checkout@v4 - - name: Compile N64 build - id: compile + - name: Install prerequisites run: | apt-get update apt-get -y install curl + + - name: Compile N64 build + id: compile + run: | REAL_DIR=`pwd` cd /tmp git clone -b opengl https://github.com/DragonMinded/libdragon.git --depth=1 diff --git a/.github/workflows/build_nds.yml b/.github/workflows/build_nds.yml index 0d9fedcd5..9ffd175fd 100644 --- a/.github/workflows/build_nds.yml +++ b/.github/workflows/build_nds.yml @@ -19,11 +19,14 @@ jobs: image: skylyrac/blocksds:dev-latest steps: - uses: actions/checkout@v4 - - name: Compile NDS build - id: compile + - name: Install prerequisites run: | apt-get update apt-get -y install curl + + - name: Compile NDS build + id: compile + run: | make ds export BUILD_DSI=1 make ds diff --git a/.github/workflows/build_ps1.yml b/.github/workflows/build_ps1.yml index 0aa57190d..f96be84bf 100644 --- a/.github/workflows/build_ps1.yml +++ b/.github/workflows/build_ps1.yml @@ -19,11 +19,14 @@ jobs: image: ghcr.io/classicube/minimal-psn00b:latest steps: - uses: actions/checkout@v4 - - name: Compile PS1 build - id: compile + - name: Install prerequisites run: | apt-get update apt-get install -y curl + + - name: Compile PS1 build + id: compile + run: | export PSN00BSDK_ROOT=/usr/local/psnoob make ps1 diff --git a/.github/workflows/build_ps2.yml b/.github/workflows/build_ps2.yml index 248646694..432b0b374 100644 --- a/.github/workflows/build_ps2.yml +++ b/.github/workflows/build_ps2.yml @@ -19,10 +19,13 @@ jobs: image: ghcr.io/ps2dev/ps2sdk:latest steps: - uses: 
actions/checkout@v4 + - name: Install prerequisites + run: | + apk add make mpc1 curl + - name: Compile PS2 build id: compile run: | - apk add make mpc1 curl make ps2 diff --git a/.github/workflows/build_ps3.yml b/.github/workflows/build_ps3.yml index fd893a7bc..69bcba4af 100644 --- a/.github/workflows/build_ps3.yml +++ b/.github/workflows/build_ps3.yml @@ -19,11 +19,14 @@ jobs: image: ghcr.io/classicube/minimal-psl1ght:latest steps: - uses: actions/checkout@v4 - - name: Compile PS3 build - id: compile + - name: Install prerequisites run: | apt-get update apt-get install -y curl + + - name: Compile PS3 build + id: compile + run: | export PS3DEV=/usr/local/ps3dev export PSL1GHT=/usr/local/ps3dev export PATH=$PATH:$PS3DEV/bin diff --git a/.github/workflows/build_psp.yml b/.github/workflows/build_psp.yml index f932584e8..f87effee4 100644 --- a/.github/workflows/build_psp.yml +++ b/.github/workflows/build_psp.yml @@ -19,11 +19,13 @@ jobs: image: pspdev/pspdev:latest steps: - uses: actions/checkout@v4 + - name: Install prerequisites + run: | + apk add curl curl-dev + - name: Compile PSP build id: compile run: | - apk add curl curl-dev - export PSPSDK=$(psp-config --pspsdk-path) make psp diff --git a/.github/workflows/build_saturn.yml b/.github/workflows/build_saturn.yml index 3db14aa40..dba9c85b2 100644 --- a/.github/workflows/build_saturn.yml +++ b/.github/workflows/build_saturn.yml @@ -19,11 +19,14 @@ jobs: image: ijacquez/yaul:1.0.10 steps: - uses: actions/checkout@v4 - - name: Compile Saturn build - id: compile + - name: Install prerequisites run: | apt-get update apt-get -y install curl + + - name: Compile Saturn build + id: compile + run: | make saturn - uses: ./.github/actions/notify_failure diff --git a/.github/workflows/build_symbian.yml b/.github/workflows/build_symbian.yml index 8f9f76e6a..92c41a744 100644 --- a/.github/workflows/build_symbian.yml +++ b/.github/workflows/build_symbian.yml @@ -23,6 +23,7 @@ jobs: run: Invoke-WebRequest 
https://nnp.nnchan.ru/dl/symbiansr1_gcce_workflow.zip -OutFile symbiansdk.zip - name: Extract Symbian SDK run: Expand-Archive symbiansdk.zip -DestinationPath . + - name: Compile Symbian build id: compile run: | diff --git a/.github/workflows/build_wiigc.yml b/.github/workflows/build_wiigc.yml index 7ff45bb4c..3d6a6deb5 100644 --- a/.github/workflows/build_wiigc.yml +++ b/.github/workflows/build_wiigc.yml @@ -23,7 +23,6 @@ jobs: id: compile run: | make wii - make clean make gamecube - name: Create Wii homebrew diff --git a/.github/workflows/build_xbox.yml b/.github/workflows/build_xbox.yml index 371867d06..523ab31e0 100644 --- a/.github/workflows/build_xbox.yml +++ b/.github/workflows/build_xbox.yml @@ -19,10 +19,13 @@ jobs: image: ghcr.io/xboxdev/nxdk:git-e955705a steps: - uses: actions/checkout@v4 + - name: Install prerequisites + run: | + apk add curl curl-dev + - name: Compile Xbox build id: compile run: | - apk add curl curl-dev eval $(/usr/src/nxdk/bin/activate -s) make xbox diff --git a/.github/workflows/build_xbox360.yml b/.github/workflows/build_xbox360.yml index 0e40e1866..e562895d6 100644 --- a/.github/workflows/build_xbox360.yml +++ b/.github/workflows/build_xbox360.yml @@ -19,12 +19,15 @@ jobs: image: free60/libxenon steps: - uses: actions/checkout@v4 - - name: Compile 360 build - id: compile + - name: Install prerequisites run: | sed -i -e 's/archive.ubuntu.com\|security.ubuntu.com/old-releases.ubuntu.com/g' /etc/apt/sources.list apt-get update apt-get install -y curl + + - name: Compile 360 build + id: compile + run: | export DEVKITXENON=/usr/local/xenon export PATH=$PATH:$DEVKITXENON/bin:$DEVKITXENON/usr/bin make xbox360 diff --git a/src/3ds/Graphics_3DS.c b/src/3ds/Graphics_3DS.c index 7e0e4e922..dcc98c791 100644 --- a/src/3ds/Graphics_3DS.c +++ b/src/3ds/Graphics_3DS.c @@ -159,6 +159,13 @@ static void InitCitro3D(void) { C3D_RenderTargetColor(&bottomTarget, GPU_RB_RGBA8); C3D_RenderTargetSetOutput(&bottomTarget, GFX_BOTTOM, GFX_LEFT, 
DISPLAY_TRANSFER_FLAGS); + // Allocate the right framebuffer (for stereoscopic 3D) in advance, even when it isn't actually used + // Although this means a bit less VRAM is available for textures, this ensures that if the user later + // turns on stereoscopic 3D, we don't have to handle the case of insufficient VRAM for it + C3D_RenderTargetInit(&topTargetRight, 240, 400); + C3D_RenderTargetColor(&topTargetRight, GPU_RB_RGBA8); + C3D_RenderTargetDepth(&topTargetRight, GPU_RB_DEPTH24); + gfxSetDoubleBuffering(GFX_TOP, true); SetDefaultState(); AllocShaders(); @@ -178,6 +185,7 @@ void Gfx_Create(void) { Gfx.Created = true; gfx_vsync = true; + Gfx.NonPowTwoTexturesSupport = GFX_NONPOW2_UPLOAD; Gfx_RestoreState(); } @@ -239,9 +247,6 @@ void Gfx_Set3DRight(struct Matrix* proj, struct Matrix* view) { Calc3DProjection(+1, proj); if (!createdTopTargetRight) { - C3D_RenderTargetInit(&topTargetRight, 240, 400); - C3D_RenderTargetColor(&topTargetRight, GPU_RB_RGBA8); - C3D_RenderTargetDepth(&topTargetRight, GPU_RB_DEPTH24); C3D_RenderTargetSetOutput(&topTargetRight, GFX_TOP, GFX_RIGHT, DISPLAY_TRANSFER_FLAGS); createdTopTargetRight = true; } @@ -292,7 +297,13 @@ static void GPUTexture_Unref(GfxResourceID* resource) { } static void GPUTexture_Free(struct GPUTexture* tex) { - C3D_TexDelete(&tex->texture); + void* addr = tex->texture.data; + if (addrIsVRAM(addr)) { + vramFree(addr); + } else { + linearFree(addr); + } + Mem_Free(tex); } @@ -330,10 +341,13 @@ static void GPUTextures_DeleteUnreferenced(void) { /*########################################################################################################################* *---------------------------------------------------------Textures--------------------------------------------------------* *#########################################################################################################################*/ -static bool CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height) { +static bool 
CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height, int vram) { + width = Math_NextPowOf2(width); + height = Math_NextPowOf2(height); u32 size = width * height * 4; - //tex->data = p.onVram ? vramAlloc(total_size) : linearAlloc(total_size); - tex->data = linearAlloc(size); + + tex->data = vram ? vramAlloc(size) : NULL; + if (!tex->data) tex->data = linearAlloc(size); if (!tex->data) return false; tex->width = width; @@ -351,29 +365,6 @@ static bool CreateNativeTexture(C3D_Tex* tex, u32 width, u32 height) { return true; } -static void TryTransferToVRAM(C3D_Tex* tex) { - return; - // NOTE: the linearFree below results in broken texture. maybe no DMA? - void* vram = vramAlloc(tex->size); - if (!vram) return; - - C3D_SyncTextureCopy((u32*)tex->data, 0, (u32*)vram, 0, tex->size, 8); - linearFree(tex->data); - tex->data = vram; -} - -/*static inline cc_uint32 CalcZOrder(cc_uint32 x, cc_uint32 y) { - // Simplified "Interleave bits by Binary Magic Numbers" from - // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveTableObvious - // TODO: Simplify to array lookup? 
- x = (x | (x << 2)) & 0x33; - x = (x | (x << 1)) & 0x55; - - y = (y | (y << 2)) & 0x33; - y = (y | (y << 1)) & 0x55; - - return x | (y << 1); -}*/ static inline cc_uint32 CalcZOrder(cc_uint32 a) { // Simplified "Interleave bits by Binary Magic Numbers" from // http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN @@ -390,28 +381,29 @@ static inline cc_uint32 CalcZOrder(cc_uint32 a) { // four 4x4 subtiles, which are in turn composed of four 2x2 subtiles static void ToMortonTexture(C3D_Tex* tex, int originX, int originY, struct Bitmap* bmp, int rowWidth) { - unsigned int pixel, mortonX, mortonY; + unsigned int mortonX, mortonY; unsigned int dstX, dstY, tileX, tileY; - int width = bmp->width, height = bmp->height; + int src_w = bmp->width, dst_w = tex->width; + int src_h = bmp->height, dst_h = tex->height; cc_uint32* dst = tex->data; cc_uint32* src = bmp->scan0; - for (int y = 0; y < height; y++) + for (int y = 0; y < src_h; y++) { - dstY = tex->height - 1 - (y + originY); + dstY = dst_h - 1 - (y + originY); tileY = dstY & ~0x07; mortonY = CalcZOrder(dstY & 0x07) << 1; - for (int x = 0; x < width; x++) + for (int x = 0; x < src_w; x++) { dstX = x + originX; tileX = dstX & ~0x07; mortonX = CalcZOrder(dstX & 0x07); - pixel = src[x + (y * rowWidth)]; - dst[(mortonX | mortonY) + (tileX * 8) + (tileY * tex->width)] = pixel; + dst[(mortonX | mortonY) + (tileX * 8) + (tileY * dst_w)] = src[x]; } + src += rowWidth; } // TODO flush data cache GSPGPU_FlushDataCache } @@ -419,11 +411,11 @@ static void ToMortonTexture(C3D_Tex* tex, int originX, int originY, GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) { struct GPUTexture* tex = GPUTexture_Alloc(); - bool success = CreateNativeTexture(&tex->texture, bmp->width, bmp->height); + int can_vram = !(flags & TEXTURE_FLAG_DYNAMIC); + bool success = CreateNativeTexture(&tex->texture, bmp->width, bmp->height, can_vram); if (!success) return NULL; 
ToMortonTexture(&tex->texture, 0, 0, bmp, rowWidth); - if (!(flags & TEXTURE_FLAG_DYNAMIC)) TryTransferToVRAM(&tex->texture); return tex; } diff --git a/third_party/citro3d.c b/third_party/citro3d.c index fbe49684e..d81d3a550 100644 --- a/third_party/citro3d.c +++ b/third_party/citro3d.c @@ -97,66 +97,7 @@ typedef struct }; } C3D_Tex; -static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level); -static void C3D_TexGenerateMipmap(C3D_Tex* tex, GPU_TEXFACE face); static void C3D_TexBind(int unitId, C3D_Tex* tex); -static void C3D_TexFlush(C3D_Tex* tex); -static void C3D_TexDelete(C3D_Tex* tex); - -static inline int C3D_TexCalcMaxLevel(u32 width, u32 height) -{ - return (31-__builtin_clz(width < height ? width : height)) - 3; // avoid sizes smaller than 8 -} - -static inline u32 C3D_TexCalcLevelSize(u32 size, int level) -{ - return size >> (2*level); -} - -static inline u32 C3D_TexCalcTotalSize(u32 size, int maxLevel) -{ - /* - S = s + sr + sr^2 + sr^3 + ... + sr^n - Sr = sr + sr^2 + sr^3 + ... + sr^(n+1) - S-Sr = s - sr^(n+1) - S(1-r) = s(1 - r^(n+1)) - S = s (1 - r^(n+1)) / (1-r) - - r = 1/4 - 1-r = 3/4 - - S = 4s (1 - (1/4)^(n+1)) / 3 - S = 4s (1 - 1/4^(n+1)) / 3 - S = (4/3) (s - s/4^(n+1)) - S = (4/3) (s - s/(1<<(2n+2))) - S = (4/3) (s - s>>(2n+2)) - */ - return (size - C3D_TexCalcLevelSize(size,maxLevel+1)) * 4 / 3; -} - -static inline void* C3D_TexGetImagePtr(C3D_Tex* tex, void* data, int level, u32* size) -{ - if (size) *size = level >= 0 ? C3D_TexCalcLevelSize(tex->size, level) : C3D_TexCalcTotalSize(tex->size, tex->maxLevel); - if (!level) return data; - return (u8*)data + (level > 0 ? 
C3D_TexCalcTotalSize(tex->size, level-1) : 0); -} - -static inline void* C3D_Tex2DGetImagePtr(C3D_Tex* tex, int level, u32* size) -{ - return C3D_TexGetImagePtr(tex, tex->data, level, size); -} - -static inline void C3D_TexUpload(C3D_Tex* tex, const void* data) -{ - C3D_TexLoadImage(tex, data, GPU_TEXFACE_2D, 0); -} - - - - - - - static void C3D_DepthMap(bool bIsZBuffer, float zScale, float zOffset); @@ -257,7 +198,6 @@ typedef enum static u32 C3D_CalcColorBufSize(u32 width, u32 height, GPU_COLORBUF fmt); static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt); -static C3D_FrameBuf* C3D_GetFrameBuf(void); static void C3D_SetFrameBuf(C3D_FrameBuf* fb); static void C3D_FrameBufClear(C3D_FrameBuf* fb, C3D_ClearBits clearBits, u32 clearColor, u32 clearDepth); static void C3D_FrameBufTransfer(C3D_FrameBuf* fb, gfxScreen_t screen, gfx3dSide_t side, u32 transferFlags); @@ -337,8 +277,6 @@ static inline void C3D_RenderTargetClear(C3D_RenderTarget* target, C3D_ClearBits C3D_FrameBufClear(&target->frameBuf, clearBits, clearColor, clearDepth); } -static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags); - @@ -818,14 +756,6 @@ static u32 C3D_CalcDepthBufSize(u32 width, u32 height, GPU_DEPTHBUF fmt) return size*(2+depthFmtSizes[fmt]); } -static C3D_FrameBuf* C3D_GetFrameBuf(void) -{ - C3D_Context* ctx = C3Di_GetContext(); - - ctx->flags |= C3DiF_FrameBuf; - return &ctx->fb; -} - static void C3D_SetFrameBuf(C3D_FrameBuf* fb) { C3D_Context* ctx = C3Di_GetContext(); @@ -1205,32 +1135,6 @@ static void C3D_RenderTargetSetOutput(C3D_RenderTarget* target, gfxScreen_t scre } } -static void C3Di_SafeTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, u32 size, u32 flags) -{ - C3Di_WaitAndClearQueue(-1); - inSafeTransfer = true; - GX_TextureCopy(inadr, indim, outadr, outdim, size, flags); - gxCmdQueueRun(&C3Di_GetContext()->gxQueue); -} - -static void C3D_SyncTextureCopy(u32* inadr, u32 indim, u32* outadr, u32 outdim, 
u32 size, u32 flags) -{ - if (inFrame) - { - C3D_FrameSplit(0); - GX_TextureCopy(inadr, indim, outadr, outdim, size, flags); - } else - { - C3Di_SafeTextureCopy(inadr, indim, outadr, outdim, size, flags); - gspWaitForPPF(); - } -} - - - - - - static void C3Di_TexEnvBind(int id, C3D_TexEnv* env) { @@ -1241,18 +1145,6 @@ static void C3Di_TexEnvBind(int id, C3D_TexEnv* env) - -static void C3D_TexLoadImage(C3D_Tex* tex, const void* data, GPU_TEXFACE face, int level) -{ - u32 size = 0; - void* out = C3D_TexGetImagePtr(tex, tex->data, level, &size); - - if (!addrIsVRAM(out)) - memcpy(out, data, size); - else - C3D_SyncTextureCopy((u32*)data, 0, (u32*)out, 0, size, 8); -} - static void C3D_TexBind(int unitId, C3D_Tex* tex) { C3D_Context* ctx = C3Di_GetContext(); @@ -1261,21 +1153,6 @@ static void C3D_TexBind(int unitId, C3D_Tex* tex) ctx->tex[unitId] = tex; } -static void C3D_TexFlush(C3D_Tex* tex) -{ - if (!addrIsVRAM(tex->data)) - GSPGPU_FlushDataCache(tex->data, C3D_TexCalcTotalSize(tex->size, tex->maxLevel)); -} - -static void C3D_TexDelete(C3D_Tex* tex) -{ - void* addr = tex->data; - if (addrIsVRAM(addr)) - vramFree(addr); - else - linearFree(addr); -} - static void C3Di_SetTex(int unit, C3D_Tex* tex) { u32 reg[10];