Saturn: Properly manage VRAM allocations for textures

This commit is contained in:
UnknownShadow200 2025-07-08 22:29:15 +10:00
parent f22810fe54
commit 28a2a74a97
5 changed files with 191 additions and 122 deletions

View File

@ -177,6 +177,32 @@ SOFTWARE.
BearSSL license
==================
Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
FreeType license
==================
The FreeType Project LICENSE

View File

@ -5,6 +5,7 @@
#include "Logger.h"
#include "Window.h"
#include <nds.h>
#include "_BlockAlloc.h"
#define DS_MAT_PROJECTION 0
#define DS_MAT_POSITION 1
@ -15,56 +16,6 @@ static int matrix_modes[] = { DS_MAT_PROJECTION, DS_MAT_MODELVIEW };
static int lastMatrix;
/*########################################################################################################################*
*-------------------------------------------------------Memory alloc------------------------------------------------------*
*#########################################################################################################################*/
#define blockalloc_page(block) ((block) >> 3)
#define blockalloc_bit(block) (1 << ((block) & 0x07))
#define BLOCKS_PER_PAGE 8
static CC_INLINE int blockalloc_can_alloc(cc_uint8* table, int beg, int blocks) {
for (int i = beg; i < beg + blocks; i++)
{
cc_uint8 page = table[blockalloc_page(i)];
if (page & blockalloc_bit(i)) return false;
}
return true;
}
static int blockalloc_alloc(cc_uint8* table, int maxBlocks, int blocks) {
if (blocks > maxBlocks) return -1;
for (int i = 0; i < maxBlocks - blocks;)
{
cc_uint8 page = table[blockalloc_page(i)];
// If entire page is used, skip entirely over it
if ((i & 0x07) == 0 && page == 0xFF) { i += 8; continue; }
// If block is used, move onto trying next block
if (page & blockalloc_bit(i)) { i++; continue; }
// If can't be allocated starting at block, try next
if (!blockalloc_can_alloc(table, i, blocks)) { i++; continue; }
for (int j = i; j < i + blocks; j++)
{
table[blockalloc_page(j)] |= blockalloc_bit(j);
}
return i;
}
return -1;
}
static void blockalloc_free(cc_uint8* table, int origin, int blocks) {
// Mark the used blocks as free again
for (int i = origin; i < origin + blocks; i++)
{
table[blockalloc_page(i)] &= ~blockalloc_bit(i);
}
}
/*########################################################################################################################*
*---------------------------------------------------------General---------------------------------------------------------*
*#########################################################################################################################*/
@ -76,30 +27,26 @@ static CC_INLINE void CopyHWords(void* src, void* dst, int len) {
for (int i = 0; i < len; i++) dst_[i] = src_[i];
}
static void WaitForGPUDone(void) {
if (!GFX_BUSY) return;
// Geometry engine still busy from before, give it some time
swiWaitForVBlank();
swiWaitForVBlank();
swiWaitForVBlank();
if (!GFX_BUSY) return;
// The geometry engine may still have some leftover state, try to recover
for (int i = 0; i < 8 && GFX_BUSY; i++)
{
GFX_VERTEX16 = 0;
swiDelay(0x400);
}
}
void ResetGPU(void) {
powerOn(POWER_3D_CORE | POWER_MATRIX); // Enable 3D core & geometry engine
if (GFX_BUSY) {
// Geometry engine sill busy from before, give it some time
swiWaitForVBlank();
swiWaitForVBlank();
swiWaitForVBlank();
if (GFX_BUSY) {
// The geometry engine is still busy. This can happen due to a
// partial vertex upload by the previous homebrew application (=>
// ARM7->ARM9 forced reset). So long as the buffer wasn't flushed,
// this is recoverable, so we attempt to do so.
for (int i = 0; i < 8; i++)
{
GFX_VERTEX16 = 0;
// TODO: Do we need such a high arbitrary delay value?
swiDelay(0x400);
if (!GFX_BUSY) break;
}
}
}
WaitForGPUDone();
// Clear the FIFO
GFX_STATUS |= (1 << 29);
@ -245,7 +192,7 @@ static void texture_alloc(CCTexture* tex, int size) {
}
static void texture_release(CCTexture* tex) {
blockalloc_free(tex_table, tex->texBase, tex->texBlocks);
blockalloc_dealloc(tex_table, tex->texBase, tex->texBlocks);
}
// Palette VRAM banks - Bank E (64 kb)

View File

@ -5,7 +5,7 @@
#include "Window.h"
#include <stdint.h>
#include <yaul.h>
#include <stdlib.h>
#include "_BlockAlloc.h"
#define SCREEN_WIDTH 320
#define SCREEN_HEIGHT 224
@ -24,9 +24,6 @@ static vdp1_cmdt_t* cmds_cur;
static uint16_t* z_cur;
static PackedCol clear_color;
static vdp1_vram_partitions_t _vdp1_vram_partitions;
static void* tex_vram_addr;
static vdp1_gouraud_table_t* gourad_base;
static const vdp1_cmdt_draw_mode_t color_draw_mode = {
.cc_mode = VDP1_CMDT_CC_REPLACE,
@ -37,6 +34,39 @@ static const vdp1_cmdt_draw_mode_t shaded_draw_mode = {
.color_mode = VDP1_CMDT_CM_RGB_32768
};
#define CMDT_REGION_COUNT 2048
#define TEXTURE_REGION_SIZE 442000
#define GOURAUD_REGION_COUNT 1024
#define CLUT_REGION_COUNT 256
static vdp1_gouraud_table_t* gouraud_base;
static vdp1_clut_t* clut_base;
static vdp1_cmdt_t* cmdt_base;
static void* texture_base;
static void InitVRAM(void) {
vdp1_vram_t ptr = VDP1_VRAM(0);
cmdt_base = (vdp1_cmdt_t*)ptr;
ptr += CMDT_REGION_COUNT * sizeof(vdp1_cmdt_t);
texture_base = (void*)ptr;
ptr += TEXTURE_REGION_SIZE;
gouraud_base = (vdp1_gouraud_table_t*)ptr;
ptr += GOURAUD_REGION_COUNT * sizeof(vdp1_gouraud_table_t);
clut_base = (vdp1_clut_t*)ptr;
ptr += CLUT_REGION_COUNT * sizeof(vdp1_clut_t);
int S = (int)(ptr - VDP1_VRAM(sizeof(vdp1_cmdt_t)));
int T = VDP1_VRAM_SIZE;
Platform_Log2("VRAM ALLOC: %i of %i", &S, &T);
if (ptr <= VDP1_VRAM(VDP1_VRAM_SIZE)) return;
Process_Abort("Invalid VRAM allocations");
}
static void UpdateVDP1Env(void) {
vdp1_env_t env;
vdp1_env_default_init(&env);
@ -58,7 +88,7 @@ static void CalcGouraudColours(void) {
int b_idx = (i & 0x380) >> 7, B = b_idx << (5 - 3);
rgb1555_t gouraud = RGB1555(1, R, G, B);
vdp1_gouraud_table_t* cur = &_vdp1_vram_partitions.gouraud_base[i];
vdp1_gouraud_table_t* cur = &gouraud_base[i];
cur->colors[0] = gouraud;
cur->colors[1] = gouraud;
cur->colors[2] = gouraud;
@ -99,15 +129,12 @@ static void SetupHeaderCommands(void) {
void Gfx_Create(void) {
if (!Gfx.Created) {
vdp1_vram_partitions_get(&_vdp1_vram_partitions);
// TODO less ram for gourad base
// TODO less ram for gouraud base
vdp2_scrn_back_color_set(VDP2_VRAM_ADDR(3, 0x01FFFE),
RGB1555(1, 0, 3, 15));
vdp2_sprite_priority_set(0, 6);
tex_vram_addr = _vdp1_vram_partitions.texture_base;
gourad_base = _vdp1_vram_partitions.gouraud_base;
InitVRAM();
UpdateVDP1Env();
CalcGouraudColours();
}
@ -133,67 +160,75 @@ static uint16_t cur_char_size;
static uint16_t cur_char_base;
typedef struct CCTexture {
int width, height;
void* addr;
short width, height;
unsigned offset, blocks;
} CCTexture;
#define CCTexture_Addr(tex) (texture_base + (tex)->offset * TEX_BLOCK_SIZE)
#define TEX_BLOCK_SIZE 256
#define TEX_TOTAL_BLOCKS (TEXTURE_REGION_SIZE / TEX_BLOCK_SIZE)
static cc_uint8 tex_table[TEX_TOTAL_BLOCKS / BLOCKS_PER_PAGE];
static cc_bool texture_alloc(CCTexture* tex, int width, int height) {
int size = width * height * 2;
int blocks = (size + (TEX_BLOCK_SIZE - 1)) / TEX_BLOCK_SIZE;
int addr = blockalloc_alloc(tex_table, TEX_TOTAL_BLOCKS, blocks);
if (addr == -1) return false;
tex->width = width;
tex->height = height;
tex->offset = addr;
tex->blocks = blocks;
return true;
}
static void texture_release(CCTexture* tex) {
blockalloc_dealloc(tex_table, tex->offset, tex->blocks);
}
GfxResourceID Gfx_AllocTexture(struct Bitmap* bmp, int rowWidth, cc_uint8 flags, cc_bool mipmaps) {
CCTexture* tex = Mem_TryAlloc(1, sizeof(CCTexture));
if (!tex) return NULL;
// use malloc to ensure tmp is in HRAM (can't DMA from LRAM))
cc_uint16* tmp = malloc(bmp->width * bmp->height * 2);
if (!tmp) return NULL;
tex->addr = tex_vram_addr;
tex->width = bmp->width;
tex->height = bmp->height;
tex_vram_addr += tex->width * tex->height * 2;
int avail = (char*)gourad_base - (char*)tex_vram_addr;
if (avail <= 0) {
if (!texture_alloc(tex, bmp->width, bmp->height)) {
Platform_LogConst("OUT OF VRAM");
Mem_Free(tmp);
Mem_Free(tex);
return NULL;
} else {
Platform_Log1("VRAM: %i bytes left", &avail);
}
// TODO: Only copy when rowWidth != bmp->width
for (int y = 0; y < bmp->height; y++)
int vram_free = blockalloc_total_free(tex_table, TEX_TOTAL_BLOCKS) * TEX_BLOCK_SIZE;
Platform_Log1("VRAM left: %i bytes", &vram_free);
int width = bmp->width, height = bmp->height;
cc_uint8* tmp = CCTexture_Addr(tex);
cc_uint16* src = bmp->scan0;
cc_uint16* dst = (cc_uint16*)tmp;
for (int y = 0; y < height; y++)
{
cc_uint16* src = bmp->scan0 + y * rowWidth;
cc_uint16* dst = tmp + y * bmp->width;
for (int x = 0; x < bmp->width; x++)
for (int x = 0; x < width; x++)
{
dst[x] = src[x];
*dst++ = src[x];
}
src += rowWidth;
}
scu_dma_transfer(0, tex->addr, tmp, tex->width * tex->height * 2);
scu_dma_transfer_wait(0);
free(tmp);
return tex;
}
void Gfx_BindTexture(GfxResourceID texId) {
if (!texId) texId = white_square;
CCTexture* tex = (CCTexture*)texId;
cc_uint8* addr = CCTexture_Addr(tex);
cur_char_size = (((tex->width >> 3) << 8) | tex->height) & 0x3FFF;
cur_char_base = ((vdp1_vram_t)tex->addr >> 3) & 0xFFFF;
cur_char_size = (((tex->width >> 3) << 8) | tex->height) & 0x3FFF;
cur_char_base = ((vdp1_vram_t)addr >> 3) & 0xFFFF;
}
void Gfx_DeleteTexture(GfxResourceID* texId) {
CCTexture* tex = *texId;
// TODO properly free vram
if (tex) {
// This is mainly to avoid leak with text in top left
int size = tex->width * tex->height * 2;
if (tex_vram_addr == tex->addr + size)
tex_vram_addr -= size;
texture_release(tex);
Mem_Free(tex);
}
*texId = NULL;
@ -475,7 +510,8 @@ void Gfx_DrawVb_Lines(int verticesCount) {
static inline int __attribute__((always_inline)) TransformVector(void* v, struct MatrixCol* col) {
int res;
__asm__("lds.l @%[col]+, MACL \n"
__asm__("clrmac \n"
"lds.l @%[col]+, MACL \n"
"mac.l @%[col]+, @%[vec]+ \n"
"mac.l @%[col]+, @%[vec]+ \n"
"mac.l @%[col]+, @%[vec]+ \n"
@ -515,7 +551,7 @@ static void DrawColouredQuads2D(int verticesCount, int startVertex) {
static void DrawTexturedQuads2D(int verticesCount, int startVertex) {
uint16_t char_size = cur_char_size;
uint16_t char_base = cur_char_base;
uint16_t gour_base = ((vdp1_vram_t)gourad_base >> 3);
uint16_t gour_base = ((vdp1_vram_t)gouraud_base >> 3);
struct SATVertexTextured* v = (struct SATVertexTextured*)gfx_vertices + startVertex;
for (int i = 0; i < verticesCount; i += 4, v += 4)
@ -620,7 +656,7 @@ static void DrawTexturedQuads3D(int verticesCount, int startVertex) {
struct SATVertexTextured* v = (struct SATVertexTextured*)gfx_vertices + startVertex;
uint16_t char_size = cur_char_size;
uint16_t char_base = cur_char_base;
uint16_t gour_base = ((vdp1_vram_t)gourad_base >> 3);
uint16_t gour_base = ((vdp1_vram_t)gouraud_base >> 3);
for (int i = 0; i < verticesCount; i += 4, v += 4)
{

View File

@ -672,7 +672,6 @@ static cc_result HttpClient_HandleRedirect(struct HttpClientState* state) {
*#########################################################################################################################*/
static void HttpBackend_Init(void) {
SSLBackend_Init(httpsVerify);
//httpOnly = true; // TODO: insecure
}
static void Http_AddHeader(struct HttpRequest* req, const char* key, const cc_string* value) {

61
src/_BlockAlloc.h Normal file
View File

@ -0,0 +1,61 @@
#define blockalloc_page(block) ((block) >> 3)
#define blockalloc_bit(block) (1 << ((block) & 0x07))
#define BLOCKS_PER_PAGE 8
static CC_INLINE int blockalloc_can_alloc(cc_uint8* table, int beg, int blocks) {
for (int i = beg; i < beg + blocks; i++)
{
cc_uint8 page = table[blockalloc_page(i)];
if (page & blockalloc_bit(i)) return false;
}
return true;
}
static int blockalloc_alloc(cc_uint8* table, int maxBlocks, int blocks) {
if (blocks > maxBlocks) return -1;
for (int i = 0; i < maxBlocks - blocks;)
{
cc_uint8 page = table[blockalloc_page(i)];
// If entire page is used, skip entirely over it
if ((i & 0x07) == 0 && page == 0xFF) { i += 8; continue; }
// If block is used, move onto trying next block
if (page & blockalloc_bit(i)) { i++; continue; }
// If can't be allocated starting at block, try next
if (!blockalloc_can_alloc(table, i, blocks)) { i++; continue; }
for (int j = i; j < i + blocks; j++)
{
table[blockalloc_page(j)] |= blockalloc_bit(j);
}
return i;
}
return -1;
}
static void blockalloc_dealloc(cc_uint8* table, int origin, int blocks) {
// Mark the used blocks as free again
for (int i = origin; i < origin + blocks; i++)
{
table[blockalloc_page(i)] &= ~blockalloc_bit(i);
}
}
// TODO: is this worth optimising to look at entire page in 0x00 and 0xFF case?
static int blockalloc_total_free(cc_uint8* table, int maxBlocks) {
int free_blocks = 0, i, used;
cc_uint8 page;
for (i = 0; i < maxBlocks; i++)
{
page = table[blockalloc_page(i)];
used = page & blockalloc_bit(i);
if (!used) free_blocks++;
i++;
}
return free_blocks;
}