Separate the face buffer based on LOD (#1609)

- reduces allocator overhead (fewer items in the free list of each
buffer)
- reduces the chance to run out of fake GPU memory (we can now
effectively store 50% more faces than before)
- reduces the chance to run out of actual GPU memory
- reduces the height of lag spikes caused by buffer resizes (but increases
their frequency)
- makes it clearer in which LOD (LOD1) the main memory bottlenecks
are
- reduces the occlusion culling lag (#1161) after disocclusion of parts
of the screen (roughly halving the impact).

However, it does decrease the granularity of timing information, since I
can no longer look at the passes separately, but I think that's a fair
price to pay.

- [x] cleanup
This commit is contained in:
IntegratedQuantum 2025-06-07 09:32:05 +02:00 committed by GitHub
parent b1af1be3d2
commit bf7d20f11e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 74 additions and 69 deletions

View File

@ -2610,9 +2610,11 @@ pub fn generateBlockTexture(blockType: u16) Texture {
face.position.lightIndex = 0;
}
var allocation: SubAllocation = .{.start = 0, .len = 0};
main.renderer.chunk_meshing.faceBuffer.uploadData(faceData.items, &allocation);
main.renderer.chunk_meshing.faceBuffers[0].uploadData(faceData.items, &allocation);
defer main.renderer.chunk_meshing.faceBuffers[0].free(allocation);
var lightAllocation: SubAllocation = .{.start = 0, .len = 0};
main.renderer.chunk_meshing.lightBuffer.uploadData(&.{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, &lightAllocation);
main.renderer.chunk_meshing.lightBuffers[0].uploadData(&.{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, &lightAllocation);
defer main.renderer.chunk_meshing.lightBuffers[0].free(lightAllocation);
{
const i = 6; // Easily switch between the 8 rotations.
@ -2671,7 +2673,6 @@ pub fn generateBlockTexture(blockType: u16) Texture {
c.glBindFramebuffer(c.GL_FRAMEBUFFER, 0);
main.renderer.chunk_meshing.faceBuffer.free(allocation);
c.glViewport(0, 0, main.Window.width, main.Window.height);
c.glBlendFunc(c.GL_SRC_ALPHA, c.GL_ONE_MINUS_SRC_ALPHA);
return texture;

View File

@ -83,28 +83,18 @@ pub fn render() void {
}
draw.print("Mesh Queue size: {}", .{main.renderer.mesh_storage.updatableList.items.len}, 0, y, 8, .left);
y += 8;
{
for(0..main.settings.highestLod + 1) |lod| {
const faceDataSize: usize = @sizeOf(main.renderer.chunk_meshing.FaceData);
const size: usize = main.renderer.chunk_meshing.faceBuffer.capacity*faceDataSize;
const used: usize = main.renderer.chunk_meshing.faceBuffer.used*faceDataSize;
var largestFreeBlock: usize = 0;
for(main.renderer.chunk_meshing.faceBuffer.freeBlocks.items) |freeBlock| {
largestFreeBlock = @max(largestFreeBlock, freeBlock.len);
}
const fragmentation = size - used - largestFreeBlock*faceDataSize;
draw.print("ChunkMesh memory: {} MiB / {} MiB (fragmentation: {} MiB)", .{used >> 20, size >> 20, fragmentation >> 20}, 0, y, 8, .left);
const size: usize = main.renderer.chunk_meshing.faceBuffers[lod].capacity*faceDataSize;
const used: usize = main.renderer.chunk_meshing.faceBuffers[lod].used*faceDataSize;
draw.print("ChunkMesh memory LOD{}: {} MiB / {} MiB", .{lod, used >> 20, size >> 20}, 0, y, 8, .left);
y += 8;
}
{
for(0..main.settings.highestLod + 1) |lod| {
const lightDataSize: usize = @sizeOf(u32);
const size: usize = main.renderer.chunk_meshing.lightBuffer.capacity*lightDataSize;
const used: usize = main.renderer.chunk_meshing.lightBuffer.used*lightDataSize;
var largestFreeBlock: usize = 0;
for(main.renderer.chunk_meshing.lightBuffer.freeBlocks.items) |freeBlock| {
largestFreeBlock = @max(largestFreeBlock, freeBlock.len);
}
const fragmentation = size - used - largestFreeBlock*lightDataSize;
draw.print("Light memory: {} MiB / {} MiB (fragmentation: {} MiB)", .{used >> 20, size >> 20, fragmentation >> 20}, 0, y, 8, .left);
const size: usize = main.renderer.chunk_meshing.lightBuffers[lod].capacity*lightDataSize;
const used: usize = main.renderer.chunk_meshing.lightBuffers[lod].used*lightDataSize;
draw.print("Light memory LOD{}: {} MiB / {} MiB", .{lod, used >> 20, size >> 20}, 0, y, 8, .left);
y += 8;
}
{

View File

@ -17,14 +17,11 @@ pub const Samples = enum(u8) {
skybox,
animation,
chunk_rendering_preparation,
chunk_rendering_previous_visible,
chunk_rendering_occlusion_test,
chunk_rendering_new_visible,
chunk_rendering,
entity_rendering,
block_entity_rendering,
particle_rendering,
transparent_rendering_preparation,
transparent_rendering_occlusion_test,
transparent_rendering,
bloom_extract_downsample,
bloom_first_pass,
@ -39,14 +36,11 @@ const names = [_][]const u8{
"Skybox",
"Pre-processing Block Animations",
"Chunk Rendering Preparation",
"Chunk Rendering Previous Visible",
"Chunk Rendering Occlusion Test",
"Chunk Rendering New Visible",
"Chunk Rendering",
"Entity Rendering",
"Block Entity Rendering",
"Particle Rendering",
"Transparent Rendering Preparation",
"Transparent Rendering Occlusion Test",
"Transparent Rendering",
"Bloom - Extract color and downsample",
"Bloom - First Pass",

View File

@ -225,15 +225,15 @@ pub fn renderWorld(world: *World, ambientLight: Vec3f, skyColor: Vec3f, playerPo
chunk_meshing.beginRender();
var chunkList = main.List(u32).init(main.stackAllocator);
defer chunkList.deinit();
var chunkLists: [main.settings.highestSupportedLod + 1]main.List(u32) = @splat(main.List(u32).init(main.stackAllocator));
defer for(chunkLists) |list| list.deinit();
for(meshes) |mesh| {
mesh.prepareRendering(&chunkList);
mesh.prepareRendering(&chunkLists);
}
gpu_performance_measuring.stopQuery();
if(chunkList.items.len != 0) {
chunk_meshing.drawChunksIndirect(chunkList.items, game.projectionMatrix, ambientLight, playerPos, false);
}
gpu_performance_measuring.startQuery(.chunk_rendering);
chunk_meshing.drawChunksIndirect(&chunkLists, game.projectionMatrix, ambientLight, playerPos, false);
gpu_performance_measuring.stopQuery();
gpu_performance_measuring.startQuery(.entity_rendering);
entity.ClientEntityManager.render(game.projectionMatrix, ambientLight, playerPos);
@ -264,17 +264,17 @@ pub fn renderWorld(world: *World, ambientLight: Vec3f, skyColor: Vec3f, playerPo
c.glTextureBarrier();
{
chunkList.clearRetainingCapacity();
for(&chunkLists) |*list| list.clearRetainingCapacity();
var i: usize = meshes.len;
while(true) {
if(i == 0) break;
i -= 1;
meshes[i].prepareTransparentRendering(playerPos, &chunkList);
meshes[i].prepareTransparentRendering(playerPos, &chunkLists);
}
gpu_performance_measuring.stopQuery();
if(chunkList.items.len != 0) {
chunk_meshing.drawChunksIndirect(chunkList.items, game.projectionMatrix, ambientLight, playerPos, true);
}
gpu_performance_measuring.startQuery(.transparent_rendering);
chunk_meshing.drawChunksIndirect(&chunkLists, game.projectionMatrix, ambientLight, playerPos, true);
gpu_performance_measuring.stopQuery();
}
c.glDepthRange(0, 0.001);

View File

@ -64,8 +64,8 @@ pub var occlusionTestUniforms: struct {
} = undefined;
pub var vao: c_uint = undefined;
var vbo: c_uint = undefined;
pub var faceBuffer: graphics.LargeBuffer(FaceData) = undefined;
pub var lightBuffer: graphics.LargeBuffer(u32) = undefined;
pub var faceBuffers: [settings.highestSupportedLod + 1]graphics.LargeBuffer(FaceData) = undefined;
pub var lightBuffers: [settings.highestSupportedLod + 1]graphics.LargeBuffer(u32) = undefined;
pub var chunkBuffer: graphics.LargeBuffer(ChunkData) = undefined;
pub var commandBuffer: graphics.LargeBuffer(IndirectData) = undefined;
pub var chunkIDBuffer: graphics.LargeBuffer(u32) = undefined;
@ -133,8 +133,10 @@ pub fn init() void {
c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, rawData.len*@sizeOf(u32), &rawData, c.GL_STATIC_DRAW);
c.glBindVertexArray(0);
faceBuffer.init(main.globalAllocator, 1 << 20, 3);
lightBuffer.init(main.globalAllocator, 1 << 20, 10);
for(0..settings.highestSupportedLod + 1) |i| {
faceBuffers[i].init(main.globalAllocator, 1 << 20, 3);
lightBuffers[i].init(main.globalAllocator, 1 << 20, 10);
}
chunkBuffer.init(main.globalAllocator, 1 << 20, 6);
commandBuffer.init(main.globalAllocator, 1 << 20, 8);
chunkIDBuffer.init(main.globalAllocator, 1 << 20, 9);
@ -148,24 +150,30 @@ pub fn deinit() void {
commandPipeline.deinit();
c.glDeleteVertexArrays(1, &vao);
c.glDeleteBuffers(1, &vbo);
faceBuffer.deinit();
lightBuffer.deinit();
for(0..settings.highestSupportedLod + 1) |i| {
faceBuffers[i].deinit();
lightBuffers[i].deinit();
}
chunkBuffer.deinit();
commandBuffer.deinit();
chunkIDBuffer.deinit();
}
pub fn beginRender() void {
faceBuffer.beginRender();
lightBuffer.beginRender();
for(0..settings.highestSupportedLod + 1) |i| {
faceBuffers[i].beginRender();
lightBuffers[i].beginRender();
}
chunkBuffer.beginRender();
commandBuffer.beginRender();
chunkIDBuffer.beginRender();
}
pub fn endRender() void {
faceBuffer.endRender();
lightBuffer.endRender();
for(0..settings.highestSupportedLod + 1) |i| {
faceBuffers[i].endRender();
lightBuffers[i].endRender();
}
chunkBuffer.endRender();
commandBuffer.endRender();
chunkIDBuffer.endRender();
@ -212,7 +220,20 @@ pub fn bindTransparentShaderAndUniforms(projMatrix: Mat4f, ambient: Vec3f, playe
c.glBindVertexArray(vao);
}
pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
/// Binds the face buffer and the light buffer belonging to `lod`,
/// each one to the binding index stored on the buffer itself.
fn bindBuffers(lod: usize) void {
	const faces = &faceBuffers[lod];
	const lights = &lightBuffers[lod];
	faces.ssbo.bind(faces.binding);
	lights.ssbo.bind(lights.binding);
}
/// Draws the chunks of all LODs, one `drawChunksOfLod` call per LOD, with that
/// LOD's face and light buffers bound beforehand.
///
/// `chunkIds` holds one list per LOD, indexed by LOD.
/// Opaque rendering walks the LODs in ascending order, transparent rendering
/// in descending order.
/// `projMatrix`, `ambient`, `playerPos` and `transparent` are forwarded
/// unchanged to `drawChunksOfLod`.
pub fn drawChunksIndirect(chunkIds: *const [main.settings.highestSupportedLod + 1]main.List(u32), projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
	for(0..chunkIds.len) |i| {
		const lod = if(transparent) main.settings.highestSupportedLod - i else i;
		const ids = chunkIds[lod].items;
		// Nothing to draw for this LOD: skip the buffer binds, the ID upload,
		// the compute dispatches and the draw calls entirely.
		if(ids.len == 0) continue;
		bindBuffers(lod);
		drawChunksOfLod(ids, projMatrix, ambient, playerPos, transparent);
	}
}
fn drawChunksOfLod(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
const drawCallsEstimate: u31 = @intCast(if(transparent) chunkIDs.len else chunkIDs.len*8);
var chunkIDAllocation: main.graphics.SubAllocation = .{.start = 0, .len = 0};
chunkIDBuffer.uploadData(chunkIDs, &chunkIDAllocation);
@ -227,7 +248,6 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
c.glUniform1i(commandUniforms.isTransparent, @intFromBool(transparent));
c.glUniform3i(commandUniforms.playerPositionInteger, @intFromFloat(@floor(playerPos[0])), @intFromFloat(@floor(playerPos[1])), @intFromFloat(@floor(playerPos[2])));
if(!transparent) {
gpu_performance_measuring.startQuery(.chunk_rendering_previous_visible);
c.glUniform1i(commandUniforms.onlyDrawPreviouslyInvisible, 0);
c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT | c.GL_COMMAND_BARRIER_BIT);
@ -239,11 +259,9 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
}
c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
gpu_performance_measuring.stopQuery();
}
// Occlusion tests:
gpu_performance_measuring.startQuery(if(transparent) .transparent_rendering_occlusion_test else .chunk_rendering_occlusion_test);
occlusionTestPipeline.bind(null);
c.glUniform3i(occlusionTestUniforms.playerPositionInteger, @intFromFloat(@floor(playerPos[0])), @intFromFloat(@floor(playerPos[1])), @intFromFloat(@floor(playerPos[2])));
c.glUniform3f(occlusionTestUniforms.playerPositionFraction, @floatCast(@mod(playerPos[0], 1)), @floatCast(@mod(playerPos[1], 1)), @floatCast(@mod(playerPos[2], 1)));
@ -252,10 +270,8 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
c.glBindVertexArray(vao);
c.glDrawElementsBaseVertex(c.GL_TRIANGLES, @intCast(6*6*chunkIDs.len), c.GL_UNSIGNED_INT, null, chunkIDAllocation.start*24);
c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT);
gpu_performance_measuring.stopQuery();
// Draw again:
gpu_performance_measuring.startQuery(if(transparent) .transparent_rendering else .chunk_rendering_new_visible);
commandPipeline.bind();
c.glUniform1i(commandUniforms.onlyDrawPreviouslyInvisible, 1);
c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
@ -268,7 +284,6 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
}
c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
gpu_performance_measuring.stopQuery();
}
pub const FaceData = extern struct {
@ -347,9 +362,10 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
wasChanged: bool = false,
min: Vec3f = undefined,
max: Vec3f = undefined,
lod: u3,
fn deinit(self: *PrimitiveMesh) void {
faceBuffer.free(self.bufferAllocation);
faceBuffers[self.lod].free(self.bufferAllocation);
self.completeList.deinit(main.globalAllocator);
}
@ -573,8 +589,8 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
len += list[i].len;
}
const fullBuffer = faceBuffer.allocateAndMapRange(len, &self.bufferAllocation);
defer faceBuffer.unmapRange(fullBuffer);
const fullBuffer = faceBuffers[self.lod].allocateAndMapRange(len, &self.bufferAllocation);
defer faceBuffers[self.lod].unmapRange(fullBuffer);
// Sort the faces by normal to allow for backface culling on the GPU:
var i: u32 = 0;
var iStart = i;
@ -679,8 +695,12 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
self.* = ChunkMesh{
.pos = pos,
.size = chunk.chunkSize*pos.voxelSize,
.opaqueMesh = .{},
.transparentMesh = .{},
.opaqueMesh = .{
.lod = @intCast(std.math.log2_int(u32, pos.voxelSize)),
},
.transparentMesh = .{
.lod = @intCast(std.math.log2_int(u32, pos.voxelSize)),
},
.chunk = ch,
.lightingData = .{
lighting.ChannelChunk.init(ch, false),
@ -705,7 +725,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
self.blockBreakingFaces.deinit();
main.globalAllocator.free(self.blockBreakingFacesSortingData);
main.globalAllocator.free(self.lightList);
lightBuffer.free(self.lightAllocation);
lightBuffers[std.math.log2_int(u32, self.pos.voxelSize)].free(self.lightAllocation);
}
pub fn increaseRefCount(self: *ChunkMesh) void {
@ -1355,7 +1375,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
if(self.lightListNeedsUpload) {
self.lightListNeedsUpload = false;
lightBuffer.uploadData(self.lightList, &self.lightAllocation);
lightBuffers[std.math.log2_int(u32, self.pos.voxelSize)].uploadData(self.lightList, &self.lightAllocation);
}
self.uploadChunkPosition();
@ -1555,15 +1575,15 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
}}, &self.chunkAllocation);
}
pub fn prepareRendering(self: *ChunkMesh, chunkList: *main.List(u32)) void {
pub fn prepareRendering(self: *ChunkMesh, chunkLists: *[main.settings.highestSupportedLod + 1]main.List(u32)) void {
if(self.opaqueMesh.vertexCount == 0) return;
chunkList.append(self.chunkAllocation.start);
chunkLists[std.math.log2_int(u32, self.pos.voxelSize)].append(self.chunkAllocation.start);
quadsDrawn += self.opaqueMesh.vertexCount/6;
}
pub fn prepareTransparentRendering(self: *ChunkMesh, playerPosition: Vec3d, chunkList: *main.List(u32)) void {
pub fn prepareTransparentRendering(self: *ChunkMesh, playerPosition: Vec3d, chunkLists: *[main.settings.highestSupportedLod + 1]main.List(u32)) void {
if(self.transparentMesh.vertexCount == 0 and self.blockBreakingFaces.items.len == 0) return;
var needsUpdate: bool = false;
@ -1696,11 +1716,11 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
}
self.culledSortingCount += @intCast(self.blockBreakingFaces.items.len);
// Upload:
faceBuffer.uploadData(self.sortingOutputBuffer[0..self.culledSortingCount], &self.transparentMesh.bufferAllocation);
faceBuffers[std.math.log2_int(u32, self.pos.voxelSize)].uploadData(self.sortingOutputBuffer[0..self.culledSortingCount], &self.transparentMesh.bufferAllocation);
self.uploadChunkPosition();
}
chunkList.append(self.chunkAllocation.start);
chunkLists[std.math.log2_int(u32, self.pos.voxelSize)].append(self.chunkAllocation.start);
transparentQuadsDrawn += self.culledSortingCount;
}
};