Separate the face buffer based on LOD (#1609)

- reduces allocator overhead (fewer items in the free list of each buffer)
- reduces the chance of running out of fake GPU memory (we can now effectively store 50% more faces than before)
- reduces the chance of running out of actual GPU memory
- reduces the height of lag spikes caused by buffer resizes (but increases their frequency)
- makes it clearer in which LOD (LOD1) the main memory bottlenecks are
- reduces the occlusion culling lag (#1161) after disocclusion of parts of the screen (roughly halving the impact). It does, however, decrease the granularity of timing information, since I can no longer look at the passes separately, but I think that's a fair price to pay
- [x] cleanup
2025-09-21 18:34:30 -04:00 · 2025-06-07 09:32:05 +02:00 · 2025-06-07 09:32:05 +02:00 · bf7d20f11e
commit bf7d20f11e
parent b1af1be3d2
5 changed files with 74 additions and 69 deletions
--- a/src/graphics.zig
+++ b/src/graphics.zig
@ -2610,9 +2610,11 @@ pub fn generateBlockTexture(blockType: u16) Texture {
 		face.position.lightIndex = 0;
 	}
 	var allocation: SubAllocation = .{.start = 0, .len = 0};
-	main.renderer.chunk_meshing.faceBuffer.uploadData(faceData.items, &allocation);
+	main.renderer.chunk_meshing.faceBuffers[0].uploadData(faceData.items, &allocation);
+	defer main.renderer.chunk_meshing.faceBuffers[0].free(allocation);
 	var lightAllocation: SubAllocation = .{.start = 0, .len = 0};
-	main.renderer.chunk_meshing.lightBuffer.uploadData(&.{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, &lightAllocation);
+	main.renderer.chunk_meshing.lightBuffers[0].uploadData(&.{0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff}, &lightAllocation);
+	defer main.renderer.chunk_meshing.lightBuffers[0].free(lightAllocation);

 	{
 		const i = 6; // Easily switch between the 8 rotations.
@ -2671,7 +2673,6 @@ pub fn generateBlockTexture(blockType: u16) Texture {

 	c.glBindFramebuffer(c.GL_FRAMEBUFFER, 0);

-	main.renderer.chunk_meshing.faceBuffer.free(allocation);
 	c.glViewport(0, 0, main.Window.width, main.Window.height);
 	c.glBlendFunc(c.GL_SRC_ALPHA, c.GL_ONE_MINUS_SRC_ALPHA);
 	return texture;
--- a/src/gui/windows/debug.zig
+++ b/src/gui/windows/debug.zig
@ -83,28 +83,18 @@ pub fn render() void {
 		}
 		draw.print("Mesh Queue size: {}", .{main.renderer.mesh_storage.updatableList.items.len}, 0, y, 8, .left);
 		y += 8;
-		{
+		for(0..main.settings.highestLod + 1) |lod| {
 			const faceDataSize: usize = @sizeOf(main.renderer.chunk_meshing.FaceData);
-			const size: usize = main.renderer.chunk_meshing.faceBuffer.capacity*faceDataSize;
-			const used: usize = main.renderer.chunk_meshing.faceBuffer.used*faceDataSize;
-			var largestFreeBlock: usize = 0;
-			for(main.renderer.chunk_meshing.faceBuffer.freeBlocks.items) |freeBlock| {
-				largestFreeBlock = @max(largestFreeBlock, freeBlock.len);
-			}
-			const fragmentation = size - used - largestFreeBlock*faceDataSize;
-			draw.print("ChunkMesh memory: {} MiB / {} MiB (fragmentation: {} MiB)", .{used >> 20, size >> 20, fragmentation >> 20}, 0, y, 8, .left);
+			const size: usize = main.renderer.chunk_meshing.faceBuffers[lod].capacity*faceDataSize;
+			const used: usize = main.renderer.chunk_meshing.faceBuffers[lod].used*faceDataSize;
+			draw.print("ChunkMesh memory LOD{}: {} MiB / {} MiB", .{lod, used >> 20, size >> 20}, 0, y, 8, .left);
 			y += 8;
 		}
-		{
+		for(0..main.settings.highestLod + 1) |lod| {
 			const lightDataSize: usize = @sizeOf(u32);
-			const size: usize = main.renderer.chunk_meshing.lightBuffer.capacity*lightDataSize;
-			const used: usize = main.renderer.chunk_meshing.lightBuffer.used*lightDataSize;
-			var largestFreeBlock: usize = 0;
-			for(main.renderer.chunk_meshing.lightBuffer.freeBlocks.items) |freeBlock| {
-				largestFreeBlock = @max(largestFreeBlock, freeBlock.len);
-			}
-			const fragmentation = size - used - largestFreeBlock*lightDataSize;
-			draw.print("Light memory: {} MiB / {} MiB (fragmentation: {} MiB)", .{used >> 20, size >> 20, fragmentation >> 20}, 0, y, 8, .left);
+			const size: usize = main.renderer.chunk_meshing.lightBuffers[lod].capacity*lightDataSize;
+			const used: usize = main.renderer.chunk_meshing.lightBuffers[lod].used*lightDataSize;
+			draw.print("Light memory LOD{}: {} MiB / {} MiB", .{lod, used >> 20, size >> 20}, 0, y, 8, .left);
 			y += 8;
 		}
 		{
--- a/src/gui/windows/gpu_performance_measuring.zig
+++ b/src/gui/windows/gpu_performance_measuring.zig
@ -17,14 +17,11 @@ pub const Samples = enum(u8) {
 	skybox,
 	animation,
 	chunk_rendering_preparation,
-	chunk_rendering_previous_visible,
-	chunk_rendering_occlusion_test,
-	chunk_rendering_new_visible,
+	chunk_rendering,
 	entity_rendering,
 	block_entity_rendering,
 	particle_rendering,
 	transparent_rendering_preparation,
-	transparent_rendering_occlusion_test,
 	transparent_rendering,
 	bloom_extract_downsample,
 	bloom_first_pass,
@ -39,14 +36,11 @@ const names = [_][]const u8{
 	"Skybox",
 	"Pre-processing Block Animations",
 	"Chunk Rendering Preparation",
-	"Chunk Rendering Previous Visible",
-	"Chunk Rendering Occlusion Test",
-	"Chunk Rendering New Visible",
+	"Chunk Rendering",
 	"Entity Rendering",
 	"Block Entity Rendering",
 	"Particle Rendering",
 	"Transparent Rendering Preparation",
-	"Transparent Rendering Occlusion Test",
 	"Transparent Rendering",
 	"Bloom - Extract color and downsample",
 	"Bloom - First Pass",
--- a/src/renderer.zig
+++ b/src/renderer.zig
@ -225,15 +225,15 @@ pub fn renderWorld(world: *World, ambientLight: Vec3f, skyColor: Vec3f, playerPo

 	chunk_meshing.beginRender();

-	var chunkList = main.List(u32).init(main.stackAllocator);
-	defer chunkList.deinit();
+	var chunkLists: [main.settings.highestSupportedLod + 1]main.List(u32) = @splat(main.List(u32).init(main.stackAllocator));
+	defer for(chunkLists) |list| list.deinit();
 	for(meshes) |mesh| {
-		mesh.prepareRendering(&chunkList);
+		mesh.prepareRendering(&chunkLists);
 	}
 	gpu_performance_measuring.stopQuery();
-	if(chunkList.items.len != 0) {
-		chunk_meshing.drawChunksIndirect(chunkList.items, game.projectionMatrix, ambientLight, playerPos, false);
-	}
+	gpu_performance_measuring.startQuery(.chunk_rendering);
+	chunk_meshing.drawChunksIndirect(&chunkLists, game.projectionMatrix, ambientLight, playerPos, false);
+	gpu_performance_measuring.stopQuery();

 	gpu_performance_measuring.startQuery(.entity_rendering);
 	entity.ClientEntityManager.render(game.projectionMatrix, ambientLight, playerPos);
@ -264,17 +264,17 @@ pub fn renderWorld(world: *World, ambientLight: Vec3f, skyColor: Vec3f, playerPo
 	c.glTextureBarrier();

 	{
-		chunkList.clearRetainingCapacity();
+		for(&chunkLists) |*list| list.clearRetainingCapacity();
 		var i: usize = meshes.len;
 		while(true) {
 			if(i == 0) break;
 			i -= 1;
-			meshes[i].prepareTransparentRendering(playerPos, &chunkList);
+			meshes[i].prepareTransparentRendering(playerPos, &chunkLists);
 		}
 		gpu_performance_measuring.stopQuery();
-		if(chunkList.items.len != 0) {
-			chunk_meshing.drawChunksIndirect(chunkList.items, game.projectionMatrix, ambientLight, playerPos, true);
-		}
+		gpu_performance_measuring.startQuery(.transparent_rendering);
+		chunk_meshing.drawChunksIndirect(&chunkLists, game.projectionMatrix, ambientLight, playerPos, true);
+		gpu_performance_measuring.stopQuery();
 	}

 	c.glDepthRange(0, 0.001);
--- a/src/renderer/chunk_meshing.zig
+++ b/src/renderer/chunk_meshing.zig
@ -64,8 +64,8 @@ pub var occlusionTestUniforms: struct {
 } = undefined;
 pub var vao: c_uint = undefined;
 var vbo: c_uint = undefined;
-pub var faceBuffer: graphics.LargeBuffer(FaceData) = undefined;
-pub var lightBuffer: graphics.LargeBuffer(u32) = undefined;
+pub var faceBuffers: [settings.highestSupportedLod + 1]graphics.LargeBuffer(FaceData) = undefined;
+pub var lightBuffers: [settings.highestSupportedLod + 1]graphics.LargeBuffer(u32) = undefined;
 pub var chunkBuffer: graphics.LargeBuffer(ChunkData) = undefined;
 pub var commandBuffer: graphics.LargeBuffer(IndirectData) = undefined;
 pub var chunkIDBuffer: graphics.LargeBuffer(u32) = undefined;
@ -133,8 +133,10 @@ pub fn init() void {
 	c.glBufferData(c.GL_ELEMENT_ARRAY_BUFFER, rawData.len*@sizeOf(u32), &rawData, c.GL_STATIC_DRAW);
 	c.glBindVertexArray(0);

-	faceBuffer.init(main.globalAllocator, 1 << 20, 3);
-	lightBuffer.init(main.globalAllocator, 1 << 20, 10);
+	for(0..settings.highestSupportedLod + 1) |i| {
+		faceBuffers[i].init(main.globalAllocator, 1 << 20, 3);
+		lightBuffers[i].init(main.globalAllocator, 1 << 20, 10);
+	}
 	chunkBuffer.init(main.globalAllocator, 1 << 20, 6);
 	commandBuffer.init(main.globalAllocator, 1 << 20, 8);
 	chunkIDBuffer.init(main.globalAllocator, 1 << 20, 9);
@ -148,24 +150,30 @@ pub fn deinit() void {
 	commandPipeline.deinit();
 	c.glDeleteVertexArrays(1, &vao);
 	c.glDeleteBuffers(1, &vbo);
-	faceBuffer.deinit();
-	lightBuffer.deinit();
+	for(0..settings.highestSupportedLod + 1) |i| {
+		faceBuffers[i].deinit();
+		lightBuffers[i].deinit();
+	}
 	chunkBuffer.deinit();
 	commandBuffer.deinit();
 	chunkIDBuffer.deinit();
 }

 pub fn beginRender() void {
-	faceBuffer.beginRender();
-	lightBuffer.beginRender();
+	for(0..settings.highestSupportedLod + 1) |i| {
+		faceBuffers[i].beginRender();
+		lightBuffers[i].beginRender();
+	}
 	chunkBuffer.beginRender();
 	commandBuffer.beginRender();
 	chunkIDBuffer.beginRender();
 }

 pub fn endRender() void {
-	faceBuffer.endRender();
-	lightBuffer.endRender();
+	for(0..settings.highestSupportedLod + 1) |i| {
+		faceBuffers[i].endRender();
+		lightBuffers[i].endRender();
+	}
 	chunkBuffer.endRender();
 	commandBuffer.endRender();
 	chunkIDBuffer.endRender();
@ -212,7 +220,20 @@ pub fn bindTransparentShaderAndUniforms(projMatrix: Mat4f, ambient: Vec3f, playe
 	c.glBindVertexArray(vao);
 }

-pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
+fn bindBuffers(lod: usize) void {
+	faceBuffers[lod].ssbo.bind(faceBuffers[lod].binding);
+	lightBuffers[lod].ssbo.bind(lightBuffers[lod].binding);
+}
+
+pub fn drawChunksIndirect(chunkIds: *const [main.settings.highestSupportedLod + 1]main.List(u32), projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
+	for(0..chunkIds.len) |i| {
+		const lod = if(transparent) main.settings.highestSupportedLod - i else i;
+		bindBuffers(lod);
+		drawChunksOfLod(chunkIds[lod].items, projMatrix, ambient, playerPos, transparent);
+	}
+}
+
+fn drawChunksOfLod(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
 	const drawCallsEstimate: u31 = @intCast(if(transparent) chunkIDs.len else chunkIDs.len*8);
 	var chunkIDAllocation: main.graphics.SubAllocation = .{.start = 0, .len = 0};
 	chunkIDBuffer.uploadData(chunkIDs, &chunkIDAllocation);
@ -227,7 +248,6 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
 	c.glUniform1i(commandUniforms.isTransparent, @intFromBool(transparent));
 	c.glUniform3i(commandUniforms.playerPositionInteger, @intFromFloat(@floor(playerPos[0])), @intFromFloat(@floor(playerPos[1])), @intFromFloat(@floor(playerPos[2])));
 	if(!transparent) {
-		gpu_performance_measuring.startQuery(.chunk_rendering_previous_visible);
 		c.glUniform1i(commandUniforms.onlyDrawPreviouslyInvisible, 0);
 		c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
 		c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT | c.GL_COMMAND_BARRIER_BIT);
@ -239,11 +259,9 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
 		}
 		c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
 		c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
-		gpu_performance_measuring.stopQuery();
 	}

 	// Occlusion tests:
-	gpu_performance_measuring.startQuery(if(transparent) .transparent_rendering_occlusion_test else .chunk_rendering_occlusion_test);
 	occlusionTestPipeline.bind(null);
 	c.glUniform3i(occlusionTestUniforms.playerPositionInteger, @intFromFloat(@floor(playerPos[0])), @intFromFloat(@floor(playerPos[1])), @intFromFloat(@floor(playerPos[2])));
 	c.glUniform3f(occlusionTestUniforms.playerPositionFraction, @floatCast(@mod(playerPos[0], 1)), @floatCast(@mod(playerPos[1], 1)), @floatCast(@mod(playerPos[2], 1)));
@ -252,10 +270,8 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
 	c.glBindVertexArray(vao);
 	c.glDrawElementsBaseVertex(c.GL_TRIANGLES, @intCast(6*6*chunkIDs.len), c.GL_UNSIGNED_INT, null, chunkIDAllocation.start*24);
 	c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT);
-	gpu_performance_measuring.stopQuery();

 	// Draw again:
-	gpu_performance_measuring.startQuery(if(transparent) .transparent_rendering else .chunk_rendering_new_visible);
 	commandPipeline.bind();
 	c.glUniform1i(commandUniforms.onlyDrawPreviouslyInvisible, 1);
 	c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
@ -268,7 +284,6 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
 	}
 	c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
 	c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
-	gpu_performance_measuring.stopQuery();
 }

 pub const FaceData = extern struct {
@ -347,9 +362,10 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
 	wasChanged: bool = false,
 	min: Vec3f = undefined,
 	max: Vec3f = undefined,
+	lod: u3,

 	fn deinit(self: *PrimitiveMesh) void {
-		faceBuffer.free(self.bufferAllocation);
+		faceBuffers[self.lod].free(self.bufferAllocation);
 		self.completeList.deinit(main.globalAllocator);
 	}

@ -573,8 +589,8 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
 			len += list[i].len;
 		}

-		const fullBuffer = faceBuffer.allocateAndMapRange(len, &self.bufferAllocation);
-		defer faceBuffer.unmapRange(fullBuffer);
+		const fullBuffer = faceBuffers[self.lod].allocateAndMapRange(len, &self.bufferAllocation);
+		defer faceBuffers[self.lod].unmapRange(fullBuffer);
 		// Sort the faces by normal to allow for backface culling on the GPU:
 		var i: u32 = 0;
 		var iStart = i;
@ -679,8 +695,12 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
 		self.* = ChunkMesh{
 			.pos = pos,
 			.size = chunk.chunkSize*pos.voxelSize,
-			.opaqueMesh = .{},
-			.transparentMesh = .{},
+			.opaqueMesh = .{
+				.lod = @intCast(std.math.log2_int(u32, pos.voxelSize)),
+			},
+			.transparentMesh = .{
+				.lod = @intCast(std.math.log2_int(u32, pos.voxelSize)),
+			},
 			.chunk = ch,
 			.lightingData = .{
 				lighting.ChannelChunk.init(ch, false),
@ -705,7 +725,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
 		self.blockBreakingFaces.deinit();
 		main.globalAllocator.free(self.blockBreakingFacesSortingData);
 		main.globalAllocator.free(self.lightList);
-		lightBuffer.free(self.lightAllocation);
+		lightBuffers[std.math.log2_int(u32, self.pos.voxelSize)].free(self.lightAllocation);
 	}

 	pub fn increaseRefCount(self: *ChunkMesh) void {
@ -1355,7 +1375,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh

 		if(self.lightListNeedsUpload) {
 			self.lightListNeedsUpload = false;
-			lightBuffer.uploadData(self.lightList, &self.lightAllocation);
+			lightBuffers[std.math.log2_int(u32, self.pos.voxelSize)].uploadData(self.lightList, &self.lightAllocation);
 		}

 		self.uploadChunkPosition();
@ -1555,15 +1575,15 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
 		}}, &self.chunkAllocation);
 	}

-	pub fn prepareRendering(self: *ChunkMesh, chunkList: *main.List(u32)) void {
+	pub fn prepareRendering(self: *ChunkMesh, chunkLists: *[main.settings.highestSupportedLod + 1]main.List(u32)) void {
 		if(self.opaqueMesh.vertexCount == 0) return;

-		chunkList.append(self.chunkAllocation.start);
+		chunkLists[std.math.log2_int(u32, self.pos.voxelSize)].append(self.chunkAllocation.start);

 		quadsDrawn += self.opaqueMesh.vertexCount/6;
 	}

-	pub fn prepareTransparentRendering(self: *ChunkMesh, playerPosition: Vec3d, chunkList: *main.List(u32)) void {
+	pub fn prepareTransparentRendering(self: *ChunkMesh, playerPosition: Vec3d, chunkLists: *[main.settings.highestSupportedLod + 1]main.List(u32)) void {
 		if(self.transparentMesh.vertexCount == 0 and self.blockBreakingFaces.items.len == 0) return;

 		var needsUpdate: bool = false;
@ -1696,11 +1716,11 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
 			}
 			self.culledSortingCount += @intCast(self.blockBreakingFaces.items.len);
 			// Upload:
-			faceBuffer.uploadData(self.sortingOutputBuffer[0..self.culledSortingCount], &self.transparentMesh.bufferAllocation);
+			faceBuffers[std.math.log2_int(u32, self.pos.voxelSize)].uploadData(self.sortingOutputBuffer[0..self.culledSortingCount], &self.transparentMesh.bufferAllocation);
 			self.uploadChunkPosition();
 		}

-		chunkList.append(self.chunkAllocation.start);
+		chunkLists[std.math.log2_int(u32, self.pos.voxelSize)].append(self.chunkAllocation.start);
 		transparentQuadsDrawn += self.culledSortingCount;
 	}
 };