Implement pre-vertex-shader backface culling on the GPU.

Decreases frametime by ~16% on my hardware.
Not as much as I hoped, but still worth a lot.
This commit is contained in:
IntegratedQuantum 2024-06-06 16:44:51 +02:00
parent 50076d172d
commit c225efa1de
4 changed files with 85 additions and 27 deletions

View File

@ -42,7 +42,7 @@ struct ChunkData {
int visibilityMask; int visibilityMask;
int voxelSize; int voxelSize;
uint vertexStartOpaque; uint vertexStartOpaque;
uint vertexCountOpaque; uint faceCountsByNormalOpaque[7];
uint vertexStartTransparent; uint vertexStartTransparent;
uint vertexCountTransparent; uint vertexCountTransparent;
}; };

View File

@ -12,7 +12,7 @@ struct ChunkData {
int visibilityMask; int visibilityMask;
int voxelSize; int voxelSize;
uint vertexStartOpaque; uint vertexStartOpaque;
uint vertexCountOpaque; uint faceCountsByNormalOpaque[7];
uint vertexStartTransparent; uint vertexStartTransparent;
uint vertexCountTransparent; uint vertexCountTransparent;
}; };
@ -40,26 +40,62 @@ uniform uint chunkIDIndex;
uniform uint commandIndexStart; uniform uint commandIndexStart;
uniform uint size; uniform uint size;
uniform bool isTransparent; uniform bool isTransparent;
uniform ivec3 playerPositionInteger;
// Conservative per-direction backface test for one chunk.
//   dir:               index of the face normal direction (0..5 are the axis-aligned
//                      directions named below; any other value is never culled).
//   relativePlayerPos: player position minus the chunk position (see the call in main()).
//   voxelSize:         voxel size of the chunk; 32*voxelSize is used as the upper bound
//                      on each axis (presumably the chunk's width in world units — confirm).
// Returns false only when the test for the given direction fails.
bool isVisible(int dir, ivec3 relativePlayerPos, int voxelSize) {
	int upperBound = 32*voxelSize;
	if(dir == 0) return relativePlayerPos.z >= 0; // dirUp
	if(dir == 1) return relativePlayerPos.z < upperBound; // dirDown
	if(dir == 2) return relativePlayerPos.x >= 0; // dirPosX
	if(dir == 3) return relativePlayerPos.x < upperBound; // dirNegX
	if(dir == 4) return relativePlayerPos.y >= 0; // dirPosY
	if(dir == 5) return relativePlayerPos.y < upperBound; // dirNegY
	// Every other direction index is treated as always visible.
	return true;
}
// Builds one indirect draw command for a chunk.
//   indices:      passed as the first constructor argument (index count of the draw).
//   vertexOffset: converted to int and passed as the fourth constructor argument.
//   chunkID:      passed as the fifth constructor argument.
// The second and third arguments are fixed at 1 and 0.
// NOTE(review): the struct declaration is outside this view; the field order presumably
// follows OpenGL's indirect command layout (count, instanceCount, firstIndex, baseVertex,
// baseInstance) — confirm against the declaration.
DrawElementsIndirectCommand addCommand(uint indices, uint vertexOffset, uint chunkID) {
	DrawElementsIndirectCommand command = DrawElementsIndirectCommand(
		indices,
		1,
		0,
		int(vertexOffset),
		chunkID
	);
	return command;
}
void main() { void main() {
uint chunkID = chunkIDs[chunkIDIndex + gl_GlobalInvocationID.x]; uint chunkID = chunkIDs[chunkIDIndex + gl_GlobalInvocationID.x];
uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x;
if(gl_GlobalInvocationID.x >= size) return; if(gl_GlobalInvocationID.x >= size) return;
if(isTransparent) { if(isTransparent) {
commands[commandIndex] = DrawElementsIndirectCommand( uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x;
chunks[chunkID].vertexCountTransparent, commands[commandIndex] = addCommand(chunks[chunkID].vertexCountTransparent, chunks[chunkID].vertexStartTransparent, chunkID);
1,
0,
int(chunks[chunkID].vertexStartTransparent),
chunkID
);
} else { } else {
commands[commandIndex] = DrawElementsIndirectCommand( uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x*4;
chunks[chunkID].vertexCountOpaque, uint commandIndexEnd = commandIndex + 4;
1, uint groupFaceOffset = 0;
0, uint groupFaceCount = 0;
int(chunks[chunkID].vertexStartOpaque), for(int i = 0; i < 7; i++) {
chunkID uint faceCount = chunks[chunkID].faceCountsByNormalOpaque[i];
); if(isVisible(i, playerPositionInteger - chunks[chunkID].position.xyz, chunks[chunkID].voxelSize) || faceCount == 0) {
groupFaceCount += faceCount;
} else {
if(groupFaceCount != 0) {
commands[commandIndex] = addCommand(6*groupFaceCount, chunks[chunkID].vertexStartOpaque + 4*groupFaceOffset, chunkID);
commandIndex += 1;
groupFaceOffset += groupFaceCount;
groupFaceCount = 0;
}
groupFaceOffset += faceCount;
}
}
if(groupFaceCount != 0) {
commands[commandIndex] = addCommand(6*groupFaceCount, chunks[chunkID].vertexStartOpaque + 4*groupFaceOffset, chunkID);
commandIndex += 1;
}
for(; commandIndex < commandIndexEnd; commandIndex++) {
commands[commandIndex] = DrawElementsIndirectCommand(0, 0, 0, 0, 0);
}
} }
} }

View File

@ -2015,7 +2015,7 @@ pub fn generateBlockTexture(blockType: u16) Texture {
.visibilityMask = 255, .visibilityMask = 255,
.voxelSize = 1, .voxelSize = 1,
.vertexStartOpaque = undefined, .vertexStartOpaque = undefined,
.vertexCountOpaque = undefined, .faceCountsByNormalOpaque = undefined,
.vertexStartTransparent = undefined, .vertexStartTransparent = undefined,
.vertexCountTransparent = undefined, .vertexCountTransparent = undefined,
}}, &chunkAllocation); }}, &chunkAllocation);

View File

@ -51,6 +51,7 @@ pub var commandUniforms: struct {
commandIndexStart: c_int, commandIndexStart: c_int,
size: c_int, size: c_int,
isTransparent: c_int, isTransparent: c_int,
playerPositionInteger: c_int,
} = undefined; } = undefined;
var vao: c_uint = undefined; var vao: c_uint = undefined;
var vbo: c_uint = undefined; var vbo: c_uint = undefined;
@ -158,16 +159,18 @@ pub fn bindTransparentShaderAndUniforms(projMatrix: Mat4f, ambient: Vec3f, playe
} }
pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void { pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
const drawCallsEstimate: u31 = @intCast(if(transparent) chunkIDs.len else chunkIDs.len*4);
var chunkIDAllocation: main.graphics.SubAllocation = .{.start = 0, .len = 0}; var chunkIDAllocation: main.graphics.SubAllocation = .{.start = 0, .len = 0};
chunkIDBuffer.uploadData(chunkIDs, &chunkIDAllocation); chunkIDBuffer.uploadData(chunkIDs, &chunkIDAllocation);
defer chunkIDBuffer.free(chunkIDAllocation); defer chunkIDBuffer.free(chunkIDAllocation);
const allocation = commandBuffer.rawAlloc(@intCast(chunkIDs.len)); const allocation = commandBuffer.rawAlloc(drawCallsEstimate);
defer commandBuffer.free(allocation); defer commandBuffer.free(allocation);
commandShader.bind(); commandShader.bind();
c.glUniform1ui(commandUniforms.chunkIDIndex, chunkIDAllocation.start); c.glUniform1ui(commandUniforms.chunkIDIndex, chunkIDAllocation.start);
c.glUniform1ui(commandUniforms.commandIndexStart, allocation.start); c.glUniform1ui(commandUniforms.commandIndexStart, allocation.start);
c.glUniform1ui(commandUniforms.size, @intCast(chunkIDs.len)); c.glUniform1ui(commandUniforms.size, @intCast(chunkIDs.len));
c.glUniform1i(commandUniforms.isTransparent, @intFromBool(transparent)); c.glUniform1i(commandUniforms.isTransparent, @intFromBool(transparent));
c.glUniform3i(commandUniforms.playerPositionInteger, @intFromFloat(playerPos[0]), @intFromFloat(playerPos[1]), @intFromFloat(playerPos[2]));
c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT); c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT);
@ -177,7 +180,7 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
bindShaderAndUniforms(projMatrix, ambient, playerPos); bindShaderAndUniforms(projMatrix, ambient, playerPos);
} }
c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID); c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), @intCast(chunkIDs.len), 0); c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
} }
pub const FaceData = extern struct { pub const FaceData = extern struct {
@ -208,7 +211,7 @@ pub const ChunkData = extern struct {
visibilityMask: i32, visibilityMask: i32,
voxelSize: i32, voxelSize: i32,
vertexStartOpaque: u32, vertexStartOpaque: u32,
vertexCountOpaque: u32, faceCountsByNormalOpaque: [7]u32,
vertexStartTransparent: u32, vertexStartTransparent: u32,
vertexCountTransparent: u32, vertexCountTransparent: u32,
}; };
@ -232,6 +235,7 @@ const PrimitiveMesh = struct {
mutex: std.Thread.Mutex = .{}, mutex: std.Thread.Mutex = .{},
bufferAllocation: graphics.SubAllocation = .{.start = 0, .len = 0}, bufferAllocation: graphics.SubAllocation = .{.start = 0, .len = 0},
vertexCount: u31 = 0, vertexCount: u31 = 0,
byNormalCount: [7]u32 = .{0} ** 7,
wasChanged: bool = false, wasChanged: bool = false,
fn deinit(self: *PrimitiveMesh) void { fn deinit(self: *PrimitiveMesh) void {
@ -497,12 +501,30 @@ const PrimitiveMesh = struct {
} }
const fullBuffer = faceBuffer.allocateAndMapRange(len, &self.bufferAllocation); const fullBuffer = faceBuffer.allocateAndMapRange(len, &self.bufferAllocation);
defer faceBuffer.unmapRange(fullBuffer); defer faceBuffer.unmapRange(fullBuffer);
@memcpy(fullBuffer[0..self.coreLen], self.completeList[0..self.coreLen]); // Sort the faces by normal to allow for backface culling on the GPU:
var i: usize = self.coreLen; var i: u32 = 0;
for(0..6) |n| { var iStart = i;
@memcpy(fullBuffer[i..][0..list[n].len], list[n]); const coreList = self.completeList[0..self.coreLen];
i += list[n].len; for(0..7) |normal| {
for(coreList) |face| {
if(main.models.extraQuadInfos.items[face.blockAndQuad.quadIndex].alignedNormalDirection) |normalDir| {
if(normalDir == normal) {
fullBuffer[i] = face;
i += 1;
} }
} else if(normal == 6) {
fullBuffer[i] = face;
i += 1;
}
}
if(normal < 6) {
@memcpy(fullBuffer[i..][0..list[normal ^ 1].len], list[normal ^ 1]);
i += @intCast(list[normal ^ 1].len);
}
self.byNormalCount[normal] = i - iStart;
iStart = i;
}
std.debug.assert(i == fullBuffer.len);
self.vertexCount = @intCast(6*fullBuffer.len); self.vertexCount = @intCast(6*fullBuffer.len);
self.wasChanged = true; self.wasChanged = true;
} }
@ -1102,7 +1124,7 @@ pub const ChunkMesh = struct {
.voxelSize = self.pos.voxelSize, .voxelSize = self.pos.voxelSize,
.visibilityMask = self.visibilityMask, .visibilityMask = self.visibilityMask,
.vertexStartOpaque = self.opaqueMesh.bufferAllocation.start*4, .vertexStartOpaque = self.opaqueMesh.bufferAllocation.start*4,
.vertexCountOpaque = self.opaqueMesh.vertexCount, .faceCountsByNormalOpaque = self.opaqueMesh.byNormalCount,
.vertexStartTransparent = self.transparentMesh.bufferAllocation.start*4, .vertexStartTransparent = self.transparentMesh.bufferAllocation.start*4,
.vertexCountTransparent = self.transparentMesh.bufferAllocation.len*6, .vertexCountTransparent = self.transparentMesh.bufferAllocation.len*6,
}}, &self.chunkAllocation); }}, &self.chunkAllocation);