From 90375b871a67e896ff22400914ae0f3753369154 Mon Sep 17 00:00:00 2001 From: IntegratedQuantum <43880493+IntegratedQuantum@users.noreply.github.com> Date: Sat, 2 Aug 2025 14:42:34 +0200 Subject: [PATCH] Make reading light data thread safe without mutexes (#1727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After failing in #1725, I decided to use a different approach to making the palette-compressed data atomic: I added a new level of indirection which can be used to swap the entire content in one atomic operation. This should make the process itself cheaper than what I had implemented before. related: https://github.com/PixelGuys/Cubyz/issues/1471 https://github.com/PixelGuys/Cubyz/issues/1413 improves https://github.com/PixelGuys/Cubyz/issues/277 Remaining work: - [x] double check the implementation - [x] Fully remove the ReadWriteLock - [x] Check if this improved meshing performance → yes it did by 10-20% - [x] Check if this improved block update speed → yes it did by ~25% --- src/block_entity.zig | 4 - src/chunk.zig | 2 +- src/renderer/chunk_meshing.zig | 22 +- src/renderer/lighting.zig | 68 ++--- src/renderer/mesh_storage.zig | 4 - src/server/storage.zig | 24 +- .../terrain/chunkgen/TerrainGenerator.zig | 8 +- src/server/world.zig | 4 +- src/utils.zig | 256 +++++++++++------- 9 files changed, 217 insertions(+), 175 deletions(-) diff --git a/src/block_entity.zig b/src/block_entity.zig index a8471ec6..5ec5fcb3 100644 --- a/src/block_entity.zig +++ b/src/block_entity.zig @@ -475,10 +475,6 @@ pub const BlockEntityTypes = struct { c.glUniform1i(uniforms.quadIndex, @intFromEnum(quad)); const mesh = main.renderer.mesh_storage.getMesh(main.chunk.ChunkPosition.initFromWorldPos(signData.blockPos, 1)) orelse continue :outer; - mesh.lightingData[0].lock.lockRead(); - defer mesh.lightingData[0].lock.unlockRead(); - mesh.lightingData[1].lock.lockRead(); - defer mesh.lightingData[1].lock.unlockRead(); const light: [4]u32 = 
main.renderer.chunk_meshing.PrimitiveMesh.getLight(mesh, signData.blockPos -% Vec3i{mesh.pos.wx, mesh.pos.wy, mesh.pos.wz}, 0, quad); c.glUniform4ui(uniforms.lightData, light[0], light[1], light[2], light[3]); c.glUniform3i(uniforms.chunkPos, signData.blockPos[0] & ~main.chunk.chunkMask, signData.blockPos[1] & ~main.chunk.chunkMask, signData.blockPos[2] & ~main.chunk.chunkMask); diff --git a/src/chunk.zig b/src/chunk.zig index dc3b4a85..f1b14b38 100644 --- a/src/chunk.zig +++ b/src/chunk.zig @@ -286,7 +286,7 @@ pub const Chunk = struct { // MARK: Chunk fn deinitContent(self: *Chunk) void { std.debug.assert(self.blockPosToEntityDataMap.count() == 0); self.blockPosToEntityDataMap.deinit(main.globalAllocator.allocator); - self.data.deinit(); + self.data.deferredDeinit(); } pub fn unloadBlockEntities(self: *Chunk, comptime side: main.utils.Side) void { diff --git a/src/renderer/chunk_meshing.zig b/src/renderer/chunk_meshing.zig index e166f3fd..96c70431 100644 --- a/src/renderer/chunk_meshing.zig +++ b/src/renderer/chunk_meshing.zig @@ -381,8 +381,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh self.max = @splat(-std.math.floatMax(f32)); self.lock.lockRead(); - parent.lightingData[0].lock.lockRead(); - parent.lightingData[1].lock.lockRead(); for(self.completeList.getEverything()) |*face| { const light = getLight(parent, .{face.position.x, face.position.y, face.position.z}, face.blockAndQuad.texture, face.blockAndQuad.quadIndex); const result = lightMap.getOrPut(light) catch unreachable; @@ -401,8 +399,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh self.max = @max(self.max, basePos + cornerPos); } } - parent.lightingData[0].lock.unlockRead(); - parent.lightingData[1].lock.unlockRead(); self.lock.unlockRead(); } @@ -421,10 +417,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh return getValues(parent, wx, wy, wz); } const neighborMesh = mesh_storage.getMesh(.{.wx = wx, .wy = wy, .wz = wz, .voxelSize = parent.pos.voxelSize}) 
orelse return .{0, 0, 0, 0, 0, 0}; - neighborMesh.lightingData[0].lock.lockRead(); - neighborMesh.lightingData[1].lock.lockRead(); - defer neighborMesh.lightingData[0].lock.unlockRead(); - defer neighborMesh.lightingData[1].lock.unlockRead(); return getValues(neighborMesh, wx, wy, wz); } @@ -807,7 +799,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh self.mutex.unlock(); self.lightingData[0].propagateLights(lightEmittingBlocks.items, true, lightRefreshList); sunLight: { - var allSun: bool = self.chunk.data.paletteLength == 1 and self.chunk.data.palette[0].typ == 0; + var allSun: bool = self.chunk.data.palette().len == 1 and self.chunk.data.palette()[0].load(.unordered).typ == 0; var sunStarters: [chunk.chunkSize*chunk.chunkSize][3]u8 = undefined; var index: usize = 0; const lightStartMap = mesh_storage.getLightMapPiece(self.pos.wx, self.pos.wy, self.pos.voxelSize) orelse break :sunLight; @@ -915,10 +907,10 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh hasInternalQuads: bool = false, alwaysViewThrough: bool = false, }; - var paletteCache = main.stackAllocator.alloc(OcclusionInfo, self.chunk.data.paletteLength); + var paletteCache = main.stackAllocator.alloc(OcclusionInfo, self.chunk.data.palette().len); defer main.stackAllocator.free(paletteCache); - for(0..self.chunk.data.paletteLength) |i| { - const block = self.chunk.data.palette[i]; + for(0..self.chunk.data.palette().len) |i| { + const block = self.chunk.data.palette()[i].load(.unordered); const model = blocks.meshes.model(block).model(); var result: OcclusionInfo = .{}; if(model.noNeighborsOccluded or block.viewThrough()) { @@ -946,7 +938,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh const y: u5 = @intCast(_y); for(0..chunk.chunkSize) |_z| { const z: u5 = @intCast(_z); - const paletteId = self.chunk.data.data.getValue(chunk.getIndex(x, y, z)); + const paletteId = self.chunk.data.impl.raw.data.getValue(chunk.getIndex(x, y, z)); const occlusionInfo = paletteCache[paletteId]; const setBit = 
@as(u32, 1) << z; if(occlusionInfo.alwaysViewThrough or (!occlusionInfo.canSeeAllNeighbors and occlusionInfo.canSeeNeighbor == 0)) { @@ -986,7 +978,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh const y: u5 = @intCast(_y); for(0..chunk.chunkSize) |_z| { const z: u5 = @intCast(_z); - const paletteId = self.chunk.data.data.getValue(chunk.getIndex(x, y, z)); + const paletteId = self.chunk.data.impl.raw.data.getValue(chunk.getIndex(x, y, z)); const occlusionInfo = paletteCache[paletteId]; const setBit = @as(u32, 1) << z; if(depthFilteredViewThroughMask[x][y] & setBit != 0) {} else if(occlusionInfo.canSeeAllNeighbors) { @@ -1002,7 +994,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh hasFaces[x][y] |= setBit; } if(occlusionInfo.hasInternalQuads) { - const block = self.chunk.data.palette[paletteId]; + const block = self.chunk.data.palette()[paletteId].load(.unordered); if(block.transparent()) { appendInternalQuads(block, x, y, z, false, &transparentCore, main.stackAllocator); } else { diff --git a/src/renderer/lighting.zig b/src/renderer/lighting.zig index df9ef30d..4213678c 100644 --- a/src/renderer/lighting.zig +++ b/src/renderer/lighting.zig @@ -17,6 +17,21 @@ pub fn deinit() void { memoryPool.deinit(); } +const LightValue = packed struct(u32) { + r: u8, + g: u8, + b: u8, + pad: u8 = undefined, + + fn fromArray(arr: [3]u8) LightValue { + return .{.r = arr[0], .g = arr[1], .b = arr[2]}; + } + + fn toArray(self: LightValue) [3]u8 { + return .{self.r, self.g, self.b}; + } +}; + fn extractColor(in: u32) [3]u8 { return .{ @truncate(in >> 16), @@ -26,14 +41,14 @@ fn extractColor(in: u32) [3]u8 { } pub const ChannelChunk = struct { - data: main.utils.PaletteCompressedRegion([3]u8, chunk.chunkVolume), - lock: main.utils.ReadWriteLock, + data: main.utils.PaletteCompressedRegion(LightValue, chunk.chunkVolume), + mutex: std.Thread.Mutex, ch: *chunk.Chunk, isSun: bool, pub fn init(ch: *chunk.Chunk, isSun: bool) *ChannelChunk { const self = memoryPool.create(); - 
self.lock = .{}; + self.mutex = .{}; self.ch = ch; self.isSun = isSun; self.data.init(); @@ -41,7 +56,7 @@ pub const ChannelChunk = struct { } pub fn deinit(self: *ChannelChunk) void { - self.data.deinit(); + self.data.deferredDeinit(); memoryPool.destroy(self); } @@ -66,9 +81,8 @@ pub const ChannelChunk = struct { }; pub fn getValue(self: *ChannelChunk, x: i32, y: i32, z: i32) [3]u8 { - self.lock.assertLockedRead(); const index = chunk.getIndex(x, y, z); - return self.data.getValue(index); + return self.data.getValue(index).toArray(); } fn calculateIncomingOcclusion(result: *[3]u8, block: blocks.Block, voxelSize: u31, neighbor: chunk.Neighbor) void { @@ -106,17 +120,17 @@ pub const ChannelChunk = struct { } } - self.lock.lockWrite(); + self.mutex.lock(); while(lightQueue.popFront()) |entry| { const index = chunk.getIndex(entry.x, entry.y, entry.z); - const oldValue: [3]u8 = self.data.getValue(index); + const oldValue: [3]u8 = self.data.getValue(index).toArray(); const newValue: [3]u8 = .{ @max(entry.value[0], oldValue[0]), @max(entry.value[1], oldValue[1]), @max(entry.value[2], oldValue[2]), }; if(newValue[0] == oldValue[0] and newValue[1] == oldValue[1] and newValue[2] == oldValue[2]) continue; - self.data.setValue(index, newValue); + self.data.setValue(index, .fromArray(newValue)); for(chunk.Neighbor.iterable) |neighbor| { if(neighbor.toInt() == entry.sourceDir) continue; const nx = entry.x + neighbor.relX(); @@ -140,7 +154,7 @@ pub const ChannelChunk = struct { } } self.data.optimizeLayout(); - self.lock.unlockWrite(); + self.mutex.unlock(); self.addSelfToLightRefreshList(lightRefreshList); for(chunk.Neighbor.iterable) |neighbor| { @@ -172,10 +186,10 @@ pub const ChannelChunk = struct { } var isFirstIteration: bool = isFirstBlock; - self.lock.lockWrite(); + self.mutex.lock(); while(lightQueue.popFront()) |entry| { const index = chunk.getIndex(entry.x, entry.y, entry.z); - const oldValue: [3]u8 = self.data.getValue(index); + const oldValue: [3]u8 = 
self.data.getValue(index).toArray(); var activeValue: @Vector(3, bool) = @bitCast(entry.activeValue); var append: bool = false; if(activeValue[0] and entry.value[0] != oldValue[0]) { @@ -209,7 +223,7 @@ pub const ChannelChunk = struct { if(activeValue[0]) insertValue[0] = 0; if(activeValue[1]) insertValue[1] = 0; if(activeValue[2]) insertValue[2] = 0; - self.data.setValue(index, insertValue); + self.data.setValue(index, .fromArray(insertValue)); for(chunk.Neighbor.iterable) |neighbor| { if(neighbor.toInt() == entry.sourceDir) continue; const nx = entry.x + neighbor.relX(); @@ -231,7 +245,7 @@ pub const ChannelChunk = struct { lightQueue.pushBack(result); } } - self.lock.unlockWrite(); + self.mutex.unlock(); self.addSelfToLightRefreshList(lightRefreshList); for(chunk.Neighbor.iterable) |neighbor| { @@ -307,11 +321,9 @@ pub const ChannelChunk = struct { const otherZ = z +% neighbor.relZ() & chunk.chunkMask; const neighborMesh = mesh_storage.getNeighbor(self.ch.pos, self.ch.pos.voxelSize, neighbor) orelse continue; const neighborLightChunk = neighborMesh.lightingData[@intFromBool(self.isSun)]; - neighborLightChunk.lock.lockRead(); - defer neighborLightChunk.lock.unlockRead(); const index = chunk.getIndex(x, y, z); const neighborIndex = chunk.getIndex(otherX, otherY, otherZ); - var value: [3]u8 = neighborLightChunk.data.getValue(neighborIndex); + var value: [3]u8 = neighborLightChunk.data.getValue(neighborIndex).toArray(); if(!self.isSun or neighbor != .dirUp or value[0] != 255 or value[1] != 255 or value[2] != 255) { value[0] -|= 8*|@as(u8, @intCast(self.ch.pos.voxelSize)); value[1] -|= 8*|@as(u8, @intCast(self.ch.pos.voxelSize)); @@ -330,13 +342,9 @@ pub const ChannelChunk = struct { pub fn propagateUniformSun(self: *ChannelChunk, lightRefreshList: *main.List(chunk.ChunkPosition)) void { std.debug.assert(self.isSun); - self.lock.lockWrite(); - if(self.data.paletteLength != 1) { - self.data.deinit(); - self.data.init(); - } - self.data.palette[0] = .{255, 255, 255}; - 
self.lock.unlockWrite(); + self.mutex.lock(); + self.data.fillUniform(.fromArray(.{255, 255, 255})); + self.mutex.unlock(); const val = 255 -| 8*|@as(u8, @intCast(self.ch.pos.voxelSize)); var lightQueue = main.utils.CircularBufferQueue(Entry).init(main.stackAllocator, 1 << 12); defer lightQueue.deinit(); @@ -378,12 +386,10 @@ pub const ChannelChunk = struct { pub fn propagateLightsDestructive(self: *ChannelChunk, lights: []const [3]u8, lightRefreshList: *main.List(chunk.ChunkPosition)) void { var lightQueue = main.utils.CircularBufferQueue(Entry).init(main.stackAllocator, 1 << 12); defer lightQueue.deinit(); - self.lock.lockRead(); for(lights) |pos| { const index = chunk.getIndex(pos[0], pos[1], pos[2]); - lightQueue.pushBack(.{.x = @intCast(pos[0]), .y = @intCast(pos[1]), .z = @intCast(pos[2]), .value = self.data.getValue(index), .sourceDir = 6, .activeValue = 0b111}); + lightQueue.pushBack(.{.x = @intCast(pos[0]), .y = @intCast(pos[1]), .z = @intCast(pos[2]), .value = self.data.getValue(index).toArray(), .sourceDir = 6, .activeValue = 0b111}); } - self.lock.unlockRead(); var constructiveEntries: main.ListUnmanaged(ChunkEntries) = .{}; defer constructiveEntries.deinit(main.stackAllocator); constructiveEntries.append(main.stackAllocator, .{ @@ -395,10 +401,10 @@ pub const ChannelChunk = struct { var entryList = entries.entries; defer entryList.deinit(main.stackAllocator); const channelChunk = if(mesh) |_mesh| _mesh.lightingData[@intFromBool(self.isSun)] else self; - channelChunk.lock.lockWrite(); + channelChunk.mutex.lock(); for(entryList.items) |entry| { const index = chunk.getIndex(entry.x, entry.y, entry.z); - var value = channelChunk.data.getValue(index); + var value = channelChunk.data.getValue(index).toArray(); const light = if(self.isSun) .{0, 0, 0} else extractColor(channelChunk.ch.data.getValue(index).light()); value = .{ @max(value[0], light[0]), @@ -406,10 +412,10 @@ pub const ChannelChunk = struct { @max(value[2], light[2]), }; if(value[0] == 0 and 
value[1] == 0 and value[2] == 0) continue; - channelChunk.data.setValue(index, .{0, 0, 0}); + channelChunk.data.setValue(index, .fromArray(.{0, 0, 0})); lightQueue.pushBack(.{.x = entry.x, .y = entry.y, .z = entry.z, .value = value, .sourceDir = 6, .activeValue = 0b111}); } - channelChunk.lock.unlockWrite(); + channelChunk.mutex.unlock(); channelChunk.propagateDirect(&lightQueue, lightRefreshList); } } diff --git a/src/renderer/mesh_storage.zig b/src/renderer/mesh_storage.zig index 98c40dc1..b061fdff 100644 --- a/src/renderer/mesh_storage.zig +++ b/src/renderer/mesh_storage.zig @@ -197,10 +197,6 @@ pub fn getLight(wx: i32, wy: i32, wz: i32) ?[6]u8 { const x = (wx >> mesh.chunk.voxelSizeShift) & chunk.chunkMask; const y = (wy >> mesh.chunk.voxelSizeShift) & chunk.chunkMask; const z = (wz >> mesh.chunk.voxelSizeShift) & chunk.chunkMask; - mesh.lightingData[0].lock.lockRead(); - defer mesh.lightingData[0].lock.unlockRead(); - mesh.lightingData[1].lock.lockRead(); - defer mesh.lightingData[1].lock.unlockRead(); return mesh.lightingData[1].getValue(x, y, z) ++ mesh.lightingData[0].getValue(x, y, z); } diff --git a/src/server/storage.zig b/src/server/storage.zig index bcba8115..927dce50 100644 --- a/src/server/storage.zig +++ b/src/server/storage.zig @@ -282,18 +282,18 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression } fn compressBlockData(ch: *chunk.Chunk, allowLossy: bool, writer: *BinaryWriter) void { - if(ch.data.paletteLength == 1) { + if(ch.data.palette().len == 1) { writer.writeEnum(ChunkCompressionAlgo, .uniform); - writer.writeInt(u32, ch.data.palette[0].toInt()); + writer.writeInt(u32, ch.data.palette()[0].load(.unordered).toInt()); return; } - if(ch.data.paletteLength < 256) { + if(ch.data.palette().len < 256) { var uncompressedData: [chunk.chunkVolume]u8 = undefined; var solidMask: [chunk.chunkSize*chunk.chunkSize]u32 = undefined; for(0..chunk.chunkVolume) |i| { - uncompressedData[i] = @intCast(ch.data.data.getValue(i)); + 
uncompressedData[i] = @intCast(ch.data.impl.raw.data.getValue(i)); if(allowLossy) { - const block = ch.data.palette[uncompressedData[i]]; + const block = ch.data.palette()[uncompressedData[i]].load(.unordered); const model = main.blocks.meshes.model(block).model(); const occluder = model.allNeighborsOccluded and !block.viewThrough(); if(occluder) { @@ -323,10 +323,10 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression defer main.stackAllocator.free(compressedData); writer.writeEnum(ChunkCompressionAlgo, .deflate_with_8bit_palette); - writer.writeInt(u8, @intCast(ch.data.paletteLength)); + writer.writeInt(u8, @intCast(ch.data.palette().len)); - for(0..ch.data.paletteLength) |i| { - writer.writeInt(u32, ch.data.palette[i].toInt()); + for(0..ch.data.palette().len) |i| { + writer.writeInt(u32, ch.data.palette()[i].load(.unordered).toInt()); } writer.writeVarInt(usize, compressedData.len); writer.writeSlice(compressedData); @@ -347,7 +347,7 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression } fn decompressBlockData(ch: *chunk.Chunk, reader: *BinaryReader) !void { - std.debug.assert(ch.data.paletteLength == 1); + std.debug.assert(ch.data.palette().len == 1); const compressionAlgorithm = try reader.readEnum(ChunkCompressionAlgo); @@ -371,11 +371,11 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression .deflate_with_8bit_palette, .deflate_with_8bit_palette_no_block_entities => { const paletteLength = try reader.readInt(u8); - ch.data.deinit(); + ch.data.deferredDeinit(); ch.data.initCapacity(paletteLength); for(0..paletteLength) |i| { - ch.data.palette[i] = main.blocks.Block.fromInt(try reader.readInt(u32)); + ch.data.palette()[i] = .init(main.blocks.Block.fromInt(try reader.readInt(u32))); } const decompressedData = main.stackAllocator.alloc(u8, chunk.chunkVolume); @@ -392,7 +392,7 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression } }, .uniform => { - ch.data.palette[0] = main.blocks.Block.fromInt(try 
reader.readInt(u32)); + ch.data.palette()[0] = .init(main.blocks.Block.fromInt(try reader.readInt(u32))); }, } } diff --git a/src/server/terrain/chunkgen/TerrainGenerator.zig b/src/server/terrain/chunkgen/TerrainGenerator.zig index a32366cc..66077ad7 100644 --- a/src/server/terrain/chunkgen/TerrainGenerator.zig +++ b/src/server/terrain/chunkgen/TerrainGenerator.zig @@ -45,15 +45,11 @@ pub fn generate(worldSeed: u64, chunk: *main.chunk.ServerChunk, caveMap: CaveMap } } if(minHeight > chunk.super.pos.wz +| chunk.super.width) { - chunk.super.data.deinit(); - chunk.super.data.init(); - chunk.super.data.palette[0] = stone; + chunk.super.data.fillUniform(stone); return; } if(maxHeight < chunk.super.pos.wz) { - chunk.super.data.deinit(); - chunk.super.data.init(); - chunk.super.data.palette[0] = air; + chunk.super.data.fillUniform(air); return; } } diff --git a/src/server/world.zig b/src/server/world.zig index 3dba427f..0ad93aac 100644 --- a/src/server/world.zig +++ b/src/server/world.zig @@ -328,8 +328,8 @@ const ChunkManager = struct { // MARK: ChunkManager generator.generate(server.world.?.seed ^ generator.generatorSeed, ch, caveMap, biomeMap); } if(pos.voxelSize != 1) { // Generate LOD replacements - for(ch.super.data.palette[0..ch.super.data.paletteLength]) |*block| { - block.typ = block.lodReplacement(); + for(ch.super.data.palette()) |*block| { + block.store(.{.typ = block.load(.unordered).lodReplacement(), .data = block.load(.unordered).data}, .unordered); } } return ch; diff --git a/src/utils.zig b/src/utils.zig index 2b171342..9d287530 100644 --- a/src/utils.zig +++ b/src/utils.zig @@ -989,7 +989,7 @@ pub fn deinitDynamicIntArrayStorage() void { pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray std.debug.assert(std.math.isPowerOfTwo(size)); return struct { - data: []align(64) u32 = &.{}, + data: []align(64) Atomic(u32) = &.{}, bitSize: u5 = 0, const Self = @This(); @@ -997,12 +997,12 @@ pub fn DynamicPackedIntArray(size: 
comptime_int) type { // MARK: DynamicPackedIn pub fn initCapacity(bitSize: u5) Self { std.debug.assert(bitSize == 0 or bitSize & bitSize - 1 == 0); // Must be a power of 2 return .{ - .data = dynamicIntArrayAllocator.allocator().alignedAlloc(u32, .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize), + .data = dynamicIntArrayAllocator.allocator().alignedAlloc(Atomic(u32), .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize), .bitSize = bitSize, }; } - pub fn deinit(self: *Self) void { + fn deinit(self: *Self) void { dynamicIntArrayAllocator.allocator().free(self.data); self.* = .{}; } @@ -1016,23 +1016,21 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn return result; } - pub fn resizeOnce(self: *Self) void { - const newBitSize = if(self.bitSize != 0) self.bitSize*2 else 1; - var newSelf = Self.initCapacity(newBitSize); + pub fn resizeOnceFrom(self: *Self, other: *const Self) void { + const newBitSize = if(other.bitSize != 0) other.bitSize*2 else 1; + std.debug.assert(self.bitSize == newBitSize); - switch(self.bitSize) { - 0 => @memset(newSelf.data, 0), + switch(other.bitSize) { + 0 => @memset(self.data, .init(0)), inline 1, 2, 4, 8 => |bits| { - for(0..self.data.len) |i| { - const oldVal = self.data[i]; - newSelf.data[2*i] = bitInterleave(bits, oldVal & 0xffff); - newSelf.data[2*i + 1] = bitInterleave(bits, oldVal >> 16); + for(0..other.data.len) |i| { + const oldVal = other.data[i].load(.unordered); + self.data[2*i].store(bitInterleave(bits, oldVal & 0xffff), .unordered); + self.data[2*i + 1].store(bitInterleave(bits, oldVal >> 16), .unordered); } }, else => unreachable, } - dynamicIntArrayAllocator.allocator().free(self.data); - self.* = newSelf; } pub fn getValue(self: *const Self, i: usize) u32 { @@ -1042,7 +1040,7 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn const intIndex = bitIndex >> 5; const bitOffset: u5 = @intCast(bitIndex & 31); const bitMask = (@as(u32, 1) << 
self.bitSize) - 1; - return self.data[intIndex] >> bitOffset & bitMask; + return self.data[intIndex].load(.unordered) >> bitOffset & bitMask; } pub fn setValue(self: *Self, i: usize, value: u32) void { @@ -1053,9 +1051,9 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn const bitOffset: u5 = @intCast(bitIndex & 31); const bitMask = (@as(u32, 1) << self.bitSize) - 1; std.debug.assert(value <= bitMask); - const ptr: *u32 = &self.data[intIndex]; - ptr.* &= ~(bitMask << bitOffset); - ptr.* |= value << bitOffset; + const ptr: *Atomic(u32) = &self.data[intIndex]; + const newValue = (ptr.load(.unordered) & ~(bitMask << bitOffset)) | value << bitOffset; + ptr.store(newValue, .unordered); } pub fn setAndGetValue(self: *Self, i: usize, value: u32) u32 { @@ -1066,45 +1064,57 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn const bitOffset: u5 = @intCast(bitIndex & 31); const bitMask = (@as(u32, 1) << self.bitSize) - 1; std.debug.assert(value <= bitMask); - const ptr: *u32 = &self.data[intIndex]; - const result = ptr.* >> bitOffset & bitMask; - ptr.* &= ~(bitMask << bitOffset); - ptr.* |= value << bitOffset; + const ptr: *Atomic(u32) = &self.data[intIndex]; + const oldValue = ptr.load(.unordered); + const result = oldValue >> bitOffset & bitMask; + const newValue = (oldValue & ~(bitMask << bitOffset)) | value << bitOffset; + ptr.store(newValue, .unordered); return result; } }; } pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: PaletteCompressedRegion - return struct { + const Impl = struct { data: DynamicPackedIntArray(size) = .{}, - palette: []T, + palette: []Atomic(T), paletteOccupancy: []u32, paletteLength: u32, activePaletteEntries: u32, - + }; + return struct { + impl: Atomic(*Impl), const Self = @This(); pub fn init(self: *Self) void { + const impl = main.globalAllocator.create(Impl); self.* = .{ - .palette = main.globalAllocator.alloc(T, 1), + .impl = .init(impl), + }; + 
impl.* = .{ + .palette = main.globalAllocator.alloc(Atomic(T), 1), .paletteOccupancy = main.globalAllocator.alloc(u32, 1), .paletteLength = 1, .activePaletteEntries = 1, }; - self.palette[0] = std.mem.zeroes(T); - self.paletteOccupancy[0] = size; + impl.palette[0] = .init(std.mem.zeroes(T)); + impl.paletteOccupancy[0] = size; } pub fn initCopy(self: *Self, template: *const Self) void { - const dataDupe = DynamicPackedIntArray(size).initCapacity(template.data.bitSize); - @memcpy(dataDupe.data, template.data.data); + const impl = main.globalAllocator.create(Impl); + const templateImpl = template.impl.load(.acquire); + const dataDupe = DynamicPackedIntArray(size).initCapacity(templateImpl.data.bitSize); + @memcpy(dataDupe.data, templateImpl.data.data); self.* = .{ + .impl = .init(impl), + }; + impl.* = .{ .data = dataDupe, - .palette = main.globalAllocator.dupe(T, template.palette), - .paletteOccupancy = main.globalAllocator.dupe(u32, template.paletteOccupancy), - .paletteLength = template.paletteLength, - .activePaletteEntries = template.activePaletteEntries, + .palette = main.globalAllocator.dupe(Atomic(T), templateImpl.palette), + .paletteOccupancy = main.globalAllocator.dupe(u32, templateImpl.paletteOccupancy), + .paletteLength = templateImpl.paletteLength, + .activePaletteEntries = templateImpl.activePaletteEntries, }; } @@ -1112,21 +1122,32 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale std.debug.assert(paletteLength < 0x80000000 and paletteLength > 0); const bitSize: u5 = getTargetBitSize(paletteLength); const bufferLength = @as(u32, 1) << bitSize; + const impl = main.globalAllocator.create(Impl); self.* = .{ + .impl = .init(impl), + }; + impl.* = .{ .data = DynamicPackedIntArray(size).initCapacity(bitSize), - .palette = main.globalAllocator.alloc(T, bufferLength), + .palette = main.globalAllocator.alloc(Atomic(T), bufferLength), .paletteOccupancy = main.globalAllocator.alloc(u32, bufferLength), .paletteLength = 
paletteLength, .activePaletteEntries = 1, }; - self.palette[0] = std.mem.zeroes(T); - self.paletteOccupancy[0] = size; + impl.palette[0] = .init(std.mem.zeroes(T)); + impl.paletteOccupancy[0] = size; + @memset(impl.paletteOccupancy[1..], 0); + @memset(impl.data.data, .init(0)); } - pub fn deinit(self: *Self) void { - self.data.deinit(); - main.globalAllocator.free(self.palette); - main.globalAllocator.free(self.paletteOccupancy); + fn privateDeinit(impl: *Impl, _: usize) void { + impl.data.deinit(); + main.globalAllocator.free(impl.palette); + main.globalAllocator.free(impl.paletteOccupancy); + main.globalAllocator.destroy(impl); + } + + pub fn deferredDeinit(self: *Self) void { + main.heap.GarbageCollection.deferredFree(.{.ptr = self.impl.raw, .freeFunction = main.utils.castFunctionSelfToAnyopaque(privateDeinit)}); } fn getTargetBitSize(paletteLength: u32) u5 { @@ -1137,57 +1158,87 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale } pub fn getValue(self: *const Self, i: usize) T { - return self.palette[self.data.getValue(i)]; + const impl = self.impl.load(.acquire); + return impl.palette[impl.data.getValue(i)].load(.unordered); + } + + pub fn palette(self: *const Self) []Atomic(T) { + const impl = self.impl.raw; + return impl.palette[0..impl.paletteLength]; + } + + pub fn fillUniform(self: *Self, value: T) void { + const impl = self.impl.raw; + if(impl.paletteLength == 1) { + impl.palette[0].store(value, .unordered); + return; + } + var newSelf: Self = undefined; + newSelf.init(); + newSelf.impl.raw.palette[0] = .init(value); + newSelf.impl.raw = self.impl.swap(newSelf.impl.raw, .release); + newSelf.deferredDeinit(); } fn getOrInsertPaletteIndex(noalias self: *Self, val: T) u32 { - std.debug.assert(self.paletteLength <= self.palette.len); + var impl = self.impl.raw; + std.debug.assert(impl.paletteLength <= impl.palette.len); var paletteIndex: u32 = 0; - while(paletteIndex < self.paletteLength) : (paletteIndex += 1) { // TODO: 
There got to be a faster way to do this. Either using SIMD or using a cache or hashmap. - if(std.meta.eql(self.palette[paletteIndex], val)) { + while(paletteIndex < impl.paletteLength) : (paletteIndex += 1) { + if(std.meta.eql(impl.palette[paletteIndex].load(.unordered), val)) { break; } } - if(paletteIndex == self.paletteLength) { - if(self.paletteLength == self.palette.len) { - self.data.resizeOnce(); - self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize); - const oldLen = self.paletteOccupancy.len; - self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize); - @memset(self.paletteOccupancy[oldLen..], 0); + if(paletteIndex == impl.paletteLength) { + if(impl.paletteLength == impl.palette.len) { + var newSelf: Self = undefined; + newSelf.initCapacity(impl.paletteLength*2); + const newImpl = newSelf.impl.raw; + // TODO: Resize stuff + newImpl.data.resizeOnceFrom(&impl.data); + @memcpy(newImpl.palette[0..impl.palette.len], impl.palette); + @memcpy(newImpl.paletteOccupancy[0..impl.paletteOccupancy.len], impl.paletteOccupancy); + @memset(newImpl.paletteOccupancy[impl.paletteOccupancy.len..], 0); + newImpl.activePaletteEntries = impl.activePaletteEntries; + newImpl.paletteLength = impl.paletteLength; + newSelf.impl.raw = self.impl.swap(newImpl, .release); + newSelf.deferredDeinit(); + impl = newImpl; } - self.palette[paletteIndex] = val; - self.paletteLength += 1; - std.debug.assert(self.paletteLength <= self.palette.len); + impl.palette[paletteIndex].store(val, .unordered); + impl.paletteLength += 1; + std.debug.assert(impl.paletteLength <= impl.palette.len); } return paletteIndex; } pub fn setRawValue(noalias self: *Self, i: usize, paletteIndex: u32) void { - const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex); + const impl = self.impl.raw; + const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex); if(previousPaletteIndex != paletteIndex) { 
- if(self.paletteOccupancy[paletteIndex] == 0) { - self.activePaletteEntries += 1; + if(impl.paletteOccupancy[paletteIndex] == 0) { + impl.activePaletteEntries += 1; } - self.paletteOccupancy[paletteIndex] += 1; - self.paletteOccupancy[previousPaletteIndex] -= 1; - if(self.paletteOccupancy[previousPaletteIndex] == 0) { - self.activePaletteEntries -= 1; + impl.paletteOccupancy[paletteIndex] += 1; + impl.paletteOccupancy[previousPaletteIndex] -= 1; + if(impl.paletteOccupancy[previousPaletteIndex] == 0) { + impl.activePaletteEntries -= 1; } } } pub fn setValue(noalias self: *Self, i: usize, val: T) void { const paletteIndex = self.getOrInsertPaletteIndex(val); - const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex); + const impl = self.impl.raw; + const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex); if(previousPaletteIndex != paletteIndex) { - if(self.paletteOccupancy[paletteIndex] == 0) { - self.activePaletteEntries += 1; + if(impl.paletteOccupancy[paletteIndex] == 0) { + impl.activePaletteEntries += 1; } - self.paletteOccupancy[paletteIndex] += 1; - self.paletteOccupancy[previousPaletteIndex] -= 1; - if(self.paletteOccupancy[previousPaletteIndex] == 0) { - self.activePaletteEntries -= 1; + impl.paletteOccupancy[paletteIndex] += 1; + impl.paletteOccupancy[previousPaletteIndex] -= 1; + if(impl.paletteOccupancy[previousPaletteIndex] == 0) { + impl.activePaletteEntries -= 1; } } } @@ -1195,52 +1246,57 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale pub fn setValueInColumn(noalias self: *Self, startIndex: usize, endIndex: usize, val: T) void { std.debug.assert(startIndex < endIndex); const paletteIndex = self.getOrInsertPaletteIndex(val); + const impl = self.impl.raw; for(startIndex..endIndex) |i| { - const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex); - self.paletteOccupancy[previousPaletteIndex] -= 1; - if(self.paletteOccupancy[previousPaletteIndex] == 0) { - 
self.activePaletteEntries -= 1; + const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex); + impl.paletteOccupancy[previousPaletteIndex] -= 1; + if(impl.paletteOccupancy[previousPaletteIndex] == 0) { + impl.activePaletteEntries -= 1; } } - if(self.paletteOccupancy[paletteIndex] == 0) { - self.activePaletteEntries += 1; + if(impl.paletteOccupancy[paletteIndex] == 0) { + impl.activePaletteEntries += 1; } - self.paletteOccupancy[paletteIndex] += @intCast(endIndex - startIndex); + impl.paletteOccupancy[paletteIndex] += @intCast(endIndex - startIndex); } pub fn optimizeLayout(self: *Self) void { - const newBitSize = getTargetBitSize(@intCast(self.activePaletteEntries)); - if(self.data.bitSize == newBitSize) return; + const impl = self.impl.raw; + const newBitSize = getTargetBitSize(@intCast(impl.activePaletteEntries)); + if(impl.data.bitSize == newBitSize) return; - var newData = main.utils.DynamicPackedIntArray(size).initCapacity(newBitSize); - const paletteMap: []u32 = main.stackAllocator.alloc(u32, self.paletteLength); + var newSelf: Self = undefined; + newSelf.initCapacity(impl.activePaletteEntries); + const newImpl = newSelf.impl.raw; + const paletteMap: []u32 = main.stackAllocator.alloc(u32, impl.paletteLength); defer main.stackAllocator.free(paletteMap); { - var i: u32 = 0; - var len: u32 = self.paletteLength; - while(i < len) : (i += 1) outer: { - paletteMap[i] = i; - if(self.paletteOccupancy[i] == 0) { - while(true) { - len -= 1; - if(self.paletteOccupancy[len] != 0) break; - if(len == i) break :outer; - } - paletteMap[len] = i; - self.palette[i] = self.palette[len]; - self.paletteOccupancy[i] = self.paletteOccupancy[len]; - self.paletteOccupancy[len] = 0; + var iNew: u32 = 0; + var iOld: u32 = 0; + const len: u32 = impl.paletteLength; + while(iOld < len) : ({ + iNew += 1; + iOld += 1; + }) outer: { + while(impl.paletteOccupancy[iOld] == 0) { + iOld += 1; + if(iOld >= len) break :outer; } + if(iNew >= impl.activePaletteEntries) std.log.err("{} 
{}", .{iNew, impl.activePaletteEntries}); + std.debug.assert(iNew < impl.activePaletteEntries); + std.debug.assert(iOld < impl.paletteLength); + paletteMap[iOld] = iNew; + newImpl.palette[iNew] = .init(impl.palette[iOld].load(.unordered)); + newImpl.paletteOccupancy[iNew] = impl.paletteOccupancy[iOld]; } } for(0..size) |i| { - newData.setValue(i, paletteMap[self.data.getValue(i)]); + newImpl.data.setValue(i, paletteMap[impl.data.getValue(i)]); } - self.data.deinit(); - self.data = newData; - self.paletteLength = self.activePaletteEntries; - self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize); - self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize); + newImpl.paletteLength = impl.activePaletteEntries; + newImpl.activePaletteEntries = impl.activePaletteEntries; + newSelf.impl.raw = self.impl.swap(newSelf.impl.raw, .release); + newSelf.deferredDeinit(); } }; }