Make reading light data thread safe without mutexes (#1727)

After failing in #1725, I decided to take a different approach to making
the palette-compressed data atomic: I added a new indirection which
can be used to swap the entire content in one atomic operation. This
should make the process itself cheaper than what I had implemented
before.

related: https://github.com/PixelGuys/Cubyz/issues/1471
https://github.com/PixelGuys/Cubyz/issues/1413

improves https://github.com/PixelGuys/Cubyz/issues/277

Remaining work:
- [x] double check the implementation
- [x] Fully remove the ReadWriteLock
- [x] Check if this improved meshing performance → yes it did by 10-20%
- [x] Check if this improved block update speed → yes it did by ~25%
This commit is contained in:
IntegratedQuantum 2025-08-02 14:42:34 +02:00 committed by GitHub
parent 047e29fe72
commit 90375b871a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 217 additions and 175 deletions

View File

@ -475,10 +475,6 @@ pub const BlockEntityTypes = struct {
c.glUniform1i(uniforms.quadIndex, @intFromEnum(quad));
const mesh = main.renderer.mesh_storage.getMesh(main.chunk.ChunkPosition.initFromWorldPos(signData.blockPos, 1)) orelse continue :outer;
mesh.lightingData[0].lock.lockRead();
defer mesh.lightingData[0].lock.unlockRead();
mesh.lightingData[1].lock.lockRead();
defer mesh.lightingData[1].lock.unlockRead();
const light: [4]u32 = main.renderer.chunk_meshing.PrimitiveMesh.getLight(mesh, signData.blockPos -% Vec3i{mesh.pos.wx, mesh.pos.wy, mesh.pos.wz}, 0, quad);
c.glUniform4ui(uniforms.lightData, light[0], light[1], light[2], light[3]);
c.glUniform3i(uniforms.chunkPos, signData.blockPos[0] & ~main.chunk.chunkMask, signData.blockPos[1] & ~main.chunk.chunkMask, signData.blockPos[2] & ~main.chunk.chunkMask);

View File

@ -286,7 +286,7 @@ pub const Chunk = struct { // MARK: Chunk
fn deinitContent(self: *Chunk) void {
std.debug.assert(self.blockPosToEntityDataMap.count() == 0);
self.blockPosToEntityDataMap.deinit(main.globalAllocator.allocator);
self.data.deinit();
self.data.deferredDeinit();
}
pub fn unloadBlockEntities(self: *Chunk, comptime side: main.utils.Side) void {

View File

@ -381,8 +381,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
self.max = @splat(-std.math.floatMax(f32));
self.lock.lockRead();
parent.lightingData[0].lock.lockRead();
parent.lightingData[1].lock.lockRead();
for(self.completeList.getEverything()) |*face| {
const light = getLight(parent, .{face.position.x, face.position.y, face.position.z}, face.blockAndQuad.texture, face.blockAndQuad.quadIndex);
const result = lightMap.getOrPut(light) catch unreachable;
@ -401,8 +399,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
self.max = @max(self.max, basePos + cornerPos);
}
}
parent.lightingData[0].lock.unlockRead();
parent.lightingData[1].lock.unlockRead();
self.lock.unlockRead();
}
@ -421,10 +417,6 @@ pub const PrimitiveMesh = struct { // MARK: PrimitiveMesh
return getValues(parent, wx, wy, wz);
}
const neighborMesh = mesh_storage.getMesh(.{.wx = wx, .wy = wy, .wz = wz, .voxelSize = parent.pos.voxelSize}) orelse return .{0, 0, 0, 0, 0, 0};
neighborMesh.lightingData[0].lock.lockRead();
neighborMesh.lightingData[1].lock.lockRead();
defer neighborMesh.lightingData[0].lock.unlockRead();
defer neighborMesh.lightingData[1].lock.unlockRead();
return getValues(neighborMesh, wx, wy, wz);
}
@ -807,7 +799,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
self.mutex.unlock();
self.lightingData[0].propagateLights(lightEmittingBlocks.items, true, lightRefreshList);
sunLight: {
var allSun: bool = self.chunk.data.paletteLength == 1 and self.chunk.data.palette[0].typ == 0;
var allSun: bool = self.chunk.data.palette().len == 1 and self.chunk.data.palette()[0].load(.unordered).typ == 0;
var sunStarters: [chunk.chunkSize*chunk.chunkSize][3]u8 = undefined;
var index: usize = 0;
const lightStartMap = mesh_storage.getLightMapPiece(self.pos.wx, self.pos.wy, self.pos.voxelSize) orelse break :sunLight;
@ -915,10 +907,10 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
hasInternalQuads: bool = false,
alwaysViewThrough: bool = false,
};
var paletteCache = main.stackAllocator.alloc(OcclusionInfo, self.chunk.data.paletteLength);
var paletteCache = main.stackAllocator.alloc(OcclusionInfo, self.chunk.data.palette().len);
defer main.stackAllocator.free(paletteCache);
for(0..self.chunk.data.paletteLength) |i| {
const block = self.chunk.data.palette[i];
for(0..self.chunk.data.palette().len) |i| {
const block = self.chunk.data.palette()[i].load(.unordered);
const model = blocks.meshes.model(block).model();
var result: OcclusionInfo = .{};
if(model.noNeighborsOccluded or block.viewThrough()) {
@ -946,7 +938,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
const y: u5 = @intCast(_y);
for(0..chunk.chunkSize) |_z| {
const z: u5 = @intCast(_z);
const paletteId = self.chunk.data.data.getValue(chunk.getIndex(x, y, z));
const paletteId = self.chunk.data.impl.raw.data.getValue(chunk.getIndex(x, y, z));
const occlusionInfo = paletteCache[paletteId];
const setBit = @as(u32, 1) << z;
if(occlusionInfo.alwaysViewThrough or (!occlusionInfo.canSeeAllNeighbors and occlusionInfo.canSeeNeighbor == 0)) {
@ -986,7 +978,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
const y: u5 = @intCast(_y);
for(0..chunk.chunkSize) |_z| {
const z: u5 = @intCast(_z);
const paletteId = self.chunk.data.data.getValue(chunk.getIndex(x, y, z));
const paletteId = self.chunk.data.impl.raw.data.getValue(chunk.getIndex(x, y, z));
const occlusionInfo = paletteCache[paletteId];
const setBit = @as(u32, 1) << z;
if(depthFilteredViewThroughMask[x][y] & setBit != 0) {} else if(occlusionInfo.canSeeAllNeighbors) {
@ -1002,7 +994,7 @@ pub const ChunkMesh = struct { // MARK: ChunkMesh
hasFaces[x][y] |= setBit;
}
if(occlusionInfo.hasInternalQuads) {
const block = self.chunk.data.palette[paletteId];
const block = self.chunk.data.palette()[paletteId].load(.unordered);
if(block.transparent()) {
appendInternalQuads(block, x, y, z, false, &transparentCore, main.stackAllocator);
} else {

View File

@ -17,6 +17,21 @@ pub fn deinit() void {
memoryPool.deinit();
}
/// An RGB light value packed into exactly 32 bits.
/// It is used as the element type of the palette-compressed light storage, where palette entries are
/// held in atomics and looked up by comparing whole values.
const LightValue = packed struct(u32) {
	r: u8,
	g: u8,
	b: u8,
	/// Filler byte that rounds the struct up to 32 bits.
	/// It is pinned to 0 instead of `undefined` so the full word is deterministic: two values with the
	/// same r/g/b always have the same bit pattern and therefore always compare equal.
	pad: u8 = 0,

	/// Builds a light value from `{r, g, b}`; the padding byte takes its default.
	fn fromArray(arr: [3]u8) LightValue {
		return .{.r = arr[0], .g = arr[1], .b = arr[2]};
	}

	/// Returns the three colour channels as `{r, g, b}`, dropping the padding byte.
	fn toArray(self: LightValue) [3]u8 {
		return .{self.r, self.g, self.b};
	}
};
fn extractColor(in: u32) [3]u8 {
return .{
@truncate(in >> 16),
@ -26,14 +41,14 @@ fn extractColor(in: u32) [3]u8 {
}
pub const ChannelChunk = struct {
data: main.utils.PaletteCompressedRegion([3]u8, chunk.chunkVolume),
lock: main.utils.ReadWriteLock,
data: main.utils.PaletteCompressedRegion(LightValue, chunk.chunkVolume),
mutex: std.Thread.Mutex,
ch: *chunk.Chunk,
isSun: bool,
pub fn init(ch: *chunk.Chunk, isSun: bool) *ChannelChunk {
const self = memoryPool.create();
self.lock = .{};
self.mutex = .{};
self.ch = ch;
self.isSun = isSun;
self.data.init();
@ -41,7 +56,7 @@ pub const ChannelChunk = struct {
}
pub fn deinit(self: *ChannelChunk) void {
self.data.deinit();
self.data.deferredDeinit();
memoryPool.destroy(self);
}
@ -66,9 +81,8 @@ pub const ChannelChunk = struct {
};
pub fn getValue(self: *ChannelChunk, x: i32, y: i32, z: i32) [3]u8 {
self.lock.assertLockedRead();
const index = chunk.getIndex(x, y, z);
return self.data.getValue(index);
return self.data.getValue(index).toArray();
}
fn calculateIncomingOcclusion(result: *[3]u8, block: blocks.Block, voxelSize: u31, neighbor: chunk.Neighbor) void {
@ -106,17 +120,17 @@ pub const ChannelChunk = struct {
}
}
self.lock.lockWrite();
self.mutex.lock();
while(lightQueue.popFront()) |entry| {
const index = chunk.getIndex(entry.x, entry.y, entry.z);
const oldValue: [3]u8 = self.data.getValue(index);
const oldValue: [3]u8 = self.data.getValue(index).toArray();
const newValue: [3]u8 = .{
@max(entry.value[0], oldValue[0]),
@max(entry.value[1], oldValue[1]),
@max(entry.value[2], oldValue[2]),
};
if(newValue[0] == oldValue[0] and newValue[1] == oldValue[1] and newValue[2] == oldValue[2]) continue;
self.data.setValue(index, newValue);
self.data.setValue(index, .fromArray(newValue));
for(chunk.Neighbor.iterable) |neighbor| {
if(neighbor.toInt() == entry.sourceDir) continue;
const nx = entry.x + neighbor.relX();
@ -140,7 +154,7 @@ pub const ChannelChunk = struct {
}
}
self.data.optimizeLayout();
self.lock.unlockWrite();
self.mutex.unlock();
self.addSelfToLightRefreshList(lightRefreshList);
for(chunk.Neighbor.iterable) |neighbor| {
@ -172,10 +186,10 @@ pub const ChannelChunk = struct {
}
var isFirstIteration: bool = isFirstBlock;
self.lock.lockWrite();
self.mutex.lock();
while(lightQueue.popFront()) |entry| {
const index = chunk.getIndex(entry.x, entry.y, entry.z);
const oldValue: [3]u8 = self.data.getValue(index);
const oldValue: [3]u8 = self.data.getValue(index).toArray();
var activeValue: @Vector(3, bool) = @bitCast(entry.activeValue);
var append: bool = false;
if(activeValue[0] and entry.value[0] != oldValue[0]) {
@ -209,7 +223,7 @@ pub const ChannelChunk = struct {
if(activeValue[0]) insertValue[0] = 0;
if(activeValue[1]) insertValue[1] = 0;
if(activeValue[2]) insertValue[2] = 0;
self.data.setValue(index, insertValue);
self.data.setValue(index, .fromArray(insertValue));
for(chunk.Neighbor.iterable) |neighbor| {
if(neighbor.toInt() == entry.sourceDir) continue;
const nx = entry.x + neighbor.relX();
@ -231,7 +245,7 @@ pub const ChannelChunk = struct {
lightQueue.pushBack(result);
}
}
self.lock.unlockWrite();
self.mutex.unlock();
self.addSelfToLightRefreshList(lightRefreshList);
for(chunk.Neighbor.iterable) |neighbor| {
@ -307,11 +321,9 @@ pub const ChannelChunk = struct {
const otherZ = z +% neighbor.relZ() & chunk.chunkMask;
const neighborMesh = mesh_storage.getNeighbor(self.ch.pos, self.ch.pos.voxelSize, neighbor) orelse continue;
const neighborLightChunk = neighborMesh.lightingData[@intFromBool(self.isSun)];
neighborLightChunk.lock.lockRead();
defer neighborLightChunk.lock.unlockRead();
const index = chunk.getIndex(x, y, z);
const neighborIndex = chunk.getIndex(otherX, otherY, otherZ);
var value: [3]u8 = neighborLightChunk.data.getValue(neighborIndex);
var value: [3]u8 = neighborLightChunk.data.getValue(neighborIndex).toArray();
if(!self.isSun or neighbor != .dirUp or value[0] != 255 or value[1] != 255 or value[2] != 255) {
value[0] -|= 8*|@as(u8, @intCast(self.ch.pos.voxelSize));
value[1] -|= 8*|@as(u8, @intCast(self.ch.pos.voxelSize));
@ -330,13 +342,9 @@ pub const ChannelChunk = struct {
pub fn propagateUniformSun(self: *ChannelChunk, lightRefreshList: *main.List(chunk.ChunkPosition)) void {
std.debug.assert(self.isSun);
self.lock.lockWrite();
if(self.data.paletteLength != 1) {
self.data.deinit();
self.data.init();
}
self.data.palette[0] = .{255, 255, 255};
self.lock.unlockWrite();
self.mutex.lock();
self.data.fillUniform(.fromArray(.{255, 255, 255}));
self.mutex.unlock();
const val = 255 -| 8*|@as(u8, @intCast(self.ch.pos.voxelSize));
var lightQueue = main.utils.CircularBufferQueue(Entry).init(main.stackAllocator, 1 << 12);
defer lightQueue.deinit();
@ -378,12 +386,10 @@ pub const ChannelChunk = struct {
pub fn propagateLightsDestructive(self: *ChannelChunk, lights: []const [3]u8, lightRefreshList: *main.List(chunk.ChunkPosition)) void {
var lightQueue = main.utils.CircularBufferQueue(Entry).init(main.stackAllocator, 1 << 12);
defer lightQueue.deinit();
self.lock.lockRead();
for(lights) |pos| {
const index = chunk.getIndex(pos[0], pos[1], pos[2]);
lightQueue.pushBack(.{.x = @intCast(pos[0]), .y = @intCast(pos[1]), .z = @intCast(pos[2]), .value = self.data.getValue(index), .sourceDir = 6, .activeValue = 0b111});
lightQueue.pushBack(.{.x = @intCast(pos[0]), .y = @intCast(pos[1]), .z = @intCast(pos[2]), .value = self.data.getValue(index).toArray(), .sourceDir = 6, .activeValue = 0b111});
}
self.lock.unlockRead();
var constructiveEntries: main.ListUnmanaged(ChunkEntries) = .{};
defer constructiveEntries.deinit(main.stackAllocator);
constructiveEntries.append(main.stackAllocator, .{
@ -395,10 +401,10 @@ pub const ChannelChunk = struct {
var entryList = entries.entries;
defer entryList.deinit(main.stackAllocator);
const channelChunk = if(mesh) |_mesh| _mesh.lightingData[@intFromBool(self.isSun)] else self;
channelChunk.lock.lockWrite();
channelChunk.mutex.lock();
for(entryList.items) |entry| {
const index = chunk.getIndex(entry.x, entry.y, entry.z);
var value = channelChunk.data.getValue(index);
var value = channelChunk.data.getValue(index).toArray();
const light = if(self.isSun) .{0, 0, 0} else extractColor(channelChunk.ch.data.getValue(index).light());
value = .{
@max(value[0], light[0]),
@ -406,10 +412,10 @@ pub const ChannelChunk = struct {
@max(value[2], light[2]),
};
if(value[0] == 0 and value[1] == 0 and value[2] == 0) continue;
channelChunk.data.setValue(index, .{0, 0, 0});
channelChunk.data.setValue(index, .fromArray(.{0, 0, 0}));
lightQueue.pushBack(.{.x = entry.x, .y = entry.y, .z = entry.z, .value = value, .sourceDir = 6, .activeValue = 0b111});
}
channelChunk.lock.unlockWrite();
channelChunk.mutex.unlock();
channelChunk.propagateDirect(&lightQueue, lightRefreshList);
}
}

View File

@ -197,10 +197,6 @@ pub fn getLight(wx: i32, wy: i32, wz: i32) ?[6]u8 {
const x = (wx >> mesh.chunk.voxelSizeShift) & chunk.chunkMask;
const y = (wy >> mesh.chunk.voxelSizeShift) & chunk.chunkMask;
const z = (wz >> mesh.chunk.voxelSizeShift) & chunk.chunkMask;
mesh.lightingData[0].lock.lockRead();
defer mesh.lightingData[0].lock.unlockRead();
mesh.lightingData[1].lock.lockRead();
defer mesh.lightingData[1].lock.unlockRead();
return mesh.lightingData[1].getValue(x, y, z) ++ mesh.lightingData[0].getValue(x, y, z);
}

View File

@ -282,18 +282,18 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression
}
fn compressBlockData(ch: *chunk.Chunk, allowLossy: bool, writer: *BinaryWriter) void {
if(ch.data.paletteLength == 1) {
if(ch.data.palette().len == 1) {
writer.writeEnum(ChunkCompressionAlgo, .uniform);
writer.writeInt(u32, ch.data.palette[0].toInt());
writer.writeInt(u32, ch.data.palette()[0].load(.unordered).toInt());
return;
}
if(ch.data.paletteLength < 256) {
if(ch.data.palette().len < 256) {
var uncompressedData: [chunk.chunkVolume]u8 = undefined;
var solidMask: [chunk.chunkSize*chunk.chunkSize]u32 = undefined;
for(0..chunk.chunkVolume) |i| {
uncompressedData[i] = @intCast(ch.data.data.getValue(i));
uncompressedData[i] = @intCast(ch.data.impl.raw.data.getValue(i));
if(allowLossy) {
const block = ch.data.palette[uncompressedData[i]];
const block = ch.data.palette()[uncompressedData[i]].load(.unordered);
const model = main.blocks.meshes.model(block).model();
const occluder = model.allNeighborsOccluded and !block.viewThrough();
if(occluder) {
@ -323,10 +323,10 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression
defer main.stackAllocator.free(compressedData);
writer.writeEnum(ChunkCompressionAlgo, .deflate_with_8bit_palette);
writer.writeInt(u8, @intCast(ch.data.paletteLength));
writer.writeInt(u8, @intCast(ch.data.palette().len));
for(0..ch.data.paletteLength) |i| {
writer.writeInt(u32, ch.data.palette[i].toInt());
for(0..ch.data.palette().len) |i| {
writer.writeInt(u32, ch.data.palette()[i].load(.unordered).toInt());
}
writer.writeVarInt(usize, compressedData.len);
writer.writeSlice(compressedData);
@ -347,7 +347,7 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression
}
fn decompressBlockData(ch: *chunk.Chunk, reader: *BinaryReader) !void {
std.debug.assert(ch.data.paletteLength == 1);
std.debug.assert(ch.data.palette().len == 1);
const compressionAlgorithm = try reader.readEnum(ChunkCompressionAlgo);
@ -371,11 +371,11 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression
.deflate_with_8bit_palette, .deflate_with_8bit_palette_no_block_entities => {
const paletteLength = try reader.readInt(u8);
ch.data.deinit();
ch.data.deferredDeinit();
ch.data.initCapacity(paletteLength);
for(0..paletteLength) |i| {
ch.data.palette[i] = main.blocks.Block.fromInt(try reader.readInt(u32));
ch.data.palette()[i] = .init(main.blocks.Block.fromInt(try reader.readInt(u32)));
}
const decompressedData = main.stackAllocator.alloc(u8, chunk.chunkVolume);
@ -392,7 +392,7 @@ pub const ChunkCompression = struct { // MARK: ChunkCompression
}
},
.uniform => {
ch.data.palette[0] = main.blocks.Block.fromInt(try reader.readInt(u32));
ch.data.palette()[0] = .init(main.blocks.Block.fromInt(try reader.readInt(u32)));
},
}
}

View File

@ -45,15 +45,11 @@ pub fn generate(worldSeed: u64, chunk: *main.chunk.ServerChunk, caveMap: CaveMap
}
}
if(minHeight > chunk.super.pos.wz +| chunk.super.width) {
chunk.super.data.deinit();
chunk.super.data.init();
chunk.super.data.palette[0] = stone;
chunk.super.data.fillUniform(stone);
return;
}
if(maxHeight < chunk.super.pos.wz) {
chunk.super.data.deinit();
chunk.super.data.init();
chunk.super.data.palette[0] = air;
chunk.super.data.fillUniform(air);
return;
}
}

View File

@ -328,8 +328,8 @@ const ChunkManager = struct { // MARK: ChunkManager
generator.generate(server.world.?.seed ^ generator.generatorSeed, ch, caveMap, biomeMap);
}
if(pos.voxelSize != 1) { // Generate LOD replacements
for(ch.super.data.palette[0..ch.super.data.paletteLength]) |*block| {
block.typ = block.lodReplacement();
for(ch.super.data.palette()) |*block| {
block.store(.{.typ = block.load(.unordered).lodReplacement(), .data = block.load(.unordered).data}, .unordered);
}
}
return ch;

View File

@ -989,7 +989,7 @@ pub fn deinitDynamicIntArrayStorage() void {
pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray
std.debug.assert(std.math.isPowerOfTwo(size));
return struct {
data: []align(64) u32 = &.{},
data: []align(64) Atomic(u32) = &.{},
bitSize: u5 = 0,
const Self = @This();
@ -997,12 +997,12 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
pub fn initCapacity(bitSize: u5) Self {
std.debug.assert(bitSize == 0 or bitSize & bitSize - 1 == 0); // Must be a power of 2
return .{
.data = dynamicIntArrayAllocator.allocator().alignedAlloc(u32, .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize),
.data = dynamicIntArrayAllocator.allocator().alignedAlloc(Atomic(u32), .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize),
.bitSize = bitSize,
};
}
pub fn deinit(self: *Self) void {
fn deinit(self: *Self) void {
dynamicIntArrayAllocator.allocator().free(self.data);
self.* = .{};
}
@ -1016,23 +1016,21 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
return result;
}
pub fn resizeOnce(self: *Self) void {
const newBitSize = if(self.bitSize != 0) self.bitSize*2 else 1;
var newSelf = Self.initCapacity(newBitSize);
pub fn resizeOnceFrom(self: *Self, other: *const Self) void {
const newBitSize = if(other.bitSize != 0) other.bitSize*2 else 1;
std.debug.assert(self.bitSize == newBitSize);
switch(self.bitSize) {
0 => @memset(newSelf.data, 0),
switch(other.bitSize) {
0 => @memset(self.data, .init(0)),
inline 1, 2, 4, 8 => |bits| {
for(0..self.data.len) |i| {
const oldVal = self.data[i];
newSelf.data[2*i] = bitInterleave(bits, oldVal & 0xffff);
newSelf.data[2*i + 1] = bitInterleave(bits, oldVal >> 16);
for(0..other.data.len) |i| {
const oldVal = other.data[i].load(.unordered);
self.data[2*i].store(bitInterleave(bits, oldVal & 0xffff), .unordered);
self.data[2*i + 1].store(bitInterleave(bits, oldVal >> 16), .unordered);
}
},
else => unreachable,
}
dynamicIntArrayAllocator.allocator().free(self.data);
self.* = newSelf;
}
pub fn getValue(self: *const Self, i: usize) u32 {
@ -1042,7 +1040,7 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
const intIndex = bitIndex >> 5;
const bitOffset: u5 = @intCast(bitIndex & 31);
const bitMask = (@as(u32, 1) << self.bitSize) - 1;
return self.data[intIndex] >> bitOffset & bitMask;
return self.data[intIndex].load(.unordered) >> bitOffset & bitMask;
}
pub fn setValue(self: *Self, i: usize, value: u32) void {
@ -1053,9 +1051,9 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
const bitOffset: u5 = @intCast(bitIndex & 31);
const bitMask = (@as(u32, 1) << self.bitSize) - 1;
std.debug.assert(value <= bitMask);
const ptr: *u32 = &self.data[intIndex];
ptr.* &= ~(bitMask << bitOffset);
ptr.* |= value << bitOffset;
const ptr: *Atomic(u32) = &self.data[intIndex];
const newValue = (ptr.load(.unordered) & ~(bitMask << bitOffset)) | value << bitOffset;
ptr.store(newValue, .unordered);
}
pub fn setAndGetValue(self: *Self, i: usize, value: u32) u32 {
@ -1066,45 +1064,57 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
const bitOffset: u5 = @intCast(bitIndex & 31);
const bitMask = (@as(u32, 1) << self.bitSize) - 1;
std.debug.assert(value <= bitMask);
const ptr: *u32 = &self.data[intIndex];
const result = ptr.* >> bitOffset & bitMask;
ptr.* &= ~(bitMask << bitOffset);
ptr.* |= value << bitOffset;
const ptr: *Atomic(u32) = &self.data[intIndex];
const oldValue = ptr.load(.unordered);
const result = oldValue >> bitOffset & bitMask;
const newValue = (oldValue & ~(bitMask << bitOffset)) | value << bitOffset;
ptr.store(newValue, .unordered);
return result;
}
};
}
pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: PaletteCompressedRegion
return struct {
const Impl = struct {
data: DynamicPackedIntArray(size) = .{},
palette: []T,
palette: []Atomic(T),
paletteOccupancy: []u32,
paletteLength: u32,
activePaletteEntries: u32,
};
return struct {
impl: Atomic(*Impl),
const Self = @This();
pub fn init(self: *Self) void {
const impl = main.globalAllocator.create(Impl);
self.* = .{
.palette = main.globalAllocator.alloc(T, 1),
.impl = .init(impl),
};
impl.* = .{
.palette = main.globalAllocator.alloc(Atomic(T), 1),
.paletteOccupancy = main.globalAllocator.alloc(u32, 1),
.paletteLength = 1,
.activePaletteEntries = 1,
};
self.palette[0] = std.mem.zeroes(T);
self.paletteOccupancy[0] = size;
impl.palette[0] = .init(std.mem.zeroes(T));
impl.paletteOccupancy[0] = size;
}
pub fn initCopy(self: *Self, template: *const Self) void {
const dataDupe = DynamicPackedIntArray(size).initCapacity(template.data.bitSize);
@memcpy(dataDupe.data, template.data.data);
const impl = main.globalAllocator.create(Impl);
const templateImpl = template.impl.load(.acquire);
const dataDupe = DynamicPackedIntArray(size).initCapacity(templateImpl.data.bitSize);
@memcpy(dataDupe.data, templateImpl.data.data);
self.* = .{
.impl = .init(impl),
};
impl.* = .{
.data = dataDupe,
.palette = main.globalAllocator.dupe(T, template.palette),
.paletteOccupancy = main.globalAllocator.dupe(u32, template.paletteOccupancy),
.paletteLength = template.paletteLength,
.activePaletteEntries = template.activePaletteEntries,
.palette = main.globalAllocator.dupe(Atomic(T), templateImpl.palette),
.paletteOccupancy = main.globalAllocator.dupe(u32, templateImpl.paletteOccupancy),
.paletteLength = templateImpl.paletteLength,
.activePaletteEntries = templateImpl.activePaletteEntries,
};
}
@ -1112,21 +1122,32 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
std.debug.assert(paletteLength < 0x80000000 and paletteLength > 0);
const bitSize: u5 = getTargetBitSize(paletteLength);
const bufferLength = @as(u32, 1) << bitSize;
const impl = main.globalAllocator.create(Impl);
self.* = .{
.impl = .init(impl),
};
impl.* = .{
.data = DynamicPackedIntArray(size).initCapacity(bitSize),
.palette = main.globalAllocator.alloc(T, bufferLength),
.palette = main.globalAllocator.alloc(Atomic(T), bufferLength),
.paletteOccupancy = main.globalAllocator.alloc(u32, bufferLength),
.paletteLength = paletteLength,
.activePaletteEntries = 1,
};
self.palette[0] = std.mem.zeroes(T);
self.paletteOccupancy[0] = size;
impl.palette[0] = .init(std.mem.zeroes(T));
impl.paletteOccupancy[0] = size;
@memset(impl.paletteOccupancy[1..], 0);
@memset(impl.data.data, .init(0));
}
pub fn deinit(self: *Self) void {
self.data.deinit();
main.globalAllocator.free(self.palette);
main.globalAllocator.free(self.paletteOccupancy);
/// Releases everything owned by `impl` and then `impl` itself.
/// This is the function handed to the garbage collection by `deferredDeinit`; the second argument is unused.
fn privateDeinit(impl: *Impl, _: usize) void {
	// The Impl allocation must go last, after the buffers it points to have been freed.
	defer main.globalAllocator.destroy(impl);
	impl.data.deinit();
	main.globalAllocator.free(impl.palette);
	main.globalAllocator.free(impl.paletteOccupancy);
}
/// Hands the current implementation object to `main.heap.GarbageCollection.deferredFree`, with
/// `privateDeinit` as the function that will eventually release it. Nothing is freed directly here.
pub fn deferredDeinit(self: *Self) void {
	const retiredImpl = self.impl.raw;
	const freeFunction = main.utils.castFunctionSelfToAnyopaque(privateDeinit);
	main.heap.GarbageCollection.deferredFree(.{.ptr = retiredImpl, .freeFunction = freeFunction});
}
fn getTargetBitSize(paletteLength: u32) u5 {
@ -1137,57 +1158,87 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
}
pub fn getValue(self: *const Self, i: usize) T {
return self.palette[self.data.getValue(i)];
const impl = self.impl.load(.acquire);
return impl.palette[impl.data.getValue(i)].load(.unordered);
}
/// Returns the used part of the palette, i.e. the first `paletteLength` entries of the backing buffer.
/// The implementation pointer is read directly through `.raw`, not with an atomic load.
/// NOTE(review): presumably meant for callers that are not racing with a writer — confirm.
pub fn palette(self: *const Self) []Atomic(T) {
	const current = self.impl.raw;
	const usedEntries = current.paletteLength;
	return current.palette[0..usedEntries];
}
/// Makes every position of the region read as `value`.
/// If the palette already has exactly one entry, that entry is overwritten in place. Otherwise a fresh
/// single-entry implementation is built, swapped in with one atomic exchange, and the previous
/// implementation is passed to `deferredDeinit`.
pub fn fillUniform(self: *Self, value: T) void {
	const current = self.impl.raw;
	if(current.paletteLength != 1) {
		var replacement: Self = undefined;
		replacement.init();
		replacement.impl.raw.palette[0] = .init(value);
		// After the swap, `replacement` holds the old implementation so it can be retired below.
		replacement.impl.raw = self.impl.swap(replacement.impl.raw, .release);
		replacement.deferredDeinit();
		return;
	}
	current.palette[0].store(value, .unordered);
}
fn getOrInsertPaletteIndex(noalias self: *Self, val: T) u32 {
std.debug.assert(self.paletteLength <= self.palette.len);
var impl = self.impl.raw;
std.debug.assert(impl.paletteLength <= impl.palette.len);
var paletteIndex: u32 = 0;
while(paletteIndex < self.paletteLength) : (paletteIndex += 1) { // TODO: There got to be a faster way to do this. Either using SIMD or using a cache or hashmap.
if(std.meta.eql(self.palette[paletteIndex], val)) {
while(paletteIndex < impl.paletteLength) : (paletteIndex += 1) {
if(std.meta.eql(impl.palette[paletteIndex].load(.unordered), val)) {
break;
}
}
if(paletteIndex == self.paletteLength) {
if(self.paletteLength == self.palette.len) {
self.data.resizeOnce();
self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize);
const oldLen = self.paletteOccupancy.len;
self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize);
@memset(self.paletteOccupancy[oldLen..], 0);
if(paletteIndex == impl.paletteLength) {
if(impl.paletteLength == impl.palette.len) {
var newSelf: Self = undefined;
newSelf.initCapacity(impl.paletteLength*2);
const newImpl = newSelf.impl.raw;
// TODO: Resize stuff
newImpl.data.resizeOnceFrom(&impl.data);
@memcpy(newImpl.palette[0..impl.palette.len], impl.palette);
@memcpy(newImpl.paletteOccupancy[0..impl.paletteOccupancy.len], impl.paletteOccupancy);
@memset(newImpl.paletteOccupancy[impl.paletteOccupancy.len..], 0);
newImpl.activePaletteEntries = impl.activePaletteEntries;
newImpl.paletteLength = impl.paletteLength;
newSelf.impl.raw = self.impl.swap(newImpl, .release);
newSelf.deferredDeinit();
impl = newImpl;
}
self.palette[paletteIndex] = val;
self.paletteLength += 1;
std.debug.assert(self.paletteLength <= self.palette.len);
impl.palette[paletteIndex].store(val, .unordered);
impl.paletteLength += 1;
std.debug.assert(impl.paletteLength <= impl.palette.len);
}
return paletteIndex;
}
pub fn setRawValue(noalias self: *Self, i: usize, paletteIndex: u32) void {
const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex);
const impl = self.impl.raw;
const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex);
if(previousPaletteIndex != paletteIndex) {
if(self.paletteOccupancy[paletteIndex] == 0) {
self.activePaletteEntries += 1;
if(impl.paletteOccupancy[paletteIndex] == 0) {
impl.activePaletteEntries += 1;
}
self.paletteOccupancy[paletteIndex] += 1;
self.paletteOccupancy[previousPaletteIndex] -= 1;
if(self.paletteOccupancy[previousPaletteIndex] == 0) {
self.activePaletteEntries -= 1;
impl.paletteOccupancy[paletteIndex] += 1;
impl.paletteOccupancy[previousPaletteIndex] -= 1;
if(impl.paletteOccupancy[previousPaletteIndex] == 0) {
impl.activePaletteEntries -= 1;
}
}
}
pub fn setValue(noalias self: *Self, i: usize, val: T) void {
const paletteIndex = self.getOrInsertPaletteIndex(val);
const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex);
const impl = self.impl.raw;
const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex);
if(previousPaletteIndex != paletteIndex) {
if(self.paletteOccupancy[paletteIndex] == 0) {
self.activePaletteEntries += 1;
if(impl.paletteOccupancy[paletteIndex] == 0) {
impl.activePaletteEntries += 1;
}
self.paletteOccupancy[paletteIndex] += 1;
self.paletteOccupancy[previousPaletteIndex] -= 1;
if(self.paletteOccupancy[previousPaletteIndex] == 0) {
self.activePaletteEntries -= 1;
impl.paletteOccupancy[paletteIndex] += 1;
impl.paletteOccupancy[previousPaletteIndex] -= 1;
if(impl.paletteOccupancy[previousPaletteIndex] == 0) {
impl.activePaletteEntries -= 1;
}
}
}
@ -1195,52 +1246,57 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
pub fn setValueInColumn(noalias self: *Self, startIndex: usize, endIndex: usize, val: T) void {
std.debug.assert(startIndex < endIndex);
const paletteIndex = self.getOrInsertPaletteIndex(val);
const impl = self.impl.raw;
for(startIndex..endIndex) |i| {
const previousPaletteIndex = self.data.setAndGetValue(i, paletteIndex);
self.paletteOccupancy[previousPaletteIndex] -= 1;
if(self.paletteOccupancy[previousPaletteIndex] == 0) {
self.activePaletteEntries -= 1;
const previousPaletteIndex = impl.data.setAndGetValue(i, paletteIndex);
impl.paletteOccupancy[previousPaletteIndex] -= 1;
if(impl.paletteOccupancy[previousPaletteIndex] == 0) {
impl.activePaletteEntries -= 1;
}
}
if(self.paletteOccupancy[paletteIndex] == 0) {
self.activePaletteEntries += 1;
if(impl.paletteOccupancy[paletteIndex] == 0) {
impl.activePaletteEntries += 1;
}
self.paletteOccupancy[paletteIndex] += @intCast(endIndex - startIndex);
impl.paletteOccupancy[paletteIndex] += @intCast(endIndex - startIndex);
}
pub fn optimizeLayout(self: *Self) void {
const newBitSize = getTargetBitSize(@intCast(self.activePaletteEntries));
if(self.data.bitSize == newBitSize) return;
const impl = self.impl.raw;
const newBitSize = getTargetBitSize(@intCast(impl.activePaletteEntries));
if(impl.data.bitSize == newBitSize) return;
var newData = main.utils.DynamicPackedIntArray(size).initCapacity(newBitSize);
const paletteMap: []u32 = main.stackAllocator.alloc(u32, self.paletteLength);
var newSelf: Self = undefined;
newSelf.initCapacity(impl.activePaletteEntries);
const newImpl = newSelf.impl.raw;
const paletteMap: []u32 = main.stackAllocator.alloc(u32, impl.paletteLength);
defer main.stackAllocator.free(paletteMap);
{
var i: u32 = 0;
var len: u32 = self.paletteLength;
while(i < len) : (i += 1) outer: {
paletteMap[i] = i;
if(self.paletteOccupancy[i] == 0) {
while(true) {
len -= 1;
if(self.paletteOccupancy[len] != 0) break;
if(len == i) break :outer;
}
paletteMap[len] = i;
self.palette[i] = self.palette[len];
self.paletteOccupancy[i] = self.paletteOccupancy[len];
self.paletteOccupancy[len] = 0;
var iNew: u32 = 0;
var iOld: u32 = 0;
const len: u32 = impl.paletteLength;
while(iOld < len) : ({
iNew += 1;
iOld += 1;
}) outer: {
while(impl.paletteOccupancy[iOld] == 0) {
iOld += 1;
if(iOld >= len) break :outer;
}
if(iNew >= impl.activePaletteEntries) std.log.err("{} {}", .{iNew, impl.activePaletteEntries});
std.debug.assert(iNew < impl.activePaletteEntries);
std.debug.assert(iOld < impl.paletteLength);
paletteMap[iOld] = iNew;
newImpl.palette[iNew] = .init(impl.palette[iOld].load(.unordered));
newImpl.paletteOccupancy[iNew] = impl.paletteOccupancy[iOld];
}
}
for(0..size) |i| {
newData.setValue(i, paletteMap[self.data.getValue(i)]);
newImpl.data.setValue(i, paletteMap[impl.data.getValue(i)]);
}
self.data.deinit();
self.data = newData;
self.paletteLength = self.activePaletteEntries;
self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize);
self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize);
newImpl.paletteLength = impl.activePaletteEntries;
newImpl.activePaletteEntries = impl.activePaletteEntries;
newSelf.impl.raw = self.impl.swap(newSelf.impl.raw, .release);
newSelf.deferredDeinit();
}
};
}