diff --git a/src/utils.zig b/src/utils.zig
index a373f643..ee241536 100644
--- a/src/utils.zig
+++ b/src/utils.zig
@@ -984,50 +984,50 @@ pub fn deinitDynamicIntArrayStorage() void {
 	dynamicIntArrayAllocator.deinit();
 }
 
+/// When you are too poor to afford 128 bits for a slice
+pub fn PowerOfTwoSlice(T: type) type { // MARK: PowerOfTwoSlice
+	const alignment = 64;
+	std.debug.assert(@sizeOf(usize) == @sizeOf(u64));
+	return packed struct {
+		log2Size: u6,
+		ptr: u58,
+
+		fn fromSlice(slice: []align(alignment) T) @This() {
+			if(slice.len == 0) return .{.log2Size = 0, .ptr = 0};
+			std.debug.assert(slice.len == 0 or std.math.isPowerOfTwo(slice.len));
+			return .{
+				.log2Size = @intCast(@ctz(slice.len)),
+				.ptr = @intCast(@intFromPtr(slice.ptr)/alignment),
+			};
+		}
+
+		pub fn toSlice(self: @This()) []align(alignment) T {
+			if(@as(u64, @bitCast(self)) == 0) return &.{};
+			const len = @as(usize, 1) << self.log2Size;
+			const ptr: [*]align(alignment) T = @ptrFromInt(@as(usize, self.ptr)*alignment);
+			return ptr[0..len];
+		}
+	};
+}
+
 /// A packed array of integers with dynamic bit size.
 /// The bit size can be changed using the `resize` function.
 pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray
 	std.debug.assert(std.math.isPowerOfTwo(size));
 	return struct {
-		const Content = packed struct(usize) {
-			const alignment: std.mem.Alignment = .@"64";
-			bitSize: u5, // Fitting it into the 6 alignment bits
-			pad: u1 = 0,
-			dataPointer: u58,
-
-			fn fromData(bitSize: u5, dataSlice: []align(64) Atomic(u32)) Content {
-				const expectedLen = @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize;
-				std.debug.assert(expectedLen == dataSlice.len);
-				return .{
-					.bitSize = bitSize,
-					.dataPointer = @intCast(@intFromPtr(dataSlice.ptr) >> @intFromEnum(alignment)),
-				};
-			}
-
-			fn data(self: Content) []align(64) Atomic(u32) {
-				if(self.bitSize == 0) return &.{};
-				const ptr: [*]align(64) Atomic(u32) = @ptrFromInt(@as(usize, self.dataPointer) << @intFromEnum(alignment));
-				const len = @as(usize, @divExact(size, @bitSizeOf(u32)))*self.bitSize;
-				return ptr[0..len];
-			}
-		};
-
-		content: Atomic(Content) = .init(@bitCast(@as(u64, 0))),
+		content: Atomic(PowerOfTwoSlice(Atomic(u32))) = .init(.fromSlice(&.{})),
 
 		const Self = @This();
 
 		pub fn initCapacity(bitSize: u5) Self {
 			std.debug.assert(bitSize == 0 or bitSize & bitSize - 1 == 0); // Must be a power of 2
 			return .{
-				.content = .init(.fromData(
-					bitSize,
-					dynamicIntArrayAllocator.allocator().alignedAlloc(Atomic(u32), .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize),
-				)),
+				.content = .init(.fromSlice(dynamicIntArrayAllocator.allocator().alignedAlloc(Atomic(u32), .@"64", @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize))),
 			};
 		}
 
 		pub fn deinit(self: *Self) void {
-			main.heap.GarbageCollection.deferredFreeSlice(dynamicIntArrayAllocator.allocator(), Atomic(u32), self.content.swap(@bitCast(@as(u64, 0)), .monotonic).data());
+			main.heap.GarbageCollection.deferredFreeSlice(dynamicIntArrayAllocator.allocator(), Atomic(u32), self.content.swap(@bitCast(@as(u64, 0)), .monotonic).toSlice());
 		}
 
 		inline fn bitInterleave(bits: comptime_int, source: u32) u32 {
@@ -1039,48 +1039,53 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 			return result;
 		}
 
+		pub fn bitSizeUnsafe(self: *Self) u5 {
+			return @intCast(self.content.raw.toSlice().len*@bitSizeOf(u32)/size);
+		}
+
 		pub fn resizeOnce(self: *Self) void {
 			const oldContent = self.content.load(.unordered);
-			const newBitSize = if(oldContent.bitSize != 0) oldContent.bitSize*2 else 1;
+			const newBitSize = if(self.bitSizeUnsafe() != 0) self.bitSizeUnsafe()*2 else 1;
 			const newSelf = Self.initCapacity(newBitSize);
 			const newContent = newSelf.content.raw;
 
-			switch(oldContent.bitSize) {
-				0 => @memset(newContent.data(), .init(0)),
+			switch(self.bitSizeUnsafe()) {
+				0 => @memset(newContent.toSlice(), .init(0)),
 				inline 1, 2, 4, 8 => |bits| {
-					for(0..oldContent.data().len) |i| {
-						const oldVal = oldContent.data()[i].load(.unordered);
-						newContent.data()[2*i] = .init(bitInterleave(bits, oldVal & 0xffff));
-						newContent.data()[2*i + 1] = .init(bitInterleave(bits, oldVal >> 16));
+					for(0..oldContent.toSlice().len) |i| {
+						const oldVal = oldContent.toSlice()[i].load(.unordered);
+						newContent.toSlice()[2*i] = .init(bitInterleave(bits, oldVal & 0xffff));
+						newContent.toSlice()[2*i + 1] = .init(bitInterleave(bits, oldVal >> 16));
 					}
 				},
 				else => unreachable,
 			}
-			main.heap.GarbageCollection.deferredFreeSlice(dynamicIntArrayAllocator.allocator(), Atomic(u32), oldContent.data());
+			main.heap.GarbageCollection.deferredFreeSlice(dynamicIntArrayAllocator.allocator(), Atomic(u32), oldContent.toSlice());
 			self.content.store(newContent, .release);
 		}
 
 		pub fn getValue(self: *const Self, i: usize) u32 {
 			std.debug.assert(i < size);
 			const content = self.content.load(.acquire);
-			if(content.bitSize == 0) return 0;
-			const bitIndex = i*content.bitSize;
+			const bitSize: u5 = @intCast(content.toSlice().len*@bitSizeOf(u32)/size);
+			if(bitSize == 0) return 0;
+			const bitIndex = i*bitSize;
 			const intIndex = bitIndex >> 5;
 			const bitOffset: u5 = @intCast(bitIndex & 31);
-			const bitMask = (@as(u32, 1) << content.bitSize) - 1;
-			return content.data()[intIndex].load(.unordered) >> bitOffset & bitMask;
+			const bitMask = (@as(u32, 1) << bitSize) - 1;
+			return content.toSlice()[intIndex].load(.unordered) >> bitOffset & bitMask;
 		}
 
 		pub fn setValue(self: *Self, i: usize, value: u32) void {
 			std.debug.assert(i < size);
 			const content = self.content.load(.unordered);
-			if(content.bitSize == 0) return;
-			const bitIndex = i*content.bitSize;
+			if(self.bitSizeUnsafe() == 0) return;
+			const bitIndex = i*self.bitSizeUnsafe();
 			const intIndex = bitIndex >> 5;
 			const bitOffset: u5 = @intCast(bitIndex & 31);
-			const bitMask = (@as(u32, 1) << content.bitSize) - 1;
+			const bitMask = (@as(u32, 1) << self.bitSizeUnsafe()) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *Atomic(u32) = &content.data()[intIndex];
+			const ptr: *Atomic(u32) = &content.toSlice()[intIndex];
 			const old = ptr.load(.unordered);
 			ptr.store((old & ~(bitMask << bitOffset)) | value << bitOffset, .unordered);
 		}
@@ -1088,13 +1093,13 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 		pub fn setAndGetValue(self: *Self, i: usize, value: u32) u32 {
 			std.debug.assert(i < size);
 			const content = self.content.load(.unordered);
-			if(content.bitSize == 0) return 0;
-			const bitIndex = i*content.bitSize;
+			if(self.bitSizeUnsafe() == 0) return 0;
+			const bitIndex = i*self.bitSizeUnsafe();
 			const intIndex = bitIndex >> 5;
 			const bitOffset: u5 = @intCast(bitIndex & 31);
-			const bitMask = (@as(u32, 1) << content.bitSize) - 1;
+			const bitMask = (@as(u32, 1) << self.bitSizeUnsafe()) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *Atomic(u32) = &content.data()[intIndex];
+			const ptr: *Atomic(u32) = &content.toSlice()[intIndex];
 			const old = ptr.load(.unordered);
 			ptr.store((old & ~(bitMask << bitOffset)) | value << bitOffset, .unordered);
 			return old >> bitOffset & bitMask;
@@ -1166,9 +1171,9 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			if(paletteIndex == self.paletteLength) {
 				if(self.paletteLength == self.palette.len) {
 					self.data.resizeOnce();
-					self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.content.load(.unordered).bitSize);
+					self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSizeUnsafe());
 					const oldLen = self.paletteOccupancy.len;
-					self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.content.load(.unordered).bitSize);
+					self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSizeUnsafe());
 					@memset(self.paletteOccupancy[oldLen..], 0);
 				}
 				self.palette[paletteIndex] = val;
@@ -1225,7 +1230,7 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 
 		pub fn optimizeLayout(self: *Self) void {
 			const newBitSize = getTargetBitSize(@intCast(self.activePaletteEntries));
-			if(self.data.content.load(.unordered).bitSize == newBitSize) return;
+			if(self.data.bitSizeUnsafe() == newBitSize) return;
 
 			var newData = main.utils.DynamicPackedIntArray(size).initCapacity(newBitSize);
 			const paletteMap: []u32 = main.stackAllocator.alloc(u32, self.paletteLength);
@@ -1254,8 +1259,8 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			newData.content.store(self.data.content.swap(newData.content.load(.unordered), .release), .unordered);
 			newData.deinit();
 			self.paletteLength = self.activePaletteEntries;
-			self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.content.load(.unordered).bitSize);
-			self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.content.load(.unordered).bitSize);
+			self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSizeUnsafe());
+			self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSizeUnsafe());
 		}
 	};
 }
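
The trick behind `PowerOfTwoSlice` is that a slice whose length is a power of two and whose pointer is 64-byte aligned fits into one machine word: 6 bits hold log2 of the length, and the remaining 58 bits hold the pointer divided by its alignment. Below is a minimal standalone sketch of that round trip, assuming only the Zig standard library; `PackedSlice` is a hypothetical stand-in for the patch's type, not repository code.

const std = @import("std");

// Hypothetical stand-in for the patch's PowerOfTwoSlice: the length is stored
// as log2(len) in 6 bits, and the 64-byte-aligned pointer, divided by its
// alignment, fits in the remaining 58 bits of a single u64.
fn PackedSlice(T: type) type {
	const alignment = 64;
	return packed struct(u64) {
		log2Size: u6,
		ptr: u58,

		fn fromSlice(slice: []align(alignment) T) @This() {
			if(slice.len == 0) return .{.log2Size = 0, .ptr = 0};
			std.debug.assert(std.math.isPowerOfTwo(slice.len));
			return .{
				.log2Size = @intCast(@ctz(slice.len)),
				.ptr = @intCast(@intFromPtr(slice.ptr)/alignment),
			};
		}

		fn toSlice(self: @This()) []align(alignment) T {
			if(@as(u64, @bitCast(self)) == 0) return &.{};
			const len = @as(usize, 1) << self.log2Size;
			const ptr: [*]align(alignment) T = @ptrFromInt(@as(usize, self.ptr)*alignment);
			return ptr[0..len];
		}
	};
}

test "slice survives the round trip through 64 bits" {
	var buffer: [8]u32 align(64) = .{1, 2, 3, 4, 5, 6, 7, 8};
	const compact = PackedSlice(u32).fromSlice(&buffer);
	const restored = compact.toSlice();
	try std.testing.expectEqual(@as(usize, 8), restored.len);
	try std.testing.expectEqual(@as(u32, 3), restored[2]);
}

Note that the all-zero bit pattern doubles as the empty slice, which is why the patch can initialize `content` with `.fromSlice(&.{})` and reset it with `@bitCast(@as(u64, 0))` interchangeably.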
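
`getValue`, `setValue`, and `setAndGetValue` all share the same indexing arithmetic, which only works because the bit size is a power of two that divides 32, so an element never straddles a u32 boundary: `bitIndex >> 5` picks the word, `bitIndex & 31` the position inside it, and `(1 << bitSize) - 1` masks the element out. Here is a sketch of just that arithmetic over plain, non-atomic u32 words; `packedGet`/`packedSet` are hypothetical names, not repository functions.

const std = @import("std");

// One element occupies bitSize consecutive bits; since bitSize divides 32,
// a single word load plus a shift/mask pair reads or writes it.
fn packedGet(words: []const u32, bitSize: u5, i: usize) u32 {
	const bitIndex = i*bitSize;
	const intIndex = bitIndex >> 5; // which u32 word
	const bitOffset: u5 = @intCast(bitIndex & 31); // position inside that word
	const bitMask = (@as(u32, 1) << bitSize) - 1;
	return words[intIndex] >> bitOffset & bitMask;
}

fn packedSet(words: []u32, bitSize: u5, i: usize, value: u32) void {
	const bitIndex = i*bitSize;
	const intIndex = bitIndex >> 5;
	const bitOffset: u5 = @intCast(bitIndex & 31);
	const bitMask = (@as(u32, 1) << bitSize) - 1;
	std.debug.assert(value <= bitMask);
	words[intIndex] = (words[intIndex] & ~(bitMask << bitOffset)) | value << bitOffset;
}

test "4-bit elements packed into u32 words" {
	var words = [_]u32{0} ** 2; // room for 16 four-bit elements
	packedSet(&words, 4, 3, 0xA);
	packedSet(&words, 4, 9, 0x5); // lands in the second word
	try std.testing.expectEqual(@as(u32, 0xA), packedGet(&words, 4, 3));
	try std.testing.expectEqual(@as(u32, 0x5), packedGet(&words, 4, 9));
	try std.testing.expectEqual(@as(u32, 0), packedGet(&words, 4, 0));
}

The real methods differ only in going through `Atomic(u32)` loads/stores and in deriving `bitSize` from the slice length, since the new packed representation no longer stores it as a field.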
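
On the `PaletteCompressedRegion` side, `optimizeLayout` can shrink the bit size again because it compacts the palette first: every still-referenced entry moves to the front, and the stored indices are rewritten through a remap table. A deliberately simplified sketch of that compaction over plain `u8` indices follows; the names are hypothetical, and the real code walks the packed atomic array and allocates its map from `main.stackAllocator`.

const std = @import("std");

// Squeeze out palette entries whose occupancy dropped to zero and remap the
// stored indices accordingly; returns how many entries remain in use.
fn compactPalette(allocator: std.mem.Allocator, palette: []u32, occupancy: []u32, data: []u8) !usize {
	const map = try allocator.alloc(u32, palette.len);
	defer allocator.free(map);
	var next: u32 = 0;
	for(occupancy, 0..) |count, i| {
		if(count != 0) {
			map[i] = next; // old index i now lives at index next
			palette[next] = palette[i];
			occupancy[next] = count;
			next += 1;
		}
	}
	for(data) |*index| index.* = @intCast(map[index.*]);
	return next;
}

test "unused palette entries are squeezed out" {
	var palette = [_]u32{7, 42, 9, 13};
	var occupancy = [_]u32{2, 0, 1, 0}; // entries 1 and 3 are no longer referenced
	var data = [_]u8{0, 2, 0};
	const used = try compactPalette(std.testing.allocator, &palette, &occupancy, &data);
	try std.testing.expectEqual(@as(usize, 2), used);
	try std.testing.expectEqual(@as(u32, 9), palette[data[1]]); // old index 2 remapped to 1
}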