Mirror of https://github.com/PixelGuys/Cubyz.git, synced 2025-08-03 11:17:05 -04:00
Restrict palette compression to power of 2 bitSizes and improve the addressing to use aligned reads.
This makes it slightly faster, and since the padding bytes are no longer needed it also slightly reduces memory usage. Fixes #443. Would make #322 easier.
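
Illustration (not part of this commit; a minimal standalone sketch with hypothetical free functions over a plain []u32 slice): once bitSize is restricted to a power of two, a packed value can never straddle a u32 boundary, so each access becomes a single aligned u32 read or read-modify-write instead of an unaligned pointer cast over padded bytes.

const std = @import("std");

// Read the i-th packed value from an array of 32-bit words.
fn getValue(data: []const u32, bitSize: u5, i: usize) u32 {
	if(bitSize == 0) return 0;
	const bitIndex = i*bitSize;
	const intIndex = bitIndex >> 5; // which u32 word holds the value
	const bitOffset: u5 = @intCast(bitIndex & 31); // position inside that word
	const bitMask = (@as(u32, 1) << bitSize) - 1;
	return data[intIndex] >> bitOffset & bitMask;
}

// Write the i-th packed value with a single aligned read-modify-write.
fn setValue(data: []u32, bitSize: u5, i: usize, value: u32) void {
	if(bitSize == 0) return;
	const bitIndex = i*bitSize;
	const intIndex = bitIndex >> 5;
	const bitOffset: u5 = @intCast(bitIndex & 31);
	const bitMask = (@as(u32, 1) << bitSize) - 1;
	std.debug.assert(value <= bitMask);
	data[intIndex] &= ~(bitMask << bitOffset);
	data[intIndex] |= value << bitOffset;
}

test "power-of-2 bit sizes keep each value inside one u32" {
	var data = [_]u32{0} ** 2; // room for 16 entries at 4 bits each
	setValue(&data, 4, 3, 0xA);
	setValue(&data, 4, 10, 0x5);
	try std.testing.expectEqual(@as(u32, 0xA), getValue(&data, 4, 3));
	try std.testing.expectEqual(@as(u32, 0x5), getValue(&data, 4, 10));
}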
This commit is contained in:
parent b303244773
commit 588e12fcdb
@@ -1316,14 +1316,15 @@ pub const ThreadPool = struct { // MARK: ThreadPool
 /// The bit size can be changed using the `resize` function.
 pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray
 	return struct {
-		data: []u8 = &.{},
+		data: []align(64)u32 = &.{},
 		bitSize: u5 = 0,
 
 		const Self = @This();
 
 		pub fn initCapacity(allocator: main.utils.NeverFailingAllocator, bitSize: u5) Self {
+			std.debug.assert(bitSize == 0 or bitSize & bitSize - 1 == 0); // Must be a power of 2
 			return .{
-				.data = allocator.alloc(u8, @as(usize, @divFloor(size + 7, 8))*bitSize + @sizeOf(u32)),
+				.data = allocator.alignedAlloc(u32, 64, @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize),
 				.bitSize = bitSize,
 			};
 		}
@@ -1334,6 +1335,7 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 		}
 
 		pub fn resize(self: *Self, allocator: main.utils.NeverFailingAllocator, newBitSize: u5) void {
+			std.debug.assert(newBitSize == 0 or newBitSize & newBitSize - 1 == 0); // Must be a power of 2
 			if(newBitSize == self.bitSize) return;
 			var newSelf = Self.initCapacity(allocator, newBitSize);
 
@@ -1348,22 +1350,21 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return 0;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
-			return ptr.* >> bitOffset & bitMask;
+			return self.data[intIndex] >> bitOffset & bitMask;
 		}
 
 		pub fn setValue(self: *Self, i: usize, value: u32) void {
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
+			const ptr: *u32 = &self.data[intIndex];
 			ptr.* &= ~(bitMask << bitOffset);
 			ptr.* |= value << bitOffset;
 		}
@@ -1372,11 +1373,11 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return 0;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
+			const ptr: *u32 = &self.data[intIndex];
 			const result = ptr.* >> bitOffset & bitMask;
 			ptr.* &= ~(bitMask << bitOffset);
 			ptr.* |= value << bitOffset;
@@ -1407,9 +1408,11 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 		}
 
 		pub fn initCopy(self: *Self, template: *const Self) void {
+			const dataDupe = main.globalAllocator.alignedAlloc(u32, 64, template.data.data.len);
+			@memcpy(dataDupe, template.data.data);
 			self.* = .{
 				.data = .{
-					.data = main.globalAllocator.dupe(u8, template.data.data),
+					.data = dataDupe,
 					.bitSize = template.data.bitSize,
 				},
 				.palette = main.globalAllocator.dupe(T, template.palette),
@@ -1421,7 +1424,7 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 
 		pub fn initCapacity(self: *Self, paletteLength: u32) void {
 			std.debug.assert(paletteLength < 0x80000000 and paletteLength > 0);
-			const bitSize: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
+			const bitSize: u5 = getTargetBitSize(paletteLength);
 			const bufferLength = @as(u32, 1) << bitSize;
 			self.* = .{
 				.data = DynamicPackedIntArray(size).initCapacity(main.globalAllocator, bitSize),
@@ -1440,6 +1443,13 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			main.globalAllocator.free(self.paletteOccupancy);
 		}
 
+		fn getTargetBitSize(paletteLength: u32) u5 {
+			const base: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
+			if(base == 0) return 0;
+			const logLog = std.math.log2_int_ceil(u5, base);
+			return @as(u5, 1) << logLog;
+		}
+
 		pub fn getValue(self: *const Self, i: usize) T {
 			return self.palette[self.data.getValue(i)];
 		}
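
Aside (not part of the diff): getTargetBitSize rounds the number of bits needed for the palette up to the next power of two, e.g. 5 palette entries need ceil(log2(5)) = 3 bits and are stored with 4, and 17 entries need 5 bits and are stored with 8. A minimal standalone sketch of that rounding, using a hypothetical helper name:

const std = @import("std");

// Round the bit count needed for `paletteLength` entries up to a power of two.
fn targetBitSize(paletteLength: u32) u5 {
	const base: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
	if(base == 0) return 0;
	return @as(u5, 1) << std.math.log2_int_ceil(u5, base);
}

test "bit sizes are rounded up to powers of two" {
	try std.testing.expectEqual(@as(u5, 0), targetBitSize(1));
	try std.testing.expectEqual(@as(u5, 1), targetBitSize(2));
	try std.testing.expectEqual(@as(u5, 2), targetBitSize(3));
	try std.testing.expectEqual(@as(u5, 4), targetBitSize(5));
	try std.testing.expectEqual(@as(u5, 8), targetBitSize(17));
	try std.testing.expectEqual(@as(u5, 16), targetBitSize(257));
}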
@@ -1454,7 +1464,7 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			}
 			if(paletteIndex == self.paletteLength) {
 				if(self.paletteLength == self.palette.len) {
-					self.data.resize(main.globalAllocator, self.data.bitSize + 1);
+					self.data.resize(main.globalAllocator, getTargetBitSize(self.paletteLength + 1));
 					self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize);
 					const oldLen = self.paletteOccupancy.len;
 					self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize);
@@ -1513,9 +1523,10 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 		}
 
 		pub fn optimizeLayout(self: *Self) void {
-			if(std.math.log2_int_ceil(usize, self.palette.len) == std.math.log2_int_ceil(usize, self.activePaletteEntries)) return;
+			const newBitSize = getTargetBitSize(@intCast(self.activePaletteEntries));
+			if(self.data.bitSize == newBitSize) return;
 
-			var newData = main.utils.DynamicPackedIntArray(size).initCapacity(main.globalAllocator, @intCast(std.math.log2_int_ceil(u32, self.activePaletteEntries)));
+			var newData = main.utils.DynamicPackedIntArray(size).initCapacity(main.globalAllocator, newBitSize);
 			const paletteMap: []u32 = main.stackAllocator.alloc(u32, self.paletteLength);
 			defer main.stackAllocator.free(paletteMap);
 			{