Restrict palette compression to power-of-2 bitSizes and improve the addressing to use aligned reads.

This makes it slightly faster, and since the padding bytes are no longer needed it also slightly reduces memory usage. Fixes #443. Would make #322 easier.
commit 588e12fcdb (parent b303244773)
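Why power-of-2 bit sizes allow aligned reads (a note on the idea, not part of the diff): if bitSize is one of 1, 2, 4, 8 or 16, then 32 is an exact multiple of bitSize, so every packed value lies entirely inside a single u32 word and can be fetched with one aligned load. With arbitrary bit sizes a value can straddle a word boundary, which the old code handled with unaligned (*align(1)) reads and an extra padding word. A minimal self-contained Zig sketch of the addressing scheme used below (the function name and test data are illustrative, not from the codebase):

const std = @import("std");

/// Reads the i-th bitSize-wide value from a packed array of u32 words.
/// Assumes bitSize is a power of two <= 16, so no value crosses a word boundary.
fn getPacked(data: []const u32, i: usize, bitSize: u5) u32 {
	const bitIndex = i*bitSize;
	const intIndex = bitIndex >> 5; // which u32 word
	const bitOffset: u5 = @intCast(bitIndex & 31); // bit position inside that word
	const bitMask = (@as(u32, 1) << bitSize) - 1;
	return data[intIndex] >> bitOffset & bitMask;
}

test "packed values never straddle a word boundary" {
	const data = [_]u32{0xdeadbeef, 0x01234567};
	try std.testing.expectEqual(@as(u32, 0xf), getPacked(&data, 0, 4)); // lowest nibble of word 0
	try std.testing.expectEqual(@as(u32, 0xd), getPacked(&data, 7, 4)); // highest nibble of word 0
	try std.testing.expectEqual(@as(u32, 0x7), getPacked(&data, 8, 4)); // lowest nibble of word 1
}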
@ -1316,14 +1316,15 @@ pub const ThreadPool = struct { // MARK: ThreadPool
|
|||||||
/// The bit size can be changed using the `resize` function.
|
/// The bit size can be changed using the `resize` function.
|
||||||
pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray
|
pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIntArray
|
||||||
return struct {
|
return struct {
|
||||||
data: []u8 = &.{},
|
data: []align(64)u32 = &.{},
|
||||||
bitSize: u5 = 0,
|
bitSize: u5 = 0,
|
||||||
|
|
||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
pub fn initCapacity(allocator: main.utils.NeverFailingAllocator, bitSize: u5) Self {
|
pub fn initCapacity(allocator: main.utils.NeverFailingAllocator, bitSize: u5) Self {
|
||||||
|
std.debug.assert(bitSize == 0 or bitSize & bitSize - 1 == 0); // Must be a power of 2
|
||||||
return .{
|
return .{
|
||||||
.data = allocator.alloc(u8, @as(usize, @divFloor(size + 7, 8))*bitSize + @sizeOf(u32)),
|
.data = allocator.alignedAlloc(u32, 64, @as(usize, @divExact(size, @bitSizeOf(u32)))*bitSize),
|
||||||
.bitSize = bitSize,
|
.bitSize = bitSize,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
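Sizing note: the old allocation reserved ceil(size/8)·bitSize bytes plus one padding u32 so that unaligned 32-bit reads near the end could not run past the buffer. The new allocation is exactly (size/32)·bitSize u32 words, and @divExact asserts that size is a multiple of 32. With illustrative numbers, size = 4096 and bitSize = 4 now take 4096/32·4 = 512 words = 2048 bytes, with no trailing padding word.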
@@ -1334,6 +1335,7 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 		}
 
 		pub fn resize(self: *Self, allocator: main.utils.NeverFailingAllocator, newBitSize: u5) void {
+			std.debug.assert(newBitSize == 0 or newBitSize & newBitSize - 1 == 0); // Must be a power of 2
 			if(newBitSize == self.bitSize) return;
 			var newSelf = Self.initCapacity(allocator, newBitSize);
 
@@ -1348,22 +1350,21 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return 0;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
-			return ptr.* >> bitOffset & bitMask;
+			return self.data[intIndex] >> bitOffset & bitMask;
 		}
 
 		pub fn setValue(self: *Self, i: usize, value: u32) void {
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
+			const ptr: *u32 = &self.data[intIndex];
 			ptr.* &= ~(bitMask << bitOffset);
 			ptr.* |= value << bitOffset;
 		}
@@ -1372,11 +1373,11 @@ pub fn DynamicPackedIntArray(size: comptime_int) type { // MARK: DynamicPackedIn
 			std.debug.assert(i < size);
 			if(self.bitSize == 0) return 0;
 			const bitIndex = i*self.bitSize;
-			const byteIndex = bitIndex >> 3;
-			const bitOffset: u5 = @intCast(bitIndex & 7);
+			const intIndex = bitIndex >> 5;
+			const bitOffset: u5 = @intCast(bitIndex & 31);
 			const bitMask = (@as(u32, 1) << self.bitSize) - 1;
 			std.debug.assert(value <= bitMask);
-			const ptr: *align(1) u32 = @ptrCast(&self.data[byteIndex]);
+			const ptr: *u32 = &self.data[intIndex];
 			const result = ptr.* >> bitOffset & bitMask;
 			ptr.* &= ~(bitMask << bitOffset);
 			ptr.* |= value << bitOffset;
@@ -1407,9 +1408,11 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 		}
 
 		pub fn initCopy(self: *Self, template: *const Self) void {
+			const dataDupe = main.globalAllocator.alignedAlloc(u32, 64, template.data.data.len);
+			@memcpy(dataDupe, template.data.data);
 			self.* = .{
 				.data = .{
-					.data = main.globalAllocator.dupe(u8, template.data.data),
+					.data = dataDupe,
 					.bitSize = template.data.bitSize,
 				},
 				.palette = main.globalAllocator.dupe(T, template.palette),
@@ -1421,7 +1424,7 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 
 		pub fn initCapacity(self: *Self, paletteLength: u32) void {
 			std.debug.assert(paletteLength < 0x80000000 and paletteLength > 0);
-			const bitSize: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
+			const bitSize: u5 = getTargetBitSize(paletteLength);
 			const bufferLength = @as(u32, 1) << bitSize;
 			self.* = .{
 				.data = DynamicPackedIntArray(size).initCapacity(main.globalAllocator, bitSize),
@@ -1440,6 +1443,13 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			main.globalAllocator.free(self.paletteOccupancy);
 		}
 
+		fn getTargetBitSize(paletteLength: u32) u5 {
+			const base: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
+			if(base == 0) return 0;
+			const logLog = std.math.log2_int_ceil(u5, base);
+			return @as(u5, 1) << logLog;
+		}
+
 		pub fn getValue(self: *const Self, i: usize) T {
 			return self.palette[self.data.getValue(i)];
 		}
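A worked trace of the new helper (restated standalone here for illustration; in the commit it is a private function of PaletteCompressedRegion): the required bit count ceil(log2(paletteLength)) is rounded up to the next power of two, so the bit size steps through 0, 1, 2, 4, 8, 16 instead of growing one bit at a time.

const std = @import("std");

fn getTargetBitSize(paletteLength: u32) u5 {
	const base: u5 = @intCast(std.math.log2_int_ceil(u32, paletteLength));
	if(base == 0) return 0;
	const logLog = std.math.log2_int_ceil(u5, base);
	return @as(u5, 1) << logLog;
}

test "bit size is rounded up to a power of two" {
	try std.testing.expectEqual(@as(u5, 0), getTargetBitSize(1)); // base 0
	try std.testing.expectEqual(@as(u5, 1), getTargetBitSize(2)); // base 1
	try std.testing.expectEqual(@as(u5, 4), getTargetBitSize(5)); // base 3 -> 4
	try std.testing.expectEqual(@as(u5, 8), getTargetBitSize(200)); // base 8
	try std.testing.expectEqual(@as(u5, 16), getTargetBitSize(513)); // base 10 -> 16
}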
@@ -1454,7 +1464,7 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 			}
 			if(paletteIndex == self.paletteLength) {
 				if(self.paletteLength == self.palette.len) {
-					self.data.resize(main.globalAllocator, self.data.bitSize + 1);
+					self.data.resize(main.globalAllocator, getTargetBitSize(self.paletteLength + 1));
 					self.palette = main.globalAllocator.realloc(self.palette, @as(usize, 1) << self.data.bitSize);
 					const oldLen = self.paletteOccupancy.len;
 					self.paletteOccupancy = main.globalAllocator.realloc(self.paletteOccupancy, @as(usize, 1) << self.data.bitSize);
@@ -1513,9 +1523,10 @@ pub fn PaletteCompressedRegion(T: type, size: comptime_int) type { // MARK: Pale
 		}
 
 		pub fn optimizeLayout(self: *Self) void {
-			if(std.math.log2_int_ceil(usize, self.palette.len) == std.math.log2_int_ceil(usize, self.activePaletteEntries)) return;
+			const newBitSize = getTargetBitSize(@intCast(self.activePaletteEntries));
+			if(self.data.bitSize == newBitSize) return;
 
-			var newData = main.utils.DynamicPackedIntArray(size).initCapacity(main.globalAllocator, @intCast(std.math.log2_int_ceil(u32, self.activePaletteEntries)));
+			var newData = main.utils.DynamicPackedIntArray(size).initCapacity(main.globalAllocator, newBitSize);
 			const paletteMap: []u32 = main.stackAllocator.alloc(u32, self.paletteLength);
 			defer main.stackAllocator.free(paletteMap);
 			{