dwarfs/scripts/extract_blocks.py
Marcus Holland-Moritz 8ed2fceba3 Fix extract_blocks.py
2022-10-29 18:53:52 +02:00

67 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python3
"""
A very rudimentary DwarFS image parser
This currently doesn't do very much apart from extracting the block data
(*without* headers) and writing the blocks to individual files.
Note that this *cannot* currently handle DwarFS image offsets.
The main idea behind this script is to allow experimenting with different
compression algorithms without having to have full-fledged integration
into DwarFS. Essentially, this allows you to individually compress the
blocks externally and see which algorithms/configurations work best.
"""
import sys
from struct import unpack
# Every section starts with a fixed-size header of this many bytes.
HEADER_SIZE = 0x40

# Little-endian header layout, in order: 6-byte magic, major version, minor
# version, 32-byte hash field, 8-byte checksum field, section number,
# section type, compression algorithm, payload length in bytes.
# The fields add up to HEADER_SIZE (6+1+1+32+8+4+2+2+8 == 64).
HEADER_FORMAT = "<6sBB32s8sLHHQ"

# Magic bytes expected at the start of every section header.
MAGIC = b"DWARFS"

# Section type codes this script knows how to label.
sectypes = {
    0: "block",
    7: "schema",
    8: "metadata",
    9: "index",
}

# Compression algorithm codes this script knows how to label.
compalgs = {
    0: "none",
    1: "lzma",
    2: "zstd",
    3: "lz4",
    4: "lz4hc",
}


def _die(message):
    """Print *message* to stderr and terminate with exit status 1."""
    print(message, file=sys.stderr)
    raise SystemExit(1)


def _parse_header(header):
    """Decode and validate one section header.

    Args:
        header: exactly HEADER_SIZE bytes read from the image.

    Returns:
        A tuple ``(ident, major, minor, secno, sectype, compalg, blocklen)``.
        The hash and checksum fields are decoded but not verified or
        returned.

    Raises:
        SystemExit: (status 1) if the magic is wrong, or the section type
            or compression algorithm is not in the known tables.
    """
    (
        ident,
        major,
        minor,
        _sha512,
        _xxh3,
        secno,
        sectype,
        compalg,
        blocklen,
    ) = unpack(HEADER_FORMAT, header)

    if ident != MAGIC:
        _die("error: expected dwarfs header")
    if sectype not in sectypes:
        _die(f"error: unexpected section type ({sectype})")
    if compalg not in compalgs:
        _die(f"error: unexpected compression algorithm ({compalg})")

    return ident, major, minor, secno, sectype, compalg, blocklen


def extract_blocks(image_path, basename):
    """Write the payload of every section in a DwarFS image to its own file.

    Sections are read back to back from the start of the file. Each payload
    is written (still compressed, without its header) to
    ``f"{basename}{section type name}{section number}"``, and a one-line
    summary of the section is printed to stdout.

    Args:
        image_path: path of the DwarFS image to read.
        basename: prefix for the output file names; it is used verbatim, so
            include any directory and separator you want.

    Returns:
        The number of sections extracted.

    Raises:
        SystemExit: (status 1) on an invalid header or when a section's
            payload is shorter than its header declares.
    """
    count = 0
    with open(image_path, "rb") as image:
        while True:
            header = image.read(HEADER_SIZE)
            if not header:
                # Clean end of file.
                break
            if len(header) < HEADER_SIZE:
                # Leftover bytes that cannot form a header: stop as before,
                # but tell the user instead of dropping them silently.
                print(
                    f"warning: ignoring {len(header)} trailing bytes "
                    "(incomplete section header)",
                    file=sys.stderr,
                )
                break

            ident, major, minor, secno, sectype, compalg, blocklen = _parse_header(
                header
            )
            print(
                f"{ident.decode('ascii')} v{major}.{minor} [{secno}] {sectypes[sectype]} ({compalgs[compalg]}) {blocklen} bytes"
            )

            block = image.read(blocklen)
            if len(block) != blocklen:
                # Refuse to emit a partial block; it would look like valid
                # data to whatever tool consumes the output files.
                _die(
                    f"error: truncated section data "
                    f"({len(block)} of {blocklen} bytes)"
                )

            with open(f"{basename}{sectypes[sectype]}{secno}", "wb") as out:
                out.write(block)
            count += 1
    return count


def main(argv=None):
    """Command-line entry point: ``<script> <image> <basename>``.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Raises:
        SystemExit: (status 1) on wrong argument count or any extraction
            error.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 3:
        _die(f"USAGE: {__file__} <image> <basename>")
    _, image_path, basename = argv
    extract_blocks(image_path, basename)


if __name__ == "__main__":
    main()