mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-03 09:47:01 -04:00
426 lines
14 KiB
Thrift
426 lines
14 KiB
Thrift
/* vim:set ts=2 sw=2 sts=2 et: */
/**
 * \author     Marcus Holland-Moritz (github@mhxnet.de)
 * \copyright  Copyright (c) Marcus Holland-Moritz
 *
 * This file is part of dwarfs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the “Software”), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */

include "thrift/annotation/cpp.thrift"

namespace cpp2 dwarfs.thrift.metadata

// Thrift only has signed integer types. Each typedef below carries a
// `@cpp.Type` annotation naming the unsigned C++ type of the same width.
@cpp.Type{name = "uint8_t"}
typedef byte UInt8
@cpp.Type{name = "uint16_t"}
typedef i16 UInt16
@cpp.Type{name = "uint32_t"}
typedef i32 UInt32
@cpp.Type{name = "uint64_t"}
typedef i64 UInt64

/**
 * One chunk of data
 *
 * A single file inode can be composed of multiple chunks, e.g. because
 * segments can be reused or because a single file spans multiple blocks.
 * Chunks may be overlapping if there is identical data in different files.
 *
 * A chunk is really just a view onto an otherwise unstructured file system
 * block.
 */
struct chunk {
  1: UInt32 block  // file system block number
  2: UInt32 offset // offset from start of block, in bytes
  3: UInt32 size   // size of chunk, in bytes
}

/**
 * One directory
 *
 * This structure represents the links between directory entries.
 * The `parent_entry` references the parent directory's `dir_entry`.
 * The `first_entry` members can be used to access the entries contained
 * in the directory.
 *
 * The range of contained entries is:
 *
 *   dir_entries[directory[inode].first_entry]
 *     ..
 *   dir_entries[directory[inode + 1].first_entry - 1]
 *
 * Note that as of v2.3, directory entries can be stored "packed", in
 * which case only the `first_entry` fields are populated and stored
 * delta-compressed. The `first_entry` field must be unpacked before
 * using and the `parent_entry` and `self_entry` fields must be built
 * by traversing the `dir_entries` using the unpacked `first_entry`
 * fields.
 */
struct directory {
  1: UInt32 parent_entry // indexes into `dir_entries`

  2: UInt32 first_entry  // indexes into `dir_entries`

  //==========================================================//
  // fields added with dwarfs-0.11.0, file system version 2.5 //
  //==========================================================//

  3: UInt32 self_entry   // indexes into `dir_entries`
}

/**
 * Inode Data
 *
 * This structure contains all necessary metadata for an inode, such as
 * its mode (i.e. permissions and inode type), its owner/group and its
 * timestamps.
 */
struct inode_data {
  // index into `metadata.modes[]`
  2: UInt32 mode_index

  // index into `metadata.uids[]`
  4: UInt32 owner_index

  // index into `metadata.gids[]`
  5: UInt32 group_index

  // atime relative to `metadata.timestamp_base`
  6: UInt64 atime_offset

  // mtime relative to `metadata.timestamp_base`
  7: UInt64 mtime_offset

  // ctime relative to `metadata.timestamp_base`
  8: UInt64 ctime_offset

  /**
   * ==================================================================
   * NOTE: These fields have been deprecated with filesystem version 2.3
   *       They are still being used to read older filesystem versions.
   *       They do *not* occupy any space in version 2.3 and above.
   */

  // index into `metadata.names[]`
  1: UInt32 name_index_v2_2

  // inode number
  3: UInt32 inode_v2_2

  /* ==================================================================
   */
}

/**
 * A directory entry
 *
 * This structure represents a single directory entry and just combines
 * a name with an inode number. The inode number can then be used to
 * look up almost all other metadata.
 */
struct dir_entry {
  // index into metadata.names
  1: UInt32 name_index

  // index into metadata.inodes
  2: UInt32 inode_num
}

/**
 * File system options
 */
struct fs_options {
  // file system contains only mtime time stamps
  1: bool mtime_only

  // time base and offsets are stored with this resolution
  // 1 = seconds, 60 = minutes, 3600 = hours, ...
  2: optional UInt32 time_resolution_sec

  // if `true`, `metadata.chunk_table` is stored delta-compressed and
  // must be unpacked before use
  3: bool packed_chunk_table

  // if `true`, `metadata.directories` is stored in a packed format and
  // must be unpacked before use
  4: bool packed_directories

  // if `true`, `metadata.shared_files_table` is stored in a packed
  // format and must be unpacked before use
  5: bool packed_shared_files_table
}

/**
 * An (optionally packed) string table
 */
struct string_table {
  // raw buffer containing the concatenation of all individual,
  // potentially compressed, strings
  1: string buffer

  // symbol table for fsst compression; if fsst is not used, this
  // will not be set and `buffer` will contain uncompressed strings
  2: optional string symtab

  // the (optionally packed) index; if packed, the index is stored
  // delta-compressed
  3: list<UInt32> index

  // indicates if the index is packed
  4: bool packed_index
}

/*
 * For highly fragmented inodes, computing the size from the
 * individual chunks can be extremely slow. This cache can be
 * used to bypass the chunk lookup and size computation.
 */
struct inode_size_cache {
  // lookup from inode number to size
  1: map<UInt32, UInt64> lookup

  // minimum number of chunks for a file to be found in the cache,
  // corresponds to scanner_options.inode_size_cache_min_chunk_count
  2: UInt64 min_chunk_count
}

/**
 * File System Metadata
 *
 * This is the root structure for all file system metadata.
 */
struct metadata {
  /**
   * Ranges of chunks that make up regular files. Identical
   * files share the same chunk range. The range of chunks
   * for a regular file is:
   *
   *   chunks[chunk_table[index]] .. chunks[chunk_table[index + 1] - 1]
   *
   * Here, `index` is either `inode - file_inode_offset` for
   * unique file inodes, or for shared file inodes:
   *
   *   shared_files[inode - file_inode_offset - unique_files] + unique_files
   *
   * Note that here `shared_files` is the unpacked version of
   * `shared_files_table`.
   */
  1: list<chunk> chunks

  /**
   * All directories, indexed by inode number. There's one extra
   * sentinel directory at the end that has `first_entry` point to
   * the end of `dir_entries`, so directory entry lookups work the
   * same for all directories.
   *
   * Note that this list is stored in a packed format as of v2.3
   * if `options.packed_directories` is `true` and must be unpacked
   * before use. See the documentation for the `directory` struct.
   */
  2: list<directory> directories

  /**
   * Inode metadata, indexed by inode number.
   *
   * Inodes are assigned strictly in the following order:
   *
   *   - directories, starting with the root dir at inode 0
   *   - symbolic links
   *   - unique regular files
   *   - shared regular files
   *   - character and block devices
   *   - named pipes and sockets
   *
   * The inode type can be determined from its mode, which makes
   * it possible to find the inode offsets for each distinct type
   * by a simple binary search. These inode offsets are required
   * to perform lookups into lists indexed by non-directory inode
   * numbers.
   *
   * The number of shared regular files can be determined from
   * `shared_files_table`.
   */
  3: list<inode_data> inodes

  /**
   * Chunk lookup table, indexed by `inode - file_inode_offset`.
   * There's one extra sentinel item at the end that points to the
   * end of `chunks`, so chunk lookups work the same for all inodes.
   *
   * Note that this list is stored delta-compressed as of v2.3
   * if `options.packed_chunk_table` is `true` and must be unpacked
   * before use.
   */
  4: list<UInt32> chunk_table

  /**
   * =========================================================================
   * NOTE: This has been deprecated with filesystem version 2.3
   *       It is still being used to read older filesystem versions.
   */
  5: list<UInt32> entry_table_v2_2
  /* =========================================================================
   */

  // symlink lookup table, indexed by `inode - symlink_inode_offset`
  6: list<UInt32> symlink_table

  // user ids, for lookup by `inode.owner_index`
  7: list<UInt32> uids

  // group ids, for lookup by `inode.group_index`
  8: list<UInt32> gids

  // inode modes, for lookup by `inode.mode_index`
  9: list<UInt32> modes

  // directory entry names, for lookup by `dir_entry.name_index`
  10: list<string> names

  // symlink targets, for lookup by index from `symlink_table`
  11: list<string> symlinks

  // timestamp base for all inode timestamps
  12: UInt64 timestamp_base

  /************************ DEPRECATED **********************
   *
   * These are redundant and can be determined at run-time
   * with a simple binary search. Compatibility is not
   * affected.
   *
   * 13: UInt32 chunk_inode_offset;
   * 14: UInt32 link_inode_offset;
   *
   *********************************************************/

  // file system block size in bytes
  15: UInt32 block_size

  // total file system size in bytes
  16: UInt64 total_fs_size

  //=========================================================//
  // fields added with dwarfs-0.3.0, file system version 2.1 //
  //=========================================================//

  // device ids, for lookup by `inode - device_inode_offset`
  17: optional list<UInt64> devices

  // file system options
  18: optional fs_options options

  //=========================================================//
  // fields added with dwarfs-0.5.0, file system version 2.3 //
  //=========================================================//

  /**
   * All directory entries
   *
   * Starting with the root directory entry at index 0, this
   * list contains the ranges of all directory entries of the
   * file system. Along with `directories`, this allows traversal
   * of the full file system structure.
   *
   * The ranges of entries that belong to a single directory
   * are determined by `directory.first_entry`. Within a single
   * directory, entries are ordered asciibetically by name,
   * which makes it possible to efficiently find entries using
   * binary search.
   */
  19: optional list<dir_entry> dir_entries

  /**
   * Shared files mapping
   *
   * Note that this list is stored in a packed format if
   * `options.packed_shared_files_table` is `true` and must be
   * unpacked before use.
   *
   * In packed format, it is stored as number of repetitions
   * per index, offset by 2 (the minimum number of repetitions),
   * so e.g. a packed list
   *
   *   [0, 3, 1, 0, 1]
   *
   * would unpack to:
   *
   *   [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4]
   *
   * So the packed 5-element array provides mappings for 15 shared
   * file inodes. Assuming 10 unique files and a file inode offset
   * of 10, a regular file inode 25 would be a shared file inode,
   * and the index for lookup in `chunk_table` would be `10 + 1`.
   */
  20: optional list<UInt32> shared_files_table

  // total size of hardlinked files beyond the first link, in bytes
  21: optional UInt64 total_hardlink_size

  // version string
  22: optional string dwarfs_version

  // unix timestamp of metadata creation time
  23: optional UInt64 create_timestamp

  // (optionally packed) string table
  // NOTE(review): presumably the compact counterpart of `names` -- confirm
  24: optional string_table compact_names

  // (optionally packed) string table
  // NOTE(review): presumably the compact counterpart of `symlinks` -- confirm
  25: optional string_table compact_symlinks

  //=========================================================//
  // fields added with dwarfs-0.7.0, file system version 2.5 //
  //=========================================================//

  // preferred path separator of original file system
  26: optional UInt32 preferred_path_separator

  //=========================================================//
  // fields added with dwarfs-0.7.3, file system version 2.5 //
  //=========================================================//

  // We don't need to increment the file system minor version
  // as file systems created with this new version are still
  // readable by older binaries as long as they don't use any
  // unsupported features (e.g. FLAC compression).

  // The set of features used in this file system image. As long
  // as an older binary supports all features, it will be able
  // to use images created with newer versions. We use strings
  // here instead of an enum so older versions can still output
  // names of features used by a newer version.
  27: optional set<string> features

  //=========================================================//
  // fields added with dwarfs-0.8.0, file system version 2.5 //
  //=========================================================//

  // The set of categories used in this file system image. Used
  // for displaying and to select compression algorithms when
  // recompressing the image.
  28: optional list<string> category_names

  // The category of each block in the file system image. The
  // index into this vector is the block number and the value
  // is an index into `category_names`.
  29: optional list<UInt32> block_categories

  //==========================================================//
  // fields added with dwarfs-0.11.0, file system version 2.5 //
  //==========================================================//

  // Size cache for highly fragmented file inodes
  30: optional inode_size_cache reg_file_size_cache
}