/* vim:set ts=2 sw=2 sts=2 et: */
/**
 * \author     Marcus Holland-Moritz (github@mhxnet.de)
 * \copyright  Copyright (c) Marcus Holland-Moritz
 *
 * This file is part of dwarfs.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the “Software”), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */

include "thrift/annotation/cpp.thrift"

namespace cpp2 dwarfs.thrift.metadata

/*
 * Unsigned integer aliases.
 *
 * Each alias pairs one of Thrift's integer types (`byte`, `i16`, `i32`,
 * `i64`) with the unsigned C++ type of the same width, named through the
 * `@cpp.Type` annotation. The structures in this file use these aliases
 * instead of the raw Thrift types.
 */
@cpp.Type{name = "uint8_t"}
typedef byte UInt8

@cpp.Type{name = "uint16_t"}
typedef i16 UInt16

@cpp.Type{name = "uint32_t"}
typedef i32 UInt32

@cpp.Type{name = "uint64_t"}
typedef i64 UInt64

/**
 * One chunk of data
 *
 * A single file inode can be composed of multiple chunks, e.g. because
 * segments can be reused or because a single file spans multiple blocks.
 * Chunks may be overlapping if there is identical data in different files.
 *
 * A chunk is really just a view onto an otherwise unstructured file system
 * block.
*/
struct chunk {
  1: UInt32 block  // file system block number
  2: UInt32 offset // offset from start of block, in bytes
  3: UInt32 size   // size of chunk, in bytes
}

/**
 * One directory
 *
 * This structure represents the links between directory entries.
 * The `parent_entry` references the parent directory's `dir_entry`.
 * The `first_entry` members can be used to access the entries contained
 * in the directory.
 *
 * The range of contained entries is:
 *
 *   dir_entries[directory[inode].first_entry]
 *   ..
 *   dir_entries[directory[inode + 1].first_entry - 1]
 *
 * Note that as of v2.3, directories can be stored "packed" (see
 * `fs_options.packed_directories`), in which case only the `first_entry`
 * fields are populated and stored delta-compressed. The `first_entry`
 * fields must be unpacked before use, and the `parent_entry` and
 * `self_entry` fields must be rebuilt by traversing `dir_entries` using
 * the unpacked `first_entry` fields.
 */
struct directory {
  1: UInt32 parent_entry // indexes into `dir_entries` (the parent directory's entry)
  2: UInt32 first_entry  // indexes into `dir_entries` (first contained entry)

  //==========================================================//
  // fields added with dwarfs-0.11.0, file system version 2.5 //
  //==========================================================//

  3: UInt32 self_entry   // indexes into `dir_entries`
}

/**
 * Inode Data
 *
 * This structure contains all necessary metadata for an inode, such as
 * its mode (i.e. permissions and inode type), its owner/group and its
 * timestamps.
*/
struct inode_data {
  // index into `metadata.modes[]`
  2: UInt32 mode_index

  // index into `metadata.uids[]`
  4: UInt32 owner_index

  // index into `metadata.gids[]`
  5: UInt32 group_index

  // atime relative to `metadata.timestamp_base`
  6: UInt64 atime_offset

  // mtime relative to `metadata.timestamp_base`
  7: UInt64 mtime_offset

  // ctime relative to `metadata.timestamp_base`
  8: UInt64 ctime_offset

  /**
   * ==================================================================
   * NOTE: These fields have been deprecated with filesystem version 2.3
   *       They are still being used to read older filesystem versions.
   *       They do *not* occupy any space in version 2.3 and above.
   *
   *       This is why field ids 1 and 3 are declared last rather than
   *       in numeric order.
   */

  // index into `metadata.names[]`
  1: UInt32 name_index_v2_2

  // inode number
  3: UInt32 inode_v2_2

  /* ================================================================== */
}

/**
 * A directory entry
 *
 * This structure represents a single directory entry and just combines
 * a name with an inode number. The inode number can then be used to
 * look up almost all other metadata.
 */
struct dir_entry {
  // index into metadata.names
  1: UInt32 name_index

  // index into metadata.inodes
  2: UInt32 inode_num
}

/**
 * File system options
 */
struct fs_options {
  // file system contains only mtime time stamps
  1: bool mtime_only

  // time base and offsets are stored with this resolution
  // 1 = seconds, 60 = minutes, 3600 = hours, ...
  2: optional UInt32 time_resolution_sec

  // `metadata.chunk_table` is stored delta-compressed and must be
  // unpacked before use
  3: bool packed_chunk_table

  // `metadata.directories` is stored in packed format and must be
  // unpacked before use (see the `directory` struct)
  4: bool packed_directories

  // `metadata.shared_files_table` is stored in packed format and must
  // be unpacked before use
  5: bool packed_shared_files_table
}

/**
 * An (optionally packed) string table
 */
struct string_table {
  // raw buffer containing the concatenation of all individual,
  // potentially compressed, strings
  1: string buffer

  // symbol table for fsst compression; if fsst is not used, this
  // will not be set and `buffer` will contain uncompressed strings
  2: optional string symtab

  // the (optionally packed) index; if packed, the index is stored
  // delta-compressed
  // NOTE(review): the list's element type is not visible in this view
  // of the file -- confirm against the schema
  3: list index

  // indicates if the index is packed
  4: bool packed_index
}

/**
 * For highly fragmented inodes, computing the size from the
 * individual chunks can be extremely slow. This cache can be
 * used to bypass the chunk lookup and size computation.
 */
struct inode_size_cache {
  // lookup from inode number to size
  // NOTE(review): the map's key/value types are not visible in this
  // view of the file -- confirm against the schema
  1: map lookup

  // minimum number of chunks for a file to be found in the cache,
  // corresponds to scanner_options.inode_size_cache_min_chunk_count
  2: UInt64 min_chunk_count
}

/**
 * This structure contains the version of the metadata format used
 * for tracking metadata rewrite history.
 */
struct history_entry {
  // major and minor version numbers corresponding to the block header
  1: UInt8 major
  2: UInt8 minor

  // version string of dwarfs library used to create the metadata
  3: optional string dwarfs_version

  // NOTE(review): presumably the file system block size in bytes, as
  // for `metadata.block_size` -- confirm
  4: UInt32 block_size

  // NOTE(review): presumably the file system options that applied to
  // this metadata version -- confirm
  5: optional fs_options options
}

/**
 * File System Metadata
 *
 * This is the root structure for all file system metadata.
 *
 * NOTE(review): the element types of the `list`, `set` and `map`
 * fields below are not visible in this view of the file; confirm
 * them against the schema before relying on this listing.
 */
struct metadata {
  /**
   * Ranges of chunks that make up regular files. Identical
   * files share the same chunk range. The range of chunks
   * for a regular file is:
   *
   *   chunks[chunk_table[index]] .. chunks[chunk_table[index + 1] - 1]
   *
   * Here, `index` is either `inode - file_inode_offset` for
   * unique file inodes, or for shared file inodes:
   *
   *   shared_files[inode - file_inode_offset - unique_files] + unique_files
   *
   * Note that here `shared_files` is the unpacked version of
   * `shared_files_table`.
   */
  1: list chunks

  /**
   * All directories, indexed by inode number. There's one extra
   * sentinel directory at the end that has `first_entry` point to
   * the end of `dir_entries`, so directory entry lookups work the
   * same for all directories.
   *
   * Note that this list is stored in a packed format as of v2.3
   * if `options.packed_directories` is `true` and must be unpacked
   * before use. See the documentation for the `directory` struct.
   */
  2: list directories

  /**
   * Inode metadata, indexed by inode number.
   *
   * Inodes are assigned strictly in the following order:
   *
   *   - directories, starting with the root dir at inode 0
   *   - symbolic links
   *   - unique regular files
   *   - shared regular files
   *   - character and block devices
   *   - named pipes and sockets
   *
   * The inode type can be determined from its mode, which makes
   * it possible to find the inode offsets for each distinct type
   * by a simple binary search. These inode offsets are required
   * to perform lookups into lists indexed by non-directory inode
   * numbers.
   *
   * The number of shared regular files can be determined from
   * `shared_files_table`.
   */
  3: list inodes

  /**
   * Chunk lookup table, indexed by `inode - file_inode_offset`.
   * There's one extra sentinel item at the end that points to the
   * end of `chunks`, so chunk lookups work the same for all inodes.
   *
   * Note that this list is stored delta-compressed as of v2.3
   * if `options.packed_chunk_table` is `true` and must be unpacked
   * before use.
   */
  4: list chunk_table

  /**
   * =========================================================================
   * NOTE: This has been deprecated with filesystem version 2.3
   *       It is still being used to read older filesystem versions.
   */
  5: list entry_table_v2_2

  /* ========================================================================= */

  // symlink lookup table, indexed by `inode - symlink_inode_offset`
  6: list symlink_table

  // user ids, for lookup by `inode.owner_index`
  7: list uids

  // group ids, for lookup by `inode.group_index`
  8: list gids

  // inode modes, for lookup by `inode.mode_index`
  9: list modes

  // directory entry names, for lookup by `dir_entry.name_index`
  10: list names

  // symlink targets, for lookup by index from `symlink_table`
  11: list symlinks

  // timestamp base for all inode timestamps
  12: UInt64 timestamp_base

  /************************ DEPRECATED **********************
   *
   * These are redundant and can be determined at run-time
   * with a simple binary search. Compatibility is not
   * affected.
   *
   * 13: UInt32 chunk_inode_offset;
   * 14: UInt32 link_inode_offset;
   *
   *********************************************************/

  // file system block size in bytes
  15: UInt32 block_size

  // total file system size in bytes
  16: UInt64 total_fs_size

  //=========================================================//
  // fields added with dwarfs-0.3.0, file system version 2.1 //
  //=========================================================//

  // device ids, for lookup by `inode - device_inode_offset`
  17: optional list devices

  // file system options
  18: optional fs_options options

  //=========================================================//
  // fields added with dwarfs-0.5.0, file system version 2.3 //
  //=========================================================//

  /**
   * All directory entries
   *
   * Starting with the root directory entry at index 0, this
   * list contains ranges of all directory entries of the file
   * system. Along with `directories`, this allows traversal
   * of the full file system structure.
   *
   * The ranges of entries that belong to a single directory
   * are determined by `directory.first_entry`. Within a single
   * directory, entries are ordered asciibetically by name,
   * which makes it possible to efficiently find entries using
   * binary search.
   */
  19: optional list dir_entries

  /**
   * Shared files mapping
   *
   * Note that this list is stored in a packed format if
   * `options.packed_shared_files_table` is `true` and must be
   * unpacked before use.
   *
   * In packed format, it is stored as number of repetitions
   * per index, offset by 2 (the minimum number of repetitions),
   * so e.g. a packed list
   *
   *   [0, 3, 1, 0, 1]
   *
   * would unpack to:
   *
   *   [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4]
   *
   * So the packed 5-element array provides mappings for 15 shared
   * file inodes. Assuming 10 unique files and a file inode offset
   * of 10, a regular file inode 25 would be a shared file inode,
   * and the index for lookup in `chunk_table` would be `10 + 1`.
   */
  20: optional list shared_files_table

  // total size of hardlinked files beyond the first link, in bytes
  21: optional UInt64 total_hardlink_size

  // version string
  22: optional string dwarfs_version

  // unix timestamp of metadata creation time
  23: optional UInt64 create_timestamp

  // NOTE(review): presumably a `string_table` representation of the
  // directory entry names, used in place of `names` -- confirm
  24: optional string_table compact_names

  // NOTE(review): presumably a `string_table` representation of the
  // symlink targets, used in place of `symlinks` -- confirm
  25: optional string_table compact_symlinks

  //=========================================================//
  // fields added with dwarfs-0.7.0, file system version 2.5 //
  //=========================================================//

  // preferred path separator of original file system
  26: optional UInt32 preferred_path_separator

  //=========================================================//
  // fields added with dwarfs-0.7.3, file system version 2.5 //
  //=========================================================//

  // We don't need to increment the file system minor version
  // as file systems created with this new version are still
  // readable by older binaries as long as they don't use any
  // unsupported features (e.g. FLAC compression).

  // The set of features used in this file system image. As long
  // as an older binary supports all features, it will be able
  // to use images created with newer versions. We use strings
  // here instead of an enum so older versions can still output
  // names of features used by a newer version.
  27: optional set features

  //=========================================================//
  // fields added with dwarfs-0.8.0, file system version 2.5 //
  //=========================================================//

  // The set of categories used in this file system image. Used
  // for displaying and to select compression algorithms when
  // recompressing the image.
  28: optional list category_names

  // The category of each block in the file system image. The
  // index into this vector is the block number and the value
  // is an index into `category_names`.
  29: optional list block_categories

  //==========================================================//
  // fields added with dwarfs-0.11.0, file system version 2.5 //
  //==========================================================//

  // Size cache for highly fragmented file inodes
  30: optional inode_size_cache reg_file_size_cache

  //==========================================================//
  // fields added with dwarfs-0.13.0, file system version 2.5 //
  //==========================================================//

  // Unique block categorization metadata JSON strings. These
  // can be used to compress a block with a metadata-dependent
  // algorithm after having been compressed with a general
  // purpose algorithm.
  31: optional list category_metadata_json

  // The metadata associated with each block. Maps from block
  // number to index into `category_metadata_json`.
  32: optional map block_category_metadata

  // version strings for all metadata versions
  33: optional list metadata_version_history
}