From 11589d04f07922fda494532dd0a73d35226b0eea Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Thu, 18 Mar 2021 12:34:44 +0100 Subject: [PATCH] Properly document the thrift definitions --- thrift/metadata.thrift | 244 +++++++++++++++++++++++++---------------- 1 file changed, 150 insertions(+), 94 deletions(-) diff --git a/thrift/metadata.thrift b/thrift/metadata.thrift index ff4829dd..35942ead 100644 --- a/thrift/metadata.thrift +++ b/thrift/metadata.thrift @@ -26,91 +26,101 @@ typedef i32 (cpp2.type = "uint32_t") UInt32 typedef i64 (cpp2.type = "uint64_t") UInt64 /** - * One chunk of data. A single file can be composed of multiple - * chunks. Chunks may be overlapping if there is identical data - * in different files. + * One chunk of data + * + * A single file inode can be composed of multiple chunks, e.g. because + * segments can be reused or because a single file spans multiple blocks. + * Chunks may be overlapping if there is identical data in different files. + * + * A chunk is really just a view onto an otherwise unstructured file system + * block. */ struct chunk { - 1: required UInt32 block, - 2: required UInt32 offset, - 3: required UInt32 size, + 1: required UInt32 block, // file system block number + 2: required UInt32 offset, // offset from start of block, in bytes + 3: required UInt32 size, // size of chunk, in bytes } /** - * One directory. This contains only a link to its parent inode - * and a range of `entry` objects that can be looked up in - * `metadata.entries`. + * One directory + * + * This structure represents the links between directory entries. + * The `parent_entry` references the parent directory's `dir_entry`. + * The `first_entry` members can be used to access the entries contained + * in the directory. + * + * The range of contained entries is: + * + * dir_entries[directory[inode].first_entry] + * .. 
+ * dir_entries[directory[inode + 1].first_entry - 1] */ struct directory { - 1: required UInt32 parent_entry, // indexes into dir_entries - 2: required UInt32 first_entry, // indexes into dir_entries + 1: required UInt32 parent_entry, // indexes into `dir_entries` + 2: required UInt32 first_entry, // indexes into `dir_entries` } /** - * One entry. This can be files, directories or links. This is - * by far the most common metadata object type, so it has been - * optimized for size. + * Inode Data + * + * This structure contains all necessary metadata for an inode, such as + * its mode (i.e. permissions and inode type), its owner/group and its + * timestamps. */ struct inode_data { - /** - * ========================================================================= - * NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0) - * It is still being used to read older filesystem versions. - * ========================================================================= - */ - // index into metadata.names - 1: required UInt32 name_index_v2_2, - - // index into metadata.modes + // index into `metadata.modes[]` 2: required UInt16 mode_index, - /** - * Inode number. Can be used in different ways: - * - * - For directories, the inode can be used as an index into - * metadata.directories. - * - For links, (inode - link_index_offset) can be - * used as an index into metadata.links. - * - For files, (inode - chunk_index_offset) can be - * used as in index into metadata.chunk_table. - */ - 3: required UInt32 inode_v2_2, - - //-------------------------------------------------------------------------- - // TODO: actually, the inode field is redundant as of v2.3, as entries are - // ordered by inode already; maybe we can drop this? 
- // - // we definitely need it for files to point into chunk_table - //-------------------------------------------------------------------------- - - // index into metadata.uids + // index into `metadata.uids[]` 4: required UInt16 owner_index, - // index into metadata.gids + // index into `metadata.gids[]` 5: required UInt16 group_index, - // atime relative to metadata.timestamp_base + // atime relative to `metadata.timestamp_base` 6: required UInt64 atime_offset, - // mtime relative to metadata.timestamp_base + // mtime relative to `metadata.timestamp_base` 7: required UInt64 mtime_offset, - // ctime relative to metadata.timestamp_base + // ctime relative to `metadata.timestamp_base` 8: required UInt64 ctime_offset, + + /** + * ================================================================== + * NOTE: These fields have been deprecated with filesystem version 2.3 + * They are still being used to read older filesystem versions. + * They do *not* occupy any space in version 2.3 and above. + */ + + // index into `metadata.names[]` + 1: required UInt32 name_index_v2_2, + + // inode number + 3: required UInt32 inode_v2_2, + + /* ================================================================== + */ } -////// -////// entries can now be stored in inode-order (we don't need old_entry_table any more :-) -////// - -struct dir_entry { ///// <--------- or entry? +/** + * A directory entry + * + * This structure represents a single directory entry and just combines + * a name with an inode number. The inode number can then be used to + * look up almost all other metadata. 
+ */ +struct dir_entry { // index into metadata.names 1: required UInt32 name_index, // index into metadata.entries - 2: required UInt32 inode_num, ///// <--------- entries (inodes) are shared for hardlinks + 2: required UInt32 inode_num, } +/** + * File system options + */ struct fs_options { // file system contains only mtime time stamps 1: required bool mtime_only, @@ -120,75 +130,95 @@ struct fs_options { 2: optional UInt32 time_resolution_sec, } +/** + * File System Metadata + * + * This is the root structure for all file system metadata. + */ struct metadata { /** * Ranges of chunks that make up regular files. Identical - * files share the same inode number. The range of chunks - * for a regular file inode are: + * files share the same chunk range. The range of chunks + * for a regular file is: * - * chunks[chunk_table[inode]] .. chunks[chunk_table[inode + 1] - 1] + * chunks[chunk_table[index]] .. chunks[chunk_table[index + 1] - 1] + * + * Here, `index` is either `inode - file_inode_offset` for + * unique file inodes, or for shared file inodes: + * + * shared_files[inode - file_inode_offset - unique_files] + unique_files + * + * Note that here `shared_files` is the unpacked version of + * `shared_files_table`. */ 1: required list chunks, /** * All directories, indexed by inode number. There's one extra - * dummy directory at the end whose `first_entry` point to the - * end of `entries`, so that directory entry lookup work the + * sentinel directory at the end that has `first_entry` point to + * the end of `dir_entries`, so directory entry lookup works the * same for all directories. */ 2: required list directories, /** - * All entries, can be looked up by inode through entry_table_v2_2, or by - * directory through `first_entry`, where the entries will be between - * `directories[n].first_entry` and `directories[n+1].first_entry`. + * Inode metadata, indexed by inode number. 
+ * + * Inodes are assigned strictly in the following order: + * + * - directories, starting with the root dir at inode 0 + * - symbolic links + * - unique regular files + * - shared regular files + * - character and block devices + * - named pipes and sockets + * + * The inode type can be determined from its mode, which makes + * it possible to find the inode offsets for each distinct type + * by a simple binary search. These inode offsets are required + * to perform lookups into lists indexed by non-directory inode + * numbers. + * + * The number of shared regular files can be determined from + * `shared_files_table`. */ 3: required list inodes, /** - * Chunk lookup table, indexed by (inode - chunk_index_offset). - * There's one extra dummy item at the end that points to the + * Chunk lookup table, indexed by `inode - file_inode_offset`. + * There's one extra sentinel item at the end that points to the * end of `chunks`, so chunk lookups work the same for all inodes. */ 4: required list chunk_table, /** * ========================================================================= - * NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0) + * NOTE: This has been deprecated with filesystem version 2.3 * It is still being used to read older filesystem versions. 
- * ========================================================================= - * - * Entry lookup table, indexed by inode - * - * This list contains all inodes strictly in the following order: - * - * - directories, starting with the root dir at inode 0 - * - symbolic links - * - regular files - * - character and block devices - * - named pipes and sockets */ - 5: required list entry_table_v2_2, ///// <------------ deprecate (see above) + 5: required list entry_table_v2_2, + /* ========================================================================= + */ - // symlink lookup table, indexed by (inode - symlink_table_offset) + // symlink lookup table, indexed by `inode - symlink_inode_offset` 6: required list symlink_table, - // user ids, for lookup by index in entry.owner + // user ids, for lookup by `inode.owner_index` 7: required list uids, - // group ids, for lookup by index in entry.group + // group ids, for lookup by `inode.group_index` 8: required list gids, - // entry modes, for lookup by index in entry.mode + // inode modes, for lookup by `inode.mode_index` 9: required list modes, - // entry names, for lookup by index in entry.name_index + // directory entry names, for lookup by `dir_entry.name_index` 10: required list names, - // link targets, for lookup by index from symlink_table + // symlink targets, for lookup by index from `symlink_table` 11: required list symlinks, - // timestamp base for all entry timestamps + // timestamp base for all inode timestamps 12: required UInt64 timestamp_base, /************************ DEPRECATED ********************** @@ -197,22 +227,22 @@ struct metadata { * with a simple binary search. Compatibility is not * affected. 
* - * 13: required UInt32 chunk_index_offset; - * 14: required UInt32 link_index_offset; + * 13: required UInt32 chunk_inode_offset; + * 14: required UInt32 link_inode_offset; * *********************************************************/ - // block size + // file system block size in bytes 15: required UInt32 block_size, - // total file system size + // total file system size in bytes 16: required UInt64 total_fs_size, //=========================================================// // fields added with dwarfs-0.3.0, file system version 2.1 // //=========================================================// - // device ids, for lookup by (inode - device_index_offset) + // device ids, for lookup by `inode - device_inode_offset` 17: optional list devices, // file system options @@ -223,21 +253,47 @@ struct metadata { //=========================================================// /** - * TODO TODO TODO describe this + * All directory entries + * + * Starting with the root directory entry at index 0, this + * list contains ranges of all directory entries of the file + * system. Along with `directories`, this allows traversal + * of the full file system structure. + * + * The ranges of entries that belong to a single directory + * are determined by `directory.first_entry`. Within a single + * directory, entries are ordered asciibetically by name, + * which makes it possible to efficiently find entries using + * binary search. */ 19: optional list dir_entries, /** - * Maps from file inode to chunk_table index + * Shared files mapping + * + * Note that this table cannot be used directly and must first + * be unpacked. It is stored as number of repetitions per index, + * offset by 2 (the minimum number of repetitions), so e.g. + * + * [0, 3, 1, 0, 1] + * + * would unpack to: + * + * [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4] + * + * So the packed 5-element array provides mappings for 15 shared + * file inodes. 
Assuming 10 unique files and a file inode offset + * of 10, a regular file inode 25 would be a shared file inode, + * and the index for lookup in `chunk_table` would be `10 + 1`. */ 20: optional list shared_files_table, - // total file system size (without hardlinks) + // total size of hardlinked files beyond the first link, in bytes 21: optional UInt64 total_hardlink_size, - // version + // version string 22: optional string dwarfs_version, - // timestamp + // unix timestamp of metadata creation time 23: optional UInt64 create_timestamp, }