Properly document the thrift definitions

This commit is contained in:
Marcus Holland-Moritz 2021-03-18 12:34:44 +01:00
parent 4e09098727
commit 11589d04f0

View File

@ -26,91 +26,101 @@ typedef i32 (cpp2.type = "uint32_t") UInt32
typedef i64 (cpp2.type = "uint64_t") UInt64 typedef i64 (cpp2.type = "uint64_t") UInt64
/** /**
* One chunk of data. A single file can be composed of multiple * One chunk of data
* chunks. Chunks may be overlapping if there is identical data *
* in different files. * A single file inode can be composed of multiple chunks, e.g. because
* segments can be reused or because a single file spans multiple blocks.
* Chunks may be overlapping if there is identical data in different files.
*
* A chunk is really just a view onto an otherwise unstructured file system
* block.
*/ */
struct chunk { struct chunk {
1: required UInt32 block, 1: required UInt32 block, // file system block number
2: required UInt32 offset, 2: required UInt32 offset, // offset from start of block, in bytes
3: required UInt32 size, 3: required UInt32 size, // size of chunk, in bytes
} }
/** /**
* One directory. This contains only a link to its parent inode * One directory
* and a range of `entry` objects that can be looked up in *
* `metadata.entries`. * This structure represents the links between directory entries.
* The `parent_entry` references the parent directory's `dir_entry`.
* The `first_entry` members can be used to access the entries contained
* in the directory.
*
* The range of contained entries is:
*
* dir_entries[directory[inode].first_entry]
* ..
* dir_entries[directory[inode + 1].first_entry - 1]
*/ */
struct directory { struct directory {
1: required UInt32 parent_entry, // indexes into dir_entries 1: required UInt32 parent_entry, // indexes into `dir_entries`
2: required UInt32 first_entry, // indexes into dir_entries 2: required UInt32 first_entry, // indexes into `dir_entries`
} }
/** /**
* One entry. This can be files, directories or links. This is * Inode Data
* by far the most common metadata object type, so it has been *
* optimized for size. * This structure contains all necessary metadata for an inode, such as
* its mode (i.e. permissions and inode type), its owner/group and its
* timestamps.
*/ */
struct inode_data { struct inode_data {
/** // index into `metadata.modes[]`
* =========================================================================
* NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0)
* It is still being used to read older filesystem versions.
* =========================================================================
*/
// index into metadata.names
1: required UInt32 name_index_v2_2,
// index into metadata.modes
2: required UInt16 mode_index, 2: required UInt16 mode_index,
/** // index into `metadata.uids[]`
* Inode number. Can be used in different ways:
*
* - For directories, the inode can be used as an index into
* metadata.directories.
* - For links, (inode - link_index_offset) can be
* used as an index into metadata.links.
* - For files, (inode - chunk_index_offset) can be
* used as an index into metadata.chunk_table.
*/
3: required UInt32 inode_v2_2,
//--------------------------------------------------------------------------
// TODO: actually, the inode field is redundant as of v2.3, as entries are
// ordered by inode already; maybe we can drop this?
//
// we definitely need it for files to point into chunk_table
//--------------------------------------------------------------------------
// index into metadata.uids
4: required UInt16 owner_index, 4: required UInt16 owner_index,
// index into metadata.gids // index into `metadata.gids[]`
5: required UInt16 group_index, 5: required UInt16 group_index,
// atime relative to metadata.timestamp_base // atime relative to `metadata.timestamp_base`
6: required UInt64 atime_offset, 6: required UInt64 atime_offset,
// mtime relative to metadata.timestamp_base // mtime relative to `metadata.timestamp_base`
7: required UInt64 mtime_offset, 7: required UInt64 mtime_offset,
// ctime relative to metadata.timestamp_base // ctime relative to `metadata.timestamp_base`
8: required UInt64 ctime_offset, 8: required UInt64 ctime_offset,
/**
* ==================================================================
* NOTE: These fields have been deprecated with filesystem version 2.3
* They are still being used to read older filesystem versions.
* They do *not* occupy any space in version 2.3 and above.
*/
// index into `metadata.names[]`
1: required UInt32 name_index_v2_2,
// inode number
3: required UInt32 inode_v2_2,
/* ==================================================================
*/
} }
////// /**
////// entries can now be stored in inode-order (we don't need old_entry_table any more :-) * A directory entry
////// *
* This structure represents a single directory entry and just combines
struct dir_entry { ///// <--------- or entry? * a name with an inode number. The inode number can then be used to
* look up almost all other metadata.
*/
struct dir_entry {
// index into metadata.names // index into metadata.names
1: required UInt32 name_index, 1: required UInt32 name_index,
// index into metadata.entries // index into metadata.entries
2: required UInt32 inode_num, ///// <--------- entries (inodes) are shared for hardlinks 2: required UInt32 inode_num,
} }
/**
* File system options
*/
struct fs_options { struct fs_options {
// file system contains only mtime time stamps // file system contains only mtime time stamps
1: required bool mtime_only, 1: required bool mtime_only,
@ -120,75 +130,95 @@ struct fs_options {
2: optional UInt32 time_resolution_sec, 2: optional UInt32 time_resolution_sec,
} }
/**
* File System Metadata
*
* This is the root structure for all file system metadata.
*/
struct metadata { struct metadata {
/** /**
* Ranges of chunks that make up regular files. Identical * Ranges of chunks that make up regular files. Identical
* files share the same inode number. The range of chunks * files share the same chunk range. The range of chunks
* for a regular file inode is: * for a regular file is:
* *
* chunks[chunk_table[inode]] .. chunks[chunk_table[inode + 1] - 1] * chunks[chunk_table[index]] .. chunks[chunk_table[index + 1] - 1]
*
* Here, `index` is either `inode - file_inode_offset` for
* unique file inodes, or for shared file inodes:
*
* shared_files[inode - file_inode_offset - unique_files] + unique_files
*
* Note that here `shared_files` is the unpacked version of
* `shared_files_table`.
*/ */
1: required list<chunk> chunks, 1: required list<chunk> chunks,
/** /**
* All directories, indexed by inode number. There's one extra * All directories, indexed by inode number. There's one extra
* dummy directory at the end whose `first_entry` point to the * sentinel directory at the end that has `first_entry` point to
* end of `entries`, so that directory entry lookup works the * the end of `dir_entries`, so directory entry lookup works the
* same for all directories. * same for all directories.
*/ */
2: required list<directory> directories, 2: required list<directory> directories,
/** /**
* All entries, can be looked up by inode through entry_table_v2_2, or by * Inode metadata, indexed by inode number.
* directory through `first_entry`, where the entries will be between *
* `directories[n].first_entry` and `directories[n+1].first_entry`. * Inodes are assigned strictly in the following order:
*
* - directories, starting with the root dir at inode 0
* - symbolic links
* - unique regular files
* - shared regular files
* - character and block devices
* - named pipes and sockets
*
* The inode type can be determined from its mode, which makes
* it possible to find the inode offsets for each distinct type
* by a simple binary search. These inode offsets are required
* to perform lookups into lists indexed by non-directory inode
* numbers.
*
* The number of shared regular files can be determined from
* `shared_files_table`.
*/ */
3: required list<inode_data> inodes, 3: required list<inode_data> inodes,
/** /**
* Chunk lookup table, indexed by (inode - chunk_index_offset). * Chunk lookup table, indexed by `inode - file_inode_offset`.
* There's one extra dummy item at the end that points to the * There's one extra sentinel item at the end that points to the
* end of `chunks`, so chunk lookups work the same for all inodes. * end of `chunks`, so chunk lookups work the same for all inodes.
*/ */
4: required list<UInt32> chunk_table, 4: required list<UInt32> chunk_table,
/** /**
* ========================================================================= * =========================================================================
* NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0) * NOTE: This has been deprecated with filesystem version 2.3
* It is still being used to read older filesystem versions. * It is still being used to read older filesystem versions.
* =========================================================================
*
* Entry lookup table, indexed by inode
*
* This list contains all inodes strictly in the following order:
*
* - directories, starting with the root dir at inode 0
* - symbolic links
* - regular files
* - character and block devices
* - named pipes and sockets
*/ */
5: required list<UInt32> entry_table_v2_2, ///// <------------ deprecate (see above) 5: required list<UInt32> entry_table_v2_2,
/* =========================================================================
*/
// symlink lookup table, indexed by (inode - symlink_table_offset) // symlink lookup table, indexed by `inode - symlink_inode_offset`
6: required list<UInt32> symlink_table, 6: required list<UInt32> symlink_table,
// user ids, for lookup by index in entry.owner // user ids, for lookup by `inode.owner_index`
7: required list<UInt16> uids, 7: required list<UInt16> uids,
// group ids, for lookup by index in entry.group // group ids, for lookup by `inode.group_index`
8: required list<UInt16> gids, 8: required list<UInt16> gids,
// entry modes, for lookup by index in entry.mode // inode modes, for lookup by `inode.mode_index`
9: required list<UInt16> modes, 9: required list<UInt16> modes,
// entry names, for lookup by index in entry.name_index // directory entry names, for lookup by `dir_entry.name_index`
10: required list<string> names, 10: required list<string> names,
// link targets, for lookup by index from symlink_table // symlink targets, for lookup by index from `symlink_table`
11: required list<string> symlinks, 11: required list<string> symlinks,
// timestamp base for all entry timestamps // timestamp base for all inode timestamps
12: required UInt64 timestamp_base, 12: required UInt64 timestamp_base,
/************************ DEPRECATED ********************** /************************ DEPRECATED **********************
@ -197,22 +227,22 @@ struct metadata {
* with a simple binary search. Compatibility is not * with a simple binary search. Compatibility is not
* affected. * affected.
* *
* 13: required UInt32 chunk_index_offset; * 13: required UInt32 chunk_inode_offset;
* 14: required UInt32 link_index_offset; * 14: required UInt32 link_inode_offset;
* *
*********************************************************/ *********************************************************/
// block size // file system block size in bytes
15: required UInt32 block_size, 15: required UInt32 block_size,
// total file system size // total file system size in bytes
16: required UInt64 total_fs_size, 16: required UInt64 total_fs_size,
//=========================================================// //=========================================================//
// fields added with dwarfs-0.3.0, file system version 2.1 // // fields added with dwarfs-0.3.0, file system version 2.1 //
//=========================================================// //=========================================================//
// device ids, for lookup by (inode - device_index_offset) // device ids, for lookup by `inode - device_inode_offset`
17: optional list<UInt64> devices, 17: optional list<UInt64> devices,
// file system options // file system options
@ -223,21 +253,47 @@ struct metadata {
//=========================================================// //=========================================================//
/** /**
* TODO TODO TODO describe this * All directory entries
*
* Starting with the root directory entry at index 0, this
* list contains ranges of all directory entries of the file
* system. Along with `directories`, this allows traversal
* of the full file system structure.
*
* The ranges of entries that belong to a single directory
* are determined by `directory.first_entry`. Within a single
* directory, entries are ordered asciibetically by name,
* which makes it possible to efficiently find entries using
* binary search.
*/ */
19: optional list<dir_entry> dir_entries, 19: optional list<dir_entry> dir_entries,
/** /**
* Maps from file inode to chunk_table index * Shared files mapping
*
* Note that this table cannot be used directly and must first
* be unpacked. It is stored as number of repetitions per index,
* offset by 2 (the minimum number of repetitions), so e.g.
*
* [0, 3, 1, 0, 1]
*
* would unpack to:
*
* [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4]
*
* So the packed 5-element array provides mappings for 15 shared
* file inodes. Assuming 10 unique files and a file inode offset
* of 10, a regular file inode 25 would be a shared file inode,
* and the index for lookup in `chunk_table` would be `10 + 1`.
*/ */
20: optional list<UInt32> shared_files_table, 20: optional list<UInt32> shared_files_table,
// total file system size (without hardlinks) // total size of hardlinked files beyond the first link, in bytes
21: optional UInt64 total_hardlink_size, 21: optional UInt64 total_hardlink_size,
// version // version string
22: optional string dwarfs_version, 22: optional string dwarfs_version,
// timestamp // unix timestamp of metadata creation time
23: optional UInt64 create_timestamp, 23: optional UInt64 create_timestamp,
} }