Properly document the thrift definitions

This commit is contained in:
Marcus Holland-Moritz 2021-03-18 12:34:44 +01:00
parent 4e09098727
commit 11589d04f0

View File

@ -26,91 +26,101 @@ typedef i32 (cpp2.type = "uint32_t") UInt32
typedef i64 (cpp2.type = "uint64_t") UInt64
/**
* One chunk of data. A single file can be composed of multiple
* chunks. Chunks may be overlapping if there is identical data
* in different files.
* One chunk of data
*
* A single file inode can be composed of multiple chunks, e.g. because
* segments can be reused or because a single file spans multiple blocks.
* Chunks may be overlapping if there is identical data in different files.
*
* A chunk is really just a view onto an otherwise unstructured file system
* block.
*/
struct chunk {
1: required UInt32 block,
2: required UInt32 offset,
3: required UInt32 size,
1: required UInt32 block, // file system block number
2: required UInt32 offset, // offset from start of block, in bytes
3: required UInt32 size, // size of chunk, in bytes
}
/**
* One directory. This contains only a link to its parent inode
* and a range of `entry` objects that can be looked up in
* `metadata.entries`.
* One directory
*
* This structure represents the links between directory entries.
* The `parent_entry` references the parent directory's `dir_entry`.
* The `first_entry` members can be used to access the entries contained
* in the directory.
*
* The range of contained entries is:
*
* dir_entries[directories[inode].first_entry]
* ..
* dir_entries[directories[inode + 1].first_entry - 1]
*/
struct directory {
1: required UInt32 parent_entry, // indexes into dir_entries
2: required UInt32 first_entry, // indexes into dir_entries
1: required UInt32 parent_entry, // indexes into `dir_entries`
2: required UInt32 first_entry, // indexes into `dir_entries`
}
/**
* One entry. This can be files, directories or links. This is
* by far the most common metadata object type, so it has been
* optimized for size.
* Inode Data
*
* This structure contains all necessary metadata for an inode, such as
* its mode (i.e. permissions and inode type), its owner/group and its
* timestamps.
*/
struct inode_data {
/**
* =========================================================================
* NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0)
* It is still being used to read older filesystem versions.
* =========================================================================
*/
// index into metadata.names
1: required UInt32 name_index_v2_2,
// index into metadata.modes
// index into `metadata.modes[]`
2: required UInt16 mode_index,
/**
* Inode number. Can be used in different ways:
*
* - For directories, the inode can be used as an index into
* metadata.directories.
* - For links, (inode - link_index_offset) can be
* used as an index into metadata.links.
* - For files, (inode - chunk_index_offset) can be
* used as in index into metadata.chunk_table.
*/
3: required UInt32 inode_v2_2,
//--------------------------------------------------------------------------
// TODO: actually, the inode field is redundant as of v2.3, as entries are
// ordered by inode already; maybe we can drop this?
//
// we definitely need it for files to point into chunk_table
//--------------------------------------------------------------------------
// index into metadata.uids
// index into `metadata.uids[]`
4: required UInt16 owner_index,
// index into metadata.gids
// index into `metadata.gids[]`
5: required UInt16 group_index,
// atime relative to metadata.timestamp_base
// atime relative to `metadata.timestamp_base`
6: required UInt64 atime_offset,
// mtime relative to metadata.timestamp_base
// mtime relative to `metadata.timestamp_base`
7: required UInt64 mtime_offset,
// ctime relative to metadata.timestamp_base
// ctime relative to `metadata.timestamp_base`
8: required UInt64 ctime_offset,
/**
* ==================================================================
* NOTE: These fields have been deprecated with filesystem version 2.3
* They are still being used to read older filesystem versions.
* They do *not* occupy any space in version 2.3 and above.
*/
// index into `metadata.names[]`
1: required UInt32 name_index_v2_2,
// inode number
3: required UInt32 inode_v2_2,
/* ==================================================================
*/
}
//////
////// entries can now be stored in inode-order (we don't need old_entry_table any more :-)
//////
struct dir_entry { ///// <--------- or entry?
/**
* A directory entry
*
* This structure represents a single directory entry and just combines
* a name with an inode number. The inode number can then be used to
* look up almost all other metadata.
*/
struct dir_entry {
// index into metadata.names
1: required UInt32 name_index,
// index into `metadata.inodes[]`
2: required UInt32 inode_num, ///// <--------- entries (inodes) are shared for hardlinks
2: required UInt32 inode_num,
}
/**
* File system options
*/
struct fs_options {
// file system contains only mtime time stamps
1: required bool mtime_only,
@ -120,75 +130,95 @@ struct fs_options {
2: optional UInt32 time_resolution_sec,
}
/**
* File System Metadata
*
* This is the root structure for all file system metadata.
*/
struct metadata {
/**
* Ranges of chunks that make up regular files. Identical
* files share the same inode number. The range of chunks
* for a regular file inode are:
* files share the same chunk range. The range of chunks
* for a regular file is:
*
* chunks[chunk_table[inode]] .. chunks[chunk_table[inode + 1] - 1]
* chunks[chunk_table[index]] .. chunks[chunk_table[index + 1] - 1]
*
* Here, `index` is either `inode - file_inode_offset` for
* unique file inodes, or for shared file inodes:
*
* shared_files[inode - file_inode_offset - unique_files] + unique_files
*
* Note that here `shared_files` is the unpacked version of
* `shared_files_table`.
*/
1: required list<chunk> chunks,
/**
* All directories, indexed by inode number. There's one extra
* dummy directory at the end whose `first_entry` point to the
* end of `entries`, so that directory entry lookup work the
* sentinel directory at the end that has `first_entry` point to
* the end of `dir_entries`, so directory entry lookups work the
* same for all directories.
*/
2: required list<directory> directories,
/**
* All entries, can be looked up by inode through entry_table_v2_2, or by
* directory through `first_entry`, where the entries will be between
* `directories[n].first_entry` and `directories[n+1].first_entry`.
* Inode metadata, indexed by inode number.
*
* Inodes are assigned strictly in the following order:
*
* - directories, starting with the root dir at inode 0
* - symbolic links
* - unique regular files
* - shared regular files
* - character and block devices
* - named pipes and sockets
*
* The inode type can be determined from its mode, which makes
* it possible to find the inode offsets for each distinct type
* by a simple binary search. These inode offsets are required
* to perform lookups into lists indexed by non-directory inode
* numbers.
*
* The number of shared regular files can be determined from
* `shared_files_table`.
*/
3: required list<inode_data> inodes,
/**
* Chunk lookup table, indexed by (inode - chunk_index_offset).
* There's one extra dummy item at the end that points to the
* Chunk lookup table, indexed by `inode - file_inode_offset`.
* There's one extra sentinel item at the end that points to the
* end of `chunks`, so chunk lookups work the same for all inodes.
*/
4: required list<UInt32> chunk_table,
/**
* =========================================================================
* NOTE: This has been deprecated with filesystem version 2.3 (DwarFS 0.5.0)
* NOTE: This has been deprecated with filesystem version 2.3
* It is still being used to read older filesystem versions.
* =========================================================================
*
* Entry lookup table, indexed by inode
*
* This list contains all inodes strictly in the following order:
*
* - directories, starting with the root dir at inode 0
* - symbolic links
* - regular files
* - character and block devices
* - named pipes and sockets
*/
5: required list<UInt32> entry_table_v2_2, ///// <------------ deprecate (see above)
5: required list<UInt32> entry_table_v2_2,
/* =========================================================================
*/
// symlink lookup table, indexed by (inode - symlink_table_offset)
// symlink lookup table, indexed by `inode - symlink_inode_offset`
6: required list<UInt32> symlink_table,
// user ids, for lookup by index in entry.owner
// user ids, for lookup by `inode.owner_index`
7: required list<UInt16> uids,
// group ids, for lookup by index in entry.group
// group ids, for lookup by `inode.group_index`
8: required list<UInt16> gids,
// entry modes, for lookup by index in entry.mode
// inode modes, for lookup by `inode.mode_index`
9: required list<UInt16> modes,
// entry names, for lookup by index in entry.name_index
// directory entry names, for lookup by `dir_entry.name_index`
10: required list<string> names,
// link targets, for lookup by index from symlink_table
// symlink targets, for lookup by index from `symlink_table`
11: required list<string> symlinks,
// timestamp base for all entry timestamps
// timestamp base for all inode timestamps
12: required UInt64 timestamp_base,
/************************ DEPRECATED **********************
@ -197,22 +227,22 @@ struct metadata {
* with a simple binary search. Compatibility is not
* affected.
*
* 13: required UInt32 chunk_index_offset;
* 14: required UInt32 link_index_offset;
* 13: required UInt32 chunk_inode_offset;
* 14: required UInt32 link_inode_offset;
*
*********************************************************/
// block size
// file system block size in bytes
15: required UInt32 block_size,
// total file system size
// total file system size in bytes
16: required UInt64 total_fs_size,
//=========================================================//
// fields added with dwarfs-0.3.0, file system version 2.1 //
//=========================================================//
// device ids, for lookup by (inode - device_index_offset)
// device ids, for lookup by `inode - device_inode_offset`
17: optional list<UInt64> devices,
// file system options
@ -223,21 +253,47 @@ struct metadata {
//=========================================================//
/**
* TODO TODO TODO describe this
* All directory entries
*
* Starting with the root directory entry at index 0, this
* list contains ranges of all directory entries of the file
* system. Along with `directories`, this allows traversal
* of the full file system structure.
*
* The ranges of entries that belong to a single directory
* are determined by `directory.first_entry`. Within a single
* directory, entries are ordered asciibetically by name,
* which makes it possible to efficiently find entries using
* binary search.
*/
19: optional list<dir_entry> dir_entries,
/**
* Maps from file inode to chunk_table index
* Shared files mapping
*
* Note that this table cannot be used directly and must first
* be unpacked. It is stored as number of repetitions per index,
* offset by 2 (the minimum number of repetitions), so e.g.
*
* [0, 3, 1, 0, 1]
*
* would unpack to:
*
* [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4]
*
* So the packed 5-element array provides mappings for 15 shared
* file inodes. Assuming 10 unique files and a file inode offset
* of 10, a regular file inode 25 would be a shared file inode,
* and the index for lookup in `chunk_table` would be `10 + 1`.
*/
20: optional list<UInt32> shared_files_table,
// total file system size (without hardlinks)
// total size of hardlinked files beyond the first link, in bytes
21: optional UInt64 total_hardlink_size,
// version
// version string
22: optional string dwarfs_version,
// timestamp
// unix timestamp of metadata creation time
23: optional UInt64 create_timestamp,
}