/* vim:set ts=2 sw=2 sts=2 et: */ /** * \author Marcus Holland-Moritz (github@mhxnet.de) * \copyright Copyright (c) Marcus Holland-Moritz * * This file is part of dwarfs. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the “Software”), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * SPDX-License-Identifier: MIT */

// NOTE(review): in this copy of the file the targets of the include
// directives below are missing, and so are several template argument lists
// (e.g. after `std::array`, `std::optional`, `std::span`, `std::shared_ptr`,
// `std::min`, `mm.as`, `static_cast` and `reinterpret_cast`). The code is
// reproduced exactly as found; restore the missing parts from the original
// file before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace dwarfs::reader::internal {

using namespace dwarfs::internal;

/**
 * Determine the offset at which the filesystem image starts inside `mm`.
 *
 * \param mm            Mapping that is searched for the image.
 * \param image_offset  Either an explicit offset, which is returned
 *                      unchanged, or `filesystem_options::IMAGE_OFFSET_AUTO`
 *                      to request a scan of `mm`.
 * \return The explicit offset, or the position of the first occurrence of
 *         the magic bytes ('D','W','A','R','F','S', MAJOR_VERSION) whose
 *         neighbouring headers pass the plausibility checks below.
 *
 * If the scan ends without an accepted candidate, `DWARFS_THROW` is invoked
 * with `runtime_error` and the message "no filesystem found".
 */
file_off_t
filesystem_parser::find_image_offset(mmif& mm, file_off_t image_offset) {
  // An explicitly given offset is trusted as is; no validation happens here.
  if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
    return image_offset;
  }

  // Byte pattern to search for: the six magic characters followed by the
  // major version.
  static constexpr std::array magic{
      {'D', 'W', 'A', 'R', 'F', 'S', MAJOR_VERSION}};

  // Position at which the next search pass begins; only ever grows.
  file_off_t start = 0;

  for (;;) {
    // Give up once the magic can no longer fit between `start` and the end.
    if (start + magic.size() >= mm.size()) {
      break;
    }

    // Search the bytes exposed by `mm.span(start)` for the magic pattern.
    auto ss = mm.span(start);

    // Use the Boyer-Moore searcher where the standard library advertises it,
    // otherwise fall back to the default searcher.
#if __cpp_lib_boyer_moore_searcher >= 201603
    auto searcher = std::boyer_moore_searcher(magic.begin(), magic.end());
#else
    auto searcher = std::default_searcher(magic.begin(), magic.end());
#endif

    auto it = std::search(ss.begin(), ss.end(), searcher);

    // No further occurrence of the magic: nothing left to examine.
    if (it == ss.end()) {
      break;
    }

    // Translate the match position within `ss` into an offset within `mm`.
    file_off_t pos = start + std::distance(ss.begin(), it);

    // Later candidates can only be at larger offsets, so if a `file_header`
    // does not fit here, it will not fit anywhere later either.
    if (pos + sizeof(file_header) >= mm.size()) {
      break;
    }

    // NOTE(review): the target type of `as` is not visible in this copy;
    // presumably `file_header`, given that `minor` is read - confirm.
    auto fh = mm.as(pos);

    if (fh->minor < 2) {
      // v1 section header, presumably
      if (pos + sizeof(file_header) + sizeof(section_header) >= mm.size()) {
        break;
      }

      // Header expected to follow directly after the file header.
      auto sh = mm.as(pos + sizeof(file_header));

      // The only compression types supported before v0.3.0
      auto is_valid_compression = [](compression_type_v1 c) {
        return c == compression_type_v1::NONE ||
               c == compression_type_v1::LZMA ||
               c == compression_type_v1::ZSTD ||
               c == compression_type_v1::LZ4 || c == compression_type_v1::LZ4HC;
      };

      // First section must be either a block or the metadata schema,
      // using a valid compression type.
      if ((sh->type == section_type::BLOCK ||
           sh->type == section_type::METADATA_V2_SCHEMA) &&
          is_valid_compression(sh->compression) && sh->length > 0) {
        // Where the second section header would start if the first section
        // really is `sh->length` bytes long.
        auto nextshpos =
            pos + sizeof(file_header) + sizeof(section_header) + sh->length;

        // Only look at the second header if it lies within the mapping;
        // otherwise this candidate is silently rejected and the scan goes on.
        if (nextshpos + sizeof(section_header) < mm.size()) {
          auto nsh = mm.as(nextshpos);

          // the next section must be a block or a metadata schema if the first
          // section was a block *or* a metadata block if the first section was
          // a metadata schema
          if ((sh->type == section_type::BLOCK
                   ? nsh->type == section_type::BLOCK ||
                         nsh->type == section_type::METADATA_V2_SCHEMA
                   : nsh->type == section_type::METADATA_V2) &&
              is_valid_compression(nsh->compression) && nsh->length > 0) {
            // we can be somewhat sure that this is where the filesystem starts
            return pos;
          }
        }
      }
    } else {
      // do a little more validation before we return
      if (pos + sizeof(section_header_v2) >= mm.size()) {
        break;
      }

      // For minor >= 2 the candidate position is interpreted as a
      // `section_header_v2` (presumably; the target type of `as` is not
      // visible in this copy).
      auto sh = mm.as(pos);

      // Only a header whose `number` field is 0 is considered further.
      if (sh->number == 0) {
        // One past the end of the *second* header, assuming the first section
        // occupies `sizeof(section_header_v2) + sh->length` bytes.
        auto endpos = pos + sh->length + 2 * sizeof(section_header_v2);

        // Presumably a guard against the sum above having wrapped around.
        if (endpos >= sh->length) {
          // NOTE(review): unlike the bounds checks above, this condition
          // depends on `sh->length` read from an unverified candidate. The
          // `break` ends the whole scan, so a later genuine header would no
          // longer be found - confirm that this is intended.
          if (endpos >= mm.size()) {
            break;
          }

          // Location where the second section header is expected.
          auto ps = mm.as(pos + sh->length + sizeof(section_header_v2));

          // Accept the candidate if the second header also starts with the
          // magic bytes and carries `number` 1.
          if (::memcmp(ps, magic.data(), magic.size()) == 0 and
              reinterpret_cast(ps)->number == 1) {
            return pos;
          }
        }
      }
    }

    // Candidate rejected: resume the search just behind this magic match.
    start = pos + magic.size();
  }

  DWARFS_THROW(runtime_error, "no filesystem found");
}

/**
 * Set up a parser for the image contained in `mm`.
 *
 * \param mm            Mapping holding the image; stored in `mm_`.
 * \param image_offset  Passed to `find_image_offset()` to obtain
 *                      `image_offset_`.
 * \param image_size    Upper bound for `image_size_`; the value actually used
 *                      is the smaller of this and the number of bytes between
 *                      `image_offset_` and the end of the mapping.
 *
 * `DWARFS_THROW(runtime_error, ...)` is invoked if the image is smaller than
 * a `file_header` ("file too small"), the magic string is not "DWARFS"
 * ("magic not found"), the major version differs from `MAJOR_VERSION`
 * ("different major version"), or the minor version exceeds `MINOR_VERSION`
 * ("newer minor version").
 */
// NOTE(review): the initialisers of `image_offset_` and `image_size_` read
// `mm_` and `image_offset_` respectively, which relies on the members being
// declared in this order in the class definition - confirm in the header.
filesystem_parser::filesystem_parser(std::shared_ptr mm,
                                     file_off_t image_offset,
                                     file_off_t image_size)
    : mm_{std::move(mm)}
    , image_offset_{find_image_offset(*mm_, image_offset)}
    , image_size_{
          std::min(image_size, mm_->size() - image_offset_)} {
  if (std::cmp_less(image_size_, sizeof(file_header))) {
    DWARFS_THROW(runtime_error, "file too small");
  }

  auto fh = mm_->as(image_offset_);

  if (fh->magic_sv() != "DWARFS") {
    DWARFS_THROW(runtime_error, "magic not found");
  }

  if (fh->major != MAJOR_VERSION) {
    DWARFS_THROW(runtime_error, "different major version");
  }

  if (fh->minor > MINOR_VERSION) {
    DWARFS_THROW(runtime_error, "newer minor version");
  }

  // `version_` is 1 for minor versions below 2 and 2 otherwise; it is the
  // value handed to `fs_section` when sections are constructed.
  version_ = fh->minor >= 2 ? 2 : 1;
  major_ = fh->major;
  minor_ = fh->minor;

  // A section index is only looked for from minor version 4 onwards.
  if (minor_ >= 4) {
    find_index();
  }

  // Must come after find_index(): rewind() initialises `offset_` differently
  // depending on whether `index_` is populated.
  rewind();
}

/**
 * Return the next section, or `std::nullopt` when there are no more.
 *
 * The meaning of `offset_` depends on whether a section index was loaded:
 * without an index it is a byte position in the mapping, with an index it is
 * a position in `index_`.
 */
std::optional filesystem_parser::next_section() {
  if (index_.empty()) {
    // Sequential mode: build the section found at `offset_` and continue at
    // the position reported by `section.end()`.
    if (std::cmp_less(offset_, image_offset_ + image_size_)) {
      auto section = fs_section(*mm_, offset_, version_);
      offset_ = section.end();
      return section;
    }
  } else {
    // Indexed mode: each entry carries an offset in the bits selected by
    // `section_offset_mask` and a further value in the bits above bit 47
    // (presumably the section type, as `find_index()` compares those bits
    // against `section_type::SECTION_INDEX`).
    if (std::cmp_less(offset_, index_.size())) {
      uint64_t id = index_[offset_++];
      uint64_t offset = id & section_offset_mask;
      // The section extends up to the offset of the following entry, or up to
      // `image_size_` for the last entry.
      uint64_t next_offset = std::cmp_less(offset_, index_.size())
                                 ? index_[offset_] & section_offset_mask
                                 : image_size_;
      // Index offsets are relative to the start of the image, hence the
      // addition of `image_offset_`.
      return fs_section(mm_, static_cast(id >> 48), image_offset_ + offset,
                        next_offset - offset, version_);
    }
  }

  return std::nullopt;
}

/**
 * Return what `mm_->span(0, image_offset_)` yields (presumably the bytes that
 * precede the image), or `std::nullopt` if the image starts at offset 0.
 */
std::optional> filesystem_parser::header() const {
  if (image_offset_ == 0) {
    return std::nullopt;
  }
  return mm_->span(0, image_offset_);
}

/**
 * Reset iteration so that `next_section()` starts over with the first
 * section.
 */
void filesystem_parser::rewind() {
  if (index_.empty()) {
    // Sequential mode: start at the image offset; for format version 1 the
    // first section begins after the `file_header`.
    offset_ = image_offset_;
    if (version_ == 1) {
      offset_ += sizeof(file_header);
    }
  } else {
    // Indexed mode: `offset_` is a position in `index_`.
    offset_ = 0;
  }
}

/**
 * Return the version formatted as "<major>.<minor> [<version_>]".
 */
std::string filesystem_parser::version() const {
  return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
}

/** Return true if `version_` is 2 or greater. */
bool filesystem_parser::has_checksums() const { return version_ >= 2; }

/** Return true if a section index was loaded by `find_index()`. */
bool filesystem_parser::has_index() const { return !index_.empty(); }

/**
 * Return `image_offset_ + image_size_`, i.e. the end position of the image
 * within the mapping (this includes any bytes before the image).
 */
size_t filesystem_parser::filesystem_size() const {
  return image_offset_ + image_size_;
}

/** Return the data of section `s` as provided by `s.data(*mm_)`. */
std::span filesystem_parser::section_data(fs_section const& s) const {
  return s.data(*mm_);
}

/**
 * Try to load the section index located at the end of the image.
 *
 * On success `index_` holds one 64-bit entry per section. If any of the
 * checks below fails the function returns quietly and `index_` stays (or is
 * made) empty, so that `next_section()` uses sequential mode.
 */
void filesystem_parser::find_index() {
  uint64_t index_pos;

  // The last 8 bytes of the image are copied into `index_pos` with memcpy
  // rather than being dereferenced directly.
  ::memcpy(&index_pos,
           mm_->as(image_offset_ + image_size_ - sizeof(uint64_t)),
           sizeof(uint64_t));

  // The bits above bit 47 must identify a section index.
  if ((index_pos >> 48) != static_cast(section_type::SECTION_INDEX)) {
    return;
  }

  // The masked remainder is an offset relative to the image start; turn it
  // into a position within the mapping.
  index_pos &= section_offset_mask;
  index_pos += image_offset_;

  if (std::cmp_greater_equal(index_pos, image_offset_ + image_size_)) {
    return;
  }

  auto section = fs_section(*mm_, index_pos, version_);

  if (!section.check_fast(*mm_)) {
    return;
  }

  if (section.type() != section_type::SECTION_INDEX) {
    return;
  }

  // The payload must consist of whole 64-bit entries.
  if (section.length() % sizeof(uint64_t) != 0) {
    return;
  }

  auto const section_count = section.length() / sizeof(uint64_t);

  // at least METADATA_V2_SCHEMA, METADATA_V2, and SECTION_INDEX
  if (section_count < 3) {
    return;
  }

  // the section index must be the last section
  if (std::cmp_not_equal(section.end(), image_offset_ + image_size_)) {
    return;
  }

  auto const index = section.data(*mm_);

  // NOTE(review): `index_` is sized from `section.length()` while the copy
  // uses `index.size()`; this assumes `index.size()` does not exceed
  // `section.length()` - confirm against `fs_section::data`.
  index_.resize(section_count);
  ::memcpy(index_.data(), index.data(), index.size());

  // index entries must be sorted by offset
  if (!std::ranges::is_sorted(index_, [](auto const a, auto const b) {
        return (a & section_offset_mask) < (b & section_offset_mask);
      })) {
    // remove the index again if it is not sorted
    index_.clear();
  }
}

} // namespace dwarfs::reader::internal