mirror of
https://github.com/mhx/dwarfs.git
synced 2025-08-03 17:56:12 -04:00
305 lines
8.9 KiB
C++
305 lines
8.9 KiB
C++
/* vim:set ts=2 sw=2 sts=2 et: */
|
|
/**
|
|
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
|
* \copyright Copyright (c) Marcus Holland-Moritz
|
|
*
|
|
* This file is part of dwarfs.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the “Software”), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*
|
|
* SPDX-License-Identifier: MIT
|
|
*/
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <cstring>
|
|
#include <functional>
|
|
#include <utility>
|
|
#include <version>
|
|
|
|
#include <fmt/format.h>
|
|
|
|
#include <dwarfs/error.h>
|
|
#include <dwarfs/mmif.h>
|
|
#include <dwarfs/reader/filesystem_options.h>
|
|
|
|
#include <dwarfs/reader/internal/filesystem_parser.h>
|
|
|
|
namespace dwarfs::reader::internal {
|
|
|
|
using namespace dwarfs::internal;
|
|
|
|
/**
 * Locate the start of the filesystem image inside the mapping.
 *
 * If `image_offset` is anything other than
 * `filesystem_options::IMAGE_OFFSET_AUTO`, it is returned unchanged.
 * Otherwise the mapping is scanned for the magic bytes ("DWARFS" followed
 * by `MAJOR_VERSION`), and every match is validated before it is accepted:
 *
 * - minor < 2: the file header must be followed by a section header of type
 *   BLOCK or METADATA_V2_SCHEMA with a known v1 compression type and a
 *   non-zero length, and the section header after that payload must be
 *   plausible as well.
 * - otherwise: the section header at the match must have number 0, and the
 *   header following its payload must start with the magic and have
 *   number 1.
 *
 * A match that fails validation is skipped and the scan continues behind it.
 *
 * \param mm            mapping to search
 * \param image_offset  explicit offset, or IMAGE_OFFSET_AUTO to search
 * \return the explicit offset, or the offset of the first validated match
 * \throws runtime_error ("no filesystem found") if no match validates
 */
file_off_t
filesystem_parser::find_image_offset(mmif& mm, file_off_t image_offset) {
  if (image_offset != filesystem_options::IMAGE_OFFSET_AUTO) {
    return image_offset;
  }

  static constexpr std::array<char, 7> magic{
      {'D', 'W', 'A', 'R', 'F', 'S', MAJOR_VERSION}};

  // The searcher depends only on the constant magic, so build it once
  // instead of once per candidate.
#if __cpp_lib_boyer_moore_searcher >= 201603
  auto const searcher = std::boyer_moore_searcher(magic.begin(), magic.end());
#else
  auto const searcher = std::default_searcher(magic.begin(), magic.end());
#endif

  file_off_t start = 0;
  for (;;) {
    if (start + magic.size() >= mm.size()) {
      break;
    }

    auto ss = mm.span<char>(start);
    auto it = std::search(ss.begin(), ss.end(), searcher);

    if (it == ss.end()) {
      break;
    }

    file_off_t pos = start + std::distance(ss.begin(), it);

    // The size checks below that only involve fixed header sizes may end the
    // scan: candidate positions only increase, so if the headers do not fit
    // here they cannot fit at any later match either.
    if (pos + sizeof(file_header) >= mm.size()) {
      break;
    }

    auto fh = mm.as<file_header>(pos);

    if (fh->minor < 2) {
      // v1 section header, presumably
      if (pos + sizeof(file_header) + sizeof(section_header) >= mm.size()) {
        break;
      }

      auto sh = mm.as<section_header>(pos + sizeof(file_header));

      // The only compression types supported before v0.3.0
      auto is_valid_compression = [](compression_type_v1 c) {
        return c == compression_type_v1::NONE ||
               c == compression_type_v1::LZMA ||
               c == compression_type_v1::ZSTD ||
               c == compression_type_v1::LZ4 || c == compression_type_v1::LZ4HC;
      };

      // First section must be either a block or the metadata schema,
      // using a valid compression type.
      if ((sh->type == section_type::BLOCK ||
           sh->type == section_type::METADATA_V2_SCHEMA) &&
          is_valid_compression(sh->compression) && sh->length > 0) {
        auto nextshpos =
            pos + sizeof(file_header) + sizeof(section_header) + sh->length;
        if (nextshpos + sizeof(section_header) < mm.size()) {
          auto nsh = mm.as<section_header>(nextshpos);
          // the next section must be a block or a metadata schema if the first
          // section was a block *or* a metadata block if the first section was
          // a metadata schema
          if ((sh->type == section_type::BLOCK
                   ? nsh->type == section_type::BLOCK ||
                         nsh->type == section_type::METADATA_V2_SCHEMA
                   : nsh->type == section_type::METADATA_V2) &&
              is_valid_compression(nsh->compression) && nsh->length > 0) {
            // we can be somewhat sure that this is where the filesystem starts
            return pos;
          }
        }
      }
    } else {
      // do a little more validation before we return
      if (pos + sizeof(section_header_v2) >= mm.size()) {
        break;
      }

      auto sh = mm.as<section_header_v2>(pos);

      if (sh->number == 0) {
        auto endpos = pos + sh->length + 2 * sizeof(section_header_v2);

        // `sh->length` comes from an unverified candidate. If the sum wrapped
        // around (endpos < length) or the second header would lie beyond the
        // mapping, this candidate is simply not a valid image start. Reject
        // it and keep scanning instead of aborting the whole search, so that
        // a false-positive magic match cannot hide a valid image behind it.
        if (endpos >= sh->length && endpos < mm.size()) {
          auto ps = mm.as<void>(pos + sh->length + sizeof(section_header_v2));

          if (::memcmp(ps, magic.data(), magic.size()) == 0 and
              reinterpret_cast<section_header_v2 const*>(ps)->number == 1) {
            return pos;
          }
        }
      }
    }

    // Candidate rejected: resume the search right behind this match.
    start = pos + magic.size();
  }

  DWARFS_THROW(runtime_error, "no filesystem found");
}
|
|
|
|
filesystem_parser::filesystem_parser(std::shared_ptr<mmif> mm,
|
|
file_off_t image_offset,
|
|
file_off_t image_size)
|
|
: mm_{std::move(mm)}
|
|
, image_offset_{find_image_offset(*mm_, image_offset)}
|
|
, image_size_{
|
|
std::min<file_off_t>(image_size, mm_->size() - image_offset_)} {
|
|
if (std::cmp_less(image_size_, sizeof(file_header))) {
|
|
DWARFS_THROW(runtime_error, "file too small");
|
|
}
|
|
|
|
auto fh = mm_->as<file_header>(image_offset_);
|
|
|
|
if (fh->magic_sv() != "DWARFS") {
|
|
DWARFS_THROW(runtime_error, "magic not found");
|
|
}
|
|
|
|
if (fh->major != MAJOR_VERSION) {
|
|
DWARFS_THROW(runtime_error, "different major version");
|
|
}
|
|
|
|
if (fh->minor > MINOR_VERSION) {
|
|
DWARFS_THROW(runtime_error, "newer minor version");
|
|
}
|
|
|
|
version_ = fh->minor >= 2 ? 2 : 1;
|
|
major_ = fh->major;
|
|
minor_ = fh->minor;
|
|
|
|
if (minor_ >= 4) {
|
|
find_index();
|
|
}
|
|
|
|
rewind();
|
|
}
|
|
|
|
std::optional<fs_section> filesystem_parser::next_section() {
|
|
if (index_.empty()) {
|
|
if (std::cmp_less(offset_, image_offset_ + image_size_)) {
|
|
auto section = fs_section(*mm_, offset_, version_);
|
|
offset_ = section.end();
|
|
return section;
|
|
}
|
|
} else {
|
|
if (std::cmp_less(offset_, index_.size())) {
|
|
uint64_t id = index_[offset_++];
|
|
uint64_t offset = id & section_offset_mask;
|
|
uint64_t next_offset = std::cmp_less(offset_, index_.size())
|
|
? index_[offset_] & section_offset_mask
|
|
: image_size_;
|
|
return fs_section(mm_, static_cast<section_type>(id >> 48),
|
|
image_offset_ + offset, next_offset - offset, version_);
|
|
}
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<std::span<uint8_t const>> filesystem_parser::header() const {
|
|
if (image_offset_ == 0) {
|
|
return std::nullopt;
|
|
}
|
|
return mm_->span(0, image_offset_);
|
|
}
|
|
|
|
void filesystem_parser::rewind() {
|
|
if (index_.empty()) {
|
|
offset_ = image_offset_;
|
|
if (version_ == 1) {
|
|
offset_ += sizeof(file_header);
|
|
}
|
|
} else {
|
|
offset_ = 0;
|
|
}
|
|
}
|
|
|
|
std::string filesystem_parser::version() const {
|
|
return fmt::format("{0}.{1} [{2}]", major_, minor_, version_);
|
|
}
|
|
|
|
/// Reports whether the section format version is 2 or later.
bool filesystem_parser::has_checksums() const {
  return !(version_ < 2);
}
|
|
|
|
/// Reports whether a section index has been loaded.
bool filesystem_parser::has_index() const {
  bool const no_index = index_.empty();
  return !no_index;
}
|
|
|
|
size_t filesystem_parser::filesystem_size() const {
|
|
return image_offset_ + image_size_;
|
|
}
|
|
|
|
/**
 * Return the data of section `s`, as provided by `fs_section::data()` for
 * this parser's mapping.
 */
std::span<uint8_t const>
filesystem_parser::section_data(fs_section const& s) const {
  auto& mapping = *mm_;
  return s.data(mapping);
}
|
|
|
|
void filesystem_parser::find_index() {
|
|
uint64_t index_pos;
|
|
|
|
::memcpy(&index_pos,
|
|
mm_->as<void>(image_offset_ + image_size_ - sizeof(uint64_t)),
|
|
sizeof(uint64_t));
|
|
|
|
if ((index_pos >> 48) != static_cast<uint16_t>(section_type::SECTION_INDEX)) {
|
|
return;
|
|
}
|
|
|
|
index_pos &= section_offset_mask;
|
|
index_pos += image_offset_;
|
|
|
|
if (std::cmp_greater_equal(index_pos, image_offset_ + image_size_)) {
|
|
return;
|
|
}
|
|
|
|
auto section = fs_section(*mm_, index_pos, version_);
|
|
|
|
if (!section.check_fast(*mm_)) {
|
|
return;
|
|
}
|
|
|
|
if (section.type() != section_type::SECTION_INDEX) {
|
|
return;
|
|
}
|
|
|
|
if (section.length() % sizeof(uint64_t) != 0) {
|
|
return;
|
|
}
|
|
|
|
auto const section_count = section.length() / sizeof(uint64_t);
|
|
|
|
// at least METADATA_V2_SCHEMA, METADATA_V2, and SECTION_INDEX
|
|
if (section_count < 3) {
|
|
return;
|
|
}
|
|
|
|
// the section index must be the last section
|
|
if (std::cmp_not_equal(section.end(), image_offset_ + image_size_)) {
|
|
return;
|
|
}
|
|
|
|
auto const index = section.data(*mm_);
|
|
|
|
index_.resize(section_count);
|
|
::memcpy(index_.data(), index.data(), index.size());
|
|
|
|
// index entries must be sorted by offset
|
|
if (!std::ranges::is_sorted(index_, [](auto const a, auto const b) {
|
|
return (a & section_offset_mask) < (b & section_offset_mask);
|
|
})) {
|
|
// remove the index again if it is not sorted
|
|
index_.clear();
|
|
}
|
|
}
|
|
|
|
} // namespace dwarfs::reader::internal
|