mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-15 15:26:19 -04:00
New offset cache + tests
This commit is contained in:
parent
17f1fdafad
commit
d57f6e0669
203
include/dwarfs/offset_cache.h
Normal file
203
include/dwarfs/offset_cache.h
Normal file
@ -0,0 +1,203 @@
|
||||
/* vim:set ts=2 sw=2 sts=2 et: */
|
||||
/**
|
||||
* \author Marcus Holland-Moritz (github@mhxnet.de)
|
||||
* \copyright Copyright (c) Marcus Holland-Moritz
|
||||
*
|
||||
* This file is part of dwarfs.
|
||||
*
|
||||
* dwarfs is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* dwarfs is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <ostream>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include <folly/container/EvictingCacheMap.h>
|
||||
#include <folly/small_vector.h>
|
||||
|
||||
namespace dwarfs {
|
||||
|
||||
template <typename InodeT, typename FileOffsetT, typename ChunkIndexT,
|
||||
size_t ChunkIndexInterval, size_t UpdaterMaxInlineOffsets>
|
||||
class basic_offset_cache {
|
||||
public:
|
||||
using inode_type = InodeT;
|
||||
using file_offset_type = FileOffsetT;
|
||||
using chunk_index_type = ChunkIndexT;
|
||||
|
||||
static constexpr size_t const chunk_index_interval = ChunkIndexInterval;
|
||||
|
||||
class updater;
|
||||
|
||||
class chunk_offsets {
|
||||
public:
|
||||
chunk_offsets(chunk_index_type total_chunks) {
|
||||
// TODO: we can use an offset as we don't have to cache (0, 0)
|
||||
// also, we may not have to cache the largest offset
|
||||
//
|
||||
// TODO: potentially add an `open` call to filesystem_v2 and
|
||||
// use that to cache the last position; this should be
|
||||
// more efficient as it won't require any mutexes; still
|
||||
// keep the offset cache, but don't overcomplicate it
|
||||
// with the "last_*" stuff
|
||||
offsets_.reserve(total_chunks / chunk_index_interval - 1);
|
||||
}
|
||||
|
||||
void update(chunk_index_type first_index,
|
||||
std::span<file_offset_type const> offsets) {
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
if (first_index + offsets.size() > offsets_.size()) {
|
||||
assert(first_index <= offsets_.size());
|
||||
auto new_offsets = offsets.subspan(offsets_.size() - first_index);
|
||||
std::copy(new_offsets.begin(), new_offsets.end(),
|
||||
std::back_inserter(offsets_));
|
||||
}
|
||||
}
|
||||
|
||||
void update(updater const& upd) {
|
||||
update(upd.first_index(), upd.offsets());
|
||||
}
|
||||
|
||||
// void set_last(chunk_index_type chunk_index, file_offset_type file_offset)
|
||||
// {
|
||||
// std::lock_guard lock(mx_);
|
||||
// last_chunk_index_ = chunk_index;
|
||||
// last_file_offset_ = file_offset;
|
||||
// }
|
||||
|
||||
std::pair<chunk_index_type, file_offset_type>
|
||||
find(file_offset_type offset, updater& upd) {
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
upd.set_first_index(offsets_.size());
|
||||
|
||||
if (!offsets_.empty()) {
|
||||
chunk_index_type best_index = offsets_.size();
|
||||
|
||||
if (offset < offsets_.back()) {
|
||||
auto it = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
|
||||
|
||||
if (it != offsets_.end()) {
|
||||
best_index = std::distance(offsets_.begin(), it);
|
||||
}
|
||||
}
|
||||
|
||||
if (best_index > 0) {
|
||||
return {chunk_index_interval * best_index, offsets_[best_index - 1]};
|
||||
}
|
||||
}
|
||||
|
||||
return {0, 0};
|
||||
}
|
||||
|
||||
void dump(std::ostream& os) const {
|
||||
std::vector<file_offset_type> offsets;
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
offsets = offsets_;
|
||||
}
|
||||
for (auto off : offsets) {
|
||||
os << " " << off << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
// chunk_index_type last_chunk_index_;
|
||||
// file_offset_type last_file_offset_;
|
||||
std::vector<file_offset_type> offsets_;
|
||||
std::mutex mutable mx_;
|
||||
};
|
||||
|
||||
using value_type = std::shared_ptr<chunk_offsets>;
|
||||
|
||||
class updater {
|
||||
public:
|
||||
static constexpr size_t const max_inline_offsets = UpdaterMaxInlineOffsets;
|
||||
|
||||
void set_first_index(chunk_index_type first_ix) { first_index_ = first_ix; }
|
||||
|
||||
void add_offset(chunk_index_type index, file_offset_type offset) {
|
||||
if (index < chunk_index_interval || index % chunk_index_interval != 0)
|
||||
[[likely]] {
|
||||
return;
|
||||
}
|
||||
|
||||
auto ix = index / chunk_index_interval - 1;
|
||||
assert(ix <= first_index_ + offset_.size());
|
||||
|
||||
if (ix == first_index_ + offsets_.size()) {
|
||||
offsets_.push_back(offset);
|
||||
}
|
||||
}
|
||||
|
||||
chunk_index_type first_index() const { return first_index_; }
|
||||
|
||||
std::span<file_offset_type const> offsets() const { return offsets_; }
|
||||
|
||||
private:
|
||||
folly::small_vector<file_offset_type, max_inline_offsets> offsets_;
|
||||
chunk_index_type first_index_{0};
|
||||
};
|
||||
|
||||
basic_offset_cache(size_t cache_size)
|
||||
: cache_{cache_size} {}
|
||||
|
||||
value_type find(inode_type inode, chunk_index_type num_chunks) const {
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
|
||||
if (auto it = cache_.find(inode); it != cache_.end()) {
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
|
||||
return std::make_shared<chunk_offsets>(num_chunks);
|
||||
}
|
||||
|
||||
void set(inode_type inode, value_type value) {
|
||||
std::lock_guard lock(mx_);
|
||||
cache_.set(inode, std::move(value));
|
||||
}
|
||||
|
||||
void dump(std::ostream& os) const {
|
||||
std::vector<std::pair<typename cache_type::key_type,
|
||||
typename cache_type::mapped_type>>
|
||||
contents;
|
||||
|
||||
{
|
||||
std::lock_guard lock(mx_);
|
||||
std::copy(cache_.begin(), cache_.end(), std::back_inserter(contents));
|
||||
}
|
||||
|
||||
for (auto const& [inode, ent] : contents) {
|
||||
os << "inode " << inode << ":\n";
|
||||
ent->dump(os);
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
using cache_type = folly::EvictingCacheMap<inode_type, value_type>;
|
||||
|
||||
cache_type mutable cache_;
|
||||
std::mutex mutable mx_;
|
||||
};
|
||||
|
||||
} // namespace dwarfs
|
@ -21,14 +21,16 @@
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/offset_cache.h"
|
||||
#include "dwarfs/util.h"
|
||||
|
||||
using namespace dwarfs;
|
||||
|
||||
namespace {} // namespace
|
||||
|
||||
TEST(utf8_display_width, basic) {
|
||||
EXPECT_EQ(0, utf8_display_width(""));
|
||||
EXPECT_EQ(1, utf8_display_width(u8string_to_string(u8"a")));
|
||||
@ -154,3 +156,133 @@ TEST(shorten_path, string_utf8) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
using cache_type = basic_offset_cache<uint32_t, uint32_t, uint32_t, 4, 4>;
|
||||
constexpr std::array<cache_type::file_offset_type, 32> const test_chunks{
|
||||
3, 15, 13, 1, 11, 6, 9, 15, 1, 16, 1, 13, 11, 16, 10, 14,
|
||||
4, 14, 4, 16, 8, 12, 16, 2, 16, 10, 15, 15, 2, 15, 5, 8,
|
||||
};
|
||||
constexpr cache_type::inode_type const test_inode = 42;
|
||||
constexpr size_t const total_size =
|
||||
std::accumulate(test_chunks.begin(), test_chunks.end(), 0);
|
||||
|
||||
std::tuple<cache_type::chunk_index_type, cache_type::file_offset_type, size_t>
|
||||
find_file_position(cache_type::inode_type const inode,
|
||||
std::span<cache_type::file_offset_type const> chunks,
|
||||
cache_type::file_offset_type file_offset,
|
||||
cache_type* cache = nullptr) {
|
||||
cache_type::value_type ent;
|
||||
|
||||
if (cache) {
|
||||
ent = cache->find(inode, chunks.size());
|
||||
|
||||
if (!ent) {
|
||||
throw std::runtime_error("find() did not return an object");
|
||||
}
|
||||
}
|
||||
|
||||
auto upd = cache_type::updater();
|
||||
auto it = chunks.begin();
|
||||
auto end = chunks.end();
|
||||
cache_type::chunk_index_type chunk_index = 0;
|
||||
cache_type::file_offset_type chunk_offset = 0;
|
||||
|
||||
if (ent) {
|
||||
std::tie(chunk_index, chunk_offset) = ent->find(file_offset, upd);
|
||||
std::advance(it, chunk_index);
|
||||
}
|
||||
|
||||
auto remaining_offset = file_offset - chunk_offset;
|
||||
size_t num_lookups = 0;
|
||||
|
||||
while (it < end) {
|
||||
++num_lookups;
|
||||
auto chunk_size = *it;
|
||||
|
||||
if (remaining_offset < chunk_size) {
|
||||
break;
|
||||
}
|
||||
|
||||
remaining_offset -= chunk_size;
|
||||
chunk_offset += chunk_size;
|
||||
++it;
|
||||
|
||||
upd.add_offset(++chunk_index, chunk_offset);
|
||||
}
|
||||
|
||||
if (ent) {
|
||||
ent->update(upd);
|
||||
cache->set(inode, ent);
|
||||
}
|
||||
|
||||
return {chunk_index, remaining_offset, num_lookups};
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Verifies that lookups through the offset cache produce the same result as
// an uncached linear walk, for every offset in the test file, while never
// inspecting more than 5 chunks per lookup.
TEST(offset_cache, basic) {
  cache_type cache(4);

  size_t total_ref_lookups = 0;
  size_t total_test_lookups = 0;

  // Resolves `offset` once without and once with the cache, checks both
  // results against each other and adds the lookup counts to the totals.
  auto check_offset = [&](cache_type::file_offset_type offset) {
    auto [ref_ix, ref_off, ref_lookups] =
        find_file_position(test_inode, test_chunks, offset);

    auto [test_ix, test_off, test_lookups] =
        find_file_position(test_inode, test_chunks, offset, &cache);

    // Sanity-check the reference: the sizes of all chunks before `ref_ix`
    // plus the remaining offset must reproduce the requested offset.
    auto ref_offset = std::accumulate(test_chunks.begin(),
                                      test_chunks.begin() + ref_ix, ref_off);

    EXPECT_EQ(offset, ref_offset);

    EXPECT_EQ(ref_ix + 1, ref_lookups);
    EXPECT_LE(test_lookups, 5u);

    EXPECT_EQ(ref_ix, test_ix);
    EXPECT_EQ(ref_off, test_off);

    total_ref_lookups += ref_lookups;
    total_test_lookups += test_lookups;
  };

  // First pass: ascending offsets, starting with an empty cache.
  for (cache_type::file_offset_type offset = 0; offset < total_size; ++offset) {
    check_offset(offset);
  }

  EXPECT_GT(total_test_lookups, 0u);
  EXPECT_LT(total_test_lookups, total_ref_lookups);

  // Second pass: descending offsets, reusing the cache from the first pass.
  for (cache_type::file_offset_type offset = total_size; offset-- > 0;) {
    check_offset(offset);
  }
}
|
||||
|
||||
// Looks up the very last byte of the test file through a fresh cache and
// checks that the walk inspects every chunk and ends in the final chunk.
TEST(offset_cache, prefill) {
  cache_type prefilled_cache(4);

  auto const last_byte = total_size - 1;

  auto [chunk_ix, offset_in_chunk, lookups] = find_file_position(
      test_inode, test_chunks, last_byte, &prefilled_cache);

  // Every chunk has to be inspected to reach the end of the file.
  EXPECT_EQ(test_chunks.size(), lookups);

  // The last byte lives at the last position of the last chunk.
  EXPECT_EQ(test_chunks.size() - 1, chunk_ix);
  EXPECT_EQ(test_chunks.back() - 1, offset_in_chunk);
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user