diff --git a/include/dwarfs/offset_cache.h b/include/dwarfs/offset_cache.h
new file mode 100644
index 00000000..6db3028b
--- /dev/null
+++ b/include/dwarfs/offset_cache.h
@@ -0,0 +1,203 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see .
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace dwarfs {
+
+template
+class basic_offset_cache {
+ public:
+ // Public names for the types this cache is parameterised on
+ // (InodeT, FileOffsetT, ChunkIndexT).
+ using inode_type = InodeT;
+ using file_offset_type = FileOffsetT;
+ using chunk_index_type = ChunkIndexT;
+
+ // Spacing of cached chunk indices: updater::add_offset() only records
+ // offsets for chunk indices that are non-zero multiples of this value.
+ static constexpr size_t const chunk_index_interval = ChunkIndexInterval;
+
+ // Forward declaration; chunk_offsets refers to updater before updater
+ // is defined further down.
+ class updater;
+
+  /**
+   * Table of cached file offsets for one inode.
+   *
+   * Entry `i` is the file offset recorded for chunk index
+   * `(i + 1) * chunk_index_interval`; entries are only ever appended.
+   * Every member function takes the internal mutex `mx_`.
+   */
+  class chunk_offsets {
+   public:
+    /**
+     * Pre-allocate the offset table for a file with `total_chunks` chunks.
+     *
+     * Nothing is reserved when the file has fewer than
+     * `2 * chunk_index_interval` chunks. In particular this avoids
+     * evaluating `0 - 1` in unsigned arithmetic for files with fewer than
+     * `chunk_index_interval` chunks, which would ask `reserve()` for a
+     * huge capacity and make it throw `std::length_error`.
+     */
+    chunk_offsets(chunk_index_type total_chunks) {
+      // TODO: we can use an offset as we don't have to cache (0, 0)
+      //       also, we may not have to cache the largest offset
+      //
+      // TODO: potentially add an `open` call to filesystem_v2 and
+      //       use that to cache the last position; this should be
+      //       more efficient as it won't require any mutexes; still
+      //       keep the offset cache, but don't overcomplicate it
+      //       with the "last_*" stuff
+      auto const num_intervals = total_chunks / chunk_index_interval;
+
+      if (num_intervals > 1) {
+        offsets_.reserve(num_intervals - 1);
+      }
+    }
+
+    /**
+     * Append the offsets that are not stored yet.
+     *
+     * `offsets[k]` belongs to table entry `first_index + k`. Entries that
+     * are already present are skipped; only the tail beyond the current
+     * table size is appended.
+     *
+     * \pre `first_index <= ` current table size (checked by assert only),
+     *      i.e. the new offsets must not leave a gap in the table.
+     */
+    void update(chunk_index_type first_index,
+                std::span<file_offset_type const> offsets) {
+      std::lock_guard lock(mx_);
+
+      if (first_index + offsets.size() > offsets_.size()) {
+        assert(first_index <= offsets_.size());
+        // skip the leading part of `offsets` that we already have
+        auto new_offsets = offsets.subspan(offsets_.size() - first_index);
+        std::copy(new_offsets.begin(), new_offsets.end(),
+                  std::back_inserter(offsets_));
+      }
+    }
+
+    /** Append whatever `upd` has collected (see the overload above). */
+    void update(updater const& upd) {
+      update(upd.first_index(), upd.offsets());
+    }
+
+    // void set_last(chunk_index_type chunk_index, file_offset_type file_offset)
+    // {
+    //   std::lock_guard lock(mx_);
+    //   last_chunk_index_ = chunk_index;
+    //   last_file_offset_ = file_offset;
+    // }
+
+    /**
+     * Find a cached position from which to start looking for `offset`.
+     *
+     * As a side effect, `upd` is told the current table size as its
+     * first index, so that offsets it collects afterwards line up with
+     * the end of the table.
+     *
+     * \return `{chunk index, file offset recorded for that chunk}` taken
+     *         from the table, or `{0, 0}` if no cached entry can be used.
+     */
+    std::pair<chunk_index_type, file_offset_type>
+    find(file_offset_type offset, updater& upd) {
+      std::lock_guard lock(mx_);
+
+      upd.set_first_index(offsets_.size());
+
+      if (!offsets_.empty()) {
+        // default: `offset` is at or beyond the last cached entry,
+        // so the last entry is the best starting point
+        chunk_index_type best_index = offsets_.size();
+
+        if (offset < offsets_.back()) {
+          // first entry that is >= offset; the entry before it (if any)
+          // is strictly below `offset`
+          auto it = std::lower_bound(offsets_.begin(), offsets_.end(), offset);
+
+          if (it != offsets_.end()) {
+            best_index = std::distance(offsets_.begin(), it);
+          }
+        }
+
+        if (best_index > 0) {
+          return {chunk_index_interval * best_index, offsets_[best_index - 1]};
+        }
+      }
+
+      return {0, 0};
+    }
+
+    /**
+     * Write every cached offset to `os`, one per line.
+     *
+     * The table is copied under the lock and printed afterwards, so the
+     * mutex is not held while writing to the stream.
+     */
+    void dump(std::ostream& os) const {
+      std::vector<file_offset_type> offsets;
+      {
+        std::lock_guard lock(mx_);
+        offsets = offsets_;
+      }
+      for (auto off : offsets) {
+        os << " " << off << "\n";
+      }
+    }
+
+   private:
+    // chunk_index_type last_chunk_index_;
+    // file_offset_type last_file_offset_;
+    std::vector<file_offset_type> offsets_;
+    // mutable so that the const member function dump() can lock it
+    std::mutex mutable mx_;
+  };
+
+ // Handle type returned by find() and accepted by set().
+ using value_type = std::shared_ptr;
+
+  /**
+   * Collects the offsets seen during a single lookup.
+   *
+   * `chunk_offsets::find()` sets the first index, the caller then feeds
+   * every chunk it visits to `add_offset()`, and finally the collected
+   * offsets are handed to `chunk_offsets::update()`.
+   *
+   * This class does no locking of its own.
+   */
+  class updater {
+   public:
+    static constexpr size_t const max_inline_offsets = UpdaterMaxInlineOffsets;
+
+    /** Set the table index that the first collected offset will map to. */
+    void set_first_index(chunk_index_type first_ix) { first_index_ = first_ix; }
+
+    /**
+     * Record `offset` for chunk `index` if it is the next one needed.
+     *
+     * Chunk indices that are zero or not a multiple of
+     * `chunk_index_interval` are ignored, as are indices whose table slot
+     * lies before the next slot to be filled.
+     */
+    void add_offset(chunk_index_type index, file_offset_type offset) {
+      if (index < chunk_index_interval || index % chunk_index_interval != 0)
+          [[likely]] {
+        return;
+      }
+
+      // table slot for this chunk index (slot 0 <=> chunk_index_interval)
+      auto ix = index / chunk_index_interval - 1;
+      // slots must be visited in order; skipping one would leave a gap
+      assert(ix <= first_index_ + offsets_.size());
+
+      if (ix == first_index_ + offsets_.size()) {
+        offsets_.push_back(offset);
+      }
+    }
+
+    /** Table index corresponding to the first element of `offsets()`. */
+    chunk_index_type first_index() const { return first_index_; }
+
+    /** View of the offsets collected so far; valid while `*this` is unchanged. */
+    std::span<file_offset_type const> offsets() const { return offsets_; }
+
+   private:
+    folly::small_vector<file_offset_type, max_inline_offsets> offsets_;
+    chunk_index_type first_index_{0};
+  };
+
+ // Construct the cache; `cache_size` is forwarded to the underlying
+ // cache_type (presumably its maximum number of entries -- confirm
+ // against the folly::EvictingCacheMap constructor).
+ basic_offset_cache(size_t cache_size)
+ : cache_{cache_size} {}
+
+  // Return the offsets object cached for `inode`. On a miss, a fresh
+  // chunk_offsets built from `num_chunks` is returned instead; it is not
+  // inserted into the cache here -- callers do that via set().
+  value_type find(inode_type inode, chunk_index_type num_chunks) const {
+    {
+      // hold the lock only for the lookup, not for the allocation below
+      std::lock_guard guard(mx_);
+      auto const pos = cache_.find(inode);
+
+      if (pos != cache_.end()) {
+        return pos->second;
+      }
+    }
+
+    return std::make_shared<chunk_offsets>(num_chunks);
+  }
+
+  // Store `value` as the cache entry for `inode`, under the cache mutex.
+  void set(inode_type inode, value_type value) {
+    std::lock_guard guard(mx_);
+    cache_.set(inode, std::move(value));
+  }
+
+  // Print every cached inode followed by its offsets to `os`.
+  //
+  // The cache contents are copied while holding the mutex; the actual
+  // output happens afterwards, without the cache lock held.
+  void dump(std::ostream& os) const {
+    std::vector<std::pair<inode_type, value_type>> snapshot;
+
+    {
+      std::lock_guard guard(mx_);
+      std::copy(cache_.begin(), cache_.end(), std::back_inserter(snapshot));
+    }
+
+    for (auto const& entry : snapshot) {
+      os << "inode " << entry.first << ":\n";
+      entry.second->dump(os);
+    }
+  }
+
+ private:
+ // Container holding the cached entries, looked up by inode in find().
+ using cache_type = folly::EvictingCacheMap;
+
+ // Both members are mutable because the const member functions find()
+ // and dump() need to lock the mutex and access the cache.
+ cache_type mutable cache_;
+ std::mutex mutable mx_;
+};
+
+} // namespace dwarfs
diff --git a/test/utils_test.cpp b/test/utils_test.cpp
index cd006e1f..42598b50 100644
--- a/test/utils_test.cpp
+++ b/test/utils_test.cpp
@@ -21,14 +21,16 @@
#include
+#include
+#include
+#include
#include
+#include "dwarfs/offset_cache.h"
#include "dwarfs/util.h"
using namespace dwarfs;
-namespace {} // namespace
-
TEST(utf8_display_width, basic) {
EXPECT_EQ(0, utf8_display_width(""));
EXPECT_EQ(1, utf8_display_width(u8string_to_string(u8"a")));
@@ -154,3 +156,133 @@ TEST(shorten_path, string_utf8) {
}
}
}
+
+namespace {
+
+using cache_type = basic_offset_cache;
+// Chunk sizes of the synthetic file used by the offset_cache tests.
+constexpr std::array const test_chunks{
+ 3, 15, 13, 1, 11, 6, 9, 15, 1, 16, 1, 13, 11, 16, 10, 14,
+ 4, 14, 4, 16, 8, 12, 16, 2, 16, 10, 15, 15, 2, 15, 5, 8,
+};
+// Inode number used for every cache lookup in these tests.
+constexpr cache_type::inode_type const test_inode = 42;
+// Sum of all chunk sizes, i.e. the total size of the synthetic file.
+constexpr size_t const total_size =
+ std::accumulate(test_chunks.begin(), test_chunks.end(), 0);
+
+// Test helper: locate `file_offset` in a file described by `chunks` (a list
+// of chunk sizes) by walking the chunks one after another.
+//
+// Without a cache the walk starts at chunk 0. With a cache, the entry for
+// `inode` is asked for a starting position, the offsets passed to the
+// updater during the walk are written back to the entry, and the entry is
+// stored in the cache.
+//
+// Returns {index of the chunk the walk stopped at, remaining offset within
+// that chunk, number of chunks inspected}. If `file_offset` is not inside
+// any chunk, the returned index equals chunks.size().
+// Throws std::runtime_error if cache->find() returns a null entry.
+std::tuple
+find_file_position(cache_type::inode_type const inode,
+ std::span chunks,
+ cache_type::file_offset_type file_offset,
+ cache_type* cache = nullptr) {
+ cache_type::value_type ent;
+
+ if (cache) {
+ ent = cache->find(inode, chunks.size());
+
+ if (!ent) {
+ throw std::runtime_error("find() did not return an object");
+ }
+ }
+
+ auto upd = cache_type::updater();
+ auto it = chunks.begin();
+ auto end = chunks.end();
+ cache_type::chunk_index_type chunk_index = 0;
+ cache_type::file_offset_type chunk_offset = 0;
+
+ // With a cache entry, skip ahead to the position it suggests.
+ if (ent) {
+ std::tie(chunk_index, chunk_offset) = ent->find(file_offset, upd);
+ std::advance(it, chunk_index);
+ }
+
+ // Distance still to cover from the start of the current chunk.
+ auto remaining_offset = file_offset - chunk_offset;
+ size_t num_lookups = 0;
+
+ while (it < end) {
+ ++num_lookups;
+ auto chunk_size = *it;
+
+ // The target lies inside the current chunk: done.
+ if (remaining_offset < chunk_size) {
+ break;
+ }
+
+ remaining_offset -= chunk_size;
+ chunk_offset += chunk_size;
+ ++it;
+
+ // Report the start offset of the next chunk; the updater decides
+ // whether it is one worth keeping.
+ upd.add_offset(++chunk_index, chunk_offset);
+ }
+
+ // Write the collected offsets back and (re-)insert the entry.
+ if (ent) {
+ ent->update(upd);
+ cache->set(inode, ent);
+ }
+
+ return {chunk_index, remaining_offset, num_lookups};
+}
+
+} // namespace
+
+// Resolves every offset of the synthetic file with and without the cache,
+// first in ascending and then in descending order, and checks that both
+// paths agree while the cached path needs fewer chunk lookups.
+TEST(offset_cache, basic) {
+  cache_type cache(4);
+
+  size_t total_ref_lookups = 0;
+  size_t total_test_lookups = 0;
+
+  // Resolve `offset` once uncached (reference) and once through the cache,
+  // compare the results and add both lookup counts to the running totals.
+  auto check_offset = [&](cache_type::file_offset_type offset) {
+    auto [ref_ix, ref_off, ref_lookups] =
+        find_file_position(test_inode, test_chunks, offset);
+
+    auto [test_ix, test_off, test_lookups] =
+        find_file_position(test_inode, test_chunks, offset, &cache);
+
+    // chunk sizes before ref_ix plus the in-chunk offset must give `offset`
+    auto ref_offset = std::accumulate(test_chunks.begin(),
+                                      test_chunks.begin() + ref_ix, ref_off);
+
+    EXPECT_EQ(offset, ref_offset);
+
+    EXPECT_EQ(ref_ix + 1, ref_lookups);
+    EXPECT_LE(test_lookups, 5);
+
+    EXPECT_EQ(ref_ix, test_ix);
+    EXPECT_EQ(ref_off, test_off);
+
+    total_ref_lookups += ref_lookups;
+    total_test_lookups += test_lookups;
+  };
+
+  // ascending pass: the cache gets filled along the way
+  for (cache_type::file_offset_type offset = 0; offset < total_size; ++offset) {
+    check_offset(offset);
+  }
+
+  EXPECT_GT(total_test_lookups, 0);
+  EXPECT_LT(total_test_lookups, total_ref_lookups);
+
+  // descending pass: the cache is already populated
+  for (cache_type::file_offset_type offset = total_size; offset-- > 0;) {
+    check_offset(offset);
+  }
+}
+
+// A lookup of the very last offset through an empty cache must inspect
+// every chunk and end up in the last one.
+TEST(offset_cache, prefill) {
+  cache_type prefilled_cache(4);
+
+  auto [chunk_ix, offset_in_chunk, lookups] = find_file_position(
+      test_inode, test_chunks, total_size - 1, &prefilled_cache);
+
+  EXPECT_EQ(test_chunks.size(), lookups);
+  EXPECT_EQ(test_chunks.size() - 1, chunk_ix);
+  EXPECT_EQ(test_chunks.back() - 1, offset_in_chunk);
+}