From db8fb532a119b498d60fa819b78b473f75d7949b Mon Sep 17 00:00:00 2001 From: Marcus Holland-Moritz Date: Sun, 2 Jul 2023 02:39:32 +0200 Subject: [PATCH] Add inode reader offset cache --- include/dwarfs/inode_reader_v2.h | 28 ++++---- src/dwarfs/filesystem_v2.cpp | 6 +- src/dwarfs/inode_reader_v2.cpp | 116 +++++++++++++++++++++++++------ 3 files changed, 111 insertions(+), 39 deletions(-) diff --git a/include/dwarfs/inode_reader_v2.h b/include/dwarfs/inode_reader_v2.h index 287fea8d..09a28a67 100644 --- a/include/dwarfs/inode_reader_v2.h +++ b/include/dwarfs/inode_reader_v2.h @@ -46,19 +46,20 @@ class inode_reader_v2 { inode_reader_v2& operator=(inode_reader_v2&&) = default; - ssize_t - read(char* buf, size_t size, file_off_t offset, chunk_range chunks) const { - return impl_->read(buf, size, offset, chunks); + ssize_t read(char* buf, uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks) const { + return impl_->read(buf, inode, size, offset, chunks); } - ssize_t readv(iovec_read_buf& buf, size_t size, file_off_t offset, - chunk_range chunks) const { - return impl_->readv(buf, size, offset, chunks); + ssize_t readv(iovec_read_buf& buf, uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks) const { + return impl_->readv(buf, inode, size, offset, chunks); } folly::Expected>, int> - readv(size_t size, file_off_t offset, chunk_range chunks) const { - return impl_->readv(size, offset, chunks); + readv(uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks) const { + return impl_->readv(inode, size, offset, chunks); } void @@ -76,12 +77,13 @@ class inode_reader_v2 { public: virtual ~impl() = default; - virtual ssize_t read(char* buf, size_t size, file_off_t offset, - chunk_range chunks) const = 0; - virtual ssize_t readv(iovec_read_buf& buf, size_t size, file_off_t offset, - chunk_range chunks) const = 0; + virtual ssize_t read(char* buf, uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks) const = 0; + virtual ssize_t readv(iovec_read_buf& buf, uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks) const = 0; virtual folly::Expected>, int> - readv(size_t size, file_off_t offset, chunk_range chunks) const = 0; + readv(uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks) const = 0; virtual void dump(std::ostream& os, const std::string& indent, chunk_range chunks) const = 0; virtual void set_num_workers(size_t num) = 0; diff --git a/src/dwarfs/filesystem_v2.cpp b/src/dwarfs/filesystem_v2.cpp index 17c31fac..07662237 100644 --- a/src/dwarfs/filesystem_v2.cpp +++ b/src/dwarfs/filesystem_v2.cpp @@ -602,7 +602,7 @@ ssize_t filesystem_::read(uint32_t inode, char* buf, size_t size, file_off_t offset) const { PERFMON_CLS_SCOPED_SECTION(read) if (auto chunks = meta_.get_chunks(inode)) { - return ir_.read(buf, size, offset, *chunks); + return ir_.read(buf, inode, size, offset, *chunks); } return -EBADF; } @@ -612,7 +612,7 @@ ssize_t filesystem_::readv(uint32_t inode, iovec_read_buf& buf, size_t size, file_off_t offset) const { PERFMON_CLS_SCOPED_SECTION(readv_iovec) if (auto chunks = meta_.get_chunks(inode)) { - return ir_.readv(buf, size, offset, *chunks); + return ir_.readv(buf, inode, size, offset, *chunks); } return -EBADF; } @@ -623,7 +623,7 @@ filesystem_::readv(uint32_t inode, size_t size, file_off_t offset) const { PERFMON_CLS_SCOPED_SECTION(readv_future) if (auto chunks = meta_.get_chunks(inode)) { - return ir_.readv(size, offset, *chunks); + return ir_.readv(inode, size, offset, *chunks); } return folly::makeUnexpected(-EBADF); } diff --git a/src/dwarfs/inode_reader_v2.cpp b/src/dwarfs/inode_reader_v2.cpp index c6563f35..1868840c 100644 --- a/src/dwarfs/inode_reader_v2.cpp +++ b/src/dwarfs/inode_reader_v2.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include "dwarfs/block_cache.h" @@ -41,12 +42,25 @@ namespace dwarfs { namespace { +constexpr size_t const offset_cache_min_chunks = 128; +constexpr size_t const offset_cache_size = 16; + +struct offset_cache_entry { + offset_cache_entry(file_off_t off, size_t ix) + : file_offset{off} + , chunk_index{ix} {} + + file_off_t file_offset; + size_t chunk_index; +}; + template class inode_reader_ final : public inode_reader_v2::impl { public: inode_reader_(logger& lgr, block_cache&& bc) : cache_(std::move(bc)) , LOG_PROXY_INIT(lgr) + , offset_cache_{offset_cache_size} , iovec_sizes_(1, 0, 256) {} ~inode_reader_() override { @@ -58,14 +72,20 @@ class inode_reader_ final : public inode_reader_v2::impl { LOG_INFO << "iovec size p99: " << iovec_sizes_.getPercentileEstimate(0.99); } + if (oc_put_.load() > 0) { + LOG_INFO << "offset cache put: " << oc_put_.load(); + LOG_INFO << "offset cache get: " << oc_get_.load(); + LOG_INFO << "offset cache hit: " << oc_hit_.load(); + } } - ssize_t read(char* buf, size_t size, file_off_t offset, + ssize_t read(char* buf, uint32_t inode, size_t size, file_off_t offset, chunk_range chunks) const override; - ssize_t readv(iovec_read_buf& buf, size_t size, file_off_t offset, - chunk_range chunks) const override; + ssize_t readv(iovec_read_buf& buf, uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks) const override; folly::Expected>, int> - readv(size_t size, file_off_t offset, chunk_range chunks) const override; + readv(uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks) const override; void dump(std::ostream& os, const std::string& indent, chunk_range chunks) const override; void set_num_workers(size_t num) override { cache_.set_num_workers(num); } @@ -75,16 +95,22 @@ class inode_reader_ final : public inode_reader_v2::impl { private: folly::Expected>, int> - read(size_t size, file_off_t offset, chunk_range chunks) const; + read(uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks) const; template - ssize_t read(size_t size, file_off_t offset, chunk_range chunks, - const StoreFunc& store) const; + ssize_t read(uint32_t inode, size_t size, file_off_t offset, + chunk_range chunks, const StoreFunc& store) const; block_cache cache_; LOG_PROXY_DECL(LoggerPolicy); + mutable folly::EvictingCacheMap offset_cache_; + mutable std::mutex offset_cache_mutex_; mutable folly::Histogram iovec_sizes_; mutable std::mutex iovec_sizes_mutex_; + mutable std::atomic oc_put_{0}; + mutable std::atomic oc_get_{0}; + mutable std::atomic oc_hit_{0}; }; template @@ -99,7 +125,8 @@ void inode_reader_::dump(std::ostream& os, template folly::Expected>, int> -inode_reader_::readv(size_t size, file_off_t offset, +inode_reader_::readv(uint32_t inode, size_t const size, + file_off_t offset, chunk_range chunks) const { if (offset < 0) { return folly::makeUnexpected(-EINVAL); @@ -114,6 +141,28 @@ inode_reader_::readv(size_t size, file_off_t offset, auto it = chunks.begin(); auto end = chunks.end(); + file_off_t it_offset = 0; + + if (offset > 0 && chunks.size() >= offset_cache_min_chunks) { + ++oc_get_; + + std::optional oce; + + { + std::lock_guard lock(offset_cache_mutex_); + + if (auto oci = offset_cache_.find(inode); oci != offset_cache_.end()) { + oce = oci->second; + } + } + + if (oce && oce->file_offset <= offset) { + std::advance(it, oce->chunk_index); + offset -= oce->file_offset; + it_offset = oce->file_offset; + ++oc_hit_; + } + } // search for the first chunk that contains data from this request while (it < end) { @@ -124,6 +173,7 @@ inode_reader_::readv(size_t size, file_off_t offset, } offset -= chunksize; + it_offset += chunksize; ++it; } @@ -132,7 +182,9 @@ inode_reader_::readv(size_t size, file_off_t offset, return ranges; } - for (size_t num_read = 0; it != end && num_read < size; ++it) { + size_t num_read = 0; + + while (it != end) { size_t chunksize = it->size() - offset; size_t chunkoff = it->offset() + offset; @@ -148,7 +200,24 @@ inode_reader_::readv(size_t size, file_off_t offset, ranges.emplace_back(cache_.get(it->block(), chunkoff, chunksize)); num_read += chunksize; + + if (num_read == size) { + if (chunks.size() >= offset_cache_min_chunks) { + offset_cache_entry oce(it_offset, std::distance(chunks.begin(), it)); + ++oc_put_; + + { + std::lock_guard lock(offset_cache_mutex_); + offset_cache_.set(inode, oce); + } + } + + break; + } + offset = 0; + it_offset += it->size(); + ++it; } return ranges; @@ -156,10 +225,10 @@ inode_reader_::readv(size_t size, file_off_t offset, template template -ssize_t inode_reader_::read(size_t size, file_off_t offset, - chunk_range chunks, +ssize_t inode_reader_::read(uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks, const StoreFunc& store) const { - auto ranges = readv(size, offset, chunks); + auto ranges = readv(inode, size, offset, chunks); if (!ranges) { return ranges.error(); @@ -185,24 +254,25 @@ ssize_t inode_reader_::read(size_t size, file_off_t offset, template ssize_t -inode_reader_::read(char* buf, size_t size, file_off_t offset, - chunk_range chunks) const { - return read(size, offset, chunks, +inode_reader_::read(char* buf, uint32_t inode, size_t size, + file_off_t offset, chunk_range chunks) const { + return read(inode, size, offset, chunks, [&](size_t num_read, const block_range& br) { ::memcpy(buf + num_read, br.data(), br.size()); }); } template -ssize_t inode_reader_::readv(iovec_read_buf& buf, size_t size, - file_off_t offset, +ssize_t inode_reader_::readv(iovec_read_buf& buf, uint32_t inode, + size_t size, file_off_t offset, chunk_range chunks) const { - auto rv = read(size, offset, chunks, [&](size_t, const block_range& br) { - buf.buf.resize(buf.buf.size() + 1); - buf.buf.back().iov_base = const_cast(br.data()); - buf.buf.back().iov_len = br.size(); - buf.ranges.emplace_back(br); - }); + auto rv = + read(inode, size, offset, chunks, [&](size_t, const block_range& br) { + buf.buf.resize(buf.buf.size() + 1); + buf.buf.back().iov_base = const_cast(br.data()); + buf.buf.back().iov_len = br.size(); + buf.ranges.emplace_back(br); + }); { std::lock_guard lock(iovec_sizes_mutex_); iovec_sizes_.addValue(buf.buf.size());