Small refactor to avoid excessive vector allocation

This commit is contained in:
Marcus Holland-Moritz 2021-03-22 10:21:07 +01:00
parent 6c24e55897
commit a22aa99729
4 changed files with 16 additions and 14 deletions

View File

@ -27,6 +27,7 @@
#include <folly/small_vector.h>
#include "dwarfs/nilsimsa.h"
#include "dwarfs/object.h"
namespace dwarfs {
@ -50,7 +51,7 @@ class inode : public object {
virtual void set_num(uint32_t num) = 0;
virtual uint32_t num() const = 0;
virtual uint32_t similarity_hash() const = 0;
virtual std::vector<uint64_t> const& nilsimsa_similarity_hash() const = 0;
virtual nilsimsa::hash_type const& nilsimsa_similarity_hash() const = 0;
virtual size_t size() const = 0;
virtual file const* any() const = 0;
virtual files_vector const& files() const = 0;

View File

@ -21,10 +21,10 @@
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include <type_traits>
#include <vector>
#include "dwarfs/compiler.h"
@ -45,11 +45,13 @@ namespace dwarfs {
class nilsimsa {
public:
using hash_type = std::array<uint64_t, 4>;
nilsimsa();
~nilsimsa();
void update(uint8_t const* data, size_t size);
std::vector<uint64_t> finalize() const;
void finalize(hash_type& hash) const;
#ifdef DWARFS_MULTIVERSIONING
__attribute__((target("popcnt"))) static int

View File

@ -93,6 +93,10 @@ class inode_ : public inode {
public:
using chunk_type = thrift::metadata::chunk;
// Start with an all-zero nilsimsa hash; the array member has no
// initializer of its own, so it is cleared explicitly here.
inode_() { nilsimsa_similarity_hash_.fill(0); }
void set_num(uint32_t num) override {
DWARFS_CHECK(!num_, "attempt to set inode number multiple times");
num_ = num;
@ -107,7 +111,7 @@ class inode_ : public inode {
return similarity_hash_;
}
std::vector<uint64_t> const& nilsimsa_similarity_hash() const override {
nilsimsa::hash_type const& nilsimsa_similarity_hash() const override {
if (files_.empty()) {
DWARFS_THROW(runtime_error, "inode has no file");
}
@ -156,7 +160,7 @@ class inode_ : public inode {
}
if (opts.with_nilsimsa) {
nilsimsa_similarity_hash_ = nc.finalize();
nc.finalize(nilsimsa_similarity_hash_);
}
}
}
@ -189,7 +193,7 @@ class inode_ : public inode {
uint32_t similarity_hash_{0};
files_vector files_;
std::vector<chunk_type> chunks_;
std::vector<uint64_t> nilsimsa_similarity_hash_;
nilsimsa::hash_type nilsimsa_similarity_hash_;
};
} // namespace

View File

@ -19,8 +19,6 @@
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <array>
#include "dwarfs/compiler.h"
#include "dwarfs/nilsimsa.h"
@ -76,7 +74,7 @@ class nilsimsa::impl {
update_fast(data, size);
}
std::vector<uint64_t> finalize() const {
void finalize(hash_type& hash) const {
size_t total = 0;
if (size_ == 3) {
@ -89,16 +87,13 @@ class nilsimsa::impl {
size_t threshold = total / acc_.size();
std::vector<uint64_t> hash;
hash.resize(4);
std::fill(hash.begin(), hash.end(), 0);
for (size_t i = 0; i < acc_.size(); i++) {
if (acc_[i] > threshold) {
hash[i >> 6] |= UINT64_C(1) << (i & 0x3F);
}
}
return hash;
}
private:
@ -195,7 +190,7 @@ void nilsimsa::update(uint8_t const* data, size_t size) {
impl_->update(data, size);
}
std::vector<uint64_t> nilsimsa::finalize() const { return impl_->finalize(); }
// Forwards to the implementation object, handing `hash` through as the
// caller-provided output argument.
void nilsimsa::finalize(hash_type& hash) const {
  impl_->finalize(hash);
}
#ifdef DWARFS_MULTIVERSIONING
__attribute__((target("popcnt"))) int