mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 00:40:30 -04:00
Small refactor to avoid excessive vector allocation
This commit is contained in:
parent
6c24e55897
commit
a22aa99729
@ -27,6 +27,7 @@
|
||||
|
||||
#include <folly/small_vector.h>
|
||||
|
||||
#include "dwarfs/nilsimsa.h"
|
||||
#include "dwarfs/object.h"
|
||||
|
||||
namespace dwarfs {
|
||||
@ -50,7 +51,7 @@ class inode : public object {
|
||||
virtual void set_num(uint32_t num) = 0;
|
||||
virtual uint32_t num() const = 0;
|
||||
virtual uint32_t similarity_hash() const = 0;
|
||||
virtual std::vector<uint64_t> const& nilsimsa_similarity_hash() const = 0;
|
||||
virtual nilsimsa::hash_type const& nilsimsa_similarity_hash() const = 0;
|
||||
virtual size_t size() const = 0;
|
||||
virtual file const* any() const = 0;
|
||||
virtual files_vector const& files() const = 0;
|
||||
|
@ -21,10 +21,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "dwarfs/compiler.h"
|
||||
|
||||
@ -45,11 +45,13 @@ namespace dwarfs {
|
||||
|
||||
class nilsimsa {
|
||||
public:
|
||||
using hash_type = std::array<uint64_t, 4>;
|
||||
|
||||
nilsimsa();
|
||||
~nilsimsa();
|
||||
|
||||
void update(uint8_t const* data, size_t size);
|
||||
std::vector<uint64_t> finalize() const;
|
||||
void finalize(hash_type& hash) const;
|
||||
|
||||
#ifdef DWARFS_MULTIVERSIONING
|
||||
__attribute__((target("popcnt"))) static int
|
||||
|
@ -93,6 +93,10 @@ class inode_ : public inode {
|
||||
public:
|
||||
using chunk_type = thrift::metadata::chunk;
|
||||
|
||||
// Default constructor: sets every element of nilsimsa_similarity_hash_ to 0.
// The member is a nilsimsa::hash_type (a fixed-size std::array of uint64_t)
// declared without an initializer, so without this fill its contents would
// be indeterminate until finalize() writes into it.
inode_() {
|
||||
std::fill(nilsimsa_similarity_hash_.begin(), nilsimsa_similarity_hash_.end(), 0);
|
||||
}
|
||||
|
||||
void set_num(uint32_t num) override {
|
||||
DWARFS_CHECK(!num_, "attempt to set inode number multiple times");
|
||||
num_ = num;
|
||||
@ -107,7 +111,7 @@ class inode_ : public inode {
|
||||
return similarity_hash_;
|
||||
}
|
||||
|
||||
std::vector<uint64_t> const& nilsimsa_similarity_hash() const override {
|
||||
nilsimsa::hash_type const& nilsimsa_similarity_hash() const override {
|
||||
if (files_.empty()) {
|
||||
DWARFS_THROW(runtime_error, "inode has no file");
|
||||
}
|
||||
@ -156,7 +160,7 @@ class inode_ : public inode {
|
||||
}
|
||||
|
||||
if (opts.with_nilsimsa) {
|
||||
nilsimsa_similarity_hash_ = nc.finalize();
|
||||
nc.finalize(nilsimsa_similarity_hash_);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -189,7 +193,7 @@ class inode_ : public inode {
|
||||
uint32_t similarity_hash_{0};
|
||||
files_vector files_;
|
||||
std::vector<chunk_type> chunks_;
|
||||
std::vector<uint64_t> nilsimsa_similarity_hash_;
|
||||
nilsimsa::hash_type nilsimsa_similarity_hash_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
@ -19,8 +19,6 @@
|
||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "dwarfs/compiler.h"
|
||||
#include "dwarfs/nilsimsa.h"
|
||||
|
||||
@ -76,7 +74,7 @@ class nilsimsa::impl {
|
||||
update_fast(data, size);
|
||||
}
|
||||
|
||||
std::vector<uint64_t> finalize() const {
|
||||
void finalize(hash_type& hash) const {
|
||||
size_t total = 0;
|
||||
|
||||
if (size_ == 3) {
|
||||
@ -89,16 +87,13 @@ class nilsimsa::impl {
|
||||
|
||||
size_t threshold = total / acc_.size();
|
||||
|
||||
std::vector<uint64_t> hash;
|
||||
hash.resize(4);
|
||||
std::fill(hash.begin(), hash.end(), 0);
|
||||
|
||||
for (size_t i = 0; i < acc_.size(); i++) {
|
||||
if (acc_[i] > threshold) {
|
||||
hash[i >> 6] |= UINT64_C(1) << (i & 0x3F);
|
||||
}
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -195,7 +190,7 @@ void nilsimsa::update(uint8_t const* data, size_t size) {
|
||||
impl_->update(data, size);
|
||||
}
|
||||
|
||||
std::vector<uint64_t> nilsimsa::finalize() const { return impl_->finalize(); }
|
||||
// Forwards to the implementation object, which writes the computed hash into
// the caller-supplied `hash` instead of returning a newly allocated vector.
void nilsimsa::finalize(hash_type& hash) const { impl_->finalize(hash); }
|
||||
|
||||
#ifdef DWARFS_MULTIVERSIONING
|
||||
__attribute__((target("popcnt"))) int
|
||||
|
Loading…
x
Reference in New Issue
Block a user