mirror of
https://github.com/mhx/dwarfs.git
synced 2025-09-18 08:49:29 -04:00
Small refactor to avoid excessive vector allocation
This commit is contained in:
parent
6c24e55897
commit
a22aa99729
@ -27,6 +27,7 @@
|
|||||||
|
|
||||||
#include <folly/small_vector.h>
|
#include <folly/small_vector.h>
|
||||||
|
|
||||||
|
#include "dwarfs/nilsimsa.h"
|
||||||
#include "dwarfs/object.h"
|
#include "dwarfs/object.h"
|
||||||
|
|
||||||
namespace dwarfs {
|
namespace dwarfs {
|
||||||
@ -50,7 +51,7 @@ class inode : public object {
|
|||||||
virtual void set_num(uint32_t num) = 0;
|
virtual void set_num(uint32_t num) = 0;
|
||||||
virtual uint32_t num() const = 0;
|
virtual uint32_t num() const = 0;
|
||||||
virtual uint32_t similarity_hash() const = 0;
|
virtual uint32_t similarity_hash() const = 0;
|
||||||
virtual std::vector<uint64_t> const& nilsimsa_similarity_hash() const = 0;
|
virtual nilsimsa::hash_type const& nilsimsa_similarity_hash() const = 0;
|
||||||
virtual size_t size() const = 0;
|
virtual size_t size() const = 0;
|
||||||
virtual file const* any() const = 0;
|
virtual file const* any() const = 0;
|
||||||
virtual files_vector const& files() const = 0;
|
virtual files_vector const& files() const = 0;
|
||||||
|
@ -21,10 +21,10 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
#include "dwarfs/compiler.h"
|
#include "dwarfs/compiler.h"
|
||||||
|
|
||||||
@ -45,11 +45,13 @@ namespace dwarfs {
|
|||||||
|
|
||||||
class nilsimsa {
|
class nilsimsa {
|
||||||
public:
|
public:
|
||||||
|
using hash_type = std::array<uint64_t, 4>;
|
||||||
|
|
||||||
nilsimsa();
|
nilsimsa();
|
||||||
~nilsimsa();
|
~nilsimsa();
|
||||||
|
|
||||||
void update(uint8_t const* data, size_t size);
|
void update(uint8_t const* data, size_t size);
|
||||||
std::vector<uint64_t> finalize() const;
|
void finalize(hash_type& hash) const;
|
||||||
|
|
||||||
#ifdef DWARFS_MULTIVERSIONING
|
#ifdef DWARFS_MULTIVERSIONING
|
||||||
__attribute__((target("popcnt"))) static int
|
__attribute__((target("popcnt"))) static int
|
||||||
|
@ -93,6 +93,10 @@ class inode_ : public inode {
|
|||||||
public:
|
public:
|
||||||
using chunk_type = thrift::metadata::chunk;
|
using chunk_type = thrift::metadata::chunk;
|
||||||
|
|
||||||
|
inode_() {
|
||||||
|
std::fill(nilsimsa_similarity_hash_.begin(), nilsimsa_similarity_hash_.end(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
void set_num(uint32_t num) override {
|
void set_num(uint32_t num) override {
|
||||||
DWARFS_CHECK(!num_, "attempt to set inode number multiple times");
|
DWARFS_CHECK(!num_, "attempt to set inode number multiple times");
|
||||||
num_ = num;
|
num_ = num;
|
||||||
@ -107,7 +111,7 @@ class inode_ : public inode {
|
|||||||
return similarity_hash_;
|
return similarity_hash_;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint64_t> const& nilsimsa_similarity_hash() const override {
|
nilsimsa::hash_type const& nilsimsa_similarity_hash() const override {
|
||||||
if (files_.empty()) {
|
if (files_.empty()) {
|
||||||
DWARFS_THROW(runtime_error, "inode has no file");
|
DWARFS_THROW(runtime_error, "inode has no file");
|
||||||
}
|
}
|
||||||
@ -156,7 +160,7 @@ class inode_ : public inode {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (opts.with_nilsimsa) {
|
if (opts.with_nilsimsa) {
|
||||||
nilsimsa_similarity_hash_ = nc.finalize();
|
nc.finalize(nilsimsa_similarity_hash_);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -189,7 +193,7 @@ class inode_ : public inode {
|
|||||||
uint32_t similarity_hash_{0};
|
uint32_t similarity_hash_{0};
|
||||||
files_vector files_;
|
files_vector files_;
|
||||||
std::vector<chunk_type> chunks_;
|
std::vector<chunk_type> chunks_;
|
||||||
std::vector<uint64_t> nilsimsa_similarity_hash_;
|
nilsimsa::hash_type nilsimsa_similarity_hash_;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
@ -19,8 +19,6 @@
|
|||||||
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include <array>
|
|
||||||
|
|
||||||
#include "dwarfs/compiler.h"
|
#include "dwarfs/compiler.h"
|
||||||
#include "dwarfs/nilsimsa.h"
|
#include "dwarfs/nilsimsa.h"
|
||||||
|
|
||||||
@ -76,7 +74,7 @@ class nilsimsa::impl {
|
|||||||
update_fast(data, size);
|
update_fast(data, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint64_t> finalize() const {
|
void finalize(hash_type& hash) const {
|
||||||
size_t total = 0;
|
size_t total = 0;
|
||||||
|
|
||||||
if (size_ == 3) {
|
if (size_ == 3) {
|
||||||
@ -89,16 +87,13 @@ class nilsimsa::impl {
|
|||||||
|
|
||||||
size_t threshold = total / acc_.size();
|
size_t threshold = total / acc_.size();
|
||||||
|
|
||||||
std::vector<uint64_t> hash;
|
std::fill(hash.begin(), hash.end(), 0);
|
||||||
hash.resize(4);
|
|
||||||
|
|
||||||
for (size_t i = 0; i < acc_.size(); i++) {
|
for (size_t i = 0; i < acc_.size(); i++) {
|
||||||
if (acc_[i] > threshold) {
|
if (acc_[i] > threshold) {
|
||||||
hash[i >> 6] |= UINT64_C(1) << (i & 0x3F);
|
hash[i >> 6] |= UINT64_C(1) << (i & 0x3F);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hash;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -195,7 +190,7 @@ void nilsimsa::update(uint8_t const* data, size_t size) {
|
|||||||
impl_->update(data, size);
|
impl_->update(data, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<uint64_t> nilsimsa::finalize() const { return impl_->finalize(); }
|
void nilsimsa::finalize(hash_type& hash) const { impl_->finalize(hash); }
|
||||||
|
|
||||||
#ifdef DWARFS_MULTIVERSIONING
|
#ifdef DWARFS_MULTIVERSIONING
|
||||||
__attribute__((target("popcnt"))) int
|
__attribute__((target("popcnt"))) int
|
||||||
|
Loading…
x
Reference in New Issue
Block a user