Add new checksum abstraction, including support for xxHash

This commit is contained in:
Marcus Holland-Moritz 2020-12-16 14:27:35 +01:00
parent 9cd9deb45b
commit dbf4ec4f35
7 changed files with 146 additions and 7 deletions

3
.gitmodules vendored
View File

@ -7,3 +7,6 @@
[submodule "zstd"] [submodule "zstd"]
path = zstd path = zstd
url = https://github.com/facebook/zstd url = https://github.com/facebook/zstd
[submodule "xxHash"]
path = xxHash
url = https://github.com/Cyan4973/xxHash

View File

@ -160,12 +160,23 @@ if(WITH_TESTS)
include(GoogleTest) include(GoogleTest)
endif() endif()
# Build the xxHash sources from the xxHash/ subdirectory (added as a git
# submodule) into a library target named `xxhash`, which the dwarfs targets
# link against.
add_library(xxhash
xxHash/xxhash.c
)
# Warning flags used when compiling xxhash.c. They are PRIVATE, so they apply
# only to this target's own sources and are not propagated to targets that
# link against `xxhash`.
target_compile_options(xxhash PRIVATE -Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow
-Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement
-Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security
-Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings
-Wredundant-decls -Wstrict-overflow=2)
list( list(
APPEND APPEND
LIBDWARFS_SRC LIBDWARFS_SRC
src/dwarfs/block_cache.cpp src/dwarfs/block_cache.cpp
src/dwarfs/block_compressor.cpp src/dwarfs/block_compressor.cpp
src/dwarfs/block_manager.cpp src/dwarfs/block_manager.cpp
src/dwarfs/checksum.cpp
src/dwarfs/console_writer.cpp src/dwarfs/console_writer.cpp
src/dwarfs/entry.cpp src/dwarfs/entry.cpp
src/dwarfs/error.cpp src/dwarfs/error.cpp
@ -324,6 +335,7 @@ list(
${CMAKE_CURRENT_SOURCE_DIR}/folly ${CMAKE_CURRENT_SOURCE_DIR}/folly
${CMAKE_CURRENT_SOURCE_DIR}/fbthrift ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift
${CMAKE_CURRENT_SOURCE_DIR}/zstd/lib ${CMAKE_CURRENT_SOURCE_DIR}/zstd/lib
${CMAKE_CURRENT_SOURCE_DIR}/xxHash
${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}
${SPARSEHASH_INCLUDE_DIR}) ${SPARSEHASH_INCLUDE_DIR})
@ -391,6 +403,7 @@ target_link_libraries(
folly folly
${Boost_LIBRARIES} ${Boost_LIBRARIES}
libzstd_static libzstd_static
xxhash
PkgConfig::LIBLZ4 PkgConfig::LIBLZ4
PkgConfig::LIBLZMA) PkgConfig::LIBLZMA)

View File

@ -13,9 +13,10 @@ fi
g++ -static -static-libgcc -static-libstdc++ "$@" -o "$target" \ g++ -static -static-libgcc -static-libstdc++ "$@" -o "$target" \
-Wl,-allow-multiple-definition -Wl,-Bstatic \ -Wl,-allow-multiple-definition -Wl,-Bstatic \
libdwarfs.a \ libdwarfs.a \
libmetadata_thrift.a \ libmetadata_thrift.a \
libthrift_light.a \ libthrift_light.a \
libxxhash.a \
folly/libfolly.a \ folly/libfolly.a \
zstd/build/cmake/lib/libzstd.a \ zstd/build/cmake/lib/libzstd.a \
$fuse \ $fuse \

52
include/dwarfs/checksum.h Normal file
View File

@ -0,0 +1,52 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstddef>
#include <stdexcept>
namespace dwarfs {

/**
 * Checksum algorithms accepted by compute_checksum() and verify_checksum().
 */
enum class checksum_algorithm {
  SHA1,
  SHA2_512_256,
  XXH3_64,
};

/**
 * Size in bytes of the checksum produced by an algorithm.
 *
 * Usable in constant expressions for every enumerator of
 * checksum_algorithm.
 *
 * \param alg  algorithm to query
 * \return     number of bytes in a checksum of that algorithm
 * \throws std::logic_error if \p alg is not one of the enumerators
 */
constexpr size_t checksum_size(checksum_algorithm alg) {
  switch (alg) {
  case checksum_algorithm::XXH3_64:
    return 8;
  case checksum_algorithm::SHA1:
    return 20;
  case checksum_algorithm::SHA2_512_256:
    return 32;
  }

  // Only reached for values outside the enumeration.
  throw std::logic_error("unknown algorithm");
}

/**
 * Compute the checksum of a memory region.
 *
 * \param alg     algorithm to use
 * \param data    start of the input bytes
 * \param size    number of input bytes
 * \param result  buffer that receives the checksum bytes
 * \return        true on success, false otherwise
 */
bool compute_checksum(checksum_algorithm alg, void const* data, size_t size,
                      void* result);

/**
 * Recompute the checksum of a memory region and compare it with an
 * expected value.
 *
 * \param alg       algorithm to use
 * \param data      start of the input bytes
 * \param size      number of input bytes
 * \param checksum  expected checksum; checksum_size(alg) bytes are compared
 * \return          true if the checksum could be computed and matches,
 *                  false otherwise
 */
bool verify_checksum(checksum_algorithm alg, void const* data, size_t size,
                     void const* checksum);

} // namespace dwarfs

68
src/dwarfs/checksum.cpp Normal file
View File

@ -0,0 +1,68 @@
/* vim:set ts=2 sw=2 sts=2 et: */
/**
* \author Marcus Holland-Moritz (github@mhxnet.de)
* \copyright Copyright (c) Marcus Holland-Moritz
*
* This file is part of dwarfs.
*
* dwarfs is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* dwarfs is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
*/
#include <cstring>
#include <openssl/evp.h>
#include <xxhash.h>
#include "dwarfs/checksum.h"
namespace dwarfs {
namespace {
/**
 * Hash a buffer in one shot with an OpenSSL EVP message digest.
 *
 * \param algorithm  digest to use, e.g. the value returned by EVP_sha1()
 * \param data       start of the input bytes
 * \param size       number of input bytes
 * \param result     buffer that receives the digest bytes
 * \return           true if EVP_Digest() reported success (non-zero)
 */
bool compute_evp(const EVP_MD* algorithm, void const* data, size_t size,
                 void* result) {
  auto* const digest_out = static_cast<unsigned char*>(result);

  // The digest length is not requested (nullptr) and no specific engine is
  // selected (nullptr).
  int const rc =
      EVP_Digest(data, size, digest_out, nullptr, algorithm, nullptr);

  return rc != 0;
}
bool compute_xxh3_64(void const* data, size_t size, void* result) {
auto checksum = XXH3_64bits(data, size);
::memcpy(result, &checksum, sizeof(checksum));
return true;
}
} // namespace
/**
 * Compute the checksum of a memory region with the selected algorithm.
 *
 * SHA1 and SHA2_512_256 are computed through OpenSSL's EVP interface,
 * XXH3_64 through xxHash.
 *
 * \param alg     algorithm to use
 * \param data    start of the input bytes
 * \param size    number of input bytes
 * \param result  buffer that receives the checksum bytes
 * \return        true on success; false if the computation failed or
 *                \p alg is not a known algorithm
 */
bool compute_checksum(checksum_algorithm alg, void const* data, size_t size,
                      void* result) {
  // Stays false when `alg` matches none of the cases below.
  bool ok = false;

  switch (alg) {
  case checksum_algorithm::SHA1:
    ok = compute_evp(EVP_sha1(), data, size, result);
    break;
  case checksum_algorithm::SHA2_512_256:
    ok = compute_evp(EVP_sha512_256(), data, size, result);
    break;
  case checksum_algorithm::XXH3_64:
    ok = compute_xxh3_64(data, size, result);
    break;
  }

  return ok;
}
/**
 * Recompute the checksum of a memory region and compare it with an
 * expected value.
 *
 * \param alg       algorithm to use
 * \param data      start of the input bytes
 * \param size      number of input bytes
 * \param checksum  expected checksum; checksum_size(alg) bytes are compared
 * \return          true if the checksum was computed successfully and
 *                  equals \p checksum, false otherwise
 */
bool verify_checksum(checksum_algorithm alg, void const* data, size_t size,
                     const void* checksum) {
  // Scratch buffer sized with OpenSSL's upper bound on digest length.
  char computed[EVP_MAX_MD_SIZE];

  // Bail out before checksum_size() is consulted if the computation failed.
  if (!compute_checksum(alg, data, size, computed)) {
    return false;
  }

  return ::memcmp(checksum, computed, checksum_size(alg)) == 0;
}
} // namespace dwarfs

View File

@ -23,10 +23,9 @@
#include <cstring> #include <cstring>
#include <utility> #include <utility>
#include <openssl/sha.h>
#include <fmt/format.h> #include <fmt/format.h>
#include "dwarfs/checksum.h"
#include "dwarfs/entry.h" #include "dwarfs/entry.h"
#include "dwarfs/error.h" #include "dwarfs/error.h"
#include "dwarfs/global_entry_data.h" #include "dwarfs/global_entry_data.h"
@ -163,15 +162,17 @@ uint32_t file::inode_num() const { return inode_->num(); }
void file::accept(entry_visitor& v, bool) { v.visit(this); } void file::accept(entry_visitor& v, bool) { v.visit(this); }
void file::scan(os_access& os, progress& prog) { void file::scan(os_access& os, progress& prog) {
static_assert(SHA_DIGEST_LENGTH == sizeof(data::hash_type)); constexpr auto alg = checksum_algorithm::SHA1;
static_assert(checksum_size(alg) == sizeof(data::hash_type));
if (size_t s = size(); s > 0) { if (size_t s = size(); s > 0) {
prog.original_size += s; prog.original_size += s;
auto mm = os.map_file(path(), s); auto mm = os.map_file(path(), s);
::SHA1(mm->as<unsigned char>(), s, DWARFS_CHECK(compute_checksum(alg, mm->as<void>(), s, &data_->hash[0]),
reinterpret_cast<unsigned char*>(&data_->hash[0])); "checksum computation failed");
} else { } else {
::SHA1(nullptr, 0, reinterpret_cast<unsigned char*>(&data_->hash[0])); DWARFS_CHECK(compute_checksum(alg, nullptr, 0, &data_->hash[0]),
"checksum computation failed");
} }
} }

1
xxHash Submodule

@ -0,0 +1 @@
Subproject commit 94e5f23e736f2bb67ebdf90727353e65344f9fc0