diff --git a/.gitmodules b/.gitmodules index 4037d009..c8c57115 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "zstd"] path = zstd url = https://github.com/facebook/zstd +[submodule "xxHash"] + path = xxHash + url = https://github.com/Cyan4973/xxHash diff --git a/CMakeLists.txt b/CMakeLists.txt index c1abd346..44a43621 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -160,12 +160,23 @@ if(WITH_TESTS) include(GoogleTest) endif() +add_library(xxhash + xxHash/xxhash.c + ) + +target_compile_options(xxhash PRIVATE -Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow + -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement + -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security + -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings + -Wredundant-decls -Wstrict-overflow=2) + list( APPEND LIBDWARFS_SRC src/dwarfs/block_cache.cpp src/dwarfs/block_compressor.cpp src/dwarfs/block_manager.cpp + src/dwarfs/checksum.cpp src/dwarfs/console_writer.cpp src/dwarfs/entry.cpp src/dwarfs/error.cpp @@ -324,6 +335,7 @@ list( ${CMAKE_CURRENT_SOURCE_DIR}/folly ${CMAKE_CURRENT_SOURCE_DIR}/fbthrift ${CMAKE_CURRENT_SOURCE_DIR}/zstd/lib + ${CMAKE_CURRENT_SOURCE_DIR}/xxHash ${CMAKE_CURRENT_BINARY_DIR} ${SPARSEHASH_INCLUDE_DIR}) @@ -391,6 +403,7 @@ target_link_libraries( folly ${Boost_LIBRARIES} libzstd_static + xxhash PkgConfig::LIBLZ4 PkgConfig::LIBLZMA) diff --git a/cmake/static_link.sh b/cmake/static_link.sh index 3ef5b78b..7b286327 100644 --- a/cmake/static_link.sh +++ b/cmake/static_link.sh @@ -13,9 +13,10 @@ fi g++ -static -static-libgcc -static-libstdc++ "$@" -o "$target" \ -Wl,-allow-multiple-definition -Wl,-Bstatic \ - libdwarfs.a \ + libdwarfs.a \ libmetadata_thrift.a \ libthrift_light.a \ + libxxhash.a \ folly/libfolly.a \ zstd/build/cmake/lib/libzstd.a \ $fuse \ diff --git a/include/dwarfs/checksum.h b/include/dwarfs/checksum.h new file mode 100644 index 00000000..adfd4dfb --- /dev/null +++ b/include/dwarfs/checksum.h @@ -0,0 
+1,72 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <cstddef>
+#include <stdexcept>
+
+namespace dwarfs {
+
+/** Checksum algorithms accepted by compute_checksum()/verify_checksum(). */
+enum class checksum_algorithm {
+  SHA1,
+  SHA2_512_256,
+  XXH3_64,
+};
+
+/**
+ * Size in bytes of the checksum produced by \p alg.
+ *
+ * \throws std::logic_error if \p alg is not one of the enumerators above.
+ */
+constexpr size_t checksum_size(checksum_algorithm alg) {
+  switch (alg) {
+  case checksum_algorithm::SHA1:
+    return 20;
+  case checksum_algorithm::SHA2_512_256:
+    return 32;
+  case checksum_algorithm::XXH3_64:
+    return 8;
+  }
+  throw std::logic_error("unknown algorithm");
+}
+
+/**
+ * Compute the \p alg checksum of \p size bytes at \p data.
+ *
+ * \p result must have room for checksum_size(alg) bytes.
+ *
+ * \return true on success, false if the computation failed.
+ */
+bool compute_checksum(checksum_algorithm alg, void const* data, size_t size,
+                      void* result);
+
+/**
+ * Recompute the \p alg checksum of \p size bytes at \p data and compare it
+ * with the checksum_size(alg) bytes stored at \p checksum.
+ *
+ * \return true only if the computation succeeded and the checksums match.
+ */
+bool verify_checksum(checksum_algorithm alg, void const* data, size_t size,
+                     const void* checksum);
+
+} // namespace dwarfs
diff --git a/src/dwarfs/checksum.cpp b/src/dwarfs/checksum.cpp
new file mode 100644
index 00000000..f3d52365
--- /dev/null
+++ b/src/dwarfs/checksum.cpp
@@ -0,0 +1,75 @@
+/* vim:set ts=2 sw=2 sts=2 et: */
+/**
+ * \author Marcus Holland-Moritz (github@mhxnet.de)
+ * \copyright Copyright (c) Marcus Holland-Moritz
+ *
+ * This file is part of dwarfs.
+ *
+ * dwarfs is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * dwarfs is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with dwarfs. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <cstring>
+
+#include <openssl/evp.h>
+
+#include <xxhash.h>
+
+#include "dwarfs/checksum.h"
+
+namespace dwarfs {
+
+namespace {
+
+// Hash `size` bytes at `data` with the given OpenSSL digest and write the
+// digest to `result`. EVP_Digest() returns 1 on success and 0 on failure.
+bool compute_evp(const EVP_MD* algorithm, void const* data, size_t size,
+                 void* result) {
+  return EVP_Digest(data, size, reinterpret_cast<unsigned char*>(result),
+                    nullptr, algorithm, nullptr);
+}
+
+// Hash `size` bytes at `data` with 64-bit XXH3 and copy the value to `result`.
+// NOTE(review): the value is copied in host byte order, so the stored bytes
+// differ between little- and big-endian hosts; confirm this is acceptable.
+bool compute_xxh3_64(void const* data, size_t size, void* result) {
+  auto checksum = XXH3_64bits(data, size);
+  ::memcpy(result, &checksum, sizeof(checksum));
+  return true;
+}
+
+} // namespace
+
+bool compute_checksum(checksum_algorithm alg, void const* data, size_t size,
+                      void* result) {
+  switch (alg) {
+  case checksum_algorithm::SHA1:
+    return compute_evp(EVP_sha1(), data, size, result);
+  case checksum_algorithm::SHA2_512_256:
+    return compute_evp(EVP_sha512_256(), data, size, result);
+  case checksum_algorithm::XXH3_64:
+    return compute_xxh3_64(data, size, result);
+  }
+  // Only reached for a value that is not one of the enumerators handled above.
+  return false;
+}
+
+bool verify_checksum(checksum_algorithm alg, void const* data, size_t size,
+                     const void* checksum) {
+  // Sized for the largest OpenSSL digest; the 8-byte XXH3 value fits as well.
+  char tmp[EVP_MAX_MD_SIZE];
+  return compute_checksum(alg, data, size, tmp) &&
+         ::memcmp(checksum, tmp, checksum_size(alg)) == 0;
+}
+
+} // namespace dwarfs
diff --git a/src/dwarfs/entry.cpp b/src/dwarfs/entry.cpp
index 6bbf1863..f6966e49 100644
--- a/src/dwarfs/entry.cpp
+++ b/src/dwarfs/entry.cpp
@@ -23,10 +23,9 @@
 #include
 #include
 
-#include <openssl/sha.h>
-
 #include
 
+#include "dwarfs/checksum.h"
 #include "dwarfs/entry.h"
 #include "dwarfs/error.h"
 #include "dwarfs/global_entry_data.h"
@@ -163,15 +162,20 @@ uint32_t file::inode_num() const { return inode_->num(); }
 void file::accept(entry_visitor& v, bool) { v.visit(this); }
 
 void file::scan(os_access& os, progress& prog) {
-  static_assert(SHA_DIGEST_LENGTH == sizeof(data::hash_type));
+  // Hash the file contents into data_->hash. The digest is SHA-1, and the
+  // static_assert below guarantees it fills data::hash_type exactly.
+  constexpr auto alg = checksum_algorithm::SHA1;
+  static_assert(checksum_size(alg) == sizeof(data::hash_type));
 
   if (size_t s = size(); s > 0) {
     prog.original_size += s;
     auto mm = os.map_file(path(), s);
-    ::SHA1(mm->as<unsigned char>(), s,
-           reinterpret_cast<unsigned char*>(&data_->hash[0]));
+    DWARFS_CHECK(compute_checksum(alg, mm->as<void>(), s, &data_->hash[0]),
+                 "checksum computation failed");
   } else {
-    ::SHA1(nullptr, 0, reinterpret_cast<unsigned char*>(&data_->hash[0]));
+    // Nothing to map when size() is 0: hash zero bytes instead.
+    DWARFS_CHECK(compute_checksum(alg, nullptr, 0, &data_->hash[0]),
+                 "checksum computation failed");
   }
 }
 
diff --git a/xxHash b/xxHash
new file mode 160000
index 00000000..94e5f23e
--- /dev/null
+++ b/xxHash
@@ -0,0 +1 @@
+Subproject commit 94e5f23e736f2bb67ebdf90727353e65344f9fc0