lib: fix memcpy() performance with freestanding library builds

With -ffreestanding, for memcpy() to be optimized properly when used for
unaligned accesses, we need to use __builtin_memcpy().
This commit is contained in:
Eric Biggers 2020-05-08 23:03:58 -07:00
parent 3dfd93e365
commit 14be043724
2 changed files with 25 additions and 1 deletions

View File

@ -37,10 +37,29 @@ void *libdeflate_aligned_malloc(size_t alignment, size_t size);
void libdeflate_aligned_free(void *ptr);
#ifdef FREESTANDING
/*
* With -ffreestanding, <string.h> may be missing, and we must provide
* implementations of memset(), memcpy(), memmove(), and memcmp().
* See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
*
* Also, -ffreestanding disables interpreting calls to these functions as
* built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
* not be optimized to a single load instruction. For performance reasons we
* don't want that. So, declare these functions as macros that expand to the
* corresponding built-ins. This approach is recommended in the gcc man page.
* We still need the actual function definitions in case gcc calls them.
*
* Ordering matters here: each prototype is written *before* the macro of the
* same name, so the prototype itself is not macro-expanded. For the same
* reason, any source file that defines one of these functions must #undef the
* corresponding macro immediately before the definition; otherwise the
* function-like macro would rewrite the definition's parameter list.
*/
/* Fill n bytes at s with the byte value c. */
void *memset(void *s, int c, size_t n);
#define memset(s, c, n) __builtin_memset((s), (c), (n))
/* Copy n bytes from src to dest (regions must not overlap). */
void *memcpy(void *dest, const void *src, size_t n);
#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
/* Copy n bytes from src to dest (regions may overlap). */
void *memmove(void *dest, const void *src, size_t n);
#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
/* Compare the first n bytes of s1 and s2. */
int memcmp(const void *s1, const void *s2, size_t n);
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
/* Non-freestanding builds: take the declarations from the C library. */
#else
#include <string.h>
#endif

View File

@ -80,9 +80,11 @@ libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
/*
* Implementations of libc functions for freestanding library builds.
* Not optimized yet. Normal library builds don't use these.
* Normal library builds don't use these. Not optimized yet; usually the
* compiler expands these functions and doesn't actually call them anyway.
*/
#ifdef FREESTANDING
#undef memset
void *memset(void *s, int c, size_t n)
{
u8 *p = s;
@ -93,6 +95,7 @@ void *memset(void *s, int c, size_t n)
return s;
}
#undef memcpy
void *memcpy(void *dest, const void *src, size_t n)
{
u8 *d = dest;
@ -104,6 +107,7 @@ void *memcpy(void *dest, const void *src, size_t n)
return dest;
}
#undef memmove
void *memmove(void *dest, const void *src, size_t n)
{
u8 *d = dest;
@ -118,6 +122,7 @@ void *memmove(void *dest, const void *src, size_t n)
return dest;
}
#undef memcmp
int memcmp(const void *s1, const void *s2, size_t n)
{
const u8 *p1 = s1;