mirror of
https://github.com/cuberite/libdeflate.git
synced 2025-09-15 23:34:56 -04:00
lib: fix memcpy() performance with freestanding library builds
With -ffreestanding, for memcpy() to be optimized properly when used for unaligned accesses, we need to use __builtin_memcpy().
This commit is contained in:
parent
3dfd93e365
commit
14be043724
@@ -37,10 +37,29 @@ void *libdeflate_aligned_malloc(size_t alignment, size_t size);
|
||||
void libdeflate_aligned_free(void *ptr);
|
||||
|
||||
#ifdef FREESTANDING
|
||||
/*
|
||||
* With -ffreestanding, <string.h> may be missing, and we must provide
|
||||
* implementations of memset(), memcpy(), memmove(), and memcmp().
|
||||
* See https://gcc.gnu.org/onlinedocs/gcc/Standards.html
|
||||
*
|
||||
* Also, -ffreestanding disables interpreting calls to these functions as
|
||||
* built-ins. E.g., calling memcpy(&v, p, WORDBYTES) will make a function call,
|
||||
* not be optimized to a single load instruction. For performance reasons we
|
||||
* don't want that. So, declare these functions as macros that expand to the
|
||||
* corresponding built-ins. This approach is recommended in the gcc man page.
|
||||
* We still need the actual function definitions in case gcc calls them.
|
||||
*/
|
||||
void *memset(void *s, int c, size_t n);
|
||||
#define memset(s, c, n) __builtin_memset((s), (c), (n))
|
||||
|
||||
void *memcpy(void *dest, const void *src, size_t n);
|
||||
#define memcpy(dest, src, n) __builtin_memcpy((dest), (src), (n))
|
||||
|
||||
void *memmove(void *dest, const void *src, size_t n);
|
||||
#define memmove(dest, src, n) __builtin_memmove((dest), (src), (n))
|
||||
|
||||
int memcmp(const void *s1, const void *s2, size_t n);
|
||||
#define memcmp(s1, s2, n) __builtin_memcmp((s1), (s2), (n))
|
||||
#else
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
@@ -80,9 +80,11 @@ libdeflate_set_memory_allocator(void *(*malloc_func)(size_t),
|
||||
|
||||
/*
|
||||
* Implementations of libc functions for freestanding library builds.
|
||||
* Not optimized yet. Normal library builds don't use these.
|
||||
* Normal library builds don't use these. Not optimized yet; usually the
|
||||
* compiler expands these functions and doesn't actually call them anyway.
|
||||
*/
|
||||
#ifdef FREESTANDING
|
||||
#undef memset
|
||||
void *memset(void *s, int c, size_t n)
|
||||
{
|
||||
u8 *p = s;
|
||||
@@ -93,6 +95,7 @@ void *memset(void *s, int c, size_t n)
|
||||
return s;
|
||||
}
|
||||
|
||||
#undef memcpy
|
||||
void *memcpy(void *dest, const void *src, size_t n)
|
||||
{
|
||||
u8 *d = dest;
|
||||
@@ -104,6 +107,7 @@ void *memcpy(void *dest, const void *src, size_t n)
|
||||
return dest;
|
||||
}
|
||||
|
||||
#undef memmove
|
||||
void *memmove(void *dest, const void *src, size_t n)
|
||||
{
|
||||
u8 *d = dest;
|
||||
@@ -118,6 +122,7 @@ void *memmove(void *dest, const void *src, size_t n)
|
||||
return dest;
|
||||
}
|
||||
|
||||
#undef memcmp
|
||||
int memcmp(const void *s1, const void *s2, size_t n)
|
||||
{
|
||||
const u8 *p1 = s1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user