From a07ed5824a3025b2df7aa398d77e8f26453fae8a Mon Sep 17 00:00:00 2001 From: Ingvar Stepanyan Date: Wed, 13 Jan 2021 13:51:23 +0000 Subject: [PATCH] Assume fast unaligned access on WebAssembly I saw this tweet claiming this flag makes libdeflate run 20% faster on WebAssembly: https://twitter.com/Algunenano/status/1317098341377900550. Indeed, when I tried it, even in a complex PNG compression benchmark, I observed a 10-15% improvement when this flag is enabled. Even though WebAssembly might be running on top of a variety of underlying platforms, the spec requires it to support unaligned access, and on the majority of platforms it will translate to faster code. Hence, I think it makes sense to enable this flag by default. --- common/compiler_gcc.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/common/compiler_gcc.h b/common/compiler_gcc.h index 8259946..892d261 100644 --- a/common/compiler_gcc.h +++ b/common/compiler_gcc.h @@ -175,7 +175,16 @@ typedef unsigned char __v32qu __attribute__((__vector_size__(32))); # define bswap64 __builtin_bswap64 #endif -#if defined(__x86_64__) || defined(__i386__) || defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) +#if defined(__x86_64__) || defined(__i386__) || \ + defined(__ARM_FEATURE_UNALIGNED) || defined(__powerpc64__) || \ + /* + * For all compilation purposes, WebAssembly behaves like any other CPU + * instruction set. Even though WebAssembly engine might be running on top + * of different actual CPU architectures, the WebAssembly spec itself + * permits unaligned access and it will be fast on most of those platforms, + * and simulated at the engine level on others, so it's worth treating it + * as a CPU architecture with fast unaligned access. + */ defined(__wasm__) # define UNALIGNED_ACCESS_IS_FAST 1 #endif