From 6c8edca2d41580e1ca9dcaf8fd4fba70670a19c9 Mon Sep 17 00:00:00 2001
From: James Cowgill
Date: Thu, 17 Dec 2015 01:40:26 +0000
Subject: [PATCH 1/2] Fix build errors on x32 by using the generic 'add'
 instruction

On x32 systems, pointers are 4 bytes wide and are therefore stored in
%e?x registers (instead of %r?x registers). These registers must be
accessed using "addl" instead of "addq"; however, the GNU assembler
accepts the generic "add" instruction and determines the correct opcode
from the registers passed to it.
---
 library/aesni.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 83a5868bd..1ca3c3ef5 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -100,7 +100,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 asm( "movdqu (%3), %%xmm0 \n\t" // load input
 "movdqu (%1), %%xmm1 \n\t" // load round key 0
 "pxor %%xmm1, %%xmm0 \n\t" // round 0
- "addq $16, %1 \n\t" // point to next round key
+ "add $16, %1 \n\t" // point to next round key
 "subl $1, %0 \n\t" // normal rounds = nr - 1
 "test %2, %2 \n\t" // mode?
 "jz 2f \n\t" // 0 = decrypt
@@ -108,7 +108,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 "1: \n\t" // encryption loop
 "movdqu (%1), %%xmm1 \n\t" // load round key
 AESENC xmm1_xmm0 "\n\t" // do round
- "addq $16, %1 \n\t" // point to next round key
+ "add $16, %1 \n\t" // point to next round key
 "subl $1, %0 \n\t" // loop
 "jnz 1b \n\t"
 "movdqu (%1), %%xmm1 \n\t" // load round key
@@ -118,7 +118,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 "2: \n\t" // decryption loop
 "movdqu (%1), %%xmm1 \n\t"
 AESDEC xmm1_xmm0 "\n\t" // do round
- "addq $16, %1 \n\t"
+ "add $16, %1 \n\t"
 "subl $1, %0 \n\t"
 "jnz 2b \n\t"
 "movdqu (%1), %%xmm1 \n\t" // load round key

From 21e402a3aef100414546ac77a8093ea7c0e917c0 Mon Sep 17 00:00:00 2001
From: James Cowgill
Date: Thu, 17 Dec 2015 01:51:09 +0000
Subject: [PATCH 2/2] Fix segfault on x32 by using better register constraints
 in bn_mul.h

On x32, pointers are only 4 bytes wide and need to be loaded using the
"movl" instruction instead of "movq" to avoid loading garbage into the
register.

The MULADDC routines for x86-64 are adjusted to work on x32 as well by
using better register constraints, which get gcc to load all the
registers for us in advance (and store them back afterwards). The "b",
"c", "D" and "S" constraints correspond to the rbx, rcx, rdi and rsi
registers, respectively.
---
 include/mbedtls/bn_mul.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
index 5408d4146..71dd672c2 100644
--- a/include/mbedtls/bn_mul.h
+++ b/include/mbedtls/bn_mul.h
@@ -162,10 +162,6 @@
 
 #define MULADDC_INIT \
 asm( \
- "movq %3, %%rsi \n\t" \
- "movq %4, %%rdi \n\t" \
- "movq %5, %%rcx \n\t" \
- "movq %6, %%rbx \n\t" \
 "xorq %%r8, %%r8 \n\t"
 
 #define MULADDC_CORE \
@@ -181,12 +177,9 @@
 "addq $8, %%rdi \n\t"
 
 #define MULADDC_STOP \
- "movq %%rcx, %0 \n\t" \
- "movq %%rdi, %1 \n\t" \
- "movq %%rsi, %2 \n\t" \
- : "=m" (c), "=m" (d), "=m" (s) \
- : "m" (s), "m" (d), "m" (c), "m" (b) \
- : "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" \
+ : "+c" (c), "+D" (d), "+S" (s) \
+ : "b" (b) \
+ : "rax", "rdx", "r8" \
 );
 
 #endif /* AMD64 */
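
To see why the suffix-less mnemonic in patch 1 is enough, consider a
minimal standalone sketch (the helper below is hypothetical, not part
of the patch). With an "r"-class constraint on a pointer operand, gcc
substitutes a 64-bit register (e.g. %rsi) on plain x86-64 but a 32-bit
register (e.g. %esi) on x32; "addq" only assembles against the former,
while the generic "add" lets gas derive the operand size from whichever
register was substituted:

    /* Hypothetical helper, not from mbed TLS: advance a pointer by 16
     * bytes from inline asm. On x86-64, %0 expands to a 64-bit
     * register and "add" assembles as addq; on x32, %0 expands to a
     * 32-bit register and the same source assembles as addl.
     * Hard-coding "addq" here reproduces the x32 build error that
     * patch 1 fixes. */
    static const unsigned char *skip_round_key(const unsigned char *p)
    {
        asm("add $16, %0" : "+r" (p));
        return p;
    }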
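
Likewise for patch 2, here is a self-contained sketch of the constraint
style it adopts (the function and loop are illustrative only, not the
MULADDC code): "+S", "+D" and "+c" pin operands to rsi, rdi and rcx (or
their 32-bit halves on x32) and mark them read-write, so gcc emits
correctly sized loads before the asm and stores after it, making the
hand-written movq moves unnecessary:

    #include <stddef.h>

    /* Hypothetical example, not from bn_mul.h: copy n 64-bit words.
     * gcc materialises src in rsi ("S"), dst in rdi ("D") and n in
     * rcx ("c") using movq or movl as the operand types dictate; the
     * "+" modifier makes each operand read-write, so the advanced
     * values are written back when the asm finishes. */
    static void copy_words(unsigned long long *dst,
                           const unsigned long long *src, size_t n)
    {
        asm volatile(
            "test %%rcx, %%rcx \n\t"   /* nothing to do if n == 0 */
            "jz 2f \n\t"
            "1: \n\t"
            "movq (%%rsi), %%rax \n\t" /* load one word */
            "movq %%rax, (%%rdi) \n\t" /* store it */
            "addq $8, %%rsi \n\t"      /* advance source */
            "addq $8, %%rdi \n\t"      /* advance destination */
            "subq $1, %%rcx \n\t"      /* count down */
            "jnz 1b \n\t"
            "2: \n\t"
            : "+S" (src), "+D" (dst), "+c" (n)
            :
            : "rax", "memory", "cc");
    }

On x32 this works because any write to a 32-bit register zero-extends
into the full 64-bit register, so addressing through %rsi/%rdi stays
valid even though gcc only loaded %esi/%edi.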