From 6c8edca2d41580e1ca9dcaf8fd4fba70670a19c9 Mon Sep 17 00:00:00 2001
From: James Cowgill
Date: Thu, 17 Dec 2015 01:40:26 +0000
Subject: [PATCH 1/2] Fix build errors on x32 by using the generic 'add'
 instruction

On x32 systems, pointers are 4 bytes wide and are therefore stored in
%e?x registers (instead of %r?x registers). These registers must be
accessed using "addl" instead of "addq"; however, the GNU assembler
accepts the generic "add" instruction and determines the correct opcode
from the registers passed to it.
---
 library/aesni.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/aesni.c b/library/aesni.c
index 83a5868bd..1ca3c3ef5 100644
--- a/library/aesni.c
+++ b/library/aesni.c
@@ -100,7 +100,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 asm( "movdqu (%3), %%xmm0 \n\t" // load input
 "movdqu (%1), %%xmm1 \n\t" // load round key 0
 "pxor %%xmm1, %%xmm0 \n\t" // round 0
- "addq $16, %1 \n\t" // point to next round key
+ "add $16, %1 \n\t" // point to next round key
 "subl $1, %0 \n\t" // normal rounds = nr - 1
 "test %2, %2 \n\t" // mode?
 "jz 2f \n\t" // 0 = decrypt
@@ -108,7 +108,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 "1: \n\t" // encryption loop
 "movdqu (%1), %%xmm1 \n\t" // load round key
 AESENC xmm1_xmm0 "\n\t" // do round
- "addq $16, %1 \n\t" // point to next round key
+ "add $16, %1 \n\t" // point to next round key
 "subl $1, %0 \n\t" // loop
 "jnz 1b \n\t"
 "movdqu (%1), %%xmm1 \n\t" // load round key
@@ -118,7 +118,7 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx,
 "2: \n\t" // decryption loop
 "movdqu (%1), %%xmm1 \n\t"
 AESDEC xmm1_xmm0 "\n\t" // do round
- "addq $16, %1 \n\t"
+ "add $16, %1 \n\t"
 "subl $1, %0 \n\t"
 "jnz 2b \n\t"
 "movdqu (%1), %%xmm1 \n\t" // load round key

From 21e402a3aef100414546ac77a8093ea7c0e917c0 Mon Sep 17 00:00:00 2001
From: James Cowgill
Date: Thu, 17 Dec 2015 01:51:09 +0000
Subject: [PATCH 2/2] Fix segfault on x32 by using better register constraints
 in bn_mul.h

On x32, pointers are only 4 bytes wide and need to be loaded using the
"movl" instruction instead of "movq" to avoid loading garbage into the
register.

The MULADDC routines for x86-64 are adjusted to work on x32 as well by
using better register constraints, which get gcc to load all the
registers for us in advance (and store them back afterwards). The "b",
"c", "D" and "S" constraints correspond to the rbx, rcx, rdi and rsi
registers, respectively.
---
 include/mbedtls/bn_mul.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/include/mbedtls/bn_mul.h b/include/mbedtls/bn_mul.h
index 5408d4146..71dd672c2 100644
--- a/include/mbedtls/bn_mul.h
+++ b/include/mbedtls/bn_mul.h
@@ -162,10 +162,6 @@
 
 #define MULADDC_INIT \
 asm( \
- "movq %3, %%rsi \n\t" \
- "movq %4, %%rdi \n\t" \
- "movq %5, %%rcx \n\t" \
- "movq %6, %%rbx \n\t" \
 "xorq %%r8, %%r8 \n\t"
 
 #define MULADDC_CORE \
@@ -181,12 +177,9 @@
 "addq $8, %%rdi \n\t"
 
 #define MULADDC_STOP \
- "movq %%rcx, %0 \n\t" \
- "movq %%rdi, %1 \n\t" \
- "movq %%rsi, %2 \n\t" \
- : "=m" (c), "=m" (d), "=m" (s) \
- : "m" (s), "m" (d), "m" (c), "m" (b) \
- : "rax", "rcx", "rdx", "rbx", "rsi", "rdi", "r8" \
+ : "+c" (c), "+D" (d), "+S" (s) \
+ : "b" (b) \
+ : "rax", "rdx", "r8" \
 );
 
 #endif /* AMD64 */
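
To see why the suffix-less mnemonic in patch 1 is enough, consider a
minimal standalone sketch (the helper below is hypothetical, not part
of the patch). With an "r"-class constraint on a pointer operand, gcc
substitutes a 64-bit register (e.g. %rsi) on plain x86-64 but a 32-bit
register (e.g. %esi) on x32; "addq" only assembles against the former,
while the generic "add" lets gas derive the operand size from whichever
register was substituted:

    /* Hypothetical helper, not from mbed TLS: advance a pointer by 16
     * bytes from inline asm. On x86-64, %0 expands to a 64-bit
     * register and "add" assembles as addq; on x32, %0 expands to a
     * 32-bit register and the same source assembles as addl.
     * Hard-coding "addq" here reproduces the x32 build error that
     * patch 1 fixes. */
    static const unsigned char *skip_round_key(const unsigned char *p)
    {
        asm("add $16, %0" : "+r" (p));
        return p;
    }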
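
Likewise for patch 2, here is a self-contained sketch of the constraint
style it adopts (the function and loop are illustrative only, not the
MULADDC code): "+S", "+D" and "+c" pin operands to rsi, rdi and rcx (or
their 32-bit halves on x32) and mark them read-write, so gcc emits
correctly sized loads before the asm and stores after it, making the
hand-written movq moves unnecessary:

    #include <stddef.h>

    /* Hypothetical example, not from bn_mul.h: copy n 64-bit words.
     * gcc materialises src in rsi ("S"), dst in rdi ("D") and n in
     * rcx ("c") using movq or movl as the operand types dictate; the
     * "+" modifier makes each operand read-write, so the advanced
     * values are written back when the asm finishes. */
    static void copy_words(unsigned long long *dst,
                           const unsigned long long *src, size_t n)
    {
        asm volatile(
            "test %%rcx, %%rcx \n\t"   /* nothing to do if n == 0 */
            "jz 2f \n\t"
            "1: \n\t"
            "movq (%%rsi), %%rax \n\t" /* load one word */
            "movq %%rax, (%%rdi) \n\t" /* store it */
            "addq $8, %%rsi \n\t"      /* advance source */
            "addq $8, %%rdi \n\t"      /* advance destination */
            "subq $1, %%rcx \n\t"      /* count down */
            "jnz 1b \n\t"
            "2: \n\t"
            : "+S" (src), "+D" (dst), "+c" (n)
            :
            : "rax", "memory", "cc");
    }

On x32 this works because any write to a 32-bit register zero-extends
into the full 64-bit register, so addressing through %rsi/%rdi stays
valid even though gcc only loaded %esi/%edi.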