From 91c0286917ffbfed6499936a179f6c7f35cbb430 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 13:27:40 +0100
Subject: [PATCH 01/16] mpi_exp_mod: load the output variable to the table

This is done in preparation for constant time loading that will be added
in a later commit.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 45 +++++++++++++++++++++++++++++----------------
 1 file changed, 29 insertions(+), 16 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index ce72b1fb0..12d7b5cc8 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2005,7 +2005,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[ 1 << MBEDTLS_MPI_WINDOW_SIZE ], WW, Apos;
+    mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2052,6 +2052,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
+    /*
+     * Append the output variable to the end of the  table for constant time
+     * lookup. From this point on we need to use the table entry in each
+     * calculation, this makes it safe to use simple assignment.
+     */
+    const size_t x_index = sizeof( W ) / sizeof( W[0] ) - 1;
+    W[x_index] = *X;
+
     /*
      * Compensate for negative A (and correct at the end)
      */
@@ -2097,10 +2105,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     mpi_montmul( &W[1], &RR, N, mm, &T );
 
     /*
-     * X = R^2 * R^-1 mod N = R mod N
+     * W[x_index] = R^2 * R^-1 mod N = R mod N
      */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( X, &RR ) );
-    mpi_montred( X, N, mm, &T );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
+    mpi_montred( &W[x_index], N, mm, &T );
 
     if( wsize > 1 )
     {
@@ -2158,9 +2166,9 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         if( ei == 0 && state == 1 )
         {
             /*
-             * out of window, square X
+             * out of window, square W[x_index]
              */
-            mpi_montmul( X, X, N, mm, &T );
+            mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
             continue;
         }
 
@@ -2175,16 +2183,16 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         if( nbits == wsize )
         {
             /*
-             * X = X^wsize R^-1 mod N
+             * W[x_index] = W[x_index]^wsize R^-1 mod N
              */
             for( i = 0; i < wsize; i++ )
-                mpi_montmul( X, X, N, mm, &T );
+                mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
 
             /*
-             * X = X * W[wbits] R^-1 mod N
+             * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
              */
             MBEDTLS_MPI_CHK( mpi_select( &WW, W, (size_t) 1 << wsize, wbits ) );
-            mpi_montmul( X, &WW, N, mm, &T );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
             nbits = 0;
@@ -2197,25 +2205,30 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        mpi_montmul( X, X, N, mm, &T );
+        mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
 
         wbits <<= 1;
 
         if( ( wbits & ( one << wsize ) ) != 0 )
-            mpi_montmul( X, &W[1], N, mm, &T );
+            mpi_montmul( &W[x_index], &W[1], N, mm, &T );
     }
 
     /*
-     * X = A^E * R * R^-1 mod N = A^E mod N
+     * W[x_index] = A^E * R * R^-1 mod N = A^E mod N
      */
-    mpi_montred( X, N, mm, &T );
+    mpi_montred( &W[x_index], N, mm, &T );
 
     if( neg && E->n != 0 && ( E->p[0] & 1 ) != 0 )
     {
-        X->s = -1;
-        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( X, N, X ) );
+        W[x_index].s = -1;
+        MBEDTLS_MPI_CHK( mbedtls_mpi_add_mpi( &W[x_index], N, &W[x_index] ) );
     }
 
+    /*
+     * Load the result in the output variable.
+     */
+    *X = W[x_index];
+
 cleanup:
 
     for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )

From 95655a2ba0c4d21f15f2f0e59d5bb514f4914074 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 14:00:09 +0100
Subject: [PATCH 02/16] mpi_exp_mod: protect out of window zeroes

Out of window zeroes were doing squaring on the output variable
directly. This leaks the position of windows and the out of window
zeroes.

Loading the output variable from the table in constant time removes this
leakage.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 12d7b5cc8..b25535a06 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2006,6 +2006,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
+    const size_t w_count = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2057,7 +2058,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * lookup. From this point on we need to use the table entry in each
      * calculation, this makes it safe to use simple assignment.
      */
-    const size_t x_index = sizeof( W ) / sizeof( W[0] ) - 1;
+    const size_t x_index = w_count - 1;
     W[x_index] = *X;
 
     /*
@@ -2168,7 +2169,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
 
@@ -2186,12 +2188,15 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
              * W[x_index] = W[x_index]^wsize R^-1 mod N
              */
             for( i = 0; i < wsize; i++ )
-                mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+            {
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+                mpi_montmul( &W[x_index], &WW, N, mm, &T );
+            }
 
             /*
              * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, (size_t) 1 << wsize, wbits ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, wbits ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
@@ -2205,12 +2210,16 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        mpi_montmul( &W[x_index], &W[x_index], N, mm, &T );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+        mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
         wbits <<= 1;
 
         if( ( wbits & ( one << wsize ) ) != 0 )
-            mpi_montmul( &W[x_index], &W[1], N, mm, &T );
+        {
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, 1 ) );
+            mpi_montmul( &W[x_index], &WW, N, mm, &T );
+        }
     }
 
     /*

From 9e4ea3a8a879559d1deac2b7e4cb994addc2cd6e Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 4 Oct 2022 14:57:17 +0100
Subject: [PATCH 03/16] Add ChangeLog entry

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 ChangeLog.d/rsa-fix-priviliged-side-channel.txt

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
new file mode 100644
index 000000000..d4ffa915c
--- /dev/null
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -0,0 +1,8 @@
+Security
+   * An adversary with access to precise enough information about memory
+     accesses (typically, an untrusted operating system attacking a secure
+     enclave) could recover an RSA private key after observing the victim
+     performing a single private-key operation if the window size used for the
+     exponentiation was 3 or smaller. Found and reported by Zili KOU,
+     Wenjian HE, Sharad Sinha, and Wei ZHANG.
+

From 3a3c50ca0ada80d19926191307ea3ff86b21bf68 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Fri, 11 Nov 2022 15:56:38 +0000
Subject: [PATCH 04/16] mpi_exp_mod: improve documentation

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index b25535a06..02c0f8a95 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2054,11 +2054,20 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
     /*
-     * Append the output variable to the end of the  table for constant time
-     * lookup. From this point on we need to use the table entry in each
-     * calculation, this makes it safe to use simple assignment.
+     * If we call mpi_montmul() without doing a table lookup first, we leak
+     * through timing side channels the fact that a squaring is happening. In
+     * some strong attack settings this can be enough to defeat blinding.
+     *
+     * To prevent this leak, we append the output variable to the end of the
+     * table. This allows as to always do a constant time lookup whenever we
+     * call mpi_montmul().
      */
     const size_t x_index = w_count - 1;
+    /*
+     * To prevent the leak, we need to use the table entry in each calculation
+     * from this point on. This makes it safe to load X into the table by a
+     * simple assignment.
+     */
     W[x_index] = *X;
 
     /*

From f0ceb1cae1bbd01e82bfc45503ebc8425210da82 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:31:22 +0000
Subject: [PATCH 05/16] mpi_exp_mod: remove memory ownership confusion

Elements of W didn't all have the same owner: all were owned by this
function, except W[x_index]. It is more robust if we make a proper copy
of X.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 02c0f8a95..8a4e67a58 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2043,16 +2043,6 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         wsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
-    j = N->n + 1;
-    /* All W[i] and X must have at least N->n limbs for the mpi_montmul()
-     * and mpi_montred() calls later. Here we ensure that W[1] and X are
-     * large enough, and later we'll grow other W[i] to the same length.
-     * They must not be shrunk midway through this function!
-     */
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( X, j ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
-    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
-
     /*
      * If we call mpi_montmul() without doing a table lookup first, we leak
      * through timing side channels the fact that a squaring is happening. In
@@ -2061,14 +2051,23 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To prevent this leak, we append the output variable to the end of the
      * table. This allows as to always do a constant time lookup whenever we
      * call mpi_montmul().
+     *
+     * To achieve this, we make a copy of X and we use the table entry in each
+     * calculation from this point on.
      */
     const size_t x_index = w_count - 1;
-    /*
-     * To prevent the leak, we need to use the table entry in each calculation
-     * from this point on. This makes it safe to load X into the table by a
-     * simple assignment.
+    mbedtls_mpi_init( &W[x_index] );
+    mbedtls_mpi_copy( &W[x_index], X );
+
+    j = N->n + 1;
+    /* All W[i] and X must have at least N->n limbs for the mpi_montmul()
+     * and mpi_montred() calls later. Here we ensure that W[1] and X are
+     * large enough, and later we'll grow other W[i] to the same length.
+     * They must not be shrunk midway through this function!
      */
-    W[x_index] = *X;
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[x_index], j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[1],  j ) );
+    MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T, j * 2 ) );
 
     /*
      * Compensate for negative A (and correct at the end)
@@ -2245,14 +2244,17 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     /*
      * Load the result in the output variable.
      */
-    *X = W[x_index];
+    mbedtls_mpi_copy( X, &W[x_index] );
 
 cleanup:
 
     for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
         mbedtls_mpi_free( &W[i] );
 
-    mbedtls_mpi_free( &W[1] ); mbedtls_mpi_free( &T ); mbedtls_mpi_free( &Apos );
+    mbedtls_mpi_free( &W[1] );
+    mbedtls_mpi_free( &W[x_index] );
+    mbedtls_mpi_free( &T );
+    mbedtls_mpi_free( &Apos );
     mbedtls_mpi_free( &WW );
 
     if( prec_RR == NULL || prec_RR->p == NULL )

From 6632383993e5f79bddf847d8630f90b8b33e0b12 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:48:02 +0000
Subject: [PATCH 06/16] mpi_exp_mod: rename local variables

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 54 +++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 8a4e67a58..44d1acab7 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2001,12 +2001,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
                          mbedtls_mpi *prec_RR )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t wbits, wsize, one = 1;
+    size_t window_bitsize, one = 1;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
-    const size_t w_count = sizeof( W ) / sizeof( W[0] );
+    const size_t w_table_size = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2035,12 +2035,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     i = mbedtls_mpi_bitlen( E );
 
-    wsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
+    window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
-    if( wsize > MBEDTLS_MPI_WINDOW_SIZE )
-        wsize = MBEDTLS_MPI_WINDOW_SIZE;
+    if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
+        window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
     /*
@@ -2055,7 +2055,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_count - 1;
+    const size_t x_index = w_table_size - 1;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2119,23 +2119,23 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
     mpi_montred( &W[x_index], N, mm, &T );
 
-    if( wsize > 1 )
+    if( window_bitsize > 1 )
     {
         /*
-         * W[1 << (wsize - 1)] = W[1] ^ (wsize - 1)
+         * W[1 << (window_bitsize - 1)] = W[1] ^ (window_bitsize - 1)
          */
-        j =  one << ( wsize - 1 );
+        j =  one << ( window_bitsize - 1 );
 
         MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
 
-        for( i = 0; i < wsize - 1; i++ )
+        for( i = 0; i < window_bitsize - 1; i++ )
             mpi_montmul( &W[j], &W[j], N, mm, &T );
 
         /*
          * W[i] = W[i - 1] * W[1]
          */
-        for( i = j + 1; i < ( one << wsize ); i++ )
+        for( i = j + 1; i < ( one << window_bitsize ); i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2147,7 +2147,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     nblimbs = E->n;
     bufsize = 0;
     nbits   = 0;
-    wbits   = 0;
+    size_t exponent_bits_in_window = 0;
     state   = 0;
 
     while( 1 )
@@ -2177,7 +2177,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
@@ -2188,28 +2188,29 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         state = 2;
 
         nbits++;
-        wbits |= ( ei << ( wsize - nbits ) );
+        exponent_bits_in_window |= ( ei << ( window_bitsize - nbits ) );
 
-        if( nbits == wsize )
+        if( nbits == window_bitsize )
         {
             /*
-             * W[x_index] = W[x_index]^wsize R^-1 mod N
+             * W[x_index] = W[x_index]^window_bitsize R^-1 mod N
              */
-            for( i = 0; i < wsize; i++ )
+            for( i = 0; i < window_bitsize; i++ )
             {
-                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
                 mpi_montmul( &W[x_index], &WW, N, mm, &T );
             }
 
             /*
-             * W[x_index] = W[x_index] * W[wbits] R^-1 mod N
+             * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, wbits ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size,
+                                         exponent_bits_in_window ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
             state--;
             nbits = 0;
-            wbits = 0;
+            exponent_bits_in_window = 0;
         }
     }
 
@@ -2218,14 +2219,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, x_index ) );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
         mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
-        wbits <<= 1;
+        exponent_bits_in_window <<= 1;
 
-        if( ( wbits & ( one << wsize ) ) != 0 )
+        if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
         {
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_count, 1 ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
         }
     }
@@ -2248,7 +2249,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
 cleanup:
 
-    for( i = ( one << ( wsize - 1 ) ); i < ( one << wsize ); i++ )
+    for( i = ( one << ( window_bitsize - 1 ) );
+         i < ( one << window_bitsize ); i++ )
         mbedtls_mpi_free( &W[i] );
 
     mbedtls_mpi_free( &W[1] );

From aadbadbf4209b2b51191ae15e8ef98cfd6e22199 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 14:55:05 +0000
Subject: [PATCH 07/16] mpi_exp_mod: move X next to the precomputed values

With small exponents (for example, when doing RSA-1024 with CRT, each
prime is 512 bits and we'll use wsize = 5 which may be smaller that the
maximum - or even worse when doing public RSA operations which typically
have a 16-bit exponent so we'll use wsize = 1) the usage of W will have
pre-computed values, then empty space, then the accumulator at the very
end.

Move X next to the precomputed values to make accesses more efficient
and intuitive.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 44d1acab7..986a9e98f 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2006,7 +2006,6 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
     mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
-    const size_t w_table_size = sizeof( W ) / sizeof( W[0] );
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2037,6 +2036,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
+    const size_t w_table_used_size = ( 1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
@@ -2055,7 +2055,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_table_size - 1;
+    const size_t x_index = w_table_used_size - 1;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2177,7 +2177,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
             /*
              * out of window, square W[x_index]
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
             continue;
         }
@@ -2197,14 +2197,15 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
              */
             for( i = 0; i < window_bitsize; i++ )
             {
-                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+                MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
+                                             x_index ) );
                 mpi_montmul( &W[x_index], &WW, N, mm, &T );
             }
 
             /*
              * W[x_index] = W[x_index] * W[exponent_bits_in_window] R^-1 mod N
              */
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size,
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size,
                                          exponent_bits_in_window ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
@@ -2219,14 +2220,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      */
     for( i = 0; i < nbits; i++ )
     {
-        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, x_index ) );
+        MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, x_index ) );
         mpi_montmul( &W[x_index], &WW, N, mm, &T );
 
         exponent_bits_in_window <<= 1;
 
         if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
         {
-            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_size, 1 ) );
+            MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );
         }
     }

From a92f9155a54815d240d68223a39415fdbbbb71e9 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 15:05:31 +0000
Subject: [PATCH 08/16] mpi_exp_mod: simplify freeing loop

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 986a9e98f..1ce552ccb 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2250,12 +2250,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
 cleanup:
 
-    for( i = ( one << ( window_bitsize - 1 ) );
-         i < ( one << window_bitsize ); i++ )
+    /* The first bit of the sliding window is always 1 and therefore the first
+     * half of the table was unused. */
+    for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
     mbedtls_mpi_free( &W[1] );
-    mbedtls_mpi_free( &W[x_index] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );
     mbedtls_mpi_free( &WW );

From d88e21941cdaed9c42d3d2adf3bcf587600ca018 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 15:54:20 +0000
Subject: [PATCH 09/16] mpi_exp_mod: remove the 'one' variable

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 1ce552ccb..ad1a5d4ab 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2001,7 +2001,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
                          mbedtls_mpi *prec_RR )
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    size_t window_bitsize, one = 1;
+    size_t window_bitsize;
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
@@ -2122,9 +2122,12 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     if( window_bitsize > 1 )
     {
         /*
-         * W[1 << (window_bitsize - 1)] = W[1] ^ (window_bitsize - 1)
+         * W[i] = W[1] ^ i
+         *
+         * The first bit of the sliding window is always 1 and therefore we
+         * only need to store the second half of the table.
          */
-        j =  one << ( window_bitsize - 1 );
+        j = w_table_used_size / 2;
 
         MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[j], N->n + 1 ) );
         MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[j], &W[1]    ) );
@@ -2134,8 +2137,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         /*
          * W[i] = W[i - 1] * W[1]
+         * (The last element in the table is for the result X, so we don't need
+         * to calculate that.)
          */
-        for( i = j + 1; i < ( one << window_bitsize ); i++ )
+        for( i = j + 1; i < w_table_used_size - 1; i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2225,7 +2230,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         exponent_bits_in_window <<= 1;
 
-        if( ( exponent_bits_in_window & ( one << window_bitsize ) ) != 0 )
+        if( ( exponent_bits_in_window & ( (size_t) 1 << window_bitsize ) ) != 0 )
         {
             MBEDTLS_MPI_CHK( mpi_select( &WW, W, w_table_used_size, 1 ) );
             mpi_montmul( &W[x_index], &WW, N, mm, &T );

From 6e2d8e3e28bf602179256f1371f725d9502bfa92 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 16:14:54 +0000
Subject: [PATCH 10/16] mpi_exp_mod: improve documentation

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index ad1a5d4ab..a840a67f7 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2044,13 +2044,33 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 #endif
 
     /*
-     * If we call mpi_montmul() without doing a table lookup first, we leak
-     * through timing side channels the fact that a squaring is happening. In
-     * some strong attack settings this can be enough to defeat blinding.
+     * This function is not constant-trace: its memory accesses depend on the
+     * exponent value. To defend against timing attacks, callers (such as RSA
+     * and DHM) should use exponent blinding. However this is not enough if the
+     * adversary can find the exponent in a single trace, so this function
+     * takes extra precautions against adversaries who can observe memory
+     * access patterns.
      *
-     * To prevent this leak, we append the output variable to the end of the
-     * table. This allows as to always do a constant time lookup whenever we
-     * call mpi_montmul().
+     * This function performs a series of multiplications by table elements and
+     * squarings, and we want the prevent the adversary from finding out which
+     * table element was used, and from distinguishing between multiplications
+     * and squarings. Firstly, when multiplying by an element of the window
+     * W[i], we do a constant-trace table lookup to obfuscate i. This leaves
+     * squarings as having a different memory access patterns from other
+     * multiplications. So secondly, we put the accumulator X in the table as
+     * well, and also do a constant-trace table lookup to multiply by X.
+     *
+     * This way, all multiplications take the form of a lookup-and-multiply.
+     * The number of lookup-and-multiply operations inside each iteration of
+     * the main loop still depends on the bits of the exponent, but since the
+     * other operations in the loop don't have an easily recognizable memory
+     * trace, an adversary is unlikely to be able to observe the exact
+     * patterns.
+     *
+     * An adversary may still be able to recover the exponent if they can
+     * observe both memory accesses and branches. However, branch prediction
+     * exploitation typically requires many traces of execution over the same
+     * data, which is defeated by randomized blinding.
      *
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.

From 82e8133edca8846f18a62a49aa28d15c37b696a6 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Mon, 21 Nov 2022 16:22:35 +0000
Subject: [PATCH 11/16] Add paper title to Changelog

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
index d4ffa915c..f112eae43 100644
--- a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -4,5 +4,6 @@ Security
      enclave) could recover an RSA private key after observing the victim
      performing a single private-key operation if the window size used for the
      exponentiation was 3 or smaller. Found and reported by Zili KOU,
-     Wenjian HE, Sharad Sinha, and Wei ZHANG.
+     Wenjian HE, Sharad Sinha, and Wei ZHANG. See "Cache Side-channel Attacks
+     and Defenses of the Sliding Window Algorithm in TEEs" - DATE 2023.
 

From 2b72690e14b8524ff8d759b6ca521e614a7d1de1 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:15:00 +0000
Subject: [PATCH 12/16] mpi_mod_exp: be pedantic about right shift

The window size starts giving diminishing returns around 6 on most
platforms and highly unlikely to be more than 31 in practical use cases.
Still, compilers and static analysers might complain about this and
better to be pedantic.

Co-authored-by: Gilles Peskine <gilles.peskine@arm.com>
Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index a840a67f7..1b1c119f5 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2036,7 +2036,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
-    const size_t w_table_used_size = ( 1 << window_bitsize ) + 1;
+    const size_t w_table_used_size = ( (size_t)1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )

From 6fa7a766ccd4632acb3d8a6c15701198b9c77c96 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:18:06 +0000
Subject: [PATCH 13/16] mpi_exp_mod: fix out of bounds access

The table size was set before the configured window size bound was
applied which lead to out of bounds access when the configured window
size bound is less.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index 1b1c119f5..e2dfae74f 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2036,13 +2036,14 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
     window_bitsize = ( i > 671 ) ? 6 : ( i > 239 ) ? 5 :
             ( i >  79 ) ? 4 : ( i >  23 ) ? 3 : 1;
-    const size_t w_table_used_size = ( (size_t)1 << window_bitsize ) + 1;
 
 #if( MBEDTLS_MPI_WINDOW_SIZE < 6 )
     if( window_bitsize > MBEDTLS_MPI_WINDOW_SIZE )
         window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
+    const size_t w_table_used_size = ( (size_t) 1 << window_bitsize ) + 1;
+
     /*
      * This function is not constant-trace: its memory accesses depend on the
      * exponent value. To defend against timing attacks, callers (such as RSA

From 6c5b5adb46b6403ce7da9c13c3c40b4ceeec6661 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:47:10 +0000
Subject: [PATCH 14/16] mpi_exp_mod: reduce the table size by one

The first half of the table is not used, let's reuse index 0 for the
result instead of appending it in the end.

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index e2dfae74f..5e1235d18 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2005,7 +2005,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     size_t i, j, nblimbs;
     size_t bufsize, nbits;
     mbedtls_mpi_uint ei, mm, state;
-    mbedtls_mpi RR, T, W[ ( 1 << MBEDTLS_MPI_WINDOW_SIZE ) + 1 ], WW, Apos;
+    mbedtls_mpi RR, T, W[ (size_t) 1 << MBEDTLS_MPI_WINDOW_SIZE ], WW, Apos;
     int neg;
 
     MPI_VALIDATE_RET( X != NULL );
@@ -2042,7 +2042,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
         window_bitsize = MBEDTLS_MPI_WINDOW_SIZE;
 #endif
 
-    const size_t w_table_used_size = ( (size_t) 1 << window_bitsize ) + 1;
+    const size_t w_table_used_size = (size_t) 1 << window_bitsize;
 
     /*
      * This function is not constant-trace: its memory accesses depend on the
@@ -2076,7 +2076,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
      * To achieve this, we make a copy of X and we use the table entry in each
      * calculation from this point on.
      */
-    const size_t x_index = w_table_used_size - 1;
+    const size_t x_index = 0;
     mbedtls_mpi_init( &W[x_index] );
     mbedtls_mpi_copy( &W[x_index], X );
 
@@ -2140,6 +2140,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
     MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[x_index], &RR ) );
     mpi_montred( &W[x_index], N, mm, &T );
 
+
     if( window_bitsize > 1 )
     {
         /*
@@ -2147,6 +2148,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
          *
          * The first bit of the sliding window is always 1 and therefore we
          * only need to store the second half of the table.
+         *
+         * (There are two special elements in the table: W[0] for the
+         * accumulator/result and W[1] for A in Montgomery form. Both of these
+         * are already set at this point.)
          */
         j = w_table_used_size / 2;
 
@@ -2158,10 +2163,8 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A,
 
         /*
          * W[i] = W[i - 1] * W[1]
-         * (The last element in the table is for the result X, so we don't need
-         * to calculate that.)
          */
-        for( i = j + 1; i < w_table_used_size - 1; i++ )
+        for( i = j + 1; i < w_table_used_size; i++ )
         {
             MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &W[i], N->n + 1 ) );
             MBEDTLS_MPI_CHK( mbedtls_mpi_copy( &W[i], &W[i - 1] ) );
@@ -2281,6 +2284,7 @@ cleanup:
     for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
+    mbedtls_mpi_free( &W[0] );
     mbedtls_mpi_free( &W[1] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );

From c77286971343e9a8156a87d93e441b2ec8be039f Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 10:51:25 +0000
Subject: [PATCH 15/16] Changelog: expand conference acronym for clarity

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 ChangeLog.d/rsa-fix-priviliged-side-channel.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
index f112eae43..bafe18d30 100644
--- a/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
+++ b/ChangeLog.d/rsa-fix-priviliged-side-channel.txt
@@ -5,5 +5,6 @@ Security
      performing a single private-key operation if the window size used for the
      exponentiation was 3 or smaller. Found and reported by Zili KOU,
      Wenjian HE, Sharad Sinha, and Wei ZHANG. See "Cache Side-channel Attacks
-     and Defenses of the Sliding Window Algorithm in TEEs" - DATE 2023.
+     and Defenses of the Sliding Window Algorithm in TEEs" - Design, Automation
+     and Test in Europe 2023.
 

From b118d54ff692945050fdf52a6ca0e184000dc5e6 Mon Sep 17 00:00:00 2001
From: Janos Follath <janos.follath@arm.com>
Date: Tue, 22 Nov 2022 15:00:46 +0000
Subject: [PATCH 16/16] mpi_exp_mod: use x_index consistently

Signed-off-by: Janos Follath <janos.follath@arm.com>
---
 library/bignum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/library/bignum.c b/library/bignum.c
index 5e1235d18..9392b217e 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -2284,7 +2284,7 @@ cleanup:
     for( i = w_table_used_size/2; i < w_table_used_size; i++ )
         mbedtls_mpi_free( &W[i] );
 
-    mbedtls_mpi_free( &W[0] );
+    mbedtls_mpi_free( &W[x_index] );
     mbedtls_mpi_free( &W[1] );
     mbedtls_mpi_free( &T );
     mbedtls_mpi_free( &Apos );