From df87a12c8154afd3f90057037716dc590ebe9859 Mon Sep 17 00:00:00 2001 From: Jerry Yu Date: Tue, 10 Jan 2023 18:17:15 +0800 Subject: [PATCH] Add GCM support Signed-off-by: Jerry Yu --- library/aesce.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++ library/aesce.h | 18 ++++++++++++++ library/gcm.c | 24 +++++++++++++++++++ 3 files changed, 105 insertions(+) diff --git a/library/aesce.c b/library/aesce.c index acfac2343..011c98997 100644 --- a/library/aesce.c +++ b/library/aesce.c @@ -276,6 +276,69 @@ int mbedtls_aesce_setkey_enc(unsigned char *rk, return 0; } +#if defined(MBEDTLS_GCM_C) + +static inline uint8x16_t pmull_low(uint8x16_t a, uint8x16_t b) +{ + return vreinterpretq_u8_p128( + vmull_p64( + (poly64_t) vget_low_p64(vreinterpretq_p64_u8(a)), + (poly64_t) vget_low_p64(vreinterpretq_p64_u8(b)))); +} + +static inline uint8x16_t pmull_high(uint8x16_t a, uint8x16_t b) +{ + return vreinterpretq_u8_p128( + vmull_high_p64(vreinterpretq_p64_u8(a), + vreinterpretq_p64_u8(b))); +} + +static inline uint8x16x3_t poly_mult_128(uint8x16_t a, uint8x16_t b) +{ + uint8x16x3_t ret; + uint8x16_t c = vextq_u8(b, b, 8); + ret.val[0] = pmull_high(a, b); /* a1*b1 */ + ret.val[1] = veorq_u8(pmull_high(a, c), /* a1*b0 + a0*b1 */ + pmull_low(a, c)); + ret.val[2] = pmull_low(a, b); /* a0*b0 */ + return ret; +} + +static inline uint8x16_t poly_mult_reduce(uint8x16x3_t a) +{ + uint8x16_t const Z = vdupq_n_u8(0); + /* use 'asm' as an optimisation barrier to prevent loading R from memory */ + uint64x2_t r = vreinterpretq_u64_u8(vdupq_n_u8(0x87)); + asm ("" : "+w" (r)); + uint8x16_t const R = vreinterpretq_u8_u64(vshrq_n_u64(r, 64 - 8)); + uint8x16_t d = a.val[0]; /* d3:d2:00:00 */ + uint8x16_t j = a.val[1]; /* j2:j1:00 */ + uint8x16_t g = a.val[2]; /* g1:g0 = a0*b0 */ + uint8x16_t h = pmull_high(d, R); /* h2:h1:00 = reduction of d3 */ + uint8x16_t i = pmull_low(d, R); /* i1:i0 = reduction of d2 */ + uint8x16_t k = veorq_u8(j, h); /* k2:k1:00 = j2:j1 + h2:h1 */ + uint8x16_t l = pmull_high(k, R); /* l1:l0 = reduction of k2 */ + uint8x16_t m = vextq_u8(Z, k, 8); /* m1:00 = k1:00 */ + uint8x16_t n = veorq_u8(g, i); /* n1:n0 = g1:g0 + i1:i0 */ + uint8x16_t o = veorq_u8(n, l); /* o1:o0 = l1:l0 + n1:n0 */ + return veorq_u8(o, m); /* = o1:o0 + m1:00 */ +} + +/* + * GCM multiplication: c = a times b in GF(2^128) + */ +void mbedtls_aesce_gcm_mult(unsigned char c[16], + const unsigned char a[16], + const unsigned char b[16]) +{ + uint8x16_t va, vb, vc; + va = vrbitq_u8(vld1q_u8(&a[0])); + vb = vrbitq_u8(vld1q_u8(&b[0])); + vc = vrbitq_u8(poly_mult_reduce(poly_mult_128(va, vb))); + vst1q_u8(&c[0], vc); +} + +#endif /* MBEDTLS_GCM_C */ #if defined(MBEDTLS_POP_TARGET_PRAGMA) #if defined(__clang__) diff --git a/library/aesce.h b/library/aesce.h index da4244699..1b3f81632 100644 --- a/library/aesce.h +++ b/library/aesce.h @@ -64,6 +64,24 @@ int mbedtls_aesce_crypt_ecb(mbedtls_aes_context *ctx, const unsigned char input[16], unsigned char output[16]); +/** + * \brief Internal GCM multiplication: c = a * b in GF(2^128) + * + * \note This function is only for internal use by other library + * functions; you must not call it directly. + * + * \param c Result + * \param a First operand + * \param b Second operand + * + * \note Both operands and result are bit strings interpreted as + * elements of GF(2^128) as per the GCM spec. + */ +void mbedtls_aesce_gcm_mult(unsigned char c[16], + const unsigned char a[16], + const unsigned char b[16]); + + /** * \brief Internal round key inversion. This function computes * decryption round keys from the encryption round keys. diff --git a/library/gcm.c b/library/gcm.c index 6d4495fd3..0fa0008e3 100644 --- a/library/gcm.c +++ b/library/gcm.c @@ -42,6 +42,10 @@ #include "aesni.h" #endif +#if defined(MBEDTLS_AESCE_C) +#include "aesce.h" +#endif + #if !defined(MBEDTLS_GCM_ALT) /* @@ -93,6 +97,12 @@ static int gcm_gen_table(mbedtls_gcm_context *ctx) } #endif +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + return 0; + } +#endif + /* 0 corresponds to 0 in GF(2^128) */ ctx->HH[0] = 0; ctx->HL[0] = 0; @@ -197,6 +207,20 @@ static void gcm_mult(mbedtls_gcm_context *ctx, const unsigned char x[16], } #endif /* MBEDTLS_AESNI_C && MBEDTLS_HAVE_X86_64 */ +#if defined(MBEDTLS_AESCE_C) && defined(MBEDTLS_HAVE_ARM64) + if (mbedtls_aesce_has_support()) { + unsigned char h[16]; + + MBEDTLS_PUT_UINT32_BE(ctx->HH[8] >> 32, h, 0); + MBEDTLS_PUT_UINT32_BE(ctx->HH[8], h, 4); + MBEDTLS_PUT_UINT32_BE(ctx->HL[8] >> 32, h, 8); + MBEDTLS_PUT_UINT32_BE(ctx->HL[8], h, 12); + + mbedtls_aesce_gcm_mult(output, x, h); + return; + } +#endif + lo = x[15] & 0xf; zh = ctx->HH[lo];