From 37d63769683a5d79570620da54eb79d9eed0aa64 Mon Sep 17 00:00:00 2001 From: Morph <39850852+Morph1984@users.noreply.github.com> Date: Mon, 5 Sep 2022 17:00:02 -0400 Subject: [PATCH] aesni: Unroll aes rounds --- library/aesni.c | 80 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 78 insertions(+), 2 deletions(-) diff --git a/library/aesni.c b/library/aesni.c index 5af0e4c11..e54ab0c33 100644 --- a/library/aesni.c +++ b/library/aesni.c @@ -117,14 +117,90 @@ int mbedtls_aesni_crypt_ecb( mbedtls_aes_context *ctx, if (mode == MBEDTLS_AES_ENCRYPT) { - for (i = ctx->nr - 1; i; --i) + if (ctx->nr == 10) { a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + } else if (ctx->nr == 12) { + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + } else if (ctx->nr == 14) { + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + } else { + for (i = ctx->nr - 1; i; --i) + a = _mm_aesenc_si128( a, _mm_loadu_si128( rk++ ) ); + } a = _mm_aesenclast_si128( a, _mm_loadu_si128( rk ) ); } else { - for (i = ctx->nr - 1; i; --i) + if (ctx->nr == 10) { a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + } else if (ctx->nr == 12) { + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + } else if (ctx->nr == 14) { + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + } else { + for (i = ctx->nr - 1; i; --i) + a = _mm_aesdec_si128( a, _mm_loadu_si128( rk++ ) ); + } a = _mm_aesdeclast_si128( a, _mm_loadu_si128( rk ) ); }