mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2024-11-25 20:35:39 +01:00
Add option to avoid 64-bit multiplication
Motivation is similar to NO_UDBL_DIVISION. The alternative implementation of 64-bit mult is straightforward and aims at obvious correctness. Also, visual examination of the generated assembly shows that it's quite efficient with clang, armcc5 and arm-clang. However, current GCC generates fairly inefficient code for it. I tried to rework the code in order to make GCC generate more efficient code. Unfortunately, the only way to do that is to get rid of the 64-bit add and handle the carry manually, but this causes other compilers to generate less efficient code with branches, which is not acceptable from a side-channel point of view. So let's keep the obvious code that works for most compilers and hope future versions of GCC learn to manage registers in a sensible way in that context. See https://bugs.launchpad.net/gcc-arm-embedded/+bug/1775263
This commit is contained in:
parent
94175a50f7
commit
2adb375c50
@ -84,6 +84,28 @@
|
|||||||
*/
|
*/
|
||||||
//#define MBEDTLS_NO_UDBL_DIVISION
|
//#define MBEDTLS_NO_UDBL_DIVISION
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \def MBEDTLS_NO_64BIT_MULTIPLICATION
|
||||||
|
*
|
||||||
|
* The platform lacks support for 32x32 -> 64-bit multiplication.
|
||||||
|
*
|
||||||
|
* Used in:
|
||||||
|
* library/poly1305.c
|
||||||
|
*
|
||||||
|
* Some parts of the library may use multiplication of two unsigned 32-bit
|
||||||
|
* operands with a 64-bit result in order to speed up computations. On some
|
||||||
|
* platforms, this is not available in hardware and has to be implemented in
|
||||||
|
* software, usually in a library provided by the toolchain.
|
||||||
|
*
|
||||||
|
* Sometimes it is not desirable to have to link to that library. This option
|
||||||
|
* removes the dependency on that library for platforms that lack a hardware
|
||||||
|
* 64-bit multiplier by embedding a software implementation in Mbed TLS.
|
||||||
|
*
|
||||||
|
* Note that depending on the compiler, this may decrease performance compared
|
||||||
|
* to using the library function provided by the toolchain.
|
||||||
|
*/
|
||||||
|
//#define MBEDTLS_NO_64BIT_MULTIPLICATION
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \def MBEDTLS_HAVE_SSE2
|
* \def MBEDTLS_HAVE_SSE2
|
||||||
*
|
*
|
||||||
|
@ -53,6 +53,34 @@
|
|||||||
| (uint32_t) ( (uint32_t) data[( offset ) + 3] << 24 ) \
|
| (uint32_t) ( (uint32_t) data[( offset ) + 3] << 24 ) \
|
||||||
)
|
)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Our implementation is tuned for 32-bit platforms with a 64-bit multiplier.
|
||||||
|
* However, we provide an alternative for platforms without such a multiplier.
|
||||||
|
*/
|
||||||
|
#if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
|
||||||
|
static uint64_t mul64( uint32_t a, uint32_t b )
|
||||||
|
{
|
||||||
|
/* a = al + 2**16 ah, b = bl + 2**16 bh */
|
||||||
|
const uint16_t al = (uint16_t) a;
|
||||||
|
const uint16_t bl = (uint16_t) b;
|
||||||
|
const uint16_t ah = a >> 16;
|
||||||
|
const uint16_t bh = b >> 16;
|
||||||
|
|
||||||
|
/* ab = al*bl + 2**16 (ah*bl + al*bh) + 2**32 ah*bh */
|
||||||
|
const uint32_t lo = (uint32_t) al * bl;
|
||||||
|
const uint64_t me = (uint64_t)( (uint32_t) ah * bl ) + (uint32_t) al * bh;
|
||||||
|
const uint32_t hi = (uint32_t) ah * bh;
|
||||||
|
|
||||||
|
return( lo + ( me << 16 ) + ( (uint64_t) hi << 32 ) );
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline uint64_t mul64( uint32_t a, uint32_t b )
|
||||||
|
{
|
||||||
|
return( (uint64_t) a * b );
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* \brief Process blocks with Poly1305.
|
* \brief Process blocks with Poly1305.
|
||||||
*
|
*
|
||||||
@ -112,25 +140,25 @@ static void poly1305_process( mbedtls_poly1305_context *ctx,
|
|||||||
acc4 += (uint32_t) ( d3 >> 32U ) + needs_padding;
|
acc4 += (uint32_t) ( d3 >> 32U ) + needs_padding;
|
||||||
|
|
||||||
/* Compute: acc *= r */
|
/* Compute: acc *= r */
|
||||||
d0 = ( (uint64_t) acc0 * r0 ) +
|
d0 = mul64( acc0, r0 ) +
|
||||||
( (uint64_t) acc1 * rs3 ) +
|
mul64( acc1, rs3 ) +
|
||||||
( (uint64_t) acc2 * rs2 ) +
|
mul64( acc2, rs2 ) +
|
||||||
( (uint64_t) acc3 * rs1 );
|
mul64( acc3, rs1 );
|
||||||
d1 = ( (uint64_t) acc0 * r1 ) +
|
d1 = mul64( acc0, r1 ) +
|
||||||
( (uint64_t) acc1 * r0 ) +
|
mul64( acc1, r0 ) +
|
||||||
( (uint64_t) acc2 * rs3 ) +
|
mul64( acc2, rs3 ) +
|
||||||
( (uint64_t) acc3 * rs2 ) +
|
mul64( acc3, rs2 ) +
|
||||||
( (uint64_t) acc4 * rs1 );
|
mul64( acc4, rs1 );
|
||||||
d2 = ( (uint64_t) acc0 * r2 ) +
|
d2 = mul64( acc0, r2 ) +
|
||||||
( (uint64_t) acc1 * r1 ) +
|
mul64( acc1, r1 ) +
|
||||||
( (uint64_t) acc2 * r0 ) +
|
mul64( acc2, r0 ) +
|
||||||
( (uint64_t) acc3 * rs3 ) +
|
mul64( acc3, rs3 ) +
|
||||||
( (uint64_t) acc4 * rs2 );
|
mul64( acc4, rs2 );
|
||||||
d3 = ( (uint64_t) acc0 * r3 ) +
|
d3 = mul64( acc0, r3 ) +
|
||||||
( (uint64_t) acc1 * r2 ) +
|
mul64( acc1, r2 ) +
|
||||||
( (uint64_t) acc2 * r1 ) +
|
mul64( acc2, r1 ) +
|
||||||
( (uint64_t) acc3 * r0 ) +
|
mul64( acc3, r0 ) +
|
||||||
( (uint64_t) acc4 * rs3 );
|
mul64( acc4, rs3 );
|
||||||
acc4 *= r0;
|
acc4 *= r0;
|
||||||
|
|
||||||
/* Compute: acc %= (2^130 - 5) (partial remainder) */
|
/* Compute: acc %= (2^130 - 5) (partial remainder) */
|
||||||
|
@ -39,6 +39,9 @@ static const char *features[] = {
|
|||||||
#if defined(MBEDTLS_NO_UDBL_DIVISION)
|
#if defined(MBEDTLS_NO_UDBL_DIVISION)
|
||||||
"MBEDTLS_NO_UDBL_DIVISION",
|
"MBEDTLS_NO_UDBL_DIVISION",
|
||||||
#endif /* MBEDTLS_NO_UDBL_DIVISION */
|
#endif /* MBEDTLS_NO_UDBL_DIVISION */
|
||||||
|
#if defined(MBEDTLS_NO_64BIT_MULTIPLICATION)
|
||||||
|
"MBEDTLS_NO_64BIT_MULTIPLICATION",
|
||||||
|
#endif /* MBEDTLS_NO_64BIT_MULTIPLICATION */
|
||||||
#if defined(MBEDTLS_HAVE_SSE2)
|
#if defined(MBEDTLS_HAVE_SSE2)
|
||||||
"MBEDTLS_HAVE_SSE2",
|
"MBEDTLS_HAVE_SSE2",
|
||||||
#endif /* MBEDTLS_HAVE_SSE2 */
|
#endif /* MBEDTLS_HAVE_SSE2 */
|
||||||
|
@ -95,6 +95,7 @@ MBEDTLS_X509_ALLOW_UNSUPPORTED_CRITICAL_EXTENSION
|
|||||||
MBEDTLS_ZLIB_SUPPORT
|
MBEDTLS_ZLIB_SUPPORT
|
||||||
MBEDTLS_PKCS11_C
|
MBEDTLS_PKCS11_C
|
||||||
MBEDTLS_NO_UDBL_DIVISION
|
MBEDTLS_NO_UDBL_DIVISION
|
||||||
|
MBEDTLS_NO_64BIT_MULTIPLICATION
|
||||||
_ALT\s*$
|
_ALT\s*$
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -344,6 +344,12 @@ if_build_succeeded () {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# to be used instead of ! for commands run with
|
||||||
|
# record_status or if_build_succeeded
|
||||||
|
not() {
|
||||||
|
! "$@"
|
||||||
|
}
|
||||||
|
|
||||||
msg "info: $0 configuration"
|
msg "info: $0 configuration"
|
||||||
echo "MEMORY: $MEMORY"
|
echo "MEMORY: $MEMORY"
|
||||||
echo "FORCE: $FORCE"
|
echo "FORCE: $FORCE"
|
||||||
@ -691,6 +697,31 @@ make CC=gcc CFLAGS='-Werror -Wall -Wextra -DMBEDTLS_HAVE_INT64'
|
|||||||
msg "test: gcc, force 64-bit bignum limbs"
|
msg "test: gcc, force 64-bit bignum limbs"
|
||||||
make test
|
make test
|
||||||
|
|
||||||
|
|
||||||
|
msg "build: MBEDTLS_NO_UDBL_DIVISION native" # ~ 10s
|
||||||
|
cleanup
|
||||||
|
cp "$CONFIG_H" "$CONFIG_BAK"
|
||||||
|
scripts/config.pl full
|
||||||
|
scripts/config.pl unset MBEDTLS_MEMORY_BACKTRACE # too slow for tests
|
||||||
|
scripts/config.pl set MBEDTLS_NO_UDBL_DIVISION
|
||||||
|
make CFLAGS='-Werror -O1'
|
||||||
|
|
||||||
|
msg "test: MBEDTLS_NO_UDBL_DIVISION native" # ~ 10s
|
||||||
|
make test
|
||||||
|
|
||||||
|
|
||||||
|
msg "build: MBEDTLS_NO_64BIT_MULTIPLICATION native" # ~ 10s
|
||||||
|
cleanup
|
||||||
|
cp "$CONFIG_H" "$CONFIG_BAK"
|
||||||
|
scripts/config.pl full
|
||||||
|
scripts/config.pl unset MBEDTLS_MEMORY_BACKTRACE # too slow for tests
|
||||||
|
scripts/config.pl set MBEDTLS_NO_64BIT_MULTIPLICATION
|
||||||
|
make CFLAGS='-Werror -O1'
|
||||||
|
|
||||||
|
msg "test: MBEDTLS_NO_64BIT_MULTIPLICATION native" # ~ 10s
|
||||||
|
make test
|
||||||
|
|
||||||
|
|
||||||
msg "build: arm-none-eabi-gcc, make" # ~ 10s
|
msg "build: arm-none-eabi-gcc, make" # ~ 10s
|
||||||
cleanup
|
cleanup
|
||||||
cp "$CONFIG_H" "$CONFIG_BAK"
|
cp "$CONFIG_H" "$CONFIG_BAK"
|
||||||
@ -726,7 +757,27 @@ scripts/config.pl unset MBEDTLS_MEMORY_BUFFER_ALLOC_C # calls exit
|
|||||||
scripts/config.pl set MBEDTLS_NO_UDBL_DIVISION
|
scripts/config.pl set MBEDTLS_NO_UDBL_DIVISION
|
||||||
make CC=arm-none-eabi-gcc AR=arm-none-eabi-ar LD=arm-none-eabi-ld CFLAGS='-Werror -Wall -Wextra' lib
|
make CC=arm-none-eabi-gcc AR=arm-none-eabi-ar LD=arm-none-eabi-ld CFLAGS='-Werror -Wall -Wextra' lib
|
||||||
echo "Checking that software 64-bit division is not required"
|
echo "Checking that software 64-bit division is not required"
|
||||||
! grep __aeabi_uldiv library/*.o
|
if_build_succeeded not grep __aeabi_uldiv library/*.o
|
||||||
|
|
||||||
|
msg "build: arm-none-eabi-gcc MBEDTLS_NO_64BIT_MULTIPLICATION, make" # ~ 10s
|
||||||
|
cleanup
|
||||||
|
cp "$CONFIG_H" "$CONFIG_BAK"
|
||||||
|
scripts/config.pl full
|
||||||
|
scripts/config.pl unset MBEDTLS_NET_C
|
||||||
|
scripts/config.pl unset MBEDTLS_TIMING_C
|
||||||
|
scripts/config.pl unset MBEDTLS_FS_IO
|
||||||
|
scripts/config.pl unset MBEDTLS_ENTROPY_NV_SEED
|
||||||
|
scripts/config.pl set MBEDTLS_NO_PLATFORM_ENTROPY
|
||||||
|
# following things are not in the default config
|
||||||
|
scripts/config.pl unset MBEDTLS_HAVEGE_C # depends on timing.c
|
||||||
|
scripts/config.pl unset MBEDTLS_THREADING_PTHREAD
|
||||||
|
scripts/config.pl unset MBEDTLS_THREADING_C
|
||||||
|
scripts/config.pl unset MBEDTLS_MEMORY_BACKTRACE # execinfo.h
|
||||||
|
scripts/config.pl unset MBEDTLS_MEMORY_BUFFER_ALLOC_C # calls exit
|
||||||
|
scripts/config.pl set MBEDTLS_NO_64BIT_MULTIPLICATION
|
||||||
|
make CC=arm-none-eabi-gcc AR=arm-none-eabi-ar LD=arm-none-eabi-ld CFLAGS='-Werror -O1 -march=armv6-m -mthumb' lib
|
||||||
|
echo "Checking that software 64-bit multiplication is not required"
|
||||||
|
if_build_succeeded not grep __aeabi_lmul library/*.o
|
||||||
|
|
||||||
msg "build: ARM Compiler 5, make"
|
msg "build: ARM Compiler 5, make"
|
||||||
cleanup
|
cleanup
|
||||||
|
Loading…
Reference in New Issue
Block a user