Small performance improvement of mbedtls_mpi_div_mpi():

1. don't use dynamic allocator for fixed size T2;
2. move T2 initialization out of the inner loop.
This commit is contained in:
Alexander K 2019-10-31 14:46:45 +03:00
parent ccde952df0
commit 35d6d46169

View File

@ -1632,6 +1632,7 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
int ret; int ret;
size_t i, n, t, k; size_t i, n, t, k;
mbedtls_mpi X, Y, Z, T1, T2; mbedtls_mpi X, Y, Z, T1, T2;
mbedtls_mpi_uint __tp2[3];
MPI_VALIDATE_RET( A != NULL ); MPI_VALIDATE_RET( A != NULL );
MPI_VALIDATE_RET( B != NULL ); MPI_VALIDATE_RET( B != NULL );
@ -1639,7 +1640,11 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO ); return( MBEDTLS_ERR_MPI_DIVISION_BY_ZERO );
mbedtls_mpi_init( &X ); mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &Z ); mbedtls_mpi_init( &X ); mbedtls_mpi_init( &Y ); mbedtls_mpi_init( &Z );
mbedtls_mpi_init( &T1 ); mbedtls_mpi_init( &T2 ); mbedtls_mpi_init( &T1 );
/* Avoid dynamic memory allocations for constant-size T2. */
T2.s = 1;
T2.n = 3;
T2.p = __tp2;
if( mbedtls_mpi_cmp_abs( A, B ) < 0 ) if( mbedtls_mpi_cmp_abs( A, B ) < 0 )
{ {
@ -1655,7 +1660,6 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &Z, A->n + 2 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &Z, A->n + 2 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Z, 0 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &Z, 0 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, 2 ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T1, 2 ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_grow( &T2, 3 ) );
k = mbedtls_mpi_bitlen( &Y ) % biL; k = mbedtls_mpi_bitlen( &Y ) % biL;
if( k < biL - 1 ) if( k < biL - 1 )
@ -1687,6 +1691,10 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
Y.p[t], NULL); Y.p[t], NULL);
} }
T2.p[0] = ( i < 2 ) ? 0 : X.p[i - 2];
T2.p[1] = ( i < 1 ) ? 0 : X.p[i - 1];
T2.p[2] = X.p[i];
Z.p[i - t - 1]++; Z.p[i - t - 1]++;
do do
{ {
@ -1696,11 +1704,6 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
T1.p[0] = ( t < 1 ) ? 0 : Y.p[t - 1]; T1.p[0] = ( t < 1 ) ? 0 : Y.p[t - 1];
T1.p[1] = Y.p[t]; T1.p[1] = Y.p[t];
MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &T1, Z.p[i - t - 1] ) ); MBEDTLS_MPI_CHK( mbedtls_mpi_mul_int( &T1, &T1, Z.p[i - t - 1] ) );
MBEDTLS_MPI_CHK( mbedtls_mpi_lset( &T2, 0 ) );
T2.p[0] = ( i < 2 ) ? 0 : X.p[i - 2];
T2.p[1] = ( i < 1 ) ? 0 : X.p[i - 1];
T2.p[2] = X.p[i];
} }
while( mbedtls_mpi_cmp_mpi( &T1, &T2 ) > 0 ); while( mbedtls_mpi_cmp_mpi( &T1, &T2 ) > 0 );
@ -1736,7 +1739,7 @@ int mbedtls_mpi_div_mpi( mbedtls_mpi *Q, mbedtls_mpi *R, const mbedtls_mpi *A,
cleanup: cleanup:
mbedtls_mpi_free( &X ); mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &Z ); mbedtls_mpi_free( &X ); mbedtls_mpi_free( &Y ); mbedtls_mpi_free( &Z );
mbedtls_mpi_free( &T1 ); mbedtls_mpi_free( &T2 ); mbedtls_mpi_free( &T1 );
return( ret ); return( ret );
} }