mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2024-11-22 16:25:43 +01:00
Perf: rewrite of ecp_double_jac
- Improve optimization for special case A == -3.
- Add optimization for special case A == 0.
- Use alternative base formula, saving several additions.
- Reduce temp variables to 4 (from 6).
This commit is contained in:
parent
82788fb63b
commit
ce661b2cb8
@ -912,70 +912,86 @@ cleanup:
|
||||
/*
|
||||
* Point doubling R = 2 P, Jacobian coordinates
|
||||
*
|
||||
* http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian/doubling/dbl-2007-bl.op3
|
||||
* with heavy variable renaming, some reordering and one minor modification
|
||||
* (a = 2 * b, c = d - 2a replaced with c = d, c = c - b, c = c - b)
|
||||
* in order to use a lot less intermediate variables (6 vs 25).
|
||||
* Based on http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1998-cmo-2 .
|
||||
*
|
||||
* Cost: 1D := 2M + 8S
|
||||
* We follow the variable naming fairly closely. The formula variations that trade a MUL for a SQR
|
||||
* (plus a few ADDs) aren't useful as our bignum implementation doesn't distinguish squaring.
|
||||
*
|
||||
* Standard optimizations are applied when curve parameter A is one of { 0, -3 }.
|
||||
*
|
||||
* Cost: 1D := 3M + 4S (A == 0)
|
||||
* 4M + 4S (A == -3)
|
||||
* 3M + 6S + 1a otherwise
|
||||
*/
|
||||
static int ecp_double_jac( const ecp_group *grp, ecp_point *R,
|
||||
const ecp_point *P )
|
||||
{
|
||||
int ret;
|
||||
mpi T1, T2, T3, X3, Y3, Z3;
|
||||
mpi M, S, T, U;
|
||||
|
||||
#if defined(POLARSSL_SELF_TEST)
|
||||
dbl_count++;
|
||||
#endif
|
||||
|
||||
mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 );
|
||||
mpi_init( &X3 ); mpi_init( &Y3 ); mpi_init( &Z3 );
|
||||
|
||||
MPI_CHK( mpi_mul_mpi( &T3, &P->X, &P->X ) ); MOD_MUL( T3 );
|
||||
MPI_CHK( mpi_mul_mpi( &T2, &P->Y, &P->Y ) ); MOD_MUL( T2 );
|
||||
MPI_CHK( mpi_mul_mpi( &Y3, &T2, &T2 ) ); MOD_MUL( Y3 );
|
||||
MPI_CHK( mpi_add_mpi( &X3, &P->X, &T2 ) ); MOD_ADD( X3 );
|
||||
MPI_CHK( mpi_mul_mpi( &X3, &X3, &X3 ) ); MOD_MUL( X3 );
|
||||
MPI_CHK( mpi_sub_mpi( &X3, &X3, &Y3 ) ); MOD_SUB( X3 );
|
||||
MPI_CHK( mpi_sub_mpi( &X3, &X3, &T3 ) ); MOD_SUB( X3 );
|
||||
MPI_CHK( mpi_mul_int( &T1, &X3, 2 ) ); MOD_ADD( T1 );
|
||||
MPI_CHK( mpi_mul_mpi( &Z3, &P->Z, &P->Z ) ); MOD_MUL( Z3 );
|
||||
MPI_CHK( mpi_mul_mpi( &X3, &Z3, &Z3 ) ); MOD_MUL( X3 );
|
||||
MPI_CHK( mpi_mul_int( &T3, &T3, 3 ) ); MOD_ADD( T3 );
|
||||
mpi_init( &M ); mpi_init( &S ); mpi_init( &T ); mpi_init( &U );
|
||||
|
||||
/* Special case for A = -3 */
|
||||
if( grp->A.p == NULL )
|
||||
{
|
||||
MPI_CHK( mpi_mul_int( &X3, &X3, 3 ) );
|
||||
X3.s = -1; /* mpi_mul_int doesn't handle negative numbers */
|
||||
MOD_SUB( X3 );
|
||||
/* M = 3(X + Z^2)(X - Z^2) */
|
||||
MPI_CHK( mpi_mul_mpi( &S, &P->Z, &P->Z ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_add_mpi( &T, &P->X, &S ) ); MOD_ADD( T );
|
||||
MPI_CHK( mpi_sub_mpi( &U, &P->X, &S ) ); MOD_SUB( U );
|
||||
MPI_CHK( mpi_mul_mpi( &S, &T, &U ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_mul_int( &M, &S, 3 ) ); MOD_ADD( M );
|
||||
}
|
||||
else
|
||||
{
|
||||
MPI_CHK( mpi_mul_mpi( &X3, &X3, &grp->A ) ); MOD_MUL( X3 );
|
||||
/* M = 3.X^2 */
|
||||
MPI_CHK( mpi_mul_mpi( &S, &P->X, &P->X ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_mul_int( &M, &S, 3 ) ); MOD_ADD( M );
|
||||
|
||||
/* Optimize away for "koblitz" curves with A = 0 */
|
||||
if( mpi_cmp_int( &grp->A, 0 ) != 0 )
|
||||
{
|
||||
/* M += A.Z^4 */
|
||||
MPI_CHK( mpi_mul_mpi( &S, &P->Z, &P->Z ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_mul_mpi( &T, &S, &S ) ); MOD_MUL( T );
|
||||
MPI_CHK( mpi_mul_mpi( &S, &T, &grp->A ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_add_mpi( &M, &M, &S ) ); MOD_ADD( M );
|
||||
}
|
||||
}
|
||||
|
||||
MPI_CHK( mpi_add_mpi( &T3, &T3, &X3 ) ); MOD_ADD( T3 );
|
||||
MPI_CHK( mpi_mul_mpi( &X3, &T3, &T3 ) ); MOD_MUL( X3 );
|
||||
MPI_CHK( mpi_sub_mpi( &X3, &X3, &T1 ) ); MOD_SUB( X3 );
|
||||
MPI_CHK( mpi_sub_mpi( &X3, &X3, &T1 ) ); MOD_SUB( X3 );
|
||||
MPI_CHK( mpi_sub_mpi( &T1, &T1, &X3 ) ); MOD_SUB( T1 );
|
||||
MPI_CHK( mpi_mul_mpi( &T1, &T3, &T1 ) ); MOD_MUL( T1 );
|
||||
MPI_CHK( mpi_mul_int( &T3, &Y3, 8 ) ); MOD_ADD( T3 );
|
||||
MPI_CHK( mpi_sub_mpi( &Y3, &T1, &T3 ) ); MOD_SUB( Y3 );
|
||||
MPI_CHK( mpi_add_mpi( &T1, &P->Y, &P->Z ) ); MOD_ADD( T1 );
|
||||
MPI_CHK( mpi_mul_mpi( &T1, &T1, &T1 ) ); MOD_MUL( T1 );
|
||||
MPI_CHK( mpi_sub_mpi( &T1, &T1, &T2 ) ); MOD_SUB( T1 );
|
||||
MPI_CHK( mpi_sub_mpi( &Z3, &T1, &Z3 ) ); MOD_SUB( Z3 );
|
||||
/* S = 4.X.Y^2 */
|
||||
MPI_CHK( mpi_mul_mpi( &T, &P->Y, &P->Y ) ); MOD_MUL( T );
|
||||
MPI_CHK( mpi_shift_l( &T, 1 ) ); MOD_ADD( T );
|
||||
MPI_CHK( mpi_mul_mpi( &S, &P->X, &T ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_shift_l( &S, 1 ) ); MOD_ADD( S );
|
||||
|
||||
MPI_CHK( mpi_copy( &R->X, &X3 ) );
|
||||
MPI_CHK( mpi_copy( &R->Y, &Y3 ) );
|
||||
MPI_CHK( mpi_copy( &R->Z, &Z3 ) );
|
||||
/* U = 8.Y^4 */
|
||||
MPI_CHK( mpi_mul_mpi( &U, &T, &T ) ); MOD_MUL( U );
|
||||
MPI_CHK( mpi_shift_l( &U, 1 ) ); MOD_ADD( U );
|
||||
|
||||
/* T = M^2 - 2.S */
|
||||
MPI_CHK( mpi_mul_mpi( &T, &M, &M ) ); MOD_MUL( T );
|
||||
MPI_CHK( mpi_sub_mpi( &T, &T, &S ) ); MOD_SUB( T );
|
||||
MPI_CHK( mpi_sub_mpi( &T, &T, &S ) ); MOD_SUB( T );
|
||||
|
||||
/* S = M(S - T) - U */
|
||||
MPI_CHK( mpi_sub_mpi( &S, &S, &T ) ); MOD_SUB( S );
|
||||
MPI_CHK( mpi_mul_mpi( &S, &S, &M ) ); MOD_MUL( S );
|
||||
MPI_CHK( mpi_sub_mpi( &S, &S, &U ) ); MOD_SUB( S );
|
||||
|
||||
/* U = 2.Y.Z */
|
||||
MPI_CHK( mpi_mul_mpi( &U, &P->Y, &P->Z ) ); MOD_MUL( U );
|
||||
MPI_CHK( mpi_shift_l( &U, 1 ) ); MOD_ADD( U );
|
||||
|
||||
MPI_CHK( mpi_copy( &R->X, &T ) );
|
||||
MPI_CHK( mpi_copy( &R->Y, &S ) );
|
||||
MPI_CHK( mpi_copy( &R->Z, &U ) );
|
||||
|
||||
cleanup:
|
||||
mpi_free( &T1 ); mpi_free( &T2 ); mpi_free( &T3 );
|
||||
mpi_free( &X3 ); mpi_free( &Y3 ); mpi_free( &Z3 );
|
||||
mpi_free( &M ); mpi_free( &S ); mpi_free( &T ); mpi_free( &U );
|
||||
|
||||
return( ret );
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user