mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2024-11-27 10:04:25 +01:00
Base64 decoding: use ranges instead of tables
Instead of doing constant-flow table lookup, which requires 128 memory loads for each lookup into a 128-entry table, do a range-based calculation, which requires more CPU instructions per range but there are only 5 ranges. Experimentally, this is ~12x faster on my PC (based on programs/x509/load_roots). The code is slightly smaller, too. Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
parent
6b541a026b
commit
c48ab11400
@ -46,23 +46,6 @@ static const unsigned char base64_enc_map[64] =
|
|||||||
'8', '9', '+', '/'
|
'8', '9', '+', '/'
|
||||||
};
|
};
|
||||||
|
|
||||||
static const unsigned char base64_dec_map[128] =
|
|
||||||
{
|
|
||||||
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
|
|
||||||
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
|
|
||||||
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
|
|
||||||
127, 127, 127, 127, 127, 127, 127, 127, 127, 127,
|
|
||||||
127, 127, 127, 62, 127, 127, 127, 63, 52, 53,
|
|
||||||
54, 55, 56, 57, 58, 59, 60, 61, 127, 127,
|
|
||||||
127, 127, 127, 127, 127, 0, 1, 2, 3, 4,
|
|
||||||
5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
|
|
||||||
15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
|
|
||||||
25, 127, 127, 127, 127, 127, 127, 26, 27, 28,
|
|
||||||
29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
|
|
||||||
39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
|
|
||||||
49, 50, 51, 127, 127, 127, 127, 127
|
|
||||||
};
|
|
||||||
|
|
||||||
#define BASE64_SIZE_T_MAX ( (size_t) -1 ) /* SIZE_T_MAX is not standard */
|
#define BASE64_SIZE_T_MAX ( (size_t) -1 ) /* SIZE_T_MAX is not standard */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -133,6 +116,18 @@ static unsigned char mbedtls_base64_table_lookup( const unsigned char * const ta
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return 0xff if low <= c <= high, 0 otherwise.
|
||||||
|
*
|
||||||
|
* Constant flow with respect to c.
|
||||||
|
*/
|
||||||
|
static unsigned char mask_of_range( unsigned char low, unsigned char high,
|
||||||
|
unsigned char c )
|
||||||
|
{
|
||||||
|
unsigned low_mask = ( c - low ) >> 8;
|
||||||
|
unsigned high_mask = ( c - high - 1 ) >> 8;
|
||||||
|
return( ~low_mask & high_mask & 0xff );
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Encode a buffer into base64 format
|
* Encode a buffer into base64 format
|
||||||
*/
|
*/
|
||||||
@ -211,6 +206,34 @@ int mbedtls_base64_encode( unsigned char *dst, size_t dlen, size_t *olen,
|
|||||||
return( 0 );
|
return( 0 );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Given a Base64 digit, return its value.
|
||||||
|
* If c is not a Base64 digit ('A'..'Z', 'a'..'z', '0'..'9', '+' or '/'),
|
||||||
|
* return -1.
|
||||||
|
*
|
||||||
|
* The implementation assumes that letters are consecutive (e.g. ASCII
|
||||||
|
* but not EBCDIC).
|
||||||
|
*
|
||||||
|
* The implementation is constant-flow (no branch or memory access depending
|
||||||
|
* on the value of c) unless the compiler inlines and optimizes a specific
|
||||||
|
* access.
|
||||||
|
*/
|
||||||
|
static signed char dec_value( unsigned char c )
|
||||||
|
{
|
||||||
|
unsigned char val = 0;
|
||||||
|
/* For each range of digits, if c is in that range, mask val with
|
||||||
|
* the corresponding value. Since c can only be in a single range,
|
||||||
|
* only at most one masking will change val. Set val to one plus
|
||||||
|
* the desired value so that it stays 0 if c is in none of the ranges. */
|
||||||
|
val |= mask_of_range( 'A', 'Z', c ) & ( c - 'A' + 0 + 1 );
|
||||||
|
val |= mask_of_range( 'a', 'z', c ) & ( c - 'a' + 26 + 1 );
|
||||||
|
val |= mask_of_range( '0', '9', c ) & ( c - '0' + 52 + 1 );
|
||||||
|
val |= mask_of_range( '+', '+', c ) & ( c - '+' + 62 + 1 );
|
||||||
|
val |= mask_of_range( '/', '/', c ) & ( c - '/' + 63 + 1 );
|
||||||
|
/* At this point, val is 0 if c is an invalid digit and v+1 if c is
|
||||||
|
* a digit with the value v. */
|
||||||
|
return( val - 1 );
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Decode a base64-formatted buffer
|
* Decode a base64-formatted buffer
|
||||||
*/
|
*/
|
||||||
@ -220,7 +243,6 @@ int mbedtls_base64_decode( unsigned char *dst, size_t dlen, size_t *olen,
|
|||||||
size_t i, n;
|
size_t i, n;
|
||||||
uint32_t j, x;
|
uint32_t j, x;
|
||||||
unsigned char *p;
|
unsigned char *p;
|
||||||
unsigned char dec_map_lookup;
|
|
||||||
|
|
||||||
/* First pass: check for validity and get output length */
|
/* First pass: check for validity and get output length */
|
||||||
for( i = n = j = 0; i < slen; i++ )
|
for( i = n = j = 0; i < slen; i++ )
|
||||||
@ -260,8 +282,7 @@ int mbedtls_base64_decode( unsigned char *dst, size_t dlen, size_t *olen,
|
|||||||
{
|
{
|
||||||
if( j != 0 )
|
if( j != 0 )
|
||||||
return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );
|
return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );
|
||||||
dec_map_lookup = mbedtls_base64_table_lookup( base64_dec_map, sizeof( base64_dec_map ), src[i] );
|
if( dec_value( src[i] ) < 0 )
|
||||||
if( dec_map_lookup == 127 )
|
|
||||||
return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );
|
return( MBEDTLS_ERR_BASE64_INVALID_CHARACTER );
|
||||||
}
|
}
|
||||||
n++;
|
n++;
|
||||||
@ -298,8 +319,7 @@ int mbedtls_base64_decode( unsigned char *dst, size_t dlen, size_t *olen,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
dec_map_lookup = mbedtls_base64_table_lookup( base64_dec_map, sizeof( base64_dec_map ), *src );
|
x = ( x << 6 ) | ( dec_value( *src ) & 0x3F );
|
||||||
x = ( x << 6 ) | ( dec_map_lookup & 0x3F );
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ++n == 4 )
|
if( ++n == 4 )
|
||||||
|
Loading…
Reference in New Issue
Block a user