mirror of
https://github.com/yuzu-emu/mbedtls.git
synced 2024-11-24 05:35:50 +01:00
08966e6fbc
When generating expressions to construct test case data, there can be duplicate values, for example if a value of the form C(A) is present as such in test_suite_psa_crypto_metadata.data and also constructed by enumerating the argument A for the constructor C. Eliminate such duplicates in generate_expressions. This commit removes many test cases that were exact duplicates (and were near-duplicates differing only in whitespace before the whitespace normalization). Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
496 lines
21 KiB
Python
496 lines
21 KiB
Python
"""Collect macro definitions from header files.
|
|
"""
|
|
|
|
# Copyright The Mbed TLS Contributors
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); you may
|
|
# not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
|
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import itertools
import re
from typing import IO, Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
|
|
|
|
|
|
class ReadFileLineException(Exception):
    """Exception carrying the location in a file where processing failed.

    The exception message has the form ``in FILENAME at LINE_NUMBER``.
    The two components are also stored in the `filename` and
    `line_number` attributes.
    """

    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
        """Build the exception for a location in a file.

        * filename: the name of the file being processed.
        * line_number: the position in the file, given either as an
          integer or as a descriptive string.
        """
        super().__init__('in {} at {}'.format(filename, line_number))
        self.filename = filename
        self.line_number = line_number
|
|
|
|
|
|
class read_file_lines:
    # Dear Pylint, conventionally, a context manager class name is lowercase.
    # pylint: disable=invalid-name,too-few-public-methods
    """Context manager to read a text file line by line.

    ```
    with read_file_lines(filename) as lines:
        for line in lines:
            process(line)
    ```
    is equivalent to
    ```
    with open(filename, 'r') as input_file:
        for line in input_file:
            process(line)
    ```
    except that if process(line) raises an exception, then the read_file_lines
    snippet annotates the exception with the file name and line number.

    The reported line number is the zero-based index of the line that was
    most recently yielded, or the string 'entry' if no line has been
    yielded yet, or 'exit' once all the lines have been consumed.

    If `binary` is true, the file is opened in binary mode and the lines
    are yielded as bytes objects rather than strings.
    """

    def __init__(self, filename: str, binary: bool = False) -> None:
        """Remember which file to read. The file is opened by `__enter__`."""
        self.filename = filename
        # The open file, so that __exit__ can close it.
        self.file = None #type: Optional[IO]
        self.line_number = 'entry' #type: Union[int, str]
        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
        self.binary = binary

    def __enter__(self) -> 'read_file_lines':
        """Open the file and prepare to enumerate its lines."""
        self.file = open(self.filename, 'rb' if self.binary else 'r')
        self.generator = enumerate(self.file)
        return self

    def __iter__(self) -> Iterator[str]:
        """Yield each line in turn, keeping track of the current position."""
        assert self.generator is not None
        for line_number, content in self.generator:
            self.line_number = line_number
            yield content
        self.line_number = 'exit'

    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
        """Close the file and annotate any exception raised in the body.

        Raise `ReadFileLineException` chained from the original exception
        if the body of the `with` statement raised an exception.
        """
        # Always release the file handle, whether or not the body failed.
        if self.file is not None:
            self.file.close()
        if exc_type is not None:
            raise ReadFileLineException(self.filename, self.line_number) \
                from exc_value
|
|
|
|
|
|
class PSAMacroEnumerator:
    """Information about constructors of various PSA Crypto types.

    This includes macro names as well as information about their arguments
    when applicable.

    This class only provides ways to enumerate expressions that evaluate to
    values of the covered types. Derived classes are expected to populate
    the set of known constructors of each kind, as well as populate
    `self.arguments_for` for arguments that are not of a kind that is
    enumerated here.
    """

    def __init__(self) -> None:
        """Set up an empty set of known constructor macros.
        """
        self.statuses = set() #type: Set[str]
        self.algorithms = set() #type: Set[str]
        self.ecc_curves = set() #type: Set[str]
        self.dh_groups = set() #type: Set[str]
        self.key_types = set() #type: Set[str]
        self.key_usage_flags = set() #type: Set[str]
        self.hash_algorithms = set() #type: Set[str]
        self.mac_algorithms = set() #type: Set[str]
        self.ka_algorithms = set() #type: Set[str]
        self.kdf_algorithms = set() #type: Set[str]
        self.aead_algorithms = set() #type: Set[str]
        # macro name -> list of argument names
        self.argspecs = {} #type: Dict[str, List[str]]
        # argument name -> list of values
        self.arguments_for = {
            'mac_length': [],
            'min_mac_length': [],
            'tag_length': [],
            'min_tag_length': [],
        } #type: Dict[str, List[str]]
        self.include_intermediate = False

    def is_internal_name(self, name: str) -> bool:
        """Whether this is an internal macro. Internal macros will be skipped."""
        if not self.include_intermediate:
            if name.endswith(('_BASE', '_NONE')):
                return True
            if '_CATEGORY_' in name:
                return True
        # These suffixes are internal regardless of include_intermediate.
        return name.endswith(('_FLAG', '_MASK'))

    def gather_arguments(self) -> None:
        """Populate the list of values for macro arguments.

        Call this after parsing all the inputs.
        """
        self.arguments_for['hash_alg'] = sorted(self.hash_algorithms)
        self.arguments_for['mac_alg'] = sorted(self.mac_algorithms)
        self.arguments_for['ka_alg'] = sorted(self.ka_algorithms)
        self.arguments_for['kdf_alg'] = sorted(self.kdf_algorithms)
        self.arguments_for['aead_alg'] = sorted(self.aead_algorithms)
        self.arguments_for['curve'] = sorted(self.ecc_curves)
        self.arguments_for['group'] = sorted(self.dh_groups)

    @staticmethod
    def _format_arguments(name: str, arguments: Iterable[str]) -> str:
        """Format a macro call with arguments.

        The resulting format is consistent with
        `InputsForTest.normalize_argument`.
        """
        return name + '(' + ', '.join(arguments) + ')'

    _argument_split_re = re.compile(r' *, *')
    @classmethod
    def _argument_split(cls, arguments: str) -> List[str]:
        """Split a comma-separated argument list, dropping spaces around commas."""
        return re.split(cls._argument_split_re, arguments)

    def distribute_arguments(self, name: str) -> Iterator[str]:
        """Generate macro calls with each tested argument set.

        If name is a macro without arguments, just yield "name".
        If name is a macro with arguments, yield a series of
        "name(arg1,...,argN)" where each argument takes each possible
        value at least once.

        The first expression uses the first known value of every argument.
        Then, for each argument in turn, the remaining values of that
        argument are tried while all the other arguments keep their first
        value.

        Any exception raised while enumerating (for example because an
        argument has no known value) is re-raised as an `Exception` whose
        message names the macro, chained from the original exception.
        """
        # Catch Exception rather than BaseException so that closing a
        # partially consumed generator (GeneratorExit) is not turned into
        # an error.
        try:
            if name not in self.argspecs:
                yield name
                return
            argspec = self.argspecs[name]
            if not argspec:
                yield name + '()'
                return
            argument_lists = [self.arguments_for[arg] for arg in argspec]
            arguments = [values[0] for values in argument_lists]
            yield self._format_arguments(name, arguments)
            for i, values in enumerate(argument_lists):
                for value in values[1:]:
                    arguments[i] = value
                    yield self._format_arguments(name, arguments)
                # Restore this argument's own first value before varying
                # the next argument.
                arguments[i] = values[0]
        except Exception as e:
            raise Exception('distribute_arguments({})'.format(name)) from e

    def distribute_arguments_without_duplicates(
            self, seen: Set[str], name: str
    ) -> Iterator[str]:
        """Same as `distribute_arguments`, but don't repeat seen results.

        Each result that is yielded is added to `seen`.
        """
        for result in self.distribute_arguments(name):
            if result not in seen:
                seen.add(result)
                yield result

    def generate_expressions(self, names: Iterable[str]) -> Iterator[str]:
        """Generate expressions covering values constructed from the given names.

        `names` can be any iterable collection of macro names.

        For example:
        * ``generate_expressions(['PSA_ALG_CMAC', 'PSA_ALG_HMAC'])``
          generates ``'PSA_ALG_CMAC'`` as well as ``'PSA_ALG_HMAC(h)'`` for
          every known hash algorithm ``h``.
        * ``macros.generate_expressions(macros.key_types)`` generates all
          key types.

        Each distinct expression is generated only once.
        """
        seen = set() #type: Set[str]
        # `names` is deliberately consumed right away (argument unpacking),
        # so the caller may modify the collection while iterating over the
        # result. The per-name generators themselves remain lazy.
        return itertools.chain(*(
            self.distribute_arguments_without_duplicates(seen, name)
            for name in names
        ))
|
|
|
|
|
|
class PSAMacroCollector(PSAMacroEnumerator):
    """Collect PSA crypto macro definitions from C header files.
    """

    def __init__(self, include_intermediate: bool = False) -> None:
        """Set up an object to collect PSA macro definitions.

        Call the read_file method of the constructed object on each header file.

        * include_intermediate: if true, include intermediate macros such as
          PSA_XXX_BASE that do not designate semantic values.
        """
        super().__init__()
        self.include_intermediate = include_intermediate
        # Constructor macro name -> name of the corresponding PSA_xxx_IS_yyy
        # tester macro, for constructors parametrized by a curve, by a group
        # and by a hash algorithm respectively.
        self.key_types_from_curve = {} #type: Dict[str, str]
        self.key_types_from_group = {} #type: Dict[str, str]
        self.algorithms_from_hash = {} #type: Dict[str, str]

    def record_algorithm_subtype(self, name: str, expansion: str) -> None:
        """Record the subtype of an algorithm constructor.

        Given a ``PSA_ALG_xxx`` macro name and its expansion, if the algorithm
        is of a subtype that is tracked in its own set, add it to the relevant
        set.
        """
        # This code is very ad hoc and fragile. It should be replaced by
        # something more robust.
        # NOTE(review): re.match anchors at the start of `name`, and read_line
        # only passes names that start with PSA_ALG_, so the two name-based
        # checks below look like they can never succeed for those calls.
        # Confirm the intent before changing them.
        if re.match(r'MAC(?:_|\Z)', name):
            self.mac_algorithms.add(name)
        elif re.match(r'KDF(?:_|\Z)', name):
            self.kdf_algorithms.add(name)
        elif re.search(r'0x020000[0-9A-Fa-f]{2}', expansion):
            self.hash_algorithms.add(name)
        elif re.search(r'0x03[0-9A-Fa-f]{6}', expansion):
            self.mac_algorithms.add(name)
        elif re.search(r'0x05[0-9A-Fa-f]{6}', expansion):
            self.aead_algorithms.add(name)
        elif re.search(r'0x09[0-9A-Fa-f]{2}0000', expansion):
            self.ka_algorithms.add(name)
        elif re.search(r'0x08[0-9A-Fa-f]{6}', expansion):
            self.kdf_algorithms.add(name)

    # "#define" followed by a macro name with either no parameters
    # or a single parameter and a non-empty expansion.
    # Grab the macro name in group 1, the parameter name if any in group 2
    # and the expansion in group 3.
    _define_directive_re = re.compile(r'\s*#\s*define\s+(\w+)' +
                                      r'(?:\s+|\((\w+)\)\s*)' +
                                      r'(.+)')
    _deprecated_definition_re = re.compile(r'\s*MBEDTLS_DEPRECATED')

    def read_line(self, line: str) -> None:
        """Parse a C header line and record the PSA identifier it defines if any.
        This function analyzes lines that start with "#define PSA_"
        (up to non-significant whitespace) and skips all non-matching lines.
        """
        # pylint: disable=too-many-branches
        m = re.match(self._define_directive_re, line)
        if not m:
            return
        name, parameter, expansion = m.groups()
        # Replace comments in the expansion by a space.
        expansion = re.sub(r'/\*.*?\*/|//.*', r' ', expansion)
        if parameter:
            self.argspecs[name] = [parameter]
        if re.match(self._deprecated_definition_re, expansion):
            # Skip deprecated values, which are assumed to be
            # backward compatibility aliases that share
            # numerical values with non-deprecated values.
            return
        if self.is_internal_name(name):
            # Macro only to build actual values
            return
        elif (name.startswith('PSA_ERROR_') or name == 'PSA_SUCCESS') \
           and not parameter:
            self.statuses.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and not parameter:
            self.key_types.add(name)
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'curve':
            # 13 is the length of the 'PSA_KEY_TYPE_' prefix.
            self.key_types_from_curve[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_KEY_TYPE_') and parameter == 'group':
            self.key_types_from_group[name] = name[:13] + 'IS_' + name[13:]
        elif name.startswith('PSA_ECC_FAMILY_') and not parameter:
            self.ecc_curves.add(name)
        elif name.startswith('PSA_DH_FAMILY_') and not parameter:
            self.dh_groups.add(name)
        elif name.startswith('PSA_ALG_') and not parameter:
            if name in ['PSA_ALG_ECDSA_BASE',
                        'PSA_ALG_RSA_PKCS1V15_SIGN_BASE']:
                # Ad hoc skipping of duplicate names for some numerical values
                return
            self.algorithms.add(name)
            self.record_algorithm_subtype(name, expansion)
        elif name.startswith('PSA_ALG_') and parameter == 'hash_alg':
            # 8 is the length of the 'PSA_ALG_' prefix.
            if name in ['PSA_ALG_DSA', 'PSA_ALG_ECDSA']:
                # A naming irregularity
                tester = name[:8] + 'IS_RANDOMIZED_' + name[8:]
            else:
                tester = name[:8] + 'IS_' + name[8:]
            self.algorithms_from_hash[name] = tester
        elif name.startswith('PSA_KEY_USAGE_') and not parameter:
            self.key_usage_flags.add(name)
        else:
            # Other macro without parameter
            return

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+')
    _continued_line_re = re.compile(rb'\\\r?\n\Z')
    def read_file(self, header_file: Iterator[bytes]) -> None:
        """Parse a C header and record the PSA identifiers that it defines.

        * header_file: an iterator over the lines of the header, as bytes
          objects, for example a file opened in binary mode. It must be an
          iterator and not merely an iterable because continuation lines
          are fetched from it with `next`.

        Lines ending with a backslash are joined with the following line
        before being analyzed. Non-ASCII bytes are removed.
        """
        for line in header_file:
            m = re.search(self._continued_line_re, line)
            while m:
                # If the input ends on a continuation, use an empty
                # continuation rather than leaking StopIteration to the caller.
                cont = next(header_file, b'')
                line = line[:m.start(0)] + cont
                m = re.search(self._continued_line_re, line)
            line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
            self.read_line(line)
|
|
|
|
|
|
class InputsForTest(PSAMacroEnumerator):
    # pylint: disable=too-many-instance-attributes
    """Accumulate information about macros to test.

    This includes macro names as well as information about their arguments
    when applicable.
    """

    def __init__(self) -> None:
        """Set up empty collections and the tables used to fill them."""
        super().__init__()
        # Every PSA_xxx macro name seen by parse_header_line.
        self.all_declared = set() #type: Set[str]
        # Identifier prefixes
        self.table_by_prefix = {
            'ERROR': self.statuses,
            'ALG': self.algorithms,
            'ECC_CURVE': self.ecc_curves,
            'DH_GROUP': self.dh_groups,
            'KEY_TYPE': self.key_types,
            'KEY_USAGE': self.key_usage_flags,
        } #type: Dict[str, Set[str]]
        # Test functions
        self.table_by_test_function = {
            # Any function ending in _algorithm also gets added to
            # self.algorithms.
            'key_type': [self.key_types],
            'block_cipher_key_type': [self.key_types],
            'stream_cipher_key_type': [self.key_types],
            'ecc_key_family': [self.ecc_curves],
            'ecc_key_types': [self.ecc_curves],
            'dh_key_family': [self.dh_groups],
            'dh_key_types': [self.dh_groups],
            'hash_algorithm': [self.hash_algorithms],
            'mac_algorithm': [self.mac_algorithms],
            'cipher_algorithm': [],
            'hmac_algorithm': [self.mac_algorithms],
            'aead_algorithm': [self.aead_algorithms],
            'key_derivation_algorithm': [self.kdf_algorithms],
            'key_agreement_algorithm': [self.ka_algorithms],
            'asymmetric_signature_algorithm': [],
            'asymmetric_signature_wildcard': [self.algorithms],
            'asymmetric_encryption_algorithm': [],
            'other_algorithm': [],
        } #type: Dict[str, List[Set[str]]]
        # All four length arguments are tested with the same two values.
        for length_argument in ('mac_length', 'min_mac_length',
                                'tag_length', 'min_tag_length'):
            self.arguments_for[length_argument] += ['1', '63']

    def add_numerical_values(self) -> None:
        """Add numerical values that are not supported to the known identifiers."""
        # Sets of names per type
        self.algorithms.add('0xffffffff')
        self.ecc_curves.add('0xff')
        self.dh_groups.add('0xff')
        self.key_types.add('0xffff')
        self.key_usage_flags.add('0x80000000')

        # Hard-coded values for unknown algorithms
        #
        # These have to have values that are correct for their respective
        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
        # not likely to be assigned in the near future.
        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
        self.mac_algorithms.add('0x03007fff')
        self.ka_algorithms.add('0x09fc0000')
        self.kdf_algorithms.add('0x080000ff')
        # For AEAD algorithms, the only variability is over the tag length,
        # and this only applies to known algorithms, so don't test an
        # unknown algorithm.

    def get_names(self, type_word: str) -> Set[str]:
        """Return the set of known names of values of the given type."""
        names_by_type = {
            'status': self.statuses,
            'algorithm': self.algorithms,
            'ecc_curve': self.ecc_curves,
            'dh_group': self.dh_groups,
            'key_type': self.key_types,
            'key_usage': self.key_usage_flags,
        }
        return names_by_type[type_word]

    # Regex for interesting header lines.
    # Groups: 1=macro name, 2=type, 3=argument list (optional).
    _header_line_re = \
        re.compile(r'#define +' +
                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
                   r'(?:\(([^\n()]*)\))?')
    # Regex of macro names to exclude.
    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
    # Additional excluded macros.
    _excluded_names = {
        # Macros that provide an alternative way to build the same
        # algorithm as another macro.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
        'PSA_ALG_FULL_LENGTH_MAC',
        # Auxiliary macro whose name doesn't fit the usual patterns for
        # auxiliary macros.
        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
    }
    def parse_header_line(self, line: str) -> None:
        """Parse a C header line, looking for "#define PSA_xxx".

        Every matching macro name is recorded in `all_declared`. Unless the
        name is excluded, it is also added to the set selected by its type
        prefix, and its argument names, if any, are recorded in `argspecs`.
        """
        match = self._header_line_re.match(line)
        if match is None:
            return
        macro, prefix, arglist = match.group(1), match.group(2), match.group(3)
        self.all_declared.add(macro)
        if self._excluded_name_re.search(macro):
            return
        if macro in self._excluded_names:
            return
        if self.is_internal_name(macro):
            return
        dest = self.table_by_prefix.get(prefix)
        if dest is not None:
            dest.add(macro)
            if arglist:
                self.argspecs[macro] = self._argument_split(arglist)

    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
    def parse_header(self, filename: str) -> None:
        """Parse a C header file, looking for "#define PSA_xxx".

        The file is read in binary mode and non-ASCII bytes are removed
        from each line before it is analyzed.
        """
        with read_file_lines(filename, binary=True) as lines:
            for raw_line in lines:
                ascii_line = self._nonascii_re.sub(rb'', raw_line)
                self.parse_header_line(ascii_line.decode('ascii'))

    _macro_identifier_re = re.compile(r'[A-Z]\w+')
    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
        """Yield the identifiers in `expr` that are not in `all_declared`."""
        for identifier in self._macro_identifier_re.findall(expr):
            if identifier in self.all_declared:
                continue
            yield identifier

    def accept_test_case_line(self, function: str, argument: str) -> bool:
        """Check that `argument` only uses declared names.

        Return True if so, otherwise raise an exception listing the
        undeclared names.
        """
        #pylint: disable=unused-argument
        undeclared = list(self.generate_undeclared_names(argument))
        if not undeclared:
            return True
        raise Exception('Undeclared names in test case', undeclared)

    @staticmethod
    def normalize_argument(argument: str) -> str:
        """Normalize whitespace in the given C expression.

        The result uses the same whitespace as
        `PSAMacroEnumerator.distribute_arguments`.
        """
        # First remove all the spaces, then put one back after each comma.
        without_spaces = re.sub(r' +', r'', argument)
        return re.sub(r',', r', ', without_spaces)

    def add_test_case_line(self, function: str, argument: str) -> None:
        """Parse a test case data line, looking for algorithm metadata tests."""
        sets = []
        if function.endswith('_algorithm'):
            sets.append(self.algorithms)
            is_chained_agreement = (
                function == 'key_agreement_algorithm' and
                argument.startswith('PSA_ALG_KEY_AGREEMENT(')
            )
            if is_chained_agreement:
                # We only want *raw* key agreement algorithms as such, so
                # exclude ones that are already chained with a KDF.
                # Keep the expression as one to test as an algorithm.
                function = 'other_algorithm'
        sets += self.table_by_test_function[function]
        if not self.accept_test_case_line(function, argument):
            return
        for target in sets:
            target.add(self.normalize_argument(argument))

    # Regex matching a *.data line containing a test function call and
    # its arguments. The actual definition is partly positional, but this
    # regex is good enough in practice.
    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
    def parse_test_cases(self, filename: str) -> None:
        """Parse a test case file (*.data), looking for algorithm metadata tests."""
        with read_file_lines(filename) as lines:
            for line in lines:
                match = self._test_case_line_re.match(line)
                if match is None:
                    continue
                self.add_test_case_line(match.group(1), match.group(2))
|