From 3cf3a8ebdf9ba14fdd95b3aebff7693f90db22d4 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Tue, 30 Mar 2021 19:09:05 +0200
Subject: [PATCH] Move InputsForTest to macro_collector.py

This is useful to generate PSA tests for more than constant names.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 scripts/mbedtls_dev/macro_collector.py   | 215 ++++++++++++++++++++++-
 tests/scripts/test_psa_constant_names.py | 214 +---------------------
 2 files changed, 216 insertions(+), 213 deletions(-)

diff --git a/scripts/mbedtls_dev/macro_collector.py b/scripts/mbedtls_dev/macro_collector.py
index a2192baf4..0c9a9a5b2 100644
--- a/scripts/mbedtls_dev/macro_collector.py
+++ b/scripts/mbedtls_dev/macro_collector.py
@@ -18,7 +18,55 @@
 
 import itertools
 import re
-from typing import Dict, Iterable, Iterator, List, Set
+from typing import Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
+
+
+class ReadFileLineException(Exception):
+    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
+        message = 'in {} at {}'.format(filename, line_number)
+        super(ReadFileLineException, self).__init__(message)
+        self.filename = filename
+        self.line_number = line_number
+
+
+class read_file_lines:
+    # Dear Pylint, conventionally, a context manager class name is lowercase.
+    # pylint: disable=invalid-name,too-few-public-methods
+    """Context manager to read a text file line by line.
+
+    ```
+    with read_file_lines(filename) as lines:
+        for line in lines:
+            process(line)
+    ```
+    is equivalent to
+    ```
+    with open(filename, 'r') as input_file:
+        for line in input_file:
+            process(line)
+    ```
+    except that if process(line) raises an exception, then the read_file_lines
+    snippet annotates the exception with the file name and line number.
+    """
+    def __init__(self, filename: str, binary: bool = False) -> None:
+        self.filename = filename
+        self.line_number = 'entry' #type: Union[int, str]
+        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
+        self.binary = binary
+    def __enter__(self) -> 'read_file_lines':
+        self.generator = enumerate(open(self.filename,
+                                        'rb' if self.binary else 'r'))
+        return self
+    def __iter__(self) -> Iterator[str]:
+        assert self.generator is not None
+        for line_number, content in self.generator:
+            self.line_number = line_number
+            yield content
+        self.line_number = 'exit'
+    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
+        if exc_type is not None:
+            raise ReadFileLineException(self.filename, self.line_number) \
+                from exc_value
 
 
 class PSAMacroEnumerator:
@@ -251,3 +299,168 @@ class PSAMacroCollector(PSAMacroEnumerator):
                 m = re.search(self._continued_line_re, line)
             line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
             self.read_line(line)
+
+
+class InputsForTest(PSAMacroEnumerator):
+    # pylint: disable=too-many-instance-attributes
+    """Accumulate information about macros to test.
+enumerate
+    This includes macro names as well as information about their arguments
+    when applicable.
+    """
+
+    def __init__(self) -> None:
+        super().__init__()
+        self.all_declared = set() #type: Set[str]
+        # Sets of names per type
+        self.statuses.add('PSA_SUCCESS')
+        self.algorithms.add('0xffffffff')
+        self.ecc_curves.add('0xff')
+        self.dh_groups.add('0xff')
+        self.key_types.add('0xffff')
+        self.key_usage_flags.add('0x80000000')
+
+        # Hard-coded values for unknown algorithms
+        #
+        # These have to have values that are correct for their respective
+        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
+        # not likely to be assigned in the near future.
+        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
+        self.mac_algorithms.add('0x03007fff')
+        self.ka_algorithms.add('0x09fc0000')
+        self.kdf_algorithms.add('0x080000ff')
+        # For AEAD algorithms, the only variability is over the tag length,
+        # and this only applies to known algorithms, so don't test an
+        # unknown algorithm.
+
+        # Identifier prefixes
+        self.table_by_prefix = {
+            'ERROR': self.statuses,
+            'ALG': self.algorithms,
+            'ECC_CURVE': self.ecc_curves,
+            'DH_GROUP': self.dh_groups,
+            'KEY_TYPE': self.key_types,
+            'KEY_USAGE': self.key_usage_flags,
+        } #type: Dict[str, Set[str]]
+        # Test functions
+        self.table_by_test_function = {
+            # Any function ending in _algorithm also gets added to
+            # self.algorithms.
+            'key_type': [self.key_types],
+            'block_cipher_key_type': [self.key_types],
+            'stream_cipher_key_type': [self.key_types],
+            'ecc_key_family': [self.ecc_curves],
+            'ecc_key_types': [self.ecc_curves],
+            'dh_key_family': [self.dh_groups],
+            'dh_key_types': [self.dh_groups],
+            'hash_algorithm': [self.hash_algorithms],
+            'mac_algorithm': [self.mac_algorithms],
+            'cipher_algorithm': [],
+            'hmac_algorithm': [self.mac_algorithms],
+            'aead_algorithm': [self.aead_algorithms],
+            'key_derivation_algorithm': [self.kdf_algorithms],
+            'key_agreement_algorithm': [self.ka_algorithms],
+            'asymmetric_signature_algorithm': [],
+            'asymmetric_signature_wildcard': [self.algorithms],
+            'asymmetric_encryption_algorithm': [],
+            'other_algorithm': [],
+        } #type: Dict[str, List[Set[str]]]
+        self.arguments_for['mac_length'] += ['1', '63']
+        self.arguments_for['min_mac_length'] += ['1', '63']
+        self.arguments_for['tag_length'] += ['1', '63']
+        self.arguments_for['min_tag_length'] += ['1', '63']
+
+    def get_names(self, type_word: str) -> Set[str]:
+        """Return the set of known names of values of the given type."""
+        return {
+            'status': self.statuses,
+            'algorithm': self.algorithms,
+            'ecc_curve': self.ecc_curves,
+            'dh_group': self.dh_groups,
+            'key_type': self.key_types,
+            'key_usage': self.key_usage_flags,
+        }[type_word]
+
+    # Regex for interesting header lines.
+    # Groups: 1=macro name, 2=type, 3=argument list (optional).
+    _header_line_re = \
+        re.compile(r'#define +' +
+                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
+                   r'(?:\(([^\n()]*)\))?')
+    # Regex of macro names to exclude.
+    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
+    # Additional excluded macros.
+    _excluded_names = set([
+        # Macros that provide an alternative way to build the same
+        # algorithm as another macro.
+        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
+        'PSA_ALG_FULL_LENGTH_MAC',
+        # Auxiliary macro whose name doesn't fit the usual patterns for
+        # auxiliary macros.
+        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
+    ])
+    def parse_header_line(self, line: str) -> None:
+        """Parse a C header line, looking for "#define PSA_xxx"."""
+        m = re.match(self._header_line_re, line)
+        if not m:
+            return
+        name = m.group(1)
+        self.all_declared.add(name)
+        if re.search(self._excluded_name_re, name) or \
+           name in self._excluded_names:
+            return
+        dest = self.table_by_prefix.get(m.group(2))
+        if dest is None:
+            return
+        dest.add(name)
+        if m.group(3):
+            self.argspecs[name] = self._argument_split(m.group(3))
+
+    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
+    def parse_header(self, filename: str) -> None:
+        """Parse a C header file, looking for "#define PSA_xxx"."""
+        with read_file_lines(filename, binary=True) as lines:
+            for line in lines:
+                line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
+                self.parse_header_line(line)
+
+    _macro_identifier_re = re.compile(r'[A-Z]\w+')
+    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
+        for name in re.findall(self._macro_identifier_re, expr):
+            if name not in self.all_declared:
+                yield name
+
+    def accept_test_case_line(self, function: str, argument: str) -> bool:
+        #pylint: disable=unused-argument
+        undeclared = list(self.generate_undeclared_names(argument))
+        if undeclared:
+            raise Exception('Undeclared names in test case', undeclared)
+        return True
+
+    def add_test_case_line(self, function: str, argument: str) -> None:
+        """Parse a test case data line, looking for algorithm metadata tests."""
+        sets = []
+        if function.endswith('_algorithm'):
+            sets.append(self.algorithms)
+            if function == 'key_agreement_algorithm' and \
+               argument.startswith('PSA_ALG_KEY_AGREEMENT('):
+                # We only want *raw* key agreement algorithms as such, so
+                # exclude ones that are already chained with a KDF.
+                # Keep the expression as one to test as an algorithm.
+                function = 'other_algorithm'
+        sets += self.table_by_test_function[function]
+        if self.accept_test_case_line(function, argument):
+            for s in sets:
+                s.add(argument)
+
+    # Regex matching a *.data line containing a test function call and
+    # its arguments. The actual definition is partly positional, but this
+    # regex is good enough in practice.
+    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
+    def parse_test_cases(self, filename: str) -> None:
+        """Parse a test case file (*.data), looking for algorithm metadata tests."""
+        with read_file_lines(filename) as lines:
+            for line in lines:
+                m = re.match(self._test_case_line_re, line)
+                if m:
+                    self.add_test_case_line(m.group(1), m.group(2))
diff --git a/tests/scripts/test_psa_constant_names.py b/tests/scripts/test_psa_constant_names.py
index 237a344eb..c6f23054c 100755
--- a/tests/scripts/test_psa_constant_names.py
+++ b/tests/scripts/test_psa_constant_names.py
@@ -28,223 +28,13 @@ import os
 import re
 import subprocess
 import sys
-from typing import Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union
+from typing import Iterable, List, Optional, Tuple
 
 import scripts_path # pylint: disable=unused-import
 from mbedtls_dev import c_build_helper
-from mbedtls_dev.macro_collector import PSAMacroEnumerator
+from mbedtls_dev.macro_collector import InputsForTest, PSAMacroEnumerator
 from mbedtls_dev import typing_util
 
-class ReadFileLineException(Exception):
-    def __init__(self, filename: str, line_number: Union[int, str]) -> None:
-        message = 'in {} at {}'.format(filename, line_number)
-        super(ReadFileLineException, self).__init__(message)
-        self.filename = filename
-        self.line_number = line_number
-
-class read_file_lines:
-    # Dear Pylint, conventionally, a context manager class name is lowercase.
-    # pylint: disable=invalid-name,too-few-public-methods
-    """Context manager to read a text file line by line.
-
-    ```
-    with read_file_lines(filename) as lines:
-        for line in lines:
-            process(line)
-    ```
-    is equivalent to
-    ```
-    with open(filename, 'r') as input_file:
-        for line in input_file:
-            process(line)
-    ```
-    except that if process(line) raises an exception, then the read_file_lines
-    snippet annotates the exception with the file name and line number.
-    """
-    def __init__(self, filename: str, binary: bool = False) -> None:
-        self.filename = filename
-        self.line_number = 'entry' #type: Union[int, str]
-        self.generator = None #type: Optional[Iterable[Tuple[int, str]]]
-        self.binary = binary
-    def __enter__(self) -> 'read_file_lines':
-        self.generator = enumerate(open(self.filename,
-                                        'rb' if self.binary else 'r'))
-        return self
-    def __iter__(self) -> Iterator[str]:
-        assert self.generator is not None
-        for line_number, content in self.generator:
-            self.line_number = line_number
-            yield content
-        self.line_number = 'exit'
-    def __exit__(self, exc_type, exc_value, exc_traceback) -> None:
-        if exc_type is not None:
-            raise ReadFileLineException(self.filename, self.line_number) \
-                from exc_value
-
-class InputsForTest(PSAMacroEnumerator):
-    # pylint: disable=too-many-instance-attributes
-    """Accumulate information about macros to test.
-enumerate
-    This includes macro names as well as information about their arguments
-    when applicable.
-    """
-
-    def __init__(self) -> None:
-        super().__init__()
-        self.all_declared = set() #type: Set[str]
-        # Sets of names per type
-        self.statuses.add('PSA_SUCCESS')
-        self.algorithms.add('0xffffffff')
-        self.ecc_curves.add('0xff')
-        self.dh_groups.add('0xff')
-        self.key_types.add('0xffff')
-        self.key_usage_flags.add('0x80000000')
-
-        # Hard-coded values for unknown algorithms
-        #
-        # These have to have values that are correct for their respective
-        # PSA_ALG_IS_xxx macros, but are also not currently assigned and are
-        # not likely to be assigned in the near future.
-        self.hash_algorithms.add('0x020000fe') # 0x020000ff is PSA_ALG_ANY_HASH
-        self.mac_algorithms.add('0x03007fff')
-        self.ka_algorithms.add('0x09fc0000')
-        self.kdf_algorithms.add('0x080000ff')
-        # For AEAD algorithms, the only variability is over the tag length,
-        # and this only applies to known algorithms, so don't test an
-        # unknown algorithm.
-
-        # Identifier prefixes
-        self.table_by_prefix = {
-            'ERROR': self.statuses,
-            'ALG': self.algorithms,
-            'ECC_CURVE': self.ecc_curves,
-            'DH_GROUP': self.dh_groups,
-            'KEY_TYPE': self.key_types,
-            'KEY_USAGE': self.key_usage_flags,
-        } #type: Dict[str, Set[str]]
-        # Test functions
-        self.table_by_test_function = {
-            # Any function ending in _algorithm also gets added to
-            # self.algorithms.
-            'key_type': [self.key_types],
-            'block_cipher_key_type': [self.key_types],
-            'stream_cipher_key_type': [self.key_types],
-            'ecc_key_family': [self.ecc_curves],
-            'ecc_key_types': [self.ecc_curves],
-            'dh_key_family': [self.dh_groups],
-            'dh_key_types': [self.dh_groups],
-            'hash_algorithm': [self.hash_algorithms],
-            'mac_algorithm': [self.mac_algorithms],
-            'cipher_algorithm': [],
-            'hmac_algorithm': [self.mac_algorithms],
-            'aead_algorithm': [self.aead_algorithms],
-            'key_derivation_algorithm': [self.kdf_algorithms],
-            'key_agreement_algorithm': [self.ka_algorithms],
-            'asymmetric_signature_algorithm': [],
-            'asymmetric_signature_wildcard': [self.algorithms],
-            'asymmetric_encryption_algorithm': [],
-            'other_algorithm': [],
-        } #type: Dict[str, List[Set[str]]]
-        self.arguments_for['mac_length'] += ['1', '63']
-        self.arguments_for['min_mac_length'] += ['1', '63']
-        self.arguments_for['tag_length'] += ['1', '63']
-        self.arguments_for['min_tag_length'] += ['1', '63']
-
-    def get_names(self, type_word: str) -> Set[str]:
-        """Return the set of known names of values of the given type."""
-        return {
-            'status': self.statuses,
-            'algorithm': self.algorithms,
-            'ecc_curve': self.ecc_curves,
-            'dh_group': self.dh_groups,
-            'key_type': self.key_types,
-            'key_usage': self.key_usage_flags,
-        }[type_word]
-
-    # Regex for interesting header lines.
-    # Groups: 1=macro name, 2=type, 3=argument list (optional).
-    _header_line_re = \
-        re.compile(r'#define +' +
-                   r'(PSA_((?:(?:DH|ECC|KEY)_)?[A-Z]+)_\w+)' +
-                   r'(?:\(([^\n()]*)\))?')
-    # Regex of macro names to exclude.
-    _excluded_name_re = re.compile(r'_(?:GET|IS|OF)_|_(?:BASE|FLAG|MASK)\Z')
-    # Additional excluded macros.
-    _excluded_names = set([
-        # Macros that provide an alternative way to build the same
-        # algorithm as another macro.
-        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG',
-        'PSA_ALG_FULL_LENGTH_MAC',
-        # Auxiliary macro whose name doesn't fit the usual patterns for
-        # auxiliary macros.
-        'PSA_ALG_AEAD_WITH_DEFAULT_LENGTH_TAG_CASE',
-    ])
-    def parse_header_line(self, line: str) -> None:
-        """Parse a C header line, looking for "#define PSA_xxx"."""
-        m = re.match(self._header_line_re, line)
-        if not m:
-            return
-        name = m.group(1)
-        self.all_declared.add(name)
-        if re.search(self._excluded_name_re, name) or \
-           name in self._excluded_names:
-            return
-        dest = self.table_by_prefix.get(m.group(2))
-        if dest is None:
-            return
-        dest.add(name)
-        if m.group(3):
-            self.argspecs[name] = self._argument_split(m.group(3))
-
-    _nonascii_re = re.compile(rb'[^\x00-\x7f]+') #type: Pattern
-    def parse_header(self, filename: str) -> None:
-        """Parse a C header file, looking for "#define PSA_xxx"."""
-        with read_file_lines(filename, binary=True) as lines:
-            for line in lines:
-                line = re.sub(self._nonascii_re, rb'', line).decode('ascii')
-                self.parse_header_line(line)
-
-    _macro_identifier_re = re.compile(r'[A-Z]\w+')
-    def generate_undeclared_names(self, expr: str) -> Iterable[str]:
-        for name in re.findall(self._macro_identifier_re, expr):
-            if name not in self.all_declared:
-                yield name
-
-    def accept_test_case_line(self, function: str, argument: str) -> bool:
-        #pylint: disable=unused-argument
-        undeclared = list(self.generate_undeclared_names(argument))
-        if undeclared:
-            raise Exception('Undeclared names in test case', undeclared)
-        return True
-
-    def add_test_case_line(self, function: str, argument: str) -> None:
-        """Parse a test case data line, looking for algorithm metadata tests."""
-        sets = []
-        if function.endswith('_algorithm'):
-            sets.append(self.algorithms)
-            if function == 'key_agreement_algorithm' and \
-               argument.startswith('PSA_ALG_KEY_AGREEMENT('):
-                # We only want *raw* key agreement algorithms as such, so
-                # exclude ones that are already chained with a KDF.
-                # Keep the expression as one to test as an algorithm.
-                function = 'other_algorithm'
-        sets += self.table_by_test_function[function]
-        if self.accept_test_case_line(function, argument):
-            for s in sets:
-                s.add(argument)
-
-    # Regex matching a *.data line containing a test function call and
-    # its arguments. The actual definition is partly positional, but this
-    # regex is good enough in practice.
-    _test_case_line_re = re.compile(r'(?!depends_on:)(\w+):([^\n :][^:\n]*)')
-    def parse_test_cases(self, filename: str) -> None:
-        """Parse a test case file (*.data), looking for algorithm metadata tests."""
-        with read_file_lines(filename) as lines:
-            for line in lines:
-                m = re.match(self._test_case_line_re, line)
-                if m:
-                    self.add_test_case_line(m.group(1), m.group(2))
-
 def gather_inputs(headers: Iterable[str],
                   test_suites: Iterable[str],
                   inputs_class=InputsForTest) -> PSAMacroEnumerator: