Merge pull request #3458 from gilles-peskine-arm/analyze_outcomes-count_test_cases-1

Test outcome analysis: check that all available test cases have been executed
2024-11-25 17:15:42 +01:00 · 2020-07-03 15:12:44 +02:00 · 2020-07-03 15:12:44 +02:00 · 2426506fa0
commit 2426506fa0
parent 74ff13cfae bbb3664957
7 changed files with 334 additions and 148 deletions
--- a/docs/architecture/testing/test-framework.md
+++ b/docs/architecture/testing/test-framework.md
@ -22,7 +22,7 @@ Each test case has a description which succinctly describes for a human audience
 * Make the description descriptive. “foo: x=2, y=4” is more descriptive than “foo #2”. “foo: 0<x<y, both even” is even better if these inequalities and parities are why this particular test data was chosen.
 * Avoid changing the description of an existing test case without a good reason. This breaks the tracking of failures across CI runs, since this tracking is based on the descriptions.

-`tests/scripts/check-test-cases.py` enforces some rules and warns if some guidelines are violated.
+`tests/scripts/check_test_cases.py` enforces some rules and warns if some guidelines are violated.

 ## TLS tests

@ -32,7 +32,7 @@ Each test case has a description which succinctly describes for a human audience

 Each test case in `ssl-opt.sh` has a description which succinctly describes for a human audience what the test does. The test description is the first parameter to `run_tests`.

-The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check-test-cases.py`.
+The same rules and guidelines apply as for [unit test descriptions](#unit-test-descriptions). In addition, the description must be written on the same line as `run_test`, in double quotes, for the sake of `check_test_cases.py`.

 ## Running tests

--- a/tests/scripts/all.sh
+++ b/tests/scripts/all.sh
@ -680,7 +680,7 @@ component_check_doxy_blocks () {

 component_check_files () {
    msg "Check: file sanity checks (permissions, encodings)" # < 1s
-    record_status tests/scripts/check-files.py
+    record_status tests/scripts/check_files.py
 }

 component_check_changelog () {
@ -707,7 +707,7 @@ component_check_test_cases () {
    else
        opt=''
    fi
-    record_status tests/scripts/check-test-cases.py $opt
+    record_status tests/scripts/check_test_cases.py $opt
    unset opt
 }

--- a/tests/scripts/analyze_outcomes.py
+++ b/tests/scripts/analyze_outcomes.py
@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+
+"""Analyze the test outcomes from a full CI run.
+
+This script can also run on outcomes from a partial run, but the results are
+less likely to be useful.
+"""
+
+import argparse
+import re
+import sys
+import traceback
+
+import check_test_cases
+
+class Results:
+    """Process analysis results."""
+
+    def __init__(self):
+        self.error_count = 0
+        self.warning_count = 0
+
+    @staticmethod
+    def log(fmt, *args, **kwargs):
+        sys.stderr.write((fmt + '\n').format(*args, **kwargs))
+
+    def error(self, fmt, *args, **kwargs):
+        self.log('Error: ' + fmt, *args, **kwargs)
+        self.error_count += 1
+
+    def warning(self, fmt, *args, **kwargs):
+        self.log('Warning: ' + fmt, *args, **kwargs)
+        self.warning_count += 1
+
+class TestCaseOutcomes:
+    """The outcomes of one test case across many configurations."""
+    # pylint: disable=too-few-public-methods
+
+    def __init__(self):
+        # Collect a list of witnesses of the test case succeeding or failing.
+        # Currently we don't do anything with witnesses except count them.
+        # The format of a witness is determined by the read_outcome_file
+        # function; it's the platform and configuration joined by ';'.
+        self.successes = []
+        self.failures = []
+
+    def hits(self):
+        """Return the number of times a test case has been run.
+
+        This includes passes and failures, but not skips.
+        """
+        return len(self.successes) + len(self.failures)
+
+class TestDescriptions(check_test_cases.TestDescriptionExplorer):
+    """Collect the available test cases."""
+
+    def __init__(self):
+        super().__init__()
+        self.descriptions = set()
+
+    def process_test_case(self, _per_file_state,
+                          file_name, _line_number, description):
+        """Record an available test case."""
+        base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
+        key = ';'.join([base_name, description.decode('utf-8')])
+        self.descriptions.add(key)
+
+def collect_available_test_cases():
+    """Collect the available test cases."""
+    explorer = TestDescriptions()
+    explorer.walk_all()
+    return sorted(explorer.descriptions)
+
+def analyze_coverage(results, outcomes):
+    """Check that all available test cases are executed at least once."""
+    available = collect_available_test_cases()
+    for key in available:
+        hits = outcomes[key].hits() if key in outcomes else 0
+        if hits == 0:
+            # Make this a warning, not an error, as long as we haven't
+            # fixed this branch to have full coverage of test cases.
+            results.warning('Test case not executed: {}', key)
+
+def analyze_outcomes(outcomes):
+    """Run all analyses on the given outcome collection."""
+    results = Results()
+    analyze_coverage(results, outcomes)
+    return results
+
+def read_outcome_file(outcome_file):
+    """Parse an outcome file and return an outcome collection.
+
+An outcome collection is a dictionary mapping keys to TestCaseOutcomes objects.
+The keys are the test suite name and the test case description, separated
+by a semicolon.
+"""
+    outcomes = {}
+    with open(outcome_file, 'r', encoding='utf-8') as input_file:
+        for line in input_file:
+            (platform, config, suite, case, result, _cause) = line.split(';')
+            key = ';'.join([suite, case])
+            setup = ';'.join([platform, config])
+            if key not in outcomes:
+                outcomes[key] = TestCaseOutcomes()
+            if result == 'PASS':
+                outcomes[key].successes.append(setup)
+            elif result == 'FAIL':
+                outcomes[key].failures.append(setup)
+    return outcomes
+
+def analyze_outcome_file(outcome_file):
+    """Analyze the given outcome file."""
+    outcomes = read_outcome_file(outcome_file)
+    return analyze_outcomes(outcomes)
+
+def main():
+    try:
+        parser = argparse.ArgumentParser(description=__doc__)
+        parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
+                            help='Outcome file to analyze')
+        options = parser.parse_args()
+        results = analyze_outcome_file(options.outcomes)
+        if results.error_count > 0:
+            sys.exit(1)
+    except Exception: # pylint: disable=broad-except
+        # Print the backtrace and exit explicitly with our chosen status.
+        traceback.print_exc()
+        sys.exit(120)
+
+if __name__ == '__main__':
+    main()
--- a/tests/scripts/basic-in-docker.sh
+++ b/tests/scripts/basic-in-docker.sh
@ -4,8 +4,10 @@
 #
 # Purpose
 # -------
-# This runs a rough equivalent of the travis.yml in a Docker container.
-# The tests are run for both clang and gcc.
+# This runs sanity checks and library tests in a Docker container. The tests
+# are run for both clang and gcc. The testing includes a full test run
+# in the default configuration, partial test runs in the reference
+# configurations, and some dependency tests.
 #
 # Notes for users
 # ---------------
@ -30,12 +32,7 @@

 source tests/scripts/docker_env.sh

-run_in_docker tests/scripts/recursion.pl library/*.c
-run_in_docker tests/scripts/check-generated-files.sh
-run_in_docker tests/scripts/check-doxy-blocks.pl
-run_in_docker tests/scripts/check-names.sh
-run_in_docker tests/scripts/check-files.py
-run_in_docker tests/scripts/doxygen.sh
+run_in_docker tests/scripts/all.sh 'check_*'

 for compiler in clang gcc; do
    run_in_docker -e CC=${compiler} cmake -D CMAKE_BUILD_TYPE:String="Check" .
--- a/tests/scripts/check-test-cases.py
+++ b/tests/scripts/check-test-cases.py
@ -1,136 +0,0 @@
-#!/usr/bin/env python3
-
-"""Sanity checks for test data.
-"""
-
-# Copyright (C) 2019, Arm Limited, All Rights Reserved
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License"); you may
-# not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
-# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# This file is part of Mbed TLS (https://tls.mbed.org)
-
-import argparse
-import glob
-import os
-import re
-import sys
-
-class Results:
-    """Store file and line information about errors or warnings in test suites."""
-
-    def __init__(self, options):
-        self.errors = 0
-        self.warnings = 0
-        self.ignore_warnings = options.quiet
-
-    def error(self, file_name, line_number, fmt, *args):
-        sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n').
-                         format(file_name, line_number, *args))
-        self.errors += 1
-
-    def warning(self, file_name, line_number, fmt, *args):
-        if not self.ignore_warnings:
-            sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
-                             .format(file_name, line_number, *args))
-            self.warnings += 1
-
-def collect_test_directories():
-    """Get the relative path for the TLS and Crypto test directories."""
-    if os.path.isdir('tests'):
-        tests_dir = 'tests'
-    elif os.path.isdir('suites'):
-        tests_dir = '.'
-    elif os.path.isdir('../suites'):
-        tests_dir = '..'
-    directories = [tests_dir]
-    return directories
-
-def check_description(results, seen, file_name, line_number, description):
-    """Check test case descriptions for errors."""
-    if description in seen:
-        results.error(file_name, line_number,
-                      'Duplicate description (also line {})',
-                      seen[description])
-        return
-    if re.search(br'[\t;]', description):
-        results.error(file_name, line_number,
-                      'Forbidden character \'{}\' in description',
-                      re.search(br'[\t;]', description).group(0).decode('ascii'))
-    if re.search(br'[^ -~]', description):
-        results.error(file_name, line_number,
-                      'Non-ASCII character in description')
-    if len(description) > 66:
-        results.warning(file_name, line_number,
-                        'Test description too long ({} > 66)',
-                        len(description))
-    seen[description] = line_number
-
-def check_test_suite(results, data_file_name):
-    """Check the test cases in the given unit test data file."""
-    in_paragraph = False
-    descriptions = {}
-    with open(data_file_name, 'rb') as data_file:
-        for line_number, line in enumerate(data_file, 1):
-            line = line.rstrip(b'\r\n')
-            if not line:
-                in_paragraph = False
-                continue
-            if line.startswith(b'#'):
-                continue
-            if not in_paragraph:
-                # This is a test case description line.
-                check_description(results, descriptions,
-                                  data_file_name, line_number, line)
-            in_paragraph = True
-
-def check_ssl_opt_sh(results, file_name):
-    """Check the test cases in ssl-opt.sh or a file with a similar format."""
-    descriptions = {}
-    with open(file_name, 'rb') as file_contents:
-        for line_number, line in enumerate(file_contents, 1):
-            # Assume that all run_test calls have the same simple form
-            # with the test description entirely on the same line as the
-            # function name.
-            m = re.match(br'\s*run_test\s+"((?:[^\\"]|\\.)*)"', line)
-            if not m:
-                continue
-            description = m.group(1)
-            check_description(results, descriptions,
-                              file_name, line_number, description)
-
-def main():
-    parser = argparse.ArgumentParser(description=__doc__)
-    parser.add_argument('--quiet', '-q',
-                        action='store_true',
-                        help='Hide warnings')
-    parser.add_argument('--verbose', '-v',
-                        action='store_false', dest='quiet',
-                        help='Show warnings (default: on; undoes --quiet)')
-    options = parser.parse_args()
-    test_directories = collect_test_directories()
-    results = Results(options)
-    for directory in test_directories:
-        for data_file_name in glob.glob(os.path.join(directory, 'suites',
-                                                     '*.data')):
-            check_test_suite(results, data_file_name)
-        ssl_opt_sh = os.path.join(directory, 'ssl-opt.sh')
-        if os.path.exists(ssl_opt_sh):
-            check_ssl_opt_sh(results, ssl_opt_sh)
-    if (results.warnings or results.errors) and not options.quiet:
-        sys.stderr.write('{}: {} errors, {} warnings\n'
-                         .format(sys.argv[0], results.errors, results.warnings))
-    sys.exit(1 if results.errors else 0)
-
-if __name__ == '__main__':
-    main()
--- a/tests/scripts/check_files.py
+++ b/tests/scripts/check_files.py
--- a/tests/scripts/check_test_cases.py
+++ b/tests/scripts/check_test_cases.py
@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+
+"""Sanity checks for test data.
+
+This program contains a class for traversing test cases that can be used
+independently of the checks.
+"""
+
+# Copyright (C) 2019, Arm Limited, All Rights Reserved
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# This file is part of Mbed TLS (https://tls.mbed.org)
+
+import argparse
+import glob
+import os
+import re
+import sys
+
+class Results:
+    """Store file and line information about errors or warnings in test suites."""
+
+    def __init__(self, options):
+        self.errors = 0
+        self.warnings = 0
+        self.ignore_warnings = options.quiet
+
+    def error(self, file_name, line_number, fmt, *args):
+        sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n').
+                         format(file_name, line_number, *args))
+        self.errors += 1
+
+    def warning(self, file_name, line_number, fmt, *args):
+        if not self.ignore_warnings:
+            sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
+                             .format(file_name, line_number, *args))
+            self.warnings += 1
+
+class TestDescriptionExplorer:
+    """An iterator over test cases with descriptions.
+
+The test cases that have descriptions are:
+* Individual unit tests (entries in a .data file) in test suites.
+* Individual test cases in ssl-opt.sh.
+
+This is an abstract class. To use it, derive a class that implements
+the process_test_case method, and call walk_all().
+"""
+
+    def process_test_case(self, per_file_state,
+                          file_name, line_number, description):
+        """Process a test case.
+
+per_file_state: an object created by new_per_file_state() at the beginning
+                of each file.
+file_name: a relative path to the file containing the test case.
+line_number: the line number in the given file.
+description: the test case description as a byte string.
+"""
+        raise NotImplementedError
+
+    def new_per_file_state(self):
+        """Return a new per-file state object.
+
+The default per-file state object is None. Child classes that require per-file
+state may override this method.
+"""
+        #pylint: disable=no-self-use
+        return None
+
+    def walk_test_suite(self, data_file_name):
+        """Iterate over the test cases in the given unit test data file."""
+        in_paragraph = False
+        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
+        with open(data_file_name, 'rb') as data_file:
+            for line_number, line in enumerate(data_file, 1):
+                line = line.rstrip(b'\r\n')
+                if not line:
+                    in_paragraph = False
+                    continue
+                if line.startswith(b'#'):
+                    continue
+                if not in_paragraph:
+                    # This is a test case description line.
+                    self.process_test_case(descriptions,
+                                           data_file_name, line_number, line)
+                in_paragraph = True
+
+    def walk_ssl_opt_sh(self, file_name):
+        """Iterate over the test cases in ssl-opt.sh or a file with a similar format."""
+        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
+        with open(file_name, 'rb') as file_contents:
+            for line_number, line in enumerate(file_contents, 1):
+                # Assume that all run_test calls have the same simple form
+                # with the test description entirely on the same line as the
+                # function name.
+                m = re.match(br'\s*run_test\s+"((?:[^\\"]|\\.)*)"', line)
+                if not m:
+                    continue
+                description = m.group(1)
+                self.process_test_case(descriptions,
+                                       file_name, line_number, description)
+
+    @staticmethod
+    def collect_test_directories():
+        """Get the relative path for the TLS and Crypto test directories."""
+        if os.path.isdir('tests'):
+            tests_dir = 'tests'
+        elif os.path.isdir('suites'):
+            tests_dir = '.'
+        elif os.path.isdir('../suites'):
+            tests_dir = '..'
+        directories = [tests_dir]
+        return directories
+
+    def walk_all(self):
+        """Iterate over all named test cases."""
+        test_directories = self.collect_test_directories()
+        for directory in test_directories:
+            for data_file_name in glob.glob(os.path.join(directory, 'suites',
+                                                         '*.data')):
+                self.walk_test_suite(data_file_name)
+            ssl_opt_sh = os.path.join(directory, 'ssl-opt.sh')
+            if os.path.exists(ssl_opt_sh):
+                self.walk_ssl_opt_sh(ssl_opt_sh)
+
+class DescriptionChecker(TestDescriptionExplorer):
+    """Check all test case descriptions.
+
+* Check that each description is valid (length, allowed character set, etc.).
+* Check that there is no duplicated description inside of one test suite.
+"""
+
+    def __init__(self, results):
+        self.results = results
+
+    def new_per_file_state(self):
+        """Dictionary mapping descriptions to their line number."""
+        return {}
+
+    def process_test_case(self, per_file_state,
+                          file_name, line_number, description):
+        """Check test case descriptions for errors."""
+        results = self.results
+        seen = per_file_state
+        if description in seen:
+            results.error(file_name, line_number,
+                          'Duplicate description (also line {})',
+                          seen[description])
+            return
+        if re.search(br'[\t;]', description):
+            results.error(file_name, line_number,
+                          'Forbidden character \'{}\' in description',
+                          re.search(br'[\t;]', description).group(0).decode('ascii'))
+        if re.search(br'[^ -~]', description):
+            results.error(file_name, line_number,
+                          'Non-ASCII character in description')
+        if len(description) > 66:
+            results.warning(file_name, line_number,
+                            'Test description too long ({} > 66)',
+                            len(description))
+        seen[description] = line_number
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('--quiet', '-q',
+                        action='store_true',
+                        help='Hide warnings')
+    parser.add_argument('--verbose', '-v',
+                        action='store_false', dest='quiet',
+                        help='Show warnings (default: on; undoes --quiet)')
+    options = parser.parse_args()
+    results = Results(options)
+    checker = DescriptionChecker(results)
+    checker.walk_all()
+    if (results.warnings or results.errors) and not options.quiet:
+        sys.stderr.write('{}: {} errors, {} warnings\n'
+                         .format(sys.argv[0], results.errors, results.warnings))
+    sys.exit(1 if results.errors else 0)
+
+if __name__ == '__main__':
+    main()