From 89458d1420bd3ccefbc0c4a2a6e037e1a274ccea Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Mon, 27 Sep 2021 19:20:17 +0200 Subject: [PATCH 1/2] More robust handling of excluded files Don't try to enumerate excluded files. List included files, and remove names from the list if they match an excluded-file pattern. This resolves the problem that the script could get into an infinite loop due to the use of recursive globbing. Unfortunately, Python's recursive globs follow symbolic links to directories, which leads to an infinite loop if a symbolic link points to an ancestor of the directory that contains it. Signed-off-by: Gilles Peskine --- tests/scripts/check_names.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index a9aa118ea..737b3df3d 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -44,6 +44,7 @@ error. It must be run from Mbed TLS root. import abc import argparse +import fnmatch import glob import textwrap import os @@ -222,8 +223,9 @@ class CodeParser(): # Memo for storing "glob expression": set(filepaths) self.files = {} - # Globally excluded filenames - self.excluded_files = ["**/bn_mul", "**/compat-2.x.h"] + # Globally excluded filenames. + # Note that "*" can match directory separators in exclude lists. + self.excluded_files = ["*/bn_mul", "*/compat-2.x.h"] @staticmethod def check_repo_path(): @@ -302,6 +304,15 @@ class CodeParser(): "mbed_words": mbed_words } + def is_file_excluded(self, path, exclude_wildcards): + """Whether the give file path is excluded.""" + # exclude_wildcards may be None. Also, consider the global exclusions. 
+ exclude_wildcards = (exclude_wildcards or []) + self.excluded_files + for pattern in exclude_wildcards: + if fnmatch.fnmatch(path, pattern): + return True + return False + def get_files(self, include_wildcards, exclude_wildcards): """ Get all files that match any of the UNIX-style wildcards. While the @@ -317,25 +328,11 @@ class CodeParser(): """ accumulator = set() - # exclude_wildcards may be None. Also, consider the global exclusions. - exclude_wildcards = (exclude_wildcards or []) + self.excluded_files - - # Internal function to hit the memoisation cache or add to it the result - # of a glob operation. Used both for inclusion and exclusion since the - # only difference between them is whether they perform set union or - # difference on the return value of this function. - def hit_cache(wildcard): - if wildcard not in self.files: - self.files[wildcard] = set(glob.glob(wildcard, recursive=True)) - return self.files[wildcard] - for include_wildcard in include_wildcards: - accumulator = accumulator.union(hit_cache(include_wildcard)) + accumulator = accumulator.union(glob.iglob(include_wildcard)) - for exclude_wildcard in exclude_wildcards: - accumulator = accumulator.difference(hit_cache(exclude_wildcard)) - - return list(accumulator) + return list(path for path in accumulator + if not self.is_file_excluded(path, exclude_wildcards)) def parse_macros(self, include, exclude=None): """ From 8a8322407227becbf3247a8b24b4b1087ee5d1b3 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 28 Sep 2021 10:12:49 +0200 Subject: [PATCH 2/2] Fix typo Signed-off-by: Gilles Peskine --- tests/scripts/check_names.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/check_names.py b/tests/scripts/check_names.py index 737b3df3d..ac2490fc1 100755 --- a/tests/scripts/check_names.py +++ b/tests/scripts/check_names.py @@ -305,7 +305,7 @@ class CodeParser(): } def is_file_excluded(self, path, exclude_wildcards): - """Whether the give file path is 
excluded.""" + """Whether the given file path is excluded.""" # exclude_wildcards may be None. Also, consider the global exclusions. exclude_wildcards = (exclude_wildcards or []) + self.excluded_files for pattern in exclude_wildcards: