From e9a84e8261a36bc821550b9c5044771d205680b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Sat, 11 Dec 2021 12:46:27 +0100 Subject: [PATCH] Simplify and tighten license and documentation file name matching [\.] is the same as [.], and [-] matches the dash, no need for backslash-escaping. Also, let's shorten the patterns by using alternatives for the common parts. Before, we would match any prefix, even though the matches were clearly intended to cover the whole file name. Let's use fullmatch to make it clear that the whole string must be matched. --- rust2rpm/__main__.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/rust2rpm/__main__.py b/rust2rpm/__main__.py index 16b1344..7c1e2d2 100644 --- a/rust2rpm/__main__.py +++ b/rust2rpm/__main__.py @@ -37,15 +37,14 @@ JINJA_ENV = jinja2.Environment( extensions=["jinja2.ext.do"], trim_blocks=True, lstrip_blocks=True) -LICENSES = re.compile( - r"(COPYING|COPYING[\.\-].*|COPYRIGHT|COPYRIGHT[\.\-].*|" - r"EULA|EULA[\.\-].*|[Ll]icen[cs]e|[Ll]icen[cs]e.*|LICEN[CS]E|" - r"LICEN[CS]E[\.\-].*|.*[\.\-]LICEN[CS]E.*|NOTICE|NOTICE[\.\-].*|" - r"PATENTS|PATENTS[\.\-].*|UNLICEN[CS]E|UNLICEN[CS]E[\.\-].*|" - r"agpl[\.\-].*|gpl[\.\-].*|lgpl[\.\-].*|AGPL-.*[0-9].*|" - r"APACHE-.*[0-9].*|BSD-.*[0-9].*|CC-BY-.*|GFDL-.*[0-9].*|" - r"GNU-.*[0-9].*|GPL-.*[0-9].*|LGPL-.*[0-9].*|MIT-.*[0-9].*|" - r"MPL-.*[0-9].*|OFL-.*[0-9].*)") +LICENSES = re.compile(r""" + COPYING(?:[.-].*)?|COPYRIGHT(?:[.-].*)?| + EULA(?:[.-].*)?|[Ll]icen[cs]e|[Ll]icen[cs]e.*| + (?:.*[.-])?(?:UN)?LICEN[CS]E(?:[.-].*)?|NOTICE(?:[.-].*)?| + PATENTS(?:[.-].*)?| + (?:agpl|l?gpl)[.-].*|CC-BY-.*| + (?:AGPL|APACHE|BSD|GFDL|GNU|L?GPL|MIT|MPL|OFL)-.*[0-9].* + """, re.VERBOSE) def sortify(func): """Return a sorted list from a generator""" @@ -236,16 +235,18 @@ def get_license_files(path): @sortify def get_doc_files(path): - matcher = re.compile( -
r"(.*\.md|.*\.markdown|.*\.mdown|.*\.mkdn|.*\.rst|.*\.txt|AUTHORS|" - r"AUTHORS[\.\-].*|CONTRIBUTORS|CONTRIBUTORS[\.\-].*|README|" - r"README[\.\-].*|CHANGELOG|CHANGELOG[\.\-].*|TODO|TODO[\.\-].*)", - re.IGNORECASE) - matcherex = re.compile(r"CMakeLists\.txt") + plus = re.compile(r""" + .*\.(?:md|markdown|mdown|mkdn|rst|txt)|AUTHORS| + AUTHORS[.-].*|CONTRIBUTORS|CONTRIBUTORS[.-].*|README| + README[.-].*|CHANGELOG|CHANGELOG[.-].*|TODO|TODO[.-].* + """, + re.IGNORECASE | re.VERBOSE) + minus = re.compile(r"CMakeLists\.txt") + for root, dirs, files in os.walk(path, topdown=True): dirs[:] = [] for f in files: - if matcher.match(f) and not LICENSES.match(f) and not matcherex.match(f): + if plus.fullmatch(f) and not LICENSES.fullmatch(f) and not minus.fullmatch(f): yield os.path.relpath(os.path.join(root, f), path) def get_package_info(package):