Simplify and tighten license and documentation file name matching
[\.] is the same as [.], and [-] matches the dash, so there is no need for backslash-escaping. Also, let's shorten the patterns by using alternatives for the common parts. Previously, we would match any prefix, even though the matches were clearly intended to cover the whole file name. Let's use fullmatch to make it clear that the whole string must be matched.
This commit is contained in:
parent
8e8ae0c969
commit
e9a84e8261
1 changed files with 17 additions and 16 deletions
|
@ -37,15 +37,14 @@ JINJA_ENV = jinja2.Environment(
|
|||
extensions=["jinja2.ext.do"],
|
||||
trim_blocks=True,
|
||||
lstrip_blocks=True)
|
||||
LICENSES = re.compile(
|
||||
r"(COPYING|COPYING[\.\-].*|COPYRIGHT|COPYRIGHT[\.\-].*|"
|
||||
r"EULA|EULA[\.\-].*|[Ll]icen[cs]e|[Ll]icen[cs]e.*|LICEN[CS]E|"
|
||||
r"LICEN[CS]E[\.\-].*|.*[\.\-]LICEN[CS]E.*|NOTICE|NOTICE[\.\-].*|"
|
||||
r"PATENTS|PATENTS[\.\-].*|UNLICEN[CS]E|UNLICEN[CS]E[\.\-].*|"
|
||||
r"agpl[\.\-].*|gpl[\.\-].*|lgpl[\.\-].*|AGPL-.*[0-9].*|"
|
||||
r"APACHE-.*[0-9].*|BSD-.*[0-9].*|CC-BY-.*|GFDL-.*[0-9].*|"
|
||||
r"GNU-.*[0-9].*|GPL-.*[0-9].*|LGPL-.*[0-9].*|MIT-.*[0-9].*|"
|
||||
r"MPL-.*[0-9].*|OFL-.*[0-9].*)")
|
||||
LICENSES = re.compile(r"""
|
||||
COPYING(?:[.-].*)?|COPYRIGHT(?:[.-].*)?|
|
||||
EULA(?:[.-].*)?|[Ll]icen[cs]e|[Ll]icen[cs]e.*|
|
||||
(?:.*[.-])?(?:UN)?LICEN[CS]E(?:[.-].*)?|NOTICE(?:[.-].*)?|
|
||||
PATENTS(?:[.-].*)?|
|
||||
(?:agpl|l?gpl)[.-].*|CC-BY-.*|
|
||||
(?:AGPL|APACHE|BSD|GFDL|GNU|L?GPL|MIT|MPL|OFL)-.*[0-9].*
|
||||
""", re.VERBOSE)
|
||||
|
||||
def sortify(func):
|
||||
"""Return a sorted list from a generator"""
|
||||
|
@ -236,16 +235,18 @@ def get_license_files(path):
|
|||
|
||||
@sortify
|
||||
def get_doc_files(path):
|
||||
matcher = re.compile(
|
||||
r"(.*\.md|.*\.markdown|.*\.mdown|.*\.mkdn|.*\.rst|.*\.txt|AUTHORS|"
|
||||
r"AUTHORS[\.\-].*|CONTRIBUTORS|CONTRIBUTORS[\.\-].*|README|"
|
||||
r"README[\.\-].*|CHANGELOG|CHANGELOG[\.\-].*|TODO|TODO[\.\-].*)",
|
||||
re.IGNORECASE)
|
||||
matcherex = re.compile(r"CMakeLists\.txt")
|
||||
plus = re.compile(r"""
|
||||
.*\.(?:md|markdown|mdown|mkdn|rst|txt)|AUTHORS|
|
||||
AUTHORS[.-].*|CONTRIBUTORS|CONTRIBUTORS[.-].*|README|
|
||||
README[.-].*|CHANGELOG|CHANGELOG[.-].*|TODO|TODO[.-].*
|
||||
""",
|
||||
re.IGNORECASE | re.VERBOSE)
|
||||
minus = re.compile(r"CMakeLists\.txt")
|
||||
|
||||
for root, dirs, files in os.walk(path, topdown=True):
|
||||
dirs[:] = []
|
||||
for f in files:
|
||||
if matcher.match(f) and not LICENSES.match(f) and not matcherex.match(f):
|
||||
if plus.fullmatch(f) and not LICENSES.fullmatch(f) and not minus.fullmatch(f):
|
||||
yield os.path.relpath(os.path.join(root, f), path)
|
||||
|
||||
def get_package_info(package):
|
||||
|
|
Loading…
Reference in a new issue