rust2rpm/rust2rpm/crate.py

367 lines
12 KiB
Python

import contextlib
import os
from pathlib import Path
import re
import shutil
import tarfile
import tempfile
from typing import Optional
from cargo2rpm.metadata import Metadata
from cargo2rpm.semver import Version, VersionReq
from rust2rpm.cratesio import download_crate, query_available_versions
from rust2rpm import log
from rust2rpm.patching import make_patches
from rust2rpm.vendor import generate_vendor_tarball
# File names that are treated as license files. The pattern is compiled with
# re.VERBOSE, so the line breaks inside the literal are not part of the
# expression. Matching is case-sensitive (no re.IGNORECASE here).
LICENSE_FILE_PATTERN = re.compile(
r"""
COPYING(?:[.-].*)?|COPYRIGHT(?:[.-].*)?|
EULA(?:[.-].*)?|[Ll]icen[cs]e|[Ll]icen[cs]e.*|
(?:.*[.-])?(?:UN)?LICEN[CS]E(?:[.-].*)?|NOTICE(?:[.-].*)?|
PATENTS(?:[.-].*)?|
(?:agpl|l?gpl)[.-].*|CC-BY-.*|
(?:AGPL|APACHE|BSD|GFDL|GNU|L?GPL|MIT|MPL|OFL)-.*[0-9].*
""",
re.VERBOSE,
)
# Directory names that get_license_files() does not descend into while
# walking a source tree.
LICENSE_EXCLUDE_DIRS = {
"target",
"vendor",
"example",
"examples",
"_example",
"_examples",
"testdata",
"_testdata",
".github",
"tests",
"test",
}
# File names that are treated as documentation files. Compiled with
# re.IGNORECASE, so unlike the license pattern this one ignores case.
DOC_FILE_PATTERN = re.compile(
r"""
.*\.(?:md|markdown|mdown|mkdn|rst|txt)|AUTHORS|
AUTHORS[.-].*|CONTRIBUTORS|CONTRIBUTORS[.-].*|README|
README[.-].*|CHANGELOG|CHANGELOG[.-].*|TODO|TODO[.-].*
""",
re.IGNORECASE | re.VERBOSE,
)
# File names that get_doc_files() rejects even if DOC_FILE_PATTERN matches them.
DOC_FILE_EXCLUDES = re.compile(r"CMakeLists\.txt|versions\.txt|.*\.tpl|.*\.in")
class InvalidProjectError(ValueError):
    """Raised by process_project() when a path-like project argument does not exist."""
class InvalidVersionError(ValueError):
    """Raised when a version guessed from a directory name fails to parse."""
def local_toml_file(toml_path: str) -> tuple[str, list[str], list[str]]:
    """Collect the doc and license files that belong to a local Cargo.toml.

    The directory containing ``toml_path`` is scanned with get_doc_files()
    and get_license_files().

    Returns the unchanged ``toml_path`` together with the two file lists.
    Asserts that ``toml_path`` is an existing file named "Cargo.toml".
    """
    assert os.path.isfile(toml_path)
    crate_root, file_name = os.path.split(toml_path)
    assert file_name == "Cargo.toml"

    return toml_path, get_doc_files(crate_root), get_license_files(crate_root)
def local_cargo_dir(project_dir: str) -> tuple[str, list[str], list[str]]:
    """Collect the doc and license files of a local project directory.

    Returns the path ``<project_dir>/Cargo.toml`` (its existence is not
    checked here) together with the results of get_doc_files() and
    get_license_files() for ``project_dir``.

    Asserts that ``project_dir`` is an existing directory.
    """
    assert os.path.isdir(project_dir)

    docs = get_doc_files(project_dir)
    licenses = get_license_files(project_dir)
    return os.path.join(project_dir, "Cargo.toml"), docs, licenses
# Splits "<name>-<version>" at the first hyphen that is followed by a
# MAJOR.MINOR.PATCH triple, so that hyphens inside a pre-release or build
# metadata suffix stay part of the version.
_CRATE_STEM_PATTERN = re.compile(r"(?P<name>.+?)-(?P<version>[0-9]+\.[0-9]+\.[0-9]+(?:[-+].*)?)")


def parse_crate_file_name(path: str) -> tuple[str, str]:
    """Split the file name of a .crate archive into crate name and version.

    Only the base name of ``path`` is looked at, and a trailing ".crate" is
    removed first. Versions with a pre-release or build metadata suffix are
    kept intact, e.g. "clap-4.0.0-rc.1.crate" yields ("clap", "4.0.0-rc.1").

    If no MAJOR.MINOR.PATCH version can be found, the name is split at its
    last hyphen instead.

    Raises a ValueError if the file name contains no hyphen at all.
    """
    stem = os.path.basename(path).removesuffix(".crate")

    found = _CRATE_STEM_PATTERN.fullmatch(stem)
    if found:
        return found["name"], found["version"]

    # No recognisable version: fall back to splitting at the last hyphen.
    name, version = stem.rsplit("-", 1)
    return name, version
@contextlib.contextmanager
def files_from_crate(crate_path: str, crate_name: str, crate_version: str):
    """Unpack a .crate archive into a temporary directory and yield its files.

    Yields a (toml_path, doc_files, license_files) tuple for the directory
    "<crate_name>-<crate_version>" inside the archive. The temporary
    directory is removed when the context exits, so toml_path is only usable
    inside the "with" block.

    Raises:
        Exception: an archive member name resolves outside of the extraction
            directory.
        IOError: the archive has no "<crate_name>-<crate_version>/Cargo.toml".
    """
    # -> tuple[str, list[str], list[str]]
    with tempfile.TemporaryDirectory() as tmpdir:
        target_dir = f"{tmpdir}/"

        with tarfile.open(crate_path, "r") as archive:
            # Reject member names that would escape the extraction directory.
            for member_name in archive.getnames():
                if not os.path.abspath(os.path.join(target_dir, member_name)).startswith(target_dir):
                    raise Exception("Unsafe filenames!")

            # The name check above does not look at link targets or special
            # files. Where the interpreter provides extraction filters, let
            # the "data" filter reject those as well; this also avoids the
            # deprecation warning for filter-less extraction.
            if hasattr(tarfile, "data_filter"):
                archive.extractall(target_dir, filter="data")
            else:
                archive.extractall(target_dir)

        root_path = f"{tmpdir}/{crate_name}-{crate_version}"
        toml_path = f"{root_path}/Cargo.toml"
        if not os.path.isfile(toml_path):
            raise IOError("Crate does not contain a Cargo.toml file.")

        doc_files = get_doc_files(root_path)
        license_files = get_license_files(root_path)

        yield toml_path, doc_files, license_files
def get_license_files(path: str) -> list[str]:
    """Find files below ``path`` whose names look like license files.

    The whole tree is walked, except for directories whose name is listed in
    LICENSE_EXCLUDE_DIRS. A file counts as a license file if
    LICENSE_FILE_PATTERN matches at the start of its name.

    Returns the sorted list of matching paths, relative to ``path``.
    """
    matches: list[str] = []

    for current_dir, subdirs, file_names in os.walk(path, topdown=True):
        # Replacing the list contents in place prunes the walk.
        subdirs[:] = [name for name in subdirs if name not in LICENSE_EXCLUDE_DIRS]

        matches.extend(
            os.path.relpath(os.path.join(current_dir, file_name), path)
            for file_name in file_names
            if LICENSE_FILE_PATTERN.match(file_name)
        )

    return sorted(matches)
def get_doc_files(path: str) -> list[str]:
    """Find files directly inside ``path`` whose names look like documentation.

    Subdirectories are not searched. A file is reported if its full name
    matches DOC_FILE_PATTERN and matches neither LICENSE_FILE_PATTERN nor
    DOC_FILE_EXCLUDES.

    Returns the sorted list of matching paths, relative to ``path``.
    """
    collected: list[str] = []

    for top, subdirs, names in os.walk(path, topdown=True):
        # Emptying the list in place keeps os.walk from descending any further.
        subdirs.clear()

        for name in names:
            if not DOC_FILE_PATTERN.fullmatch(name):
                continue
            if LICENSE_FILE_PATTERN.fullmatch(name) or DOC_FILE_EXCLUDES.fullmatch(name):
                continue

            relative = os.path.relpath(os.path.join(top, name), path)
            if relative.startswith("target/"):
                continue
            collected.append(relative)

    return sorted(collected)
def project_is_path(path: str) -> bool:
    """Tell whether a "project" argument is a file system path.

    "." and ".." count as paths, and so does anything that contains a slash.
    Everything else is not considered a path.
    """
    if path in (".", ".."):
        return True
    return "/" in path
def guess_local_project_version_from_dir(dir_name: str) -> tuple[str, str]:
    """
    Use a simple heuristic to determine the project name and version from the
    name of the directory that contains the Cargo.toml file.

    The trailing run of digits and dots in the directory name is taken to be
    the version, and whatever precedes the separating hyphen is taken to be
    the project name.

    Raises an InvalidVersionError if the automatically determined version is
    not valid according to SemVer. The error from the version parser is
    attached as the cause of that exception.
    """
    # Drop the presumed version from the end, then the hyphen in front of it.
    project = dir_name.rstrip("0123456789.").removesuffix("-")
    version = dir_name.removeprefix(f"{project}-")

    try:
        Version.parse(version)
    except ValueError as exc:
        # Chain explicitly so the parser error is reported as the direct cause.
        raise InvalidVersionError(exc.args) from exc

    return project, version
def guess_local_project_version_from_path(project: str, version: Optional[str]) -> tuple[str, str]:
    """
    Derive the project name and version from the "project" argument that was
    supplied on the command line.

    For a directory, the name of that directory is used. For anything else
    (i.e. a Cargo.toml file), the name of the parent directory is used.

    If a "version" is supplied, it is returned as-is, and the project name is
    the directory name without a trailing "-<version>". Otherwise both values
    are guessed from the directory name.

    Raises an InvalidVersionError if guessing from the directory name fails,
    or if the guessed version is not valid according to SemVer. Supplying the
    optional "version" argument avoids the guessing.
    """
    location = os.path.abspath(project)
    if not os.path.isdir(project):
        # "project" points at a file: look at the directory that contains it.
        location = os.path.dirname(location)
    dir_name = os.path.basename(location)

    if not version:
        return guess_local_project_version_from_dir(dir_name)

    return dir_name.removesuffix(f"-{version}"), version
@contextlib.contextmanager
def toml_temp_copy(toml_path: str):
    """Restore the contents of ``toml_path`` when the context exits.

    The file is read when the context is entered and written back with the
    same bytes on exit. The write-back also happens if the body of the
    "with" block raises, so a failure while the file is being modified does
    not leave the modified file behind. The exception itself still
    propagates to the caller.
    """
    with open(toml_path, "rb") as toml_file:
        orig = toml_file.read()

    try:
        yield
    finally:
        # An exception from the "with" body is re-raised at the yield above;
        # without "finally" the original contents would never be restored.
        with open(toml_path, "wb") as toml_file:
            toml_file.write(orig)
def process_project_local(
project: str,
version: Optional[str],
patch: bool,
patch_foreign: bool,
vendor: bool,
) -> tuple[str, str, tuple[Optional[list[str]], Optional[list[str]]], Metadata, list[str], list[str], Optional[str]]:
"""
Collect name, version, patches, metadata and file lists for a project that
is available as unpacked sources.

"project" is either a directory, or it is handed to local_toml_file(),
which expects the path of a Cargo.toml file.

Returns a tuple (name, version, diffs, metadata, doc_files, license_files,
vendor_tarball). "vendor_tarball" is None unless "vendor" is true.

If the cargo metadata contains more than one package, no patches are
created ("diffs" is (None, None)), and name and version are determined
from the directory name (or from the "version" argument, if given).
Otherwise, name and version are taken from the single package, and
make_patches() is called under toml_temp_copy().

NOTE(review): indentation is lost in this copy of the file, so the extent
of the "with toml_temp_copy(...)" block cannot be read from it. The metadata
reload is presumably inside that block (it is meant to see the patched
file); whether the vendoring step is too needs to be confirmed.
"""
# parent_dir is passed to generate_vendor_tarball() below. It is the
# directory that contains the project directory in both cases.
if os.path.isdir(project):
toml_path, doc_files, license_files = local_cargo_dir(project)
parent_dir = Path(project).parent
else:
toml_path, doc_files, license_files = local_toml_file(project)
parent_dir = Path(project).parent.parent
metadata = Metadata.from_cargo(toml_path)
if len(metadata.packages) > 1:
log.info("Skipping automatic creation of patches for cargo workspace.")
# fall back to the directory name for determining the name / version
# of the project heuristically
name, version = guess_local_project_version_from_path(project, version)
log.warn(f"Falling back to {name!r} as the name of the project (based on the name of the containing folder).")
diffs: tuple[Optional[list[str]], Optional[list[str]]] = (None, None)
if vendor:
vendor_tarball = generate_vendor_tarball(toml_path, name, version, parent_dir)
else:
vendor_tarball = None
else:
# single package: name and version come from the cargo metadata, and the
# "version" argument is overwritten
package = metadata.packages[0]
features = package.get_feature_names()
name = package.name
version = package.version
# toml_temp_copy() writes the original Cargo.toml contents back on exit
with toml_temp_copy(toml_path):
diffs = make_patches(name, package.version, patch, patch_foreign, toml_path, features)
# ensure metadata is up-to-date with changes from patches
metadata = Metadata.from_cargo(toml_path)
if vendor:
vendor_tarball = generate_vendor_tarball(toml_path, name, version, parent_dir)
else:
vendor_tarball = None
return name, version, diffs, metadata, doc_files, license_files, vendor_tarball
def resolve_version(crate: str, version: str) -> Optional[str]:
    """Turn a version or partial version into a concrete version string.

    If ``version`` parses as a full version, its string form is returned
    without any lookup. Otherwise it is parsed as a version requirement, and
    the highest version returned by query_available_versions() for ``crate``
    that is contained in the requirement is returned.

    Returns None (after logging a "Falling back to latest version." message)
    if no available version matches, or if a ValueError is raised while the
    requirement is parsed or resolved.
    """
    # try parsing version as actual version
    try:
        return str(Version.parse(version))
    except ValueError:
        pass

    # try parsing version as partial version
    try:
        requirement = VersionReq.parse(version)
        log.info("Resolving partial version ...")

        candidates = (candidate for candidate in query_available_versions(crate) if candidate in requirement)
        best_match = max(candidates, default=None)  # type: ignore

        if best_match is not None:
            log.info(f"Partial version matched with available version: {best_match}")
            return str(best_match)

        log.warn("Partial version does not match any available version.")
        log.info("Falling back to latest version.")
        return None

    except ValueError:
        # Reached for any ValueError raised in the block above, not only for
        # one raised while parsing the requirement.
        log.error(f"Invalid version: {version}")
        log.info("Falling back to latest version.")
        return None
def process_project(
project: str,
version: Optional[str],
patch: bool,
patch_foreign: bool,
store_crate: bool,
vendor: bool,
) -> tuple[
str, str, tuple[Optional[list[str]], Optional[list[str]]], Metadata, list[str], list[str], bool, Optional[str]
]:
"""
Collect name, version, patches, metadata and file lists for "project".

"project" is interpreted in one of three ways:

- a path that ends in ".crate": a local .crate archive
- any other path: unpacked sources, handled by process_project_local()
- anything that project_is_path() rejects: a crate name, for which a .crate
  file is obtained with download_crate()

Returns a tuple (name, version, diffs, metadata, doc_files, license_files,
flag, vendor_tarball). "flag" is True when unpacked sources were processed
and False when a .crate archive was processed. "vendor_tarball" is None
unless "vendor" is true.

Raises an InvalidProjectError if "project" is a path that does not exist,
and a ValueError if the .crate archive contains a cargo workspace.

NOTE(review): indentation is lost in this copy of the file, so it cannot be
read from it whether the "--store-crate" copy step applies only to
downloaded crates or to local .crate files as well; confirm before
restructuring.
"""
if project_is_path(project):
if not os.path.exists(project):
raise InvalidProjectError(project)
if project.endswith(".crate"):
# project points at a local .crate file
crate_file_path = project
# determine name and version from the filename
name, version = parse_crate_file_name(project)
else:
# project points at unpacked sources
if store_crate:
log.warn("The '--store-crate' flag has no effect for unpacked sources.")
name, version, diffs, metadata, doc_files, license_files, vendor_tarball = process_project_local(
project, version, patch, patch_foreign, vendor
)
return name, version, diffs, metadata, doc_files, license_files, True, vendor_tarball
else:
# project is just a crate name
name = project
# download .crate from crates.io
if version:
# version or partial version was specified
# (resolve_version() returns None if it could not resolve the version)
resolved_version = resolve_version(project, version)
crate_file_path, version = download_crate(project, resolved_version)
else:
# no version was specified: download latest
crate_file_path, version = download_crate(project, version)
# copy the .crate file into the current working directory, unless the file
# at the target path already is that same file
if store_crate:
copy_target = os.path.join(os.getcwd(), os.path.basename(crate_file_path))
if not (os.path.exists(copy_target) and os.path.samefile(crate_file_path, copy_target)):
shutil.copy2(crate_file_path, copy_target)
# process files from a .crate archive
with files_from_crate(crate_file_path, name, version) as (toml_path, doc_files, license_files):
metadata = Metadata.from_cargo(toml_path)
if len(metadata.packages) > 1:
log.error("Attempting to process a .crate file which contains a cargo workspace.")
log.error("This mode of operation is unusual and not supported by rust2rpm.")
raise ValueError("Failed to process invalid .crate file (cargo workspace)")
package = metadata.packages[0]
# from here on, the version from the cargo metadata replaces the one that
# was determined from the file name or the download
version = package.version
features = package.get_feature_names()
diffs = make_patches(name, version, patch, patch_foreign, toml_path, features)
# ensure metadata is up-to-date with changes from patches
metadata = Metadata.from_cargo(toml_path)
if vendor:
vendor_tarball = generate_vendor_tarball(toml_path, name, version, Path.cwd())
else:
vendor_tarball = None
return name, version, diffs, metadata, doc_files, license_files, False, vendor_tarball