From 226714a275cd879ca4cb86fb23166bd845fa3a78 Mon Sep 17 00:00:00 2001 From: myhailo-chernyshov-rg Date: Thu, 9 Jan 2025 19:01:49 +0200 Subject: [PATCH] feat: dockerize application [FC-0063] (#228) Add docker support and set requirements to use base.in rather than base.txt to make it easier to use this repo as part of another package. --- .dockerignore | 7 ++++ Dockerfile | 8 ++++ MANIFEST.in | 1 + README.rst | 17 +++++++++ setup.py | 20 ++-------- utils.py | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 140 insertions(+), 17 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 utils.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..cdc98808 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,7 @@ +.git +.github + +**/.coverage +**/.pytest_cache +**/__pycache__ +**/*.pyc diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..7b55bcf5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,8 @@ +FROM python:3.8.20-alpine3.20 + +WORKDIR /app + +COPY . . +RUN pip install --no-cache-dir /app + +ENTRYPOINT ["cc2olx"] diff --git a/MANIFEST.in b/MANIFEST.in index 765ff49f..e8e2cb12 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,7 @@ include LICENSE include README.rst +recursive-include requirements * recursive-include tests * recursive-exclude * __pycache__ recursive-exclude * *.py[co] diff --git a/README.rst b/README.rst index 41fff911..def057c0 100644 --- a/README.rst +++ b/README.rst @@ -58,6 +58,23 @@ The link map file can be supplied using `-f` or `--link_file`:: cc2olx -r zip -i -f +Dockerization +------------- + +To make the application platform-independent, it is dockerized. To run the +application using Docker you need: + +1. Build the image:: + + docker build -t cc2olx . + +2. 
Run the conversion command in a container, mounting the files and directories +passed as arguments and passing the corresponding container paths to the script:: + + docker run --rm -v /input/file/path/cc_course_dump.imscc:/data/input/cc_course_dump.imscc -v /output/file/path/output_dir:/data/output/ cc2olx -r zip -i /data/input/cc_course_dump.imscc -o /data/output/edx_dump + +This converts the Common Cartridge dump at */input/file/path/cc_course_dump.imscc* +and saves the OLX to the */output/file/path/output_dir/edx_dump.zip* file. Test Data --------- diff --git a/setup.py b/setup.py index ff95b825..0df0a5f1 100644 --- a/setup.py +++ b/setup.py @@ -1,27 +1,13 @@ -import os -import re - from glob import glob from os.path import basename, splitext from setuptools import setup, find_packages +from utils import get_version, load_requirements + with open("README.rst", encoding="utf-8") as readme_file: readme = readme_file.read() - -def get_version(*file_paths): - """ - Extract the version string from the file at the given relative path fragments. - """ - filename = os.path.join(os.path.dirname(__file__), *file_paths) - version_file = open(filename, encoding="utf-8").read() - version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) - if version_match: - return version_match.group(1) - raise RuntimeError("Unable to find version string. 
") - - VERSION = get_version("src", "cc2olx", "__init__.py") @@ -41,7 +27,7 @@ def get_version(*file_paths): ], description=("Command line tool, that converts Common Cartridge " "courses to Open edX Studio imports."), entry_points={"console_scripts": ["cc2olx=cc2olx.main:main"]}, - install_requires=["lxml"], + install_requires=load_requirements("requirements/base.in"), license="GNU Affero General Public License", long_description=readme, include_package_data=True,
diff --git a/utils.py b/utils.py
new file mode 100644
index 00000000..b1a95387
--- /dev/null
+++ b/utils.py
@@ -0,0 +1,130 @@
+import os
+import re
+
+
+def get_version(*file_paths):
+    """
+    Extract the version string from the file at the given relative path fragments.
+
+    The fragments are joined onto the directory that contains this module.
+    Raises ``RuntimeError`` when no ``__version__ = "..."`` line is found.
+    """
+    filename = os.path.join(os.path.dirname(__file__), *file_paths)
+    # Read through a context manager so the file handle is always closed.
+    with open(filename, encoding="utf-8") as version_file:
+        contents = version_file.read()
+    version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", contents, re.M)
+    if version_match:
+        return version_match.group(1)
+    raise RuntimeError("Unable to find version string. ")
+
+
+def is_requirement(line):
+    """
+    Return True if the requirement line is a package requirement.
+
+    That is, it is not blank, a comment, a pip option line (``-r``, ``-c``,
+    ``-e``, ``--index-url``, ...) or a direct ``git+`` URL.
+    """
+    # Remove whitespace at the start/end of the line
+    line = line.strip()
+
+    # A package name never starts with "-", so every pip option line is
+    # skipped, not only the -r/-c/-e ones.
+    return bool(line) and not line.startswith(("-", "#", "git+"))
+
+
+def load_requirements(*requirements_paths):
+    """
+    Load all requirements from the specified requirements files.
+
+    Requirements will include any constraints from files specified
+    with -c in the requirements files (constraints given as URLs are skipped).
+    Returns a list of requirement strings sorted by package name.
+    Raises ``Exception`` if a package is spelled inconsistently or receives
+    two different version constraints.
+    """
+
+    def check_name_consistent(package):
+        """
+        Raise exception if package is named different ways.
+
+        This ensures that packages are named consistently so we can match
+        constraints to packages. It also ensures that if we require a package
+        with extras we don't constrain it without mentioning the extras (since
+        that too would interfere with matching constraints.)
+        """
+        canonical = package.lower().replace("_", "-").split("[")[0]
+        seen_spelling = by_canonical_name.setdefault(canonical, package)
+        if seen_spelling != package:
+            raise Exception(
+                f'Encountered both "{seen_spelling}" and "{package}" in requirements '
+                "and constraints files; please use just one or the other."
+            )
+
+    def add_version_constraint_or_raise(current_line, current_requirements, add_if_not_present):
+        """
+        Record the constraint found on current_line in current_requirements.
+
+        With add_if_not_present=False only packages that are already present
+        are updated, which is how constraint files are applied.
+        """
+        # Match against the stripped line: the regex is anchored at the first
+        # character, so an indented requirement would otherwise be dropped.
+        regex_match = requirement_line_regex.match(current_line.strip())
+        if not regex_match:
+            return
+        package = regex_match.group(1)
+        version_constraints = regex_match.group(2)
+        check_name_consistent(package)
+        existing_version_constraints = current_requirements.get(package)
+        # It's fine to add constraints to an unconstrained package,
+        # but raise an error if there are already constraints in place.
+        if existing_version_constraints and existing_version_constraints != version_constraints:
+            raise Exception(
+                f"Multiple constraint definitions found for {package}:"
+                f' "{existing_version_constraints}" and "{version_constraints}". '
+                f"Combine constraints into one location with {package}"
+                f"{existing_version_constraints},{version_constraints}."
+            )
+        if add_if_not_present or package in current_requirements:
+            current_requirements[package] = version_constraints
+
+    by_canonical_name = {}  # e.g. {"django": "Django", "confluent-kafka": "confluent_kafka[avro]"}
+    requirements = {}
+    constraint_files = set()
+
+    # groups "pkg<=x.y.z,..." into ("pkg", "<=x.y.z,...")
+    re_package_name_base_chars = r"a-zA-Z0-9\-_."  # chars allowed in base package name
+    # Two groups: name[maybe,extras], and optionally a constraint. The
+    # constraint may start with any comparison operator, including != and ~=.
+    requirement_line_regex = re.compile(
+        rf"([{re_package_name_base_chars}]+(?:\[[{re_package_name_base_chars},\s]+\])?)([<>=!~][^#\s]+)?"
+    )
+
+    # Read requirements from .in files and store the path to any
+    # constraint files that are pulled in.
+    for path in requirements_paths:
+        with open(path, encoding="utf-8") as reqs:
+            for line in reqs:
+                if is_requirement(line):
+                    add_version_constraint_or_raise(line, requirements, True)
+                stripped = line.strip()
+                if stripped.startswith("-c") and not stripped.startswith("-c http"):
+                    # Cut off only the leading "-c"; str.replace would also eat
+                    # "-c" inside names such as "constraints-common.txt".
+                    constraint_name = stripped.split("#")[0][len("-c"):].strip()
+                    # os.path.join keeps the path relative when `path` has no
+                    # directory part (plain concatenation produced "/name").
+                    constraint_files.add(os.path.join(os.path.dirname(path), constraint_name))
+
+    # process constraint files: add constraints to existing requirements
+    # (sorted so the processing order, and any error raised, is deterministic)
+    for constraint_file in sorted(constraint_files):
+        with open(constraint_file, encoding="utf-8") as reader:
+            for line in reader:
+                if is_requirement(line):
+                    add_version_constraint_or_raise(line, requirements, False)
+
+    # process back into list of pkg><=constraints strings
+    return [f'{pkg}{version or ""}' for (pkg, version) in sorted(requirements.items())]