diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
index a02b8c9bed..62120fb2b1 100644
--- a/.github/workflows/docker-image.yml
+++ b/.github/workflows/docker-image.yml
@@ -19,10 +19,10 @@ jobs:
       contents: read
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - # Activate cache export feature to reduce build time of images
         name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v2
+        uses: docker/setup-buildx-action@v3
       - name: Build the Docker image
         # default tag uses docker.io, so override on command-line
         run: make docker DOCKER_TAG=${{ env.GHCRIO_DOCKER_TAG }}
@@ -34,13 +34,13 @@ jobs:
           docker run --rm ${{ env.GHCRIO_DOCKER_TAG }} ocrd --version
           docker run --rm ${{ env.GHCRIO_DOCKER_TAG }}-cuda ocrd --version
       - name: Login to GitHub Container Registry
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
       - name: Log in to Docker Hub
-        uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERIO_USERNAME }}
          password: ${{ secrets.DOCKERIO_PASSWORD }}
diff --git a/.github/workflows/network-testing.yml b/.github/workflows/network-testing.yml
index 4948ff0bb4..12c8fe4fde 100644
--- a/.github/workflows/network-testing.yml
+++ b/.github/workflows/network-testing.yml
@@ -20,17 +20,18 @@ jobs:
           - '3.9'
           - '3.10'
           - '3.11'
+          - '3.12'
         os:
           - ubuntu-22.04
           # - macos-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Homebrew
         id: set-up-homebrew
         uses: Homebrew/actions/setup-homebrew@master
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml
index 4b0cd5145a..2b8e3d5b82 100644
--- a/.github/workflows/unit-test.yml
+++ b/.github/workflows/unit-test.yml
@@ -22,18 +22,19 @@ jobs:
           - '3.9'
           - '3.10'
           - '3.11'
+          - '3.12'
         os:
           - ubuntu-22.04
           - ubuntu-20.04
-          # - macos-latest
+          - macos-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Homebrew
         id: set-up-homebrew
         uses: Homebrew/actions/setup-homebrew@master
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v3
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
       - name: Install dependencies
diff --git a/.scrutinizer.yml b/.scrutinizer.yml
index 446df627e2..4848dca46a 100644
--- a/.scrutinizer.yml
+++ b/.scrutinizer.yml
@@ -3,6 +3,10 @@ checks:
 
 build:
   image: default-bionic
+  environment:
+    python:
+      version: 3.8.2
+      virtualenv: true
   nodes:
     analysis:
       dependencies:
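Note on the version matrix above: Python 3.12 removed `distutils` (PEP 632), which is what forces the `shutil` migration in the source files further down in this diff. A minimal probe, assuming it is run under each interpreter of the matrix in a clean venv:

```python
# Probe for the distutils removal that makes 3.12 worth testing (a sketch):
import importlib.util
import sys

has_distutils = importlib.util.find_spec("distutils") is not None
print(sys.version_info[:2], "distutils importable:", has_distutils)
# Expected: True (but deprecated) on 3.8-3.11; False on a clean 3.12 venv
# (setuptools may shim distutils back in, hence "clean")
```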
diff --git a/CHANGELOG.md b/CHANGELOG.md
index a5c19bb1d4..c83143d240 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,16 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+Fixed:
+
+  - bashlib processors now download input files on demand, like Pythonic processors do, #1216, #1217
+
+Changed:
+
+  - Replace `distutils` with equivalents from `shutil` for compatibility with Python 3.12+, #1219
+  - CI: Updated GitHub Actions, #1206
+  - CI: Fixed Scrutinizer, #1217
+
 ## [2.64.1] - 2024-04-22
 
 Fixed:
diff --git a/Dockerfile b/Dockerfile
index 95130fdd4a..f3b2c92d11 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -44,9 +44,11 @@ WORKDIR /data
 CMD ["/usr/local/bin/ocrd", "--help"]
 
 FROM ocrd_core_base as ocrd_core_test
+# Optionally skip make assets with this arg
+ARG SKIP_ASSETS
 WORKDIR /build-ocrd
 COPY Makefile .
-RUN make assets
+RUN if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi
 COPY tests ./tests
 COPY .gitmodules .
 COPY requirements_test.txt .
diff --git a/Makefile b/Makefile
index 5b83d1a173..b4c4cb8a86 100644
--- a/Makefile
+++ b/Makefile
@@ -140,7 +140,7 @@ install: #build
 	$(PIP) config set global.no-binary shapely
 
 # Install with pip install -e
-install-dev: PIP_INSTALL = $(PIP) install -e 
+install-dev: PIP_INSTALL = $(PIP) install -e
 install-dev: PIP_INSTALL_CONFIG_OPTION = --config-settings editable_mode=strict
 install-dev: uninstall
 	$(MAKE) install
@@ -240,12 +240,12 @@ network-module-test: assets
 INTEGRATION_TEST_IN_DOCKER = docker exec core_test
 network-integration-test:
 	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
-	-$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v
+	-$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="$(TESTDIR)/network/*ocrd_all*.py"
 	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans
 
 network-integration-test-cicd:
 	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d
-	$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v
+	$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="tests/network/*ocrd_all*.py"
 	$(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans
 
 benchmark:
@@ -315,7 +315,7 @@ pyclean:
 .PHONY: docker docker-cuda
 
 # Additional arguments to docker build. Default: '$(DOCKER_ARGS)'
-DOCKER_ARGS = 
+DOCKER_ARGS =
 
 # Build docker image
 docker: DOCKER_BASE_IMAGE = ubuntu:20.04
@@ -328,7 +328,7 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda
 
 docker-cuda: docker
 
-docker docker-cuda: 
+docker docker-cuda:
 	docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) .
 
 # Build wheels and source dist and twine upload them
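The `RUN` guard added to the Dockerfile above is easy to misread; as a sketch of its semantics (the `should_make_assets` helper is hypothetical, for illustration only), assets are built unless `SKIP_ASSETS` is set to a non-zero value:

```python
# Mirrors: if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi
def should_make_assets(skip_assets=None):
    # build-args arrive as strings; unset/empty or "0" keeps the default behaviour
    return not skip_assets or skip_assets == "0"

assert should_make_assets(None)     # plain `docker build .` still runs `make assets`
assert should_make_assets("0")
assert not should_make_assets("1")  # e.g. docker build --build-arg SKIP_ASSETS=1 .
```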
diff --git a/src/ocrd/cli/bashlib.py b/src/ocrd/cli/bashlib.py
index 5746151c72..1def4638c7 100644
--- a/src/ocrd/cli/bashlib.py
+++ b/src/ocrd/cli/bashlib.py
@@ -114,6 +114,10 @@ def bashlib_input_files(**kwargs):
                     input_file_grp=kwargs['input_file_grp'],
                     output_file_grp=kwargs['output_file_grp'])
     for input_files in processor.zip_input_files(mimetype=None, on_error='abort'):
+        # ensure all input files exist locally (without persisting them in the METS)
+        # - this mimics the default behaviour of all Pythonic processors
+        input_files = [workspace.download_file(input_file) if input_file else None
+                       for input_file in input_files]
         for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
             # make this bash-friendly (show initialization for associative array)
             if len(input_files) > 1:
diff --git a/src/ocrd/cli/resmgr.py b/src/ocrd/cli/resmgr.py
index 2e744a8958..1289e498e1 100644
--- a/src/ocrd/cli/resmgr.py
+++ b/src/ocrd/cli/resmgr.py
@@ -7,7 +7,7 @@
 """
 import sys
 from pathlib import Path
-from distutils.spawn import find_executable as which
+from shutil import which
 from yaml import safe_load, safe_dump
 
 import requests
diff --git a/src/ocrd/task_sequence.py b/src/ocrd/task_sequence.py
index 6d38601867..da691fbc1d 100644
--- a/src/ocrd/task_sequence.py
+++ b/src/ocrd/task_sequence.py
@@ -1,7 +1,6 @@
 import json
 from shlex import split as shlex_split
-from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module
-from subprocess import run, PIPE
+from shutil import which
 
 from ocrd_utils import getLogger, parse_json_string_or_file, set_json_key_value_overrides, get_ocrd_tool_json
 # from collections import Counter
diff --git a/src/ocrd/workspace_bagger.py b/src/ocrd/workspace_bagger.py
index b67224ddd2..a30dbfb02a 100644
--- a/src/ocrd/workspace_bagger.py
+++ b/src/ocrd/workspace_bagger.py
@@ -2,13 +2,12 @@
 from os import makedirs, chdir, walk
 from os.path import join, isdir, basename as os_path_basename, exists, relpath
 from pathlib import Path
-from shutil import make_archive, rmtree, copyfile, move
+from shutil import make_archive, rmtree, copyfile, move, copytree
 from tempfile import mkdtemp, TemporaryDirectory
 import re
 import tempfile
 import sys
 from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module
-from distutils.dir_util import copy_tree
 
 from ocrd_utils import (
     pushd_popd,
@@ -298,7 +297,7 @@ def recreate_checksums(self, src, dest=None, overwrite=False):
             raise FileNotFoundError(f"data directory of bag not found at {src}")
         if not overwrite:
             path_to_bag.mkdir(parents=True, exist_ok=True)
-            copy_tree(src, dest)
+            copytree(src, dest, dirs_exist_ok=True)
         with pushd_popd(path_to_bag):
             n_bytes, n_files = make_manifests("data", 1, ["sha512"])
diff --git a/src/ocrd_models/ocrd_exif.py b/src/ocrd_models/ocrd_exif.py
index 598c499eff..406e60a85a 100644
--- a/src/ocrd_models/ocrd_exif.py
+++ b/src/ocrd_models/ocrd_exif.py
@@ -5,7 +5,7 @@
 from math import sqrt
 from io import BytesIO
 from subprocess import run, PIPE
-from distutils.spawn import find_executable as which
+from shutil import which
 from ocrd_utils import getLogger
 
 class OcrdExif():
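The `distutils` → `shutil` swaps above are near drop-ins, with one behavioural caveat: `distutils.dir_util.copy_tree` tolerated an existing destination by default, whereas `shutil.copytree` only does so with `dirs_exist_ok=True` (Python 3.8+). A sketch with hypothetical paths:

```python
from pathlib import Path
from shutil import copytree, which

print(which("ocrd"))  # absolute path, like find_executable, or None if not on PATH

Path("bag/data").mkdir(parents=True, exist_ok=True)        # hypothetical source tree
copytree("bag/data", "bag-copy/data", dirs_exist_ok=True)  # ok if dest already exists
```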
diff --git a/src/ocrd_utils/os.py b/src/ocrd_utils/os.py
index 47c4b510f4..1b3ab4e73d 100644
--- a/src/ocrd_utils/os.py
+++ b/src/ocrd_utils/os.py
@@ -18,12 +18,12 @@
 from tempfile import TemporaryDirectory, gettempdir
 from functools import lru_cache
 from contextlib import contextmanager, redirect_stderr, redirect_stdout
-from distutils.spawn import find_executable as which
+from shutil import which
 from json import loads
 from json.decoder import JSONDecodeError
 from os import getcwd, chdir, stat, chmod, umask, environ
 from pathlib import Path
-from os.path import exists, abspath as abspath_, join, isdir
+from os.path import abspath as abspath_, join
 from zipfile import ZipFile
 from subprocess import run, PIPE
 from mimetypes import guess_type as mimetypes_guess
diff --git a/tests/data/wf_testcase.py b/tests/data/wf_testcase.py
index 2ea97ed212..0ed6ac9b7e 100644
--- a/tests/data/wf_testcase.py
+++ b/tests/data/wf_testcase.py
@@ -61,7 +61,7 @@ def setUp(self):
             p.chmod(0o777)
 
         os.environ['PATH'] = os.pathsep.join([self.tempdir, os.environ['PATH']])
-        # from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module
+        # from shutil import which # pylint: disable=import-error,no-name-in-module
         # self.assertTrue(which('ocrd-sample-processor'))
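The `os.path` names dropped above (`exists`, `isdir`) presumably became unused in `os.py`; for reference, their `pathlib` equivalents (the path is a placeholder):

```python
from pathlib import Path

p = Path("workspace/mets.xml")
print(p.exists(), p.parent.is_dir())  # replaces os.path.exists / os.path.isdir
```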
f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" - response = request_post( - url=test_url, - headers={"accept": "application/json"}, - files={"workflow": open(path_to_dummy_wf, 'rb')} - ) - # print(response.json()) - # print(response.__dict__) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - wf_job_id = response.json()["job_id"] - assert wf_job_id - + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_dummy_wf, path_to_mets) job_state = poll_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", tries=30, wait=10 ) diff --git a/tests/network/test_integration_ocrd_all.py b/tests/network/test_integration_ocrd_all.py new file mode 100644 index 0000000000..d54d9f2fd5 --- /dev/null +++ b/tests/network/test_integration_ocrd_all.py @@ -0,0 +1,19 @@ +from src.ocrd_network.constants import JobState +from tests.network.config import test_config +from tests.network.utils import poll_till_timeout_fail_or_success, post_ps_workflow_request + +PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL + + +def test_ocrd_all_workflow(): + # This test is supposed to run with ocrd_all not with just core on its own + # Note: the used workflow path is volume mapped + path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" + path_to_mets = "/data/mets.xml" + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_wf, path_to_mets) + job_state = poll_till_timeout_fail_or_success( + test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", + tries=30, + wait=10 + ) + assert job_state == JobState.success diff --git a/tests/network/utils.py b/tests/network/utils.py new file mode 100644 index 0000000000..dbf594a894 --- /dev/null +++ b/tests/network/utils.py @@ -0,0 +1,47 @@ +from requests import get as request_get, post as request_post +from time import sleep +from src.ocrd_network.constants import JobState + + +def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> JobState: + job_state = JobState.unset + while tries > 0: + sleep(wait) + response = request_get(url=test_url) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + job_state = response.json()["state"] + if job_state == JobState.success or job_state == JobState.failed: + break + tries -= 1 + return job_state + + +def post_ps_processing_request(ps_server_host: str, test_processor: str, test_job_input: dict) -> str: + test_url = f"{ps_server_host}/processor/run/{test_processor}" + response = request_post( + url=test_url, + headers={"accept": "application/json"}, + json=test_job_input + ) + # print(response.json()) + # print(response.__dict__) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + processing_job_id = response.json()["job_id"] + assert processing_job_id + return processing_job_id + + +# TODO: Can be extended to include other parameters such as page_wise +def post_ps_workflow_request(ps_server_host: str, path_to_test_wf: str, path_to_test_mets: str) -> str: + test_url = f"{ps_server_host}/workflow/run?mets_path={path_to_test_mets}&page_wise=True" + response = request_post( + url=test_url, + headers={"accept": "application/json"}, + files={"workflow": open(path_to_test_wf, "rb")} + ) + # print(response.json()) + # print(response.__dict__) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + wf_job_id = response.json()["job_id"] 
diff --git a/tests/network/utils.py b/tests/network/utils.py
new file mode 100644
index 0000000000..dbf594a894
--- /dev/null
+++ b/tests/network/utils.py
@@ -0,0 +1,47 @@
+from requests import get as request_get, post as request_post
+from time import sleep
+from src.ocrd_network.constants import JobState
+
+
+def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> JobState:
+    job_state = JobState.unset
+    while tries > 0:
+        sleep(wait)
+        response = request_get(url=test_url)
+        assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
+        job_state = response.json()["state"]
+        if job_state == JobState.success or job_state == JobState.failed:
+            break
+        tries -= 1
+    return job_state
+
+
+def post_ps_processing_request(ps_server_host: str, test_processor: str, test_job_input: dict) -> str:
+    test_url = f"{ps_server_host}/processor/run/{test_processor}"
+    response = request_post(
+        url=test_url,
+        headers={"accept": "application/json"},
+        json=test_job_input
+    )
+    # print(response.json())
+    # print(response.__dict__)
+    assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
+    processing_job_id = response.json()["job_id"]
+    assert processing_job_id
+    return processing_job_id
+
+
+# TODO: Can be extended to include other parameters such as page_wise
+def post_ps_workflow_request(ps_server_host: str, path_to_test_wf: str, path_to_test_mets: str) -> str:
+    test_url = f"{ps_server_host}/workflow/run?mets_path={path_to_test_mets}&page_wise=True"
+    response = request_post(
+        url=test_url,
+        headers={"accept": "application/json"},
+        files={"workflow": open(path_to_test_wf, "rb")}
+    )
+    # print(response.json())
+    # print(response.__dict__)
+    assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}"
+    wf_job_id = response.json()["job_id"]
+    assert wf_job_id
+    return wf_job_id
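A hypothetical further test built on the new helpers (names as introduced in `tests/network/utils.py` above; the workflow and METS paths are placeholders):

```python
from src.ocrd_network.constants import JobState
from tests.network.config import test_config
from tests.network.utils import poll_till_timeout_fail_or_success, post_ps_workflow_request


def test_some_other_workflow():
    ps_url = test_config.PROCESSING_SERVER_URL
    wf_job_id = post_ps_workflow_request(ps_url, "/ocrd-data/assets/other-workflow.txt", "/data/mets.xml")
    job_state = poll_till_timeout_fail_or_success(
        test_url=f"{ps_url}/workflow/job-simple/{wf_job_id}", tries=10, wait=5
    )
    assert job_state == JobState.success
```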