From eb0da749ac965bb0fa91419d720c97e836d8b2ba Mon Sep 17 00:00:00 2001 From: joschrew Date: Wed, 7 Feb 2024 16:29:23 +0100 Subject: [PATCH 01/22] Add a test for workflow run in ocrd_all --- tests/network/test_ocrd_all_workflow.py | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/network/test_ocrd_all_workflow.py diff --git a/tests/network/test_ocrd_all_workflow.py b/tests/network/test_ocrd_all_workflow.py new file mode 100644 index 0000000000..9406703a61 --- /dev/null +++ b/tests/network/test_ocrd_all_workflow.py @@ -0,0 +1,49 @@ +from time import sleep +from requests import get, post +from src.ocrd_network.models import StateEnum +from tests.base import assets +from tests.network.config import test_config + +PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL + + +def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: + job_state = StateEnum.unset + while tries > 0: + sleep(wait) + response = get(url=test_url) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + job_state = response.json()["state"] + if job_state == StateEnum.success or job_state == StateEnum.failed: + break + tries -= 1 + return job_state + + +def test_ocrd_all_workflow(): + # This tests is supposed to with ocrd_all not with just core on its own + # Note: the used workflow path is volume mapped + path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" + path_to_mets = "/data/mets.xml" + + # submit the workflow job + test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" + response = post( + url=test_url, + headers={"accept": "application/json"}, + files={"workflow": open(path_to_wf, 'rb')} + ) + # print(response.json()) + assert response.status_code == 200, ( + f"Processing server: {test_url}, {response.status_code}. " + f"Response text: {response.text}" + ) + wf_job_id = response.json()["job_id"] + assert wf_job_id + + job_state = poll_till_timeout_fail_or_success( + test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", + tries=30, + wait=10 + ) + assert job_state == StateEnum.success From 79c5b79a2f75044b7b4392abcc13ac3d74108cb8 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 12 Feb 2024 16:23:54 +0100 Subject: [PATCH 02/22] remove duplicates --- tests/network/test_ocrd_all_workflow.py | 41 +++---------- tests/network/test_processing_server.py | 76 +++++++------------------ tests/network/utils.py | 50 ++++++++++++++++ 3 files changed, 79 insertions(+), 88 deletions(-) create mode 100644 tests/network/utils.py diff --git a/tests/network/test_ocrd_all_workflow.py b/tests/network/test_ocrd_all_workflow.py index 9406703a61..2e6f843a3d 100644 --- a/tests/network/test_ocrd_all_workflow.py +++ b/tests/network/test_ocrd_all_workflow.py @@ -1,47 +1,20 @@ -from time import sleep -from requests import get, post from src.ocrd_network.models import StateEnum -from tests.base import assets from tests.network.config import test_config +from tests.network.utils import ( + poll_job_till_timeout_fail_or_success, + post_ps_workflow_request, +) PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: - job_state = StateEnum.unset - while tries > 0: - sleep(wait) - response = get(url=test_url) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - job_state = response.json()["state"] - if job_state == StateEnum.success or job_state == StateEnum.failed: - break - tries -= 1 - return job_state - - def test_ocrd_all_workflow(): - # This tests is supposed to with ocrd_all not with just core on its own + # This test is supposed to run with ocrd_all not with just core on its own # Note: the used workflow path is volume mapped path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" path_to_mets = "/data/mets.xml" - - # submit the workflow job - test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" - response = post( - url=test_url, - headers={"accept": "application/json"}, - files={"workflow": open(path_to_wf, 'rb')} - ) - # print(response.json()) - assert response.status_code == 200, ( - f"Processing server: {test_url}, {response.status_code}. " - f"Response text: {response.text}" - ) - wf_job_id = response.json()["job_id"] - assert wf_job_id - - job_state = poll_till_timeout_fail_or_success( + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_wf, path_to_mets) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", tries=30, wait=10 diff --git a/tests/network/test_processing_server.py b/tests/network/test_processing_server.py index 6d039f5bc9..e8a2ae8c45 100644 --- a/tests/network/test_processing_server.py +++ b/tests/network/test_processing_server.py @@ -1,70 +1,50 @@ -from time import sleep -from requests import get, post +from requests import get from src.ocrd_network import NETWORK_AGENT_WORKER from src.ocrd_network.models import StateEnum from tests.base import assets from tests.network.config import test_config +from tests.network.utils import ( + poll_job_till_timeout_fail_or_success, + post_ps_processing_request, + post_ps_workflow_request, +) PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: - job_state = StateEnum.unset - while tries > 0: - sleep(wait) - response = get(url=test_url) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - job_state = response.json()["state"] - if job_state == StateEnum.success or job_state == StateEnum.failed: - break - tries -= 1 - return job_state - - def test_processing_server_connectivity(): - test_url = f'{PROCESSING_SERVER_URL}/' + test_url = f"{PROCESSING_SERVER_URL}/" response = get(test_url) assert response.status_code == 200, \ - f'Processing server is not reachable on: {test_url}, {response.status_code}' - message = response.json()['message'] - assert message.startswith('The home page of'), \ - f'Processing server home page message is corrupted' + f"Processing server is not reachable on: {test_url}, {response.status_code}" + message = response.json()["message"] + assert message.startswith("The home page of"), \ + f"Processing server home page message is corrupted" # TODO: The processing workers are still not registered when deployed separately. # Fix that by extending the processing server. def test_processing_server_deployed_processors(): - test_url = f'{PROCESSING_SERVER_URL}/processor' + test_url = f"{PROCESSING_SERVER_URL}/processor" response = get(test_url) processors = response.json() assert response.status_code == 200, \ - f'Processing server: {test_url}, {response.status_code}' - assert processors == [], f'Mismatch in deployed processors' + f"Processing server: {test_url}, {response.status_code}" + assert processors == [], f"Mismatch in deployed processors" def test_processing_server_processing_request(): - path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml') + path_to_mets = assets.path_to("kant_aufklaerung_1784/data/mets.xml") test_processing_job_input = { "path_to_mets": path_to_mets, - "input_file_grps": ['OCR-D-IMG'], - "output_file_grps": ['OCR-D-DUMMY'], + "input_file_grps": ["OCR-D-IMG"], + "output_file_grps": ["OCR-D-DUMMY"], "agent_type": NETWORK_AGENT_WORKER, "parameters": {} } - test_processor = 'ocrd-dummy' - test_url = f'{PROCESSING_SERVER_URL}/processor/run/{test_processor}' - response = post( - url=test_url, - headers={"accept": "application/json"}, - json=test_processing_job_input - ) - # print(response.json()) - assert response.status_code == 200, \ - f'Processing server: {test_url}, {response.status_code}' - processing_job_id = response.json()["job_id"] - assert processing_job_id - - job_state = poll_till_timeout_fail_or_success( + test_processor = "ocrd-dummy" + processing_job_id = post_ps_processing_request(PROCESSING_SERVER_URL, test_processor, test_processing_job_input) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/processor/job/{processing_job_id}", tries=10, wait=10 @@ -76,20 +56,8 @@ def test_processing_server_workflow_request(): # Note: the used workflow path is volume mapped path_to_dummy_wf = "/ocrd-data/assets/dummy-workflow.txt" path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml') - - # submit the workflow job - test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" - response = post( - url=test_url, - headers={"accept": "application/json"}, - files={"workflow": open(path_to_dummy_wf, 'rb')} - ) - # print(response.json()) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - wf_job_id = response.json()["job_id"] - assert wf_job_id - - job_state = poll_till_timeout_fail_or_success( + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_dummy_wf, path_to_mets) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", tries=30, wait=10 diff --git a/tests/network/utils.py b/tests/network/utils.py new file mode 100644 index 0000000000..3c1cd0938c --- /dev/null +++ b/tests/network/utils.py @@ -0,0 +1,50 @@ +from requests import get, post +from time import sleep +from src.ocrd_network.models import StateEnum + + +def poll_job_till_timeout_fail_or_success( + test_url: str, + tries: int = 10, + wait: int = 10 +) -> StateEnum: + job_state = StateEnum.unset + while tries > 0: + sleep(wait) + response = get(url=test_url) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + job_state = response.json()["state"] + if job_state == StateEnum.success or job_state == StateEnum.failed: + break + tries -= 1 + return job_state + + +def post_ps_processing_request(ps_server_host: str, test_processor: str, test_job_input: dict) -> str: + test_url = f"{ps_server_host}/processor/run/{test_processor}" + response = post( + url=test_url, + headers={"accept": "application/json"}, + json=test_job_input + ) + # print(response.json()) + assert response.status_code == 200, \ + f"Processing server: {test_url}, {response.status_code}" + processing_job_id = response.json()["job_id"] + assert processing_job_id + return processing_job_id + + +# TODO: Can be extended to include other parameters such as page_wise +def post_ps_workflow_request(ps_server_host: str, path_to_test_wf: str, path_to_test_mets: str) -> str: + test_url = f"{ps_server_host}/workflow/run?mets_path={path_to_test_mets}&page_wise=True" + response = post( + url=test_url, + headers={"accept": "application/json"}, + files={"workflow": open(path_to_test_wf, "rb")} + ) + # print(response.json()) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + wf_job_id = response.json()["job_id"] + assert wf_job_id + return wf_job_id From 3d501b56c2893348b9a2b74fc6cb0b66c0f59b03 Mon Sep 17 00:00:00 2001 From: joschrew Date: Mon, 12 Feb 2024 16:46:26 +0100 Subject: [PATCH 03/22] Make make assets in Dockerfile skipable --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 95130fdd4a..f3b2c92d11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,9 +44,11 @@ WORKDIR /data CMD ["/usr/local/bin/ocrd", "--help"] FROM ocrd_core_base as ocrd_core_test +# Optionally skip make assets with this arg +ARG SKIP_ASSETS WORKDIR /build-ocrd COPY Makefile . -RUN make assets +RUN if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi COPY tests ./tests COPY .gitmodules . COPY requirements_test.txt . From 7f77b57a25d166469fa60ca015b8790e1bf7c8d4 Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:36:50 +0100 Subject: [PATCH 04/22] Add a test for workflow run in ocrd_all --- tests/network/test_ocrd_all_workflow.py | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/network/test_ocrd_all_workflow.py diff --git a/tests/network/test_ocrd_all_workflow.py b/tests/network/test_ocrd_all_workflow.py new file mode 100644 index 0000000000..9406703a61 --- /dev/null +++ b/tests/network/test_ocrd_all_workflow.py @@ -0,0 +1,49 @@ +from time import sleep +from requests import get, post +from src.ocrd_network.models import StateEnum +from tests.base import assets +from tests.network.config import test_config + +PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL + + +def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: + job_state = StateEnum.unset + while tries > 0: + sleep(wait) + response = get(url=test_url) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + job_state = response.json()["state"] + if job_state == StateEnum.success or job_state == StateEnum.failed: + break + tries -= 1 + return job_state + + +def test_ocrd_all_workflow(): + # This tests is supposed to with ocrd_all not with just core on its own + # Note: the used workflow path is volume mapped + path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" + path_to_mets = "/data/mets.xml" + + # submit the workflow job + test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" + response = post( + url=test_url, + headers={"accept": "application/json"}, + files={"workflow": open(path_to_wf, 'rb')} + ) + # print(response.json()) + assert response.status_code == 200, ( + f"Processing server: {test_url}, {response.status_code}. " + f"Response text: {response.text}" + ) + wf_job_id = response.json()["job_id"] + assert wf_job_id + + job_state = poll_till_timeout_fail_or_success( + test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", + tries=30, + wait=10 + ) + assert job_state == StateEnum.success From 9cdb2224e4f9b8043fe7eebdae7bfed942762ab9 Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Mon, 12 Feb 2024 16:23:54 +0100 Subject: [PATCH 05/22] remove duplicates --- tests/network/test_ocrd_all_workflow.py | 41 +++---------- tests/network/test_processing_server.py | 76 +++++++------------------ tests/network/utils.py | 50 ++++++++++++++++ 3 files changed, 79 insertions(+), 88 deletions(-) create mode 100644 tests/network/utils.py diff --git a/tests/network/test_ocrd_all_workflow.py b/tests/network/test_ocrd_all_workflow.py index 9406703a61..2e6f843a3d 100644 --- a/tests/network/test_ocrd_all_workflow.py +++ b/tests/network/test_ocrd_all_workflow.py @@ -1,47 +1,20 @@ -from time import sleep -from requests import get, post from src.ocrd_network.models import StateEnum -from tests.base import assets from tests.network.config import test_config +from tests.network.utils import ( + poll_job_till_timeout_fail_or_success, + post_ps_workflow_request, +) PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: - job_state = StateEnum.unset - while tries > 0: - sleep(wait) - response = get(url=test_url) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - job_state = response.json()["state"] - if job_state == StateEnum.success or job_state == StateEnum.failed: - break - tries -= 1 - return job_state - - def test_ocrd_all_workflow(): - # This tests is supposed to with ocrd_all not with just core on its own + # This test is supposed to run with ocrd_all not with just core on its own # Note: the used workflow path is volume mapped path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" path_to_mets = "/data/mets.xml" - - # submit the workflow job - test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" - response = post( - url=test_url, - headers={"accept": "application/json"}, - files={"workflow": open(path_to_wf, 'rb')} - ) - # print(response.json()) - assert response.status_code == 200, ( - f"Processing server: {test_url}, {response.status_code}. " - f"Response text: {response.text}" - ) - wf_job_id = response.json()["job_id"] - assert wf_job_id - - job_state = poll_till_timeout_fail_or_success( + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_wf, path_to_mets) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", tries=30, wait=10 diff --git a/tests/network/test_processing_server.py b/tests/network/test_processing_server.py index 6d039f5bc9..e8a2ae8c45 100644 --- a/tests/network/test_processing_server.py +++ b/tests/network/test_processing_server.py @@ -1,70 +1,50 @@ -from time import sleep -from requests import get, post +from requests import get from src.ocrd_network import NETWORK_AGENT_WORKER from src.ocrd_network.models import StateEnum from tests.base import assets from tests.network.config import test_config +from tests.network.utils import ( + poll_job_till_timeout_fail_or_success, + post_ps_processing_request, + post_ps_workflow_request, +) PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def poll_till_timeout_fail_or_success(test_url: str, tries: int, wait: int) -> StateEnum: - job_state = StateEnum.unset - while tries > 0: - sleep(wait) - response = get(url=test_url) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - job_state = response.json()["state"] - if job_state == StateEnum.success or job_state == StateEnum.failed: - break - tries -= 1 - return job_state - - def test_processing_server_connectivity(): - test_url = f'{PROCESSING_SERVER_URL}/' + test_url = f"{PROCESSING_SERVER_URL}/" response = get(test_url) assert response.status_code == 200, \ - f'Processing server is not reachable on: {test_url}, {response.status_code}' - message = response.json()['message'] - assert message.startswith('The home page of'), \ - f'Processing server home page message is corrupted' + f"Processing server is not reachable on: {test_url}, {response.status_code}" + message = response.json()["message"] + assert message.startswith("The home page of"), \ + f"Processing server home page message is corrupted" # TODO: The processing workers are still not registered when deployed separately. # Fix that by extending the processing server. def test_processing_server_deployed_processors(): - test_url = f'{PROCESSING_SERVER_URL}/processor' + test_url = f"{PROCESSING_SERVER_URL}/processor" response = get(test_url) processors = response.json() assert response.status_code == 200, \ - f'Processing server: {test_url}, {response.status_code}' - assert processors == [], f'Mismatch in deployed processors' + f"Processing server: {test_url}, {response.status_code}" + assert processors == [], f"Mismatch in deployed processors" def test_processing_server_processing_request(): - path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml') + path_to_mets = assets.path_to("kant_aufklaerung_1784/data/mets.xml") test_processing_job_input = { "path_to_mets": path_to_mets, - "input_file_grps": ['OCR-D-IMG'], - "output_file_grps": ['OCR-D-DUMMY'], + "input_file_grps": ["OCR-D-IMG"], + "output_file_grps": ["OCR-D-DUMMY"], "agent_type": NETWORK_AGENT_WORKER, "parameters": {} } - test_processor = 'ocrd-dummy' - test_url = f'{PROCESSING_SERVER_URL}/processor/run/{test_processor}' - response = post( - url=test_url, - headers={"accept": "application/json"}, - json=test_processing_job_input - ) - # print(response.json()) - assert response.status_code == 200, \ - f'Processing server: {test_url}, {response.status_code}' - processing_job_id = response.json()["job_id"] - assert processing_job_id - - job_state = poll_till_timeout_fail_or_success( + test_processor = "ocrd-dummy" + processing_job_id = post_ps_processing_request(PROCESSING_SERVER_URL, test_processor, test_processing_job_input) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/processor/job/{processing_job_id}", tries=10, wait=10 @@ -76,20 +56,8 @@ def test_processing_server_workflow_request(): # Note: the used workflow path is volume mapped path_to_dummy_wf = "/ocrd-data/assets/dummy-workflow.txt" path_to_mets = assets.path_to('kant_aufklaerung_1784/data/mets.xml') - - # submit the workflow job - test_url = f"{PROCESSING_SERVER_URL}/workflow/run?mets_path={path_to_mets}&page_wise=True" - response = post( - url=test_url, - headers={"accept": "application/json"}, - files={"workflow": open(path_to_dummy_wf, 'rb')} - ) - # print(response.json()) - assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" - wf_job_id = response.json()["job_id"] - assert wf_job_id - - job_state = poll_till_timeout_fail_or_success( + wf_job_id = post_ps_workflow_request(PROCESSING_SERVER_URL, path_to_dummy_wf, path_to_mets) + job_state = poll_job_till_timeout_fail_or_success( test_url=f"{PROCESSING_SERVER_URL}/workflow/job-simple/{wf_job_id}", tries=30, wait=10 diff --git a/tests/network/utils.py b/tests/network/utils.py new file mode 100644 index 0000000000..3c1cd0938c --- /dev/null +++ b/tests/network/utils.py @@ -0,0 +1,50 @@ +from requests import get, post +from time import sleep +from src.ocrd_network.models import StateEnum + + +def poll_job_till_timeout_fail_or_success( + test_url: str, + tries: int = 10, + wait: int = 10 +) -> StateEnum: + job_state = StateEnum.unset + while tries > 0: + sleep(wait) + response = get(url=test_url) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + job_state = response.json()["state"] + if job_state == StateEnum.success or job_state == StateEnum.failed: + break + tries -= 1 + return job_state + + +def post_ps_processing_request(ps_server_host: str, test_processor: str, test_job_input: dict) -> str: + test_url = f"{ps_server_host}/processor/run/{test_processor}" + response = post( + url=test_url, + headers={"accept": "application/json"}, + json=test_job_input + ) + # print(response.json()) + assert response.status_code == 200, \ + f"Processing server: {test_url}, {response.status_code}" + processing_job_id = response.json()["job_id"] + assert processing_job_id + return processing_job_id + + +# TODO: Can be extended to include other parameters such as page_wise +def post_ps_workflow_request(ps_server_host: str, path_to_test_wf: str, path_to_test_mets: str) -> str: + test_url = f"{ps_server_host}/workflow/run?mets_path={path_to_test_mets}&page_wise=True" + response = post( + url=test_url, + headers={"accept": "application/json"}, + files={"workflow": open(path_to_test_wf, "rb")} + ) + # print(response.json()) + assert response.status_code == 200, f"Processing server: {test_url}, {response.status_code}" + wf_job_id = response.json()["job_id"] + assert wf_job_id + return wf_job_id From 419a535455d8e492dba85e094d76f9897b752f02 Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Wed, 13 Mar 2024 11:37:14 +0100 Subject: [PATCH 06/22] Make make assets in Dockerfile skipable --- Dockerfile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 95130fdd4a..f3b2c92d11 100644 --- a/Dockerfile +++ b/Dockerfile @@ -44,9 +44,11 @@ WORKDIR /data CMD ["/usr/local/bin/ocrd", "--help"] FROM ocrd_core_base as ocrd_core_test +# Optionally skip make assets with this arg +ARG SKIP_ASSETS WORKDIR /build-ocrd COPY Makefile . -RUN make assets +RUN if test -z "$SKIP_ASSETS" || test $SKIP_ASSETS -eq 0 ; then make assets ; fi COPY tests ./tests COPY .gitmodules . COPY requirements_test.txt . From dfd78d5cd4b3cc3dda228d24d8dcc28f4494d2ed Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Thu, 11 Apr 2024 16:13:29 +0200 Subject: [PATCH 07/22] make ocrd all tests callable from Makefile --- Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Makefile b/Makefile index 5b83d1a173..36aa88687d 100644 --- a/Makefile +++ b/Makefile @@ -248,6 +248,16 @@ network-integration-test-cicd: $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans +network-integration-test-ocrd-all: + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d + -$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_ocrd_all_' -v + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans + +network-integration-test-ocrd-all-cicd: + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d + $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_ocrd_all_' -v + $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans + benchmark: $(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py From 14576cffb2fe92cd194ed882d6688916f9a766a3 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 10 Apr 2024 14:27:39 +0200 Subject: [PATCH 08/22] update actions and add python 3.12 --- .github/workflows/network-testing.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/network-testing.yml b/.github/workflows/network-testing.yml index 4948ff0bb4..12c8fe4fde 100644 --- a/.github/workflows/network-testing.yml +++ b/.github/workflows/network-testing.yml @@ -20,17 +20,18 @@ jobs: - '3.9' - '3.10' - '3.11' + - '3.12' os: - ubuntu-22.04 # - macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Homebrew id: set-up-homebrew uses: Homebrew/actions/setup-homebrew@master - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 34459e0e035199b089f993bdf2cf2caf924d26ed Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 10 Apr 2024 14:28:27 +0200 Subject: [PATCH 09/22] update actions and add python 3.12 --- .github/workflows/unit-test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/unit-test.yml b/.github/workflows/unit-test.yml index 4b0cd5145a..2b8e3d5b82 100644 --- a/.github/workflows/unit-test.yml +++ b/.github/workflows/unit-test.yml @@ -22,18 +22,19 @@ jobs: - '3.9' - '3.10' - '3.11' + - '3.12' os: - ubuntu-22.04 - ubuntu-20.04 - # - macos-latest + - macos-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Homebrew id: set-up-homebrew uses: Homebrew/actions/setup-homebrew@master - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies From 7d119aa8f14c5c7369a4ecc23c85e2f1120ea760 Mon Sep 17 00:00:00 2001 From: Klaus Rettinghaus Date: Wed, 10 Apr 2024 14:28:44 +0200 Subject: [PATCH 10/22] update actions --- .github/workflows/docker-image.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index a02b8c9bed..62120fb2b1 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -19,10 +19,10 @@ jobs: contents: read steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - # Activate cache export feature to reduce build time of images name: Set up Docker Buildx - uses: docker/setup-buildx-action@v2 + uses: docker/setup-buildx-action@v3 - name: Build the Docker image # default tag uses docker.io, so override on command-line run: make docker DOCKER_TAG=${{ env.GHCRIO_DOCKER_TAG }} @@ -34,13 +34,13 @@ jobs: docker run --rm ${{ env.GHCRIO_DOCKER_TAG }} ocrd --version docker run --rm ${{ env.GHCRIO_DOCKER_TAG }}-cuda ocrd --version - name: Login to GitHub Container Registry - uses: docker/login-action@v2 + uses: docker/login-action@v3 with: registry: ghcr.io username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - name: Log in to Docker Hub - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a + uses: docker/login-action@v3 with: username: ${{ secrets.DOCKERIO_USERNAME }} password: ${{ secrets.DOCKERIO_PASSWORD }} From 2a7ef7b49b8c94efc8e13df77f047e95f62a515d Mon Sep 17 00:00:00 2001 From: joschrew <91774427+joschrew@users.noreply.github.com> Date: Tue, 16 Apr 2024 15:30:02 +0200 Subject: [PATCH 11/22] Remove ocrd_all-tests from core makefile The will be placed in ocrd_all --- Makefile | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index 36aa88687d..781c5277ae 100644 --- a/Makefile +++ b/Makefile @@ -140,7 +140,7 @@ install: #build $(PIP) config set global.no-binary shapely # Install with pip install -e -install-dev: PIP_INSTALL = $(PIP) install -e +install-dev: PIP_INSTALL = $(PIP) install -e install-dev: PIP_INSTALL_CONFIG_OPTION = --config-settings editable_mode=strict install-dev: uninstall $(MAKE) install @@ -248,16 +248,6 @@ network-integration-test-cicd: $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans -network-integration-test-ocrd-all: - $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d - -$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_ocrd_all_' -v - $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans - -network-integration-test-ocrd-all-cicd: - $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d - $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_ocrd_all_' -v - $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans - benchmark: $(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py @@ -325,7 +315,7 @@ pyclean: .PHONY: docker docker-cuda # Additional arguments to docker build. Default: '$(DOCKER_ARGS)' -DOCKER_ARGS = +DOCKER_ARGS = # Build docker image docker: DOCKER_BASE_IMAGE = ubuntu:20.04 @@ -338,7 +328,7 @@ docker-cuda: DOCKER_FILE = Dockerfile.cuda docker-cuda: docker -docker docker-cuda: +docker docker-cuda: docker build --progress=plain -f $(DOCKER_FILE) -t $(DOCKER_TAG) --target ocrd_core_base --build-arg BASE_IMAGE=$(DOCKER_BASE_IMAGE) $(DOCKER_ARGS) . # Build wheels and source dist and twine upload them From 3effd63bf24eeaa6254a952136fa1e430ccfb8cb Mon Sep 17 00:00:00 2001 From: kba Date: Tue, 16 Apr 2024 16:02:33 +0200 Subject: [PATCH 12/22] ci: disable scrutinizer build --- .scrutinizer.yml | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 446df627e2..596c38af7b 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -1,27 +1,23 @@ checks: - python: true + python: true build: - image: default-bionic - nodes: - analysis: - dependencies: - override: - - sudo make deps-ubuntu - - make install - tests: - override: - - py-scrutinizer-run - - - command: pylint-run - use_website_config: false - tests: - tests: - override: - - true + nodes: + analysis: + tests: + override: + - py-scrutinizer-run + - + command: pylint-run + use_website_config: true + + tests: + tests: + override: + - true filter: - excluded_paths: - - 'tests/*' - - 'ocrd_models/ocrd_models/ocrd_page_generateds.py' - dependency_paths: - - 'lib/*' + excluded_paths: + - '*/test/*' + - 'core-models/ocrd_models/model/ocrd_page_generateds.py' + dependency_paths: + - 'lib/*' From 8dae53da1df43fe822d2178a9b250759edbf0539 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Apr 2024 18:35:05 +0200 Subject: [PATCH 13/22] bashlib input-files: apply download_file on each input_file --- src/ocrd/cli/bashlib.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ocrd/cli/bashlib.py b/src/ocrd/cli/bashlib.py index 5746151c72..405875baf1 100644 --- a/src/ocrd/cli/bashlib.py +++ b/src/ocrd/cli/bashlib.py @@ -114,6 +114,10 @@ def bashlib_input_files(**kwargs): input_file_grp=kwargs['input_file_grp'], output_file_grp=kwargs['output_file_grp']) for input_files in processor.zip_input_files(mimetype=None, on_error='abort'): + # ensure all input files exist locally (without persisting them in the METS) + # - this mimics the default behaviour of all Pythonic processors + input_files = [workspace.download_file(input_file) + for input_file in input_files] for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']: # make this bash-friendly (show initialization for associative array) if len(input_files) > 1: From 0195099a6c6335c3b774aa610b9c45c1f233f51a Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Apr 2024 18:37:25 +0200 Subject: [PATCH 14/22] bashlib input-files: let None pass through --- src/ocrd/cli/bashlib.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ocrd/cli/bashlib.py b/src/ocrd/cli/bashlib.py index 405875baf1..1def4638c7 100644 --- a/src/ocrd/cli/bashlib.py +++ b/src/ocrd/cli/bashlib.py @@ -116,7 +116,7 @@ def bashlib_input_files(**kwargs): for input_files in processor.zip_input_files(mimetype=None, on_error='abort'): # ensure all input files exist locally (without persisting them in the METS) # - this mimics the default behaviour of all Pythonic processors - input_files = [workspace.download_file(input_file) + input_files = [workspace.download_file(input_file) if input_file else None for input_file in input_files] for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']: # make this bash-friendly (show initialization for associative array) From feee374f2ffedc0657be3fa2acd61eff1461caee Mon Sep 17 00:00:00 2001 From: Robert Sachunsky Date: Thu, 25 Apr 2024 18:52:21 +0200 Subject: [PATCH 15/22] scrutinizer: try to fix py version --- .scrutinizer.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.scrutinizer.yml b/.scrutinizer.yml index 446df627e2..4848dca46a 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -3,6 +3,10 @@ checks: build: image: default-bionic + environment: + python: + version: 3.8.2 + virtualenv: true nodes: analysis: dependencies: From 48d52e39418aec4679fa0aee09d1765fb4e9d6b7 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 16:16:34 +0200 Subject: [PATCH 16/22] :memo: changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5c19bb1d4..0682192d89 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/). ## Unreleased +Fixed: + + - bashlib processors will download on-demand, like pythonic processors do, #1216, #1217 + ## [2.64.1] - 2024-04-22 Fixed: From c8f41a54934fb271a9719c9e6c9ad2195cc65351 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 17:20:43 +0200 Subject: [PATCH 17/22] drop distutils, support python 3.12 --- src/ocrd/cli/resmgr.py | 2 +- src/ocrd/task_sequence.py | 3 +-- src/ocrd/workspace_bagger.py | 5 ++--- src/ocrd_models/ocrd_exif.py | 2 +- src/ocrd_utils/os.py | 4 ++-- tests/data/wf_testcase.py | 2 +- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/ocrd/cli/resmgr.py b/src/ocrd/cli/resmgr.py index 2e744a8958..1289e498e1 100644 --- a/src/ocrd/cli/resmgr.py +++ b/src/ocrd/cli/resmgr.py @@ -7,7 +7,7 @@ """ import sys from pathlib import Path -from distutils.spawn import find_executable as which +from shutil import which from yaml import safe_load, safe_dump import requests diff --git a/src/ocrd/task_sequence.py b/src/ocrd/task_sequence.py index 6d38601867..da691fbc1d 100644 --- a/src/ocrd/task_sequence.py +++ b/src/ocrd/task_sequence.py @@ -1,7 +1,6 @@ import json from shlex import split as shlex_split -from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module -from subprocess import run, PIPE +from shutil import which from ocrd_utils import getLogger, parse_json_string_or_file, set_json_key_value_overrides, get_ocrd_tool_json # from collections import Counter diff --git a/src/ocrd/workspace_bagger.py b/src/ocrd/workspace_bagger.py index b67224ddd2..a30dbfb02a 100644 --- a/src/ocrd/workspace_bagger.py +++ b/src/ocrd/workspace_bagger.py @@ -2,13 +2,12 @@ from os import makedirs, chdir, walk from os.path import join, isdir, basename as os_path_basename, exists, relpath from pathlib import Path -from shutil import make_archive, rmtree, copyfile, move +from shutil import make_archive, rmtree, copyfile, move, copytree from tempfile import mkdtemp, TemporaryDirectory import re import tempfile import sys from bagit import Bag, make_manifests, _load_tag_file, _make_tag_file, _make_tagmanifest_file # pylint: disable=no-name-in-module -from distutils.dir_util import copy_tree from ocrd_utils import ( pushd_popd, @@ -298,7 +297,7 @@ def recreate_checksums(self, src, dest=None, overwrite=False): raise FileNotFoundError(f"data directory of bag not found at {src}") if not overwrite: path_to_bag.mkdir(parents=True, exist_ok=True) - copy_tree(src, dest) + copytree(src, dest, dirs_exist_ok=True) with pushd_popd(path_to_bag): n_bytes, n_files = make_manifests("data", 1, ["sha512"]) diff --git a/src/ocrd_models/ocrd_exif.py b/src/ocrd_models/ocrd_exif.py index 598c499eff..406e60a85a 100644 --- a/src/ocrd_models/ocrd_exif.py +++ b/src/ocrd_models/ocrd_exif.py @@ -5,7 +5,7 @@ from math import sqrt from io import BytesIO from subprocess import run, PIPE -from distutils.spawn import find_executable as which +from shutil import which from ocrd_utils import getLogger class OcrdExif(): diff --git a/src/ocrd_utils/os.py b/src/ocrd_utils/os.py index 47c4b510f4..1b3ab4e73d 100644 --- a/src/ocrd_utils/os.py +++ b/src/ocrd_utils/os.py @@ -18,12 +18,12 @@ from tempfile import TemporaryDirectory, gettempdir from functools import lru_cache from contextlib import contextmanager, redirect_stderr, redirect_stdout -from distutils.spawn import find_executable as which +from shutil import which from json import loads from json.decoder import JSONDecodeError from os import getcwd, chdir, stat, chmod, umask, environ from pathlib import Path -from os.path import exists, abspath as abspath_, join, isdir +from os.path import abspath as abspath_, join from zipfile import ZipFile from subprocess import run, PIPE from mimetypes import guess_type as mimetypes_guess diff --git a/tests/data/wf_testcase.py b/tests/data/wf_testcase.py index 2ea97ed212..0ed6ac9b7e 100644 --- a/tests/data/wf_testcase.py +++ b/tests/data/wf_testcase.py @@ -61,7 +61,7 @@ def setUp(self): p.chmod(0o777) os.environ['PATH'] = os.pathsep.join([self.tempdir, os.environ['PATH']]) - # from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module + # from shutil import which # pylint: disable=import-error,no-name-in-module # self.assertTrue(which('ocrd-sample-processor')) From b788b59d77ab6739b0632336ca382a5f041c1730 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 17:39:17 +0200 Subject: [PATCH 18/22] :memo: changelog --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0682192d89..76734389d0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ Fixed: - bashlib processors will download on-demand, like pythonic processors do, #1216, #1217 +Changed: + + - Replace `distutils` which equivalents from `shutil` for compatibility with python 3.12+, #1219 + ## [2.64.1] - 2024-04-22 Fixed: From cf4664ad3bed946cfe237ef8d5e0254a16cedb8f Mon Sep 17 00:00:00 2001 From: Mehmed Mustafa Date: Fri, 3 May 2024 17:52:25 +0200 Subject: [PATCH 19/22] disable ocrd all test in core --- .../{test_ocrd_all_workflow.py => test_integration_ocrd_all.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/network/{test_ocrd_all_workflow.py => test_integration_ocrd_all.py} (96%) diff --git a/tests/network/test_ocrd_all_workflow.py b/tests/network/test_integration_ocrd_all.py similarity index 96% rename from tests/network/test_ocrd_all_workflow.py rename to tests/network/test_integration_ocrd_all.py index d54d9f2fd5..eacc0df9cf 100644 --- a/tests/network/test_ocrd_all_workflow.py +++ b/tests/network/test_integration_ocrd_all.py @@ -5,7 +5,7 @@ PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def test_ocrd_all_workflow(): +def _test_ocrd_all_workflow(): # This test is supposed to run with ocrd_all not with just core on its own # Note: the used workflow path is volume mapped path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" From e88d6460ae53421a32684055a5362e8788b76cbb Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 17:54:01 +0200 Subject: [PATCH 20/22] :memo: changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 76734389d0..c83143d240 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,8 @@ Fixed: Changed: - Replace `distutils` which equivalents from `shutil` for compatibility with python 3.12+, #1219 + - CI: Updated GitHub actions, #1206 + - CI: Fixed scrutinizer, #1217 ## [2.64.1] - 2024-04-22 From 6ecbaa84db9d6339477c70379522b1e6c31b86d3 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 18:21:10 +0200 Subject: [PATCH 21/22] make network-integration-test: disable ocrd_all test --- Makefile | 4 ++-- tests/network/test_integration_ocrd_all.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 781c5277ae..5046d94987 100644 --- a/Makefile +++ b/Makefile @@ -240,12 +240,12 @@ network-module-test: assets INTEGRATION_TEST_IN_DOCKER = docker exec core_test network-integration-test: $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d - -$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v + -$(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="$(TESTDIR)/network/*ocrd_all*.py" $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans network-integration-test-cicd: $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d - $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v + $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="$(TESTDIR)/network/*ocrd_all*.py" $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans benchmark: diff --git a/tests/network/test_integration_ocrd_all.py b/tests/network/test_integration_ocrd_all.py index eacc0df9cf..d54d9f2fd5 100644 --- a/tests/network/test_integration_ocrd_all.py +++ b/tests/network/test_integration_ocrd_all.py @@ -5,7 +5,7 @@ PROCESSING_SERVER_URL = test_config.PROCESSING_SERVER_URL -def _test_ocrd_all_workflow(): +def test_ocrd_all_workflow(): # This test is supposed to run with ocrd_all not with just core on its own # Note: the used workflow path is volume mapped path_to_wf = "/ocrd-data/assets/ocrd_all-test-workflow.txt" From 1bd8fc441934fcaca244bb9f1e893a30830ba11d Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 3 May 2024 18:32:35 +0200 Subject: [PATCH 22/22] ci: fix integration test --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5046d94987..b4c4cb8a86 100644 --- a/Makefile +++ b/Makefile @@ -245,7 +245,7 @@ network-integration-test: network-integration-test-cicd: $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml up -d - $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="$(TESTDIR)/network/*ocrd_all*.py" + $(INTEGRATION_TEST_IN_DOCKER) pytest -k 'test_integration_' -v --ignore-glob="tests/network/*ocrd_all*.py" $(DOCKER_COMPOSE) --file tests/network/docker-compose.yml down --remove-orphans benchmark: