From f2797604914e801b1a6e9a5bb457e0421f0a5a25 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:02:23 +0100 Subject: [PATCH 01/14] download models: processors must be installed --- Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0772f59..caa5024 100755 --- a/Makefile +++ b/Makefile @@ -67,10 +67,10 @@ ocrd_anybaseocr/pix2pixhd: # Download sample model TODO Add other models here .PHONY: models models: - ocrd resmgr download --allow-uninstalled --location cwd ocrd-anybaseocr-dewarp '*' - ocrd resmgr download --allow-uninstalled --location cwd ocrd-anybaseocr-block-segmentation '*' - ocrd resmgr download --allow-uninstalled --location cwd ocrd-anybaseocr-layout-analysis '*' - ocrd resmgr download --allow-uninstalled --location cwd ocrd-anybaseocr-tiseg '*' + ocrd resmgr download --location cwd ocrd-anybaseocr-dewarp '*' + ocrd resmgr download --location cwd ocrd-anybaseocr-block-segmentation '*' + ocrd resmgr download --location cwd ocrd-anybaseocr-layout-analysis '*' + ocrd resmgr download --location cwd ocrd-anybaseocr-tiseg '*' .PHONY: docker docker: From ed381c04e27f86ef1eebdd5e32ba23e81df23d3d Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:17:38 +0100 Subject: [PATCH 02/14] CI: no custom venv, models require install --- .circleci/config.yml | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 05f840f..6b6e344 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -13,33 +13,32 @@ jobs: working_directory: '~/repo' steps: - checkout - - restore_cache: - keys: - - v1-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "requirements.test.txt" }} - run: name: install dependencies command: | - python3 -m venv venv - . venv/bin/activate pip install --upgrade pip pip install -r requirements.test.txt pip install -r requirements.txt sudo apt-get update -y sudo apt-get install -y imagemagick - make models + pip install . + - restore_cache: + keys: + # ocrd-resources depends on the model files registered under ocrd_anybaseocr/ocrd-tool.json + - v1-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} + - run: + name: download models + command: make models - save_cache: paths: - - ./venv - - ./ocrd-resources - # ocrd-resources depends on the model files registered under core/ocrd-tool json - # but let's assume these won't change without changing requirements.txt for now - key: v1-dependencies-{{ checksum "requirements.txt" }}-{{ checksum "requirements.test.txt" }} + - . + key: v1-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} - run: name: run unit tests - command: . venv/bin/activate && make test + command: make test - run: name: run CLI tests - command: . venv/bin/activate && pip install . && make cli-test + command: make cli-test - store_artifacts: path: test-reports destination: test-reports From 9096f4467ea9e3d7e79a709d89eef1555d5ff240 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:17:45 +0100 Subject: [PATCH 03/14] test_smoke.py: remove block segmentation here, too --- tests/test_smoke.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index be7b495..aa83b6b 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -3,7 +3,6 @@ from .base import main, CapturingTestCase as TestCase from ocrd_anybaseocr.cli.ocrd_anybaseocr_binarize import cli as OcrdAnybaseocrBinarizer -from ocrd_anybaseocr.cli.ocrd_anybaseocr_block_segmentation import cli as OcrdAnybaseocrBlockSegmenter from ocrd_anybaseocr.cli.ocrd_anybaseocr_cropping import cli as OcrdAnybaseocrCropper from ocrd_anybaseocr.cli.ocrd_anybaseocr_deskew import cli as OcrdAnybaseocrDeskewer from ocrd_anybaseocr.cli.ocrd_anybaseocr_dewarp import cli as OcrdAnybaseocrDewarper @@ -13,7 +12,6 @@ CLIS = [ OcrdAnybaseocrBinarizer, - OcrdAnybaseocrBlockSegmenter, OcrdAnybaseocrCropper, OcrdAnybaseocrDeskewer, OcrdAnybaseocrDewarper, From eeecf07ef1b649ea2e5a69ca6da16c8dd3ff28a0 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:25:52 +0100 Subject: [PATCH 04/14] models: do not attempt to download block segmentation resources --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index caa5024..656e174 100755 --- a/Makefile +++ b/Makefile @@ -68,7 +68,6 @@ ocrd_anybaseocr/pix2pixhd: .PHONY: models models: ocrd resmgr download --location cwd ocrd-anybaseocr-dewarp '*' - ocrd resmgr download --location cwd ocrd-anybaseocr-block-segmentation '*' ocrd resmgr download --location cwd ocrd-anybaseocr-layout-analysis '*' ocrd resmgr download --location cwd ocrd-anybaseocr-tiseg '*' From c0d399a01ea83c02b6b1d85af7fa524fa718ea28 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:41:19 +0100 Subject: [PATCH 05/14] dewarp: pass absolute model path --- ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py index 89ea486..9e26fd5 100755 --- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py +++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_dewarp.py @@ -107,7 +107,7 @@ def setup(self): sys.exit(1) self.opt, self.model = prepare_options( gpu_id=self.parameter['gpu_id'], - model_path=model_path, + model_path=model_path.absolute(), resize_or_crop=self.parameter['resize_mode'], loadSize=self.parameter['resize_height'], fineSize=self.parameter['resize_width'], From 0954c1b66f1c10748412187a9f026a7b59b434a8 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:46:27 +0100 Subject: [PATCH 06/14] cli-test: deactivate tiseg, too --- Makefile | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 656e174..60fc620 100755 --- a/Makefile +++ b/Makefile @@ -69,7 +69,6 @@ ocrd_anybaseocr/pix2pixhd: models: ocrd resmgr download --location cwd ocrd-anybaseocr-dewarp '*' ocrd resmgr download --location cwd ocrd-anybaseocr-layout-analysis '*' - ocrd resmgr download --location cwd ocrd-anybaseocr-tiseg '*' .PHONY: docker docker: @@ -105,7 +104,7 @@ test: assets-clean assets # Run CLI tests .PHONY: cli-test cli-test: assets-clean assets -cli-test: test-binarize test-deskew test-crop test-tiseg test-textline test-layout-analysis test-dewarp +cli-test: test-binarize test-deskew test-crop test-textline test-layout-analysis test-dewarp # Test binarization CLI .PHONY: test-binarize @@ -134,8 +133,8 @@ test-block-segmentation: test-tiseg # Test textline segmentation CLI .PHONY: test-textline -test-textline: test-tiseg - ocrd-anybaseocr-textline -m $(TESTDATA)/mets.xml -I TISEG-TEST -O TL-TEST +test-textline: test-crop + ocrd-anybaseocr-textline -m $(TESTDATA)/mets.xml -I CROP-TEST -O TL-TEST # Test page dewarping CLI .PHONY: test-dewarp From 79850910c0a7859d493e3d4037b1720361f5ae06 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 15:55:29 +0100 Subject: [PATCH 07/14] test_smoke.py: no tiseg, too --- tests/test_smoke.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index aa83b6b..4c2d2a8 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -8,7 +8,6 @@ from ocrd_anybaseocr.cli.ocrd_anybaseocr_dewarp import cli as OcrdAnybaseocrDewarper from ocrd_anybaseocr.cli.ocrd_anybaseocr_layout_analysis import cli as OcrdAnybaseocrLayoutAnalyser from ocrd_anybaseocr.cli.ocrd_anybaseocr_textline import cli as OcrdAnybaseocrTextline -from ocrd_anybaseocr.cli.ocrd_anybaseocr_tiseg import cli as OcrdAnybaseocrTiseg CLIS = [ OcrdAnybaseocrBinarizer, @@ -17,7 +16,6 @@ OcrdAnybaseocrDewarper, OcrdAnybaseocrLayoutAnalyser, OcrdAnybaseocrTextline, - OcrdAnybaseocrTiseg ] class SmokeTest(TestCase): From 8ec81fe28af59b856e184e1a9827cac87e068f36 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:12:06 +0100 Subject: [PATCH 08/14] models: do not use CWD location anymore --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 60fc620..ec4351d 100755 --- a/Makefile +++ b/Makefile @@ -67,8 +67,8 @@ ocrd_anybaseocr/pix2pixhd: # Download sample model TODO Add other models here .PHONY: models models: - ocrd resmgr download --location cwd ocrd-anybaseocr-dewarp '*' - ocrd resmgr download --location cwd ocrd-anybaseocr-layout-analysis '*' + ocrd resmgr download ocrd-anybaseocr-dewarp '*' + ocrd resmgr download ocrd-anybaseocr-layout-analysis '*' .PHONY: docker docker: From 707493c6a17729292cf7b72d862f96a222db2a23 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:13:27 +0100 Subject: [PATCH 09/14] test_dewarp.py: adapt to resmgr controlled model location --- tests/test_dewarp.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dewarp.py b/tests/test_dewarp.py index 49bb808..7b03ef9 100644 --- a/tests/test_dewarp.py +++ b/tests/test_dewarp.py @@ -14,7 +14,7 @@ class AnyocrDewarperTest(TestCase): def setUp(self): - self.model_path = Path(Path.cwd(), 'latest_net_G.pth') + self.model_path = 'latest_net_G.pth' self.resolver = Resolver() initLogging() From 73a2ecedfda2cd9b65d4ad747acc3335e9c19c01 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:32:27 +0100 Subject: [PATCH 10/14] CI: adapt to default resource location, no py37 --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 6b6e344..c8aba70 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,7 +31,7 @@ jobs: command: make models - save_cache: paths: - - . + - ~/.local/share/ocrd-resources key: v1-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} - run: name: run unit tests @@ -67,7 +67,7 @@ workflows: - test: matrix: parameters: - version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + version: ['3.8', '3.9', '3.10', '3.11'] - deploy-docker: filters: branches: From cc76cb0c543060dd5aef9393a1595a59830e50bf Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:32:40 +0100 Subject: [PATCH 11/14] Docker: update base --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index a887f1f..a20bcb4 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM ocrd/core-cuda:v2.63.0 AS base +FROM ocrd/core-cuda-tf2:v2.70.0 AS base ARG VCS_REF ARG BUILD_DATE LABEL \ From b39e7a9f06fba66ef7c4a15114c72b89e6a0d929 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:33:14 +0100 Subject: [PATCH 12/14] Dockerfile: fix build dir --- Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index a20bcb4..3f10c6c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,14 +7,14 @@ LABEL \ org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_anybaseocr" \ org.label-schema.build-date=$BUILD_DATE -WORKDIR /build +WORKDIR /build/ocrd_anybaseocr COPY setup.py . COPY ocrd_anybaseocr/ocrd-tool.json . COPY ocrd_anybaseocr ./ocrd_anybaseocr COPY requirements.txt . COPY README.md . RUN pip install . \ - && rm -rf /build + && rm -rf /build/ocrd_anybaseocr WORKDIR /data VOLUME ["/data"] From e02bba2605005d41123c49ca645c41e377e87f3e Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:38:20 +0100 Subject: [PATCH 13/14] CI: invalidate cache --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c8aba70..5ef5c56 100755 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -25,14 +25,14 @@ jobs: - restore_cache: keys: # ocrd-resources depends on the model files registered under ocrd_anybaseocr/ocrd-tool.json - - v1-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} + - v2-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} - run: name: download models command: make models - save_cache: paths: - ~/.local/share/ocrd-resources - key: v1-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} + key: v2-models-{{ checksum "ocrd_anybaseocr/ocrd-tool.json" }} - run: name: run unit tests command: make test From aa5bc55178ba6ccffdcc6aae6f62ba74f58696f3 Mon Sep 17 00:00:00 2001 From: Robert Sachunsky <38561704+bertsky@users.noreply.github.com> Date: Tue, 5 Nov 2024 16:59:22 +0100 Subject: [PATCH 14/14] avoid TF 2.16 / Keras 3 --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19233b5..42695cc 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -keras +keras < 3.0 keras-preprocessing numpy >= 1.15.4 ocrd >= 2.31 @@ -10,7 +10,7 @@ scikit-image >= 0.17.2 scipy >= 1.4.1 setuptools >= 41.0.0 shapely -tensorflow +tensorflow < 2.16 torch>=1.1.0 torchvision >= 0.6.1 pix2pixhd # @ ./ocrd_anybaseocr/pix2pixhd