diff --git a/Makefile-slim b/Makefile-slim
new file mode 100644
index 00000000..71bf5657
--- /dev/null
+++ b/Makefile-slim
@@ -0,0 +1,50 @@
+export PYTHON ?= python3
+VIRTUAL_ENV = $(CURDIR)/venv2
+BIN = $(VIRTUAL_ENV)/bin
+ACTIVATE_VENV = $(BIN)/activate
+OCRD_MODULES = OCRD_CIS OCRD_TESSEROCR
+OCRD_CIS = ocrd-cis-ocropy-binarize ocrd-cis-ocropy-dewarp
+OCRD_TESSEROCR = ocrd-tesserocr-recognize ocrd-tesserocr-segment-region
+PROCESSORS = $(foreach mod,$(OCRD_MODULES),$(foreach proc,$($(mod)), $(proc) ))
+DELEGATORS = $(foreach proc,$(PROCESSORS),$(BIN)/$(proc))
+OCRD_PS_PORT = 8000
+
+# entry point: generate docker-compose.yaml, .env and one delegator per processor
+.PHONY: slim-venv
+slim-venv: docker-compose.yaml .env $(DELEGATORS) | $(VIRTUAL_ENV)
+
+
+# create a delegator to the processing server for the processor
+$(BIN)/ocrd-%: | $(VIRTUAL_ENV)
+	@sed -e "s/{{\s*OCRD_PS_PORT\s*}}/$(OCRD_PS_PORT)/" slim-containers-files/delegator_template.py > $@;
+	@chmod u+x $@
+
+
+# activate the venv and run the install target of the core submodule inside it
+$(VIRTUAL_ENV): $(ACTIVATE_VENV)
+	. $(ACTIVATE_VENV) && $(MAKE) -C core install
+
+# create the venv, upgrade its packaging tools and make activation export the .env variables
+%/bin/activate:
+	$(PYTHON) -m venv $(subst /bin/activate,,$@)
+	. $@ && pip install --upgrade pip setuptools wheel
+	@echo "set -a; source $(CURDIR)/.env; set +a" >> $@
+
+# append the service to docker-compose for a processor
+add_proc = sed -e "s/{{\s*processor_name\s*}}/$1/" -e "s/{{\s*processor_group_name\s*}}/\L$2/" \
+	slim-containers-files/docker-compose.processor.template.yaml >> docker-compose.yaml;
+
+docker-compose.yaml:
+	@cat slim-containers-files/docker-compose.template.yaml > docker-compose.yaml
+	@$(foreach mod,$(OCRD_MODULES),$(foreach proc,$($(mod)),$(call add_proc,$(proc),$(mod))))
+
+.env:
+	@echo OCRD_PS_PORT=$(OCRD_PS_PORT) >> .env
+	@echo OCRD_PS_MTU=1300 >> .env
+	@echo MONGODB_USER=admin >> .env
+	@echo MONGODB_PASS=admin >> .env
+	@echo 'MONGODB_URL=mongodb://$${MONGODB_USER}:$${MONGODB_PASS}@ocrd-mongodb:27017' >> .env
+	@echo RABBITMQ_USER=admin >> .env
+	@echo RABBITMQ_PASS=admin >> .env
+	@echo 'RABBITMQ_URL=amqp://$${RABBITMQ_USER}:$${RABBITMQ_PASS}@ocrd-rabbitmq:5672' >> .env
+
diff --git a/core b/core
index 552cfcd7..ed55b894 160000
--- a/core
+++ b/core
@@ -1 +1 @@
-Subproject commit 552cfcd72aec38b23a856924391602acc7689267
+Subproject commit ed55b894b09f38d1e870d373efdbe6cc7f5c40cd
diff --git a/slim-containers-files/delegator_template.py b/slim-containers-files/delegator_template.py
new file mode 100755
index 00000000..3d389f6a
--- /dev/null
+++ b/slim-containers-files/delegator_template.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python
+import sys
+import subprocess
+from http.server import BaseHTTPRequestHandler, HTTPServer
+from os.path import basename
+from os import environ
+
+processing_server_address = f"http://localhost:{environ['OCRD_PS_PORT']}"
+processor_name = basename(sys.argv[0])
+
+STOP_WAITING_SERVER = False
+
+
+class CallbackReceiver(BaseHTTPRequestHandler):
+    """Simple http-server to wait for a processor to finish working.
+
+    The OCR-D processor is started with a callback-url which points to this server. The processor
+    starts working in the background but the call is returned immediately. When the processor is
+    finished it sends a request to the callback-url. The purpose of this server is to wait for the
+    processor to finish and print its result.
+    """
+    def do_POST(self):
+        global STOP_WAITING_SERVER
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain")
+        self.end_headers()
+        self.wfile.write("finished".encode("utf-8"))
+        content_length = int(self.headers.get("Content-Length", 0))
+        data = self.rfile.read(content_length).decode("utf-8")
+        # TODO: how should the callback-content be handled/printed
+        print(f"Processor finished: {data}")
+        STOP_WAITING_SERVER = True
+
+
+def call_processor_and_wait():
+    """Submit the job via the ocrd client and wait until the callback request has been received."""
+    server = HTTPServer(("0.0.0.0", 0), CallbackReceiver)
+    callback_url = f"http://172.17.0.1:{server.server_address[1]}"
+    cmd = [
+        "ocrd", "network", "client", "processing", "processor",
+        processor_name, "--address", processing_server_address,
+        "--callback-url", callback_url
+    ]
+    args = list(sys.argv)
+    result = subprocess.run(cmd + args[1:])
+    if result.returncode != 0:
+        # the client call failed, so presumably no callback will ever arrive
+        sys.exit(result.returncode)
+    while not STOP_WAITING_SERVER:
+        server.handle_request()
+
+
+if __name__ == "__main__":
+    call_processor_and_wait()
diff --git a/slim-containers-files/docker-compose.processor.template.yaml b/slim-containers-files/docker-compose.processor.template.yaml
new file mode 100644
index 00000000..01122f2b
--- /dev/null
+++ b/slim-containers-files/docker-compose.processor.template.yaml
@@ -0,0 +1,14 @@
+
+  {{ processor_name }}:
+    extends:
+      file: slim-containers-files/{{ processor_group_name}}/docker-compose.yaml
+      service: {{ processor_name }}
+    command: ocrd network processing-worker --database $MONGODB_URL --queue $RABBITMQ_URL --create-queue {{ processor_name }}
+    depends_on:
+      - ocrd-processing-server
+      - ocrd-mongodb
+      - ocrd-rabbitmq
+    # restart: The worker creates its queue but rabbitmq needs a few seconds to be available
+    restart: on-failure:3
+    volumes:
+      - "$PWD/data:/data"
diff --git a/slim-containers-files/docker-compose.template.yaml b/slim-containers-files/docker-compose.template.yaml
new file mode 100644
index 00000000..cf1a0be7
--- /dev/null
+++ b/slim-containers-files/docker-compose.template.yaml
@@ -0,0 +1,49 @@
+networks:
+  default:
+    driver: bridge
+    driver_opts:
+      com.docker.network.driver.mtu: ${OCRD_PS_MTU}
+
+services:
+  ocrd-processing-server:
+    build:
+      context: core
+      args:
+        BASE_IMAGE: ubuntu:20.04
+    ports:
+      - ${OCRD_PS_PORT}:8000
+    environment:
+      MONGODB_USER: ${MONGODB_USER:-admin}
+      MONGODB_PASS: ${MONGODB_PASS:-admin}
+      RABBITMQ_USER: ${RABBITMQ_USER:-admin}
+      RABBITMQ_PASS: ${RABBITMQ_PASS:-admin}
+    command: |
+      /bin/bash -c "echo -e \"
+        process_queue:
+          address: ocrd-rabbitmq
+          port: 5672
+          skip_deployment: true
+          credentials:
+            username: ${RABBITMQ_USER}
+            password: ${RABBITMQ_PASS}
+        database:
+          address: ocrd-mongodb
+          port: 27017
+          skip_deployment: true
+          credentials:
+            username: ${MONGODB_USER}
+            password: ${MONGODB_PASS}
+        hosts: []\" > /ocrd-processing-server-config.yaml && \
+        ocrd network processing-server -a 0.0.0.0:8000 /ocrd-processing-server-config.yaml"
+
+  ocrd-mongodb:
+    image: mongo:latest
+    environment:
+      MONGO_INITDB_ROOT_USERNAME: ${MONGODB_USER:-admin}
+      MONGO_INITDB_ROOT_PASSWORD: ${MONGODB_PASS:-admin}
+
+  ocrd-rabbitmq:
+    image: rabbitmq:3-management
+    environment:
+      RABBITMQ_DEFAULT_USER: ${RABBITMQ_USER:-admin}
+      RABBITMQ_DEFAULT_PASS: ${RABBITMQ_PASS:-admin}
diff --git a/slim-containers-files/ocrd_cis/Dockerfile b/slim-containers-files/ocrd_cis/Dockerfile
new file mode 100644
index 00000000..5839c70a
--- /dev/null
+++ b/slim-containers-files/ocrd_cis/Dockerfile
@@ -0,0 +1,15 @@
+FROM ocrd/core:latest AS base
+WORKDIR /build-ocrd
+# Remove the next RUN, this is only to checkout my branch while the changes are not in core yet
+RUN git clone https://github.com/ocr-d/core.git && \
+    cd core && \
+    git checkout network-for-slim-prep && \
+    make install
+
+# Not based on ocrd_cis "original" Dockerfile. That seems out of date and in ocrd_all ocrd_cis is
+# simply installed with pip so I do the same here
+COPY ocrd_cis/ ./ocrd_cis/
+COPY setup.py README.md LICENSE ocrd-tool.json Manifest.in ./
+RUN pip install . && rm -rf /build-ocrd
+# TODO: install models for ocrd-cis
+WORKDIR /data
diff --git a/slim-containers-files/ocrd_cis/docker-compose.yaml b/slim-containers-files/ocrd_cis/docker-compose.yaml
new file mode 100644
index 00000000..b61da055
--- /dev/null
+++ b/slim-containers-files/ocrd_cis/docker-compose.yaml
@@ -0,0 +1,14 @@
+services:
+  ocrd-cis-ocropy-binarize:
+    build:
+      context: ../../ocrd_cis
+      dockerfile: ../slim-containers-files/ocrd_cis/Dockerfile
+    command:
+      ocrd network processing-worker ocrd-cis-ocropy-binarize --database $MONGODB_URL --queue $RABBITMQ_URL --create-queue
+
+  ocrd-cis-ocropy-dewarp:
+    build:
+      context: ../../ocrd_cis
+      dockerfile: ../slim-containers-files/ocrd_cis/Dockerfile
+    command:
+      ocrd network processing-worker ocrd-cis-ocropy-dewarp --database $MONGODB_URL --queue $RABBITMQ_URL --create-queue
diff --git a/slim-containers-files/ocrd_tesserocr/Dockerfile b/slim-containers-files/ocrd_tesserocr/Dockerfile
new file mode 100644
index 00000000..2ffc7544
--- /dev/null
+++ b/slim-containers-files/ocrd_tesserocr/Dockerfile
@@ -0,0 +1,44 @@
+FROM ocrd/core:latest AS base
+WORKDIR /build-ocrd-core
+# Remove the next RUN, this is only to checkout my branch while the changes are not in core yet
+RUN git clone https://github.com/ocr-d/core.git && \
+    cd core && \
+    git checkout network-for-slim-prep && \
+    make install
+
+# copied from https://github.com/OCR-D/ocrd_tesserocr/blob/master/Dockerfile and modified
+ARG VCS_REF
+ARG BUILD_DATE
+LABEL \
+    maintainer="https://ocr-d.de/kontakt" \
+    org.label-schema.vcs-ref=$VCS_REF \
+    org.label-schema.vcs-url="https://github.com/OCR-D/ocrd_tesserocr" \
+    org.label-schema.build-date=$BUILD_DATE
+
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYTHONIOENCODING=utf8
+
+# avoid HOME/.local/share (hard to predict USER here)
+# so let XDG_DATA_HOME coincide with fixed system location
+# (can still be overridden by derived stages)
+ENV XDG_DATA_HOME=/usr/local/share
+
+WORKDIR /build-ocrd
+COPY setup.py .
+COPY ocrd_tesserocr/ocrd-tool.json .
+COPY README.md .
+COPY requirements.txt .
+COPY requirements_test.txt .
+COPY ocrd_tesserocr ./ocrd_tesserocr
+COPY Makefile .
+RUN make deps-ubuntu && \
+    apt-get install -y --no-install-recommends \
+    g++ \
+    && make deps install \
+    && rm -rf /build-ocrd \
+    && apt-get -y remove --auto-remove g++ libtesseract-dev make
+RUN ocrd resmgr download ocrd-tesserocr-recognize Fraktur.traineddata
+RUN ocrd resmgr download ocrd-tesserocr-recognize deu.traineddata
+
+WORKDIR /data
+VOLUME /data
diff --git a/slim-containers-files/ocrd_tesserocr/docker-compose.yaml b/slim-containers-files/ocrd_tesserocr/docker-compose.yaml
new file mode 100644
index 00000000..a29a51cd
--- /dev/null
+++ b/slim-containers-files/ocrd_tesserocr/docker-compose.yaml
@@ -0,0 +1,14 @@
+services:
+  ocrd-tesserocr-recognize:
+    build:
+      context: ../../ocrd_tesserocr
+      dockerfile: ../slim-containers-files/ocrd_tesserocr/Dockerfile
+    command:
+      ocrd network processing-worker ocrd-tesserocr-recognize --database $MONGODB_URL --queue $RABBITMQ_URL --create-queue
+
+  ocrd-tesserocr-segment-region:
+    build:
+      context: ../../ocrd_tesserocr
+      dockerfile: ../slim-containers-files/ocrd_tesserocr/Dockerfile
+    command:
+      ocrd network processing-worker ocrd-tesserocr-segment-region --database $MONGODB_URL --queue $RABBITMQ_URL --create-queue