diff --git a/README.md b/README.md index c04d3f3..185fd39 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ The Loghi framework is designed to streamline the process of Handwritten Text Re ### Laypa: Layout Analysis and Segmentation -[Laypa][https://github.com/knaw-huc/laypa/] specializes in the segmentation of documents, identifying different regions like paragraphs, page numbers, and most importantly, baselines within the text. Utilizing a sophisticated architecture based on a ResNet backbone and a feature pyramid network, Laypa performs pixel-wise classifications to detect these elements. Built on the [detectron2](https://github.com/facebookresearch/detectron2) framework, its output facilitates further processing by converting the classifications into instances—either as masks or directly into PageXML format. This segmentation is crucial for preparing documents for OCR/HTR processing, ensuring that text regions are accurately recognized and extracted. +[Laypa](https://github.com/knaw-huc/laypa/) specializes in the segmentation of documents, identifying different regions like paragraphs, page numbers, and most importantly, baselines within the text. Utilizing a sophisticated architecture based on a ResNet backbone and a feature pyramid network, Laypa performs pixel-wise classifications to detect these elements. Built on the [detectron2](https://github.com/facebookresearch/detectron2) framework, its output facilitates further processing by converting the classifications into instances—either as masks or directly into PageXML format. This segmentation is crucial for preparing documents for OCR/HTR processing, ensuring that text regions are accurately recognized and extracted. ### Loghi Tooling: Pre and Post-Processing Toolkit diff --git a/docker/buildAll.sh b/docker/buildAll.sh index 0935f9c..b5489e7 100755 --- a/docker/buildAll.sh +++ b/docker/buildAll.sh @@ -1,6 +1,7 @@ #!/bin/bash -VERSION=2.0.7 +VERSION=2.1.1 set -e +set -o pipefail CURRENT=$(pwd) @@ -42,7 +43,7 @@ cd docker.base cd .. echo "building docker.loghi-tooling" cd docker.loghi-tooling/ -./buildImage.sh $BASE/prima-core-libs/ $BASE/loghi-tooling/ +./buildImage.sh $BASE/prima-core-libs/ $BASE/loghi-tooling $VERSION cd .. echo "building docker.htr" cd docker.htr diff --git a/docker/docker.base/buildAndInstallOpencv.sh b/docker/docker.base/buildAndInstallOpencv.sh index 2c2ddd1..5e2a3d4 100755 --- a/docker/docker.base/buildAndInstallOpencv.sh +++ b/docker/docker.base/buildAndInstallOpencv.sh @@ -8,6 +8,8 @@ rm -rf $CURRENT/opencv_contrib set -e +numcores=`nproc` + git clone https://github.com/opencv/opencv_contrib.git git clone https://github.com/opencv/opencv.git cd $CURRENT/opencv_contrib @@ -19,7 +21,7 @@ cd $CURRENT/opencv/build #cmake -D OPENCV_ENABLE_MEMALIGN=OFF -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D INSTALL_C_EXAMPLES=ON -D OPENCV_IO_ENABLE_JASPER=ON -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules -D WITH_TBB=ON .. #cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D INSTALL_C_EXAMPLES=ON -D OPENCV_IO_ENABLE_JASPER=ON -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules .. cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D INSTALL_C_EXAMPLES=ON -D OPENCV_IO_ENABLE_JASPER=ON -D OPENCV_EXTRA_MODULES_PATH=../../opencv_contrib/modules -D WITH_TBB=ON .. -make -j 24 +make -j $numcores sudo make install sudo sh -c 'echo "/usr/local/lib" > /etc/ld.so.conf.d/opencv.conf' sudo ldconfig diff --git a/docker/docker.loghi-tooling/Dockerfile b/docker/docker.loghi-tooling/Dockerfile index 6ad4280..4476685 100644 --- a/docker/docker.loghi-tooling/Dockerfile +++ b/docker/docker.loghi-tooling/Dockerfile @@ -3,22 +3,25 @@ FROM docker.base EXPOSE 9006 RUN useradd -u 1000 rutger -#RUN useradd -m builder + RUN apt-get update && \ apt-get install -y locales && rm -rf /var/lib/apt # apt-get install -y openjdk-11-jre maven postgresql-client locales RUN locale-gen en_US.UTF-8 +ARG LOGHI_VERSION=1.0-SNAPSHOT + ENV LANG en_US.UTF-8 ENV LANGUAGE en_US:en ENV LC_ALL en_US.UTF-8 +ENV LOGHI_VERSION=${LOGHI_VERSION} COPY loghi-tooling /src/loghi-tooling COPY prima-core-libs /src/prima-core-libs COPY dependency-check-data /root/.m2/repository/org/owasp/dependency-check-data WORKDIR /src/prima-core-libs/java -#USER builder + RUN apt-get update && apt-get --no-install-recommends install -y openjdk-11-jre maven postgresql-client libdc1394-22 libavcodec58 libavformat58 libswscale5 libtbb2 git \ && mvn deploy:deploy-file -Durl=file:///$HOME/repo -Dfile=/usr/local/share/java/opencv4/opencv-490.jar -DgroupId=org.opencv -DartifactId=opencv -Dpackaging=jar -Dversion=4.9.0 \ && mvn clean package @@ -29,8 +32,9 @@ RUN cd /src \ && mvn deploy:deploy-file -Durl=file:///$HOME/repo -Dfile=target/langident-1.0.5-SNAPSHOT.jar -DgroupId=nl.knaw.huygens.pergamon.nlp -DartifactId=langident -Dpackaging=jar -Dversion=1.0.5 # && mvn clean package \ +# && mvn org.owasp:dependency-check-maven:check versions:set -DnewVersion=$LOGHI_VERSION clean package \ RUN cd /src/loghi-tooling \ - && mvn clean org.owasp:dependency-check-maven:check package \ + && mvn versions:set -DnewVersion=$LOGHI_VERSION clean package \ && find . -name src | xargs rm -rf \ && find . -name test-classes | xargs rm -rf \ && rm -rf /src/loghi-tooling/layoutanalyzer/target \ @@ -38,29 +42,3 @@ RUN cd /src/loghi-tooling \ && rm -rf /var/lib/apt \ && rm -rf /home/root \ && rm -rf /root - - -#WORKDIR /src -#RUN wget https://dl.min.io/client/mc/release/linux-amd64/mcli_20221029100923.0.0_amd64.deb && dpkg -i mcli_20221029100923.0.0_amd64.deb && rm mcli_20221029100923.0.0_amd64.deb -#RUN mcli alias set myminio/ http://MINIO-SERVER MYUSER MYPASSWORD - - -#FROM docker.base -#RUN useradd -u 1000 rutger -#RUN apt-get update \ -# && apt-get install -y --no-install-recommends openjdk-11-jre libtbb2 \ -# && apt autoremove -y -# -#COPY --from=0 /src/loghi-tooling /src/loghi-tooling -# -#USER root - -#USER rutger -# && apt remove -y git autotools-dev dpkg-dev icu-devtools libaec-dev libavutil-dev libblkid-dev libc-dev-bin libc6-dev libcrypt-dev libdatrie-dev \ -# libegl-dev libexif-dev libexpat1-dev libffi-dev libfreetype6-dev libfribidi-dev libgcc-9-dev libgl-dev libglib2.0-dev-bin libglu1-mesa-dev libglx-dev \ -# libgraphite2-dev libice-dev libicu-dev libjbig-dev libjpeg-turbo8-dev liblzma-dev libmount-dev libogg-dev libopenblas-pthread-dev libpcre2-dev libpcre3-dev \ -# libpixman-1-dev libpthread-stubs0-dev libqt5opengl5-dev libraw1394-dev libselinux1-dev libsepol1-dev libsm-dev libstdc++-9-dev libswresample-dev libthai-dev \ -# libvulkan-dev libx11-dev libxau-dev libxcb-render0-dev libxcb-shm0-dev libxcb1-dev libxcomposite-dev libxcursor-dev libxdamage-dev libxdmcp-dev libxext-dev \ -# libxfixes-dev libxi-dev libxinerama-dev libxrandr-dev libxrender-dev libxt-dev linux-libc-dev manpages-dev qtbase5-dev qtbase5-dev-tools uuid-dev \ -# x11proto-core-dev x11proto-dev x11proto-input-dev x11proto-randr-dev x11proto-xext-dev x11proto-xinerama-dev xtrans-dev zlib1g-dev libllvm10 libllvm12 \ -# libx265-179 perl-modules-5.30 libperl5.30 humanity-icon-theme \ diff --git a/docker/docker.loghi-tooling/buildImage.sh b/docker/docker.loghi-tooling/buildImage.sh index 8697be4..f5e4898 100755 --- a/docker/docker.loghi-tooling/buildImage.sh +++ b/docker/docker.loghi-tooling/buildImage.sh @@ -6,9 +6,11 @@ set -e if [ -z $1 ]; then echo "first parameter should be the path of prima-core-libs" && exit 1; fi; if [ -z $2 ]; then echo "second parameter should be the path of loghi-tooling" && exit 1; fi; +if [ -z $3 ]; then echo "third parameter should be version which loghi-tooling will get" && exit 1; fi; PRIMACORELIBS="$(realpath $1)" LOGHITOOLING="$(realpath $2)" +LOGHI_VERSION=$3 echo "Change to directory of script..." DIR_OF_SCRIPT="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" @@ -30,8 +32,7 @@ rm -rf ./loghi-tooling/layoutanalyzer/src/test/resources/in/*.png echo "Building docker image..." -docker build --no-cache . -t loghi/docker.loghi-tooling - +docker build --no-cache -t loghi/docker.loghi-tooling --build-arg LOGHI_VERSION=$LOGHI_VERSION . echo "cleaning up!" rm -rf prima-core-libs rm -rf loghi-tooling diff --git a/laypa b/laypa index d53e064..e3e4816 160000 --- a/laypa +++ b/laypa @@ -1 +1 @@ -Subproject commit d53e064f4af6c2492c0b3c8166f2976f8e67d8d8 +Subproject commit e3e4816c7a435b9c966ec92191ce1935673aa16d diff --git a/loghi-htr b/loghi-htr index 33ab059..67750f3 160000 --- a/loghi-htr +++ b/loghi-htr @@ -1 +1 @@ -Subproject commit 33ab05932ee4d9018914621725cbeffed438a54b +Subproject commit 67750f3d2647068670963322d950b0c3fa12927a diff --git a/loghi-tooling b/loghi-tooling index 25b1819..d1fb829 160000 --- a/loghi-tooling +++ b/loghi-tooling @@ -1 +1 @@ -Subproject commit 25b181902a08dbfbca48e8c97a6d5a9e6d8a399b +Subproject commit d1fb829872d95a558b7da1320ca2c6ca207d687c diff --git a/scripts/create-train-data.sh b/scripts/create-train-data.sh index 8157e96..2d5a3b5 100755 --- a/scripts/create-train-data.sh +++ b/scripts/create-train-data.sh @@ -1,5 +1,5 @@ #!/bin/bash -VERSION=2.0.7 +VERSION=2.1.1 # User-configurable parameters # Percentage split for training and validation sets diff --git a/scripts/htr-train-pipeline.sh b/scripts/htr-train-pipeline.sh index 38071db..e2313fc 100755 --- a/scripts/htr-train-pipeline.sh +++ b/scripts/htr-train-pipeline.sh @@ -1,5 +1,5 @@ #!/bin/bash -VERSION=2.0.7 +VERSION=2.1.1 set -e # User-configurable parameters diff --git a/scripts/inference-pipeline.sh b/scripts/inference-pipeline.sh index 5d9c558..14e463f 100755 --- a/scripts/inference-pipeline.sh +++ b/scripts/inference-pipeline.sh @@ -1,5 +1,5 @@ #!/bin/bash -VERSION=2.0.7 +VERSION=2.1.1 set -e # User-configurable parameters