Skip to content

Commit

Permalink
Merge branch 'master' into test-workflow
Browse files Browse the repository at this point in the history
# Conflicts:
#	.scrutinizer.yml
  • Loading branch information
kba committed May 3, 2024
2 parents 6ecbaa8 + e88d646 commit f714742
Show file tree
Hide file tree
Showing 33 changed files with 121 additions and 99 deletions.
13 changes: 0 additions & 13 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,6 @@ jobs:
- run: make install
- run: export PATH="/Users/distiller/Library/Python/3.9/bin:$PATH"; make deps-test test benchmark

test-python37:
docker:
- image: cimg/python:3.7
working_directory: ~/ocrd-core
steps:
- checkout
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark
# smoke test to ensure that --editable install works
- run: make install-dev; ocrd --version

test-python38:
docker:
- image: cimg/python:3.8
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/docker-image.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ jobs:
contents: read

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- # Activate cache export feature to reduce build time of images
name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
uses: docker/setup-buildx-action@v3
- name: Build the Docker image
# default tag uses docker.io, so override on command-line
run: make docker DOCKER_TAG=${{ env.GHCRIO_DOCKER_TAG }}
Expand All @@ -34,13 +34,13 @@ jobs:
docker run --rm ${{ env.GHCRIO_DOCKER_TAG }} ocrd --version
docker run --rm ${{ env.GHCRIO_DOCKER_TAG }}-cuda ocrd --version
- name: Login to GitHub Container Registry
uses: docker/login-action@v2
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Log in to Docker Hub
uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERIO_USERNAME }}
password: ${{ secrets.DOCKERIO_PASSWORD }}
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/network-testing.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,22 @@ jobs:
fail-fast: false
matrix:
python-version:
- '3.7'
- '3.8'
- '3.9'
- '3.10'
- '3.11'
- '3.12'
os:
- ubuntu-22.04
# - macos-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Homebrew
id: set-up-homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/unit-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,23 @@ jobs:
fail-fast: false
matrix:
python-version:
- '3.7'
- '3.8'
- '3.9'
- '3.10'
- '3.11'
- '3.12'
os:
- ubuntu-22.04
- ubuntu-20.04
# - macos-latest
- macos-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Homebrew
id: set-up-homebrew
uses: Homebrew/actions/setup-homebrew@master
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
46 changes: 27 additions & 19 deletions .scrutinizer.yml
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
checks:
python: true
python: true

build:
nodes:
analysis:
tests:
override:
- py-scrutinizer-run
-
command: pylint-run
use_website_config: true

tests:
tests:
override:
- true
image: default-bionic
environment:
python:
version: 3.8.2
virtualenv: true
nodes:
analysis:
dependencies:
override:
- sudo make deps-ubuntu
- make install
tests:
override:
- py-scrutinizer-run
-
command: pylint-run
use_website_config: false
tests:
tests:
override:
- true
filter:
excluded_paths:
- '*/test/*'
- 'core-models/ocrd_models/model/ocrd_page_generateds.py'
dependency_paths:
- 'lib/*'
excluded_paths:
- 'tests/*'
- 'ocrd_models/ocrd_models/ocrd_page_generateds.py'
dependency_paths:
- 'lib/*'
47 changes: 36 additions & 11 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,33 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Fixed:

- bashlib processors will download on-demand, like pythonic processors do, #1216, #1217

Changed:

- Replace `distutils` with equivalents from `shutil` for compatibility with Python 3.12+, #1219
- CI: Updated GitHub actions, #1206
- CI: Fixed scrutinizer, #1217

## [2.64.1] - 2024-04-22

Fixed:

* Broken PyPI release

## [2.64.0] - 2024-04-22

Removed:

* Support for Python `<=` 3.7, #1207

Fixed:

* remove duplicate description of `OCRD_DOWNLOAD_TIMEOUT` in `--help`, #1204
* Use `importlib_metadata` shim for 3.9+, #1210, OCR-D/ocrd_froc#10

## [2.63.3] - 2024-03-07

Added:
Expand Down Expand Up @@ -96,7 +123,7 @@ Fixed:

Fixed:

* Log level downgraded from DEBUG to INFO in loggin.conf, #1161
* Log level downgraded from DEBUG to INFO in logging.conf, #1161
* log OAI check as `DEBUG` not `INFO`, #1160

## [2.60.1] - 2023-12-15
Expand All @@ -121,7 +148,7 @@ Changed:
Fixed:

* Chunking algorithm for `ocrd workspace list-page` now handles edge cases properly, #1145
* Avoid deadlocks in `ocrd_network` if procesing workers not deployed, #1125, #1142
* Avoid deadlocks in `ocrd_network` if processing workers not deployed, #1125, #1142

## [2.59.0] - 2023-11-27

Expand Down Expand Up @@ -262,7 +289,7 @@ Changed:

Added

* Environement variables to control optional retries and timeouts for downloading files:
* Environment variables to control optional retries and timeouts for downloading files:
* `OCRD_DOWNLOAD_RETRIES`: Number of times to retry failed attempts for downloads of workspace files. #1073
* `OCRD_DOWNLOAD_TIMEOUT`: Timeout in seconds for connecting or reading (comma-separated) when downloading. #1073
* Environment variables used throughout core are now documented in README and `ocrd --help`, #1073
Expand Down Expand Up @@ -548,7 +575,7 @@ Fixed:

Changed:

* Consistenly use snake_case but continue to support CamelCase for kwargs and CLI options, #874, #862
* Consistently use snake_case but continue to support CamelCase for kwargs and CLI options, #874, #862
* Update to spec to 3.19.0, introducing greater flexibility in describing parameters, #872, #848, OCR-D/spec#206
* `ocrd workspace merge`: support mapping `file_id` and `page_id` in addition to `file_grp`, #886, #888
* `ocrd workspace merge`: rebase `OcrdFile.url` to target workspace, #887, #888
Expand Down Expand Up @@ -958,7 +985,7 @@ Changed:

Added:

* processors can `self.add_metada(pcgts)` to add a self-describing `pg:MetadataItem`, #574
* processors can `self.add_metadata(pcgts)` to add a self-describing `pg:MetadataItem`, #574


## [2.13.2] - 2020-08-13
Expand Down Expand Up @@ -1005,7 +1032,7 @@ Fixed:

Fixed:

* logging no longer intereferes with `--dump-json`/`--help`/`--version`, #540, #546
* logging no longer interferes with `--dump-json`/`--help`/`--version`, #540, #546

## [2.12.3] - 2020-07-23

Expand Down Expand Up @@ -1122,7 +1149,7 @@ Changed:
Added:

* Workspace: Optional `overwrite_mode` that sets `force` for all operations
* `OcrdPage`: `get_AllAlternaiveImagePaths` to list all `pc:AlternativeImage/@filename` referenced in a PcGts, #434, #471
* `OcrdPage`: `get_AllAlternativeImagePaths` to list all `pc:AlternativeImage/@filename` referenced in a PcGts, #434, #471
* `ocrd workspace bulk-add` to add many files at once to a workspace, #428
* `OcrdMets.add_file`: `ignore` parameter to optionally disable looking for existing files, #428

Expand Down Expand Up @@ -1832,7 +1859,7 @@ Changed:
a temporary directory but reuse the existing directory
* When not providing `mets_basename`, assume the last URL path segment to be
the METS basename instead of the fixed string `mets.xml`
* incoroporate changes to ocrd_tool schema from spec/v2.2.1
* incorporate changes to ocrd_tool schema from spec/v2.2.1

## [0.6.0] - 2018-07-23

Expand Down Expand Up @@ -2038,10 +2065,8 @@ Fixed
Initial Release

<!-- link-labels -->
<<<<<<< HEAD
=======
[2.64.0]: ../../compare/v2.64.0..v2.63.3
[2.63.3]: ../../compare/v2.63.3..v2.63.2
>>>>>>> master
[2.63.2]: ../../compare/v2.63.2..v2.63.1
[2.63.1]: ../../compare/v2.63.1..v2.63.0
[2.63.0]: ../../compare/v2.63.0..v2.62.0
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pip install ocrd
pip install ocrd_modelfactory
```

All python software released by [OCR-D](https://github.com/OCR-D) requires Python 3.7 or higher.
All Python software released by [OCR-D](https://github.com/OCR-D) requires Python 3.8 or higher.

**NOTE** Some OCR-D-Tools (or even test cases) _might_ reveal an unintended behavior if you have specific environment modifications, like:
* using a custom build of [ImageMagick](https://github.com/ImageMagick/ImageMagick), whose format delegates are different from what OCR-D supposes
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.63.3
2.64.1
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ name = "ocrd"
authors = [{name = "Konstantin Baierer", email = "[email protected]"}]
license = {text = "Apache License 2.0"}
description = "OCR-D framework"
requires-python = ">=3.7"
requires-python = ">=3.8"
dynamic = ['version', 'dependencies']

[project.readme]
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ frozendict>=2.3.4
gdown
httpx>=0.22.0
importlib_metadata ; python_version < '3.8'
importlib_resources ; python_version < '3.9'
importlib_resources ; python_version < '3.10'
jsonschema
lxml
memory-profiler >= 0.58.0
Expand Down
3 changes: 1 addition & 2 deletions requirements_test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ click
twine
wheel
# For typing-compatible lxml interface definitions
types-lxml ; python_version > '3.7'
lxml-stubs ; python_version <= '3.7'
types-lxml
2 changes: 0 additions & 2 deletions src/ocrd/cli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@
\b
{config.describe('OCRD_METS_CACHING')}
\b
{config.describe('OCRD_DOWNLOAD_TIMEOUT')}
\b
{config.describe('OCRD_MAX_PROCESSOR_CACHE')}
\b
{config.describe('OCRD_NETWORK_SERVER_ADDR_PROCESSING')}
Expand Down
4 changes: 4 additions & 0 deletions src/ocrd/cli/bashlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def bashlib_input_files(**kwargs):
input_file_grp=kwargs['input_file_grp'],
output_file_grp=kwargs['output_file_grp'])
for input_files in processor.zip_input_files(mimetype=None, on_error='abort'):
# ensure all input files exist locally (without persisting them in the METS)
# - this mimics the default behaviour of all Pythonic processors
input_files = [workspace.download_file(input_file) if input_file else None
for input_file in input_files]
for field in ['url', 'local_filename', 'ID', 'mimetype', 'pageId']:
# make this bash-friendly (show initialization for associative array)
if len(input_files) > 1:
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd/cli/resmgr.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"""
import sys
from pathlib import Path
from distutils.spawn import find_executable as which
from shutil import which
from yaml import safe_load, safe_dump

import requests
Expand Down
11 changes: 7 additions & 4 deletions src/ocrd/mets_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import socket
import atexit

from fastapi import FastAPI, Request, Form, Response
from fastapi import FastAPI, Request, Form, Response, requests
from fastapi.responses import JSONResponse
from requests import Session as requests_session
from requests.exceptions import ConnectionError
Expand Down Expand Up @@ -99,10 +99,13 @@ class ClientSideOcrdMets():
"""

def __init__(self, url):
protocol = 'tcp' if url.startswith('http://') else 'uds'
self.protocol = 'tcp' if url.startswith('http://') else 'uds'
self.log = getLogger(f'ocrd.mets_client[{url}]')
self.url = url if protocol == 'tcp' else f'http+unix://{url.replace("/", "%2F")}'
self.session = requests_session() if protocol == 'tcp' else requests_unixsocket_session()
self.url = url if self.protocol == 'tcp' else f'http+unix://{url.replace("/", "%2F")}'

@property
def session(self) -> Union[requests_session, requests_unixsocket_session]:
return requests_session() if self.protocol == 'tcp' else requests_unixsocket_session()

def __getattr__(self, name):
raise NotImplementedError(f"ClientSideOcrdMets has no access to '{name}' - try without METS server")
Expand Down
2 changes: 1 addition & 1 deletion src/ocrd/processor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from ocrd_validators import ParameterValidator
from ocrd_models.ocrd_page import MetadataItemType, LabelType, LabelsType

# XXX imports must remain for backwards-compatibilty
# XXX imports must remain for backwards-compatibility
from .helpers import run_cli, run_processor, generate_processor_help # pylint: disable=unused-import

class Processor():
Expand Down
3 changes: 1 addition & 2 deletions src/ocrd/task_sequence.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import json
from shlex import split as shlex_split
from distutils.spawn import find_executable as which # pylint: disable=import-error,no-name-in-module
from subprocess import run, PIPE
from shutil import which

from ocrd_utils import getLogger, parse_json_string_or_file, set_json_key_value_overrides, get_ocrd_tool_json
# from collections import Counter
Expand Down
Loading

0 comments on commit f714742

Please sign in to comment.