Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: acdh-oeaw/acdh-transkribus-utils
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: v2.9
Choose a base ref
...
head repository: acdh-oeaw/acdh-transkribus-utils
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: main
Choose a head ref
  • 9 commits
  • 4 files changed
  • 1 contributor

Commits on Aug 20, 2023

  1. Copy the full SHA
    ebb5788 View commit details
  2. Merge pull request #28 from acdh-oeaw/27-add-method-to-run-htr

    added method to start HTR on document
    csae8092 authored Aug 20, 2023
    Copy the full SHA
    5afba1e View commit details
  3. version bump [skip ci]

    csae8092 committed Aug 20, 2023
    Copy the full SHA
    013f07a View commit details

Commits on Nov 14, 2023

  1. Copy the full SHA
    06d831e View commit details

Commits on Dec 18, 2023

  1. more tests

    csae8092 committed Dec 18, 2023
    Copy the full SHA
    384f0e3 View commit details
  2. Copy the full SHA
    776adaf View commit details
  3. version bump

    csae8092 committed Dec 18, 2023
    Copy the full SHA
    78f294e View commit details
  4. Merge pull request #30 from acdh-oeaw/29-harden-library

    29 harden library
    csae8092 authored Dec 18, 2023
    Copy the full SHA
    866904b View commit details
  5. Copy the full SHA
    90090de View commit details
Showing with 91 additions and 19 deletions.
  1. +1 −1 README.md
  2. +1 −12 setup.py
  3. +48 −2 tests/test_package.py
  4. +41 −4 transkribus_utils/transkribus_utils.py
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@
[![PyPI version](https://badge.fury.io/py/acdh-transkribus-utils.svg)](https://badge.fury.io/py/acdh-transkribus-utils)
[![flake8 Lint](https://github.com/acdh-oeaw/acdh-transkribus-utils/actions/workflows/lint.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-transkribus-utils/actions/workflows/lint.yml)
[![Test](https://github.com/acdh-oeaw/acdh-transkribus-utils/actions/workflows/test.yml/badge.svg)](https://github.com/acdh-oeaw/acdh-transkribus-utils/actions/workflows/test.yml)
[![codecov](https://codecov.io/gh/acdh-oeaw/acdh-transkribus-utils/branch/master/graph/badge.svg?token=QOY62C0X5Y)](https://codecov.io/gh/acdh-oeaw/acdh-transkribus-utils)
[![codecov](https://codecov.io/gh/acdh-oeaw/acdh-transkribus-utils/graph/badge.svg?token=QOY62C0X5Y)](https://codecov.io/gh/acdh-oeaw/acdh-transkribus-utils)

A python package providing some utility functions for interacting with the [Transkribus-API](https://transkribus.eu/wiki/index.php/REST_Interface)

13 changes: 1 addition & 12 deletions setup.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@

setup(
name="acdh-transkribus-utils",
version="2.9",
version="2.11",
description="""some utility function to interact with the Transkribus-API""",
long_description=readme,
long_description_content_type="text/markdown",
@@ -26,15 +26,4 @@
license="MIT",
zip_safe=False,
keywords="acdh-transkribus-utils",
classifiers=[
"Development Status :: 3 - Alpha",
"Framework :: Django :: 2.0",
"Intended Audience :: Developers",
"License :: OSI Approved :: BSD License",
"Natural Language :: English",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.4",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
],
)
50 changes: 48 additions & 2 deletions tests/test_package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import shutil
import unittest
from pathlib import Path
import pytest

from acdh_xml_pyutils.xml import XMLReader

@@ -86,7 +88,51 @@ def test_011_user_id(self):

def test_012_add_user(self):
client = CLIENT
col_id = 190357
status = client.add_user_to_collection(client.user, col_id=col_id, send_mail=False)
status = client.add_user_to_collection(
client.user, col_id=COL_ID, send_mail=False
)
self.assertTrue(f"{190357}" in status)
self.assertTrue(f"{client.user}" in status)

def test_013_wrong_credentials(self):
    """Constructing the client with bogus credentials must raise."""
    bogus_login = {
        "user": "whatever@gmail.com",
        "password": "this-wont work",
    }
    with pytest.raises(Exception):
        ACDHTranskribusUtils(**bogus_login)

def test_014_doc_md(self):
    """The metadata returned by ``get_doc_md`` must carry the requested ``docId``."""
    doc_id = 1351422
    doc_md = CLIENT.get_doc_md(doc_id, COL_ID)
    # assertEqual instead of assertTrue: assertTrue(a, b) only checks that
    # ``a`` is truthy and uses ``b`` as the failure message, so the original
    # assertion never compared the two values.
    # NOTE(review): assumes the API returns docId as an int -- confirm.
    self.assertEqual(doc_md["docId"], doc_id)

def test_014_get_fulldoc_md(self):
    """``get_fulldoc_md`` must expose the document metadata under ``extra_info``."""
    doc_id = 1351422
    doc_md = CLIENT.get_fulldoc_md(doc_id, COL_ID)
    # assertEqual instead of assertTrue: the two-argument assertTrue form
    # treats the second argument as a message and compares nothing.
    # NOTE(review): expects the test document to have exactly 10 pages and
    # nrOfPages to be an int -- confirm against the test collection.
    self.assertEqual(doc_md["extra_info"]["nrOfPages"], 10)

def test_015_save_mets_to_file(self):
    """Call ``save_mets_to_file`` and check ``<doc_id>_mets.xml`` exists afterwards."""
    doc_id = 1351422
    mets_name = f"{doc_id}_mets.xml"
    # Remove any leftover from a previous run; a missing file is fine.
    try:
        os.remove(mets_name)
    except OSError:
        pass
    CLIENT.save_mets_to_file(doc_id, COL_ID)
    self.assertTrue(Path(mets_name).is_file())
    # Clean up the file the call just produced.
    os.remove(mets_name)

def test_016_list_documents(self):
    """``list_documents`` must return the expected number of documents."""
    result = CLIENT.list_documents(COL_ID)
    # assertEqual instead of assertTrue: assertTrue(len(result), 5) passes for
    # any non-empty result because 5 is only used as the failure message.
    # NOTE(review): expects the test collection to hold exactly 5 documents --
    # confirm against the live collection.
    self.assertEqual(len(result), 5)

def test_017_dl_collection(self):
    """Run ``collection_to_mets`` and check the result and the written METS file."""
    doc_id = 1351422
    col_dir = f"{COL_ID}"
    # Start without a stale download directory.
    shutil.rmtree(col_dir, ignore_errors=True)
    result = CLIENT.collection_to_mets(COL_ID)
    self.assertIn(doc_id, result)
    mets_path = Path(col_dir) / f"{doc_id}_mets.xml"
    self.assertTrue(mets_path.is_file())
    # Remove the directory again so the test leaves nothing behind.
    shutil.rmtree(col_dir, ignore_errors=True)
45 changes: 41 additions & 4 deletions transkribus_utils/transkribus_utils.py
Original file line number Diff line number Diff line change
@@ -152,7 +152,7 @@ def get_fulldoc_md(self, doc_id, col_id, page_id="1"):
result["img_url"] = doc_xml.xpath("./url/text()")[0]
result["img_url"] = doc_xml.xpath("./url/text()")[0]
result["extra_info"] = self.get_doc_md(
doc_id, base_url=self.base_url, col_id=col_id
doc_id, col_id=col_id
)
return result
else:
@@ -282,7 +282,12 @@ def collection_to_mets(self, col_id, file_path=".", filter_by_doc_ids=[]):
print(f"{len(doc_ids)} to download")
counter = 1
for doc_id in doc_ids:
save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir)
try:
save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir)
except Exception as e:
print(f"failed to save mets for DOC-ID: {doc_id} in COLLECTION: {col_id} due to ERROR: {e}")
counter += 1
continue
file_list = self.save_image_names_to_file(doc_id, col_id, file_path=col_dir)
print(f"saving: {save_mets}")
print(f"saving: {file_list}")
@@ -443,10 +448,14 @@ def get_user_id(self, user_name: str) -> int:
user_id = response["trpUser"][0]["userId"]
return user_id

def add_user_to_collection(self, user_name: str, col_id: int, role: str = "Owner", send_mail: bool = True) -> str:
def add_user_to_collection(
self, user_name: str, col_id: int, role: str = "Owner", send_mail: bool = True
) -> str:
"""adds user to given collection"""
user_id = self.get_user_id(user_name)
result_msg = f"looks like something went wront adding {user_name} to collection {col_id}"
result_msg = (
f"looks like something went wront adding {user_name} to collection {col_id}"
)
params = {"userid": user_id, "role": role}
if not send_mail:
params = {"userid": user_id, "role": role, "sendMail": False}
@@ -496,6 +505,34 @@ def create_status_report(
docs.append(doc_stats)
return docs

def run_htr(
    self,
    col_id: int | str,
    doc_id: int | str,
    start_page: int = 1,
    end_page: None | int = None,
    model_id: int = 51170,
) -> str | None:
    """Start an HTR job for a document and return its job ID.

    Sends a POST request to
    ``{base_url}/recognition/{col_id}/{model_id}/trhtr`` with the document
    ID and the page selection as query parameters, using
    ``self.login_cookie`` as cookies.

    :param col_id: ID of the collection, used in the request URL
    :param doc_id: ID of the document, sent as the ``id`` parameter
    :param start_page: start of the page selection, defaults to 1
    :param end_page: end of the page selection; when omitted (or falsy)
        only ``start_page`` is sent
    :param model_id: ID of the model, used in the request URL
    :return: the response body (the job ID) when the request returns
        HTTP 200, otherwise ``None``
    """
    # A single page is sent as "N", a range as "N-M".
    pages = f"{start_page}-{end_page}" if end_page else f"{start_page}"
    res = requests.post(
        f"{self.base_url}/recognition/{col_id}/{model_id}/trhtr",
        cookies=self.login_cookie,
        params={"id": doc_id, "pages": pages},
    )
    if res.status_code != 200:
        print("something went wrong")
        # Explicit None keeps both return paths consistent.
        return None
    job_id = res.text
    print(f"started HTR for DOC-ID: {doc_id} with JOB-ID {job_id}")
    return job_id

def __init__(
self,
user=None,