Skip to content

Commit

Permalink
Merge pull request #30 from acdh-oeaw/29-harden-library
Browse files Browse the repository at this point in the history
29 harden library
  • Loading branch information
csae8092 authored Dec 18, 2023
2 parents 06d831e + 78f294e commit 866904b
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 5 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

setup(
name="acdh-transkribus-utils",
version="2.10",
version="2.11",
description="""some utility function to interact with the Transkribus-API""",
long_description=readme,
long_description_content_type="text/markdown",
Expand Down
50 changes: 48 additions & 2 deletions tests/test_package.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os
import shutil
import unittest
from pathlib import Path
import pytest

from acdh_xml_pyutils.xml import XMLReader

Expand Down Expand Up @@ -86,7 +88,51 @@ def test_011_user_id(self):

def test_012_add_user(self):
    """Add the client's own user to the test collection and check the status text.

    The status returned by ``add_user_to_collection`` is expected to mention
    both the collection id and the user name.
    """
    client = CLIENT
    # Call the API exactly once, against the shared COL_ID constant.
    status = client.add_user_to_collection(
        client.user, col_id=COL_ID, send_mail=False
    )
    # Check against the collection that was actually used instead of a
    # hard-coded literal, so the test keeps working if COL_ID changes.
    self.assertIn(f"{COL_ID}", status)
    self.assertIn(f"{client.user}", status)

def test_013_wrong_credentials(self):
    """Constructing the client with invalid credentials must raise."""
    # NOTE(review): the user literal looks like an e-mail-obfuscation
    # placeholder left by a page capture -- confirm against the repository.
    bad_credentials = {
        "user": "[email protected]",
        "password": "this-wont work",
    }
    with pytest.raises(Exception):
        ACDHTranskribusUtils(**bad_credentials)

def test_014_doc_md(self):
    """Fetch a document's metadata and check it reports the requested id."""
    doc_id = 1351422
    doc_md = CLIENT.get_doc_md(doc_id, COL_ID)
    # assertTrue(value, msg) only checks truthiness and uses its second
    # argument as the failure message; assertEqual really compares the ids.
    self.assertEqual(doc_md["docId"], doc_id)

def test_014_get_fulldoc_md(self):
    """Fetch the full document metadata and check the reported page count."""
    doc_id = 1351422
    doc_md = CLIENT.get_fulldoc_md(doc_id, COL_ID)
    # assertTrue(value, 10) would pass for any truthy page count because the
    # second argument is just the failure message; compare explicitly.
    # NOTE(review): assumes nrOfPages is returned as an int -- confirm.
    self.assertEqual(doc_md["extra_info"]["nrOfPages"], 10)

def test_015_save_mets_to_file(self):
    """Save one document's METS file and check that it exists afterwards."""
    doc_id = 1351422
    mets_name = f"{doc_id}_mets.xml"
    mets_path = Path(mets_name)
    # Start from a clean slate; a missing file is not an error here.
    try:
        os.remove(mets_name)
    except OSError:
        pass
    CLIENT.save_mets_to_file(doc_id, COL_ID)
    self.assertTrue(mets_path.is_file())
    # Remove the file written by the call above.
    os.remove(mets_name)

def test_016_list_documents(self):
    """List the documents of the test collection and check how many there are."""
    result = CLIENT.list_documents(COL_ID)
    # assertTrue(len(result), 5) passes for any non-empty result because the
    # second argument is only the failure message; compare the count itself.
    self.assertEqual(len(result), 5)

def test_017_dl_collection(self):
    """Download the collection as METS and check one document's file exists.

    The test expects ``collection_to_mets`` to write into a directory named
    after ``COL_ID`` below the current working directory.
    """
    col_dir = f"{COL_ID}"
    doc_id = 1351422
    # Remove leftovers from an earlier run before downloading.
    shutil.rmtree(col_dir, ignore_errors=True)
    try:
        result = CLIENT.collection_to_mets(COL_ID)
        self.assertIn(doc_id, result)
        mets_file = Path(col_dir) / f"{doc_id}_mets.xml"
        self.assertTrue(mets_file.is_file())
    finally:
        # Clean up even when the download or an assertion fails, so a failed
        # run does not leave the downloaded collection on disk.
        shutil.rmtree(col_dir, ignore_errors=True)
9 changes: 7 additions & 2 deletions transkribus_utils/transkribus_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def get_fulldoc_md(self, doc_id, col_id, page_id="1"):
result["img_url"] = doc_xml.xpath("./url/text()")[0]
result["img_url"] = doc_xml.xpath("./url/text()")[0]
result["extra_info"] = self.get_doc_md(
doc_id, base_url=self.base_url, col_id=col_id
doc_id, col_id=col_id
)
return result
else:
Expand Down Expand Up @@ -282,7 +282,12 @@ def collection_to_mets(self, col_id, file_path=".", filter_by_doc_ids=[]):
print(f"{len(doc_ids)} to download")
counter = 1
for doc_id in doc_ids:
save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir)
try:
save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir)
except Exception as e:
print(f"failed to save mets for DOC-ID: {doc_id} in COLLECTION: {col_id} due to ERROR: {e}")
counter += 1
continue
file_list = self.save_image_names_to_file(doc_id, col_id, file_path=col_dir)
print(f"saving: {save_mets}")
print(f"saving: {file_list}")
Expand Down

0 comments on commit 866904b

Please sign in to comment.