diff --git a/setup.py b/setup.py index e604392..4cf5a25 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name="acdh-transkribus-utils", - version="2.10", + version="2.11", description="""some utility function to interact with the Transkribus-API""", long_description=readme, long_description_content_type="text/markdown", diff --git a/tests/test_package.py b/tests/test_package.py index ad7e771..0b59007 100644 --- a/tests/test_package.py +++ b/tests/test_package.py @@ -1,6 +1,8 @@ import os +import shutil import unittest from pathlib import Path +import pytest from acdh_xml_pyutils.xml import XMLReader @@ -86,7 +88,51 @@ def test_011_user_id(self): def test_012_add_user(self): client = CLIENT - col_id = 190357 - status = client.add_user_to_collection(client.user, col_id=col_id, send_mail=False) + status = client.add_user_to_collection( + client.user, col_id=COL_ID, send_mail=False + ) self.assertTrue(f"{190357}" in status) self.assertTrue(f"{client.user}" in status) + + def test_013_wrong_credentials(self): + with pytest.raises(Exception): + ACDHTranskribusUtils(user="whatever@gmail.com", password="this-wont work") + + def test_014_doc_md(self): + client = CLIENT + doc_id = 1351422 + doc_md = client.get_doc_md(doc_id, COL_ID) + self.assertTrue(doc_md["docId"], doc_id) + + def test_014_get_fulldoc_md(self): + client = CLIENT + doc_id = 1351422 + doc_md = client.get_fulldoc_md(doc_id, COL_ID) + self.assertTrue(doc_md["extra_info"]["nrOfPages"], 10) + + def test_015_save_mets_to_file(self): + client = CLIENT + doc_id = 1351422 + f_name = f"{doc_id}_mets.xml" + try: + os.remove(f_name) + except OSError: + pass + client.save_mets_to_file(doc_id, COL_ID) + my_file = Path(f_name) + self.assertTrue(my_file.is_file()) + os.remove(f_name) + + def test_016_list_documents(self): + client = CLIENT + result = client.list_documents(COL_ID) + self.assertTrue(len(result), 5) + + def test_017_dl_collection(self): + shutil.rmtree(f"{COL_ID}", ignore_errors=True) + result = CLIENT.collection_to_mets(COL_ID) + doc_id = 1351422 + self.assertTrue(doc_id in result) + my_file = Path(os.path.join(f"{COL_ID}", f"{doc_id}_mets.xml")) + self.assertTrue(my_file.is_file()) + shutil.rmtree(f"{COL_ID}", ignore_errors=True) diff --git a/transkribus_utils/transkribus_utils.py b/transkribus_utils/transkribus_utils.py index 5d61e2d..68c7247 100644 --- a/transkribus_utils/transkribus_utils.py +++ b/transkribus_utils/transkribus_utils.py @@ -152,7 +152,7 @@ def get_fulldoc_md(self, doc_id, col_id, page_id="1"): result["img_url"] = doc_xml.xpath("./url/text()")[0] result["img_url"] = doc_xml.xpath("./url/text()")[0] result["extra_info"] = self.get_doc_md( - doc_id, base_url=self.base_url, col_id=col_id + doc_id, col_id=col_id ) return result else: @@ -282,7 +282,12 @@ def collection_to_mets(self, col_id, file_path=".", filter_by_doc_ids=[]): print(f"{len(doc_ids)} to download") counter = 1 for doc_id in doc_ids: - save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir) + try: + save_mets = self.save_mets_to_file(doc_id, col_id, file_path=col_dir) + except Exception as e: + print(f"failed to save mets for DOC-ID: {doc_id} in COLLECTION: {col_id} due to ERROR: {e}") + counter += 1 + continue file_list = self.save_image_names_to_file(doc_id, col_id, file_path=col_dir) print(f"saving: {save_mets}") print(f"saving: {file_list}")