From fcfafb7301c6499c3c34793b846ecca3affb1510 Mon Sep 17 00:00:00 2001 From: Jay Varner Date: Mon, 12 Aug 2024 15:42:55 -0400 Subject: [PATCH] Add extra metadata and call task to add OCR --- readux_ingest_ecds/models.py | 16 ++++++++++++++-- test_app/tests/test_s3.py | 5 +++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/readux_ingest_ecds/models.py b/readux_ingest_ecds/models.py index 0cf8298..9367ec1 100644 --- a/readux_ingest_ecds/models.py +++ b/readux_ingest_ecds/models.py @@ -419,10 +419,15 @@ class Meta: verbose_name_plural = "Amazon S3 Ingests" def ingest(self): - metadata = metadata_from_file(self.metadata_spreadsheet.path) + rows = metadata_from_file(self.metadata_spreadsheet.path) - for pid in [row["pid"] for row in metadata]: + for row in rows: + pid = row["pid"] manifest = create_manifest_from_pid(pid, self.image_server) + metadata = dict(row) + for key, value in metadata.items(): + setattr(manifest, key, value) + manifest.collections.set(self.collections.all()) manifest.save() local_ingest = Local.objects.create( @@ -452,5 +457,12 @@ def ingest(self): t_file.write(f"{image_file}\n") local_ingest.create_canvases() + manifest.save() + from .tasks import add_ocr_task_local + + if os.environ["DJANGO_ENV"] == "test": + add_ocr_task_local(str(local_ingest.id)) + else: + add_ocr_task_local.delay(str(local_ingest.id)) self.delete() diff --git a/test_app/tests/test_s3.py b/test_app/tests/test_s3.py index cee0b36..f0f5932 100644 --- a/test_app/tests/test_s3.py +++ b/test_app/tests/test_s3.py @@ -82,12 +82,12 @@ def create_pids(self, pid_count=1, image_count=1, include_pid_in_file=True): pids = [] pid_file = os.path.join(self.fixture_path, self.fake.file_name(extension="csv")) with open(pid_file, "w", encoding="utf-8") as t_file: - t_file.write("PID\n") + t_file.write("PID,Label\n") for _ in range(pid_count): pid = self.fake.isbn10() with open(pid_file, "a", encoding="utf-8") as t_file: - t_file.write(f"{pid}\n") + t_file.write(f"{pid},{self.fake.name()}\n") pids.append(pid) self.create_source_images( pid=pid, count=image_count, include_pid_in_file=include_pid_in_file @@ -123,6 +123,7 @@ def test_s3_ingest_pid_not_in_filename(self): ] assert Manifest.objects.filter(pid=pid).exists() assert Manifest.objects.get(pid=pid).canvas_set.count() == 4 + assert Manifest.objects.get(pid=pid).label is not None assert len(ingested_images) == 4 assert len(ingested_ocr) == 4