diff --git a/readux_ingest_ecds/management/commands/add_ocr.py b/readux_ingest_ecds/management/commands/add_ocr.py
new file mode 100644
index 0000000..452903a
--- /dev/null
+++ b/readux_ingest_ecds/management/commands/add_ocr.py
@@ -0,0 +1,40 @@
+"""Management command that (re)builds OCR for a single volume."""
+
+from django.core.management.base import BaseCommand, CommandError
+from readux_ingest_ecds.helpers import get_iiif_models
+from readux_ingest_ecds.services.ocr_services import add_ocr_to_canvases
+
+# The Manifest model is looked up through the ingest helper at import time.
+Manifest = get_iiif_models()["Manifest"]
+
+
+class Command(BaseCommand):
+    """Run ``add_ocr_to_canvases`` for the manifest with the given PID."""
+
+    help = "(Re)Build OCR for a volume"
+
+    def add_arguments(self, parser):
+        """Register the required positional ``volume`` argument (a PID)."""
+        parser.add_argument(
+            "volume",
+            type=str,
+            help="PID of the volume/manifest whose OCR should be (re)built.",
+        )
+
+    def handle(self, *args, **options):
+        """Fetch the manifest by PID and hand it to ``add_ocr_to_canvases``.
+
+        Raises:
+            CommandError: If no manifest has the requested PID.
+        """
+        pid = options["volume"]
+        try:
+            manifest = Manifest.objects.get(pid=pid)
+        except Manifest.DoesNotExist as err:
+            raise CommandError(f'Manifest "{pid}" does not exist') from err
+
+        add_ocr_to_canvases(manifest)
+
+        self.stdout.write(
+            self.style.SUCCESS(f'Successfully added OCR for "{manifest.pid}"')
+        )
diff --git a/readux_ingest_ecds/models.py b/readux_ingest_ecds/models.py index 6cad399..bdacf06 100644 --- a/readux_ingest_ecds/models.py +++ b/readux_ingest_ecds/models.py @@ -225,20 +225,23 @@ def create_canvases(self): except IndexError: ocr_file_path = None - new_canvas = Canvas( - manifest=self.manifest, - image_server=self.image_server, - pid=canvas_pid, - ocr_file_path=ocr_file_path, - position=position, - width=width, - height=height, - resource=canvas_pid, - ) + try: + Canvas.objects.get(pid=canvas_pid) + except Canvas.DoesNotExist: + new_canvas = Canvas( + manifest=self.manifest, + image_server=self.image_server, + pid=canvas_pid, + ocr_file_path=ocr_file_path, + position=position, + width=width, + height=height, + resource=canvas_pid, + ) - new_canvas.before_save() + new_canvas.before_save() - new_canvases.append(new_canvas) + new_canvases.append(new_canvas) Canvas.objects.bulk_create(new_canvases) diff --git 
a/readux_ingest_ecds/services/ocr_services.py b/readux_ingest_ecds/services/ocr_services.py index 93ffb23..7f49760 100644 --- a/readux_ingest_ecds/services/ocr_services.py +++ b/readux_ingest_ecds/services/ocr_services.py @@ -90,7 +90,7 @@ def fetch_positional_ocr(canvas): return fetch_url(url) - if "images.readux.ecds.emory" in canvas.manifest.image_server.server_base: + if "iip.readux.io" in canvas.manifest.image_server.server_base: # Fake TSV data for testing. if environ["DJANGO_ENV"] == "test": fake_tsv = open(path.join(settings.FIXTURE_DIR, "sample.tsv")) @@ -121,7 +121,7 @@ def fetch_positional_ocr(canvas): if ( environ["DJANGO_ENV"] == "test" - and "images.readux.ecds.emory" not in canvas.manifest.image_server.server_base + and "iip.readux.io" not in canvas.manifest.image_server.server_base and canvas.ocr_file_path is None ): fake_json = open(path.join(settings.FIXTURE_DIR, "ocr_words.json"))