Skip to content

Commit

Permalink
Background add OCR manage command for volumes.
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Oct 2, 2024
1 parent 85cd4bc commit 038e5c0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 7 deletions.
28 changes: 21 additions & 7 deletions readux_ingest_ecds/management/commands/add_ocr.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from django.core.management.base import BaseCommand, CommandError
from readux_ingest_ecds.tasks import add_ocr_manage_task
from readux_ingest_ecds.helpers import get_iiif_models
from readux_ingest_ecds.services.ocr_services import (
add_ocr_to_canvases,
Expand All @@ -15,19 +16,32 @@ class Command(BaseCommand):
help = "(Re)Build OCR for a volume or canvas."

def add_arguments(self, parser):
parser.add_argument("--volume", type=str, help="PID for volume/manifest.")

parser.add_argument(
"--volume", type=str, help="PID for volume. Same as --manifest."
)
parser.add_argument(
"--manifest", type=str, help="PID for manifest. Same as --volume."
)
parser.add_argument("--canvas", type=str, help="PID for canvas.")

def handle(self, *args, **options):
if options["volume"]:
if options["volume"] or options["manifest"]:
pid = (
options["volume"]
if options["volume"] is not None
else options["manifest"]
)
try:
manifest = Manifest.objects.get(pid=options["volume"])
manifest = Manifest.objects.get(pid=pid)
except Manifest.DoesNotExist:
raise CommandError(f'Manifest {options["volume"]} does not exist')
raise CommandError(f"Manifest {pid} does not exist")

add_ocr_to_canvases(manifest)
self.stdout.write(self.style.SUCCESS(f"OCR create for {manifest.pid}"))
add_ocr_manage_task.delay(manifest.pid)
self.stdout.write(
self.style.SUCCESS(
f"A background task has started to add OCR to {manifest.pid}. This could take a while depending on volume length. NOTE: The OCR is not necessarily created according to page order."
)
)
elif options["canvas"]:
try:
canvas = Canvas.objects.get(pid=options["canvas"])
Expand Down
11 changes: 11 additions & 0 deletions readux_ingest_ecds/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,14 @@ def s3_ingest_task(ingest_id, *args, **kwargs):
print(ingest_id)
s3_ingest = S3Ingest.objects.get(pk=ingest_id)
s3_ingest.ingest()


@app.task(
    name="add_volume_ocr_manage_task",
    # Any exception triggers an automatic retry with backoff, up to
    # 20 attempts.
    autoretry_for=(Exception,),
    retry_backoff=True,
    max_retries=20,
)
def add_ocr_manage_task(volume_pid, *args, **kwargs):
    """Add OCR for a volume/manifest as a background task.

    Queued by the ``add_ocr`` management command, which passes the
    manifest's PID (a string) rather than the model instance.

    Args:
        volume_pid: PID of the volume/manifest whose canvases should get OCR.
        *args: Accepted for task-signature compatibility; unused.
        **kwargs: Accepted for task-signature compatibility; unused.

    Raises:
        DoesNotExist: From the manifest model when no manifest has
            ``volume_pid``. Because ``autoretry_for`` covers ``Exception``,
            this is retried like any other failure.
    """
    # Imported locally so this fix does not depend on the module's
    # top-level imports, which are not all visible here.
    from readux_ingest_ecds.helpers import get_iiif_models

    # The management command calls add_ocr_to_canvases with a Manifest
    # instance, so resolve the PID to a manifest before delegating instead
    # of handing the helper a bare string.
    # NOTE(review): assumes get_iiif_models() returns a mapping that exposes
    # the manifest model under the "Manifest" key -- confirm against helpers.
    manifest_model = get_iiif_models()["Manifest"]
    manifest = manifest_model.objects.get(pid=volume_pid)
    add_ocr_to_canvases(manifest)

0 comments on commit 038e5c0

Please sign in to comment.