Skip to content

Commit

Permalink
Don't make duplicate canvases
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Oct 1, 2024
1 parent 9ed597e commit 8501c83
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 14 deletions.
26 changes: 26 additions & 0 deletions readux_ingest_ecds/management/commands/add_ocr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from django.core.management.base import BaseCommand, CommandError
from readux_ingest_ecds.helpers import get_iiif_models
from readux_ingest_ecds.services.ocr_services import add_ocr_to_canvases

# Resolve the Manifest model class via the get_iiif_models() helper rather
# than importing it directly.
# NOTE(review): presumably this lets the host project supply the concrete
# model - confirm against readux_ingest_ecds.helpers.
Manifest = get_iiif_models()["Manifest"]


class Command(BaseCommand):
    """Management command that (re)builds OCR for a single volume.

    The manifest whose ``pid`` equals the ``volume`` argument is looked up
    and handed to ``add_ocr_to_canvases``.
    """

    help = "(Re)Build OCR for a volume"

    def add_arguments(self, parser):
        """Register the required positional ``volume`` (manifest PID) argument."""
        parser.add_argument(
            "volume",
            type=str,
            help="PID for volume/manifest whose OCR should be (re)built.",
        )

    def handle(self, *args, **options):
        """Look up the manifest by PID and run ``add_ocr_to_canvases`` on it.

        Raises:
            CommandError: if no manifest with the given PID exists.
        """
        pid = options["volume"]

        try:
            manifest = Manifest.objects.get(pid=pid)
        except Manifest.DoesNotExist as exc:
            # Chain the original lookup failure so it stays visible when the
            # command is run with --traceback.
            raise CommandError('Manifest "%s" does not exist' % pid) from exc

        add_ocr_to_canvases(manifest)

        # The previous message ("Successfully closed poll") was a leftover
        # from the Django tutorial and did not describe this command.
        self.stdout.write(
            self.style.SUCCESS('Successfully added OCR for "%s"' % manifest.pid)
        )
27 changes: 15 additions & 12 deletions readux_ingest_ecds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,20 +225,23 @@ def create_canvases(self):
except IndexError:
ocr_file_path = None

new_canvas = Canvas(
manifest=self.manifest,
image_server=self.image_server,
pid=canvas_pid,
ocr_file_path=ocr_file_path,
position=position,
width=width,
height=height,
resource=canvas_pid,
)
try:
Canvas.objects.get(pid=canvas_pid)
except Canvas.DoesNotExist:
new_canvas = Canvas(
manifest=self.manifest,
image_server=self.image_server,
pid=canvas_pid,
ocr_file_path=ocr_file_path,
position=position,
width=width,
height=height,
resource=canvas_pid,
)

new_canvas.before_save()
new_canvas.before_save()

new_canvases.append(new_canvas)
new_canvases.append(new_canvas)

Canvas.objects.bulk_create(new_canvases)

Expand Down
4 changes: 2 additions & 2 deletions readux_ingest_ecds/services/ocr_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def fetch_positional_ocr(canvas):

return fetch_url(url)

if "images.readux.ecds.emory" in canvas.manifest.image_server.server_base:
if "iip.readux.io" in canvas.manifest.image_server.server_base:
# Fake TSV data for testing.
if environ["DJANGO_ENV"] == "test":
fake_tsv = open(path.join(settings.FIXTURE_DIR, "sample.tsv"))
Expand Down Expand Up @@ -121,7 +121,7 @@ def fetch_positional_ocr(canvas):

if (
environ["DJANGO_ENV"] == "test"
and "images.readux.ecds.emory" not in canvas.manifest.image_server.server_base
and "iip.readux.io" not in canvas.manifest.image_server.server_base
and canvas.ocr_file_path is None
):
fake_json = open(path.join(settings.FIXTURE_DIR, "ocr_words.json"))
Expand Down

0 comments on commit 8501c83

Please sign in to comment.