Skip to content

Commit

Permalink
Add OCR on ingest
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Jan 25, 2024
1 parent 1654000 commit 37f3823
Show file tree
Hide file tree
Showing 36 changed files with 6,083 additions and 392 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.

Expand Down
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
include LICENSE
include README.md
recursive-include readux_ingest_ecds/templates *
recursive-include readux_ingest_ecds/services *
prune test*
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ python manage.py migrate readux_ingest_ecds
| IIIF_RELATED_LINK_MODEL | Model reference, e.g. 'iiif.RelatedLink' |
| IIIF_CANVAS_MODEL | Model reference, e.g. 'iiif.Canvas' |
| IIIF_COLLECTION_MODEL | Model reference, e.g. 'iiif.Collection' |
| IIIF_OCR_MODEL | Model reference, e.g. 'iiif.OCR' |
| INGEST_TMP_DIR | Absolute path where files will be temporarily stored. |
| INGEST_PROCESSING_DIR | Absolute path where Lambda will look for images. |
| INGEST_OCR_DIR | Absolute path where OCR files will be preserved. |
Expand Down
11 changes: 4 additions & 7 deletions readux_ingest_ecds/admin.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import os
import logging
from django.contrib import admin
from django.urls import reverse
from django.utils.html import format_html
from django.shortcuts import redirect
from django_celery_results.models import TaskResult
from .models import Local
from .tasks import local_ingest_task_ecds

Expand All @@ -18,15 +15,15 @@ class LocalAdmin(admin.ModelAdmin):
def save_model(self, request, obj, form, change):
LOGGER.info(f'INGEST: Local ingest started by {request.user.username}')
obj.creator = request.user
obj.process()
obj.prep()
super().save_model(request, obj, form, change)

def response_add(self, request, obj, post_url_continue=None):
obj.refresh_from_db()
if os.environ["DJANGO_ENV"] != 'test': # pragma: no cover
local_ingest_task_ecds.apply_async(args=[obj.id])
else:
local_ingest_task_ecds(obj.id)

def response_add(self, request, obj, post_url_continue=None):
obj.refresh_from_db()
LOGGER.info(f'INGEST: Local ingest - {obj.id} - added for {obj.manifest.pid}')
return redirect('/admin/manifests/manifest/{m}/change/'.format(m=obj.manifest.pk))

Expand Down
2 changes: 2 additions & 0 deletions readux_ingest_ecds/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ def get_iiif_models():
'RelatedLink': apps.get_model(settings.IIIF_RELATED_LINK_MODEL),
'Canvas': apps.get_model(settings.IIIF_CANVAS_MODEL),
'Collection': apps.get_model(settings.IIIF_COLLECTION_MODEL),
'OCR': apps.get_model(settings.IIIF_OCR_MODEL),
}
except AppRegistryNotReady:
return {
Expand All @@ -18,4 +19,5 @@ def get_iiif_models():
'RelatedLink': settings.IIIF_RELATED_LINK_MODEL,
'Canvas': settings.IIIF_CANVAS_MODEL,
'Collection': settings.IIIF_COLLECTION_MODEL,
'OCR': settings.IIIF_OCR_MODEL,
}
7 changes: 5 additions & 2 deletions readux_ingest_ecds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
from django.core.files.storage import FileSystemStorage
from django.db import models
from django.conf import settings
from .services import is_image, is_ocr, is_junk, metadata_from_file, create_manifest, move_image_file, move_ocr_file, canvas_dimensions, upload_trigger_file
from .services.file_services import is_image, is_ocr, is_junk, move_image_file, move_ocr_file, canvas_dimensions, upload_trigger_file
from .services.iiif_services import create_manifest
from .services.metadata_services import metadata_from_file
from .helpers import get_iiif_models

Manifest = get_iiif_models()['Manifest']
Expand Down Expand Up @@ -74,12 +76,13 @@ def ocr_directory(self):
def trigger_file(self):
return os.path.join(settings.INGEST_TMP_DIR, f'{self.manifest.pid}.txt')

def process(self):
def prep(self):
"""
Open metadata
Create manifest
Unzip bundle
"""
LOGGER.info(f'INGEST: Local ingest - preparing new local ingest')
os.makedirs(settings.INGEST_TMP_DIR, exist_ok=True)
os.makedirs(settings.INGEST_PROCESSING_DIR, exist_ok=True)
os.makedirs(settings.INGEST_OCR_DIR, exist_ok=True)
Expand Down
Loading

0 comments on commit 37f3823

Please sign in to comment.