Skip to content

Commit

Permalink
Merge pull request #1002 from ecds/feature/958-external-files
Browse files Browse the repository at this point in the history
External files ingest as "related" links (#958) and show them in sidebar (#959)
  • Loading branch information
jayvarner authored Jan 2, 2024
2 parents ad5f135 + b2bde28 commit efb74aa
Show file tree
Hide file tree
Showing 11 changed files with 225 additions and 24 deletions.
6 changes: 4 additions & 2 deletions apps/iiif/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ class Choices():

""" List of Mime type choices. """
MIMETYPES = (
('text/html', 'HTML'),
('text/html', 'HTML or web page'),
('application/json', 'JSON'),
('application/ld+json', 'JSON-LD'),
('application/pdf', 'PDF'),
('text/plain', 'Text'),
('application/xml', 'XML'),
('text/plan', 'Text'),
('application/octet-stream', 'Other'),
)

"""
Expand Down
16 changes: 15 additions & 1 deletion apps/iiif/manifests/admin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Django admin module for maninfests"""
from django.contrib import admin
from django.http import HttpResponseRedirect
from django.http.request import HttpRequest
from django.urls.conf import path
from import_export import resources, fields
from import_export.admin import ImportExportModelAdmin
from import_export.widgets import ManyToManyWidget, ForeignKeyWidget
from django_summernote.admin import SummernoteModelAdmin
from .models import Manifest, Note, ImageServer
from .models import Manifest, Note, ImageServer, RelatedLink
from .forms import ManifestAdminForm
from .views import AddToCollectionsView
from ..kollections.models import Collection
Expand All @@ -32,6 +33,18 @@ class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring
'pdf', 'metadata', 'attribution', 'logo', 'logo_url', 'license', 'viewingdirection', 'collection_id'
)


class RelatedLinksInline(admin.TabularInline):
    """Tabular inline for editing RelatedLink rows from the Manifest admin page."""

    model = RelatedLink
    # One blank row is offered by default; no minimum number of links is enforced.
    extra = 1
    min_num = 0
    fields = ("link", "is_structured_data", "format")
    exclude = ("id",)

class ManifestAdmin(ImportExportModelAdmin, SummernoteModelAdmin, admin.ModelAdmin):
"""Django admin configuration for manifests"""
resource_class = ManifestResource
Expand All @@ -42,6 +55,7 @@ class ManifestAdmin(ImportExportModelAdmin, SummernoteModelAdmin, admin.ModelAdm
summernote_fields = ('summary',)
form = ManifestAdminForm
actions = ['add_to_collections_action']
inlines = [RelatedLinksInline]

def add_to_collections_action(self, request, queryset):
"""Action choose manifests to add to collections"""
Expand Down
59 changes: 59 additions & 0 deletions apps/iiif/manifests/migrations/0058_alter_relatedlink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 3.2.12 on 2023-12-05 16:16

from django.db import migrations, models
import uuid


def populate_is_structured_data(apps, schema_editor):
    """Data migration to populate RelatedLink.is_structured_data for existing data.

    Assume all existing RelatedLinks are structured data, since they were previously only
    being added in Remote ingests when there was an existing manifest (which is structured
    data).

    :param apps: Historical app registry passed in by ``migrations.RunPython``
    :param schema_editor: Schema editor passed in by ``migrations.RunPython`` (unused)
    :rtype: None
    """
    RelatedLink = apps.get_model("manifests", "RelatedLink")
    # A single UPDATE statement sets the flag on every row; there is no need to load
    # each object into memory only to assign the same constant and write it back.
    RelatedLink.objects.update(is_structured_data=True)


class Migration(migrations.Migration):
    """Alter RelatedLink.id and RelatedLink.format, and add RelatedLink.is_structured_data."""

    dependencies = [
        ("manifests", "0057_alter_manifest_languages"),
    ]

    operations = [
        # RelatedLink.id: UUID primary key, marked as non-editable
        migrations.AlterField(
            model_name="relatedlink",
            name="id",
            field=models.UUIDField(
                default=uuid.uuid4,
                editable=False,
                primary_key=True,
                serialize=False,
            ),
        ),
        # RelatedLink.format: optional MIME type picked from a fixed list of choices
        migrations.AlterField(
            model_name="relatedlink",
            name="format",
            field=models.CharField(
                blank=True,
                choices=[
                    ("text/html", "HTML or web page"),
                    ("application/json", "JSON"),
                    ("application/ld+json", "JSON-LD"),
                    ("application/pdf", "PDF"),
                    ("text/plain", "Text"),
                    ("application/xml", "XML"),
                    ("application/octet-stream", "Other"),
                ],
                max_length=255,
                null=True,
            ),
        ),
        # RelatedLink.is_structured_data: new flag, False by default
        migrations.AddField(
            model_name="relatedlink",
            name="is_structured_data",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this link is structured data that should appear in the "
                    "manifest's 'seeAlso' field; if false, the link will appear in the "
                    "'related' field instead. Leave unchecked if unsure."
                ),
            ),
        ),
        # Populate the new flag for existing rows; the reverse direction is a no-op
        migrations.RunPython(populate_is_structured_data, migrations.RunPython.noop),
    ]
68 changes: 64 additions & 4 deletions apps/iiif/manifests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,61 @@ def related_links(self):
:return: List of links related to Manifest
:rtype: list
"""
links = [link.link for link in self.relatedlink_set.all()]
links.append(self.get_volume_url())
links = [
{
"@id": link.link,
"format": link.format,
} if link.format else link.link
for link in self.relatedlink_set.all()
]
links.append({
"@id": self.get_volume_url(),
"format": "text/html"
})
return links

@property
def external_links(self):
    """Group external links under the headings shown in the volume page sidebar.

    Related links whose URL contains ``settings.HOSTNAME`` (case-insensitive) are
    treated as internal and left out.

    :return: Dict of external links ("see_also" and "related")
    :rtype: dict
    """
    see_also = []
    related = []
    external = self.relatedlink_set.exclude(link__icontains=settings.HOSTNAME)
    for entry in external:
        # structured data goes under "see_also"; everything else under "related"
        bucket = see_also if entry.is_structured_data else related
        bucket.append(entry.link)
    # dict keys correspond to headings in sidebar
    return {"see_also": see_also, "related": related}

@property
def see_also_links(self):
    """Build the entries for the IIIF v2 'seeAlso' field (structured data).

    A link with a format becomes a dict with "@id" and "format" keys; a link without a
    format is represented by its bare URL string.

    :return: List of links to structured data describing Manifest
    :rtype: list
    """
    entries = []
    structured = self.relatedlink_set.filter(is_structured_data=True)
    for item in structured:
        if item.format:
            entries.append({"@id": item.link, "format": item.format})
        else:
            entries.append(item.link)
    return entries

# TODO: Is this needed? It doesn't seem to be called anywhere.
# Could we just use the label as is?
def autocomplete_label(self):
Expand Down Expand Up @@ -339,9 +390,18 @@ class Note(models.Model):

class RelatedLink(models.Model):
""" Links to related resources """
id = models.UUIDField(primary_key=True, default=uuid4)
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
link = models.CharField(max_length=255)
data_type = models.CharField(max_length=255, default='Dataset')
data_type = models.CharField(
max_length=255,
default='Dataset',
)
is_structured_data = models.BooleanField(
default=False,
help_text="True if this link is structured data that should appear in the manifest's " +
"'seeAlso' field; if false, the link will appear in the 'related' field instead. Leave " +
"unchecked if unsure.",
)
label = GenericRelation(ValueByLanguage)
format = models.CharField(max_length=255, choices=Choices.MIMETYPES, blank=True, null=True)
profile = models.CharField(max_length=255, blank=True, null=True)
Expand Down
4 changes: 2 additions & 2 deletions apps/iiif/manifests/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ def test_serialized_related_links(self):
[manifest]
)
)
assert 'seeAlso' not in no_links.keys()
assert not no_links['seeAlso']

link = RelatedLink(link='images.org', manifest=manifest)
link = RelatedLink(link='images.org', manifest=manifest, is_structured_data=True)
link.save()
manifest.refresh_from_db()

Expand Down
5 changes: 2 additions & 3 deletions apps/iiif/serializers/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,9 @@ def get_dump_object(self, obj):
)
)
}
]
],
"seeAlso": obj.see_also_links,
}
if obj.relatedlink_set.exists():
data["seeAlso"] = [related.link for related in obj.relatedlink_set.all()]
return data
return None

Expand Down
42 changes: 34 additions & 8 deletions apps/ingest/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ def clean_metadata(metadata):
:rtype: dict
"""
metadata = {key.casefold().replace(' ', '_'): value for key, value in metadata.items()}
fields = [f.name for f in Manifest._meta.get_fields()]
fields = [
*(f.name for f in Manifest._meta.get_fields()),
"related", # used for related external links
]
invalid_keys = []

for key in metadata.keys():
Expand All @@ -33,13 +36,31 @@ def clean_metadata(metadata):
if key not in fields:
invalid_keys.append(key)

# TODO: Update this method to allow all "invalid" keys to populate Manifest.metadata JSONField
for invalid_key in invalid_keys:
metadata.pop(invalid_key)



return metadata

def create_related_links(manifest, related_str):
    """
    Create RelatedLink objects from supplied related links string and associate each with supplied
    Manifest. String should consist of semicolon-separated URLs.

    Whitespace around each URL is ignored, and empty entries (for example from a trailing
    semicolon or an empty value) are skipped rather than saved as blank links.

    :param manifest: Manifest the new links are attached to
    :type manifest: iiif.manifest.models.Manifest
    :param related_str: Semicolon-separated URLs
    :type related_str: str
    :rtype: None
    """
    for raw_link in (related_str or "").split(";"):
        link = raw_link.strip()
        if not link:
            continue
        # guess the MIME type from the URL's file extension, if it has one
        (mime_type, _) = guess_type(link)
        RelatedLink.objects.create(
            manifest=manifest,
            link=link,
            format=mime_type or "text/html",  # assume web page if MIME type cannot be determined
            is_structured_data=False,  # assume this is not meant for seeAlso
        )

def create_manifest(ingest):
"""
Create or update a Manifest from supplied metadata and images.
Expand All @@ -61,7 +82,13 @@ def create_manifest(ingest):
else:
manifest = Manifest.objects.create()
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# TODO: if the key doesn't exist on Manifest model, add it to Manifest.metadata
else:
manifest = Manifest()

Expand All @@ -77,13 +104,12 @@ def create_manifest(ingest):
manifest.collections.set(ingest.collections.all())
# Save again once relationship is set
manifest.save()

# if type(ingest, .models.Remote):
if isinstance(ingest, Remote):
else:
RelatedLink(
manifest=manifest,
link=ingest.remote_url,
format='application/ld+json'
format='application/ld+json',
is_structured_data=True,
).save()

return manifest
Expand Down
9 changes: 7 additions & 2 deletions apps/ingest/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from apps.ingest.models import IngestTaskWatcher

from .mail import send_email_on_failure, send_email_on_success
from .services import create_manifest
from .services import create_manifest, create_related_links

# Use `apps.get_model` to avoid circular import error. Because the parameters used to
# create a background task have to be serializable, we can't just pass in the model object.
Expand Down Expand Up @@ -129,7 +129,12 @@ def create_canvases_from_s3_ingest(metadata, ingest_id):
except Manifest.DoesNotExist:
manifest = Manifest.objects.create(pid=pid)
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# Image server: set from ingest
ingest = S3Ingest.objects.get(pk=ingest_id)
manifest.image_server = ingest.image_server
Expand Down
17 changes: 17 additions & 0 deletions apps/ingest/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,23 @@ def test_it_creates_mainfest_with_metadata_property(self):
assert local.manifest.pid == '808'
assert local.manifest.title == 'Goodie Mob'

def test_create_related_links(self):
    """It should create RelatedLinks, with a format each, from the "related" metadata key."""
    github_url = 'https://github.com/ecds/readux/tree/develop'
    pdf_url = 'https://archive.org/download/cherokeehymnbook00boud/cherokeehymnbook00boud.pdf'
    metadata = {
        'pid': '808',
        'related': ';'.join([github_url, pdf_url])
    }
    local = self.mock_local('no_meta_file.zip', metadata=metadata)
    local.manifest = create_manifest(local)
    related_links = local.manifest.related_links
    # should get 2 from metadata, 1 from volume url
    assert len(related_links) == 3
    # Pair each URL with its own format. The volume url entry is always text/html, so
    # checking that *some* entry is text/html would pass even if the github link were not.
    formats_by_url = {link["@id"]: link["format"] for link in related_links}
    # should get github link format as text/html
    assert formats_by_url[github_url] == "text/html"
    # should get pdf format too
    assert formats_by_url[pdf_url] == "application/pdf"

def test_moving_bulk_bundle_to_s3(self):
"""
It should upload Local.bundle_from_bulk to mock S3 by saving it to
Expand Down
4 changes: 2 additions & 2 deletions apps/ingest/tests/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def test_adding_related_link_to_remote_ingest_manifest(self):
remote_url='https://swoop.net/manifest.json' # pylint: disable=line-too-long
)
manifest = services.create_manifest(remote)
related_link = manifest.relatedlink_set.first()
assert related_link.link == remote.remote_url
related_links = manifest.relatedlink_set.all()
assert any([link.link == remote.remote_url for link in related_links])

httpretty.disable()

Expand Down
19 changes: 19 additions & 0 deletions apps/templates/page.html
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,25 @@ <h2 class="uk-modal-title">Text</h2>
<v-info-content-url-external label="Stable Page" volume="{% url 'volume' volume.pid %}" url="https://{{ request.META.HTTP_HOST }}{% url 'volume' volume.pid %}/page/{{ page.pid }}">
</v-info-content-url-external>
{% endif %}

{% if volume.external_links.see_also %}
<v-info-content-url-multiple label="See Also">
{% for link in volume.external_links.see_also %}
<v-info-content-url-unit url="{{ link }}">
</v-info-content-url-unit>
{% endfor %}
</v-info-content-url-multiple>
{% endif %}

{% if volume.external_links.related %}
<v-info-content-url-multiple label="External Links">
{% for link in volume.external_links.related %}
<v-info-content-url-unit url="{{ link }}">
</v-info-content-url-unit>
{% endfor %}
</v-info-content-url-multiple>
{% endif %}

</div>
</div>

Expand Down

0 comments on commit efb74aa

Please sign in to comment.