Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

External files ingest as "related" links (#958) and show them in sidebar (#959) #1002

Merged
merged 11 commits into from
Jan 2, 2024
6 changes: 4 additions & 2 deletions apps/iiif/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ class Choices():

""" List of Mime type choices. """
MIMETYPES = (
('text/html', 'HTML'),
('text/html', 'HTML or web page'),
('application/json', 'JSON'),
('application/ld+json', 'JSON-LD'),
('application/pdf', 'PDF'),
('text/plain', 'Text'),
('application/xml', 'XML'),
('text/plan', 'Text'),
('application/octet-stream', 'Other'),
)

"""
Expand Down
16 changes: 15 additions & 1 deletion apps/iiif/manifests/admin.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
"""Django admin module for maninfests"""
from django.contrib import admin
from django.http import HttpResponseRedirect
from django.http.request import HttpRequest
from django.urls.conf import path
from import_export import resources, fields
from import_export.admin import ImportExportModelAdmin
from import_export.widgets import ManyToManyWidget, ForeignKeyWidget
from django_summernote.admin import SummernoteModelAdmin
from .models import Manifest, Note, ImageServer
from .models import Manifest, Note, ImageServer, RelatedLink
from .forms import ManifestAdminForm
from .views import AddToCollectionsView
from ..kollections.models import Collection
Expand All @@ -32,6 +33,18 @@ class Meta: # pylint: disable=too-few-public-methods, missing-class-docstring
'pdf', 'metadata', 'attribution', 'logo', 'logo_url', 'license', 'viewingdirection', 'collection_id'
)


class RelatedLinksInline(admin.TabularInline):
    """Tabular inline configuration for editing RelatedLink rows."""

    model = RelatedLink

    # One blank row is offered by default; no rows are required.
    extra = 1
    min_num = 0

    # The primary key is kept out of the inline form.
    exclude = ("id",)

    # Columns shown for each row, in display order.
    fields = ("link", "is_structured_data", "format")

class ManifestAdmin(ImportExportModelAdmin, SummernoteModelAdmin, admin.ModelAdmin):
"""Django admin configuration for manifests"""
resource_class = ManifestResource
Expand All @@ -42,6 +55,7 @@ class ManifestAdmin(ImportExportModelAdmin, SummernoteModelAdmin, admin.ModelAdm
summernote_fields = ('summary',)
form = ManifestAdminForm
actions = ['add_to_collections_action']
inlines = [RelatedLinksInline]

def add_to_collections_action(self, request, queryset):
"""Action choose manifests to add to collections"""
Expand Down
59 changes: 59 additions & 0 deletions apps/iiif/manifests/migrations/0058_alter_relatedlink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Generated by Django 3.2.12 on 2023-12-05 16:16

from django.db import migrations, models
import uuid


def populate_is_structured_data(apps, schema_editor):
    """Data migration to populate RelatedLink.is_structured_data for existing data.

    Assume all existing RelatedLinks are structured data, since they were
    previously only being added in Remote ingests when there was an existing
    manifest (which is structured data).

    :param apps: app registry passed in by ``migrations.RunPython``; used to
        look up the RelatedLink model
    :param schema_editor: passed in by ``migrations.RunPython``; unused here
    :rtype: None
    """
    RelatedLink = apps.get_model("manifests", "RelatedLink")
    # Every row receives the same value, so a single UPDATE statement is
    # enough; there is no need to load each row into memory and write it back.
    RelatedLink.objects.update(is_structured_data=True)


class Migration(migrations.Migration):
    """Alter RelatedLink: id and format fields, plus new is_structured_data flag."""

    dependencies = [("manifests", "0057_alter_manifest_languages")]

    operations = [
        # Redefine the UUID primary key as non-editable.
        migrations.AlterField(
            model_name="relatedlink",
            name="id",
            field=models.UUIDField(
                primary_key=True,
                default=uuid.uuid4,
                editable=False,
                serialize=False,
            ),
        ),
        # Redefine the MIME type field with its list of choices.
        migrations.AlterField(
            model_name="relatedlink",
            name="format",
            field=models.CharField(
                max_length=255,
                blank=True,
                null=True,
                choices=[
                    ("text/html", "HTML or web page"),
                    ("application/json", "JSON"),
                    ("application/ld+json", "JSON-LD"),
                    ("application/pdf", "PDF"),
                    ("text/plain", "Text"),
                    ("application/xml", "XML"),
                    ("application/octet-stream", "Other"),
                ],
            ),
        ),
        # New flag; defaults to False at the schema level.
        migrations.AddField(
            model_name="relatedlink",
            name="is_structured_data",
            field=models.BooleanField(
                default=False,
                help_text=(
                    "True if this link is structured data that should appear in the "
                    "manifest's 'seeAlso' field; if false, the link will appear in the "
                    "'related' field instead. Leave unchecked if unsure."
                ),
            ),
        ),
        # Populate the new flag on existing rows; reversing is a no-op.
        migrations.RunPython(
            populate_is_structured_data,
            migrations.RunPython.noop,
        ),
    ]
68 changes: 64 additions & 4 deletions apps/iiif/manifests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,61 @@ def related_links(self):
:return: List of links related to Manifest
:rtype: list
"""
links = [link.link for link in self.relatedlink_set.all()]
links.append(self.get_volume_url())
links = [
{
"@id": link.link,
"format": link.format,
} if link.format else link.link
for link in self.relatedlink_set.all()
]
links.append({
"@id": self.get_volume_url(),
"format": "text/html"
})
return links

@property
def external_links(self):
    """External links grouped for display on volume pages.

    Links whose URL contains ``settings.HOSTNAME`` (case-insensitive) are
    treated as internal and left out.

    :return: Dict with keys "see_also" (structured data links) and "related"
        (all other external links), each a list of link URLs
    :rtype: dict
    """
    external = self.relatedlink_set.exclude(link__icontains=settings.HOSTNAME)

    # Split the external links into the two sidebar groups in one pass.
    structured_urls = []
    other_urls = []
    for entry in external:
        if entry.is_structured_data:
            structured_urls.append(entry.link)
        else:
            other_urls.append(entry.link)

    # dict keys correspond to headings in sidebar
    return {"see_also": structured_urls, "related": other_urls}

@property
def see_also_links(self):
    """Links for the IIIF v2 'seeAlso' field (structured data only).

    Each link with a format is emitted as a dict with "@id" and "format";
    a link without a format is emitted as the bare URL string.

    :return: List of links to structured data describing Manifest
    :rtype: list
    """
    structured = self.relatedlink_set.filter(is_structured_data=True)
    entries = []
    for related in structured:
        if related.format:
            entries.append({"@id": related.link, "format": related.format})
        else:
            entries.append(related.link)
    return entries

# TODO: Is this needed? It doesn't seem to be called anywhere.
# Could we just use the label as is?
def autocomplete_label(self):
Expand Down Expand Up @@ -339,9 +390,18 @@ class Note(models.Model):

class RelatedLink(models.Model):
""" Links to related resources """
id = models.UUIDField(primary_key=True, default=uuid4)
id = models.UUIDField(primary_key=True, default=uuid4, editable=False)
link = models.CharField(max_length=255)
data_type = models.CharField(max_length=255, default='Dataset')
data_type = models.CharField(
max_length=255,
default='Dataset',
)
is_structured_data = models.BooleanField(
default=False,
help_text="True if this link is structured data that should appear in the manifest's " +
"'seeAlso' field; if false, the link will appear in the 'related' field instead. Leave " +
"unchecked if unsure.",
)
label = GenericRelation(ValueByLanguage)
format = models.CharField(max_length=255, choices=Choices.MIMETYPES, blank=True, null=True)
profile = models.CharField(max_length=255, blank=True, null=True)
Expand Down
4 changes: 2 additions & 2 deletions apps/iiif/manifests/tests/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,9 +175,9 @@ def test_serialized_related_links(self):
[manifest]
)
)
assert 'seeAlso' not in no_links.keys()
assert not no_links['seeAlso']

link = RelatedLink(link='images.org', manifest=manifest)
link = RelatedLink(link='images.org', manifest=manifest, is_structured_data=True)
link.save()
manifest.refresh_from_db()

Expand Down
5 changes: 2 additions & 3 deletions apps/iiif/serializers/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,9 @@ def get_dump_object(self, obj):
)
)
}
]
],
"seeAlso": obj.see_also_links,
}
if obj.relatedlink_set.exists():
data["seeAlso"] = [related.link for related in obj.relatedlink_set.all()]
return data
return None

Expand Down
42 changes: 34 additions & 8 deletions apps/ingest/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ def clean_metadata(metadata):
:rtype: dict
"""
metadata = {key.casefold().replace(' ', '_'): value for key, value in metadata.items()}
fields = [f.name for f in Manifest._meta.get_fields()]
fields = [
*(f.name for f in Manifest._meta.get_fields()),
"related", # used for related external links
]
invalid_keys = []

for key in metadata.keys():
Expand All @@ -33,13 +36,31 @@ def clean_metadata(metadata):
if key not in fields:
invalid_keys.append(key)

# TODO: Update this method to allow all "invalid" keys to populate Manifest.metadata JSONField
for invalid_key in invalid_keys:
metadata.pop(invalid_key)



return metadata

def create_related_links(manifest, related_str):
    """
    Create RelatedLink objects from supplied related links string and associate each with
    supplied Manifest. String should consist of semicolon-separated URLs.

    Whitespace around each URL is ignored, and empty entries (for example from a trailing
    semicolon or an empty value) are skipped rather than stored as blank links.

    :param manifest: Manifest the new links are attached to
    :type manifest: iiif.manifest.models.Manifest
    :param related_str: Semicolon-separated URLs
    :type related_str: str
    :rtype: None
    """
    if not related_str:
        # Nothing to split (empty string or missing value).
        return

    for raw_link in related_str.split(";"):
        link = raw_link.strip()
        if not link:
            continue
        (mime_type, _) = guess_type(link)
        RelatedLink.objects.create(
            manifest=manifest,
            link=link,
            format=mime_type or "text/html",  # assume web page if MIME type cannot be determined
            is_structured_data=False,  # assume this is not meant for seeAlso
        )

def create_manifest(ingest):
"""
Create or update a Manifest from supplied metadata and images.
Expand All @@ -61,7 +82,13 @@ def create_manifest(ingest):
else:
manifest = Manifest.objects.create()
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# TODO: if the key doesn't exist on Manifest model, add it to Manifest.metadata
else:
manifest = Manifest()

Expand All @@ -77,13 +104,12 @@ def create_manifest(ingest):
manifest.collections.set(ingest.collections.all())
# Save again once relationship is set
manifest.save()

# if type(ingest, .models.Remote):
if isinstance(ingest, Remote):
else:
RelatedLink(
manifest=manifest,
link=ingest.remote_url,
format='application/ld+json'
format='application/ld+json',
is_structured_data=True,
).save()

return manifest
Expand Down
9 changes: 7 additions & 2 deletions apps/ingest/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from apps.ingest.models import IngestTaskWatcher

from .mail import send_email_on_failure, send_email_on_success
from .services import create_manifest
from .services import create_manifest, create_related_links

# Use `apps.get_model` to avoid circular import error. Because the parameters used to
# create a background task have to be serializable, we can't just pass in the model object.
Expand Down Expand Up @@ -129,7 +129,12 @@ def create_canvases_from_s3_ingest(metadata, ingest_id):
except Manifest.DoesNotExist:
manifest = Manifest.objects.create(pid=pid)
for (key, value) in metadata.items():
setattr(manifest, key, value)
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
# Image server: set from ingest
ingest = S3Ingest.objects.get(pk=ingest_id)
manifest.image_server = ingest.image_server
Expand Down
17 changes: 17 additions & 0 deletions apps/ingest/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,23 @@ def test_it_creates_mainfest_with_metadata_property(self):
assert local.manifest.pid == '808'
assert local.manifest.title == 'Goodie Mob'

def test_create_related_links(self):
    """It should create RelatedLinks, with formats, from the "related" metadata key."""
    github_url = 'https://github.com/ecds/readux/tree/develop'
    pdf_url = 'https://archive.org/download/cherokeehymnbook00boud/cherokeehymnbook00boud.pdf'
    metadata = {
        'pid': '808',
        'related': ';'.join([github_url, pdf_url]),
    }
    local = self.mock_local('no_meta_file.zip', metadata=metadata)
    local.manifest = create_manifest(local)
    related_links = local.manifest.related_links
    # should get 2 from metadata, 1 from volume url
    assert len(related_links) == 3
    # Pair each URL with its own format. Checking ids and formats separately
    # would pass vacuously, because the volume url entry is always text/html.
    formats_by_url = {link["@id"]: link["format"] for link in related_links}
    # should get github link format as text/html
    assert formats_by_url[github_url] == "text/html"
    # should get pdf format too
    assert formats_by_url[pdf_url] == "application/pdf"

def test_moving_bulk_bundle_to_s3(self):
"""
It should upload Local.bundle_from_bulk to mock S3 by saving it to
Expand Down
4 changes: 2 additions & 2 deletions apps/ingest/tests/test_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ def test_adding_related_link_to_remote_ingest_manifest(self):
remote_url='https://swoop.net/manifest.json' # pylint: disable=line-too-long
)
manifest = services.create_manifest(remote)
related_link = manifest.relatedlink_set.first()
assert related_link.link == remote.remote_url
related_links = manifest.relatedlink_set.all()
assert any([link.link == remote.remote_url for link in related_links])

httpretty.disable()

Expand Down
19 changes: 19 additions & 0 deletions apps/templates/page.html
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,25 @@ <h2 class="uk-modal-title">Text</h2>
<v-info-content-url-external label="Stable Page" volume="{% url 'volume' volume.pid %}" url="https://{{ request.META.HTTP_HOST }}{% url 'volume' volume.pid %}/page/{{ page.pid }}">
</v-info-content-url-external>
{% endif %}

{% if volume.external_links.see_also %}
<v-info-content-url-multiple label="See Also">
{% for link in volume.external_links.see_also %}
<v-info-content-url-unit url="{{ link }}">
</v-info-content-url-unit>
{% endfor %}
</v-info-content-url-multiple>
{% endif %}

{% if volume.external_links.related %}
<v-info-content-url-multiple label="External Links">
{% for link in volume.external_links.related %}
<v-info-content-url-unit url="{{ link }}">
</v-info-content-url-unit>
{% endfor %}
</v-info-content-url-multiple>
{% endif %}

</div>
</div>

Expand Down