From 6e33ed92cf6ae1afd62c94ac40ddd49cbae59b93 Mon Sep 17 00:00:00 2001
From: Sean Kelly
Date: Wed, 18 Oct 2023 14:40:15 -0500
Subject: [PATCH] Possible fix for #290

- Adds an upgrade command `edrn_nukephotos` which should be run once at upgrade time to nuke all the PI photos that have been duplicated over time
- Adjusts site ingest so that it checks for an existing photo first and re-uses it
- Changes name of photo to include not just surname but also ID number so we can better re-use photos
- Adds update steps to cbiit-deploy, cbiit-deploy-prod, and devrebuild
---
Reviewer notes (text between the `---` line and the first `diff --git` is
not part of the commit):

* Line structure: this patch was recovered from a copy whose newlines and
  indentation had been collapsed. Added lines were split on their `+`
  markers and the hunk bodies agree with the `@@` line counts and the
  diffstat, but indentation depth and the position of blank context lines
  (notably in support/devrebuild.sh and the last statement of `handle`)
  are reconstructed. Verify against the commit before applying.
* edrn_nukephotos.py: `handle` saves and restores
  WAGTAILREDIRECTS_AUTO_CREATE via `old`, but the `finally` clause sets
  WAGTAILSEARCH_BACKENDS['default']['AUTO_UPDATE'] to True without having
  saved its earlier value. `nuke_photos` also runs the same filter twice
  (once for `count()`, once for `delete()`).
* Titles: the cleanup command deletes images titled 'Photo of <surname>',
  while the ingest now looks up and creates images titled
  'Photograph of <surname> (<id>)'. Only the six surnames listed in
  `_to_nuke` are cleaned up.
* sites.py: `if existing_image: return existing_image` puts a compound
  statement on one line (PEP 8 E701).
* cbiit-deploy.sh / cbiit-deploy-prod.sh: the same `edrn_nukephotos`
  invocation is chained seven times with `||`, so it is retried until one
  attempt succeeds. If that is the intent, a comment in the scripts would
  help; otherwise a single invocation is enough.

 .../management/commands/edrn_nukephotos.py    | 40 +++++++++++++++++++
 .../migrations/0013_alter_person_photo.py     | 20 ++++++++++
 .../migrations/0014_alter_person_photo.py     | 20 ++++++++++
 src/eke.knowledge/src/eke/knowledge/sites.py  | 22 ++++++----
 support/cbiit-deploy-prod.sh                  |  8 +++-
 support/cbiit-deploy.sh                       |  8 +++-
 support/devrebuild.sh                         |  5 +--
 7 files changed, 110 insertions(+), 13 deletions(-)
 create mode 100644 src/eke.knowledge/src/eke/knowledge/management/commands/edrn_nukephotos.py
 create mode 100644 src/eke.knowledge/src/eke/knowledge/migrations/0013_alter_person_photo.py
 create mode 100644 src/eke.knowledge/src/eke/knowledge/migrations/0014_alter_person_photo.py

diff --git a/src/eke.knowledge/src/eke/knowledge/management/commands/edrn_nukephotos.py b/src/eke.knowledge/src/eke/knowledge/management/commands/edrn_nukephotos.py
new file mode 100644
index 00000000..2b8f0ceb
--- /dev/null
+++ b/src/eke.knowledge/src/eke/knowledge/management/commands/edrn_nukephotos.py
@@ -0,0 +1,40 @@
+# encoding: utf-8
+
+'''😌 EDRN Site Content: nuke photos.'''
+
+from django.conf import settings
+from django.core.management.base import BaseCommand
+from wagtail.images.models import Image
+
+
+class Command(BaseCommand):
+    help = 'Delete specific instances of duplicate photos'
+
+    _to_nuke = (
+        'Photo of Chinnaiyan',
+        'Photo of Feng',
+        'Photo of Hanash',
+        'Photo of Semmes',
+        'Photo of Srivastava',
+        'Photo of Stass',
+    )
+
+    def nuke_photos(self):
+        for title in self._to_nuke:
+            count = Image.objects.filter(title=title).count()
+            if count > 0:
+                self.stdout.write(f'Deleting {count} instance(s) of {title}')
+                Image.objects.filter(title=title).delete()
+
+    def handle(self, *args, **options):
+        self.stdout.write('Deleting specific instances of duplicate photos')
+
+        old = getattr(settings, 'WAGTAILREDIRECTS_AUTO_CREATE', True)
+        try:
+            settings.WAGTAILREDIRECTS_AUTO_CREATE = False
+            settings.WAGTAILSEARCH_BACKENDS['default']['AUTO_UPDATE'] = False
+            self.nuke_photos()
+        finally:
+            settings.WAGTAILREDIRECTS_AUTO_CREATE = old
+            settings.WAGTAILSEARCH_BACKENDS['default']['AUTO_UPDATE'] = True
+            self.stdout.write("Job's done!")
diff --git a/src/eke.knowledge/src/eke/knowledge/migrations/0013_alter_person_photo.py b/src/eke.knowledge/src/eke/knowledge/migrations/0013_alter_person_photo.py
new file mode 100644
index 00000000..8d574609
--- /dev/null
+++ b/src/eke.knowledge/src/eke/knowledge/migrations/0013_alter_person_photo.py
@@ -0,0 +1,20 @@
+# Generated by Django 4.1.9 on 2023-10-12 20:18
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('wagtailimages', '0025_alter_image_file_alter_rendition_file'),
+        ('ekeknowledge', '0012_datacollectionindex_preamble'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='person',
+            name='photo',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='person_photograph', to='wagtailimages.image'),
+        ),
+    ]
diff --git a/src/eke.knowledge/src/eke/knowledge/migrations/0014_alter_person_photo.py b/src/eke.knowledge/src/eke/knowledge/migrations/0014_alter_person_photo.py
new file mode 100644
index 00000000..376bcd03
--- /dev/null
+++ b/src/eke.knowledge/src/eke/knowledge/migrations/0014_alter_person_photo.py
@@ -0,0 +1,20 @@
+# Generated by Django 4.1.9 on 2023-10-12 21:17
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('wagtailimages', '0025_alter_image_file_alter_rendition_file'),
+        ('ekeknowledge', '0013_alter_person_photo'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='person',
+            name='photo',
+            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='person_photograph', to='wagtailimages.image'),
+        ),
+    ]
diff --git a/src/eke.knowledge/src/eke/knowledge/sites.py b/src/eke.knowledge/src/eke/knowledge/sites.py
index 71da4a0f..bc169af7 100644
--- a/src/eke.knowledge/src/eke/knowledge/sites.py
+++ b/src/eke.knowledge/src/eke/knowledge/sites.py
@@ -96,23 +96,29 @@ def _get_file_name(self, url: str) -> str:
 
     def _get_surname(self, predicates: dict) -> str:
         name = str(predicates.get(_surname_predicate_uri, [''])[0])
-        if not name:
-            name = str(predicates.get(rdflib.term.URIRef('urn:internal:id'), [''])[0])
-        if not name:
-            name = '«unknown»'
-        return name
+        int_id = str(predicates.get(rdflib.term.URIRef('urn:internal:id'), [''])[0])
+        name = name if name else '«unknown name»'
+        int_id = int_id if int_id else '«unknown id»'
+        return f'{name} ({int_id})'
 
     def compute_new_value(self, modelField: Field, value: str, predicates: dict) -> object:
-        # Curiously, we can't pass a URL stream to ImageFile, since Django's ImageFile expects
-        # to be able to do seek() operations on it. So we have to download to a temporary file:
         name = self._get_surname(predicates)
+        image_title = f'Photograph of {name}'
+
+        # #290 check if the photo already exists and use it
+        existing_image = Image.objects.filter(title=image_title).first()
+        if existing_image: return existing_image
+
+        # Not found, so create it from scratch given the URL in ``value``
         try:
+            # Curiously, we can't pass a URL stream to ImageFile, since Django's ImageFile expects
+            # to be able to do seek() operations on it. So we have to download to a temporary file.
             with tempfile.TemporaryFile() as out_file:
                 with urlopen(value) as image_stream:
                     out_file.write(image_stream.read())
                 image_file = ImageFile(out_file, name=self._get_file_name(value))
                 image = Image(
-                    title=f'Photo of {name}', file=image_file,
+                    title=image_title, file=image_file,
                     # These values only make sense for the photos from the DMCC:
                     focal_point_x=66, focal_point_y=52, focal_point_height=76, focal_point_width=57
                 )
diff --git a/support/cbiit-deploy-prod.sh b/support/cbiit-deploy-prod.sh
index 9f630413..9444551d 100755
--- a/support/cbiit-deploy-prod.sh
+++ b/support/cbiit-deploy-prod.sh
@@ -137,7 +137,13 @@ docker compose --project-name edrn start portal" || exit 1
 echo ""
 echo "🆙 Applying upgrades"
 ssh -q $USER@$WEBSERVER "cd $WEBROOT ; \
-docker compose --project-name edrn exec portal django-admin edrn_explorer" || exit 1
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos" || exit 1
 
 echo ""
 echo "🤷‍♀️ Final portal restart and restart of search engine"
diff --git a/support/cbiit-deploy.sh b/support/cbiit-deploy.sh
index 66fe21d7..cbcb5c44 100755
--- a/support/cbiit-deploy.sh
+++ b/support/cbiit-deploy.sh
@@ -142,7 +142,13 @@ docker compose --project-name edrn start portal" || exit 1
 echo ""
 echo "🆙 Applying upgrades"
 ssh -q $USER@$WEBSERVER "cd $WEBROOT ; \
-docker compose --project-name edrn exec portal django-admin edrn_explorer" || exit 1
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos ||\
+docker compose --project-name edrn exec portal django-admin edrn_nukephotos" || exit 1
 
 echo ""
 echo "🤷‍♀️ Final portal restart and restart of search engine"
diff --git a/support/devrebuild.sh b/support/devrebuild.sh
index 4e3904b8..86ceb847 100755
--- a/support/devrebuild.sh
+++ b/support/devrebuild.sh
@@ -6,7 +6,7 @@
 
 # Download the latest production database, apply migrations, and get ready to rock and roll.
 
-jpl_sys_ipv4=172.16.16.18
+jpl_sys_ipv4=172.16.16.25
 
 # Argument check
 
@@ -57,8 +57,7 @@ bzip2 --decompress --stdout edrn.sql.bz2 | psql --dbname=edrn --echo-errors --qu
 ./manage.sh edrndevreset
 
 # Add additional upgrade steps here:
-./manage.sh edrn_explorer
-
+./manage.sh edrn_nukephotos # 6.7
 # This may be optional if you want to save time:
 ./manage.sh rdfingest
 