Skip to content

Commit

Permalink
Support #377 (and fold in #374)
Browse files Browse the repository at this point in the history
  • Loading branch information
nutjob4life committed Nov 8, 2024
1 parent f6315d5 commit 2a4ea71
Show file tree
Hide file tree
Showing 7 changed files with 185 additions and 16 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# encoding: utf-8

'''💁‍♀️ EDRN Knowledge Environment: add months to publications.'''


from django.conf import settings
from django.core.management.base import BaseCommand
from eke.knowledge.models import Publication


class Command(BaseCommand):
    '''Management command concerned with publications that have no ``month`` recorded.'''

    help = 'Add months to publications'

    def fix_publications(self):
        '''Look up the PubMed IDs of the publications whose ``month`` is empty.

        Only the lookup is implemented here; the number of affected publications is
        reported on the command's standard output.
        '''
        pmids = Publication.objects.filter(month='').values_list('pubMedID', flat=True)
        # A leftover ``breakpoint()`` used to sit here; it would drop into the debugger and
        # hang any non-interactive run (cron, CI, containers), so report the count instead.
        self.stdout.write(f'Found {pmids.count()} publications without a month')

    def handle(self, *args, **options):
        '''Run ``fix_publications`` with redirect creation and search indexing turned off.

        Both settings are put back to the values they had on entry, even when
        ``fix_publications`` raises.
        '''
        self.stdout.write('Adding months to publications')

        search_backend = settings.WAGTAILSEARCH_BACKENDS['default']
        old_redirects = getattr(settings, 'WAGTAILREDIRECTS_AUTO_CREATE', True)
        # Remember the prior value rather than assuming it was enabled, so a deployment that
        # deliberately disabled auto-update is not silently switched back on.
        old_auto_update = search_backend.get('AUTO_UPDATE', True)
        try:
            settings.WAGTAILREDIRECTS_AUTO_CREATE = False
            search_backend['AUTO_UPDATE'] = False
            self.fix_publications()
        finally:
            settings.WAGTAILREDIRECTS_AUTO_CREATE = old_redirects
            search_backend['AUTO_UPDATE'] = old_auto_update
            self.stdout.write("Job's done!")
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.16 on 2024-11-08 17:28

from django.db import migrations, models


class Migration(migrations.Migration):
    '''Add the optional ``pis`` character field to the ``publication`` model.'''

    dependencies = [('ekeknowledge', '0019_remove_protocol_fieldofresearch_and_more')]

    operations = [
        migrations.AddField(
            model_name='publication',
            name='pis',
            field=models.CharField(
                max_length=250,
                blank=True,
                help_text='EDRN PIs who wrote this',
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.16 on 2024-11-08 18:54

from django.db import migrations, models


class Migration(migrations.Migration):
    '''Add the optional ``month`` character field to the ``publication`` model.'''

    dependencies = [('ekeknowledge', '0020_publication_pis')]

    operations = [
        migrations.AddField(
            model_name='publication',
            name='month',
            field=models.CharField(
                max_length=10,
                blank=True,
                help_text='Month of publication',
            ),
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 4.2.16 on 2024-11-08 20:35

from django.db import migrations, models


class Migration(migrations.Migration):
    '''Alter ``publication.month`` so that it allows up to 16 characters.'''

    dependencies = [('ekeknowledge', '0021_publication_month')]

    operations = [
        migrations.AlterField(
            model_name='publication',
            name='month',
            field=models.CharField(
                max_length=16,
                blank=True,
                help_text='Month of publication',
            ),
        ),
    ]
83 changes: 75 additions & 8 deletions src/eke.knowledge/src/eke/knowledge/publications.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,18 @@


class PMCID(models.Model):
'''PMCID and PMID correlation.
In Medline's Entrez system, PMID (PubMed Identifier) is a unique identifier assigned to
articles in PubMed. It doesn't indicate if the article is free or accessible, just that it
exists.
A PubMed Central Identifier (PMCID) identifies an article in PubMed Central, a free digital
archive of full-text biomedical articles. If there's a PMCID, the article's full-text is free
through PubMed Central.
This correlation is used by the ``pubmed_papers`` command to extract article texts for
use in AI training.
'''
pmid = models.CharField(max_length=20, blank=True, null=False, help_text='Entrez Medline PMID code number')
pmcid = models.CharField(max_length=20, blank=True, null=False, help_text='Entrez Medline PMCID code number')
class Meta:
Expand All @@ -50,7 +62,9 @@ class Publication(KnowledgeObject):
journal = models.CharField(max_length=250, blank=True, null=False, help_text='Name of the periodical')
pubMedID = models.CharField(max_length=20, blank=True, null=False, help_text='Entrez Medline ID code number')
year = models.IntegerField(blank=True, null=True, help_text='Year of publication')
month = models.CharField(max_length=16, blank=True, null=False, help_text='Month of publication')
pubURL = models.URLField(blank=True, null=False, help_text='URL to read the publication')
pis = models.CharField(max_length=250, blank=True, null=False, help_text='EDRN PIs who wrote this')

# siteID should be deleted and use the site_that_wrote_this relation instead
siteID = models.CharField(
Expand All @@ -66,16 +80,19 @@ class Publication(KnowledgeObject):

def data_table(self) -> dict:
    '''Return the JSON-compatible dictionary describing this publication.

    The dictionary carries this publication's ``journal``, ``year``, and ``pis`` plus
    every entry of the superclass's ``data_table``; because the superclass's entries
    are merged last, they take precedence on any key collision.
    '''
    # A stale ``return`` without 'pis' used to precede this one, which made this statement
    # unreachable and left the ``pis`` column of the publications table without data.
    return {'journal': self.journal, 'year': self.year, 'pis': self.pis, **super().data_table()}

def get_context(self, request: HttpRequest, *args, **kwargs) -> dict:
context = super().get_context(request, args, kwargs)
appearances = []
if self.journal: appearances.append(self.journal)
if self.year: appearances.append(str(self.year))
if self.volume: appearances.append(self.volume)
if self.year and self.month:
appearances.append(f'{self.year} {self.month}')
elif self.year:
appearances.append(str(self.year))
if self.volume: appearances.append(f'volume {self.volume}')
appearances = ', '.join(appearances)
if self.issue: appearances += ' ({})'.format(html_escape(self.issue))
if self.issue: appearances += ' (issue {})'.format(html_escape(self.issue))
context['appearance'] = appearances

biomarkers = set([i for i in self.ekebiomarkers_biomarker_in_print.all()])
Expand All @@ -91,7 +108,10 @@ def get_context(self, request: HttpRequest, *args, **kwargs) -> dict:
protocols = self.protocols.all().order_by('title')
context['protocols'] = protocols
context['num_protocols'] = protocols.count()

from .sites import Person
context['edrn_pis'] = Person.objects.filter(
pk__in=self.site_that_wrote_this.all().values_list('pi')
).order_by('title')
return context

content_panels = KnowledgeObject.content_panels + [
Expand All @@ -100,8 +120,10 @@ def get_context(self, request: HttpRequest, *args, **kwargs) -> dict:
FieldPanel('journal'),
FieldPanel('pubMedID'),
FieldPanel('year'),
FieldPanel('month'),
FieldPanel('pubURL'),
FieldPanel('abstract'),
FieldPanel('pis'),
InlinePanel('authors', label='Authors')
]
search_fields = KnowledgeObject.search_fields + [
Expand Down Expand Up @@ -325,9 +347,10 @@ def divide():
abstract = ''
issue = str(record['MedlineCitation']['Article']['Journal']['JournalIssue'].get('Issue'))
year = str(record['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate'].get('Year'))
month = str(record['MedlineCitation']['Article']['Journal']['JournalIssue']['PubDate'].get('Month'))
journal = str(record['MedlineCitation']['Article']['Journal']['ISOAbbreviation'])
authors = self.get_authors(record)
details[pubmed_id] = (title, abstract, issue, year, journal, authors)
details[pubmed_id] = (title, abstract, issue, year, month, journal, authors)
break
except (HTTPError, HTTPException) as ex:
failures += 1
Expand Down Expand Up @@ -367,6 +390,26 @@ def associate_publication(self, publication: Publication, pmids_to_sites: dict,
if modifications: publication.save()
return modifications

def add_missing_months(self):
    '''Fill in the ``month`` of every publication whose month is currently empty.

    The PubMed details for the affected publications are fetched in one call to
    ``get_pubmed_details``. Each publication is then saved with the month found there,
    or with the placeholder ``(month unknown)`` when no usable month is available.
    Publications for which no details come back are logged and left unchanged.
    '''
    monthless = Publication.objects.filter(month='')
    # Count once: every ``count()`` call is a separate database query.
    num_monthless = monthless.count()
    if num_monthless == 0:
        _logger.info('No missing months on publications')
        return

    _logger.info('Querying pubmed to find missing months on %d publications', num_monthless)
    details = self.get_pubmed_details(set(monthless.values_list('pubMedID', flat=True)))
    for pub in monthless:
        deets = details.get(pub.pubMedID)
        if not deets:
            _logger.warning('No pubmed info found for PMID «%s»; cannot update its month', pub.pubMedID)
            continue
        _title, _abstract, _issue, _year, month, _journal, _authors = deets
        # A missing month may arrive as None, as the string 'None', or as an empty string.
        # All three get the placeholder; storing '' would leave the publication matching
        # ``month=''`` and trigger another PubMed lookup on every later run.
        if month and month != 'None':
            pub.month = month
        else:
            pub.month = '(month unknown)'
        pub.save()

def create_new_publications(self, pmids: set, pmids_to_sites: dict, pmids_to_uris: dict) -> set:
'''Create brand new publication objects for the pubmed IDs in ``pmids``.
Expand All @@ -380,12 +423,13 @@ def create_new_publications(self, pmids: set, pmids_to_sites: dict, pmids_to_uri
if not deets:
_logger.warning('No pubmed info found for PMID «%s», cannot create an object for it', pmid)
continue
title, abstract, issue, year, journal, authors = deets
title, abstract, issue, year, month, journal, authors = deets
p = Publication(
# 🔮 Maybe truncate titles better?
title=title[:255], live=True, slug=self.slugify(pmid, title),
identifier=self.miriam_uri(pmid), pubMedID=pmid,
search_description='This is a pbulication by a member of the Early Detection Research Network.'
search_description='This is a publication by a member of the Early Detection Research Network.',
abstract=abstract, issue=issue, year=int(year), month=month, journal=journal
)
self.folder.add_child(instance=p)
p.save()
Expand Down Expand Up @@ -419,6 +463,27 @@ def delete_obsolete_publications(self, pmids: set) -> set:
Publication.objects.filter(pubMedID__in=pmids).delete()
return pmids

def denormalize_pis(self, pmids_to_sites: dict):
    '''Store on each Publication a "; "-joined list of the PIs of its sites.

    ``pmids_to_sites`` maps a PubMed ID to the identifiers of the sites associated with
    it. For each PubMed ID that has a Publication, the titles of the people who are the
    ``pi`` of those sites are sorted by title, joined, and written to the ``pis`` field;
    the publication is saved only when the value actually changes.

    The same information is reachable by walking Publication → Site → Person, but doing
    that while rendering the data table for 3000+ publications was slow, hence this copy.
    '''
    from .sites import Site, Person
    for pmid, site_uris in pmids_to_sites.items():
        publication = Publication.objects.filter(pubMedID=pmid).first()
        if not publication:
            continue
        pi_ids = Site.objects.filter(identifier__in=site_uris).values_list('pi')
        pi_names = Person.objects.filter(pk__in=pi_ids).order_by('title').values_list('title', flat=True)
        joined_names = '; '.join(pi_names)
        if publication.pis == joined_names:
            # Already up to date; skip the write
            continue
        publication.pis = joined_names
        publication.save()

def update_publications(self, pmids: set, pmids_to_sites: dict, pmids_to_uris: dict) -> tuple:
'''Update publications.
Expand All @@ -435,6 +500,7 @@ def update_publications(self, pmids: set, pmids_to_sites: dict, pmids_to_uris: d
new = self.create_new_publications(pmids_to_create, pmids_to_sites, pmids_to_uris)
updated = self.update_existing_publications(pmids_to_update, pmids_to_sites, pmids_to_uris)
deleted = self.delete_obsolete_publications(pmids_to_delete)
self.denormalize_pis(pmids_to_sites)

return new, updated, deleted

Expand Down Expand Up @@ -468,6 +534,7 @@ def ingest(self):
# writer.writerow([pubMedID, title, in_dmcc, in_grants])

new, updated, deleted = self.update_publications(pmids, pmids_to_sites, pmids_to_uris)
self.add_missing_months()
return new, updated, deleted


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,17 @@
<thead>
<tr>
<th class='only-search-col'>Title</th>
<th class='only-search-col'>EDRN PI</th>
<th>Journal</th>
<th>Year</th>
</tr>
</thead>
<tfoot>
<tr>
<th>Title</th>
<th>Journal</th>
<th>Year</th>
<th>Title</th> {# 0 → 0 #}
<th>EDRN PI</th> {# 1 new #}
<th>Journal</th> {# 1 → 2 #}
<th>Year</th> {# 2 → 3 #}
</tr>
</tfoot>
</table>
Expand All @@ -52,8 +54,9 @@
// Return the plain-text header label for the column at `columnIndex`. The indexes follow
// the table's column order (title, pis, journal, year); any other index falls back to
// the `data` value that was passed in.
function headerFunction(data, columnIndex, node) {
    switch (columnIndex) {
        case 0: return 'Title';
        case 1: return 'EDRN PI';
        case 2: return 'Journal';
        case 3: return 'Year';
        default: return data;
    }
}
Expand All @@ -73,6 +76,7 @@
{data: 'title', render: function(data, type, row) {
return '<a href="' + row['url'] + '">' + data + '</a>';
}},
{data: 'pis'},
{data: 'journal'},
{data: 'year'}
],
Expand All @@ -86,7 +90,7 @@
dom: "<'row'<'col-sm-12 col-md-6'l><'col-sm-12 col-md-6'f>><'row'<'col-sm-12'tr>>" +
"<'row'<'col-sm-12 col-md-5'i><'col-sm-12 col-md-7'p>>B",
initComplete: function() {
this.api().columns([1, 2]).every(function() {
this.api().columns([2, 3]).every(function() {
var column = this;
var label = this.footer().innerHTML;
var select = $('<select><option value="">— ' + label + ' —</option></select>')
Expand All @@ -99,7 +103,7 @@
select.append('<option value="' + d + '">' + d + '</option>');
});
});
this.api().columns([0]).every(function() {
this.api().columns([0, 1]).every(function() {
var that = this;
$('input', this.header()).on('keyup change clear', function() {
if (that.search() !== this.value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,16 @@ <h2>Protocols</h2>

</div>
<div class='col-lg-3'>
<h5>Authors</h5>
<h5>EDRN PI Authors</h5>
<ul class='list-unstyled'>
{% for author in edrn_pis.all %}
<li><a href='{{author.url}}'>{{author.title}}</a></li>
{% empty %}
<li class='small'>(None specified)</li>
{% endfor %}
</ul>

<h5>Medline Author List</h5>
<ul class='list-inline'>
{% for author in page.authors.all %}
<li class='list-inline-item'>{{author.value}}</li>
Expand Down

0 comments on commit 2a4ea71

Please sign in to comment.