Commit

0.11.37: resolved py3.6 import loops; moved pubmed related methods to inputs.pubmed; fixes #157
deeenes committed Oct 13, 2020
1 parent 76cb3fd commit fd3f736
Showing 9 changed files with 189 additions and 234 deletions.
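The import-loop fix follows the usual deferred-import pattern: the module-level import that closes the cycle is moved inside the function that needs it, so nothing circular has to be resolved while the modules are first loaded. A minimal sketch of the pattern, using hypothetical modules a and b that are not part of pypath:

    # a.py
    import b  # safe: b has no module-level import of a

    def lookup(identifier):
        return identifier.upper()

    # b.py
    def info(identifier):
        # deferred import: a is loaded on the first call to info(), not when b
        # is imported, so the a <-> b cycle never exists at import time
        import a
        return a.lookup(identifier)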
2 changes: 1 addition & 1 deletion src/pypath/__version__
@@ -1 +1 @@
0.11.36
0.11.37
5 changes: 2 additions & 3 deletions src/pypath/core/entity.py
@@ -30,10 +30,8 @@
import collections

import pypath.share.common as common
from pypath.internals import intera
import pypath.share.session as session_mod
import pypath.utils.mapping as mapping
import pypath.utils.uniprot as uniprot
import pypath.share.settings as settings


@@ -263,7 +261,7 @@ def _is_mirna(key):
@staticmethod
def _is_complex(key):

return isinstance(key, intera.Complex) or (
return key.__class__.__name__ == 'Complex' or (
isinstance(key, common.basestring) and
key.startswith('COMPLEX')
)
@@ -468,6 +466,7 @@ def info(cls, identifier):

if cls._is_protein(identifier):

import pypath.utils.uniprot as utils_uniprot
return utils_uniprot.info(identifier)


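A hedged smoke test for the entity.py change above (not part of the commit; assumes pypath is installed): before this release the module-level imports between core.entity, internals.intera and utils.uniprot could fail at import time on Python 3.6; after it, both modules should load cleanly.

    # smoke test: both imports succeed once the cycle is broken
    import pypath.core.entity
    import pypath.utils.uniprot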
3 changes: 2 additions & 1 deletion src/pypath/core/network.py
@@ -45,6 +45,7 @@
import pypath.share.cache as cache_mod
import pypath.utils.mapping as mapping
import pypath.inputs.main as dataio
import pypath.inputs.pubmed as pubmed_input
import pypath.share.curl as curl
import pypath.internals.refs as refs_mod
import pypath.utils.reflists as reflists
@@ -1104,7 +1105,7 @@ def _read_resource(

refs = common.del_empty(list(set(refs)))

refs = dataio.only_pmids([str(r).strip() for r in refs])
refs = pubmed_input.only_pmids([str(r).strip() for r in refs])

if len(refs) == 0 and must_have_references:
ref_filtered += 1
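With the reference handling relocated, callers resolve references through pypath.inputs.pubmed instead of pypath.inputs.main, as the hunk above shows. A hedged usage sketch of the new call path (the identifiers below are made up for illustration, and the translation step needs network access to NCBI E-utils):

    import pypath.inputs.pubmed as pubmed_input

    refs = ['12345678', ' 23456789 ', 'PMC3000000', '10.1038/nmeth.1234']
    pmids = pubmed_input.only_pmids([str(r).strip() for r in refs])
    # `pmids` contains only PubMed IDs; the PMC ID and the DOI are translated
    # to PMIDs where NCBI E-utils can resolve them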
140 changes: 0 additions & 140 deletions src/pypath/inputs/main.py
@@ -3921,50 +3921,6 @@ def get_goslim(url = None):
return result


def get_pubmeds(pmids):
pmids = [str(pmid) for pmid in pmids]
url = urls.urls['pubmed-eutils']['url']
cache = len(pmids) < 10
data = {}
prg = progress.Progress(
len(pmids) / 100 + 1,
'Retrieving data from NCBI e-utils',
1,
percent = False)

for offset in xrange(0, len(pmids), 100):
prg.step()
post = {
'id': ','.join(pmids[offset:offset + 100]),
'retmode': 'json',
'db': 'pubmed'
}

for i in xrange(3):
try:
c = curl.Curl(
url,
silent = False,
cache = cache,
post = post,
override_post = True,
)
res = c.result
data = dict([(k, v)
for k, v in iteritems(json.loads(res)['result'])]
+ [(k, v) for k, v in iteritems(data)])

break

except ValueError:
sys.stdout.write('\t:: Error in JSON, retry %u\n' % i)
sys.stdout.flush()

prg.terminate()

return data


def get_lincs_compounds():
sys.stdout.write(
'\n\tReturned dict has names, brand names or company specific\n'
@@ -4764,102 +4720,6 @@ def guide2pharma_complexes(**kwargs):
return complexes


def open_pubmed(pmid):
"""
Opens PubMed record in web browser.
@pmid : str or int
PubMed ID
"""

pmid = str(pmid)
url = urls.urls['pubmed']['url'] % pmid
webbrowser.open(url)


def only_pmids(idList, strict = True):
"""
Return elements unchanged which comply with the PubMed ID format,
and attempts to translate the DOIs and PMC IDs using NCBI
E-utils.
Returns list containing only PMIDs.
@idList : list, str
List of IDs or one single ID.
@strict : bool
Whether keep in the list those IDs which are not PMIDs,
neither DOIs or PMC IDs or NIH manuscript IDs.
"""
if type(idList) in common.simple_types:
idList = [idList]

pmids = {i for i in idList if isinstance(i, int) or i.isdigit()}
pmcids = [i for i in idList if i.startswith('PMC')]
dois = [i for i in idList if '/' in i]
manuscids = [i for i in idList if i.startswith('NIHMS')]

if not strict:
pmids = set(pmids) | set(dois) | set(pmcids) | set(manuscids)

if len(pmcids) > 0:
pmids = pmids | set(pmids_list(pmcids))

if len(dois) > 0:
pmids = pmids | set(pmids_list(dois))

return list(pmids)


def get_pmid(idList):
"""
For a list of doi or PMC IDs
fetches the corresponding PMIDs.
"""

if type(idList) in common.simple_types:
idList = [idList]

url = urls.urls['pubmed-eutils']['conv'] % ','.join(str(i) for i in idList)
c = curl.Curl(url, silent = True)
data = c.result

try:
js = json.loads(data)

except:
js = {}

return js


def pmids_dict(idList):
jsn = get_pmid(idList)
result = {'doi': {}, 'pmc': {}}

if 'records' in jsn:
for r in jsn['records']:
if 'pmid' in r:
if 'doi' in r:
result['doi'][r['pmid']] = r['doi']

if 'pmcid' in r:
result['pmc'][r['pmid']] = r['pmcid']

return result


def pmids_list(idList):
jsn = get_pmid(idList)
result = []

if 'records' in jsn:
for r in jsn['records']:
if 'pmid' in r:
result.append(r['pmid'])

return result


def load_lmpid(organism = 9606):
"""
Reads and processes LMPID data from local file
174 changes: 174 additions & 0 deletions src/pypath/inputs/pubmed.py
@@ -0,0 +1,174 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# This file is part of the `pypath` python module
#
# Copyright
# 2014-2020
# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University
#
# File author(s): Dénes Türei ([email protected])
# Nicolàs Palacio
# Olga Ivanova
#
# Distributed under the GPLv3 License.
# See accompanying file LICENSE.txt or copy at
# http://www.gnu.org/licenses/gpl-3.0.html
#
# Website: http://pypath.omnipathdb.org/
#

from future.utils import iteritems
from past.builtins import xrange

import sys
import json
import webbrowser

import pypath.resources.urls as urls
import pypath.share.curl as curl
import pypath.share.common as common
import pypath.share.progress as progress


def open_pubmed(pmid):
"""
Opens PubMed record in web browser.
@pmid : str or int
PubMed ID
"""

pmid = str(pmid)
url = urls.urls['pubmed']['url'] % pmid
webbrowser.open(url)


def only_pmids(idList, strict = True):
"""
Returns unchanged the elements which comply with the PubMed ID format,
and attempts to translate DOIs and PMC IDs to PubMed IDs using NCBI
E-utils.
Returns a list containing only PMIDs.
@idList : list, str
List of IDs or one single ID.
@strict : bool
If False, DOIs, PMC IDs and NIH manuscript IDs are kept in the result
as they are; if True (default), only PubMed IDs are returned.
"""
if type(idList) in common.simple_types:
idList = [idList]

pmids = {i for i in idList if isinstance(i, int) or i.isdigit()}
pmcids = [i for i in idList if i.startswith('PMC')]
dois = [i for i in idList if '/' in i]
manuscids = [i for i in idList if i.startswith('NIHMS')]

if not strict:
pmids = set(pmids) | set(dois) | set(pmcids) | set(manuscids)

if len(pmcids) > 0:
pmids = pmids | set(pmids_list(pmcids))

if len(dois) > 0:
pmids = pmids | set(pmids_list(dois))

return list(pmids)


def get_pmid(idList):
"""
For a list of doi or PMC IDs
fetches the corresponding PMIDs.
"""

if type(idList) in common.simple_types:
idList = [idList]

url = urls.urls['pubmed-eutils']['conv'] % ','.join(str(i) for i in idList)
c = curl.Curl(url, silent = True)
data = c.result

try:
js = json.loads(data)

except:
js = {}

return js


def pmids_dict(idList):

jsn = get_pmid(idList)
result = {'doi': {}, 'pmc': {}}

if 'records' in jsn:
for r in jsn['records']:
if 'pmid' in r:
if 'doi' in r:
result['doi'][r['pmid']] = r['doi']

if 'pmcid' in r:
result['pmc'][r['pmid']] = r['pmcid']

return result


def pmids_list(idList):

jsn = get_pmid(idList)
result = []

if 'records' in jsn:
for r in jsn['records']:
if 'pmid' in r:
result.append(r['pmid'])

return result


def get_pubmeds(pmids):

pmids = [str(pmid) for pmid in pmids]
url = urls.urls['pubmed-eutils']['url']
cache = len(pmids) < 10
data = {}
prg = progress.Progress(
len(pmids) / 100 + 1,
'Retrieving data from NCBI e-utils',
1,
percent = False)

for offset in xrange(0, len(pmids), 100):
prg.step()
post = {
'id': ','.join(pmids[offset:offset + 100]),
'retmode': 'json',
'db': 'pubmed'
}

for i in xrange(3):
try:
c = curl.Curl(
url,
silent = False,
cache = cache,
post = post,
override_post = True,
)
res = c.result
data = dict([(k, v)
for k, v in iteritems(json.loads(res)['result'])]
+ [(k, v) for k, v in iteritems(data)])

break

except ValueError:
sys.stdout.write('\t:: Error in JSON, retry %u\n' % i)
sys.stdout.flush()

prg.terminate()

return data
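A brief, hedged example of using the relocated module directly (the identifiers are arbitrary, and both calls query NCBI E-utils, so they need network access):

    import pypath.inputs.pubmed as pubmed

    # translate PMC IDs and DOIs to PubMed IDs
    id_map = pubmed.pmids_dict(['PMC3000000', '10.1093/nar/gkv1157'])

    # fetch E-utils summary records (title, journal, authors, ...) keyed by PMID
    records = pubmed.get_pubmeds(['26467481', '30395289'])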