From fd3f736672a94e77c4a5dc61025a7f3e904b619a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?T=C3=BCrei=20D=C3=A9nes?= Date: Tue, 13 Oct 2020 18:41:51 +0200 Subject: [PATCH] 0.11.37: resolved py3.6 import loops; moved pubmed related methods to inputs.pubmed; fixes #157 --- src/pypath/__version__ | 2 +- src/pypath/core/entity.py | 5 +- src/pypath/core/network.py | 3 +- src/pypath/inputs/main.py | 140 ----------------------- src/pypath/inputs/pubmed.py | 174 +++++++++++++++++++++++++++++ src/pypath/internals/refs.py | 90 +-------------- src/pypath/legacy/main.py | 5 +- src/pypath/resources/__init__.py | 2 +- src/pypath/resources/controller.py | 2 +- 9 files changed, 189 insertions(+), 234 deletions(-) create mode 100644 src/pypath/inputs/pubmed.py diff --git a/src/pypath/__version__ b/src/pypath/__version__ index 2c12e33ba..0f148f9d9 100644 --- a/src/pypath/__version__ +++ b/src/pypath/__version__ @@ -1 +1 @@ -0.11.36 +0.11.37 diff --git a/src/pypath/core/entity.py b/src/pypath/core/entity.py index c9970f2d7..2362b0f46 100644 --- a/src/pypath/core/entity.py +++ b/src/pypath/core/entity.py @@ -30,10 +30,8 @@ import collections import pypath.share.common as common -from pypath.internals import intera import pypath.share.session as session_mod import pypath.utils.mapping as mapping -import pypath.utils.uniprot as uniprot import pypath.share.settings as settings @@ -263,7 +261,7 @@ def _is_mirna(key): @staticmethod def _is_complex(key): - return isinstance(key, intera.Complex) or ( + return key.__class__.__name__ == 'Complex' or ( isinstance(key, common.basestring) and key.startswith('COMPLEX') ) @@ -468,6 +466,7 @@ def info(cls, identifier): if cls._is_protein(identifier): + import pypath.utils.uniprot as uniprot return utils_uniprot.info(identifier) diff --git a/src/pypath/core/network.py b/src/pypath/core/network.py index 2e144e186..d975a0681 100644 --- a/src/pypath/core/network.py +++ b/src/pypath/core/network.py @@ -45,6 +45,7 @@ import pypath.share.cache as cache_mod import pypath.utils.mapping as mapping import pypath.inputs.main as dataio +import pypath.inputs.pubmed as pubmed_input import pypath.share.curl as curl import pypath.internals.refs as refs_mod import pypath.utils.reflists as reflists @@ -1104,7 +1105,7 @@ def _read_resource( refs = common.del_empty(list(set(refs))) - refs = dataio.only_pmids([str(r).strip() for r in refs]) + refs = pubmed_input.only_pmids([str(r).strip() for r in refs]) if len(refs) == 0 and must_have_references: ref_filtered += 1 diff --git a/src/pypath/inputs/main.py b/src/pypath/inputs/main.py index d4d9ea418..e25eb5ff3 100644 --- a/src/pypath/inputs/main.py +++ b/src/pypath/inputs/main.py @@ -3921,50 +3921,6 @@ def get_goslim(url = None): return result -def get_pubmeds(pmids): - pmids = [str(pmid) for pmid in pmids] - url = urls.urls['pubmed-eutils']['url'] - cache = len(pmids) < 10 - data = {} - prg = progress.Progress( - len(pmids) / 100 + 1, - 'Retrieving data from NCBI e-utils', - 1, - percent = False) - - for offset in xrange(0, len(pmids), 100): - prg.step() - post = { - 'id': ','.join(pmids[offset:offset + 100]), - 'retmode': 'json', - 'db': 'pubmed' - } - - for i in xrange(3): - try: - c = curl.Curl( - url, - silent = False, - cache = cache, - post = post, - override_post = True, - ) - res = c.result - data = dict([(k, v) - for k, v in iteritems(json.loads(res)['result'])] - + [(k, v) for k, v in iteritems(data)]) - - break - - except ValueError: - sys.stdout.write('\t:: Error in JSON, retry %u\n' % i) - sys.stdout.flush() - - prg.terminate() - - return data - - def get_lincs_compounds(): sys.stdout.write( '\n\tReturned dict has names, brand names or company specific\n' @@ -4764,102 +4720,6 @@ def guide2pharma_complexes(**kwargs): return complexes -def open_pubmed(pmid): - """ - Opens PubMed record in web browser. - - @pmid : str or int - PubMed ID - """ - - pmid = str(pmid) - url = urls.urls['pubmed']['url'] % pmid - webbrowser.open(url) - - -def only_pmids(idList, strict = True): - """ - Return elements unchanged which comply with the PubMed ID format, - and attempts to translate the DOIs and PMC IDs using NCBI - E-utils. - Returns list containing only PMIDs. - - @idList : list, str - List of IDs or one single ID. - @strict : bool - Whether keep in the list those IDs which are not PMIDs, - neither DOIs or PMC IDs or NIH manuscript IDs. - """ - if type(idList) in common.simple_types: - idList = [idList] - - pmids = {i for i in idList if isinstance(i, int) or i.isdigit()} - pmcids = [i for i in idList if i.startswith('PMC')] - dois = [i for i in idList if '/' in i] - manuscids = [i for i in idList if i.startswith('NIHMS')] - - if not strict: - pmids = set(pmids) | set(dois) | set(pmcids) | set(manuscids) - - if len(pmcids) > 0: - pmids = pmids | set(pmids_list(pmcids)) - - if len(dois) > 0: - pmids = pmids | set(pmids_list(dois)) - - return list(pmids) - - -def get_pmid(idList): - """ - For a list of doi or PMC IDs - fetches the corresponding PMIDs. - """ - - if type(idList) in common.simple_types: - idList = [idList] - - url = urls.urls['pubmed-eutils']['conv'] % ','.join(str(i) for i in idList) - c = curl.Curl(url, silent = True) - data = c.result - - try: - js = json.loads(data) - - except: - js = {} - - return js - - -def pmids_dict(idList): - jsn = get_pmid(idList) - result = {'doi': {}, 'pmc': {}} - - if 'records' in jsn: - for r in jsn['records']: - if 'pmid' in r: - if 'doi' in r: - result['doi'][r['pmid']] = r['doi'] - - if 'pmcid' in r: - result['pmc'][r['pmid']] = r['pmcid'] - - return result - - -def pmids_list(idList): - jsn = get_pmid(idList) - result = [] - - if 'records' in jsn: - for r in jsn['records']: - if 'pmid' in r: - result.append(r['pmid']) - - return result - - def load_lmpid(organism = 9606): """ Reads and processes LMPID data from local file diff --git a/src/pypath/inputs/pubmed.py b/src/pypath/inputs/pubmed.py new file mode 100644 index 000000000..e7291772c --- /dev/null +++ b/src/pypath/inputs/pubmed.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# +# This file is part of the `pypath` python module +# +# Copyright +# 2014-2020 +# EMBL, EMBL-EBI, Uniklinik RWTH Aachen, Heidelberg University +# +# File author(s): Dénes Türei (turei.denes@gmail.com) +# Nicolàs Palacio +# Olga Ivanova +# +# Distributed under the GPLv3 License. +# See accompanying file LICENSE.txt or copy at +# http://www.gnu.org/licenses/gpl-3.0.html +# +# Website: http://pypath.omnipathdb.org/ +# + +from future.utils import iteritems + +import sys +import json +import webbrowser + +import pypath.resources.urls as urls +import pypath.share.curl as curl +import pypath.share.common as common +import pypath.share.progress as progress + + +def open_pubmed(pmid): + """ + Opens PubMed record in web browser. + + @pmid : str or int + PubMed ID + """ + + pmid = str(pmid) + url = urls.urls['pubmed']['url'] % pmid + webbrowser.open(url) + + +def only_pmids(idList, strict = True): + """ + Return elements unchanged which comply with the PubMed ID format, + and attempts to translate the DOIs and PMC IDs using NCBI + E-utils. + Returns list containing only PMIDs. + + @idList : list, str + List of IDs or one single ID. + @strict : bool + Whether keep in the list those IDs which are not PMIDs, + neither DOIs or PMC IDs or NIH manuscript IDs. + """ + if type(idList) in common.simple_types: + idList = [idList] + + pmids = {i for i in idList if isinstance(i, int) or i.isdigit()} + pmcids = [i for i in idList if i.startswith('PMC')] + dois = [i for i in idList if '/' in i] + manuscids = [i for i in idList if i.startswith('NIHMS')] + + if not strict: + pmids = set(pmids) | set(dois) | set(pmcids) | set(manuscids) + + if len(pmcids) > 0: + pmids = pmids | set(pmids_list(pmcids)) + + if len(dois) > 0: + pmids = pmids | set(pmids_list(dois)) + + return list(pmids) + + +def get_pmid(idList): + """ + For a list of doi or PMC IDs + fetches the corresponding PMIDs. + """ + + if type(idList) in common.simple_types: + idList = [idList] + + url = urls.urls['pubmed-eutils']['conv'] % ','.join(str(i) for i in idList) + c = curl.Curl(url, silent = True) + data = c.result + + try: + js = json.loads(data) + + except: + js = {} + + return js + + +def pmids_dict(idList): + + jsn = get_pmid(idList) + result = {'doi': {}, 'pmc': {}} + + if 'records' in jsn: + for r in jsn['records']: + if 'pmid' in r: + if 'doi' in r: + result['doi'][r['pmid']] = r['doi'] + + if 'pmcid' in r: + result['pmc'][r['pmid']] = r['pmcid'] + + return result + + +def pmids_list(idList): + + jsn = get_pmid(idList) + result = [] + + if 'records' in jsn: + for r in jsn['records']: + if 'pmid' in r: + result.append(r['pmid']) + + return result + + +def get_pubmeds(pmids): + + pmids = [str(pmid) for pmid in pmids] + url = urls.urls['pubmed-eutils']['url'] + cache = len(pmids) < 10 + data = {} + prg = progress.Progress( + len(pmids) / 100 + 1, + 'Retrieving data from NCBI e-utils', + 1, + percent = False) + + for offset in xrange(0, len(pmids), 100): + prg.step() + post = { + 'id': ','.join(pmids[offset:offset + 100]), + 'retmode': 'json', + 'db': 'pubmed' + } + + for i in xrange(3): + try: + c = curl.Curl( + url, + silent = False, + cache = cache, + post = post, + override_post = True, + ) + res = c.result + data = dict([(k, v) + for k, v in iteritems(json.loads(res)['result'])] + + [(k, v) for k, v in iteritems(data)]) + + break + + except ValueError: + sys.stdout.write('\t:: Error in JSON, retry %u\n' % i) + sys.stdout.flush() + + prg.terminate() + + return data \ No newline at end of file diff --git a/src/pypath/internals/refs.py b/src/pypath/internals/refs.py index 1f6fa2983..f71354a31 100644 --- a/src/pypath/internals/refs.py +++ b/src/pypath/internals/refs.py @@ -32,7 +32,7 @@ import pypath.share.curl as curl import pypath.share.common as common import pypath.resources.urls as urls -from pypath.inputs import main as dataio +from pypath.inputs import pubmed as pubmed_input import pypath.share.settings as settings @@ -50,101 +50,19 @@ def __hash__(self): return hash(self.pmid) def open(self): - dataio.open_pubmed(self.pmid) + pubmed_input.open_pubmed(self.pmid) def __str__(self): return self.pmid def info(self): - return dataio.get_pubmeds([self.pmid]) + return pubmed_input.get_pubmeds([self.pmid]) def __repr__(self): return '' % self.pmid -def open_pubmed(pmid): - """ - Opens PubMed record in web browser. - - @pmid : str or int - PubMed ID - """ - pmid = str(pmid) - url = urls.urls['pubmed']['url'] % pmid - webbrowser.open(url) - - -def only_pmids(idList, strict=True): - """ - Return elements unchanged which comply with PubMed ID format, - and attempts to translate the DOIs and PMC IDs using NCBI - E-utils. - Returns list containing only PMIDs. - - @idList : list, str - List of IDs or one single ID. - @strict : bool - Whether keep in the list those IDs which are not PMIDs, - neither DOIs or PMC IDs or NIH manuscript IDs. - """ - if type(idList) in common.simple_types: - idList = [idList] - pmids = set([i for i in idList if i.isdigit()]) - pmcids = [i for i in idList if i.startswith('PMC')] - dois = [i for i in idList if '/' in i] - manuscids = [i for i in idList if i.startswith('NIHMS')] - if not strict: - non_pmids = set(idList) - (set(pmids) | set(dois) | set(pmcids) | - set(manuscids)) - pmids = pmids | non_pmids - if len(pmcids) > 0: - pmids = pmids | set(pmids_list(pmcids)) - if len(dois) > 0: - pmids = pmids | set(pmids_list(dois)) - return list(pmids) - - -def get_pmid(idList): - """ - For a list of doi or PMC IDs - fetches the corresponding PMIDs. - """ - if type(idList) in common.simple_types: - idList = [idList] - url = urls.urls['pubmed-eutils']['conv'] % ','.join(str(i) for i in idList) - c = curl.Curl(url, silent=True) - data = c.result - try: - js = json.loads(data) - except: - js = {} - return js - - -def pmids_dict(idList): - jsn = get_pmid(idList) - result = {'doi': {}, 'pmc': {}} - if 'records' in jsn: - for r in jsn['records']: - if 'pmid' in r: - if 'doi' in r: - result['doi'][r['pmid']] = r['doi'] - if 'pmcid' in r: - result['pmc'][r['pmid']] = r['pmcid'] - return result - - -def pmids_list(idList): - jsn = get_pmid(idList) - result = [] - if 'records' in jsn: - for r in jsn['records']: - if 'pmid' in r: - result.append(r['pmid']) - return result - - def get_pubmed_data( pp, cachefile = None, @@ -191,7 +109,7 @@ def get_pubmed_data( sys.stdout.write('\t:: Downloading data from PubMed about %s papers\n' % len(missing)) cached_pubmeds_len = len(pmdata) - pmdata_new = dataio.get_pubmeds(missing) + pmdata_new = pubmed_input.get_pubmeds(missing) pmdata.update(pmdata_new) sys.stdout.write('\t:: Saving PubMed data to file `%s`\n' % cachefile) diff --git a/src/pypath/legacy/main.py b/src/pypath/legacy/main.py index 1f0e300f3..195cd8f9b 100644 --- a/src/pypath/legacy/main.py +++ b/src/pypath/legacy/main.py @@ -110,6 +110,7 @@ import pypath.utils.mapping as mapping import pypath.resources.descriptions as descriptions import pypath.inputs.main as dataio +import pypath.inputs.pubmed as pubmed_input import pypath.inputs as inputs import pypath.core.network as network import pypath.utils.homology as homology @@ -2837,7 +2838,9 @@ def _read_network_data( refs = common.del_empty(list(set(refs))) - refs = dataio.only_pmids([str(r).strip() for r in refs]) + refs = pubmed_input.only_pmids( + [str(r).strip() for r in refs] + ) if len(refs) == 0 and must_have_references: rFiltered += 1 diff --git a/src/pypath/resources/__init__.py b/src/pypath/resources/__init__.py index 2b6c61cfc..3e40e9c01 100644 --- a/src/pypath/resources/__init__.py +++ b/src/pypath/resources/__init__.py @@ -19,7 +19,7 @@ # Website: http://pypath.omnipathdb.org/ # -from pypath.resources import controller as _controller_mod +from . import controller as _controller_mod def get_controller(reload = False): diff --git a/src/pypath/resources/controller.py b/src/pypath/resources/controller.py index 9319e31b4..b260ccf8e 100644 --- a/src/pypath/resources/controller.py +++ b/src/pypath/resources/controller.py @@ -29,7 +29,7 @@ import pypath.share.session as session_mod import pypath.share.common as common import pypath.internals.resource as resource_base -import pypath.resources.licenses as licenses +from . import licenses as licenses _logger = session_mod.Logger(name = 'resources.controller')