Skip to content

Commit

Permalink
UniChem: list of sources from API instead of legacy web page
Browse files Browse the repository at this point in the history
  • Loading branch information
deeenes committed May 7, 2024
1 parent 5871cd9 commit 118a16b
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 21 deletions.
33 changes: 13 additions & 20 deletions pypath/inputs/unichem.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import textwrap
import collections

-import bs4
+import json

import pypath.resources.urls as urls
import pypath.share.curl as curl
Expand Down Expand Up @@ -56,25 +56,18 @@ def unichem_info():

url = urls.urls['unichem']['sources']
c = curl.Curl(url, large = False, silent = False)
-soup = bs4.BeautifulSoup(c.result, 'html.parser')
-result = []
-
-for table in soup.find_all('table'):
-
-if table.find('tr').text.strip().startswith('src_id'):
-
-for row in table.find_all('tr')[2:]:
-
-fields = row.find_all('td')
-
-result.append(
-UnichemSource(
-*(
-field.text.strip()
-for field in fields
-)
-)
-)
+response = json.loads(c.result)
+
+result = [
+UnichemSource(
+number = s['sourceID'],
+label = s['nameLabel'],
+name = s['name'],
+description = s['description'],
+acquisition = s['lastUpdated'],
+)
+for s in response['sources']
+]

return result

Expand Down
4 changes: 3 additions & 1 deletion pypath/resources/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -1483,7 +1483,9 @@
},
'unichem': {
'label': 'Mapping between drug compound IDs',
-'sources': 'https://www.ebi.ac.uk/unichem/legacy/ucquery/listSources',
+'sources_old':
+'https://www.ebi.ac.uk/unichem/legacy/ucquery/listSources',
+'sources': 'https://www.ebi.ac.uk/unichem/api/v1/sources/',
'mapping': 'https://ftp.ebi.ac.uk/pub/databases/chembl/UniChem/data/'
'wholeSourceMapping/src_id%s/src%ssrc%s.txt.gz',
},
Expand Down

0 comments on commit 118a16b

Please sign in to comment.