# retrievecBioPortalAPI.py
# Helpers that download study data from the public cBioPortal REST API.
# Import libraries
import json
from urllib.parse import quote
from urllib.request import urlopen
# Extract Mutation data
def retrieveGenomicVariants(studyId, listInfoGenomicVariants, fetchJson=None):
    """Collect the mutation records of a cBioPortal study.

    The function looks up every molecular profile of the study whose
    ``molecularAlterationType`` is ``MUTATION_EXTENDED`` and every sample
    list whose ``category`` is ``all_cases_with_mutation_data``, then
    downloads the mutations for each (profile, sample list) pair.

    Args:
        studyId: Identifier of the study, as used in the API path.
        listInfoGenomicVariants: Names of the mutation fields to keep.
            Fields absent from a mutation record are skipped for that
            record.
        fetchJson: Optional callable that takes a URL string and returns
            the decoded JSON payload. When omitted, the URL is fetched
            with ``urlopen`` and parsed with ``json``. Supplying a
            callable allows custom transport (authentication, caching,
            offline fixtures).

    Returns:
        A list with one dict per mutation record, each containing only
        the requested fields that were present in that record.
    """
    def _httpGetJson(url):
        # The context manager closes the HTTP response even if reading
        # or parsing raises.
        with urlopen(url) as response:
            return json.loads(response.read())

    fetch = _httpGetJson if fetchJson is None else fetchJson
    apiRoot = "https://www.cbioportal.org/api"
    # Percent-encode identifiers so reserved characters cannot alter the URL.
    study = quote(str(studyId), safe="")

    # For now only mutation data is of interest: keep the profiles whose
    # alteration type is MUTATION_EXTENDED.
    listMolecularProfiles = fetch(f"{apiRoot}/studies/{study}/molecular-profiles")
    mutationProfileIds = [
        profile["molecularProfileId"]
        for profile in listMolecularProfiles
        if profile["molecularAlterationType"] == "MUTATION_EXTENDED"
    ]
    if not mutationProfileIds:
        return []

    # The sample lists depend only on the study, so fetch them once
    # instead of once per mutation profile.
    listSampleLists = fetch(f"{apiRoot}/studies/{study}/sample-lists")
    sampleListIds = [
        sampleList["sampleListId"]
        for sampleList in listSampleLists
        if sampleList["category"] == "all_cases_with_mutation_data"
    ]

    listGenomicVariants = []
    for molecularProfileId in mutationProfileIds:
        profile = quote(str(molecularProfileId), safe="")
        for sampleListId in sampleListIds:
            sampleList = quote(str(sampleListId), safe="")
            mutationsUrl = (
                f"{apiRoot}/molecular-profiles/{profile}/mutations"
                f"?sampleListId={sampleList}"
            )
            for molecularData in fetch(mutationsUrl):
                # Keep only the requested fields that this record provides.
                listGenomicVariants.append({
                    field: molecularData[field]
                    for field in listInfoGenomicVariants
                    if field in molecularData
                })
    return listGenomicVariants
# Extract Clinical Data from Patients and Samples
def retrieveAPIData(configVariables, studyId, patientORsample, fetchJson=None):
    """Download clinical data for every patient or sample of a study.

    Args:
        configVariables: Mapping from output field name to the cBioPortal
            clinical attribute ID to query for that field.
        studyId: Identifier of the study, as used in the API path.
        patientORsample: API resource to query. ``'patients'`` selects
            patient records (keyed by ``patientId``); any other value is
            treated as a sample resource (keyed by ``sampleId``).
        fetchJson: Optional callable that takes a URL string and returns
            the decoded JSON payload. When omitted, the URL is fetched
            with ``urlopen`` and parsed with ``json``. Supplying a
            callable allows custom transport (authentication, caching,
            offline fixtures).

    Returns:
        A list with one dict per patient/sample. Each dict has an ``'id'``
        key, an ``'individualId'`` key (the owning ``patientId``) for
        samples, and one key per configured variable for which the API
        returned at least one value (the first value is used).
    """
    def _httpGetJson(url):
        # The context manager closes the HTTP response even if reading
        # or parsing raises.
        with urlopen(url) as response:
            return json.loads(response.read())

    fetch = _httpGetJson if fetchJson is None else fetchJson
    apiRoot = "https://www.cbioportal.org/api"
    idField = "patientId" if patientORsample == "patients" else "sampleId"

    # Percent-encode identifiers so reserved characters cannot alter the URL.
    study = quote(str(studyId), safe="")
    resourceUrl = f"{apiRoot}/studies/{study}/{patientORsample}"

    # The attribute IDs do not change between entries: encode them once.
    attributeQueries = [
        (configVariable, quote(str(configVariables[configVariable]), safe=""))
        for configVariable in configVariables
    ]

    listSamplePatients = []  # One dict per patient or sample
    for entry in fetch(resourceUrl):
        entryId = entry[idField]
        record = {"id": entryId}
        if idField == "sampleId":
            record["individualId"] = entry["patientId"]

        # NOTE(review): this issues one request per entry per attribute;
        # a bulk clinical-data endpoint may be preferable - confirm.
        clinicalDataUrl = f"{resourceUrl}/{quote(str(entryId), safe='')}/clinical-data"
        for configVariable, attributeId in attributeQueries:
            dataJson = fetch(f"{clinicalDataUrl}?attributeId={attributeId}")
            if not dataJson:  # Attribute has no value for this entry: skip it
                continue
            record[configVariable] = dataJson[0]["value"]
        listSamplePatients.append(record)
    return listSamplePatients