From e294a9283076644a0c92e742de672c6b146828e0 Mon Sep 17 00:00:00 2001 From: Sara JC Gosline Date: Fri, 18 Oct 2024 13:41:18 -0700 Subject: [PATCH] updating the drug retrieval to use the latest files --- build/broad_sanger/03a-nci60Drugs.py | 15 ++++++++++++++- build/broad_sanger/04b-nci60-updated.py | 2 +- build/utils/fit_curve.py | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/build/broad_sanger/03a-nci60Drugs.py b/build/broad_sanger/03a-nci60Drugs.py index 4e2c58a0..7583761c 100644 --- a/build/broad_sanger/03a-nci60Drugs.py +++ b/build/broad_sanger/03a-nci60Drugs.py @@ -13,10 +13,23 @@ ##drug files smi_strings='https://wiki.nci.nih.gov/download/attachments/155844992/nsc_smiles.csv?version=1&modificationDate=1710381820000&api=v2&download=true' +#oct 2024 +smi_strings = 'https://wiki.nci.nih.gov/download/attachments/155844992/nsc_smiles.csv?version=3&modificationDate=1727924130457&api=v2&download=true' + pc_ids='https://wiki.nci.nih.gov/download/attachments/155844992/nsc_sid_cid.csv?version=2&modificationDate=1712766341112&api=v2&download=true' +pc_ids = 'https://wiki.nci.nih.gov/download/attachments/155844992/nsc_sid_cid.csv?version=4&modificationDate=1727924129121&api=v2&download=true' + +#oct 2024 chemnames='https://wiki.nci.nih.gov/download/attachments/155844992/nsc_chemcal_name.csv?version=1&modificationDate=1710382716000&api=v2&download=true' + +chemnames='https://wiki.nci.nih.gov/download/attachments/155844992/nsc_chemical_name.csv?version=1&modificationDate=1727924127004&api=v2' +#oct 2024 cas='https://wiki.nci.nih.gov/download/attachments/155844992/nsc_cas.csv?version=1&modificationDate=1710381783000&api=v2&download=true' +#oct 2024 +cas = 'https://wiki.nci.nih.gov/download/attachments/155844992/nsc_cas.csv?version=3&modificationDate=1727924126194&api=v2&download=true' conc_data = 'https://wiki.nci.nih.gov/download/attachments/147193864/DOSERESP.zip?version=11&modificationDate=1712351454136&api=v2' +##OCT 2024 +conc_data = 'https://wiki.nci.nih.gov/download/attachments/147193864/DOSERESP.zip?version=13&modificationDate=1727922354561&api=v2' def main(): @@ -39,7 +52,7 @@ def main(): if not os.path.exists('DOSERESP.csv'): resp = request.urlretrieve(conc_data,'doseresp.zip') os.system('unzip doseresp.zip') - dose_resp = pl.read_csv("DOSERESP.csv",quote_char='"',infer_schema_length=10000000) + dose_resp = pl.read_csv("DOSERESP.csv",quote_char='"',infer_schema_length=10000000,ignore_errors=True) pubchems = pubchems.filter(pl.col('NSC').is_in(dose_resp['NSC'])) ##first retreive pubchem data if opts.test: diff --git a/build/broad_sanger/04b-nci60-updated.py b/build/broad_sanger/04b-nci60-updated.py index a930fa3f..6b6d2eb2 100644 --- a/build/broad_sanger/04b-nci60-updated.py +++ b/build/broad_sanger/04b-nci60-updated.py @@ -1,5 +1,5 @@ ''' -gets nci60 data from 10/2023 release +gets nci60 data from 10/2024 release ''' diff --git a/build/utils/fit_curve.py b/build/utils/fit_curve.py index d22a0c97..d8078038 100755 --- a/build/utils/fit_curve.py +++ b/build/utils/fit_curve.py @@ -42,7 +42,7 @@ def hs_response_curve_original(x, einf, ec50, hs): HS_BOUNDS = ([0, 0, 0], [1, 12, 4]) #HS_BOUNDS_NEG = ([0, -3,-1],[1,8,0]) ## made hill slope forced to be negative -HS_BOUNDS_NEG = ([0, -11,-1],[1,10,0]) ## made hill slope forced to be negative ##20241017 updated to make ec50 go lower +HS_BOUNDS_NEG = ([0, -5,-1],[1,3,0]) ## made hill slope forced to be negative ##20241017 updated to shift EC50 range def response_curve(x, einf, ec50, hs): """ transformed the original function with ec50 in -log10(M) instead of M """