diff --git a/tests/data/SLC6A4_active_excapedb_subset.csv b/tests/data/SLC6A4_active_excapedb_subset.csv new file mode 100644 index 0000000..0b800a4 --- /dev/null +++ b/tests/data/SLC6A4_active_excapedb_subset.csv @@ -0,0 +1,201 @@ +Ambit_InchiKey,SMILES,pXC50 +RBCQCVSMIQCOMN-PCQZLOAONA-N,C12C([C@@H](OC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F)CC(N1CCCCCC5=CC=CC=C5)CC2)C(=O)OC,6.26 +ALZTYVXVRZIERJ-UHFFFAOYNA-N,O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=C(OC)C=C3)CCCN(CC)CC,7.18046 +MOEMPBAHOJKXBG-MRXNPFEDNA-N,O=S(=O)(N(CC=1C=CC2=CC=CC=C2C1)[C@@H]3CCNC3)C,7.77 +HEKGBDCRHYILPL-QWOVJGMINA-N,C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=CC(=CC4)Cl,5.24 +SNNRWIBSGBMYRF-UKRRQHHQNA-N,C1NC[C@@H](C1)[C@H](OC=2C=CC(=NC2C)OC)CC(C)C,9.12 +UZCRUMOKTIFCRO-UHFFFAOYNA-N,FC(F)(F)C=1C(CN(C2CCNCC2)CC(CC)CC)=CC=CC1,8.16115 +BNDYWUDBYSQRPQ-UHFFFAOYNA-N,FC1=CC=C(C2(CCN(CC2)C)COCC3=NC(CC(C)C)=CC(=C3)C(F)(F)F)C=C1,8.05061 +IWHNPLGMXINDSW-UHFFFAOYNA-N,N1(CC=2C=CC(=CC2)I)CCC(CC1)CCOC(C3=CC=CC=C3)C4=CC=CC=C4,6.34 +UBSRWNBGOMICGX-UHFFFAOYNA-N,C=1(C=2C(=CC=C(C2)F)SC1)C(CCN3CCN(CC3)C=4C5=C(C=CC=C5)C=CC4)O,7.21 +ZEJSMIRALRRUAZ-BZMWGJLINA-N,C1(=CNC2=C1C=CC(F)=C2)C3C[C@H](C)N(CC3)CCC4C5=C(C=C(C(=O)N)C=C5)CCO4,9.11 +PAVIGVDFHAXYTL-UHFFFAOYNA-N,N1(C(C)C)CCN(CC1)C(C2=CC=C(C=C2)F)CN3CCN(CC3)CCCC4=C(C=CC=C4C5=CC=CC=C5)F,5.84 +JOQQHMGSIKNGAF-NXDIHJKXNA-N,[C@H]1([C@H](C=2C=C(Cl)C(=CC2)Cl)C3CCC1CC3)CN(C)C,7.24 +IPLOTOHAAAUTMK-UDUWVDIRNA-N,C12=NO[C@H]([C@@H]1COC=3C2=CC(=C(OC)C3)OC)CN4CCN(CC4)C/C(=C/C5=CSC=C5)/C,8.72 +GEHDMZQXCGBGDS-UHFFFAOYNA-N,FC=1C=C2C(CCC3N(CCC=4C3=CC=CC4)C)=CNC2=CC1,8.24413 +RCJOXOPCIUQSKW-UHFFFAOYNA-N,N1(C2CCC(CC2)C=3C4=C(N(C3)C(C)C)C=CC(=C4)C#N)CCN(CC1)C=5C6=C(NC=C6)C=CC5,7.41454 +PKVDODLSCSUTRW-OAHLLOKONA-N,C1=CC=C2N(C=3C=C(C=CC3)OC)S(=O)(=O)N(CC[C@H](CNC)O)C2=C1,5.49 +RQESDRFAXQLNBU-LELJVTLKNA-N,C1=C2C(C(NC2)=O)=C3C(OCC(C3)NCCCC=4C5=C(NC4)C=CC(=C5)C#N)=C1,7.89 +GMJQYJUXKUAPMC-UHFFFAOYNA-N,C(#CCCN1CCCCC1)C2=CC=C(C=C2)C(CCN(C)C)OC3=CC=CC=C3,7.54 
+NDGDCGDWGLUYKA-UHFFFAOYNA-N,N1(CC=2C=CC(=CC2)I)CCC(CC1)CCOC(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F,6.69 +ZCJYSKHDYKCJTO-CFBBYLOMNA-N,C12N(C(CC1)C[C@H]([C@H]2C3=CC=C(C=C3)OC)C4=CC=C(C=C4)C)C,6.42 +UNCBFTIAWJTNTF-YOEHRIQHNA-N,[C@@H]1(C2=C(C=CC=C2)[C@H](C1)N(C)C)C=3C=C(Cl)C(=CC3)Cl,8.62 +SPMRKSIEUHZTAA-LRHAYUFXNA-N,C=12C(C[C@H](C1C=C(C=C2)OC)NC)C=3C=C(C(=CC3)Cl)Cl,8.96 +UBOHLJNXZUNBPT-QQNWGBJXNA-N,CN1[C@H](C[C@H](OS(=O)(=O)C2=CC=C(Cl)C=C2)C3=CC=CC=C3)CCC[C@@H]1CC(=O)C=4C=CC=CC4,5.36 +HKHUDNZBYVBGEA-UHFFFAOYNA-N,C1=2C(C(CC1N(C)C)C3=CC=C(C=C3)F)=CC=C(C(F)(F)F)C2,8.37 +HPCHCQKXJNQWHO-UHFFFAOYNA-N,C(CCN1CCN(CC1)CCC(O)C2=CC=CC=C2)N(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F,6.1 +GRBWDTDDMWUOBH-QRFGNDMPNA-N,C1C2N(C)C([C@H](C(=O)NCCC3=CC=C(C=C3)NC([C@@H]4C5N(C)C(C[C@@H]4C6=CC=C(C=C6)C7=CSC=C7)CC5)=O)[C@H]1C8=CC=C(C=C8)C9=CSC=C9)CC2,6.47 +MHMSJQGEVKUFEQ-JXMROGBWNA-N,N(C=1N=CC=CC1)(CC=2C=CC(=CC2)OC)CCN3CCN(CC3)C/C=C/C=4C=CC=CC4,6.64 +GMOSPGFRDPBEEZ-BGYRXZFFNA-N,C=1(C=2C(=CC=C(C#N)C2)NC1)[C@@H]3CC[C@H](NCCC4=CC=CC=C4)CC3,8.12 +OWLDNNYJJFSQJU-LTJJPDBXNA-N,C1C2N(C)C(CC2)[C@H](C(=O)NC3=CC=C(C=C3)NC([C@@H]4C5N(C)C(C[C@@H]4C6=CC=C(C=C6)C=7C=CSC7)CC5)=O)[C@H]1C8=CC=C(C=C8)C9=CSC=C9,6.57 +HSIBGVUMFOSJPD-NXWOVTFFNA-N,C1[C@H]2C=3NC=4C=CC(OC)=CC4C3CCN5[C@H]2[C@@H](CC)C[C@@H]1C5,6.23 +NELMSVTWQFLKAL-UHFFFAOYNA-N,C=1C=CC=2N=C(C=CC2C1OCCN3CCN(CC3)CC4=CC=CC=C4)C,6.1 +UKNINZMNBQHDAV-UHFFFAOYNA-N,C1(=CC=C2C(=C1)COC2(C3=CC=C(C=C3)F)CCCN(CCC)C)C#N,6.93 +GOYJQBUTBBQPFC-HQRMLTQVNA-N,C=1(SC=2C=C(F)C=CC2C1)[C@H]3C[C@H](C)N(CC3)C[C@@H](COC4=C5C(NC=C5)=CC=C4)O,9.51 +XAYGSYISSLIYTN-YAAHMLIENA-N,C1([C@@H](C2=CC(Cl)=CC=C2)C3CCC1CC3)CN(C)C,6.57 +QBOITYXCKDGUNE-UHFFFAOYNA-N,S(C1=CC=C(CC=2C(CN(C)C)=CC=CC2)C=C1)C,8.30103 +BHMLFPOTZYRDKA-SJORKVTENA-N,C=1C=CC=C([C@H](OC2=CC=CC=C2I)[C@@H]3OCCNC3)C1,7.29 +WJHPAIHCWWZGFM-UHFFFAOYNA-N,C=1(C=C(C(OC=2C=CC(=CC2)Cl)=CC1)C(NC)C)F,7.46 +UUZFTXGGHPBIFW-MOPGFXCFNA-N,O[C@@H]([C@@H](N1C=2C(C=C1)=CC=CC2)C=3C=C(C=CC3)C)CNC,6.42366 
+WLMBSKSEPQCRJV-UEBCHADLNA-N,N1(CCN(CC1)C[C@H](NC(C2=CC3=C(S2)N(N=C3C=4C=CC=CC4)C)=O)C)C(=O)C,5.0 +HUGQKIWTNMQGCP-QGZVFWFLNA-N,S(=O)(=O)(N([C@@H]1CCNC1)CC=2C(C3=CC=CC=C3)=CC=CC2)C,5.27165 +OZUHPVAODZNQKI-XGAFWQRZNA-N,C=1(C=CC=2NC=C(C2C1)[C@H]3CC[C@@H](CC3)N4CC5=C(CC4)C=C(C=C5)OC)C#N,8.1 +CUWDCKZGAWYXEX-BRLUZMBRNA-N,C12N([C@H](CC1)[C@H]([C@@H](C2)C=3C=CC(=CC3)F)COC=4C=CC=5OCOC5C4)C,7.06 +WIOSJRNBAZNBKL-UVHRUJRTNA-N,C1(C2C3N(C)[C@@H](C[C@@H]2C4=CC=C(C=C4)Cl)CC3)=NC(C=5C=CC(=CC5)Cl)=NO1,6.39 +OARTTZNTSJLCDS-UHFFFAOYNA-N,S(C1=CC=C(OC2=C(CNC)C=CC(C#CCCN3CCOCC3)=C2)C=C1)C,8.74473 +VCNSPGHSQPMCFF-HVXXBKQBNA-N,C=1(C=C(CN(C)C)C(OC=2C=CC(SC)=CC2)=CC1)S(N)(=O)=O,8.4 +UKWSMBRBDLPKHS-MEMLXQNLNA-N,C1CN([C@H]2CC[C@@H](CC2)C=3C=4C(NC3)=CC=C(C4)C#N)CCN1C=5C=CC=C6C5C=CN6,8.07 +JUAYCSTYVACFDD-JRVPFXOQNA-N,[C@@H]1(C(C2CCC(N2)C1)C3=CC=C(C=C3)F)C4=CC=C(C=C4)F,7.39 +APJYAVYSAZEVQB-UHFFFAOYNA-N,C1CNCCC1(C2=CC=C(C=C2)F)COCC=3N=C(C=C(C3)C(F)(F)F)Cl,8.05 +HPZAIKAZTLALFB-UHFFFAOYNA-N,C1(C2C(C2)CN(CC)C)=C3C(C=CC=C3)=CC=C1,7.24 +CLSUYIGLXGEXLU-CHWSQXEVNA-N,FC1=CC(CC[C@H]2O[C@H](CC2)CN)=C(OC)C=C1,7.93181 +RESPJLCKLFXXCY-GVAUOCQINA-N,[C@H]1(CC[C@@H](CO1)NC[C@H](C2=CC=CC=C2)O)C(C3=CC=CC=C3)C4=CC=CC=C4,6.1 +ZOIIDMMMKOZKED-UHFFFAOYNA-N,N(CC1C(C1)C=2C3=C(C=CC2)C=C(C=C3)C#N)(CC)CC,7.17393 +NLDLLVSDHOTCNF-UHFFFAOYNA-N,C=1C=C(C=CC1OC)C=2C=3CNC(CC3C=NC2)OCCCN(CC)CC,6.65 +WPOWYXAYIRKXTG-UUILKARUNA-N,C12N(C(CC1)CC(C2C(=O)CC/C=C/I)C=3C=CC(=CC3)C)C,7.29 +VKEVCZGXCJOVET-UHFFFAOYNA-N,ClC1=C(OC(C2CCNCC2)C3=CC=CN=C3)C=CC(Cl)=C1,8.30103 +SPKOQCHHYOCMGR-HXUWFJFHNA-N,C1(C2=CC=C3C(CCN(C3=O)CCN4CCC[C@H]4C)=C2)=CC=C(C(N5CCCC5)=O)C=C1,6.09 +KJKXIXZNTFLNGO-SDVLMSEANA-N,FC1=CC=C(C(O[C@@H]2C(C3N(C(CC3)C2)C)=CCC(OC)=O)C4=CC=C(F)C=C4)C=C1,6.2426 +MOOPIJCZPWGNKW-LEGOXMQRNA-N,C1[C@H](C[C@@H](N(C1)CC#C)CCCNC(NC2=CC(=CC(C(=O)C)=C2)C(C)=O)=O)CC3=CC=C(C=C3)F,6.52 +GSAFRMKQXDFZBL-LELJVTLKNA-N,C1=C2C(=C3CC(N(CCCC=4C5=C(NC4)C=CC(=C5)F)CC)COC3=C1)C(=O)NCC2,7.52 +BQKRARPXRCZXGV-UHFFFAOYNA-N,C1CCN(C(CC)CC(C2=CC(=C(C=C2)Cl)Cl)=O)C1,5.75 
+UQAWWWMXEVRWIO-OKPOJWAQNA-N,N1(CCN(CC1)C=2C=CC=C(C)C2C)CCCCNC(C3=C(N(C(C4=CC=CC=C4)=C3)CCC)C)=O,7.12 +NDBCFJNXVCOSEG-BBELGVGPNA-N,[C@@]1(C=2C(=CC(/C=C/C3=CC=C(C#N)C=C3)=CC2)CO1)(C4=CC=C(F)C=C4)CCCN(C)C,7.55 +LKRPIMOVLOWXBL-UHFFFAOYNA-N,C1=CC(OC2=CC=C(C=C2)Cl)=C(C=C1C(N3CCN(C4CCC4)CC3)=O)CNC,7.41 +DIEUHEQWLCVFKO-DNVCBOLYNA-N,O1C2=C([C@H](OC3=CC=CC=C3C)[C@H](CN(C)C)C1)C=CC=C2,5.96 +NLMXEWVGDSQUMB-UHFFFAOYNA-N,C=1C=C2C(CN(C)CC2=CC1OCCCN3CCN(CC3)C=4C=CN=CC4)C5=CC=C(OC)C=C5,8.28 +RCJOXOPCIUQSKW-PSWAGMNNNA-N,C1CN([C@H]2CC[C@H](CC2)C=3C4=C(N(C3)C(C)C)C=CC(=C4)C#N)CCN1C=5C6=C(C=CC5)NC=C6,7.41 +GLZVSWBABDDJJM-UHFFFAOYNA-N,C=1C=C(C(OC=2C(=CC(=CC2)Cl)OC)=CC1)CNC,7.89 +QWBMFIRRCZLGHG-AGRCYTFUNA-N,C1(C(=CC2N(C1[C@@H](C2)O)C)C=3C=C4C=CC=CC4=CC3)C(=O)OC,6.17 +TYVFKNAUSZXXJC-UHFFFAOYNA-N,ClC=1C=C(CC=2C(CNC)=CC=CC2)C=CC1Cl,7.52288 +OZEMABLVUYGXRL-UHFFFAOYNA-N,N1(C(C2=CC=CC=C2)COCC=3C=C(Cl)C=CC3)CCNCC1,8.1 +GJMGTKLHMAHNJB-UHFFFAOYNA-N,S(O)(C(C1=CC=C(F)C=C1)C2=CC=C(F)C=C2)=CCNCCCC3=CC=CC=C3,5.89279 +LLLFWZQWZWPGBA-GPQMBLKYNA-N,C1(C(CCN(C1)C)C=2C=CC(=CC2)Cl)CSCC(=O)NC,6.96 +XWHBDOLRNWCIGO-UHFFFAOYNA-N,C1CCCCC1(C=2C=CC=C3C2C=CC=C3)CN,5.82 +ABKFNWSCLGGDOH-SREBMQDQNA-N,C=1C=C(C=CC1)C2=CC(=C(N2CC)C)C(NCCCN3CCN(CC3)C4=C(C(=CC=C4)Cl)Cl)=O,6.74 +OLINBEUJWKQKCV-IBGZPJMENA-N,C1=CC=CC(CN([C@@H]2CNCC2)S(C(C)C)(=O)=O)=C1C3=CC=CC=C3,5.33 +UVEZCGSQMOEHNU-UHFFFAOYNA-N,N1(C2CC(CC1CC2)=CCOC(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F)CCCC5=CC=CC=C5,6.25 +YGUNLDOGPIFBCM-UHFFFAOYNA-N,N1(C=2C=C(OC)C(=CC2)C#N)CCN(CC1)CCCCC=3C=4C(=CC=C(C#N)C4)NC3,8.47 +UEQUQVLFIPOEMF-UHFFFAOYNA-N,N12C(C=3C(=CC=CC3)CC4=C1C=CC=C4)CN(CC2)C,5.54 +NVIYCTHUJATAOR-UHFFFAOYNA-N,C(C=1C=CC=CC1)(C=2C=CC=CC2)OCCN(CCN(CCC=3C=CC(=CC3)F)C)C,6.26 +PJJUVVQHQLAPRY-LWCGHFMUNA-N,C12[C@H]([C@@H](OC(C3=CC=C(C=C3)F)C4=CC=C(C=C4)F)CC(N1C)CC2)C(=O)OCCC5=CC=CC=C5,5.5 +PZHJZKURLFYNOX-GNXKAVGDNA-N,BrC=1C=C(C=2SC(=NC2)[C@@H]3[C@@H]4N([C@H](C[C@@H]3C5=CC=C(C=C5)C)CC4)C)C=CC1,6.06048 
+XRAUKXHGDLZTOE-UHFFFAOYNA-N,C=1(C2=C(SC1)C=CC=C2)C(O)CCN3CCN(CC3)C4=C5C(=CC=C4)C=CC=N5,7.62 +XEWUKTSZOJWBEW-UHFFFAOYNA-N,ClC=1C(CC(N2CCNCC2)C=3SC=NC3)=CC=CC1,7.92082 +LPVORCAPZWIQEM-IBGZPJMENA-N,C=12C(=C(OC)C=CC1)CCC[C@H]2CN(CCC=3C=C4CNCC4=CC3)C,7.94 +RRFURIBDRXRPMX-VLZXCDOPNA-N,C1=2C(O[C@@]3(C1=CC=C4C2OCO4)CC[C@@H](N5CCC(CC6=C(C(F)(F)F)C=CC=C6)CC5)CC3)=O,6.74 +DATDUNSZEMGGEN-UHFFFAOYNA-N,BrC1=C(CC2CCN(C3CCC4(OC(=O)C=5C4=CC=C6OCOC56)CC3)CC2)C=C(F)C=C1,7.08619 +ZORXDXJSPINJEQ-UHFFFAOYNA-N,C=1(C(=C(F)C=CC1)F)C(N2CCNCC2)COCC=3C=C(C(F)(F)F)C=C(C(F)(F)F)C3,6.5 +VMGBISPOZOWYBU-IZZDOVSWNA-N,N12C3C(C(=O)OC)C(CC1CC3)/C(/C2)=C/C4=CC(F)=C(C=C4)F,7.0 +ZYALYWXELCWDBL-UHFFFAOYNA-N,C1=C(OCCCN2CCC(CC2)F)C=C3CN(C)CC(C3=C1)C=4C=CC(SC)=CC4,8.7 +HAHQKXHPHSTOFJ-BEVRRMAYNA-N,[C@@H]1([C@@H](C=2C=CC(=CC2)F)CC3NC1CC3)COC=4C=C5OCOC5=CC4,7.72 +BPBCGWPBILSOFV-UHFFFAOYNA-N,C=1C=C(C=2OC(CNC2C1)CN3CC=C(CC3)C=4C=5C(NC4)=CC=C(C5)F)OC,8.16 +SGRYPQQQMUQCSN-UHFFFAOYNA-N,N(C(COC=1C=C(F)C=C(C1)F)=O)(CC2=CC(Cl)=CC=C2)C3CCNCC3,7.7 +NUAORYLXEWKQRZ-HNNXBMFYNA-N,N([C@@H]1CNCC1)(CC2=C(C=CC=C2)C(F)(F)F)C3CCOCC3,7.68 +MTFRSYAWIOFJIS-UHFFFAOYNA-N,C1(CN(CC(C)C)C2CCNCC2)=CC=CC=C1Cl,8.8 +KPDJDJHGSVDTMY-UHFFFAOYNA-N,O(C=1C=2C(=CC=CC2)C=CC1)C(C3=CC=CC=C3)C4CCNCC4,8.08 +MXBQNIQYUCTYND-UHFFFAOYNA-N,C(C1=CC(=C(C=C1)Cl)Cl)(CC(C)C)C2NCCCC2,6.54 +YGZPJJNFDCTKHU-UHFFFAOYNA-N,C1(C2N(C)C(CC1C3=CC=C(C=4N(C=CC4)CC5=CC=CC=C5)C=C3)CC2)C(=O)OCC,7.23 +JVJNRJFMZQRGMT-HXTKINSTNA-N,C1(=CC(C(NCCCN2CCN(CC2)C3=CC=CC(=C3C)C)=O)=NC(=N1)C)C(C)(C)C,6.14 +KUWISOBIRVPNDN-UHFFFAOYNA-N,C=1C=C(C(OC=2C=C(C(=CC2)F)C)=CC1Cl)CNC,7.51 +PXHGECKMKPJNDM-INIZCTEONA-N,C=1C(=C(CN(C(=O)C(C)C)[C@@H]2CNCC2)C=CC1)C3CC3,6.91 +UOUWWVWWIQEODA-UHFFFAOYNA-N,FC1=CC=C(C(N2CCN(CC2)C(C)C)CN3CCN(CC3)CCCC=4C(=CC=CC4)C5=CC=CC=C5)C=C1,5.50446 +WEARRJZMFZNKLA-SREBMQDQNA-N,C1(=C(N(CCC)C(=C1)C2=CC=CC=C2)C)C(=O)NCCCN3CCN(CC3)C=4C=C(Br)C=CC4,6.65 +QHSQRSCEVZFFIU-UHFFFAOYNA-N,O(C(C1CCNCC1)C2=CC=CN=C2)C3=CC=C(C=C3)CC,8.30103 
+QJFFUDZGJNXBLG-UHFFFAOYNA-N,N1(C2CC(CC1CC2)OC(C=3C=C(Cl)C=CC3)C=4C=CC=CC4)CC=5C=CC=CC5,5.77 +VYEDDDWXKRDQRX-NHCUHLMSNA-N,C1CN(CCCOC=2C=CC3=C([C@@H]4N(C[C@H]3C=5SC=NC5)CCC4)C2)CCO1,7.71 +PSQGGMTYCHMOPL-UHFFFAOYNA-N,C1(C2N(C)C(CC1C3=CC=C(C=C3)C)CC2)C(OC4CCCC4)=O,6.31 +IDZYJTXHWNKKBY-VIKVFOODNA-N,C1NC[C@@H]2[C@H]1[C@]2(C3=CC(=C(C=C3)Cl)Cl)COC4CCC4,9.4 +CTAHXNUPBZTDFO-UHFFFAOYNA-N,O(C=1C=C(CC(N2CCNCC2)C3=CC=CC=C3)C=CC1)C,7.60206 +YENSXXXRKPOXLI-OLZOCXBDNA-N,N(C[C@@H]1[C@H](CCC1)C=2C=3C(NC2)=CC=C(C3)C#N)C,8.1549 +BBVCHKBRDIGQFY-UHFFFAOYNA-N,CN1C2CCC1CN(CCOC(C3=CC=C(F)C=C3)C4=CC=C(F)C=C4)C2,6.15 +HRDUUEDPDCVSJF-UHFFFAOYNA-N,C1=CC(=CC(Cl)=C1Cl)C2CN(CC=3C=C(N=CC32)OCC4CCN(CC4)C(C)C)C,7.64 +DXAJRWYUVQJPNE-LELJVTLKNA-N,C1=CC=C(C=C1)C(OC=2C=CC(=CC2)NC(=O)C3=CC=C(C(=C3)Cl)OC)CCN(C)C,7.8 +QLVUBIIDSKNVLZ-UHFFFAOYNA-N,C=1(C=C(C(OC=2C=C(C(=CC2)Cl)F)=CC1)CNC)F,7.71 +MRDDAOFLRIQUHE-UHFFFAOYNA-N,C=1(C=C(C(OC=2C=C(C(=CC2)Cl)Cl)=CC1)CNC)Cl,8.24 +NBVANEHIKFOMLI-CABCVRRENA-N,C=1(C#N)C=CC2=C(C(=CN2)[C@@H]3[C@H](CCC3)CN(C)CC)C1,8.0 +UQDAWQPILXQULE-UHFFFAOYNA-N,C1CNCCC1(C=2C=CC(=CC2)I)C(OCC3=CC=C(C=C3)N(=O)=O)=O,8.23 +OITIJKINXHWSKF-UHFFFAOYNA-N,S(C1=CC=C(OC2=C(C=C(CCCCN3CCN(CC3)C(C)C)C=C2)CNC)C=C1)C,8.85387 +UYAPBVKHZZKNAC-UHFFFAOYNA-N,C=1(C=C(C=CC1F)C(N2N=NC=N2)CCN(C)C)F,5.53 +OYPPVKRFBIWMSX-CXUHLZMHNA-N,C(=C\CN(C)C)(\C=1C=CC(=CC1)Br)/C=2C=NC=CC2,5.77 +AWTICCZHEMZKBF-DFLUKEIKNA-N,FC1=C2OCC(N(CCCCC=3C=4C(NC3)=CC=C(F)C4)CCC)CC2=C(C=C1)C(=O)N,7.91364 +CWKGVKHDADMKBI-UHFFFAOYNA-N,O(CCCNC1CC1)C2=CC3=C(C(CN(C3)C)C4=CC=C(OC)C=C4)C=C2,8.52288 +OKSIZZBSWZAKPC-UHFFFAOYNA-N,C=1C=C(C(CCN2CCN(CC2)C3=CC=CC=C3OC)O)C=CC1OC,6.3 +OAWQQIDHAPEMAM-UHFFFAOYNA-N,N1=C(ON=C1C)CS(=CC2C(CCN(C2)C)C3=CC=C(C=C3)Cl)O,5.97 +NLDLLVSDHOTCNF-UHFFFAOYNA-N,C=1C=C(C=CC1OC)C=2C=3CNC(CC3C=NC2)OCCCN(CC)CC,6.89 +YTNASRCFNXLDGR-HIUFNZKINA-N,ClC1=CC=C([C@@H]2[C@@H]([C@@H]3N([C@H](C2)CC3)C)C=4SC(=CN4)C5=CC=C(N(=O)=O)C=C5)C=C1,6.00877 +FINNTOGAVZWNGL-UHFFFAOYNA-N,C1=CC(=CC(=C1)OC)C2CN(CC=3C=C(N=CC32)OCCCN4CCCCC4)C,6.52 
+YIEXSVBTWQZEGZ-UHFFFAOYNA-N,O(C1C(O)(CC=2C1=CC=CC2)CNC)C=3C(=CC=CC3)C,6.1549 +MGESTBXJNHZBDQ-UHFFFAOYNA-N,N1(CC2=CC=C(C=C2)F)CCC(CC1)CCOC(C=3C=CC(=CC3)F)C=4C=CC=CC4,5.9 +YDFLGNNHJDQNRD-UHFFFAOYNA-N,S(C1=C(N2CCNCC2)C=C(C=C1)C)C3=CC=C(OC)C=C3,8.10237 +HZPZVEJWTPAYPU-UHFFFAOYNA-N,O=S(N(CC=1C=CC2=CC=CC=C2C1)C3CCNCC3)(C)=O,8.46 +VGJMSZYBBPAAOF-UHFFFAOYNA-N,C1=C(C=2C=CC(C)=NC2C=C1)N3CCN(CC3)CCC4=C5C(=CC=C4)N6C(CO5)=NC(=C6C)C,7.8 +RDLDWTPNWMTWKO-FQEVSTJZNA-N,O=C(N([C@H]1CCNC1)CC=2C(C3=CC=CC=C3)=CC=CC2)CC(C)C,6.38195 +UPJDIJPBOJIDFL-LBOYIXSDNA-N,ClC1=C(N2CCN(CC2)CCNC(=O)C=3C(=C(NC3C)C4=CC=CC=C4)C)C=CC=C1Cl,6.79048 +IEHAMLTURQLYEH-UHFFFAOYNA-N,C1(=CC=C2C(=C1)COC2(C3=CC=C(C=C3)F)CCCN(C)C)CNCC4=CC5=C(C=C4)C(OC5)(CCCN(C)C)C6=CC=C(C=C6)F,7.71 +YKCOYIKIANOWTN-UHFFFAOYNA-N,C=1C=C(OC2=CC=C(C=C2)SC)C(CNC)=CC1C#CCCN3CCC(CC3)F,9.05 +VRWDNIRFKYVFAT-UHFFFAOYNA-N,ClC1=CC=C(C2(OCC3=CC(Cl)=C(Cl)C=C3)CN(C2)C)C=C1,8.52288 +DTDAFQBYHBDFSS-UHFFFAOYNA-N,N1(C=2C=C3OCCOC3=CC2)CCN(CC1)CCCCC4=CNC5=C4C=CC=C5,8.0 +VKWPPFDVSFTDOF-UONOGXRCNA-N,[C@H]([C@H](C(C)(C)C)O)(CN(C)C)C1=CC=CC=C1,7.22 +KYYIDSXMWOZKMP-UHFFFAOYNA-N,C(C1(O)CCCCC1)(C2=CC=C(C=C2)O)CN(C)C,6.74 +OWMWWMZMOLIDRD-LNNLXFCONA-N,C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC4=CC=C(C(F)(F)F)C=C4,5.18 +DMJPYIXMJPWEKP-OKPOJWAQNA-N,N1(CCCNC(C=2N=C(N(C2)C3=CC=CC=C3)C=4C=CC=CC4)=O)CCN(CC1)C=5C(=C(C=CC5)C)C,7.3 +LRZMNFBWNOBYAQ-UHFFFAOYNA-N,C1=2N(CCC1=CC(=C(C2)C)CC(N)C)CC,5.66 +FRXXZKCFLLWYIW-DZQCGVKKNA-N,N=1C=2C(C(N)=C3C1C4=C(CC3)C=CC=C4)=CC=CC2,5.34 +DEDOXCFPPHRNCU-LELJVTLKNA-N,N1(CCN(CC1)C=2C=CC=C(C2Cl)Cl)CCCNC(C=3N=C(N(C3C)C4CCCC4)C)=O,7.46 +DRLOWUSIOOIRRD-HIUFNZKINA-N,BrC=1C=C(C=2SC(=NC2)[C@@H]3[C@@H]4N([C@H](C[C@@H]3C5=CC=C(Cl)C=C5)CC4)C)C=CC1,6.55284 +MNYRTHFYWWGGTK-WDAWEKNVNA-N,O1N=C2[C@H]([C@@H]1CN3CCN(CC3)C/C=C(/C4=CC=CC=C4)\C)CNC5=C2C=C(OC)C(OC)=C5,7.82391 +BZFUYHDDVGBTOS-LBOYIXSDNA-N,C1=CC(N2CCN(CC2)CCC=3C4=C(C=CC3C)NC(CC4)=O)=C5C=CC(=NC5=C1)C,7.3 +CNDBZDPWIJXQER-DNQXCXABNA-N,C=1(CN[C@H]2CN[C@H](CC2)C(C=3C=CC=CC3)C=4C=CC=CC4)C=CC=CC1,5.67 
+YHXSELHUXJAOLR-UHFFFAOYNA-N,C=1C=CC=2N(CCCCCCN(CC)CC)C3=C(C=CC=C3)C(=O)C2C1,5.46 +NNOWQHZNUGOGEV-LEGOXMQRNA-N,C1[C@H](C[C@@H](N(C1)CCC)CCCNC(NC2=CC(=CC(C(=O)C)=C2)C(C)=O)=O)CC3=CC=C(C=C3)F,6.78 +LXNFWMBEBKFFDO-JGXXCWGCNA-N,C1(=CC=C(F)C=C1)CC=2C=C(ON2)[C@@H]3C4N(C(C[C@@H]3C5=CC=C(C=C5)Cl)CC4)C,7.14 +PRXDEVKRRDTKHX-UHFFFAOYNA-N,FC1=CC=C(CN(C2CCNCC2)CC(C)C)C=C1,8.72125 +QRFMINZAYREUQJ-UHFFFAOYNA-N,C=1C=C2C(CN(C)CC2=CC1OCCCN3CCC(CC3)F)C=4C=CC(S(C)(=O)=O)=CC4,7.66 +FSUVEHUIVCMLNZ-MAUKXSAKNA-N,[C@@H]1(C2=C(C=CC=C2)[C@@H](C1)NCCC)C=3C=C(Cl)C(=CC3)Cl,6.89 +KLDDEIDULFIHBU-UHFFFAOYNA-N,C1(C2N(C)C(CC1C=3C=CC(=CC3)C4=CC=C(N(=O)=O)C=C4)CC2)C(=O)OC,7.53 +AROKURNCLIVVKP-KSSFIOAINA-N,O1C[C@@H]([C@H](OC=2C(=CC=CC2)C)C=3C1=CC=CC3)CNC,5.55284 +LKOFLYSQAVPDDB-HKUYNNGSNA-N,O1[C@@H](CC[C@H](NCC=2C=CC(O)=CC2)C1)CC3=CC=CC=C3,5.60276 +AZSKJKSQZWHDOK-VJSLDGLSNA-N,C=1C=C(C=CC1)C2=CC(=C(N2CC(C)C)C)C(NCCCN3CCN(CC3)C4=CC(=CC=C4)Cl)=O,6.96 +CLBIXESACWOVRD-SXLOBPIMNA-N,C=1(SC=2C=C(OC)C=CC2C1)[C@H]3C[C@H](C)N(CC3)C[C@@H](COC4=C5C(NC=C5)=CC=C4)O,8.96 +QJCSFENQSHMLKU-UHFFFAOYNA-N,C1(C=2C=CC(=CC2)Cl)C(CNCC1)CSCC(=O)OC,7.89 +VBGHDLDPLPNLTB-MAUKXSAKNA-N,ClC1=CC=C([C@@H]2CC[C@@H](N(C)C)C=3C2=CC=CC3)C=C1,6.92082 +XXURTKGAFDGDML-XBXARRHUNA-N,N1(CCN(CCOC(C2=CC=C(C=C2)F)C3=CC=C(C=C3)F)CCC1)C/C=C/C4=CC=CC=C4,6.63 +UIKJCHWTULEUKX-CQSZACIVNA-N,S1(=O)(=O)N(CC[C@H]2OCCNC2)C=3C(N1C=4C(F)=CC=CC4)=CC=CC3,5.19314 +KAWWVHKWESRAHV-UHFFFAOYNA-N,O(C=1C(=C(N2CCN(CC2)CCCCC=3C=4C(NC3)=CC=C(C4)C#N)C=CC1)C#N)CCOCCOCC,8.49485 +HVBBQXZWOQJUOC-UHFFFAOYNA-N,ClCCCN1CCN(CC1)CCOC(C2=CC=C(F)C=C2)C3=CC=C(F)C=C3,6.47108 +IODCZMRTDNJNCT-CYBMUJFWNA-N,S1(=O)(=O)N(CC[C@H]2OCCNC2)C=3C(N1C4=C(F)C=C(F)C=C4F)=CC=CC3,5.85387 +NBLMTDPDGYITGF-AEFFLSMTNA-N,O([C@H]1C[C@@H](N(C)C)C=2C1=CC=CC2)C=3C(=CC=CC3)C,6.52288 +UIXPCYZWBFDFKV-UHFFFAOYNA-N,N1C=C(CCCCNCCOC2=C3C(NC=C3)=CC=C2)C4=C1C=CC=C4,7.71 +WIPLBJLBVWNILL-OKPOJWAQNA-N,ClC=1C(=C(N2CCN(CC2)CCCNC(=O)C=3N=C(N(C3C)C4=CC=5OCCOC5C=C4)CCC)C=CC1)C,7.85387 
+DVPCSQHSLVMBEG-UHFFFAOYNA-N,S(C1=CC=C(CC(NCC)CC)C=C1)C,6.0846 +WYPGDIUNVVQDKO-SREBMQDQNA-N,N1C(=NC2=C1C=CC=C2)CN3CCN(CC3)CCOC(C=4C=CC(=CC4)F)C5=CC=C(C=C5)F,6.59 +FENDTBMFHNEILH-HNRBIFIRNA-N,[C@@H]12N([C@@H](CC(=CCOC(C3=CC=CC=C3)C=4C=CC=CC4)C1)CC2)CC5=CC=CC=C5,5.27 +XJLOQWKHCAZUQM-LGKQTMLJNA-N,C1C[C@H]2N([C@@H]1C[C@H](C2)NC(C3=CC=C(C=C3)F)C=4C=CC(F)=CC4)CCC5=CNC6=C5C=CC=C6,6.46 +AQGDMGSGBMCAQM-WMUOSMHTNA-N,ClC1=CC=C([C@H]2[C@@H](CN(CC2)C)C(=O)NCC3=CC=C(C=C3)CNC(=O)C4=CC(Cl)=CC=C4)C=C1,7.48149 +SUOVQWNJZQBPSQ-LBOYIXSDNA-N,C=1C=CC=2N=C(C=CC2C1N3CCN(CC3)CCCCC=4C=CC5=C(C4)NC(CO5)=O)C,8.2 +NSZQKMWRKWPGLQ-UYBDAZJANA-N,C1(=CC(CNC)=C(OC=2C=C(C(=CC2)SC)F)C=C1)N=S(C)(O)=O,7.8 +FIISMIHWBCMEDD-UHFFFAOYNA-N,C1(C(C2=CC=C(C(=C)C)C=C2)CC3NC1CC3)C(=O)OC,9.22 +SYWSPGNYZDCPLZ-LBOYIXSDNA-N,C1=C(N2CCN(CC2)CCC=3C=C(C=CC3)NC(C(C)C)=O)C=4C=CC(=NC4C=C1)C,5.9 +MVPLENCWDAZKFW-UHFFFAOYNA-N,O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=CC=C3)CCCN(CC)CC,6.23433 +YQQAIKWUGKLZAA-JLHYYAGUNA-N,N12C3C(C(=O)OC)C(CC1CC3)/C(/C2)=C/C=4SC(CCCC)=CC4,6.18 +KWWVMVUZZJMGRZ-CHPOKUKFNA-N,C1=CC=NC(=C1)C([C@@H]2CNCC2)OC=3C(=C(C=CC3)Cl)Cl,7.89 +XTALQXXENKAGEX-UHFFFAOYNA-N,O=C(N1CCN(C2CC2)CCC1)C3=CC(=C(OC=4C(=CC=CC4)C#N)C=C3)CNC,6.62342 +ZUTKKMGBEIGKKD-XEWABKELNA-N,ClC=1C=C(C2(OCC3=CC=C(Cl)C=C3)C[C@H]4N[C@@H](C2)CC4)C=CC1Cl,8.07058 +ANDYBNKRXGVQGP-UHFFFAOYNA-N,C1=CC(=C(C=C1C(N2CCCN(CC2)C3CC3)=O)CNC)OC=4C=CC(=C(C4)Cl)Cl,8.05 +WFFZVYUUVLQMLS-VGOFMYFVNA-N,N1(C(/C=C/C=2C(=CC=C(C2)OC)OC)=O)CCN(CC1)CCOC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F,6.69 +QJEBAPXHCKKADR-UHFFFAOYNA-N,FC(F)(F)C1=CC=C(CN2CCC(CC2)CCOC(C3=CC=C(F)C=C3)C4=CC=C(F)C=C4)C=C1,6.35458 +DAXGGAWCENSVFP-PKSOQXRJNA-N,S1C2=C(CC1)C=C(OC=3C(CN(C)C)=CC(N=S(O)(=O)C)=CC3)C=C2,8.1549 +URXGPOFPBIBVGT-JIRNRHONNA-N,C12N([C@H](CC1)C[C@H]([C@H]2C3=CC=CC=C3)C=4C=CC=CC4)C,6.12 +LBZSRDGYAIZTDL-RTBURBONNA-N,[C@@H]1([C@H](CCN(C1)CC2=CC=CC=C2)C=3C=CC(=CC3)Cl)C(=O)OC,5.66 +ZYEZLTYJZNIUDO-UHFFFAOYNA-N,N1(CCN(CC1)CCCC2=CNC=3C=CC(=CC23)OC)CCCC4=CNC5=CC=C(C=C45)OC,7.01 
+NNWMNOCTZDIGBT-SXLOBPIMNA-N,C1(=CC=2C(OC)=CC=CC2S1)[C@H]3C[C@H](C)N(CC3)C[C@@H](COC4=C5C(OC(=C5)C)=CC=C4)O,7.42 +UVAUNLUVSFGRMF-DFLUKEIKNA-N,FC1=C2OCC(N(CCCCN3C4=C(C=C3)C=C(F)C=C4)CCC)CC2=C(C=C1)C(=O)N,6.9431 +OYECSEBZWOFEHH-QWOVJGMINA-N,C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=C(F)C=CC4,5.21 +BLKKGRNFYMEBJN-UHFFFAOYNA-N,O(C1=NC=CC=C1C2CCNCC2)C=3C(=CC=CC3)C,6.53462 +ANIFHKBWUXCCBH-DZQCGVKKNA-N,C=1(C=C(CN(C)C)C(OC=2C=C(C(SC)=CC2)F)=CC1)S(N)(=O)=O,8.05 +PIKWEFAACQLYMF-UHFFFAOYNA-N,C1=CC=C2C=CC(=CC2=C1)C(N3N=NC(=N3)C=4C=CC=CC4)CCN(C)C,6.6 +AUZWJAMWJZUPHQ-UHFFFAOYNA-N,C(OC1=CC=C(C=C1)Cl)(C=2C=CC(=CC2)F)C3CNCCC3,7.86 +JCEWQICHOLLRDL-WUFINQPMNA-N,O(C1=CC=2[C@@H]3N(C[C@H](C2C=C1)C4=CC=C(N5N=CC=C5)C=C4)CCC3)CCCN6CCOCC6,8.22185 +NGRIUVQYFBDXMT-JYAVWHMHNA-N,C1NC[C@@H]2[C@H]1[C@@]2(CCOCC)C3=CC(=C(C=C3)Cl)Cl,9.3 +ZWLWOTHDIGRTNE-UHFFFAOYNA-N,C(C1=CC=NC=C1)(C2=CC=CC=C2)C3=CC=CC=C3,5.94 diff --git a/tests/fixtures.py b/tests/fixtures.py index 52d5736..4102825 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,7 +1,11 @@ +import os import pytest +import pandas as pd from rdkit import Chem from rdkit.Chem import rdMolDescriptors +#TODO these should really go into the conftest.py, so that they are automatically imported in the tests + @pytest.fixture def smiles_list(): return [Chem.MolToSmiles(Chem.MolFromSmiles(smiles)) for smiles in ['O=C(O)c1ccccc1', @@ -34,3 +38,9 @@ def chiral_mols_list(chiral_smiles_list): @pytest.fixture def fingerprint(mols_list): return rdMolDescriptors.GetHashedMorganFingerprint(mols_list[0],2,nBits=1000) + +@pytest.fixture +def SLC6A4_subset(): + file_path = os.path.realpath(__file__) + data = pd.read_csv(f"{os.path.split(file_path)[0]}/data/SLC6A4_active_excapedb_subset.csv") + return data \ No newline at end of file diff --git a/tests/test_transformers.py b/tests/test_transformers.py new file mode 100644 index 0000000..ec1e766 --- /dev/null +++ b/tests/test_transformers.py @@ -0,0 +1,62 @@ +# checking that the new transformers can work within a 
# scikit-learn pipeline of the kind
# Pipeline([("s2m", SmilesToMol()), ("FP", FPTransformer()), ("RF", RandomForestRegressor())])
# using some test data stored in ./data/SLC6A4_active_excapedb_subset.csv

# to run as
# pytest tests/test_transformers.py --> tests/test_transformers.py::test_transformer PASSED


import pytest
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from scikit_mol.transformers import SmilesToMol
from scikit_mol.transformers import MACCSTransformer, RDKitFPTransformer, AtomPairFingerprintTransformer, \
    TopologicalTorsionFingerprintTransformer, MorganTransformer

from fixtures import SLC6A4_subset


def test_transformer(SLC6A4_subset):
    """Check that each fingerprint transformer fits and scores inside a Pipeline.

    For every fingerprint configuration a pipeline of
    ``SmilesToMol -> fingerprint transformer -> RandomForestRegressor`` is
    fitted on the first 128 rows of the fixture data and scored on both the
    training rows and the remaining rows. The scores are only printed; the
    test fails if any configuration raises while fitting or scoring, and the
    assertion message reports which ones failed and why.

    Args:
        SLC6A4_subset: fixture object exposing ``SMILES`` and ``pXC50``
            attributes (the fixture in fixtures.py builds it with
            ``pd.read_csv``).
    """
    # load some toy data for quick testing on a small number of samples
    X_smiles, Y = SLC6A4_subset.SMILES, SLC6A4_subset.pXC50
    X_train, X_test = X_smiles[:128], X_smiles[128:]
    Y_train, Y_test = Y[:128], Y[128:]

    # run FP with default parameters except when useCounts can be given as an argument
    FP_dict = {"MACCSTransformer": [MACCSTransformer, None],
               "RDKitFPTransformer": [RDKitFPTransformer, None],
               "AtomPairFingerprintTransformer": [AtomPairFingerprintTransformer, False],
               "AtomPairFingerprintTransformer useCounts": [AtomPairFingerprintTransformer, True],
               "TopologicalTorsionFingerprintTransformer": [TopologicalTorsionFingerprintTransformer, False],
               "TopologicalTorsionFingerprintTransformer useCounts": [TopologicalTorsionFingerprintTransformer, True],
               "MorganTransformer": [MorganTransformer, False],
               "MorganTransformer useCounts": [MorganTransformer, True]}

    # fit on toy data and print train/test score if successful, or collect
    # the failed FP together with the error that caused the failure
    failed_FP = {}
    for FP_name, (FP, useCounts) in FP_dict.items():
        # None marks a transformer that is built without a useCounts argument
        fp_kwargs = {} if useCounts is None else {"useCounts": useCounts}
        print(f"\nrunning pipeline fitting and scoring for {FP_name} with useCounts={useCounts}")
        try:
            pipeline = Pipeline([("s2m", SmilesToMol()), ("FP", FP(**fp_kwargs)), ("RF", RandomForestRegressor())])
            pipeline.fit(X_train, Y_train)
            train_score = pipeline.score(X_train, Y_train)
            test_score = pipeline.score(X_test, Y_test)
        except Exception as err:
            # Exception (not a bare except) so Ctrl-C / SystemExit still abort
            # the run; keep the error text so the final report says why.
            print(f"\n!!!! FAILED pipeline fitting and scoring for {FP_name} with useCounts={useCounts}: {err!r}")
            failed_FP[FP_name] = repr(err)
        else:
            print(f"\nfitting and scoring completed train_score={train_score}, test_score={test_score}")

    # overall result
    assert not failed_FP, f"the following FP have failed {failed_FP}"