From 654ceb52090144403a8ab69cd4f4ecc9a46be216 Mon Sep 17 00:00:00 2001 From: JuseTiZ <291195857@qq.com> Date: Sun, 27 Aug 2023 21:39:19 +0800 Subject: [PATCH] Adding GO annotation function based on GO id --- Juse_toolkit.py | 4 +-- Juse_toolkit.ui | 4 +-- README.md | 17 +++++++++++++ __pycache__/Juse_toolkit.cpython-310.pyc | Bin 37553 -> 37562 bytes __pycache__/plot.cpython-310.pyc | Bin 4754 -> 5720 bytes __pycache__/ui_1.cpython-310.pyc | Bin 24969 -> 24980 bytes __pycache__/wz_ui.cpython-310.pyc | Bin 3172 -> 3179 bytes plot.py | 30 ++++++++++++++++++++++- ui_1.py | 2 +- ui_2.py | 28 +++++++++++++++++++-- 10 files changed, 77 insertions(+), 8 deletions(-) diff --git a/Juse_toolkit.py b/Juse_toolkit.py index 00435d9..84c5e06 100644 --- a/Juse_toolkit.py +++ b/Juse_toolkit.py @@ -991,11 +991,11 @@ def setupUi(self, Dialog): self.enrich_xlab.addItem("") self.horizontalLayout_38.addWidget(self.enrich_xlab) self.gtl_dl = QtWidgets.QPushButton(self.groupBox_31) - self.gtl_dl.setEnabled(False) + self.gtl_dl.setEnabled(True) self.gtl_dl.setObjectName("gtl_dl") self.horizontalLayout_38.addWidget(self.gtl_dl) self.gef_c = QtWidgets.QPushButton(self.groupBox_31) - self.gef_c.setEnabled(False) + self.gef_c.setEnabled(True) self.gef_c.setObjectName("gef_c") self.horizontalLayout_38.addWidget(self.gef_c) self.groupBox_32 = QtWidgets.QGroupBox(self.tab_8) diff --git a/Juse_toolkit.ui b/Juse_toolkit.ui index 6df18fb..d260496 100644 --- a/Juse_toolkit.ui +++ b/Juse_toolkit.ui @@ -3044,7 +3044,7 @@ p, li { white-space: pre-wrap; } - false + true 下载 GO term list 文件 @@ -3054,7 +3054,7 @@ p, li { white-space: pre-wrap; } - false + true 对 GO 富集文件进行分类 diff --git a/README.md b/README.md index 6b577bc..976c013 100644 --- a/README.md +++ b/README.md @@ -17,3 +17,20 @@ 教程可见:[Juse's blog](https://jusetiz.github.io/) 若存在报错 Bug 可以联系 Juse 修复。 + +目前已经实现的功能: + +- 提取最长转录本。*(已实现)* +- 根据 id 提取序列。*(已实现)* +- 对序列的 id 进行各种处理。*(已实现)* +- 串联序列并得到分区信息。*(已实现)* +- 批量进行序列格式转换。*(已实现)* +- 批量提取 Orthofinder 的 orthogroup 对应的 CDS 序列。*(已实现)* +- 批量进行序列的物种数和长度过滤。*(已实现)* +- 火山图绘制。*(已实现)* +- 气泡图绘制。*(已实现)* + +目前已有的小功能: + +- 学习计时器。 +- 批量改后缀。 diff --git a/__pycache__/Juse_toolkit.cpython-310.pyc b/__pycache__/Juse_toolkit.cpython-310.pyc index 71db309035f86c63411070e529d2fde31c59f218..de29fc33daf9af5945ebb8ffe78248c7219e66c8 100644 GIT binary patch delta 94 zcmdnElxf#eCfb8nCkyJrF0{&G$Z4x&6`5C)S{U5%#w`Kq|HB~{;)F&Z2p?GM;`!1&>xck delta 88 zcmdnBlxgEqCfb8xL9x|rFzQb8_9xBeTf`>A$^Qbp^#KVX#Q8e=fv@i8e(iJaz4kfh`isZz zP7ZjvoXO~Vr}~HddF$&~>wdmKJ;#YS{18;HuYP3Gs>dey{sYSx9dL42J$O3Nw29O4pfj5Cwpa`4< z&Hxrr0|Zb9Bwz!5z#7mXl*iOQ=>3Gf%7Hu!ECBBU6Vc`TK0`qsihj(GHZDQF477pE zKm`ba4d4S{l2Fd7C!ni92e<+}3w#KC1RMf30Uy``K8}`#hNHuUS31||w$2InFN7dN z{Uz%DF9e5e@PLI(@=fvCiyI;oGwk%y4K38VT#9Y3#z_gNuC~oKbccmJ)Tdcu*T_DV z*hwY!RG5Oj$RvC^SR_SG#}g+~iIWi;_neGrwr;-7B-5#FYMd5dW=}DH;Ueb@?Id+P zE2pb5S8dIam3GRt&fAr^pK7MAoL1m?Zrj8&zh%|Qkn!BWYkG}K<&GZfUdwi4(+?~a z_!p_TVx#E?G%ZdgLpdF1)|{r@vS4a>O~(&nz833E$F*1VxkX2=7;}qQ9wU}k&r#fZ zm?jnXE8CK*y{X?Jt_KOa$ns1_n0$a|IGDXhaYg)P{;TKmyLwJVS-wnYj@)TRM8jBM%(Gj!Q*&U%hud^;ygu}K)m+FFYFUU!&s!z!FceeV%D8^Ke~g56z9y|> z>Nxq-qbJSLj{1ru_WP7Z^)>JffKAl5K#~B&KH1_n1Ol&45Ze7m<`ym2^``3nh1oKX zQ;sX`_si*rhqZT1AH|FVgg7l7g#^b&;H}o@{R?Hicl46)!GTL$P$Ipplb1+x-}m5? zej_&@KSooOc!|iQ7sOu|eWeRYatUo|@wps;0F-sr$}?c>o(5 zJJO}Z6IgX6eFPgXU}1hW|KyYJEavjn^VmrbbX~K^RZM@f?}i83OxQ#BM0+jMSoIBw z>8y@9nM)RB9_D5>vYd=8Sc3I$&t8>!#zOJKEHg7ZTPc$lifW>Q#Ke&HPNoV!+Nwgr zBCJOV7f-sM%!m)2liR`oT_V;=`KZWM`NfMflAn?rJ}a5v5#$_jfGALb4a|c@um-$f z8LWc@Fn|J*U;{8v1x&Ch2{v&b+AnG1+sG|24R%2Q?1_7qPwhfx#jC3^IgUIACm;k) zK@OaO9!b!_d(mNV4)S0aT!2eZzV`~n!8N!MeeM;}PpzWQ6Zg7GYT8IKBUAou%$6~Y zPL z-)%$`U5zFbwmqsEJKLK1M~)8PY+_{s3Nb3`Q=PtyY)qV#n17I1#P1k?G*|E~9Gb*v z5nt#lkt+E0zQV%!gQSs6MA!x5;v*9a=2c*{3^1lZ>wrK_B}PjK0(MqKKW3J$#FVh1 zM1n1$;CxH}6k9ULRnwMkYX#mqVfwU`Mi8SuzEZTZvdtLN&i(!SAtGs6 zEAV9boUP%{=8PWt+rUZw6>?R><*ou{BcK5=T7Yf`5do^bMDKB$q6O82Myeai9%Ixc z4qyqy=%OV=jK+l{`sFNfaA1iP-h&oNZA|a=4v1fFw7v&{!2&olapGLzZ99Y4%rr{# zU?`>r7o@iN$fT0X-RMyNMg_|+ac?FiprJ6bZL9(~JYZVY@CAf(z3s`^*jaqH5azDQ z`=x@u1Cu|Kq;*`&yC602)~TJ+)%^U_I(H*@w(>>!#Xix8L^BnmA|A_sS6agB3QBT^ zT9innA_z6{+Y8oCUMSgY`=ou;BFxZWlbBRV}I@(ywOv5;+g^Zm{7&&AKs_#Mi z>dSbOzrbn#CMS)z`+H}tr8y>Af!I5h6QX?9jFVC?KR08Adk@$T@|>CFON0;_P{3`S zMXNwt4G_6^8;wjUXywers`Mu^$ug zh?x;rbUhSRg3ZZ2Bfa0lKPtYT65!^NQZLpIIGLZHD#^(er9Z-rRopB&UBQV>&eRp- z>R3okY^C0MvOmHu-3U=n7$J2HKO1;0|NB_n%k1>Sjst!KXaR73VopJxg5jfp5%OXF z{+vgb3VHWP`8pvFNQ@@Fi@#8MYn2dpjSx3Ix~2s6y>SMcQf?mOd*>d@79ILd;t9Bz z6Rdaf(s?Bd1{{W&d;p5W%xFo*BGeg0sgAIvhTlH#MAu314z9^ic;p&uVUQjN{1hNU zjzTo;N!|3sRNbI*8gWKS%a zh$_0lO8Jk=56^l8D~T!*%l(WVO(*OzJ+yryMEBbser-jKY00q*dT%)u$%U>a?=@8>9Rb` z2A;Keo$nMD?E^dm*vrF<7i@WshHc{s&L2Ab>7jc*ee%xp2M&MSy+1ICiAsa|vKQAS z2Bm)i$zK9q0Q3OX17IOL4S1e^zPN7fix{2({2K5Zf`E_t=|^^`5uN>5!arilu0~7;=3b_!;g;c1>pNZfZ^?%&5arH)2 z)yll5F!?G$z-7AWOphjX_7dja=2Mn7b;0~VzIBI%ZKg}tVya=?YzITfo`eLfBi(dy zaTFI*m~VQtmSCuv?x9EUJOkIfF~u{-qC4AsfcSUZt4;NL7K92kekMES_y}-qZo;_x&fmgAVeUi z_qUejWX#I5W{^$*#4#eGGn9&`OgS?`Z&wC+7l)w8Nz=`gco$wRQ)DC)r;rHlV_3Hv zYh-AQ&RA;rt3r&uf$2910+}LtMefMncrp@f)9IA`E$AYb9aNmcpLq%Z2IHz5r|?}0oc_{UlvirMmQPQY9e+b zU&-HDUoSnwXV-6<*bN4!o@9nq=2l(v-uk?)f5B4k0z~rvibirKUg8EEHsmcf8Eg*` zplS`~!s7#Y^fZN7LQO=i#(Y2jPkp`g0$;OXX5~%bK}7{hELXpSiI)Ix15Ci*2;_{q z&02^Vs-Y$mGJkMGsryUN{=v^}sIEY}kiEzq(Pd%3Jm5Gr!f>CK(_=~^YV4#YW3Y$$ z(v7wDZ*Ba>jebey4{TgAvJZdI_sPb`9hbJ3hc@eZ=qy_)_n{_btie<*$r4F}y~0Zy z%iLDV^I+ow={e3D=Wc5tJ}cXWBRAqvB(m`HV4Gp@fgs!rkFX7ZZomWr*{g-(F>x6= zg1N7F?v{sE4W??UOb?8=eEK2yFYX<(yZ`O#OaAqi=}X16!)Aam zcgPvtJijTgYi!f3%9K6yt|`>bpf^Tw)yIKymKG&{Su;8li6Tj~(OMYbQ6;Kg=^;y1b(hOj{+~t zRlY`W3R^|sx9~^g*+g8_Kp}3l3!u-FeWihiNJF73YO_8J%}+ NF{MVb@l~NK{|}Ue74ZN7 delta 5640 zcma)AYjjlA70#WR%nW(J5D3qN1V|tQAreqR2oS$6saN4LnYjrUX69bby?GFW z0YnSR%N%MSRtuFDv8|7$F0I;D#TVAvmeq&erIl9Dg{#&sYx_f2SG&J+lSv4ZEV5R< z>~r?n`<%Vc-uvvE6X&Eu=cG(MGczrPel8eqMc#NSv%x*e_QdoSm1WSRooZ<@mDzm1 zyLNc?@&&i=vLdRfHN{oNuee(SMYM(qm~`;7XHHMyh($3`ECfp%R;cczhDhm>j4`8gK4;@S3_>)HAf$A zsD`4&4LZH0wpoMfI0zl$s;<%@u4;HVVqTAqLkS&8T!?e%#*i+&Hz9w$mv#gaeX;-Q zm@&o6X^L_euCs?x?kb_Erp!%lH}{P#y56mh^=_5B=o%+yP%K(1Bm0KRhU3s6TwLB? zgK)9GMUM_0(cf^SGsf+g^1F|X`&g2ydFzBPr45`-Y?n6jPbXG;H(^aZUzjsJAWD>| zw1Nt9Gv__2g0Igl%I>RPBA|j0lKGL`731beDJdON2USq|#a~yWy9M>)lU%DrFhoSb|){+pEzC^(|2N|%|rBS zF5uhpb6uldd|&>(1uLORq_Wr}nW;PY4U^AGNBFywmw4~M^20oD%Ix_N`da|N4}jvwI$l09NlNpa66XiD;py-ZW6?Rz79w??-Nf#$lSd zY`YrXsYY0sdkRYP8lWQrNOt%TuPe}(i|yPDk1@-H7N2gY@l7mB#RmJb0)R+Ianioy z&LhSLV77B#VSU~rkizn0n{DNL3d^eQqcuVITJ|8;9u@<(1hkM_3M!efLX3~VJj(xF zxbDX95J{(7gBc;X7F04pyriFid6c(JD=GUnkwMp5XJ?mF4F^3j-ok%C_PhN3X-`S} z__66_(-R)C(_^Y>hMH8RL2ny)CkOba(;uX;I8-#V6#9c6$CWum+r#kKDDe@D^fB zm#DU65#g7*pDB4j@*)0jW=DA4><#IpS&uiVro~G5le3Q({1BQ%G6{80($Myx&$E5B zy@;CwJW{$?n#vz3T|NUbB)g(v+e=*qJ5ePw-?JAjVd!ib3-u>+2<_x#I>@gFoczKXjAl{otS|eY@gAngc_)) z#ba&zsq#C+C{~Eni-Y1HLLNks!%7*UP_rljH{g``%%?J$CL@NZ}FUp2{qzf zNf*Z#4TH%<7(ap)qJjDws1FI`RMC&fF0&!2uZ-?}6$jh_+iftFJlfnSQq1Prl~rC5 zaozm($}*{hpQz0Dii-Sq{#@mZbU2Fr9B`O_Rv8ZB3WAdDry6CpYSwmo{C*2AZuYDs zyIR|wTMF_!mh9;i7lLqQ7a%u@Z38X3n{R>=;S~ne0EiICX+349L617|rh@etU(m*ltQvcAj6XD$f25CPj>jXGzqwrtZm0U-6c|4VK77 zc;icHmQ&&0#5A0A<$v_kK5STcdx+7alK-W&sNGeO#xC$l)qA~#SXRJKSD&B@Dqq!J zDsG|2^H;zWy9&UrcG|*-7PcspjKvWbu~+d+t7@cY`Rtl?oiNt+m^!nZ#7<tO6i2Yv?C1>D+u-9LG&SK-R>=Xf#)?%p`eh))meTc=icoW+v z<{swm)iu(KeBJ6PWqYv*85JyYT>U;KoLmL<7r+Mua(eX!gN~(HnjW|LqpN3nKL_t$ z{N2?Rr6?D&AF(4UFuWr7x>JiVj&WL4jW=1_srp##`+UWkN|#6C_pHg6Y<_0V{C@wn z5Lb7p`?EEVxvwoRXE&G=P+96I_C;8bx0q(=EUsJZO+K%-#Os7S-%&ePdX7I_TfBJ# zt#_jRBB=EM5y-;(i890f3Wo4AJi^uiaA9mDf$TRzv8d>0L<)Vy^EW=cG!d%FB0V_5 z@#$sgxVm=8-kvYC&%5gKO1rS&D{MmTrKmNskBXMH+fb9%ljiRAb+$w;)XdG1YEUYPs^mXb1hWMnUe9yLogPnC;%@4rl*Mfb1D<$+CDDI9PS)Moy;T-Z6BL}h)b0u22#<`WFmdO?es?Tt$^;<-GcLQD;!!{bIOI$~F<>G<2HXwU0jL4o3J3x4nZ&jN6aWKQ0C5WoLEQ#u1jGP3pbCKIk!=B( z03)S>unFWPKr^5P&Z?+G`9VU5~I%wMPU!|kCm=0q=}r` z+O9NfEC7K&^ZT|fyWwT5^%zmAJ7Y16@@uD471zSc^mwDzWT&pEtzB2kpp?zv|J{~T acoO7p0^6guQO9!%yFs@A diff --git a/__pycache__/wz_ui.cpython-310.pyc b/__pycache__/wz_ui.cpython-310.pyc index 5bf232664da57c0ca23c6ee8f1e9237954aa5fd8..60ddfd8cfdf2fafca9347afb4dbc6ae5e97d929a 100644 GIT binary patch delta 34 pcmaDN@mgYoK9i`di&acvNlI#QW_n&~QA~PfNk(bXW?Lq8E&$!T3?%>n delta 27 icmaDY@kC;SJ`fe diff --git a/plot.py b/plot.py index b436465..42f3e40 100644 --- a/plot.py +++ b/plot.py @@ -148,4 +148,32 @@ def plot_GOem(file_path, num = 20, xlab = 'GeneRatio', ylab = 'Description'): plt.ylabel('GO Description', fontsize=12) plt.tight_layout() - plt.show() \ No newline at end of file + plt.show() + + +def read_golist(filepath): + + golist = {} + with open(filepath, 'r') as f: + + for line in f: + if line.startswith('id:'): + GOid = line.lstrip('id:').strip() + golist[GOid] = {} + if line.startswith('name:'): + GOname = line.lstrip('name:').strip() + golist[GOid]['description'] = GOname + if line.startswith('namespace:'): + GOonto = line.lstrip('namespace:').strip() + golist[GOid]['ontology'] = GOonto + + return golist + + +def assign_go(file_path, golist): + + data = pd.read_csv(file_path) + data['Description'] = data['ID'].apply(lambda x: golist.get(x, {}).get('description', 'NA(Obsolete)')) + data['Ontology'] = data['ID'].apply(lambda x: golist.get(x, {}).get('ontology', 'NA(Obsolete)')) + dirpath = os.path.dirname(file_path) + data.to_csv(f'{dirpath}/GOanno.csv', index=False) \ No newline at end of file diff --git a/ui_1.py b/ui_1.py index fa79704..65b5933 100644 --- a/ui_1.py +++ b/ui_1.py @@ -171,7 +171,7 @@ def eventFilter(self, source, event): source.setText(file_path) return True - if source in [self.cds_input_text, self.vol_file, self.enrich_file]: + elif source in [self.cds_input_text, self.vol_file, self.enrich_file]: if event.type() == QEvent.DragEnter: if event.mimeData().hasUrls(): event.acceptProposedAction() diff --git a/ui_2.py b/ui_2.py index 6ad5a0f..dad1c18 100644 --- a/ui_2.py +++ b/ui_2.py @@ -9,7 +9,7 @@ from PyQt5.QtCore import QUrl, QEvent, QTimer from PyQt5.QtGui import QDesktopServices, QIcon from ui_1 import MyApp -from plot import plot_GOem, plot_GOem_classify +from plot import plot_GOem, plot_GOem_classify, read_golist, assign_go import sys import os import urllib.request @@ -22,12 +22,14 @@ def __init__(self): self.enrich_file.installEventFilter(self) self.gtl_dl.clicked.connect(self.gtlfile_dl) self.plot_bu_2.clicked.connect(self.GOem_plot) + self.gef_c.clicked.connect(self.GOem_anno) + def gtlfile_dl(self): gtl_url = 'http://current.geneontology.org/ontology/go-basic.obo' gtl_fn = 'go_term.list' - if os.path.exists(gtl_fn): + if os.path.exists(gtl_fn) or os.path.exists('go-basic.obo'): self.show_message_dialog("文件已存在,下载终止") return try: @@ -58,6 +60,28 @@ def GOem_plot(self): except Exception as e: self.show_message_dialog(f"发生错误:{e}\n请检查文件") + def GOem_anno(self): + + fp = self.enrich_file.toPlainText() + if fp == '': + self.show_message_dialog("请输入文件路径。") + return + + if os.path.exists('go_term.list'): + glfile = 'go_term.list' + elif os.path.exists('go-basic.obo'): + glfile = 'go-basic.obo' + else: + self.show_message_dialog("不存在 GO list 文件,请下载。") + return + + golist = read_golist(glfile) + if golist == {}: + self.show_message_dialog("GO list 文件存在错误,请检查。") + return + + assign_go(fp, golist) + self.show_message_dialog("请完成注释,请见 GOanno.csv。") if __name__ == "__main__": app = QApplication(sys.argv)