From 2e2f6c916b8514452b3789afc5970f16597227d7 Mon Sep 17 00:00:00 2001 From: SubhadityaMukherjee Date: Wed, 3 Jul 2024 16:31:19 +0200 Subject: [PATCH] squished some bugs --- .DS_Store | Bin 10244 -> 10244 bytes backend/config.json | 4 ++-- backend/data/.langchain.db | Bin 3780608 -> 3989504 bytes backend/modules/llm.py | 2 +- backend/modules/metadata_utils.py | 4 ++-- .../train and evaluate models.ipynb | 7 +++++-- frontend/ui.py | 3 ++- ollama/get_ollama.sh | 2 +- start_training.sh | 3 +++ 9 files changed, 16 insertions(+), 9 deletions(-) create mode 100755 start_training.sh diff --git a/.DS_Store b/.DS_Store index b6b79efa4cb6036193b47ebb2d27476a5179bd94..365deba15ce4eef5e7cd2b213c54b54f5d85ff1f 100644 GIT binary patch delta 74 zcmZn(XbG6$&nUAoU^hRb%w!$`_sM$%`6tg6w47`xTqVxXg7YG_ugqfl*W1Z0~S en@!#&Y`l50PzmqG1{=oB>`Lie2ghtSDY3vRsRlE!CFSyzQ_ zeBzErmMz-3Z22X8@}9Q#H!fJVZrRFZ#r2(+{-EtMmi6t>lEy!L4Zi&7_4wz#*WGv7 zx_4Z*E`(15K8^S^;j;jrh4?h%vk0HX__W~DiccFpOYm8W&oX@4@#(B46@ zJ}dC)#-|6LUVK*KvkITp_^iQaEk5kq!e>1`8}QkP&t>>*!sl{)uE1wAK3nkFiqAHD zw&Sw{pFVtc;uFTl#wUVLw0PON*b6VUEo^MMWNAn1;Nr=~rsC4}_&+u*KJmn!MfZL$ zdH(_R@_SZZUOe-vrYA!!Ai?MVc*UX<@A(a7eRcch;vc5cPln#{gB5Xn62%{^NN##( zd-ElKamm8Rmo9AUZupa?bC>*uYCoNPX7Py+zjo2RXZM!CosQx~g8SIhZ(4NXfsBCb zxqi>SYX@_K)($HcP1y16)@WuVi@((ONBTCXKl#7WvmslZ`QGK-$~wEcsWF*ODSKbT z*2Y9Kt&V;vw6@ufN0RY$RIR`6vUSS-`el}~PBnElp`YJ<$GJz6_@s*G9!X!->fq4_ zt{xjLWbZDt>=_=m1}3tDxq>xz*eVR=^44IckU5meXRRY+*}PR4vkvA)tOMD*vZF(j z$Me}yHrzjv85qvyuN}_aotx+z7#r!c_KsL@namfg$?@T_%wTTxhy|EK*@>)mC_9ju z1iZc0vCL?}asV=)&05EXG6n1S*ras?V6DM1>sYQZ zC&#lB*3j|s?BIH<4Fb;I=$>)QPI*hC?} zVY{{Ay3tG_H{kHso5zk!X8Fs$J=a(_fz!0_>#CbRhh zf0yrW$Q_;3UmYA9ce|>F^xA8z>oX&u%7*QZwc)_zsQhYexZ!&D3v0tQd-mP3|Hg95 zYB1N_{6_YDeRj0(dci(3FqCuqwKnu0xZXOL86F$uw|j3q_@?S%q5s#9<_5+lM)TJG z!`2OBcV+vo-g|&Aydmm^b#o>+dT8uea>Mq`qmv`qiQK@JH*dG>aMA*4`D2H5-N8M# zj}K>#XD4EozfnpzN=c(l-bRiGR9d%6qt*VKuYI+*ncK<^ODp`?VO>+Zm91OZXjQvW z#MuNyKRJTd>{(}c#X8xP5bObW3pSh$cRM zpkJBk+vC>ed|?6%EbI``bjOb!*1p`Rfl|>1WCztFr22I`uixi3@-kLLlN~?M>iQC6 z#ntX-k}Jd4U0-4%fcCwmNgN@Gqy1&jYR$hUSGGp`T`-RlOtili7)`o}W>^=@`vJyr z*}S6jsHP5na4pmT9n}=V4gKl13gbXREcI)fd%7!jBywDY+q`p6l#c5lX7{ z-T0AG&%M8=CzMuaF74@Qu*0fH!mRpubRkA&3|G5)mG(U0(FM4ghKQI&awf5+4lVg8J(n|{Qf z3AO$-eY4sfcj)qmo2;fI;^~3@GjHnl{1YXgoIw8+vYM;fwK&e@E0&`59Gv9`EVth{c4;q;Wo* z$&Zby_357Ft@`)dk7qLzY7ZVXaT8HT+b&zK{_)hXYqm~W0~Q+>bSpWMaQAs zk*kL?qXXIe)f3rFAv-8ju+)9LFPG1&=XPDz)11lYbNK@Pz^p725PS5K1Ct{%M@upT z1CtW~(vPNDwSQkIhBc+vSj4Qcs99q%v&Q0PjU~() zOPVzXBxQ}I^%}#Pk~M}k#Wfa+sk6I#dOKrLpB%=YyVQvlJf>K8JG4H+SwL`s32BBikUMM?ovky3F(Mo1GOW73c@Wu#QvkTGn?2x%fx3aj5p zCoQF7hLCY1r4ojKm>!W*DI=xQT1r7WX$sngf|wqWQjne^r7+zhrQ(Ky2}42PClpK> zDFqeXNGC0&Y>`rJF>Hts;*>_VI`K^qCZf*WjvvtC$%+6^62>(NFUt9nobN6^act{f zH?9y3j7!&!?zCgKmqaaTw^%!uUlLZy(?&)c>V>|7*fuUaR z?CEaRPXGc^iu~GQ9sr2CsjH{gydp5P;9g?KciM?EB4p+k{fJN$rFQ9W0(s#ViC*)H z1_IHC-MGz8?6i|5R49^L^%IQ>rEuwijEADcykl^o2wq|*ciO2kE|f2=`jH0171CiI z5RA1Mc7RJ%=({~D+Vo=sjMBF;a|B`=JA51VstIf}w+{?Y9#Wwzd%BmDo(yL*M<&%1 z8{$1phsVYWD)Sc`yB7|LeksdJU3OPbySi;R>FW$_FP^z> zdw)ad`p-3q(#!!$6S zDW3T7*4JFzk8jsU#M$r1r43rPb*iqf*iL+|A#^~sJ($dOL=W1rUE%nyaJnxRj@gl@ zy6MQ4x3;G2lx=Uv83d+-7ydZDMvZ?a(WExM|B6eS(h(fffB$2ByVXZJ*Y>HiKilzh zHGcW(CiTUaFI}X5_eZVPV!j!{C#FKrx35um?6`dMLi1q#-*l(ctDkq)dH{s}ZE0IXx_b83#F*}Wa5)u3wPp0s0lt`wd@faGX z(=oKO6UitV+2L3c&&qE`;t{m8Q;{&fj3)3>GMNO5=`di~=mmqqpSB%Fvsg0fMA%c5 zkK!qUjVJL^G|pe63I58S?N}N}z}$$(F=7l8J^oF^0Wd~v1YtWIjRF~fz~i``B4!-7 z^cac61)Dg)Mbh{O-+<^2po9|%jzr+5h$gyVACUy%kO=(5qAC0t#TPc{F94Cp(qf4g~cm_+SpOE+~8U`h5(LWxx z(V?EBc#34_6uMxe5pri7pi4-Mp*d#`QvvEnIM7%$EKHe>@p(8wILQdnNszr(xuQCYsgw7a$S7`TV9&8oyQnyyC{* z&J_emk_ozz7`fcnYxixNbR)7V8zs`hQNBjUdV4!p5)#NLjqF$|j;VJ3{M^{juqNQ+Z}0w#dK~e3qCmuG|Qmkodhctq;BsZdRxN zxwT3CT7G-$L#@3{-Q-9v8jdp=CF8TFh+6;A-nHs&SEe>BjYd-Mh zzrQNgsh;}1wuP(8G_6`+2Fzc6qz^OQ)7zw8*qv%pzqLEH_|nRSRRa6;?$m}(*r)tU zsr2oxJ*hP-uwlx673QR)q2@+)^k2Iy6@CMje*43{T{Lk%_nvL*RJgf!MI(l$PJMan z26gtGy{pvzpKe{C9{f(%8g+7W(o!FMd0VsU-?X$zjemE|LUqsN&K0Wt>u9%o)wadz zjt_vPf3!5*tHQltll|>@)_(V;P3nKW*ww3E`0c)R>d~jSE>hiJ*|tXQ-nD6+dg>3h zEmEid(`Lw^kA-{G*#(y`QuqAVz8&U?pcdJH?K{uR`Jq~5p~U-W-wd6QaqGv2-DT{{=3d(z9A)tmkg;8_oC>VcIAjsO0=;F#A9?Ceqf zpX*wr+D`$>`19zzZ^`Cnb>)vq#EIUO>ZivyFHxU-`|_TLKGn5Ag%1FMMIYR>M74ga z>r$%fA673v8oOM*ao@%^^@Y23_Nv`$G02qaT%x}D8@;Pl_@jNRV1+#Ujqqjaxn{{~S26fs>E>iD0w!K@u<)&qz+M~-ctWC)#b#gS~oXV)9SirdB$D^>UYWlPkreL%msNS&QT zyEncTBYD&NwqeA#0@xit#mZ0L)Q)GZm-njvEva?tfy2FR>Ss@4{@d>YsK>UXR;j&@ zU}XO*M?&$nU$^k=H;?o#Rz3fF>ni=s>Bh7Fwk*=7y8i|5zGS7=svSeUEz0`WCCzHp zl60$j_7iQ3)r}6G{n@J^^X~bZL62rTYn5Aw*IJh6$L0(q#d!jXGYX> zOE<28AYQvH2Hk-BwD${HP)j38(Nuk3U{$SMVf$fG!Gy3^nXhkJqneHQc9GwR8rY{d)FvyRqxuB0pC*JDvzuE#?@<^Vv!iUi+>IdKf9}URZ}t& zi>pv)*pg-4Vn>tQogW)?z4z;zde<(FK(}|FJ@nl! z=t!UH>oKzxfpUP!)U&0=A8=`3`o>55)7kA7PA7OFOqC`A#Ti@y_+@p2()Pt)hOt5y z_R_UL$^78sW;u&cCVNsszZ}EPMscl9eFtLa**8LZpL{L;eA`zbQY&O~Np^c>vMa#L zGMW6QMYo7#S8BT=`Q-lqW5004Hb^W=ddO% zsJiEG(fWfA+0E+K#Xc#%cF`6Q%&XPO!R@QH6fX;A2;4;;A@0iMJN9l`DQbiI#c{}L z3g=28zVw4Yt@U@88zFwnzKu(abpEHEUg>=HF`sl^pdK#vt>OfVaE82I0C|4-N|Dzm z9$mgwq_P5k*1W7$4i5b6 zscjn`dTKeehqBV)vM}Y6S_;ioD;>8`w+R2^&Q)69D6_C?{~Rn9HU;i>#q zf)=q(eK!GJ9gC`s^M>J3#%6zwl%4z9^Sd9xG%JAQ4<*Yl1qwDd}Z6v0=4Q- zyOycliz18l!Rp~>tOd)dJ5u4ILK_Lwt_TfOJu$Fzo#yu|mt$qH<7EF~`*2bDG5XpqI)CVV_PCXEXid{Ngs*z3lj911?)N2avi><1>LsT{t%sP1Xb2kgNDPg9831*O!=DfOOg>4WQtFnvG6JNur zu|EZg*x$abt=-$o#i+Eu_`lbPEQAORw_%IsUbD>qz96w)xr)<0|6nzh=8Q*l&zwDX z-Oi2bbS}}OcK&SZV#SuFIb%x~8F#O$Y*V7lF1>B>>oQRUgy5r9Zhdgx{ZS7-WFD6= zqG-SL6b{07JcG>|2kXU(&C5ukxr@}}4?!=2p4L*;Mv!{$DQFF)jZn8KVaP=Pw5Qbv&*uqs!>2M11VC$NeucC>1L-wxB z@a=;PBt&GzhFu_SG?`AQ(;wWj`k}K+7dHL?3VH`xVA_1)l5z=aon=@MIS_uS9x* zn>P@4UWxPqPj-P598zze*YpBUc7dZ4HZE*>fhW7bc_q>dJlO@8`4a^$rWf35UraB! z@aY9>Jn02)>I{yK599Ok;;GMUzPzFM;w{b3hl(e!Pw#0gp4-##?#7m_)-}k)LmJ#J zwfhaNn~V0(+rHhX9?LZMDtn-5xjMJ4>Hp;c^b>0RP-~C+_PLdv#qWQ%<)1>uwI6Ez zPN;a|i_K1>IvHthRRFlJ*ng<``ysXE)#-i3)3-Jah15&`)K=i%H>i8wwxXqY)n^+f zL&f$(tzQbM=bjHqWY7O7p1HGmRY;A$-1OG+tANsRbL8@3$G^0EDpY)Fq~S?S|JenFkox6ML!Z!Rl~ZqT z$h+O&9V-50^Mbz&sUI!qh9MR0iPWu)D)i%(UFwDRB|3|LvSz^_g+h_ybGw&(83P$h z|3v-WnM9jH_~}0{93IY$WTM6AUcaP2)Q;3w&wq`N7SDX8?f!=8=B0>vSsW^21S=YX zXF`GezeOT^s(AFahJiZgq=?Zx6H;H=wW3-5;Le83w9}z5@{h2a$J7~X!Bf-CORkZ# zDySMsFJ> zoIVYRNwdsY9YT{x(i#|MGEW{SoNy{^ibK6{Z@@YP?shA1yD5Q0zx!K zIPQgHWvr{O4qJ9#*c#3u^CzF}BTnjX8j{w^0`MN2$Q@w@kAHyb_PbZCSao<}Y(%>1 z8_A9iW{0!KnZ&CxJiQf;*I#~r$-;*oY-nAB;SUUP;4&nufXrK=S`M$>tulvpY;lAZ zWob199^S~u-Wgvj%y8z)B&A?+85$A+*P1tF_-6LN&DED~W$ukeiKWtP$fY=C1^_GLNpZOh3FGNwHYH6VfU$Y zN_Ya0LZCun>{T>nb6gxtEJ=)!-Oj(^T*L1|2T-&HmMEbf@)ghQ zUDoYHZE>L?%7v)I&q@TvMVOR0KD=y4h#iQL!gm5G#W=r6BFuOcJ1GF? zT_WB=9wa1Cs|*XSSW*KJNe=;O+i8g-j1mvTMiC1P!jOhEjU!skJe3(klpg*@7!jHw z-mLV3c9YgS%Qula!VC!X5u&9Tt|xs( zlYSb3befQO11pFDM1l*JlMpOX1|_{{vVv+3{Ja1~Fd!mIKj3#xA;M5GNNJl)6PYN? zy_{Kc9fJ1oPlTpgmjU6NXEU<2XCRtb3VV%`cRZkp zuRxSY0nZnZoB3f2KWxtl4=2Va=Wa zsV81;?O9^IYlJXh7RI#Rn$oP2@3oYRozN(+!N-s>qX5bi3R#I=qen#`~ z+2WE|OsPGGH+CIL&VbW6{v_p}mo=tlx^x0@{h^Ir?Pf;HtftOf(bjXVd0L(#nJ&QB z;p;V7$FmyK$W~r|-^Q*!4Qfi>a-kyt=PRvU>rM9+Y*yK9N{(|hj9`v)A%WAqL<8%W z*f1&)x!8TsB%KfiBQ9G=T6QEvyHmPgngGG(6yrJmjq0yc4x%z4W4NTFjF0uT3`5qk zV~Rb(x6=p|queI1$7AvcVeAxhKs8F)4}}r`B)D?C*dV-$2!jE1_hqHeA=re(waWkn z9sY)0PU6!veB=~R^NYr*b0GtQatCM;4_W_+V#gD-aHm!WLWx@tkxv+hpa!LgaIk3t zA7JS4PuUDa>%*w{MpRb5&)9AA>7v=O?GE+vH`mSP($Tf84nN4P&j9tv6*rQ3O2 zZ#j4-qoNSzFDcR4?65-t4snB5xIC~QiINqMiUyFmyqN(x*ENUmT5cG+imaIg4jn+YJ z7b=Vp!O9MtehaaGsT>>n(^62{>c&23;o~%DxvN@b4&ITJ~n-On;L!=-54iP;SKj~YB*dhu{4*4d0kezoHk594q3xAh1-|9ueT}E zvB(p^!h;vI9_neP9b~6)Jla9BiZ;MkaNmC`IL;M+8i{Lu^azgB8n_ z9~Zd+3W`PuZ66P-2U?|a1!EX6X77+WelKi2W=+aEdywtcxBh292&_LtiRlOtAQWcM zPpxj8DuU~jJEODOF>N%_(%U_SLO16gKq^Nv#-R@2LDkG+!`mRA-S=OHSGd&?mGw zca&sW%&4$K;2HD<02R{^%}ZL{t?o7g`yr(ul3;DD{0K5tiDeiY9q~$VyPI{#5R@Dl z@*M!Bx^EorMXN)g!4(>KQpg{$@ziW8M-bRe zsfbiFys_lLxX*QmBO*DRoDQ%^QU;BAxa6*fR9G%bpOnD(Yl?|D65#>F)~0JDqoM;G zF}dZ!9?kDcE2$cCHLGgP6RF$41}1DGg6wSazyDN&Y;HguzBNOubyfrB_lj830@Qc#^u9B?7p5#qkjPRo+_k90dy&lO|(RDEhU&{1zRZ} z`d`OTXfbWbfzHC2gmW6<|Kz)TO-9YPusdtZ1u#k@ z+?D8wX$qE4F&=Tr8#u_#Beq!A0LFzNup%_OY3$%|lHc)_6HO2pGystj;);TG7*i%q zQTl@Os{x4djMD=<7Y7v;e~tL8aNj|%Y<^Hx@VwIY4oOQwC7_RycCv=JQ1C3!Ed&E8`eZlIj6_?9ON>sxfD3U7BmOG=%vvwN_C^gXI?Fc*f=$Trv6} zvXrzESrKBL^(jX+fnJH3dpaaf7x2_6dWOPbbR^eU{j&Wz2r&}8@ywlngI_O4KiL6i{{R5K^_8f16W zuo>IY6z2>6b8h|IlENUo>iBebCv<``D+3qtM~HGhH-l^{A-+xQOz08DWiE!41exjX zDV}66Qtx|URl6e&Qm;X`?)N!&W`9NSjJUDCABzviBvxJ_H;MFo$nsJgO5>s2{!$_;c5)Fqb!{aG2f{Ne}?}>P@5M>3y z;ej12e&LcWuXUGFQ~1bmW<#G-I>~TyqMO4be6=hNa+1+7G*5zf=8F%z9t?BS9Nwq! zK>Iju7K;y42L6+S`75&cz@*MAx%j}-h+YNPmY1?Dd~nExYonLr4r6 zU`Bx-Savf%pp^E#it`EN&+HB;bJ_Yl*wZCDZ`+>-ZW*kfW6aWWV>ehrm08|gf%?qm zVa1?@IGe*sNy!d4n}GhJIR|Z!zS9N=$H(;zs8EXfox=MEFnAK|B#9o}&ru0KY#E$CH|;ldg-$6Mu}JKSXaH zjwjf+d3d0J(FW6*C`yhSrv@)SWdI7)@x-7(c~{PPIjcy-D*LM(#JtJu8jy|HE{Ufq zxlOU{(#A`N*(*;TcqQ^WP63Go*a65(zle)Xw3o)4!M$GEvPlHMWCtDvr_Lm^^Rtie!mkw7z#!LuN| zdswc$BC$PSgZ8P|E|*+@qYL>z*l=)R>;}fH7hpJ;_th6@H$cRySdsi^AsP-KlUz;? zQitZ{=pxC4U;%8w0kO3+vfGNAS88dyc>7mizyU_gQ*}ny&Q?qEEXqtK>uIXgAbflysZ6yEeDIso{HztU zX$&P*3)OTt7vmoMwmSdAb?MY=*_)SBjWdBzEaisA>pPRGg zO_E6Krqs?V6268H;@sY)%U%7(=Ygm@3?5+ySRW}#gF!S3&zYDX?RyZGMfx(HBI{CO zQY2m;x~rIJ8JqB0)9s909y1@$jKEigxYz9wNp4rKy>{u+Irvn-*1r-y6^~YqglZs_ zq(!n$;VWq_9{m8$Zn|Xs2N(}n&%&D4FJ6y4MzELUPE`ry;8D%{Q3gT_vCn0yO(n2# zy#HJ+?NU?COPqkCP5B4{A5}?2X{F7`;9AP74?|aO21#zyM+6~e6|ybhoP)*=J?W_1 z0ECS2_6(x7V4gQaumg&UuM=l(9i@l-I{I`yXv|G4Pc4L);?>U5RgSCe_=H|0BH@Er zoe~$uiA9YFu4(?tpV%LHf=!4=PJ-d90|K;#f-{5fh-Ot16B4cgWLm<^qd1>(Wa`K} zmbN?9@vgI@%`BbdI0DzDeopPD^P9%&6yy@C>C{l8tHNcbQKjpV+(CldaaG@PP|sXc z>S~SJ54J*1M4fW_C~Y+XL0(>|5+tE#5zSQUhzjy10@m#l&kasT9J~$Y4ZqctqWK z5W;@Fw@OK-7tbqvz1Rfk$lj*&V%~5}>?{4X`d%du2DR-QsHh}?N?75Z_Omc<;37~+ zpLq%;4HCk&Ww00nI;v&UMe9cJo^{c>QL+P1r3B+#Uc{d3JDM9k*r^Qd_bi=xF<%5J z!5(?*%&+o0m31O$_z6cc8@z+Qu=2JPvE>z_UTn^{yroOrNIFUTnrC%)T5Z>PMJ4Ta zYgwfhcu~^+dyuqMiGQdR1Z_cX5yJV(EmZ$~YS21XnSlaj<-v?&($$OPaeGw^Z z!&l~7gb)3D65w>X9AjsaBjF6I@g*da#0y}`5|HW00gpAF9oES0gV9TlNFbCUR;gh* zwGYIgY6cgn+H+uQ*FIkopF2jVZbZ+6qJcDJrM_t^lbF(dRaGE8SlLfs(YeNS7F?cB zb&Ok{f{+1~_(~ba2_#BzkA+SwoTF2Uk+MxCTCPsFlQu&+5|XZqoNH)rsLlj#V__4I zEcchzgM*j00Nla@z_>gf?Ipcc=SG){+|g)-G=5gwfTS*!_Q6)h?m0oI42yS>7_5`Z z!0f0SsMEALM<{ZKR>+5y#9pa%T84;o4(BJunt+5A$5r#_jqAnOPdH*bDrGK_n+IbTBEn2~g8vKuL<9?|0;^XX3hywdR~PugC<%wYGkVA{4(3|J9WU3Dio6jv(S@-9 z4_RkyfNHd83j)FICK=O}lYt6(q%o6yv3v+QD&;znjEyJw-)7P>%)*l8iwZ7)Ep}uX zn1cfY+A*UE$2Za`g4AvFE0yTdl2BFmM9Y#w1q)J4-&^fv4h}D#WX7L|H()8R3(p#8 zN_jvM-_&z(YeWcR_`@1VAoT8yez2Vx)+8uPhE!ND1r8OTWP-ehs)!(_OcR&9ehA+v zG308HAQuW_T@e6vg(;B<02kYGsp!gtpp%p|j>9a?w)h7rZM6TfPhG1O%`ukB?@(L= zzz}#!#!EpOnkJ|OC4HBdu&(ccYy~N4D`3zW>lWy=eo1b_UlCMSH+boy7*e%CGoq{s zDNk2MOFYIh7T%B$n@ z1;qt&xwibA*pV26|8QQA0Z}nJF{WgC{rNqHx!6SYuW#Ya^5)3^r9(@3#OE{S0L2&)lE>#~(( za3C#!5*vG`_eCP;ooO2dt-asl(rjQ>;#8t7)w$!S?1^E?c@7Y3Rh& zCxJ3|QX2Cyrc$O$xr#&*NqodrzH^J51y>k%0Jp4f;*9ad+*yxu$zIuM>7@-NyugUyHg9cpU%MXDu1e`t=cS$m)DYGrFKr4$rAF~_@G1Fi z>O#E{czw8#XJ8(m+WqQAo)~_g%J@X=kQd2qT)P`RA;$rsGzOwaZ=MG;6!WsCjiM>_ zJYNgdJ5sIoI8KMU_X;7dBIVRlRtwyUUqna16*P2i0jp zv!@l#H0tUsY&ULiIuRW#A0T^Y5$q`awBl{?iHroK?WU|BtV8GsqVhXnM>3oUKiS2qL9cLHl2y2O7OvTeYSYhSROjO?Xwc>?hjub%p zIw}|a+LGHVxV@7wWy01FBod8ju*>nFbABqaPaD@)w}ieJ@E9CFsg06i;q$1jKDj%6 zz#}Y{vbD?*g37#O=?dh_QCTp0xPU=Td?+3s%coRRle0RGggn*Osgl>Dkvp*DVr@-( zcLtSvH>K4RtUX7e@5stFIl3OO-i3Hx5_N@)Fc4IXig%~tNJ~SzR;8=wcE)i!6ARKo5$ERIoT%<%B7hifAu| zI3e5%DB)S4)RmMDafu-~JSnzKo~3k3AM<&kiAt?WsZfrYr12Ox$I4wOupAQxN?7Ax zj1oVv(Q~dbf&@(BGGG)c=X1FWC?;-FRA44taeT?%uUP|xrEg2VgykO4a1oLiGXMv= zPI)d->yO04EnvDBIC>YmBWG5Zp~rp(6lTsxR2}Apk!Hz9jO&>fMw%rbIrGIx=1e|Q zJ`&&3d@#~1`RGo~NZ{%;uFb`d6fP7cTS?tDeRYaQj$x6Cmyv5yR<@MzH{l5pq(R|T zxI>u}i|_&pHTrA0cEP&prTbUm-cnfrcqD?y`#y?nQ%D-GPZ#VmfR4A20M5$MNm*B0o)K&Q}0wIL)E#7QzcdA+ zE?1z$OPc{yTSS7eYAPw5J;>fKBjK4Uy^}#b=f)`k3&J)=SMkw2Ky#4*9iyBGjVm)Q z%QT8Rayt=N4TG!R^5iy6nJ4ib<1TQ}OJXH(5v(B{YWb3hrsQ*(U0~w9wm@?lofurq zUQ<4$8PT34x8ZHW%pw;73D$67YGAKtO2!})GiWq4y?vK=dO-jpma-B09i@dRa>315 zAF+})nxwK65mEP4azzU?KWKS~w@9nZRUMrRapx4msnoiwV!aJv-17C~X888O$%#xMH#WLq7hLAB_eav|^ar=Be&}Rq!GZ-}y=1{xKht4- zVZqY-KI6Qm<=(SdpWf40Jh!KzEwl)} z>EFZW3q|{H+uj~39_{Zq)mZ$(wP`d%gLgMB+G<^sDP;0l)sb($src*{o5PLjWTd&Z zc;buA-wdhtL#^wZ0N{(o&=XA`2=VFXLh7FJrU#4G=Uc7}744t5eY;UTmTB%Sekj}Y zXsC$qJ|8M}{7Xx&Q9bv3$l}{i7VUp&c_37LX{6yvAhD?R&cZ=jCEupn<-d> zffBhBeg|_ylj&K^PI^qt}xCaB8dOQwK^X0qLOAoAGr+(+ehIj{p#`c<~Xtm?t5;Y43Jj_nmz z41^zn2liz%qw3|4CA!qv`&KVh`#zEAX&l9)^&d&BCjz_Qm#{94_Tl0dJCQ`qs6;r0 z*v6PT`o6^K4qknf>_f$Oj0;*S5IggcM6Y`Mt(Pwh<a_#^1k`G| zk7ZwORGqkQ_41w_xb3|Ucke-+hS4<*9>#LuwvHQgnTf)7&DwWo$k2UTEUQE>(4$}tsAo1=i=3$LHk-G`4nsRY=cL>K{?29w zU>QF)gns3B-4J5}CZNU`FfJS)&+@$-iAd@efX4pOtZ9me{BJlnn$1iA-F$XDGm$xj z)pdAcY(yyKjDf5ZR^->@uVJB^l|hd;?mt-bMPg>ji+z~Gmi~$CU~ZrQ(%|)p+@Z-r zc9+4R5;u2NAtAn$8MW4B?;gnF_0c25PcQw!v5`z}6eDSAvEK98!>gQK_yzyjeKLY! zYdAZ4q%g#J8W_=EMM_^C%MJR!iS~hk3YnmnVx_ODUW)f={C5!r%VFo%jvbcp>Va%w zET7TcdLF+4lad=48y=gSsQxZDaU?gI8_1McI^!-I>qvH#dKmh3>m8lRjPAIpP%UqhD2q1jzoD?c!i86W4{5T|-xuLcfzzyxO;&JE-+ z_G8&YAZ~%2zkd=C4ZuupL>Or(J3Nl1dfXb#9s_YR6Tk>~6@(`ya?X(z|IUa1P`K2ZykDhj1HLYNxz8Y4t%uLApWO0i~g_W7cs{!^%&NkB?0hNQ%Q_ z!^308ID3;ExSkrJ4_5O8C$~&EjMku-lfhC%4vk?QizIn7zoU3KmKz?XT;tpvo*Wip zjE!W+u(-(-kUBfLa3Q?%W1}F1VGbcgi7SZD05(rfgNP9odRxQtpdo@=c4#W?xJDz; zvS*@@8-S?D92%R{oKQl|a4hF-mo-!Oa){{ZRSo{!sDk<4gWBq&mjZKT&^oCefxs zKVI3TUU*-kv-s|VP4^M@C6}mE|IqgF>u=#aWk&C`uHL`z+Jm6(@GuCZRXA%i=Q2N@ z<(h*yyLu=)YTfesEiz$vQeqIyr5gF0)rlpzIyHqm4-w=ShU)n8y=&I0$3M1Wp|bvH z{et56o?ZRkP*<`K^{o)-mPBoY!yrOsmzbob^oT_ckRT)&5$m-js7YOWuWhMkQy2PQ@d}^>#weS6!^Y*U4@=p zy<9!}t<~E_ziP+nqz@s=sU+--FtJ9}*^ebwHRI41?u&0%_CV8eb>_P5|GW@18_7hA z&%J(0e<-vU3ht$g8ye54^>1!{S*xH$sGwh}Tk%v-Jb#b#|3}56FSmq3>g2nYj2B<{ zzb!Y2PWq{fI_Uq8bkH|9aHhYkUiezep^Mt+MQwCM>&A1`MxlvP5nXjr6TPU3UerV{ zYN9jHL=`mAW89OzqHcPm;WyQm^-Ch^>C}>EsF&VjG|m|+qrWHW>{nGL-jED!sil}c z9a0axr8#p^CxyCOR%z#`lb*SL`?-*_b?br2k&(>A@m(-QiPdIoU=pWQ7%j6n$mC## zvI@t>N~iezcI!H5_cY6FzV3!wwusq_Hmy4gI3sQDzu|RTXvoRIYC_Wrj4%aSZFm6P z2it_TXZXz}0<}8_68ulRLA=kBvIXGdCWMID%^Bvp3dhnmJ zeYcI?HoA4|J{V`LtH*FqA1~xVAU=g@CU7LBf!sC<3(a&iDq)^?ZX1pE85?!M8o^)% zt<7ebd#}M5d_#4g;B*e+>RB2m3AbC>!a(1a+eTsS*~_8qpwUp9OK{G{j0tQ#;$4QKXuis#U82w0GeDE%aCYzrc|Sd@G@L98Fdhsxww`Ov({k2a2|Hve0=lOclgeg_FnGp+ybiVgzKvadzEr!ZFJ(p+h&uX})~-#a zdwA!tas5xwxpK5{hUlg>uV#MY<<=e~s!5Cy9By#g>nlE{9}*(1(W4ASXTl-L8KW*$ z@L~ip92*ROK=1+>4yRWVM3QQC2&wrxg+wKMBLPlw@2IaOQgX?M2%e&^9KWTbj1!1U z25nrfwucjso^(u%C(aT6KyrkR&6J8&@F5dn;8hX9DmT=?cS7bNih>S*gRjrH?+G7C zu8e{3WibFPz9sHa+%ymTG* zcvu?Ch`}GVubS|L+s`B<8xhZ5q`}e{K*butU}gd`2qJ)b2jj+dBW$ZWO|`nIFQU7? z%PfUqs_0mmZ7J(dnt~n4^WMG#k}bVWOVJ$?S7`*O8*E)W?b2KF%Ny|>kiRp{ot2QA z!tIa^)%P!XM^ZD4+nXqQUbA-)T%_Z7u`sztP_YL~*LEuJp)z@l`nKe{GkUx^mr`;C z3=8OCc-rwJF58C0p-M#>#*>-pF5M7hvMf<2fr^32vc#q)(QUGPb!qil;YVCHRiW!O z#_S!k%)f8UGwPwL9OFz0f%Ru7QA&q^*y5(^BSe5eh;pBjk`bsa1X|Zi@QQAb4eKE| z%R7*euC8YaLr7f-@bR#LT3hwY@HP$7zhIbm|i&2n)S!hw#7xF@$t1RD}quH;($+0b?3-bfh4C%3}kzX3^@YU6_S1e^nsTthh8!RUo{( zbbYD?H49%t6U0n;b$B<2^Q2l(4{%(4=z^;j)GVshEAO}ivV`YTwcs}Ac=ZKu-{(oS z@NO;KwMrL22B;RcL`HKH_Tc5u5WA>eKpaYefZ6VLqF&&D{x!(PS3<>bFunTp;k_B= z!l@XLe;#G4V(<5D{pUx;U>zF2DpQqVT-Pyiwx>kMU!5j~_0BUd*a-;Xx7lB)GNb!c zoOwSm24FvhWEc9wY6DSND&6JDu=bf#s;Q#k;#H4;3cShJM-pFSefx^<^f=Equ$%Q& zd9I}JhtUiC&sciueBeYcY5^O77|BY_wXH-V+vRz!8`w?%HBOrjbexuKh&i3izRu6Q-LWFaDlF#u zm2gl2>0+Qpq_XyZl~6c8Mm|mh?^t1pq7wR32yO0}^DeWg2^gEFHhI<-+LFLK-!dEb z9$;90AzVXfpBI^pTP0J7NA_(3HH7xeWVVgaQ-GaPk%z>_t%hiX&vYW&cujCYpAXl$ z8V6C4aEgL#9+7>XBsT7t;ZcYBa8l=~Rw(u*k$s*d_G;aEu*g;&pYHCwD5k-m7sd2N zG5zmHOnW^!G`ARmUbeR232ju95FaqamrRFKzP=sH_Geifw5O6dbn+=JALso`J=@UPJOL2Y=}K|vCuZi3#J6G_o z4!x*5a7dSVL|XdtwOt#1UYX$Fp=qypnbONL=MW)e#B)kG<*0I7;uwPW1*-tdM7H2+;|ks*QLv zkdtH<_K)Jg24GaJ>m@9j*F#sQYDTB%dxo@~g0$sMP~5h1g!4(>K)b~_4ypBjiQp&$*WV)vl2ckWk~g}X}J{gXIeHD3o{Zib-fr$ z8sTYKtR$PzcGEB%8-$GT4?r<1sTy)Ms|LFwP=ijvCA%DbzocFlV2ZW1PSyq)QmBU@cgXDJg+H03C zorCj$;upv8Z&31MHO$U|AY0)1m$Wqn?Zr!jC3nRym*-#7dZmm{(Eh4j8hoKIX(#{d zTD7I5iuraAOz9o4mj^mJ)kS&xt0`~iBx||v&zl#ZwpLqH*3w5yem~Dz?UYI_owYoN z%p`VnN+r2ktDRD*^{JdDbaVJmWogxScie^wg# zU9}d6WS%3_l7#5NdB)*0&MPWyqp8k-u#$_?76Iaz-l{B*S6aLEOWP`Q{>6*h5t}qs zn(xJn+Ih~GtC``|xOh=JJO~#rYM-x*+Hv%L8mb0OIqCW@YsdASv?SBaj=gv4OXX*E zSvxGYz$h-4P|$|~7Zt6*rJ!4yyih}cN$X5sqWw><;)uyrjY!Oto+Hg+1HJ=@Luf;UaTA?%ZuX_}h7#_qJ-Y=L6) zBs=aDp#cPGA?z9M1R4bOz44`4fe(he8m?fbaTO7qfafGwHDAMaJI3F1grG7u$%AXb z0eZ?d`ZDCoCQ$77KAamhhv?`woPShV<_N-)X^v~@K|OORu))=XswP8PkVLx(4E}(C zYYFsaANoQqRQcu*CGA`+jDiksduLnIfs>RN;Tk=_Z%qhs+^7QR=m_pHD(i3)@i}h3 z#`sEif#{LIt02V(BwJTsVp4-FQq2PLhb}R4lvgq=7m_+9sf%IY-T z!byoXn$PMW8|q@0fFTNmfN@_IAtZ!9U=t?p=7-xEv|O^GjY`>EMs;)q_Y1(n%g6-> z;>E%Dd{gdy!i5V|uVL?!dIA~^8s4U47cMZgnH9Up9Rb7xDIQ$!SWDqB&VLbHn}O~T zCQIKCZUB|r8(f6^3^NtKq)|^35F|)KI>P*uaj@_x17d03Zxg}NBpr1J`usaR&zONH zB57H?y4C{xO6(8yI|J6Id1IQQDR6m~w>NYdv zG`$OOpky%AWuZiiLp)LLa+qKU&1DFRU<=wG$_UlPY0?D~Sal~h+&NrTD9wQ%xWErR zLKeuqN!l${1|KAvaGx&PyOO`cF^T0%fhT$2rJ^3=1WC?9Z4(5CtD?!9VZq8m!dB~i}F`? zg#0nw;pV9Rf6wbuPqG}7r@YA@D3)rf*ny6Mn5;4 zHw`;_dc}Yw&fFd7l#E^juv21m4`fUUXIZXRaw!7q6!VE>T}k;IiTM$QkSj>S6A*4h z{*B=}3lIuRRC5ZTVVmV7h&st3OZqV00sWIHxpfIuzkF21)w#a)2FW1m7MBtjk3U?t z5-nDTQD6mu%r4$SV7DcPC0uUEM)@bIHj=l5Rf)IMfWRe7lqQ^U1YPh~bIX;tTWHtb zzPN}n3KUq9Fc7?oD@HiMZvQd4ovw_Y19oE2g{F^}6q>T-)ggO}{DT_blo&wBc#g!c zF0+BC5XvLomxK$xNG>rXp>!2l)-;#GnB}6Q6m~<5 zU4r~jEK8<_xRAnjifOBe0Rl#XqLk*6ITJ}6m&#`H4R>vGZISHU(v3bPMKN_8w1f(R z!dM;@r}DQ%7GcJTgWxQK)z=gej+4NCg15n-Tz=(Iq_|vlSdtz*$r^>i2QZvI-T;Gt z17S1=@iHfjits$>d%wHyTy2L)>B z88I)4JOPP0iP$ztAvtj%s1i@KrtH2XbOi1sHC}@KA8lmEp6V?ORJcfAW~nQfP1h3% zk4_+g1Py=)=*N)&vNC!yb$CfgOB!o(q-ARt!I9A9gHlbLpxTlg1QO5&*L;!3gt3Zfj zDo6n=T>K`l%Sk1LTes0bmKo$9MhPu7PCr0lTL zod`d6C#?<1f{2ClvgsA~%VD50S{yR*5Om~pVt_R>;gBpzK9U*2sezOdO#;UXYVmT* z95!?S!yyVo;d^|P>-|qo?cVc;ME&b4esQ8A3;`jsUf55|+O$+gjm4-iGvok#AiPL=qig($ zt(x{<+(TKKS*Qv4Rus*e1go1~vfj!pjo3UOf-BF^G0cgeCw5rS(vhJ;%8}`+uKr8Q z&R>NPR?{#wg zA`8jxh~~z&T31P2_jq6BAbaD~z0giWF;Gry!S5nyh1opllp4C;r6+#2q-UisWyLd` za7N-`cA^v!Q3uyYlFDfeucqt zA~8UM7+j?C1ud6{5N1j-!%@=zJaPDJ82mEKAWf?!d`RqI4&UeFV3)&n257+h|1OH; z8NNnh67?r9W25XhkDq16hMF%n?^?VnhKMq+IzpgeN$fJKR}fFlry6i&De~Ha7^NBX zPKNn}g$WB$5K3{+xDbNanL-lJhEMBZ(~|t;l&L3wwxrEu`58u>E6W=2HTk_-Q16r- zq~;68=Z8jA6o8((Kr*`qYgbsr<(!0ihgF$6V#%06ZDu%{BvvK3Y8Zrk8Ykv&VTQr0 z(95ZtOc~>daVY~SWGWW#5 z;Uwod0U&+@d{Ewa>e_dzl%yG*0!;t7%35sC=6r!EnI`#tzBV85U(HYgE#&8Y^TF8_ zg2gB~D}>s?T2ifZ0J8hkiA@_XKx+HE>-+(_QKiyvbfY>q5Nyj}milC^h}!8l5Nw}V zi&O3z?xrF2@maqNtJ9P(ql!cw$uC&uX}_r4F6ap&1!TXFF6Vcna6txd5IA#S|0>C6 zA052XE1;AxWErjFxTQqfMoBYqs~E{2v@ciC6fMOP;@f;+ho^}qBGxsY`Mh`p1jkpk zHy(Sj8gf9GNAA%cFrH0{l$pv_stmtldupo-YkYK3QZ0;%`JcN&vO~l(OPCPzD zi85`fCI54wp;!ZdD^EI?FV!~$lb|`6RE%A4-{4{O=(H|JK=_VELNTaTMY0Es(|7{C z1D&_(!PAePme9IKAD}$CWt;2iE2t|bp9fHyT0}k9%gnZJFt$$}59d_W4Nf2^3_OER zodR4G9#^vk%ku1wfGXo@Vw*j)+wDL%3dLkr*Cx-hGF~Yc({*bG< zp^0mqdOm2%a3K$Hj|8s};4@6pNB=I5_+>ZrJP}&J#fWW>s_%JgP68YS^yxd`CZT`3 zFw3sI>Z{yp2SXJrEPG`ds8%9}|YZbW%;#^W&^ ziCJq2qTtNb>eHOHCyw8<_;dhAwSrMgeVViO1o;C()bhK6m8MpoIC)^8*=tWM{$h?c zvr(VCl+re|8EQ}L-?#kduF?c+Gy!6PT_F>J{^Z%JPr-f7Kzo{B`>Crk)GNlN)S;_3 z%v61nEqg8^9Os|8s84g&o@DeHOHr|R=Bk58WA z8%-=pROg%;zb%{-cZI6FyTY3SvT-)_H*qnTJquYfpDIrWl-Yfx|0i zd!pC_{VSE707*FwU8dE^{48Zwlo*I0e~ov-M+XlB2CYAj0yRhd3Hl!+f=WnSP}>vr zCvPQ&86I7{tJHsTnSt8m(8pm$e_?Mi(Z(3a|5w23mOu4}sl7nFS1^QfvK9JTH%yj&G~hL%3iy_TU5 zMtHI_X)N9!%zgC+801e8*SmXo4%#Z~QB^4f&%32>3RTsam8EYc_6G1b7Lz1rxa2Bt zn&uYG3t{i8GWQ`jkcZH;k-}V^C_MmN!$?fWlB9PZFHxzKBK61!B()@!mjNvNMtmpX zG-Dej@*J!JFp~J`4Jxi#*8vsC8L(cvNV%!6fKX`eI}kOLPB=J2X28+Uv<0Fma+S`z zpnBRl<<1W0n?7)60;04M0P zfRrJb>1f133yz5 z@f3eWW)PQAB!MEZ{2__3$Rg$X6b5G-9l{6s0CB;sl!Z}f1gNeB;*KpL&}F$(qmT+L z$c+Y&6!n2g7Ox<8mJskO?&QR8iKs0(4)PBaJ8skhn`=hmefSsh`k9W7G)%&f+(Mnr zDX65y!GJU%a#uyxF`lwNz`&V+`Xf`?pn9v_69DsrZ4kAe#ZCrdrsYt96eb=$BxM-1CtlFpDYqfCIS#pCfK)KfUeRD&tr;Rm zA7MPE7Ni5pD5zYN1EHjz3`#)}5D<|nGTIv~hc%toWWsmN;SdqM#0Rj_5GVM}9DU?+ z2lysAOz;tiA+vgw{tC-#5s*!^jXjWLm~0X$Xr z4-$y-0X7;L%{ID}gG3gPKvI=ihXHbd;?*S5(D9AtbB!g+VX-q^>p{-M#1L(fEKB_3 z5Fhw~)EfrpRK)?1v#&#4Nr#sY(53and_~P-! zdm4*J`#VlG7Qb+9`uR}t+@6MaH!j+0U6Uzf@>$i9Z@#Jc>=&C~+t7sYfV2M}J*j|q delta 349 zcmWm7Ju|~_0D$qNO(Ja~FJ4j<`J;^@-c{Q>ceVEmuo-*>dz~R?p z0$)ZBc_>h!VFWq~D8hh=QCP6yz{MEGF@Z^VC}9fIm_Zq{n8Q5&-vSF*#1bl~BET|g zSivgR5Mmu0*u)mLv4h<-@b^Bxi>fk|EY~ Tuple[pd.DataFrame, Sequence[i # subset the data for testing if config["test_subset_2000"] == True: print("[INFO] Subsetting the data to 100 rows.") - all_objects = all_objects[:100] + all_objects = all_objects[:500] data_id = [int(all_objects.iloc[i]["did"]) for i in range(len(all_objects))] @@ -232,7 +232,7 @@ def get_all_metadata_from_openml(config: dict) -> Tuple[pd.DataFrame, Sequence[i openml_data_object = handler.get_metadata(data_id) print("[INFO] Saving metadata to file.") - save_metadata_to_file((openml_data_object, data_id, all_objects), save_filename) + save_metadata_to_file((openml_data_object, data_id, all_objects, handler), save_filename) return openml_data_object, data_id, all_objects, handler diff --git a/docs/developer tutorials/train and evaluate models.ipynb b/docs/developer tutorials/train and evaluate models.ipynb index e37909c..91424b8 100644 --- a/docs/developer tutorials/train and evaluate models.ipynb +++ b/docs/developer tutorials/train and evaluate models.ipynb @@ -75,7 +75,8 @@ "metadata": {}, "outputs": [], "source": [ - "list_of_embedding_models = [\"BAAI/bge-small-en-v1.5\"]\n", + "# list_of_embedding_models = [\"BAAI/bge-small-en-v1.5\", \"Alibaba-NLP/gte-Qwen2-1.5B-instruct\"]\n", + "list_of_embedding_models = [\"GritLM/GritLM-7B\"]\n", "list_of_llm_models = [\"qwen2:1.5b\", \"phi3\"]" ] }, @@ -212,6 +213,8 @@ " # TODO : Replace this evaluation with a more meaningful one\n", " combined_df = aggregate_multiple_queries_and_count(queries,qa_dataset=qa_dataset, config=config, group_cols = [\"id\", \"name\"], sort_by=\"query\", count = True)\n", "\n", + " # TODO : ADD LLM evaluation here when the function is ready\n", + "\n", " combined_df.to_csv(experiment_path / \"results.csv\")" ] }, @@ -239,7 +242,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.10.14" } }, "nbformat": 4, diff --git a/frontend/ui.py b/frontend/ui.py index f7c3074..5613c5e 100644 --- a/frontend/ui.py +++ b/frontend/ui.py @@ -9,7 +9,8 @@ st.title("OpenML AI Search") query_type = st.selectbox("Select Query Type", ["Dataset", "Flow"]) -query = st.chat_input("Enter your query") +# query = st ("Enter your query") +query = st.text_input("Enter your query") st.session_state["query"] = query diff --git a/ollama/get_ollama.sh b/ollama/get_ollama.sh index 152346b..dfff1f0 100755 --- a/ollama/get_ollama.sh +++ b/ollama/get_ollama.sh @@ -6,5 +6,5 @@ while [ "$(ollama list | grep 'NAME')" == "" ]; do sleep 1 done -ollama run qwen2:1.5b +ollama run llama3 diff --git a/start_training.sh b/start_training.sh new file mode 100755 index 0000000..bdb4d3a --- /dev/null +++ b/start_training.sh @@ -0,0 +1,3 @@ +cd backend +python training.py +cd ..