From bb044d92c1212f60e4d05cd38fdf77017a290893 Mon Sep 17 00:00:00 2001
From: greg pereira
Date: Mon, 27 May 2024 12:21:21 -0700
Subject: [PATCH] next refactor

Signed-off-by: greg pereira
---
 milvus/build/merlinite-qq.sh                  |   7 -
 milvus/seed/README.md                         |  29 ++
 .../__pycache__/ilab_model.cpython-311.pyc    | Bin 0 -> 13895 bytes
 .../merlinite_model.cpython-311.pyc           | Bin 0 -> 5522 bytes
 milvus/seed/client.py                         |  98 ++---
 milvus/seed/dumb_client.py                    |  40 ++
 milvus/seed/ilab_model.py                     | 372 ++++++++++++++++++
 milvus/seed/new-seed.py                       |  31 --
 milvus/seed/new_seed.py                       |  41 ++
 milvus/seed/requirements.txt                  |   1 +
 milvus/seed/seed.py                           |  23 +-
 11 files changed, 551 insertions(+), 91 deletions(-)
 delete mode 100755 milvus/build/merlinite-qq.sh
 create mode 100644 milvus/seed/__pycache__/ilab_model.cpython-311.pyc
 create mode 100644 milvus/seed/__pycache__/merlinite_model.cpython-311.pyc
 create mode 100644 milvus/seed/dumb_client.py
 create mode 100644 milvus/seed/ilab_model.py
 delete mode 100644 milvus/seed/new-seed.py
 create mode 100644 milvus/seed/new_seed.py

diff --git a/milvus/build/merlinite-qq.sh b/milvus/build/merlinite-qq.sh
deleted file mode 100755
index a70f526a..00000000
--- a/milvus/build/merlinite-qq.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-#!/bin/bash
-## EXPECTED INPUT IS STRING ECAPSULATED
-input="$1"
-echo "input: $input"
-request_body='{"model":"ibm/merlinite-7b","logprobs":false,"messages":[{"role": "system","content": "You are an AI language model developed by IBM Research. You are a cautious assistant. You carefully follow instructions. You are helpful and harmless and you follow ethical guidelines and promote positive behavior."},{"role":"user","content": "'$input'"}],"stream":false}'
-echo $request_body
-curl -X 'POST' 'https://merlinite-7b-vllm-openai.apps.fmaas-backend.fmaas.res.ibm.com/v1/chat/completions' -H 'accept: application/json' -H 'Content-Type: application/json' -k -d $request_body
diff --git a/milvus/seed/README.md b/milvus/seed/README.md
index e69de29b..5df33a24 100644
--- a/milvus/seed/README.md
+++ b/milvus/seed/README.md
@@ -0,0 +1,29 @@
+RAG application with ILAB
+
+1. set up a vector DB (Milvus)
+
+Development story:
+  0. Starting Goal:
+    - Naive RAG, no KG aid
+  - Addition:
+    1. identify what the model lacks knowledge in
+    2. Can we use the internally trained model, or do we have to use the HF model?
+    -
+
+- UI integration
+
+-----------------------------------------------
+
+variable definition
+class Config
+
+_identify_params,
+_llm_type, _extract_token_usage,
+
+Inherent in defining this spec, which could eventually live as a contribution to langchain, are some assumptions / questions I made:
+  - Is the model serializable: assumed no
+  - Max tokens for merlinite and granite: both assumed 4096
+  - Does this model have attention / memory?
+  - Do these models have a verbosity option for output?
+  - Recommended default values:
+  -
\ No newline at end of file
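The Milvus setup in step 1 of the README can be smoke-tested before any seeding; a minimal sketch, assuming the default local deployment on port 19530:

```python
# Minimal connectivity check for the Milvus instance (assumed local install).
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
print(client.list_collections())  # expect [] on a fresh install, ["dnd"] after seeding
```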
diff --git a/milvus/seed/__pycache__/ilab_model.cpython-311.pyc b/milvus/seed/__pycache__/ilab_model.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b8da034de867a4463723968b44f54a59027b50e
GIT binary patch
literal 13895
[base85 binary payload omitted]

diff --git a/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc b/milvus/seed/__pycache__/merlinite_model.cpython-311.pyc
new file mode 100644
GIT binary patch
literal 5522
[base85 binary payload omitted]

diff --git a/milvus/seed/client.py b/milvus/seed/client.py
--- a/milvus/seed/client.py
+++ b/milvus/seed/client.py
[leading hunks lost in extraction; the visible tail of the updated client.py follows]
+#     "stop": ["<|endoftext|>"],
+#     "logprobs": False,
+#     "stream": False
+# }
+
+# response = requests.post(url, headers=headers, data=json.dumps(data), verify=False)
+# print(response.json())
+print(f'model_name={model_name}')
+llm = IlabLLM(
+    model_endpoint=model_endpoint,
+    model_name=model_name,
+    apikey=model_token,
+    temperature=1,
+    max_tokens=500,
+    top_p=1,
+    repetition_penalty=1.05,
+    stop=["<|endoftext|>"],
+    streaming=False
+)
+
+prompt="I am training for a marathon in 12 weeks. Can you help me build an exercise plan to help prepare myself?"
+prompts=[prompt]
+# prompt_template = PromptTemplate.from_template(prompt)
+llm.generate(prompts)
+# llm.invoke("dog")
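For reference, the IlabLLM parsing added later in this patch (and the commented request above) assumes the endpoint answers in the OpenAI-style chat-completion shape; a representative body, with made-up values, looks like:

```python
# Illustrative only (not captured from a real run): the response shape that
# _call / _create_llm_result in ilab_model.py parse.
example_response = {
    "model": "ibm/merlinite-7b",
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "Here is a 12-week plan..."},
            "finish_reason": "stop",
        }
    ],
    "usage": {"prompt_tokens": 42, "completion_tokens": 180, "total_tokens": 222},
}

generated_text = example_response["choices"][0]["message"]["content"]
```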
diff --git a/milvus/seed/dumb_client.py b/milvus/seed/dumb_client.py
new file mode 100644
index 00000000..e08c9124
--- /dev/null
+++ b/milvus/seed/dumb_client.py
@@ -0,0 +1,40 @@
+import requests
+import json
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# manage ENV (os.getenv returns None when unset, so test falsiness, not "")
+model_endpoint=os.getenv('MODEL_ENDPOINT')
+if not model_endpoint:
+    model_endpoint = "http://localhost:8001"
+
+model_name=os.getenv('MODEL_NAME')
+if not model_name:
+    model_name = "ibm/merlinite-7b"
+
+model_token=os.getenv('MODEL_TOKEN')
+
+headers = {
+    "Content-Type": "application/json",
+    "Authorization": f"Bearer {model_token}"
+}
+
+data = {
+    "model": model_name,
+    "messages": [
+        {"role": "system", "content": "your name is carl"},
+        {"role": "user", "content": "what is your name?"}
+    ],
+    "temperature": 1,
+    "max_tokens": 1792,
+    "top_p": 1,
+    "repetition_penalty": 1.05,
+    "stop": ["<|endoftext|>"],
+    "logprobs": False,
+    "stream": False
+}
+
+response = requests.post(model_endpoint, headers=headers, data=json.dumps(data), verify=False)
+print(response.json())
\ No newline at end of file
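dumb_client.py posts the payload with TLS verification disabled and no timeout; a slightly hardened variant of the same call, as a sketch (the helper name is made up):

```python
# Sketch: the same POST with a timeout and an HTTP status check. verify=False
# mirrors dumb_client.py (self-signed backend certificate); drop it once the
# endpoint has a trusted certificate.
import requests

def post_chat(endpoint: str, payload: dict, token: str | None = None) -> dict:
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"
    response = requests.post(endpoint, json=payload, headers=headers, timeout=30, verify=False)
    response.raise_for_status()  # surface 4xx/5xx instead of decoding an error body
    return response.json()
```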
+ """Default system prompt to use.""" + + model_kwargs: Dict[str, Any] = Field(default_factory=dict) + """Holds any model parameters valid for `create` call not explicitly specified.""" + + max_tokens: int = 4096 + """The maximum number of tokens to generate in the completion. + -1 returns as many tokens as possible given the prompt and + the models maximal context size.""" + + # TOTALLY OPTIONAL + + apikey: Optional[SecretStr] = None + """Apikey to the Ilab model APIs (merlinte or granite)""" + + top_p: Optional[float] = 1 + """Total probability mass of tokens to consider at each step.""" + + frequency_penalty: Optional[float] = 0 + """Penalizes repeated tokens according to frequency.""" + + repetition_penalty: Optional[float] = 0 + """Penalizes repeated tokens.""" + + temperature: Optional[float] = 0.7 + """What sampling temperature to use.""" + + # verbose: Optional[str] = None + # """If the model should return verbose output or standard""" + + streaming: bool = False + """ Whether to stream the results or not. """ + + # FUTURE EXTENSIONS + + tags: Optional[List[str]] = None + """Tags to add to the run trace.""" + + metadata: Optional[Dict[str, Any]] = None + """Metadata to add to the run trace.""" + + # This gets implemented with stream + # callbacks: Optional[SecretStr] = None + # """callbacks""" + + # END PARMS + + class Config: + """Configuration for this pydantic object.""" + allow_population_by_field_name = True + + @property + def lc_secrets(self) -> Dict[str, str]: + """A map of constructor argument names to secret ids. + + For example: + { + "apikey": "ILAB_API_KEY", + } + """ + return { + "apikey": "ILAB_API_KEY", + } + + @classmethod + def is_lc_serializable(cls) -> bool: + """Return whether this model can be serialized by Langchain.""" + return False + + @root_validator(pre=True) + def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + """Build extra kwargs from additional params that were passed in.""" + all_required_field_names = get_pydantic_field_names(cls) + extra = values.get("model_kwargs", {}) + values["model_kwargs"] = build_extra_kwargs( + extra, values, all_required_field_names + ) + return values + + @root_validator() + def validate_environment(cls, values: Dict) -> Dict: + if values["streaming"] == True: + raise ValueError("streaming has not yet been implemented.") + if values["apikey"] or "ILAB_API_KEY" in os.environ: + values["apikey"] = convert_to_secret_str( + get_from_dict_or_env(values, "apikey", "ILAB_API_KEY") + ) + values['model_name'] = get_from_dict_or_env( + values, + "model_name", + "MODEL_NAME", + ) + ## extension for more options for required auth params + ## client_params = { + ## "api_key": ( + ## values["apikey"].get_secret_value() + ## if values["apikey"] + ## else None + ## ) + ## } + # CURRENTLY WE DONT CHECK KEYS + ## if not client_params['values']['apikey']: + ## raise ValueError("Did not find token `apikey`.") + return values + + @property + def _params(self) -> Mapping[str, Any]: + """Get the identifying parameters.""" + params = {**{ + "model_name": self.model_name, + "model_endpoint": self.model_endpoint, + }, **self._default_params} + if self.apikey: + params['apikey'] = self.apikey + if self.model_name: + params['model_name'] = self.model_name + return params + + @property + def _default_params(self) -> Dict[str, Any]: + """Get the default parameters for calling Merlinite API.""" + normal_params: Dict[str, Any] = { + "temperature": self.temperature, + "top_p": self.top_p, + "frequency_penalty": self.frequency_penalty, + 
"presence_penalty": self.repetition_penalty, + } + + if self.max_tokens is not None: + normal_params["max_tokens"] = self.max_tokens + + return {**normal_params, **self.model_kwargs} + + + def _invocation_params(self) -> Dict[str, Any]: + """Get the parameters used to invoke the model.""" + return self._params + + def make_request(self, params: Dict[str, Any], prompt: str, stop: Optional[List[str]]) -> Dict[str, Any]: + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {self.apikey}" + } + + data = { + "model": params['model_name'], + "messages": [ + { + "role": "system", + "content": self.system_prompt + }, + { + "role": "user", + "content": prompt + } + ], + "temperature": params['temperature'], + "max_tokens": params['max_tokens'], + "top_p": params['top_p'], + "stop": stop, + "logprobs": False, + } + + if 'repetition_penalty' in params: + data["repetition_penalty"] = params['repetition_penalty'] + + if 'streaming' in params: + # Shadowing basemodel re-route for streaming + data["stream"] = params["streaming"] + + response = requests.post(self.model_endpoint, headers=headers, data=json.dumps(data), verify=False) + response_json = response.json() + + def _call(self, prompt: str, stop:Optional[List[str]] = None, **kwargs: Any) -> str: + """Call the ilab inference endpoint. The result of invoke. + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + run_manager: Optional callback manager. + Returns: + The string generated by the model. + Example: + .. code-block:: python + + response = merlinite.invoke("What is a molecule") + """ + + invocation_params = self._invocation_params + params = {**invocation_params, **kwargs} + + if stop == None: + stop = ["<|endoftext|>"] + response_json = self.make_request( + params=params, prompt=prompt, stop=stop, **kwargs + ) + return response_json['choices'][0]['messages']['content'] + + def _generate( + self, + prompts: List[str], + stop: Optional[List[str]] = None, + **kwargs: Any, + ) -> LLMResult: + """Call out to Ilab's endpoint with prompt. + + Args: + prompt: The prompt to pass into the model. + stop: Optional list of stop words to use when generating. + + Returns: + The full LLM output. + + Example: + .. code-block:: python + + response = ilab.generate(["Tell me a joke."]) + """ + + invocation_params = self._invocation_params() + params = {**invocation_params, **kwargs} + token_usage: Dict[str, int] = {} + system_fingerprint: Optional[str] = None + + response_json = self.make_request( + params=params, prompt=prompts[0], stop=stop, **kwargs + ) + + if not ('choices' in response_json and len(response_json['choices']) > 0): + raise ValueError("No valid response from the model") + + if response_json.get("error"): + raise ValueError(response_json.get("error")) + + if not system_fingerprint: + system_fingerprint = response_json.get("system_fingerprint") + return self._create_llm_result( + response_json=response_json, + ) + + def _llm_type(self) -> str: + """Get the type of language model used by this chat model. 
+
+    @property
+    def _llm_type(self) -> str:
+        """Get the type of language model used by this chat model.
+        Used for logging purposes only."""
+        return "instructlab"
+
+    @property
+    def max_context_size(self) -> int:
+        """Get max context size for this model."""
+        return self.modelname_to_contextsize(self.model_name)
+
+    def _create_llm_result(self, response: List[dict]) -> LLMResult:
+        """Create the LLMResult from the choices and prompt."""
+        generations = []
+        for res in response:
+            # The endpoint answers in the OpenAI chat-completion shape:
+            # {"choices": [{"message": {"content": ...}, "finish_reason": ...}], ...}
+            for choice in res.get("choices", []):
+                gen = Generation(
+                    text=choice.get("message", {}).get("content", ""),
+                    generation_info={"finish_reason": choice.get("finish_reason")},
+                )
+                generations.append([gen])
+        final_token_usage = self._extract_token_usage(response)
+        llm_output = {
+            "token_usage": final_token_usage,
+            "model_name": self.model_name
+        }
+        return LLMResult(generations=generations, llm_output=llm_output)
+
+    @staticmethod
+    def _extract_token_usage(
+        response: Optional[List[Dict[str, Any]]] = None,
+    ) -> Dict[str, Any]:
+        if response is None:
+            return {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+
+        prompt_tokens = 0
+        completion_tokens = 0
+        total_tokens = 0
+
+        def get_count_value(key: str, result: Dict[str, Any]) -> int:
+            return result.get(key, 0) or 0
+
+        for res in response:
+            # OpenAI-style responses report usage at the top level.
+            usage = res.get("usage")
+            if usage:
+                prompt_tokens += get_count_value("prompt_tokens", usage)
+                completion_tokens += get_count_value("completion_tokens", usage)
+                total_tokens += get_count_value("total_tokens", usage)
+
+        return {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": total_tokens
+        }
+
+    @staticmethod
+    def modelname_to_contextsize(modelname: str) -> int:
+        """Look up the maximum number of tokens a model can generate."""
+        model_token_mapping = {
+            "ibm/merlinite-7b": 4096,
+            "instructlab/granite-7b-lab": 4096
+        }
+
+        context_size = model_token_mapping.get(modelname, None)
+
+        if context_size is None:
+            raise ValueError(
+                f"Unknown model: {modelname}. Please provide a valid Ilab model name. "
+                "Known models are: " + ", ".join(model_token_mapping.keys())
+            )
+
+        return context_size
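A quick usage sketch for the class above, mirroring client.py; the endpoint, model name, and key values are placeholders, not real services:

```python
# Hedged usage sketch for IlabLLM; endpoint, model, and key are placeholders.
from ilab_model import IlabLLM

llm = IlabLLM(
    model_endpoint="http://localhost:8001",
    model_name="ibm/merlinite-7b",
    apikey="not-a-real-key",
    temperature=0.7,
    max_tokens=500,
)

# invoke() returns the generated string (routed through _generate).
print(llm.invoke("What is a molecule?", stop=["<|endoftext|>"]))

# generate() returns an LLMResult; token usage sits in llm_output.
result = llm.generate(["Tell me a joke."])
print(result.generations[0][0].text)
print(result.llm_output["token_usage"])
```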
{step_name}") - print("-----------------------------------------------") - -model_name = "ibm/merlinite-7b" -model_kwargs = {"device": "cpu"} -encode_kwargs = {"normalize_embeddings": True} - -log_step(0, "Generate embeddings") -embeddings = HuggingFaceBgeEmbeddings( - model_name=model_name, - model_kwargs=model_kwargs, - encode_kwargs=encode_kwargs, - query_instruction = "search_query:", - embed_instruction = "search_document:" -) - - -# data_url = "https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf" -# loader = WebBaseLoader(data_url) -# data = loader.load() -raw = parser.from_file("data/DnD-5e-Handbook.pdf") -print(raw['content']) diff --git a/milvus/seed/new_seed.py b/milvus/seed/new_seed.py new file mode 100644 index 00000000..60311c76 --- /dev/null +++ b/milvus/seed/new_seed.py @@ -0,0 +1,41 @@ +import os +from pymilvus import MilvusClient, DataType +from langchain_community.vectorstores import Milvus +from langchain_experimental.text_splitter import SemanticChunker +from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader +from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings, HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter +from langchain import hub +from langchain_core.runnables import RunnablePassthrough +from langchain_core.output_parsers import StrOutputParser +from tika import parser # pip install tika +from langchain_openai import OpenAI +from ilab_models import IlabOpenAILLM + + +def log_step(step_num, step_name) -> None: + print("-----------------------------------------------") + print(f"{step_num}. {step_name}") + print("-----------------------------------------------") + +embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2") + +text_splitter = SemanticChunker(embeddings=embeddings) # fails + +loader = PyPDFLoader('./data/DnD-5e-Handbook.pdf') +data = loader.load() +split_data = text_splitter.split_documents(data) +print(len(split_data)) +vector_store = Milvus.from_documents( + documents=split_data, + embedding=embeddings, + connection_args={"host": "localhost", "port": 19530}, + collection_name="dnd" +) + +llm = IlabOpenAILLM( + +) + +retreiver = vector_store.as_retreiver() +prompt = hub.pull("rlm/rag-prompt") \ No newline at end of file diff --git a/milvus/seed/requirements.txt b/milvus/seed/requirements.txt index 297139d5..431c4f8a 100644 --- a/milvus/seed/requirements.txt +++ b/milvus/seed/requirements.txt @@ -7,3 +7,4 @@ langchain-experimental==0.0.59 tika==2.6.0 sentence-transformers==2.7.0 beautifulsoup4==4.12.3 +python-dotenv==1.0.1 diff --git a/milvus/seed/seed.py b/milvus/seed/seed.py index 09a0f4d6..044158e8 100644 --- a/milvus/seed/seed.py +++ b/milvus/seed/seed.py @@ -4,7 +4,7 @@ from langchain_experimental.text_splitter import SemanticChunker from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings -from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter from langchain import hub from langchain_core.runnables import RunnablePassthrough from langchain_core.output_parsers import StrOutputParser @@ -24,18 +24,25 @@ def milvus_init() -> MilvusClient: def fill_dnd_collection(text_splitter: any, embeddings: any) -> None: # local - raw = 
parser.from_file("data/DnD-5e-Handbook.pdf") - print(len(raw['content'])) - docs = text_splitter.create_documents([raw['content']]) + # raw = parser.from_file("data/DnD-5e-Handbook.pdf") + # print(len(raw['content'])) + # docs = text_splitter.create_documents([raw['content']]) + # vector_store = Milvus.from_documents( + # docs, + # embedding=embeddings, + # connection_args={"host": "localhost", "port": 19530}, + # collection_name="dnd" + # ) + # remote + loader = PyPDFLoader('https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf') + data = loader.load() + split_data = text_splitter.split_documents(data) vector_store = Milvus.from_documents( - docs, + documents=split_data, embedding=embeddings, connection_args={"host": "localhost", "port": 19530}, collection_name="dnd" ) - # remote - # loader = PyPDFLoader('https://orkerhulen.dk/onewebmedia/DnD%205e%20Players%20Handbook%20%28BnW%20OCR%29.pdf') - # data = loader.load() def generate_embeddings() -> any: # model_name = "ibm/merlinite-7b"