diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e02b5c6200a82..48953dafa0744 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -904,6 +904,8 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Install Cuda Toolkit 11.7 if: ${{ matrix.cuda == '11.7' }} @@ -1139,6 +1141,8 @@ jobs: - name: Clone id: checkout uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Install id: depends diff --git a/AUTHORS b/AUTHORS index 1bd36158a72f4..2eb60806ad058 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,4 +1,4 @@ -# date: Wed Jun 26 19:36:34 EEST 2024 +# date: Thu Nov 28 20:46:15 EET 2024 # this file is auto-generated by scripts/gen-authors.sh 0cc4m @@ -7,6 +7,7 @@ 2f38b454 3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com> 44670 <44670@users.noreply.github.com> +65a <10104049+65a@users.noreply.github.com> AN Long AT Aarni Koskela @@ -19,20 +20,28 @@ Adithya Balaji AdithyanI Adrian Adrian Hesketh +Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> Ahmet Zeer AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> +AidanBeltonS Aisuko +Akarshan Biswas Akarshan Biswas +Al Mochkin <14274697+amochkin@users.noreply.github.com> Albert Jin Alberto <57916483+albbus-stack@users.noreply.github.com> +Alberto Cabrera Pérez +Alberto Cabrera Pérez Alex Alex Azarov Alex Azarov Alex Klinkhamer Alex Klinkhamer Alex Nguyen +Alex O'Connell <35843486+acon96@users.noreply.github.com> Alex Petenchea Alex Renda +Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com> Alex von Gluck IV Alexey Parfenov Ali Chraghi <63465728+alichraghi@users.noreply.github.com> @@ -45,18 +54,25 @@ AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> Ananta Bastola Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> András Salamon +Andreas (Andi) Kunar Andrei Andrew Canis Andrew Downing Andrew Duffy Andrew Godfrey +Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> +Andy Salerno Andy Tai +Anthony Van de Gejuchte +Antonis Makropoulos Arik Poznanski +Armen Kaleshian Artem Artem Zinnatullin Artyom Lebedev Asbjørn Olling Ásgeir Bjarni Ingvarsson +Asghar Ghorbani Ashish <1856117+ashishdatta@users.noreply.github.com> Ashok Gelal <401055+ashokgelal@users.noreply.github.com> Ashraful Islam @@ -76,12 +92,16 @@ Ben Williams Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> Bernat Vadell +Bert Wagner Bingan <70050083+binganao@users.noreply.github.com> +Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com> Bodo Graumann Bono Lv Borislav Stanimirov Branden Butler +Brandon Squizzato <35474886+bsquizz@users.noreply.github.com> Brian +Brian Cunnie Bruce MacDonald Bryan Honof CJ Pais @@ -90,32 +110,47 @@ Calvin Laurenson Cameron Cameron Kaiser Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> +CarryFun <76023481+CarryFun@users.noreply.github.com> +Carsten Kragelund Jørgensen +CarterLi999 <664681047@qq.com> Casey Primozic Casey Primozic CausalLM <148736309+CausalLM@users.noreply.github.com> Cebtenzzre Chad Brewbaker +Changyeon Kim Chao Jiang +Charles Xu <63788048+chaxu01@users.noreply.github.com> +Charles Xu +Chen Xi +Chen Xi Cheng Shao +Chenguang Li <87689256+noemotiovon@users.noreply.github.com> Chris Elrod Chris Kuehl Christian Demsar Christian Demsar Christian Falch <875252+chrfalch@users.noreply.github.com> Christian Kögler +Christian Köhnenkamp Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> Clark Saben <76020733+csaben@users.noreply.github.com> Clint Herron +Conrad Kramer CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> +Csaba Kecskemeti Cuong Trinh Manh DAN™ Damian Stewart +Dan Johansson <164997844+eddnjjn@users.noreply.github.com> +Dan Johansson Dane Madsen DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com> Daniel Bevenius Daniel Drake Daniel Hiltgen Daniel Illescas Romero +Daniel Kleine <53251018+d-kleine@users.noreply.github.com> Daniele <57776841+daniandtheweb@users.noreply.github.com> DannyDaemonic Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> @@ -129,19 +164,28 @@ David Pflug David Renshaw David Sommers <12738+databyte@users.noreply.github.com> David Yang +DavidKorczynski Dawid Potocki Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com> Dean Deins +Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com> +Derrick T. Woolworth Deven Mistry <31466137+deven367@users.noreply.github.com> +Dibakar Gope Didzis Gosko +Diego Devesa +Diogo Teles Sant'Anna Djip007 Don Mahurin DooWoong Lee (David) Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com> +Dou Xinpeng <15529241576@163.com> +Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> Douglas Hanley Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> Ebey Abraham +Echo Nolan Ed Lee Ed Lepedus Eddie-Wang @@ -151,10 +195,13 @@ Elbios <141279586+Elbios@users.noreply.github.com> Elton Kola Engininja2 <139037756+Engininja2@users.noreply.github.com> Equim +Eric Curtin +Eric Curtin Eric Sommerlade Eric Zhang <34133756+EZForever@users.noreply.github.com> Erik Garrison Erik Scholz +Esko Toivonen Ettore Di Giacinto Evan Jones Evan Miller @@ -166,19 +213,26 @@ FK Fabian Fabio R. Sluzala Faez Shakil +Faisal Zaghloul +Faisal Zaghloul +Fan Shupei FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> +Farbod Bijary <110523279+farbodbj@users.noreply.github.com> Fattire <528174+fat-tire@users.noreply.github.com> Felix Finn Voorhees Firat +FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> Francisco Melo <43780565+francis2tm@users.noreply.github.com> Frank Mai FrankHB +Frankie Robertson Fred Douglas <43351173+fredlas@users.noreply.github.com> Frederik Vogel Gabe Goodhart +Gabe Goodhart GainLee Galunid Gary Linscott @@ -187,11 +241,13 @@ Gavin Zhao Genkagaku.GPT Georgi Gerganov Gilad S +Gilad S. <7817232+giladgd@users.noreply.github.com> Giuseppe Scrivano GiviMAD Govlzkoy Guillaume "Vermeille" Sanchez Guillaume Wenzek +Guoliang Hua <32868157+nbcsm@users.noreply.github.com> Guoteng <32697156+SolenoidWGT@users.noreply.github.com> Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> Haggai Nuchi @@ -213,11 +269,14 @@ Hong Bo PENG Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> Howard Su Hua Jiang +Huang Qi Huawei Lin Hugo Roussel +Huifeng Ou <79071290+ho2103@users.noreply.github.com> Ian Bull Ian Bull Ian Scrivener +Icecream95 Ido S IgnacioFDM Igor Okulist @@ -226,11 +285,15 @@ Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> Ionoclast Laboratories Isaac McFadyen IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com> +Ivan +Ivan Filipov <159561759+vanaka11@users.noreply.github.com> Ivan Komarov Ivan Stepanov JH23X <165871467+JH23X@users.noreply.github.com> +Jack Mousseau Jack Mousseau JackJollimore <130917767+JackJollimore@users.noreply.github.com> +Jaeden Amero Jaemin Son Jag Chadha Jakub N @@ -243,10 +306,14 @@ Jannis Schönleber Jared Van Bortel Jared Van Bortel Jason McCartney +Jason Stillerman Jean-Christophe Hoelt Jean-Michaël Celerier Jed Fox +Jeff Bolz +Jeffrey Morgan Jeffrey Quesnelle +Jeroen Mostert Jesse Jojo Johnson Jeximo Jhen-Jie Hong @@ -258,6 +325,9 @@ Jiří Podivín <66251151+jpodivin@users.noreply.github.com> Jiří Sejkora Joan Fontanals Joan Fontanals +João Dinis Ferreira +Joe Eli McIlvain +Joe Todd Johan Johannes Gäßler Johannes Rudolph @@ -274,7 +344,9 @@ Joyce Juan Calderon-Perez <835733+gaby@users.noreply.github.com> Judd Julius Arkenberg +Jun Hee Yoo Jun Jie <71215065+junnjiee16@users.noreply.github.com> +Junil Kim Junyang Lin Juraj Bednar Justin Parker @@ -292,12 +364,14 @@ Karthik Sethuraman Kasumi <90275229+kasumi-1@users.noreply.github.com> Kawrakow <48489457+ikawrakow@users.noreply.github.com> Keiichi Tabata +Keke Han Kenvix ⭐ Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> Kevin Gibbons Kevin Ji <1146876+kevinji@users.noreply.github.com> Kevin Kwok Kevin Lo +Kevin Wang Kolen Cheung Konstantin Herud Konstantin Zhuravlyov @@ -315,22 +389,29 @@ LeonEricsson <70749762+LeonEricsson@users.noreply.github.com> Leonardo Neumann Li Tan Linwei Wang +Liu Jia <109258120+Septa2112@users.noreply.github.com> +Liu Jia LoganDark +Loïc Carrère LostRuins <39025047+LostRuins@users.noreply.github.com> Luciano Luo Tian Lyle Dean +M-A M. Yusuf Sarıgöz +Ma Mingfei Maarten ter Huurne Mack Straight Maël Kerbiriou MaggotHATE +Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> Manuel <44313466+makuche@users.noreply.github.com> Marc Köhlbrugge Marco Matthies <71844+marcom@users.noreply.github.com> Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> Marian Cepok Mark Fairbairn +Mark Zhuang Marko Tasic Markus Tavenrath Martin Delille @@ -342,11 +423,15 @@ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> Mateusz Charytoniuk Matheus C. França Matheus Gabriel Alves Silva +Mathieu Geli Mathieu Nayrolles +Mathijs Henquet Mathijs de Bruin Matt Clayton <156335168+mattjcly@users.noreply.github.com> Matt Pulver +Matt Stephenson Matteo Boschini <12133566+mbosc@users.noreply.github.com> +Matteo Mortari Mattheus Chediak Matthew Tejo Matvey Soloviev @@ -356,8 +441,10 @@ Maxime <672982+maximegmd@users.noreply.github.com> Maximilian Winter Meng Zhang Meng, Hengyu +Mengqing Cao Merrick Christensen Michael Coppola +Michael Francis Michael Hueschen Michael Kesper Michael Klimenko @@ -365,41 +452,57 @@ Michael Podvitskiy Michael Potter Michael de Gans Michaël de Vries +Michał Tuszyński Mihai Mike Mikko Juola Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> +Minsoo Cheong Mirko185 Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> +MistApproach <98988043+MistApproach@users.noreply.github.com> Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> Mohammadreza Hendiani Mohammadreza Hendiani +Molly Sophia +MorganRO8 <47795945+MorganRO8@users.noreply.github.com> Murilo Santana Musab Gultekin Nam D. Tran <42194884+namtranase@users.noreply.github.com> Nathan Epstein +Natsu NawafAlansari <72708095+NawafAlansari@users.noreply.github.com> Nebula Neo Zhang <14088817+arthw@users.noreply.github.com> Neo Zhang Neo Zhang Jianyu Neuman Vong +Nexes the Old <124105151+Nexesenex@users.noreply.github.com> Nexesenex <124105151+Nexesenex@users.noreply.github.com> Niall Coates <1349685+Niall-@users.noreply.github.com> +Nicholai Tukanov +Nico Bosshard Nicolai Weitkemper Nicolás Pérez Nigel Bosch Niklas Korz +NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com> Nikolas <127742645+nneubacher@users.noreply.github.com> Nindaleth +OSecret <135510162+OLSecret@users.noreply.github.com> Oleksandr Nikitin Oleksii Maryshchenko Olivier Chafik Ondřej Čertík Ouadie EL FAROUKI +PAB +Pablo Duboue +Pascal Patry Patrice Ferlet Paul Tsochantaris +Pavel Zloi Pavol Rusnak +Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com> Pedro Cuenca Peter Sugihara Phil H <5756783+phiharri@users.noreply.github.com> @@ -407,10 +510,15 @@ Philip Taron Phillip Kravtsov Pierre Alexandre SCHEMBRI Pierrick Hymbert +Pieter Ouwerkerk +Plamen Minev +Prashant Vithule <119530321+Vithulep@users.noreply.github.com> Przemysław Pawełczyk Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> Qingyou Meng Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> +R0CKSTAR +R0CKSTAR RJ Adriaansen Radoslav Gerganov Radosław Gryta @@ -419,11 +527,13 @@ Raj Hammeer Singh Hada Ralph Soika Rand Xie Randall Fitzgerald +Random Fly Reinforce-II Ren Xuancheng Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> RhinoDevel Riceball LEE +Rich Dougherty Richard Kiss Richard Roberson Rick G <26732651+TheFlipbook@users.noreply.github.com> @@ -439,21 +549,30 @@ Robey Holderith Robyn Roger Meier Roland <14355895+rbur0425@users.noreply.github.com> +Romain Biessy Romain D <90720+Artefact2@users.noreply.github.com> Romain Neutron Roman Parykin Ron Evans Ron Jailall +Roni Ronny Brendel Ronsor Rowan Hart +Ruchira Hasaranga +Ruixin Huang <18860020911@163.com> Rune <43761327+Rune-AI@users.noreply.github.com> +RunningLeon +RunningLeon Ryan Landay Ryder Wishart Ryuei Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> +SRHMorris <69468379+SRHMorris@users.noreply.github.com> +SXX SakuraUmi Salvador E. Tropea +Salvatore Mesoraca Sam Spilsbury Sami Farin <3876865+Safari77@users.noreply.github.com> Samuel Maynard @@ -463,23 +582,29 @@ Sebastián A SebastianApel <13675545+SebastianApel@users.noreply.github.com> Senemu <10880819+Senemu@users.noreply.github.com> Sergey Alirzaev +Sergio López Sergio López Sertaç Özercan <852750+sozercan@users.noreply.github.com> SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> ShadovvBeast Shakhar Dasgupta +Shane A Shangning Xu <32517059+xushangning@users.noreply.github.com> +Shankar +Shanshan Shen <467638484@qq.com> Shijie <821898965@qq.com> Shintarou Okada Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> Shouzheng Liu Shuichi Tsutsumi +Shupei Fan Sigbjørn Skjæret Simon Willison Siwen Yu Sky Yan Slaren <2141330+slaren@users.noreply.github.com> Slava Primenko +Small Grass Forest SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com> Someone Someone Serge @@ -491,12 +616,15 @@ Stefan Sydow Steffen Röcker Stephan Walter Stephen Nichols +Steve Bonds Steve Grubb Steven Prichard Steven Roussey Steward Garcia <57494570+FSSRepo@users.noreply.github.com> +StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com> Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> SuperUserNameMan +Sutou Kouhei Tai Duc Nguyen Taikono-Himazin Tameem <113388789+AhmadTameem@users.noreply.github.com> @@ -507,7 +635,9 @@ Theia Vogel Thérence <13496987+Royalphax@users.noreply.github.com> Thibault Terrasson Thomas Klausner +Thorsten Sommer Tim Miller +Tim Wang Timmy Knight Timothy Cronin <40186632+4imothy@users.noreply.github.com> Ting Lou @@ -517,24 +647,31 @@ Tom C Tom Jobbins <784313+TheBloke@users.noreply.github.com> Tomas Tomáš Pazdiora +Tony Wasserka <4840017+neobrain@users.noreply.github.com> Tristan Druyen Tristan Ross +Trivikram Kamat <16024985+trivikr@users.noreply.github.com> Tungsten842 <886724vf@anonaddy.me> Tungsten842 Tushar UEXTM.com <84163508+uextm@users.noreply.github.com> +Ujjawal Panchal <31011628+Ujjawal-K-Panchal@users.noreply.github.com> Ulrich Drepper Uzo Nweke Vaibhav Srivastav Val Kharitonov Valentin Konovalov Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> +Vali Malinoiu <0x4139@gmail.com> Victor Nogueira Victor Z. Peng +Viet-Anh NGUYEN (Andrew) +Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com> Vlad Vladimir Vladimir Malyutin Vladimir Zorin +VoidIsVoid <343750470@qq.com> Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> Weird Constructor @@ -551,15 +688,22 @@ Xiang (Kevin) Li Xiao-Yong Jin XiaotaoChen Xiaoyi Chen +Xie Yanbo Xingchen Song(宋星辰) +Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> Xuan Son Nguyen +Yaiko Yann Follet <131855179+YannFollet@users.noreply.github.com> Yaroslav Yazan Agha-Schrader Yiming Cui Yishuo Wang +Yoshi Suhara +Yoshi Suhara +Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> Yui +Yuri Khrustalev Yusuf Kağan Hanoğlu Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> ZHAOKAI WANG @@ -568,6 +712,8 @@ Zay <95888118+isaiahbjork@users.noreply.github.com> Zenix Zhang Peiyuan Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> +Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> +Zhiyuan Li ZhouYuChen Ziad Ben Hadj-Alouane Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> @@ -581,6 +727,7 @@ alexpinel <93524949+alexpinel@users.noreply.github.com> alonfaraj alwqx amd-lalithnc +amritahs-ibm andrijdavid anon998 <131767832+anon998@users.noreply.github.com> anzz1 @@ -588,14 +735,18 @@ apaz apcameron <37645737+apcameron@users.noreply.github.com> arch-btw <57669023+arch-btw@users.noreply.github.com> arcrank +ardfork <134447697+ardfork@users.noreply.github.com> arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> at8u <129688334+at8u@users.noreply.github.com> automaticcat +awatuna <23447591+awatuna@users.noreply.github.com> +b4b4o bandoti <141645996+bandoti@users.noreply.github.com> beiller bhubbb <79117352+bhubbb@users.noreply.github.com> bmwl bobqianic <129547291+bobqianic@users.noreply.github.com> +brucepro bryanSwk <93190252+bryanSwk@users.noreply.github.com> bsilvereagle bssrdf @@ -614,10 +765,14 @@ cpumaxx <163466046+cpumaxx@users.noreply.github.com> crasm crasm daboe01 +daghanerdonmez <44506702+daghanerdonmez@users.noreply.github.com> +daminho <37615795+daminho@users.noreply.github.com> david raistrick ddh0 ddpasa <112642920+ddpasa@users.noreply.github.com> deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> +devojony <61173062+devojony@users.noreply.github.com> +ditsuke divinity76 dm4 dotpy314 <33351922+dotpy314@users.noreply.github.com> @@ -629,14 +784,18 @@ ebraminio eiery <19350831+eiery@users.noreply.github.com> eric8607242 fairydreaming <166155368+fairydreaming@users.noreply.github.com> +fengerhu1 <2748250768@qq.com> fraxy-v <65565042+fraxy-v@users.noreply.github.com> github-actions[bot] gliptic goerch grahameth <96447521+grahameth@users.noreply.github.com> +gtygo gwjr <502526+gwjr@users.noreply.github.com> h-h-h-h <13482553+h-h-h-h@users.noreply.github.com> hankcs +haopeng <657407891@qq.com> +hipudding hoangmit hongbo.mo <352280764@qq.com> hopkins385 <98618192+hopkins385@users.noreply.github.com> @@ -649,12 +808,14 @@ hxer7963 hydai iSma iacore <74560659+iacore@users.noreply.github.com> +icppWorld <124377669+icppWorld@users.noreply.github.com> igarnier intelmatt <61025942+intelmatt@users.noreply.github.com> iohub jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> jameswu2014 <545426914@qq.com> +jdomke <28772296+jdomke@users.noreply.github.com> jiez <373447296@qq.com> jneem joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> @@ -677,28 +838,35 @@ klosax <131523366+klosax@users.noreply.github.com> kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> kunnis kuronekosaiko +kustaaya <58045274+kustaaya@users.noreply.github.com> kuvaus <22169537+kuvaus@users.noreply.github.com> kwin1412 <42286931+kwin1412@users.noreply.github.com> l3utterfly +laik ldwang le.chang leejet +leo-pony limitedAtonement liuwei-git <14815172+liuwei-git@users.noreply.github.com> lon <114724657+longregen@users.noreply.github.com> loonerin <132926317+loonerin@users.noreply.github.com> +ltoniazzi <61414566+ltoniazzi@users.noreply.github.com> luoyu-intel m3ndax maddes8cht <55592906+maddes8cht@users.noreply.github.com> makomk manikbhandari maor-ps <154728172+maor-ps@users.noreply.github.com> +matiaslin <45382001+matiaslin@users.noreply.github.com> +matteo mdrokz mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> minarchist mj-shifu <77107165+mj-shifu@users.noreply.github.com> mmyjona momonga <115213907+mmnga@users.noreply.github.com> +momonga <146910567+mmngays@users.noreply.github.com> moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> mzcu nanahi <130121847+na-na-hi@users.noreply.github.com> @@ -716,8 +884,10 @@ omahs <73983677+omahs@users.noreply.github.com> oobabooga <112222186+oobabooga@users.noreply.github.com> opparco ostix360 <55257054+ostix360@users.noreply.github.com> +pculliton pengxin99 perserk +piDack <104877312+piDack@users.noreply.github.com> pmysl postmasters pudepiedj @@ -733,6 +903,7 @@ runfuture sandyiscool sasha0552 semidark +serhii-nakon <57632032+serhii-nakon@users.noreply.github.com> sharpHL <132747147+sharpHL@users.noreply.github.com> shibe2 singularity <12184989+singularity-s0@users.noreply.github.com> @@ -741,42 +912,55 @@ sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> slaren <2141330+slaren@users.noreply.github.com> slaren snadampal <87143774+snadampal@users.noreply.github.com> +standby24x7 staviq stduhpf strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com> swittk takov751 <40316768+takov751@users.noreply.github.com> tarcey +tc-mb <157115220+tc-mb@users.noreply.github.com> texmex76 <40733439+texmex76@users.noreply.github.com> thement <40525767+thement@users.noreply.github.com> +thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> tjohnman +toyer <2042519524@qq.com> tslmy ubik2 uint256_t uint256_t unbounded +uvos valiray <133289098+valiray@users.noreply.github.com> +vb vik viric vodkaslime <646329483@qq.com> vvhg1 <94630311+vvhg1@users.noreply.github.com> vxiiduu <73044267+vxiiduu@users.noreply.github.com> +wangshuai09 <391746016@qq.com> wbpxre150 <100937007+wbpxre150@users.noreply.github.com> whoreson <139810751+whoreson@users.noreply.github.com> woachk <24752637+woachk@users.noreply.github.com> wonjun Jang woodx <124784234+woodx9@users.noreply.github.com> +wwoodsTM <104587230+wwoodsTM@users.noreply.github.com> wzy <32936898+Freed-Wu@users.noreply.github.com> xaedes xaedes +xctan xloem <0xloem@gmail.com> yangli2 yuiseki +yuri@FreeBSD zakkor zhangkaihuo +zhentaoyu zhouwg <6889919+zhouwg@users.noreply.github.com> zhouwg zrm Ștefan-Gabriel Muscalu +杨朱 · Kiki 源文雨 <41315874+fumiama@users.noreply.github.com> +蕭澧邦 <45505768+shou692199@users.noreply.github.com> Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/common/arg.cpp b/common/arg.cpp index a6b7a1394f735..32d9a964c1716 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1370,8 +1370,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, int value) { params.n_gpu_layers = value; if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n"); + fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); + fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); } } ).set_env("LLAMA_ARG_N_GPU_LAYERS")); @@ -2104,8 +2105,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex [](common_params & params, int value) { params.speculative.n_gpu_layers = value; if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: not compiled with GPU offload support, --gpu-layers-draft option will be ignored\n"); - fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); + fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n"); + fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); + fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); } } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER})); diff --git a/docs/android.md b/docs/android.md index 320b62240382f..47530c6c1d478 100644 --- a/docs/android.md +++ b/docs/android.md @@ -23,10 +23,10 @@ $ curl -L {model-url} -o ~/{model}.gguf Then, if you are not already in the repo directory, `cd` into `llama.cpp` and: ``` -$ ./build/bin/llama-simple -m ~/{model}.gguf -c {context-size} -p "{your-prompt}" +$ ./build/bin/llama-cli -m ~/{model}.gguf -c {context-size} -p "{your-prompt}" ``` -Here, we show `llama-simple`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal. +Here, we show `llama-cli`, but any of the executables under `examples` should work, in theory. Be sure to set `context-size` to a reasonable number (say, 4096) to start with; otherwise, memory could spike and kill your terminal. To see what it might look like visually, here's an old demo of an interactive session running on a Pixel 5 phone: diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp index aae49c965e905..7ba4cea58e80b 100644 --- a/examples/llava/clip.cpp +++ b/examples/llava/clip.cpp @@ -40,10 +40,17 @@ #include #include -#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -#define LOG_DBG(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +#if defined(LLAVA_LOG_OFF) +# define LOG_INF(...) +# define LOG_WRN(...) +# define LOG_ERR(...) +# define LOG_DBG(...) +#else // defined(LLAVA_LOG_OFF) +# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +#endif // defined(LLAVA_LOG_OFF) //#define CLIP_DEBUG_FUNCTIONS diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp index be69885408433..4ca53a0b883b9 100644 --- a/examples/llava/llava.cpp +++ b/examples/llava/llava.cpp @@ -11,13 +11,17 @@ #include #include -#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) -#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) - -#define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) -#define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -#define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) -#define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +#if defined(LLAVA_LOG_OFF) +# define LOG_INF(...) +# define LOG_WRN(...) +# define LOG_ERR(...) +# define LOG_DBG(...) +#else // defined(LLAVA_LOG_OFF) +# define LOG_INF(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +# define LOG_WRN(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +# define LOG_ERR(...) do { fprintf(stderr, __VA_ARGS__); } while (0) +# define LOG_DBG(...) do { fprintf(stdout, __VA_ARGS__); } while (0) +#endif // defined(LLAVA_LOG_OFF) // RGB uint8 image struct clip_image_u8 { @@ -498,10 +502,16 @@ static bool load_file_to_bytes(const char* path, unsigned char** bytesOut, long errno = 0; size_t ret = fread(buffer, 1, fileSize, file); // Read the file into the buffer if (ferror(file)) { - die_fmt("read error: %s", strerror(errno)); + LOG_ERR("read error: %s", strerror(errno)); + free(buffer); + fclose(file); + return false; } if (ret != (size_t) fileSize) { - die("unexpectedly reached end of file"); + LOG_ERR("unexpectedly reached end of file"); + free(buffer); + fclose(file); + return false; } fclose(file); // Close the file diff --git a/examples/server/tests/requirements.txt b/examples/server/tests/requirements.txt index 935a79114b45e..074b9d47bddce 100644 --- a/examples/server/tests/requirements.txt +++ b/examples/server/tests/requirements.txt @@ -2,6 +2,6 @@ aiohttp~=3.9.3 pytest~=8.3.3 huggingface_hub~=0.23.2 numpy~=1.26.4 -openai~=1.30.3 +openai~=1.55.3 prometheus-client~=0.20.0 requests~=2.32.3 diff --git a/examples/server/tests/utils.py b/examples/server/tests/utils.py index b48ec3e78e1d8..e17a05ff6902a 100644 --- a/examples/server/tests/utils.py +++ b/examples/server/tests/utils.py @@ -8,7 +8,6 @@ import re import json import sys -import threading import requests import time from concurrent.futures import ThreadPoolExecutor, as_completed @@ -170,26 +169,12 @@ def start(self, timeout_seconds: int = 10) -> None: self.process = subprocess.Popen( [str(arg) for arg in [server_path, *server_args]], creationflags=flags, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, + stdout=sys.stdout, + stderr=sys.stdout, env={**os.environ, "LLAMA_CACHE": "tmp"}, ) server_instances.add(self) - def server_log(in_stream, out_stream): - for line in iter(in_stream.readline, b""): - print(line.decode("utf-8"), end="", file=out_stream) - - thread_stdout = threading.Thread( - target=server_log, args=(self.process.stdout, sys.stdout), daemon=True - ) - thread_stdout.start() - - thread_stderr = threading.Thread( - target=server_log, args=(self.process.stderr, sys.stderr), daemon=True - ) - thread_stderr.start() - print(f"server pid={self.process.pid}, pytest pid={os.getpid()}") # wait for server to start diff --git a/examples/simple/README.md b/examples/simple/README.md index 0ff3425359a41..937008b243ee4 100644 --- a/examples/simple/README.md +++ b/examples/simple/README.md @@ -3,7 +3,7 @@ The purpose of this example is to demonstrate a minimal usage of llama.cpp for generating text with a given prompt. ```bash -./llama-simple -m ./models/llama-7b-v2/ggml-model-f16.gguf -p "Hello my name is" +./llama-simple -m ./models/llama-7b-v2/ggml-model-f16.gguf "Hello my name is" ... diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index d707efc5d1f48..b2d857e1e549b 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -2965,7 +2965,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst, aclTensor* acl_cos_repeat_tensor, aclTensor* acl_sin_repeat_tensor, float theta_scale, float freq_scale, - bool is_neox) { + float attn_factor, bool is_neox) { // int sin/cos cache, cache has different repeat method depond on // @param.is_neox @@ -3017,6 +3017,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst, ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS); aclnn_div_tensor(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, nullptr, true); + ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor)); } // position @@ -3047,16 +3048,6 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst, aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor, acl_theta_tensor); - // // power[] * position[] * freq_scale / freq_factors[] - // ggml_cann_pool_alloc theta_final_allocator(ctx.pool(), - // theta_length * - // sizeof(float_t)); - // aclTensor* acl_theat_final_tensor = aclnn_zero( - // ctx, theta_final_allocator.get(), sizeof(float_t) * theta_length, - // theta_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t)); - // aclnn_inplace_addcdiv(ctx, acl_theat_final_tensor, acl_theta_tensor, - // acl_freq_factors_tensor, freq_scale); - // permute: [0,1,2,3]->[0,2,1,3] int64_t permute_ne[] = {arange_length, 1, position_length, 1}; size_t permute_nb[GGML_MAX_DIMS]; @@ -3092,6 +3083,12 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst, GGML_MAX_DIMS, ACL_FORMAT_ND); aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor); + // attn_factor + if (attn_factor != 1) { + aclnn_muls(ctx, acl_sin_tensor, attn_factor, nullptr, true); + aclnn_muls(ctx, acl_cos_tensor, attn_factor, nullptr, true); + } + // repeat if (is_neox) { int64_t repeatsArray[] = {1, 1, 1, 2}; @@ -3155,15 +3152,11 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { memcpy(&beta_fast, (int32_t*)dst->op_params + 9, sizeof(float)); memcpy(&beta_slow, (int32_t*)dst->op_params + 10, sizeof(float)); - // TODO: attn_factor != 1 - GGML_ASSERT(attn_factor == 1); // TODO: n_dims <= ne0 GGML_ASSERT(n_dims == ne0); GGML_ASSERT(n_dims % 2 == 0); // TODO: ext_factor != 0 GGML_ASSERT(ext_factor == 0); - // TODO: type == GGML_TYPE_F16 - GGML_ASSERT(src0->type == GGML_TYPE_F32); const float theta_scale = powf(freq_base, -2.0f / n_dims); @@ -3194,7 +3187,217 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t), sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS); aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor, - theta_scale, freq_scale, is_neox); + theta_scale, freq_scale, attn_factor, is_neox); + + aclTensor* acl_src = ggml_cann_create_tensor(src0); + aclTensor* acl_dst = ggml_cann_create_tensor(dst); + +#ifdef ASCEND_310P + // Special ROPE operation for 310P + + // roll input + void* input_roll_buffer; + aclTensor* acl_minus_one_tensor; + void* minus_one_scale_buffer = nullptr; + ggml_cann_pool_alloc roll_allocator(ctx.pool(), ggml_nbytes(src0)); + ggml_cann_pool_alloc minus_one_scale_allocator( + ctx.pool(), sizeof(float_t) * src0->ne[0]); + if (!is_neox) { + // roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...] + input_roll_buffer = roll_allocator.get(); + int64_t input_roll_ne[4] = {2, src0->ne[1] * (src0->ne[0] / 2), + src0->ne[2], src0->ne[3]}; + size_t input_roll_nb[GGML_MAX_DIMS]; + input_roll_nb[0] = ggml_type_size(src0->type); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + input_roll_nb[i] = input_roll_nb[i - 1] * input_roll_ne[i - 1]; + } + aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor( + input_roll_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), input_roll_ne, input_roll_nb, + GGML_MAX_DIMS); + aclTensor* acl_input_tensor = ggml_cann_create_tensor( + src0->data, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), input_roll_ne, input_roll_nb, + GGML_MAX_DIMS); + + int64_t shifts[] = {1}; + int64_t dims[] = {3}; + aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims); + ACL_CHECK(aclDestroyTensor(acl_input_roll_tensor)); + ACL_CHECK(aclDestroyTensor(acl_input_tensor)); + + // init [-1, 1, -1, 1, ...] + minus_one_scale_buffer = minus_one_scale_allocator.get(); + + int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1}; + size_t minus_one_nb[GGML_MAX_DIMS]; + minus_one_nb[0] = sizeof(float_t); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1]; + } + acl_minus_one_tensor = aclnn_values( + ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0], + minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1); + int64_t dim = 3; + int64_t* index = new int64_t[src0->ne[0]]; + for (int i = 0; i < src0->ne[0]; i++) { + index[i] = i / 2 * 2; + } + int64_t index_num = src0->ne[0]; + float value = -1; + aclnn_index_fill_tensor(ctx, acl_minus_one_tensor, dim, index, + index_num, value); + } else { + // roll input: [q0,q1,q2,...] -> + // [q_half,q_half+1,...,q_end,q0,q1,...q_half-1] + input_roll_buffer = roll_allocator.get(); + aclTensor* acl_input_roll_tensor = ggml_cann_create_tensor( + input_roll_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), src0->ne, src0->nb, GGML_MAX_DIMS); + aclTensor* acl_input_tensor = ggml_cann_create_tensor(src0); + + int64_t shifts[] = {src0->ne[0] / 2}; + int64_t dims[] = {3}; + aclnn_roll(ctx, acl_input_tensor, acl_input_roll_tensor, shifts, dims); + + ACL_CHECK(aclDestroyTensor(acl_input_roll_tensor)); + ACL_CHECK(aclDestroyTensor(acl_input_tensor)); + // init [-1, -1, -1, 1, 1,1,...] + minus_one_scale_buffer = minus_one_scale_allocator.get(); + int64_t minus_one_ne[4] = {src0->ne[0], 1, 1, 1}; + size_t minus_one_nb[GGML_MAX_DIMS]; + minus_one_nb[0] = sizeof(float_t); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + minus_one_nb[i] = minus_one_nb[i - 1] * minus_one_ne[i - 1]; + } + acl_minus_one_tensor = aclnn_values( + ctx, minus_one_scale_buffer, sizeof(float_t) * src0->ne[0], + minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), 1); + // -1 * first half + int64_t first_half_ne[4] = {src0->ne[0] / 2, 1, 1, 1}; + size_t first_half_nb[GGML_MAX_DIMS]; + first_half_nb[0] = sizeof(float_t); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + first_half_nb[i] = first_half_nb[i - 1] * first_half_ne[i - 1]; + } + aclTensor* acl_first_half_tensor = ggml_cann_create_tensor( + minus_one_scale_buffer, ACL_FLOAT, sizeof(float_t), first_half_ne, + first_half_nb, GGML_MAX_DIMS); + bool inplace = true; + float scale = -1; + aclnn_muls(ctx, acl_first_half_tensor, scale, nullptr, inplace); + ACL_CHECK(aclDestroyTensor(acl_first_half_tensor)); + } + + // TODO: n_dims < ne0 + GGML_ASSERT(n_dims == src0->ne[0]); + + // input * scale + ggml_cann_pool_alloc roll_mul_scale_allocator(ctx.pool(), + ggml_nbytes(src0)); + void* input_roll_mul_scale_buffer = roll_mul_scale_allocator.get(); + size_t input_nb[GGML_MAX_DIMS]; + input_nb[0] = ggml_type_size(src0->type); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + input_nb[i] = input_nb[i - 1] * src0->ne[i - 1]; + } + aclTensor* acl_input_roll_mul_scale_tensor = ggml_cann_create_tensor( + input_roll_mul_scale_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS); + aclTensor* acl_input_roll_reshape_tensor = ggml_cann_create_tensor( + input_roll_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), src0->ne, input_nb, GGML_MAX_DIMS); + + aclnn_mul(ctx, acl_input_roll_reshape_tensor, acl_minus_one_tensor, + acl_input_roll_mul_scale_tensor); + + // output + void* output_fp32_buffer; + if (src0->type == GGML_TYPE_F32) { + aclnn_inplace_mul(ctx, acl_src, acl_cos_reshape_tensor); + aclnn_inplace_mul(ctx, acl_input_roll_mul_scale_tensor, + acl_sin_reshape_tensor); + aclnn_add(ctx, acl_src, acl_input_roll_mul_scale_tensor, acl_dst); + // TODO: ne0 != n_dims in mode2 + } else if (src0->type == GGML_TYPE_F16) { + size_t input_fp32_nb[GGML_MAX_DIMS]; + input_fp32_nb[0] = sizeof(float_t); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + input_fp32_nb[i] = input_fp32_nb[i - 1] * dst->ne[i - 1]; + } + ggml_cann_pool_alloc fp32_allocator1( + ctx.pool(), ggml_nelements(dst) * sizeof(float_t)); + void* input_fp32_buffer1 = fp32_allocator1.get(); + aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor( + input_fp32_buffer1, ACL_FLOAT, sizeof(float_t), dst->ne, + input_fp32_nb, GGML_MAX_DIMS); + ggml_cann_pool_alloc fp32_allocator2( + ctx.pool(), ggml_nelements(dst) * sizeof(float_t)); + void* input_fp32_buffer2 = fp32_allocator2.get(); + aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor( + input_fp32_buffer2, ACL_FLOAT, sizeof(float_t), dst->ne, + input_fp32_nb, GGML_MAX_DIMS); + + ggml_cann_pool_alloc fp32_allocator( + ctx.pool(), ggml_nelements(dst) * sizeof(float_t)); + output_fp32_buffer = fp32_allocator.get(); + aclTensor* output_fp32_tensor = ggml_cann_create_tensor( + output_fp32_buffer, ACL_FLOAT, sizeof(float_t), dst->ne, + input_fp32_nb, GGML_MAX_DIMS); + aclnn_mul(ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1); + aclnn_mul(ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor, + input_fp32_tensor2); + aclnn_add(ctx, input_fp32_tensor1, input_fp32_tensor2, + output_fp32_tensor); + aclnn_cast(ctx, output_fp32_tensor, acl_dst, ACL_FLOAT16); + + ACL_CHECK(aclDestroyTensor(input_fp32_tensor1)); + ACL_CHECK(aclDestroyTensor(input_fp32_tensor2)); + ACL_CHECK(aclDestroyTensor(output_fp32_tensor)); + ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor)); + ACL_CHECK(aclDestroyTensor(acl_minus_one_tensor)); + ACL_CHECK(aclDestroyTensor(acl_input_roll_mul_scale_tensor)); + ACL_CHECK(aclDestroyTensor(acl_input_roll_reshape_tensor)); + ACL_CHECK(aclDestroyTensor(acl_src)); + } + return; +#endif + + // src0 == GGML_TYPE_F16 + // TODO: optimization this `if` code + if (src0->type == GGML_TYPE_F16) { + ggml_cann_pool_alloc sin_final_allocator( + ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type)); + ggml_cann_pool_alloc cos_final_allocator( + ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type)); + void* sin_final_buffer = sin_final_allocator.get(); + void* cos_final_buffer = cos_final_allocator.get(); + + int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1}; + size_t sin_final_nb[GGML_MAX_DIMS]; + sin_final_nb[0] = ggml_type_size(src0->type); + for (int i = 1; i < GGML_MAX_DIMS; i++) { + sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1]; + } + aclTensor* acl_sin_final_tensor = ggml_cann_create_tensor( + sin_final_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), sin_final_ne, sin_final_nb, + GGML_MAX_DIMS); + aclTensor* acl_cos_final_tensor = ggml_cann_create_tensor( + cos_final_buffer, ggml_cann_type_mapping(src0->type), + ggml_type_size(src0->type), sin_final_ne, sin_final_nb, + GGML_MAX_DIMS); + + aclnn_cast(ctx, acl_sin_reshape_tensor, acl_sin_final_tensor, + ggml_cann_type_mapping(src0->type)); + aclnn_cast(ctx, acl_cos_reshape_tensor, acl_cos_final_tensor, + ggml_cann_type_mapping(src0->type)); + ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor)); + ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor)); + acl_sin_reshape_tensor = acl_sin_final_tensor; + acl_cos_reshape_tensor = acl_cos_final_tensor; + } uint64_t workspaceSize = 0; aclOpExecutor* executor; @@ -3206,10 +3409,8 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { acl_mode = 1; } - aclTensor* acl_x = ggml_cann_create_tensor(src0); - aclTensor* acl_dst = ggml_cann_create_tensor(dst); ACL_CHECK(aclnnRotaryPositionEmbeddingGetWorkspaceSize( - acl_x, acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode, + acl_src, acl_cos_reshape_tensor, acl_sin_reshape_tensor, acl_mode, acl_dst, &workspaceSize, &executor)); if (workspaceSize > 0) { ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize); @@ -3219,7 +3420,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) { ACL_CHECK(aclnnRotaryPositionEmbedding(workspaceAddr, workspaceSize, executor, ctx.stream())); - ACL_CHECK(aclDestroyTensor(acl_x)); + ACL_CHECK(aclDestroyTensor(acl_src)); ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor)); ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor)); ACL_CHECK(aclDestroyTensor(acl_dst)); diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index bcb54e44404db..04e25b8ab1a23 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1739,7 +1739,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, case GGML_OP_ROPE: { // TODO: with ops-test v == 1 float * ext_factor = (float*)((int32_t*)op->op_params + 7); - float * attn_factor = (float*)((int32_t*)op->op_params + 8); // TODO: n_dims <= ne0 if (op->src[0]->ne[0] != op->op_params[1]) { return false; @@ -1748,17 +1747,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, if (*ext_factor != 0) { return false; } - // TODO: attn_factor != 1 - if (*attn_factor != 1) { - return false; - } - //TODO: type == GGML_TYPE_F16 - switch (op->src[0]->type) { - case GGML_TYPE_F32: - return true; - default: - return false; - } + return true; } case GGML_OP_UPSCALE: { // aclnnUpsampleNearest2dGetWorkspaceSize not support diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.c b/ggml/src/ggml-cpu/ggml-cpu-aarch64.c index ced3788790671..69d3d327d1180 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.c +++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.c @@ -1,7 +1,3 @@ -// SPDX-FileCopyrightText: Copyright 2024 Arm Limited and/or its affiliates -// SPDX-License-Identifier: MIT -// - #define GGML_COMMON_IMPL_C #include "ggml-common.h" diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index b6392ed8dcc6a..aabcdc22422fc 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -3447,8 +3447,15 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor use_dequantize_mul_mat_vec = use_dequantize_mul_mat_vec && !use_mul_mat_vec_q; if (!split && src0->type == GGML_TYPE_F16 && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) { - // KQ single-batch - ggml_sycl_mul_mat_vec_p021(ctx, src0, src1, dst); + // TODO: Refactor and cleanup of mul mat dispatching. + if (src0->ne[3] == 1 && src1->ne[3] == 1) { + // KQ single-batch + // mmv p021 was specific for these dimensions + ggml_sycl_mul_mat_vec_p021(ctx, src0, src1, dst); + } else { + // The kernel from the if path is faster for that specific case, but does not support all mul mats. + ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); + } } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // KQV single-batch ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index a833007fb3d02..849c119237bba 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -5672,6 +5672,48 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod } else { compute_ctx = ctx->compute_ctx.lock(); } + } else { + switch (node->op) { + case GGML_OP_REPEAT: + case GGML_OP_ACC: + case GGML_OP_GET_ROWS: + case GGML_OP_ADD: + case GGML_OP_MUL: + case GGML_OP_DIV: + case GGML_OP_CONCAT: + case GGML_OP_UPSCALE: + case GGML_OP_SCALE: + case GGML_OP_SQR: + case GGML_OP_SIN: + case GGML_OP_COS: + case GGML_OP_CLAMP: + case GGML_OP_PAD: + case GGML_OP_CPY: + case GGML_OP_CONT: + case GGML_OP_DUP: + case GGML_OP_NORM: + case GGML_OP_GROUP_NORM: + case GGML_OP_RMS_NORM: + case GGML_OP_UNARY: + case GGML_OP_DIAG_MASK_INF: + case GGML_OP_SOFT_MAX: + case GGML_OP_ROPE: + case GGML_OP_ARGSORT: + case GGML_OP_SUM_ROWS: + case GGML_OP_IM2COL: + case GGML_OP_TIMESTEP_EMBEDDING: + case GGML_OP_POOL_2D: + case GGML_OP_LEAKY_RELU: + { + // These operations all go through ggml_vk_op_f32, so short-circuit and + // do the only thing needed for the dryrun. + vk_pipeline pipeline = ggml_vk_op_get_pipeline(ctx, src0, src1, src2, node, node->op); + ggml_pipeline_request_descriptor_sets(ctx->device, pipeline, 1); + return false; + } + default: + break; + } } switch (node->op) { @@ -6401,16 +6443,17 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg bool first_node_in_batch = true; // true if next node will be first node in a batch int submit_node_idx = 0; // index to first node in a batch - // submit work every submit_count node to overlap CPU cmdbuffer generation with GPU execution - constexpr int submit_count = 100; + // Submit work every nodes_per_submit nodes to overlap CPU cmdbuffer generation with GPU execution. + // Start with a smaller count to get work submitted right away, and increase it after each submit. + int nodes_per_submit = 20; int submitted_nodes = 0; + int submit_count = 0; for (int i = 0; i < cgraph->n_nodes; i++) { if (first_node_in_batch) { submit_node_idx = i; } - bool submit = (submitted_nodes >= submit_count) || (i == last_node); - + bool submit = (submitted_nodes >= nodes_per_submit) || (i == last_node); bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit); @@ -6427,6 +6470,15 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg if (submit) { first_node_in_batch = true; submitted_nodes = 0; + switch (submit_count) { + case 0: + nodes_per_submit = 50; + break; + default: + nodes_per_submit = 100; + break; + } + submit_count++; } } diff --git a/src/llama.cpp b/src/llama.cpp index af5e686e07eda..22b951ba2a946 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -2341,6 +2341,7 @@ enum e_model { MODEL_16B, MODEL_20B, MODEL_30B, + MODEL_32B, MODEL_34B, MODEL_35B, MODEL_40B, @@ -5330,6 +5331,7 @@ static const char * llama_model_type_name(e_model type) { case MODEL_16B: return "16B"; case MODEL_20B: return "20B"; case MODEL_30B: return "30B"; + case MODEL_32B: return "32B"; case MODEL_34B: return "34B"; case MODEL_35B: return "35B"; case MODEL_40B: return "40B"; @@ -5690,7 +5692,10 @@ static void llm_load_hparams( case 24: model.type = hparams.n_embd == 1024 ? e_model::MODEL_0_5B : e_model::MODEL_1B; break; case 28: model.type = hparams.n_embd == 1536 ? e_model::MODEL_1_5B : e_model::MODEL_7B; break; case 32: model.type = e_model::MODEL_7B; break; + case 36: model.type = e_model::MODEL_3B; break; case 40: model.type = hparams.n_head() == 20 ? e_model::MODEL_4B : e_model::MODEL_13B; break; + case 48: model.type = e_model::MODEL_14B; break; + case 64: model.type = e_model::MODEL_32B; break; case 80: model.type = e_model::MODEL_70B; break; default: model.type = e_model::MODEL_UNKNOWN; }