diff --git a/sw/blas/gemm/src/gemm.h b/sw/blas/gemm/src/gemm.h index 9be687913..2cc7bf03c 100644 --- a/sw/blas/gemm/src/gemm.h +++ b/sw/blas/gemm/src/gemm.h @@ -11,6 +11,7 @@ #define Y(i,j) Y[(i)*m + (j)] #define DATA_TYPE double +//#define SSRFREP // alpha*A[m][k]*B[k][n] + beta*C[m][n] = Y[m][n] void gemm(uint32_t M, uint32_t N, uint32_t K, uint32_t sM, uint32_t sN, uint32_t sK, double* A, @@ -18,12 +19,34 @@ void gemm(uint32_t M, uint32_t N, uint32_t K, uint32_t sM, uint32_t sN, uint32_t uint32_t tb, double* C, double BETA){ DATA_TYPE res; + + if (!ta && !tb) { + + for (uint32_t m = 0; m < sM; m++) { for (uint32_t n = 0; n < sN; n++) { res = BETA * C[m * N + n]; + + #ifdef SSRFREP + snrt_ssr_loop_1d(SNRT_SSR_DM0, sK, 8); + snrt_ssr_loop_1d(SNRT_SSR_DM1, sK, 8*N); + snrt_ssr_read(SNRT_SSR_DM0, SNRT_SSR_1D, A + m*K); + snrt_ssr_read(SNRT_SSR_DM1, SNRT_SSR_1D, B + n); + + asm volatile + ("frep.o %[n_frep], 1, 0, 0 \n" + "fmadd.d %[res], ft1, ft2, %[res] \n" + : [res] "+f"(res) + : [n_frep] "r"(sK-1) + : "ft0", "ft1" + ); + + #else for (uint32_t k = 0; k < sK; k++) res += A[k + m * K] * B[k * N + n]; + #endif + C[m * N + n] = res; } } diff --git a/sw/blas/gemm/src/main.c b/sw/blas/gemm/src/main.c index 5c334f106..908cc550a 100644 --- a/sw/blas/gemm/src/main.c +++ b/sw/blas/gemm/src/main.c @@ -5,30 +5,54 @@ #include "gemm.h" #include "snrt.h" -DATA_TYPE t [16] = {0}; +#define CEIL(x, y) ((((x) - 1) / (y)) + 1) +#define MIN(x, y) ((x) < (y)?(x):(y)) -int main(int argc, char *argv[]) { +int main(int argc, char *argv[]) { - snrt_cluster_hw_barrier(); + // Allocate space in TCDM + uint32_t size_a = M * K * sizeof(DATA_TYPE); + uint32_t size_b = K * N * sizeof(DATA_TYPE); + uint32_t size_c = M * N * sizeof(DATA_TYPE); + DATA_TYPE *local_a, *local_b, *local_c; + local_a = (DATA_TYPE *)snrt_l1_next(); + local_b = local_a + size_a; //maybe multiplying by sizeof(datatype) isn't needed + local_c = local_b + size_b; + DATA_TYPE* t = local_c + size_c; - DATA_TYPE *a11 = a, *a12 = a + K/2; - DATA_TYPE *a21 = a + M/2 *K, *a22 = a + M/2 *K + K/2; + // Copy data in TCDM + if (snrt_is_dm_core()) { + snrt_dma_start_1d(local_a, a, size_a); + snrt_dma_start_1d(local_b, b, size_b); + snrt_dma_start_1d(local_c, c, size_c); + snrt_dma_wait_all(); + } - DATA_TYPE *b11 = b, *b12 = b + N/2; - DATA_TYPE *b21 = b + K/2 *N, *b22 = b + K/2 *N + N/2; + snrt_cluster_hw_barrier(); - DATA_TYPE *c11 = c, *c12 = c + N/2; - DATA_TYPE *c21 = c + M/2 *N, *c22 = c + M/2 *N + N/2; + // Compute + if (!snrt_is_dm_core()) { - DATA_TYPE *t11 = t, *t12 = t + N/2; - DATA_TYPE *t21 = t + M/2 *N, *t22 = t + M/2 *N + N/2; - #ifdef SINGLE_CORE if (snrt_cluster_core_idx() == 0) - gemm(M, N, K, M, N, K, a, TA, b, TB, c, BETA); + gemm(M, N, K, M, N, K, local_a, TA, local_b, TB, local_c, BETA); #else + + DATA_TYPE *a11 = local_a, *a12 = local_a + K/2; + DATA_TYPE *a21 = local_a + M/2 *K, *a22 = local_a + M/2 *K + K/2; + + DATA_TYPE *b11 = local_b, *b12 = local_b + N/2; + DATA_TYPE *b21 = local_b + K/2 *N, *b22 = local_b + K/2 *N + N/2; + + DATA_TYPE *c11 = local_c, *c12 = local_c + N/2; + DATA_TYPE *c21 = local_c + M/2 *N, *c22 = local_c + M/2 *N + N/2; + + DATA_TYPE *t11 = t, *t12 = t + N/2; + DATA_TYPE *t21 = t + M/2 *N, *t22 = t + M/2 *N + N/2; + + switch (snrt_cluster_core_idx()) { case 0: gemm (M, N, K, M/2, N/2, K/2, a11, TA, b11, TB, c11, BETA); @@ -54,16 +78,34 @@ int main(int argc, char *argv[]) { case 7: gemm (M, N, K, M/2, N/2, K/2, a22, TA, b22, TB, t22, BETA); break; - } + } + snrt_fpu_fence(); + } snrt_cluster_hw_barrier(); - if (snrt_cluster_core_idx() == 0) - for (uint32_t i = 0; i < M; i++) + + if (!snrt_is_dm_core()) { ////////////////////////Call add function + uint32_t c, lb, ub, core_idx = snrt_cluster_core_idx(); + c = CEIL(M, snrt_cluster_core_num()); + lb = c * core_idx; + ub = MIN((c * (core_idx + 1)), M); + + for (uint32_t i = lb; i < ub; i++) { for (uint32_t j = 0; j < N; j++) - c[i*N +j] += t[i*N +j]; + local_c[i*N +j] += t[i*N +j]; + } + snrt_fpu_fence(); + } + #endif + snrt_cluster_hw_barrier(); + + // Copy data out of TCDM + if (snrt_is_dm_core()) { + snrt_dma_start_1d(c, local_c, size_c); + snrt_dma_wait_all(); + } snrt_cluster_hw_barrier(); - snrt_fpu_fence(); - #endif + } \ No newline at end of file diff --git a/target/snitch_cluster/gemm_results.csv b/target/snitch_cluster/gemm_results.csv index 2aee12809..d2ff16340 100644 --- a/target/snitch_cluster/gemm_results.csv +++ b/target/snitch_cluster/gemm_results.csv @@ -1,256 +1,16 @@ -4.105918197892349,1.1749655783398258,2.9309526195525226 -3.953183707820053,1.1289219412770841,2.8242617665429686 -2.891090334283691,0.6996513124665965,2.1914390218170947 -3.17738419214957,0.5131952117828857,2.664188980366684 -3.7760889151112815,0.9892955271616714,2.78679338794961 -5.382824444124319,0.9887448943241853,4.394079549800134 -3.0234582286158647,0.7198370365500957,2.303621192065769 -3.013935275276351,0.9507067253674646,2.0632285499088865 -3.900725093736766,1.1747522258805798,2.725972867856186 -3.795972516957686,0.5987843972480749,3.197188119709611 -3.6731464768109023,0.6181324447893228,3.0550140320215795 -3.4160501406061305,0.3524584470557827,3.063591693550348 -3.5076008465166226,0.6844230616602283,2.8231777848563944 -3.78889531106399,0.14413275814690465,3.644762552917085 -4.803148896947726,0.7457889202379945,4.057359976709732 -4.643452725194436,0.8193199846360567,3.824132740558379 -3.336770336223837,0.7398196041131416,2.5969507321106953 -3.534356792249745,0.6909905200884519,2.843366272161293 -2.7517794937545488,0.4417669015806611,2.3100125921738877 -2.9433338871171717,0.29273909563323885,2.650594791483933 -3.4129361516482755,0.6060217075911039,2.8069144440571714 -4.687637451159328,0.6422889570110499,4.0453484941482785 -2.5433625389080605,0.4109811086321156,2.1323814302759447 -2.527115309032971,0.5752950975599621,1.951820211473009 -3.447783596771655,0.7339699786214772,2.7138136181501777 -3.1407596748935496,0.40273823819796606,2.7380214366955835 -2.83713474025486,0.40915120242774133,2.4279835378271186 -3.407104574581321,0.26304296074438444,3.1440616138369366 -2.9631148179187368,0.4128286174441443,2.5502862004745923 -2.7193041764314705,0.10818398055598401,2.6111201958754866 -3.62666225561791,0.4905674078773379,3.136094847740572 -3.5013922297642357,0.5312269784022863,2.9701652513619496 -4.482622327630714,0.6181282404578958,3.864494087172818 -3.875162781177636,0.3581627180328405,3.5170000631447955 -2.732231585146141,0.11355759219962902,2.618673992946512 -3.844416303782056,0.6715731955927996,3.172843108189256 -3.7798625096157292,0.5203077009037933,3.259554808711936 -4.969155642234775,0.7723183917356393,4.1968372504991365 -3.1841171949546148,0.5201635011119934,2.6639536938426214 -3.546975334998331,0.8521815003185401,2.694793834679791 -3.726564818438037,0.5519068387744855,3.1746579796635515 -4.071312135594371,0.5609379715353863,3.5103741640589847 -4.056706254086806,0.876653602658345,3.180052651428461 -3.7738597179388327,0.40348286621239704,3.3703768517264354 -3.676143876877025,0.13401522845064073,3.542128648426384 -3.546420093467768,0.028782676313338973,3.517637417154429 -4.7454497518363485,0.755137255673619,3.9903124961627294 -4.337758367314869,0.6203095513534647,3.7174488159614043 -4.805346613892697,0.7040797680992236,4.101266845793473 -4.375246898782492,0.21296416150891073,4.162282737273581 -3.30127059238685,0.13637147558676976,3.16489911680008 -2.9482197045774035,0.014544665667881929,2.9336750389095214 -4.485170545661117,0.350587558806597,4.13458298685452 -5.419118838556791,0.5899176868546331,4.829201151702158 -2.565817918097636,0.3922440450997323,2.173573872997904 -3.664000617875772,0.43747492202372906,3.226525695852043 -4.460786502460156,0.9041586944937484,3.5566278079664073 -4.265013712582255,0.34825546702330035,3.9167582455589547 -4.732137103853917,0.5139894891598108,4.218147614694106 -3.9145284968996665,0.7836530127411431,3.1308754841585236 -2.8680989686184115,0.39654278232127016,2.4715561862971414 -3.640151765227455,0.6220867002278735,3.0180650649995817 -4.495577545765346,0.8623637087467452,3.6332138370186007 -4.7550514728787645,0.9495206236576421,3.8055308492211224 -4.189954761007185,0.14707348092903794,4.042881280078147 -3.8415919339712667,0.9265876251614944,2.915004308809772 -3.2944685397595115,0.4921162930795382,2.8023522466799733 -3.732937674301854,0.25824438829895835,3.474693286002896 -3.4672335858223042,0.4591357562382613,3.008097829584043 -5.05677998154862,0.980032575285477,4.076747406263143 -2.7838417926757346,0.4926180939928696,2.291223698682865 -2.7622590246671255,0.3287516102875082,2.4335074143796174 -3.025419694308955,0.6334008543167258,2.3920188399922293 -4.273681384122438,0.24014561877819307,4.0335357653442445 -3.5256781163582347,0.07586332810866392,3.449814788249571 -3.557585121210263,0.12887972191064923,3.4287053992996137 -3.7975637820446377,0.12804583895777244,3.6695179430868654 -3.3153434857849144,0.15190269351229435,3.16344079227262 -4.272692555219036,0.13882717264941014,4.133865382569626 -4.655615978515096,0.6408747448032146,4.014741233711881 -5.320423236927069,0.18188008439914483,5.138543152527924 -4.140846129161142,0.3456672833238632,3.795178845837279 -3.541314472764257,0.8967884099060118,2.644526062858245 -4.049349733430115,0.47396164026287235,3.575388093167243 -4.094626143446727,0.6675577385210272,3.4270684049257003 -6.65131544064657,0.17231987120162984,6.47899556944494 -3.444994713155788,0.19228901880867078,3.2527056943471173 -4.101697728288644,0.04086861626647886,4.060829112022166 -4.579049481374347,0.16893506307216455,4.410114418302182 -4.868897609204094,0.2785903390319586,4.5903072701721355 -4.178811884516337,0.17701048427674682,4.00180140023959 -4.7424909485097455,0.08870253375705561,4.65378841475269 -4.248211460415375,0.12063587110060081,4.127575589314774 -4.077353136701042,0.46077876803272577,3.6165743686683163 -5.914733663475622,0.2063337184057925,5.7083999450698295 -5.501429502390642,0.36426986104807546,5.137159641342566 -3.5308944469800334,0.5034172708548569,3.0274771761251764 -2.8589412884743504,0.6903948286293653,2.1685464598449853 -2.1791029766300847,0.039312139841098936,2.1397908367889857 -2.3241937965296247,0.7994103989090426,1.524783397620582 -2.7057203850309772,0.6279003894909078,2.0778199955400694 -4.219378488973355,0.08175903194887191,4.137619457024483 -2.539812782292868,0.8735786241067772,1.666234158186091 -2.31290867445308,0.9208724005318132,1.3920362739212666 -2.7227084849144507,0.06107795985486375,2.661630525059587 -3.4132333900941276,0.2768776481472037,3.136355741946924 -2.7898701364777767,0.8062012797930613,1.9836688566847154 -2.99398658295463,0.7482596903836584,2.2457268925709717 -3.115739517490465,0.18452101935637732,2.931218498134088 -2.7374208875439376,0.2093493233367103,2.5280715642072273 -3.7176239467356607,0.370472102791382,3.3471518439442787 -3.997769978686842,0.4845229851910213,3.513246993495821 -6.040973454008404,0.618254771530296,5.422718682478108 -5.886540513081615,0.3689136395697724,5.517626873511842 -4.562133859865462,0.4625347161331479,4.099599143732314 -4.687916323695227,0.7474709381337565,3.9404453855614703 -5.788324707952346,0.0366832028905979,5.751641505061748 -7.094516967233457,0.25243694434402075,6.842080022889436 -4.105369403106122,0.7133495858845524,3.39201981722157 -4.7482721749077665,0.8952068376871993,3.853065337220567 -5.7856127441417,0.5116774421156661,5.273935302026033 -5.185860093394901,0.5321134852653157,4.653746608129585 -5.465390472912875,0.10717201133977605,5.358218461573099 -6.238545873332988,0.44741236682345464,5.791133506509533 -5.024884387492808,0.5326172664550232,4.492267121037784 -3.884810902460437,0.24247050363472966,3.6423403988257075 -5.753259711609921,0.26924323094938096,5.48401648066054 -5.628975996476029,0.37728416310462265,5.251691833371407 -4.023462142086585,0.02007119777772637,4.003390944308858 -3.247140826622671,0.3220791655831783,2.925061661039493 -2.897764466599304,0.21144800699654465,2.6863164596027596 -4.01512018757607,0.32749735217791465,3.6876228353981553 -3.2061007027280075,0.11976213181925122,3.0863385709087563 -5.428920079651427,0.890527280739895,4.5383927989115325 -3.2772910453560398,0.5935924535540487,2.683698591801991 -3.094387604272433,0.6791023191444896,2.4152852851279434 -3.244893609701426,0.7891712386073383,2.4557223710940876 -4.1260179349411015,0.4984421989290573,3.6275757360120444 -3.2080285624252163,0.08692028808742369,3.1211082743377925 -3.708758764162887,0.5371065418185478,3.1716522223443393 -4.050859652389088,0.5868411180208791,3.4640185343682086 -3.7601037513845155,0.74543947418433,3.0146642772001853 -5.009946636761818,0.4316595462296794,4.578287090532139 -4.746204117276445,0.12758030279556376,4.618623814480881 -3.9060444252635125,0.28377590579872447,3.622268519464788 -2.9502769101093316,0.3630822963986351,2.5871946137106967 -3.2426417341353346,0.6459172413316012,2.5967244928037334 -4.1064585003350835,0.5707783046689119,3.5356801956661714 -3.504563331431983,0.35609672589784624,3.1484666055341366 -5.007054160069279,0.9865152487929797,4.020538911276299 -2.226802627102628,0.6057748193568872,1.6210278077457412 -3.5875377222799236,0.23722679173599448,3.3503109305439294 -2.791750668497269,0.10178247262040374,2.6899681958768653 -3.8744770629795027,0.15285913918433203,3.7216179237951708 -3.761779365179259,0.24595772838450813,3.515821636794751 -4.3671919768597665,0.16068137325955567,4.2065106036002105 -3.3743909710549276,0.18656702405130576,3.187823947003622 -3.3127958838254634,0.2850951686938471,3.0277007151316164 -4.416388101786428,0.17337359529475482,4.243014506491673 -4.479188872596241,0.8967654246264252,3.5824234479698163 -3.6933636402407726,0.08023374566164221,3.6131298945791306 -3.8286159143304506,0.5245113895702547,3.304104524760196 -3.0190119363311365,0.4103968269896615,2.608615109341475 -2.9425420807082268,0.9823786169086064,1.9601634637996204 -3.724999224794485,0.11203890216805235,3.612960322626433 -4.393703461961863,0.39785559904574164,3.995847862916121 -1.8075441763008482,0.9694704332753689,0.8380737430254793 -2.8831249103695447,0.8655071258939803,2.0176177844755645 -3.3784176821406398,0.8170720709492799,2.56134561119136 -3.405006888788806,0.25790282704493983,3.147104061743866 -3.3070155818350604,0.17088758739006582,3.1361279944449945 -3.1148303281054814,0.668643219924431,2.4461871081810505 -2.657400773234067,0.9293759891275858,1.728024784106481 -2.504430668664107,0.5567628930139298,1.947667775650177 -3.393376589837642,0.5716126894698998,2.821763900367742 -3.894495300982204,0.2799790936602842,3.6145162073219197 -4.270449659939358,0.7694929331919369,3.5009567267474218 -3.453903404299285,0.18704374855752337,3.2668596557417615 -3.5236298108735706,0.3236792364042437,3.199950574469327 -4.101457407027369,0.4254364386164168,3.676020968410952 -4.1541072409139455,0.507610378684455,3.6464968622294904 -5.935007328832963,0.24240973241508024,5.692597596417883 -3.011041693688358,0.11483682473920354,2.8962048689491544 -4.050335064157297,0.6106200424416326,3.4397150217156645 -4.470607011183457,0.28863055324025577,4.181976457943201 -3.696654450630967,0.5812382214226123,3.1154162292083547 -4.541517254884805,0.1543627152742023,4.387154539610602 -4.609956136669925,0.4811401018548175,4.128816034815107 -4.143278257027482,0.5325894325515859,3.6106888244758966 -4.0743751666375125,0.05182353682242691,4.022551629815085 -5.2786860435701035,0.3366042781939206,4.942081765376183 -5.494044763404392,0.13441467693897424,5.359630086465417 -5.417403289731059,0.06337497047276774,5.354028319258291 -4.0066799559902515,0.9899602323899452,3.016719723600306 -3.647925324089788,0.32235384497472297,3.325571479115065 -3.405108572855658,0.8098744458546349,2.595234127001023 -4.331143857852142,0.25464065476376385,4.076503203088379 -5.997493454301411,0.6815027222239293,5.3159907320774815 -3.8098724721623953,0.7602278598896866,3.0496446122727088 -4.0624968596847495,0.5956387406078443,3.466858119076905 -4.247324123253859,0.47157618855015837,3.775747934703701 -4.583620796274766,0.41184091414726853,4.171779882127497 -4.581337276008597,0.3488682665429953,4.232469009465602 -4.102534010263541,0.9295291442478258,3.173004866015715 -4.272431479480933,0.8306194077877291,3.441812071693204 -4.152526514201796,0.9650269106665126,3.187499603535283 -5.44910071000015,0.12429722348554473,5.324803486514605 -5.656009760802475,0.7308674752036443,4.925142285598831 -3.968751890116349,0.9383404568210378,3.0304114332953116 -3.5022682867751964,0.18123306616566015,3.321035220609536 -3.1215784383571807,0.0664962673667775,3.0550821709904032 -4.032380636238438,0.741120649290059,3.2912599869483787 -3.6838341011193187,0.5744731131799119,3.109360987939407 -5.227986062256077,0.8418287767582721,4.386157285497805 -3.0437601661273894,0.1397723766262895,2.9039877895011 -3.1864272389735757,0.7952673118598902,2.3911599271136854 -2.7066134532672974,0.20162732004774453,2.504986133219553 -4.079359324364404,0.1636559428657045,3.9157033814986995 -3.546742168070164,0.16426579793099294,3.3824763701391714 -4.413254740922689,0.8145747202313821,3.5986800206913068 -3.599675789746499,0.6651972206962001,2.934478569050299 -3.549357355441469,0.5230654247691193,3.0262919306723495 -4.417149939538114,0.3588304841235025,4.058319455414612 -4.268456694760429,0.8772005408131083,3.391256153947321 -4.293265172954261,0.3924451074226354,3.900820065531625 -4.152520847366211,0.8165994394715771,3.335921407894634 -2.798255551826119,0.4391349085702184,2.359120643255901 -3.063019997803726,0.3769444294249076,2.6860755683788184 -4.124604180685198,0.46267978566960644,3.6619243950155917 -5.253003193204011,0.3013778741641421,4.951625319039868 -3.2983451132061163,0.7476093801762511,2.5507357330298652 -3.1939089042566797,0.5027203900924792,2.6911885141642005 -4.040051914600392,0.2322126951468173,3.807839219453575 -4.069661629199208,0.8995745732745685,3.1700870559246397 -3.897738123944754,0.3838912213732114,3.5138469025715424 -3.895254395475186,0.5435528611139886,3.3517015343611973 -3.5056197389224235,0.9064721109645469,2.5991476279578767 -3.510219677289319,0.6242379959139921,2.885981681375327 -4.480077955987175,0.11689804070836407,4.3631799152788115 -4.631161494773433,0.9398321236134751,3.6913293711599584 -5.689934695098975,0.6277080530714179,5.062226642027557 -5.25951998983163,0.3349056146570861,4.924614375174544 -4.125705664075235,0.13927207266338726,3.9864335914118474 -3.8848351659938336,0.794025189270296,3.0908099767235377 -5.137719912015215,0.6200727559285135,4.517647156086701 -6.961197183177653,0.5334610919763215,6.427736091201331 -3.5846987347218167,0.8938925830509576,2.6908061516708592 -4.531546485256404,0.7885972112245307,3.742949274031873 -5.493385220266235,0.15167487973275118,5.341710340533484 -4.736892972349524,0.31172206779554823,4.425170904553975 -5.1462166935729305,0.24848913981446574,4.897727553758465 -4.736208124445718,0.743946292572677,3.992261831873041 -4.707387881821839,0.03353243473577938,4.67385544708606 -4.748130699824885,0.5698896848713165,4.178241014953568 -6.291802545085838,0.7624586857406905,5.529343859345148 -6.565955820567193,0.8767656367617495,5.689190183805443 +1.3841426995674244,0.0,1.3841426995674244 +0.9317131259126159,0.0,0.9317131259126159 +0.9493987145207906,0.0,0.9493987145207906 +0.9358846495188261,0.0,0.9358846495188261 +0.6825387199829023,0.0,0.6825387199829023 +0.1894721583937541,0.0,0.1894721583937541 +0.650803071599901,0.0,0.650803071599901 +0.28016013642645926,0.0,0.28016013642645926 +1.200097532739648,0.0,1.200097532739648 +0.47542590909416294,0.0,0.47542590909416294 +1.0598821706504924,0.0,1.0598821706504924 +0.6104512662770104,0.0,0.6104512662770104 +0.5747609292601418,0.0,0.5747609292601418 +0.6177334406503692,0.0,0.6177334406503692 +0.5693353315284378,0.0,0.5693353315284378 +0.44500005576030155,0.0,0.44500005576030155