From 48ada9fb3b0053c4aa6d6f1bb801d236ce3e9462 Mon Sep 17 00:00:00 2001 From: ZHG Date: Tue, 2 Jul 2019 15:37:58 +0200 Subject: [PATCH 01/21] Added hybrid method and fixed the bug for checking result but with hybrid fails with 2 processes meanwhile it works with number of processes other than 2 --- linbox/solutions/solve.h | 24 +++++++++++++----------- linbox/solutions/solve/solve-cra.h | 5 +++++ tests/test-solve-full.C | 20 +++++++++++++++++--- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index e827167c65..709086a248 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -169,17 +169,19 @@ namespace LinBox { Element xDen; solve(xNum, xDen, A, b, tag, m); - - // The denominator being zero means computation failure - if (b.field().isZero(xDen)) { - throw LinboxError("Rational solve failed."); - } - - // Copy result back to RatVector - auto iXNum = xNum.begin(); - for (auto iX = x.begin(); iX != x.end(); ++iX) { - *iX = typename RatVector::value_type(*iXNum, xDen); - ++iXNum; + + if (m.master()) { + // The denominator being zero means computation failure + if (b.field().isZero(xDen)) { + throw LinboxError("Rational solve failed."); + } + + // Copy result back to RatVector + auto iXNum = xNum.begin(); + for (auto iX = x.begin(); iX != x.end(); ++iX) { + *iX = typename RatVector::value_type(*iXNum, xDen); + ++iXNum; + } } return x; diff --git a/linbox/solutions/solve/solve-cra.h b/linbox/solutions/solve/solve-cra.h index 2141b39dbd..2588211d17 100644 --- a/linbox/solutions/solve/solve-cra.h +++ b/linbox/solutions/solve/solve-cra.h @@ -33,6 +33,7 @@ #pragma once #include +#include #include #include #include @@ -179,6 +180,10 @@ namespace LinBox { LinBox::ChineseRemainderDistributed cra(hadamardLogBound, m.pCommunicator); cra(num, den, iteration, primeGenerator); } + else if (dispatch == Dispatch::Combined) { + LinBox::HybridChineseRemainder cra(hadamardLogBound, m.pCommunicator); + cra(num, den, iteration, primeGenerator); + } #endif else { throw LinBox::NotImplementedYet("Integer CRA Solve with specified dispatch type is not implemented yet."); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 1739664c66..67accd8016 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,6 +97,10 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); + + std::cout << "RA " << RA << std::endl; + std::cout << "Rb " << Rb << std::endl; + std::cout << "x " << x << std::endl; VectorDomain VD(RA.field()); if (!VD.areEqual(RAx, Rb)) { @@ -112,7 +116,7 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R { using ResultVector = DenseVector; - if (verbose) { + if (verbose && method.master()) { std::cout << "--- Testing " << SolveMethod::name() << " on " << type_to_string(A) << " over "; A.field().write(std::cout) << " of size " << A.rowdim() << "x" << A.coldim() << std::endl; } @@ -141,10 +145,14 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R bool ok = true; try { solve(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + if (method.master()) { + ok = ok && check_result(x, A, b, RA, Rb); + } solveInPlace(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + if (method.master()) { + ok = ok && check_result(x, A, b, RA, Rb); + } } catch (...) 
{
 print_error(x, A, b, "throws error");
 return false;
@@ -235,6 +243,8 @@ int main(int argc, char** argv)
 method.dispatch = Dispatch::Auto;
 if (dispatchString == "Distributed")
 method.dispatch = Dispatch::Distributed;
+ else if (dispatchString == "Combined")
+ method.dispatch = Dispatch::Combined;
 else if (dispatchString == "Sequential")
 method.dispatch = Dispatch::Sequential;
 else if (dispatchString == "SMP")
@@ -263,6 +273,7 @@ int main(int argc, char** argv)
 bool ok = true;
 do {
 // ----- Rational Auto
+ #if 0
 ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 // @fixme Dixon does not compile
@@ -274,7 +285,9 @@ int main(int argc, char** argv)
 // ----- Rational CRA
 // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails
+ #endif
 ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+ #if 0
 ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
@@ -347,6 +360,7 @@ int main(int argc, char** argv)
 // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
 // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
 // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
+ #endif
 if (!ok) {
 std::cerr << "Failed with seed: " << seed << std::endl;

From c315e0d9dd12a7e806588f79170200d2349845ba Mon Sep 17 00:00:00 2001
From: ZHG
Date: Wed, 3 Jul 2019 10:53:30 +0200
Subject: [PATCH 02/21] Added hybrid method with OpenMP and fixed the bug for 2 processes
---
 linbox/algorithms/cra-hybrid.h | 388 +++++++++++++++++++++++++++++++++
 linbox/solutions/solve.h | 3 +-
 tests/test-solve-full.C | 12 +-
 3 files changed, 393 insertions(+), 10 deletions(-)
 create mode 100644 linbox/algorithms/cra-hybrid.h

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
new file mode 100644
index 0000000000..fc83d801e0
--- /dev/null
+++ b/linbox/algorithms/cra-hybrid.h
@@ -0,0 +1,388 @@
+/* Copyright (C) 2007 LinBox
+ * Updated by Hongguang ZHU
+ * Written by bds and zw
+ * author: B. David Saunders and Zhendong Wan
+ * parallelized for BOINC computing by Bryan Youse
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + + +#pragma once + +#include +#include +#include + +#include "linbox/algorithms/cra-domain.h" +#include "linbox/algorithms/rational-cra.h" +#include "linbox/algorithms/rational-cra-var-prec.h" +#include "linbox/integer.h" +#include "linbox/randiter/random-prime.h" +#include "linbox/solutions/methods.h" +#include "linbox/util/mpicpp.h" +#include "linbox/util/timer.h" + + +#include +#include "linbox/randiter/random-prime.h" + +#include "linbox/algorithms/cra-domain-omp.h" + +#if defined(__LINBOX_HAVE_MPI) +namespace LinBox +{ + + /** + * @fixme This hybrid CRA does no work when lauched with mpirun -np 2. + * This might have been designed so that does not work, but if so, find out why and + * and a comment here. + */ + template + struct HybridChineseRemainder { + typedef typename CRABase::Domain Domain; + typedef typename CRABase::DomainElement DomainElement; + protected: + CRABase Builder_; + Communicator* _commPtr; + unsigned int _numprocs; + double HB;//hadamard bound + + public: + template + HybridChineseRemainder(const Param& b, Communicator *c) : + Builder_(b), _commPtr(c), _numprocs(c->size()) + , HB(b)//Init with hadamard bound + {} + + int getNiter(){ + return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); + } + + /** \brief The CRA loop. + * + * termination condition. + * + * \param Iteration Function object of two arguments, \c + * Iteration(r, p), given prime \c p it outputs residue(s) \c + * r. This loop may be parallelized. \p Iteration must be + * reentrant, thread safe. For example, \p Iteration may be + * returning the coefficients of the minimal polynomial of a + * matrix \c mod \p p. + @warning we won't detect bad primes. + * + * \param primeg RandIter object for generating primes. 
+ * \param[out] res an integer + */ + template + Integer & operator() (Integer& res, Function& Iteration, PrimeIterator& primeg) + { + // defer to standard CRA loop if no parallel usage is desired + if(_commPtr == 0 || _commPtr->size() == 1) { + ChineseRemainder< CRABase > sequential(Builder_); + return sequential(res, Iteration, primeg); + } + + para_compute(res, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(res); + } + else{ + return res; + } + } + + template + Integer & operator() (Integer& num, Integer& den, Function& Iteration, PrimeIterator& primeg) + { + + // defer to standard CRA loop if no parallel usage is desired + if(_commPtr == 0 || _commPtr->size() == 1) { + RationalChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, den, Iteration, primeg); + } + para_compute(num, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(num,den); + } + else{ + return num; + } + } + + + template + Vect & operator() (Vect& num, Integer& den, Function& Iteration, PrimeIterator& primeg) + { + // if there is no communicator or if there is only one process, + // then proceed normally (without parallel) + if(_commPtr == 0 || _commPtr->size() == 1) { + + RationalChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, den, Iteration, primeg); + + } + para_compute(num, Iteration, primeg); + + if(_commPtr->rank() == 0){ + return Builder_.result(num,den); + } + else{ + return num; + } + } + + template + Vect & operator() (Vect& num, Function& Iteration, PrimeIterator& primeg) + { + // if there is no communicator or if there is only one process, + // then proceed normally (without parallel) + if(_commPtr == 0 || _commPtr->size() == 1) { + + ChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, Iteration, primeg); + + } + para_compute(num, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(num); + } + else{ + return num; + } + } + + + template< class Function, class Domain, class ElementContainer> + void solve_with_prime(int m_primeiter, + Function& Iteration, std::vector& VECTORdomains, + ElementContainer& VECTORresidues + ) + { + + VECTORdomains[ omp_get_thread_num()] = Domain(m_primeiter); + + Iteration(VECTORresidues, VECTORdomains[ omp_get_thread_num()] + + ); + + VECTORresidues.push_back(m_primeiter); + + } + + + template + void compute_task(pFunc& pF, std::vector& m_primeiters, + Function& Iteration, std::vector& VECTORdomains, + std::vector& VECTORresidues, size_t Ntask) + { + + int Nthread = Ntask; + +#pragma omp parallel +#pragma omp single + Nthread=omp_get_num_threads(); +//TODO: Replace the OMP directives with Paladin +#pragma omp parallel for num_threads(Nthread) schedule(dynamic,1) + for(auto j=0u;j + void worker_process_task(Function& Iteration, Vect &r) + { + int Ntask=0; + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + ++gen; + _commPtr->recv(Ntask, 0); + + if(Ntask!=0){ + std::unordered_set prime_used; + + size_t Nthread = Ntask; +#pragma omp parallel +{ +#pragma omp single + Nthread=omp_get_num_threads(); +} + + std::vector> VECTORresidues;VECTORresidues.resize(Ntask); + std::vector VECTORdomains;VECTORdomains.resize(Nthread); + std::vector m_primeiters;m_primeiters.reserve(Ntask); + + for(auto j=0;jBuilder_.noncoprime(*gen) ) + ++gen; + m_primeiters.push_back(*gen); + + } + + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORdomains, VECTORresidues, 
Ntask); + + for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); + } + + + }; + + } + + template + void para_compute( Vect& num, Function& Iteration, PrimeIterator& primeg) + { + + Domain D(*primeg); + BlasVector r(D); + +//double starttime; +//double endtime; + + // parent propcess + if(_commPtr->rank() == 0){ + + master_process_task(Iteration, D, r); + + } + // child process + else{ + +//starttime = omp_get_wtime(); + worker_process_task(Iteration, r); +//endtime = MPI_Wtime(); +//std::cout<<" process("<<_commPtr->rank()<<") used total CPU time (seconds): " << endtime-starttime< + void master_recv_residues(Vect &r, int &pp, int &Nrecv) + { + r.resize (r.size()+1); + + //receive the beginnin and end of a vector in heapspace + _commPtr->recv(r.begin(), r.end(), MPI_ANY_SOURCE, 0); + + //Update the number of iterations for the next step + Nrecv--; + + //Store the corresponding prime number + pp = r[r.size()-1]; + + //Restructure the vector without added prime number + r.resize (r.size()-1); + + } + + template + void master_compute(Vect &r) + { + + int pp; + +#ifdef __Detailed_Time_Measurement + Timer chrono; +#endif + int Nrecv=this->getNiter(); + + while(Nrecv > 0 ){ + + master_recv_residues(r, pp, Nrecv); + + Domain D(pp); + +#ifdef __Detailed_Time_Measurement + chrono.start(); +#endif + Builder_.progress(D, r); +#ifdef __Detailed_Time_Measurement + chrono.stop(); + std::cout<<"Builder_.progress(D, r) in the manager process used CPU time (seconds): " < + void master_process_task(Function& Iteration, Domain &D, Vect &r) + { + int vNtask_per_proc[_commPtr->size() - 1]; + + master_init(vNtask_per_proc, Iteration, D, r); + + master_compute(r); + + } + + template + void master_init(int *vNtask_per_proc, Function& Iteration, Domain &D, Vect &r) + { + int procs = _commPtr->size(); + + int Niter=this->getNiter(); + + //Compute nb of tasks ought to be realized for each process + if(Niter<(procs-1)){ + + for(long i=1; isend(vNtask_per_proc[i - 1], i); + } + for(long i=Niter+1; isend(vNtask_per_proc[i - 1], i); + } + + }else{ + for(long i=1; isend(vNtask_per_proc[i - 1], i); + } + for(long i=Niter%(procs-1)+1; isend(vNtask_per_proc[i - 1], i); + } + } + + //Initialize the buider and the receiver vector r + Builder_.initialize( D, Iteration(r, D) ); + } + + }; + +} + +#endif + +// Local Variables: +// mode: C++ +// tab-width: 4 +// indent-tabs-mode: nil +// c-basic-offset: 4 +// End: +// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index 709086a248..28ff6f5f89 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -169,13 +169,12 @@ namespace LinBox { Element xDen; solve(xNum, xDen, A, b, tag, m); - + if (m.master()) { // The denominator being zero means computation failure if (b.field().isZero(xDen)) { throw LinboxError("Rational solve failed."); } - // Copy result back to RatVector auto iXNum = xNum.begin(); for (auto iX = x.begin(); iX != x.end(); ++iX) { diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 67accd8016..95be521ffc 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,10 +97,10 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); - - std::cout << "RA " << RA << std::endl; - std::cout << "Rb " << Rb << std::endl; - std::cout << "x " << x << std::endl; + + //std::cout << "RA " << RA << std::endl; + //std::cout << "Rb " << Rb << std::endl; 
+ //std::cout << "x " << x << std::endl; VectorDomain VD(RA.field()); if (!VD.areEqual(RAx, Rb)) { @@ -273,7 +273,6 @@ int main(int argc, char** argv) bool ok = true; do { // ----- Rational Auto - #if 0 ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // @fixme Dixon does not compile @@ -285,9 +284,7 @@ int main(int argc, char** argv) // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails - #endif ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - #if 0 ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -360,7 +357,6 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); - #endif if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; From bacc3b57e01443a439db1abdb75af76f96851e9e Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 3 Jul 2019 14:21:03 +0200 Subject: [PATCH 03/21] Tested hybrid after having merged with master branch --- benchmarks/benchmark-dense-solve.C | 1 + linbox/algorithms/cra-hybrid.h | 49 ++++++++++++++++++++---------- tests/test-solve-full.C | 8 +++-- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 504cb69e06..329458a42a 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -164,6 +164,7 @@ int main(int argc, char** argv) if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; + else if (args.dispatchString == "Combined") method.dispatch = Dispatch::Combined; else method.dispatch = Dispatch::Auto; // Real benchmark diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index fc83d801e0..fa1eba9d4b 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -50,11 +50,6 @@ namespace LinBox { - /** - * @fixme This hybrid CRA does no work when lauched with mpirun -np 2. - * This might have been designed so that does not work, but if so, find out why and - * and a comment here. 
- */ template struct HybridChineseRemainder { typedef typename CRABase::Domain Domain; @@ -176,12 +171,11 @@ namespace LinBox ElementContainer& VECTORresidues ) { - +//std::cout<<"Thread("<& VECTORresidues, size_t Ntask) { - int Nthread = Ntask; +#if 1 + int Nthread = Ntask; #pragma omp parallel -#pragma omp single +{ Nthread=omp_get_num_threads(); -//TODO: Replace the OMP directives with Paladin -#pragma omp parallel for num_threads(Nthread) schedule(dynamic,1) + omp_set_num_threads(Nthread); +#pragma omp for //num_threads(Nthread) schedule(dynamic,1) for(auto j=0u;j Date: Thu, 4 Jul 2019 09:22:28 +0200 Subject: [PATCH 04/21] Instant clean up --- linbox/algorithms/cra-hybrid.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index fa1eba9d4b..a74283904e 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -276,8 +276,10 @@ namespace LinBox Domain D(*primeg); BlasVector r(D); -//double starttime; -//double endtime; +#ifdef __Detailed_Time_Measurement + double starttime; + double endtime; +#endif // parent propcess if(_commPtr->rank() == 0){ @@ -287,11 +289,14 @@ namespace LinBox } // child process else{ - -//starttime = omp_get_wtime(); +#ifdef __Detailed_Time_Measurement + chrono.start(); +#endif worker_process_task(Iteration, r); -//endtime = MPI_Wtime(); -//std::cout<<" process("<<_commPtr->rank()<<") used total CPU time (seconds): " << endtime-starttime<rank()<<") used total CPU time (seconds): " << endtime-starttime< Date: Thu, 4 Jul 2019 11:54:29 +0200 Subject: [PATCH 05/21] cleanup unused param --- linbox/algorithms/cra-hybrid.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index a74283904e..e165dae663 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -167,15 +167,16 @@ namespace LinBox template< class Function, class Domain, class ElementContainer> void solve_with_prime(int m_primeiter, - Function& Iteration, std::vector& VECTORdomains, + Function& Iteration, ElementContainer& VECTORresidues ) { //std::cout<<"Thread("< void compute_task(pFunc& pF, std::vector& m_primeiters, - Function& Iteration, std::vector& VECTORdomains, + Function& Iteration, std::vector& VECTORresidues, size_t Ntask) { @@ -198,7 +199,7 @@ namespace LinBox #pragma omp for //num_threads(Nthread) schedule(dynamic,1) for(auto j=0u;j> VECTORresidues;VECTORresidues.resize(Ntask); - std::vector VECTORdomains;VECTORdomains.resize(Nthread); std::vector m_primeiters;m_primeiters.reserve(Ntask); for(auto j=0;jBuilder_), m_primeiters, Iteration, VECTORdomains, VECTORresidues, Ntask); + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); From 4dccfc3c6adbb0660fb7f304a780e0af60c946eb Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 4 Jul 2019 17:11:24 +0200 Subject: [PATCH 06/21] Taken off critical section once compiled with commentator disabled and no more segmentation fault but still not yet clean enough for PR --- linbox/algorithms/cra-hybrid.h | 37 ++++++---------------------------- tests/test-solve-full.C | 4 ++-- 2 files changed, 8 insertions(+), 33 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index a74283904e..ca4cd91d9b 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ 
-171,14 +171,11 @@ namespace LinBox ElementContainer& VECTORresidues ) { -//std::cout<<"Thread("<& VECTORresidues, size_t Ntask) { - -#if 1 - int Nthread = Ntask; -#pragma omp parallel -{ - Nthread=omp_get_num_threads(); - omp_set_num_threads(Nthread); -#pragma omp for //num_threads(Nthread) schedule(dynamic,1) - for(auto j=0u;j Date: Thu, 4 Jul 2019 17:50:00 +0200 Subject: [PATCH 07/21] Taken off critical section once compiled with commentator disabled and cleaned up --- linbox/algorithms/cra-hybrid.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 921ceb5521..8590451e18 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -1,8 +1,4 @@ /* Copyright (C) 2007 LinBox - * Updated by Hongguang ZHU - * Written by bds and zw - * author: B. David Saunders and Zhendong Wan - * parallelized for BOINC computing by Bryan Youse * * * ========LICENCE======== @@ -174,7 +170,6 @@ namespace LinBox Domain D(m_primeiter); //@fixme: The commentator within the following function call to other functions will crash if not disable the commentator while compiling Iteration(VECTORresidues, D ); - VECTORresidues.push_back(m_primeiter); } @@ -192,7 +187,7 @@ namespace LinBox TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ for(auto j=iter.begin(); j!=iter.end(); ++j) { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); + solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); } }) }); From 012bb285be6831fa17a49d33af75e0e70deaf10e Mon Sep 17 00:00:00 2001 From: ZHG Date: Fri, 5 Jul 2019 10:52:09 +0200 Subject: [PATCH 08/21] Cleaned up for validation/benchmark on server --- linbox/algorithms/cra-hybrid.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 8590451e18..c2bc8ce6c9 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -20,7 +20,6 @@ * ========LICENCE======== */ - #pragma once #include @@ -36,7 +35,6 @@ #include "linbox/util/mpicpp.h" #include "linbox/util/timer.h" - #include #include "linbox/randiter/random-prime.h" @@ -45,7 +43,13 @@ #if defined(__LINBOX_HAVE_MPI) namespace LinBox { - + /* + * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading + * will not be available. + * Multithreading is only active if launched with more than 2 processes if each process is mapped to one node. 
+ * To use multithreading, user needs to set the environment useing export for the number of cores/threads on + * each node if each process is mapped to a node + */ template struct HybridChineseRemainder { typedef typename CRABase::Domain Domain; @@ -168,7 +172,7 @@ namespace LinBox ) { Domain D(m_primeiter); - //@fixme: The commentator within the following function call to other functions will crash if not disable the commentator while compiling + //@fixme: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled Iteration(VECTORresidues, D ); VECTORresidues.push_back(m_primeiter); } From 794f93cd9b5de5e350f99d0a1f6609aa4e807e23 Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 10 Jul 2019 13:44:35 +0200 Subject: [PATCH 09/21] Use MPI_Wtime for multiprocessing timing --- benchmarks/benchmark-dense-solve.C | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index af030346b8..9c1f1a3072 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -84,8 +84,12 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met DenseMatrix A(F, args.n, args.n); DenseVector B(F, A.rowdim()); Timer chrono; - + double t1=0.0,t2=0.0; if (method.master()) { + + if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ + t1 = MPI_Wtime(); + } chrono.start(); PAR_BLOCK { FFLAS::pfrand(F, randIter, args.n, args.n, A.getPointer(), args.n); } chrono.stop(); @@ -129,7 +133,10 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met chrono.stop(); timebits[0] = chrono.usertime(); - timebits[1] = chrono.realtime(); + if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ + t2 = MPI_Wtime(); + timebits[1] = t2 - t1; + }else{ timebits[1] = chrono.realtime(); } setBitsize(timebits[2], args.q, X); } } @@ -209,4 +216,4 @@ int main(int argc, char** argv) } return 0; -} \ No newline at end of file +} From a849b2cea589ab04b1859638a8f58add61508d0a Mon Sep 17 00:00:00 2001 From: ZHG Date: Fri, 12 Jul 2019 09:06:49 +0200 Subject: [PATCH 10/21] Fall back to previous timing method as no big differece --- benchmarks/benchmark-dense-solve.C | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 9c1f1a3072..3e1f32ecc3 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -84,12 +84,9 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met DenseMatrix A(F, args.n, args.n); DenseVector B(F, A.rowdim()); Timer chrono; - double t1=0.0,t2=0.0; + if (method.master()) { - if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ - t1 = MPI_Wtime(); - } chrono.start(); PAR_BLOCK { FFLAS::pfrand(F, randIter, args.n, args.n, A.getPointer(), args.n); } chrono.stop(); @@ -133,10 +130,7 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met chrono.stop(); timebits[0] = chrono.usertime(); - if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ - t2 = MPI_Wtime(); - timebits[1] = t2 - t1; - }else{ timebits[1] = chrono.realtime(); } + timebits[1] = chrono.realtime(); setBitsize(timebits[2], args.q, X); } } From 124f947fa22ef3cdfbbb857c244ba4b5221b754f Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 18 Jul 2019 14:40:54 +0200 Subject: 
[PATCH 11/21] cleaned up before PR --- linbox/algorithms/cra-hybrid.h | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index c2bc8ce6c9..072b12cca0 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -183,20 +183,20 @@ namespace LinBox Function& Iteration, std::vector& VECTORresidues, size_t Ntask) { - - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); - SYNCH_GROUP({ - FORBLOCK1D(iter, Ntask, sp,{ - TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) - { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); - } - }) - }); - }); - } + //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + SYNCH_GROUP({ + FORBLOCK1D(iter, Ntask, sp,{ + TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ + for(auto j=iter.begin(); j!=iter.end(); ++j) + { + solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); + } + }) + }); + }); + } } @@ -205,26 +205,26 @@ namespace LinBox void worker_process_task(Function& Iteration, Vect &r) { int Ntask=0; - //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); - LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); ++gen; _commPtr->recv(Ntask, 0); if(Ntask!=0){ std::unordered_set prime_used; - std::vector> VECTORresidues;VECTORresidues.resize(Ntask); - std::vector m_primeiters;m_primeiters.reserve(Ntask); + std::vector> VECTORresidues;VECTORresidues.resize(Ntask); + std::vector m_primeiters;m_primeiters.reserve(Ntask); for(auto j=0;jBuilder_.noncoprime(*gen) ) + while(this->Builder_.noncoprime(*gen) || prime_used.find(*gen)!=prime_used.end()) ++gen; m_primeiters.push_back(*gen); - + prime_used.insert(*gen); } - compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); From 651fcf2656fd047d6da4060b063dd6df7a60e8ab Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 29 Jul 2019 11:54:04 +0200 Subject: [PATCH 12/21] Added more explicit comments --- linbox/algorithms/cra-hybrid.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 072b12cca0..ba7d55bae4 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -43,10 +43,10 @@ #if defined(__LINBOX_HAVE_MPI) namespace LinBox { - /* + /* * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading * will not be available. - * Multithreading is only active if launched with more than 2 processes if each process is mapped to one node. + * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node. 
* To use multithreading, user needs to set the environment useing export for the number of cores/threads on * each node if each process is mapped to a node */ @@ -127,7 +127,7 @@ namespace LinBox Vect & operator() (Vect& num, Integer& den, Function& Iteration, PrimeIterator& primeg) { // if there is no communicator or if there is only one process, - // then proceed normally (without parallel) + // then proceed sequentially (without parallel) if(_commPtr == 0 || _commPtr->size() == 1) { RationalChineseRemainder< CRABase > sequential(Builder_); @@ -148,7 +148,7 @@ namespace LinBox Vect & operator() (Vect& num, Function& Iteration, PrimeIterator& primeg) { // if there is no communicator or if there is only one process, - // then proceed normally (without parallel) + // then proceed sequentially (without parallel) if(_commPtr == 0 || _commPtr->size() == 1) { ChineseRemainder< CRABase > sequential(Builder_); @@ -172,7 +172,7 @@ namespace LinBox ) { Domain D(m_primeiter); - //@fixme: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled + //@warning: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled as the commentator is not thread safe Iteration(VECTORresidues, D ); VECTORresidues.push_back(m_primeiter); } @@ -334,7 +334,7 @@ namespace LinBox int Niter=this->getNiter(); - //Compute nb of tasks ought to be realized for each process + //Compute and send nb of tasks ought to be realized for each worker process if(Niter<(procs-1)){ for(long i=1; i Date: Mon, 29 Jul 2019 11:56:50 +0200 Subject: [PATCH 13/21] Cleaned up for code review --- linbox/algorithms/cra-hybrid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index ba7d55bae4..a34cdb047a 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -44,10 +44,10 @@ namespace LinBox { /* - * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading + * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading * will not be available. * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node. 
- * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
+ * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
 * each node if each process is mapped to a node
 */
 template

From 888c037edb9aba1b32daab592fd78fce300fd68f Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:55:55 +0200
Subject: [PATCH 14/21] Fast fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index 203f456fa9..cd602d6b3f 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -137,6 +137,13 @@ namespace LinBox {
 unserialize(value, bytes);
 }
 }
+
+ template <> void Communicator::bcast(bool& value, int src)
+ {
+
+ MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);
+
+ }
 }

 // Local Variables:

From f79947cc7332cb27367da639684378bcaf8c598a Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:56:50 +0200
Subject: [PATCH 15/21] Fast fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index cd602d6b3f..06204058a9 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -138,9 +138,9 @@ namespace LinBox {
 }
 }
+ //Specialization of bcast for a single boolean value
 template <> void Communicator::bcast(bool& value, int src)
 {
-
 MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);

 }

From af0c44307614982d5e731958db4275ca2ee3153f Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:56:50 +0200
Subject: [PATCH 16/21] Quick fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index cd602d6b3f..06204058a9 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -138,9 +138,9 @@ namespace LinBox {
 }
 }
+ //Specialization of bcast for a single boolean value
 template <> void Communicator::bcast(bool& value, int src)
 {
-
 MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);

 }

From d0c279be129a17f4dd57ea44912cc9ac96728da7 Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 12:20:45 +0200
Subject: [PATCH 17/21] Quick fix for the compile-time error in the minpoly example program, as the communicatorp() method for Blackbox no longer exists
---
 examples/minpoly.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/minpoly.C b/examples/minpoly.C
index 395caa76f9..0e0579071d 100644
--- a/examples/minpoly.C
+++ b/examples/minpoly.C
@@ -75,7 +75,7 @@ int main (int argc, char **argv)
 #ifdef __LINBOX_HAVE_MPI
 Communicator C(&argc, &argv);
 process = C.rank();
- M.communicatorp(&C);
+ //M.communicatorp(&C);
 #endif
 Givaro::ZRing ZZ;

From b4ad8ba6ca47ecab7559f6d1ae803ea373473c9c Mon Sep 17 00:00:00 2001
From: Alexis Breust
Date: Tue, 20 Aug 2019 10:54:57 +0200
Subject: [PATCH 18/21] As Hadamard Bound is now expressed in log2, reduced hybrid iterations.
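
This drops the log2(e) = 1.442695040889 factor from getNiter(): the Hadamard bound handed to the builder is now already expressed in log2 rather than as a natural logarithm, so no extra conversion to a bit count is needed. A minimal standalone sketch of the iteration-count formula, assuming hadamardLogBound holds log2 of the Hadamard bound and primeBits is the bit size reported by the masked prime iterator (the helper name below is illustrative only, not LinBox API):

    #include <cmath>

    // Number of CRA iterations needed so that the product of the sampled primes
    // exceeds the Hadamard bound H: each prime contributes at least (primeBits - 1)
    // bits, so ceil(log2(H) / (primeBits - 1)) primes are sufficient.
    inline int niterFromLog2Bound(double hadamardLogBound, unsigned int primeBits)
    {
        return (int)std::ceil(hadamardLogBound / (double)(primeBits - 1));
    }

    // With a natural-log bound (the previous convention), the same count is
    // ceil(log2(e) * ln(H) / (primeBits - 1)), which is where the old
    // 1.442695040889 factor came from.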
---
 linbox/algorithms/cra-hybrid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
index a34cdb047a..d2fe748d6d 100644
--- a/linbox/algorithms/cra-hybrid.h
+++ b/linbox/algorithms/cra-hybrid.h
@@ -68,7 +68,7 @@ namespace LinBox
 {}
 int getNiter(){
- return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
+ return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
 }
 /** \brief The CRA loop.

From cae625b47840a40a5a1fd498da48ed30f4ba5b17 Mon Sep 17 00:00:00 2001
From: ZHG
Date: Wed, 21 Aug 2019 11:33:44 +0200
Subject: [PATCH 19/21] Fall back to the previous implementation: the termination estimate using the log of the Hadamard bound is incorrect, and the multithreaded worker process does not use the prime numbers in the same order, so the residues may not follow the iteration order through the prime number vector
---
 linbox/algorithms/cra-hybrid.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
index d2fe748d6d..0530b25bbc 100644
--- a/linbox/algorithms/cra-hybrid.h
+++ b/linbox/algorithms/cra-hybrid.h
@@ -43,12 +43,17 @@
 #if defined(__LINBOX_HAVE_MPI)
 namespace LinBox
 {
- /*
+ /* @Warning /!\
 * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading
 * will not be available.
- * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node.
+ * Multithreading is only active if launched with more than 2 processes while each process is mapped to
+ * one node.
 * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
- * each node if each process is mapped to a node
+ * each node if each process is mapped to a node however this only works for one run so this will not work
+ * for multiple executions where only the -t option is needed to set the number of threads
+ *
+ * For the execution on several nodes, each node needs to have at least 3 processes mapped otherwise no
+ * multithreading will be available according to the MPI standards
 */
 template
 struct HybridChineseRemainder {
 typedef typename CRABase::Domain Domain;
 typedef typename CRABase::DomainElement DomainElement;
@@ -68,7 +73,7 @@ namespace LinBox
 {}
 int getNiter(){
- return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
+ return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
 }
 /** \brief The CRA loop.
@@ -83,8 +88,6 @@ namespace LinBox
 * matrix \c mod \p p.
 @warning we won't detect bad primes.
 *
- * \param primeg RandIter object for generating primes.
- * \param[out] res an integer */ template Integer & operator() (Integer& res, Function& Iteration, PrimeIterator& primeg) @@ -205,8 +208,8 @@ namespace LinBox void worker_process_task(Function& Iteration, Vect &r) { int Ntask=0; - LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); - //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); ++gen; _commPtr->recv(Ntask, 0); From b09a5a625f2a0feeb847262876602c458b44f25b Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 28 Aug 2019 11:39:41 +0200 Subject: [PATCH 20/21] Adopted the FOR1D with MODE --- linbox/algorithms/cra-hybrid.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 0530b25bbc..62babd6afa 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -187,6 +187,7 @@ namespace LinBox std::vector& VECTORresidues, size_t Ntask) { //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t +#if 0 PAR_BLOCK{ auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); SYNCH_GROUP({ @@ -200,7 +201,16 @@ namespace LinBox }); }); } - +#else + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + SYNCH_GROUP({ + FOR1D(iter, Ntask, sp,MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ + solve_with_prime(m_primeiters[iter], Iteration, VECTORresidues[iter]); + }); + }); + } +#endif } From 95d1775951a748df0fc34c4b0d52e7e47b06dfba Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 29 Aug 2019 10:46:01 +0200 Subject: [PATCH 21/21] Taken off the factor of 1.40 for the Niter and cleaned up again --- benchmarks/benchmark-dense-solve.C | 3 +-- linbox/algorithms/cra-hybrid.h | 18 +----------------- linbox/solutions/methods.h | 1 - tests/test-solve-full.C | 16 +++++++--------- 4 files changed, 9 insertions(+), 29 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 3e1f32ecc3..926ad843be 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -145,7 +145,7 @@ int main(int argc, char** argv) {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, - {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'d', "-d", "Dispatch mode (any of: Auto, Sequential, Combined or Distributed).", TYPE_STR, &args.dispatchString}, {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " @@ -173,7 +173,6 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else if (args.dispatchString == "Combined") method.dispatch = Dispatch::Combined; else method.dispatch = Dispatch::Auto; diff --git a/linbox/algorithms/cra-hybrid.h 
b/linbox/algorithms/cra-hybrid.h index 62babd6afa..25dd61410e 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -73,7 +73,7 @@ namespace LinBox {} int getNiter(){ - return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); + return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); } /** \brief The CRA loop. @@ -187,21 +187,6 @@ namespace LinBox std::vector& VECTORresidues, size_t Ntask) { //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t -#if 0 - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); - SYNCH_GROUP({ - FORBLOCK1D(iter, Ntask, sp,{ - TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) - { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); - } - }) - }); - }); - } -#else PAR_BLOCK{ auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); SYNCH_GROUP({ @@ -210,7 +195,6 @@ namespace LinBox }); }); } -#endif } diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 881de19599..ee75eb0cb0 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -106,7 +106,6 @@ namespace LinBox { enum class Dispatch { Auto, //!< Let implementation decide what to use. Sequential, //!< All sub-computations are done sequentially. - SMP, //!< Use symmetric multiprocessing (Paladin) to do sub-computations. Distributed, //!< Use MPI to distribute sub-computations accross nodes. Combined, //!< Use MPI then Paladin on each node. 
}; diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 2c83f648dd..948f3e862a 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -229,7 +229,7 @@ int main(int argc, char** argv) {'B', "-B", "Vector bit size for rational solve tests (defaults to -b if not specified).", TYPE_INT, &vectorBitSize}, {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, - {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString}, + {'d', "-d", "Dispatch mode (either Auto, Sequential, Combined or Distributed).", TYPE_STR, &dispatchString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -247,10 +247,8 @@ int main(int argc, char** argv) method.dispatch = Dispatch::Combined; else if (dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (dispatchString == "SMP") - method.dispatch = Dispatch::SMP; else if (dispatchString != "Auto") { - std::cerr << "-d Dispatch mode should be either Auto, Sequential, SMP or Distributed" << std::endl; + std::cerr << "-d Dispatch mode should be either Auto, Sequential, Combined or Distributed" << std::endl; return EXIT_FAILURE; } @@ -272,7 +270,7 @@ int main(int argc, char** argv) bool ok = true; do { -/* + // ----- Rational Auto ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -282,14 +280,14 @@ int main(int argc, char** argv) ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -*/ + // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -/* - //@fixme: Distributed and Combined(ie ybrid) method will not work for QQ field + + //@fixme: Distributed and Combined(ie hybrid) method will not work for QQ field ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -359,7 +357,7 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); -*/ + if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; }