From 48ada9fb3b0053c4aa6d6f1bb801d236ce3e9462 Mon Sep 17 00:00:00 2001 From: ZHG Date: Tue, 2 Jul 2019 15:37:58 +0200 Subject: [PATCH 01/21] Added hybrid method and fixed the bug for checking result but with hybrid fails with 2 processes meanwhile it works with number of processes other than 2 --- linbox/solutions/solve.h | 24 +++++++++++++----------- linbox/solutions/solve/solve-cra.h | 5 +++++ tests/test-solve-full.C | 20 +++++++++++++++++--- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index e827167c65..709086a248 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -169,17 +169,19 @@ namespace LinBox { Element xDen; solve(xNum, xDen, A, b, tag, m); - - // The denominator being zero means computation failure - if (b.field().isZero(xDen)) { - throw LinboxError("Rational solve failed."); - } - - // Copy result back to RatVector - auto iXNum = xNum.begin(); - for (auto iX = x.begin(); iX != x.end(); ++iX) { - *iX = typename RatVector::value_type(*iXNum, xDen); - ++iXNum; + + if (m.master()) { + // The denominator being zero means computation failure + if (b.field().isZero(xDen)) { + throw LinboxError("Rational solve failed."); + } + + // Copy result back to RatVector + auto iXNum = xNum.begin(); + for (auto iX = x.begin(); iX != x.end(); ++iX) { + *iX = typename RatVector::value_type(*iXNum, xDen); + ++iXNum; + } } return x; diff --git a/linbox/solutions/solve/solve-cra.h b/linbox/solutions/solve/solve-cra.h index 2141b39dbd..2588211d17 100644 --- a/linbox/solutions/solve/solve-cra.h +++ b/linbox/solutions/solve/solve-cra.h @@ -33,6 +33,7 @@ #pragma once #include +#include #include #include #include @@ -179,6 +180,10 @@ namespace LinBox { LinBox::ChineseRemainderDistributed cra(hadamardLogBound, m.pCommunicator); cra(num, den, iteration, primeGenerator); } + else if (dispatch == Dispatch::Combined) { + LinBox::HybridChineseRemainder cra(hadamardLogBound, m.pCommunicator); + cra(num, den, iteration, primeGenerator); + } #endif else { throw LinBox::NotImplementedYet("Integer CRA Solve with specified dispatch type is not implemented yet."); diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 1739664c66..67accd8016 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,6 +97,10 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); + + std::cout << "RA " << RA << std::endl; + std::cout << "Rb " << Rb << std::endl; + std::cout << "x " << x << std::endl; VectorDomain VD(RA.field()); if (!VD.areEqual(RAx, Rb)) { @@ -112,7 +116,7 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R { using ResultVector = DenseVector; - if (verbose) { + if (verbose && method.master()) { std::cout << "--- Testing " << SolveMethod::name() << " on " << type_to_string(A) << " over "; A.field().write(std::cout) << " of size " << A.rowdim() << "x" << A.coldim() << std::endl; } @@ -141,10 +145,14 @@ bool test_solve(const SolveMethod& method, Matrix& A, Vector& b, ResultDomain& R bool ok = true; try { solve(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + if (method.master()) { + ok = ok && check_result(x, A, b, RA, Rb); + } solveInPlace(x, A, b, method); - ok = ok && check_result(x, A, b, RA, Rb); + if (method.master()) { + ok = ok && check_result(x, A, b, RA, Rb); + } } catch (...) 
{
 print_error(x, A, b, "throws error");
 return false;
@@ -235,6 +243,8 @@ int main(int argc, char** argv)
 method.dispatch = Dispatch::Auto;
 if (dispatchString == "Distributed")
 method.dispatch = Dispatch::Distributed;
+ else if (dispatchString == "Combined")
+ method.dispatch = Dispatch::Combined;
 else if (dispatchString == "Sequential")
 method.dispatch = Dispatch::Sequential;
 else if (dispatchString == "SMP")
@@ -263,6 +273,7 @@ int main(int argc, char** argv)
 bool ok = true;
 do {
 // ----- Rational Auto
+ #if 0
 ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 // @fixme Dixon does not compile
@@ -274,7 +285,9 @@ int main(int argc, char** argv)
 // ----- Rational CRA
 // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails
+ #endif
 ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
+ #if 0
 ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
 // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose);
@@ -347,6 +360,7 @@ int main(int argc, char** argv)
 // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
 // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
 // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose);
+ #endif
 if (!ok) {
 std::cerr << "Failed with seed: " << seed << std::endl;

From c315e0d9dd12a7e806588f79170200d2349845ba Mon Sep 17 00:00:00 2001
From: ZHG
Date: Wed, 3 Jul 2019 10:53:30 +0200
Subject: [PATCH 02/21] Added hybrid method with OpenMP and fixed the bug for 2 processes
---
 linbox/algorithms/cra-hybrid.h | 388 +++++++++++++++++++++++++++++++++
 linbox/solutions/solve.h | 3 +-
 tests/test-solve-full.C | 12 +-
 3 files changed, 393 insertions(+), 10 deletions(-)
 create mode 100644 linbox/algorithms/cra-hybrid.h

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
new file mode 100644
index 0000000000..fc83d801e0
--- /dev/null
+++ b/linbox/algorithms/cra-hybrid.h
@@ -0,0 +1,388 @@
+/* Copyright (C) 2007 LinBox
+ * Updated by Hongguang ZHU
+ * Written by bds and zw
+ * author: B. David Saunders and Zhendong Wan
+ * parallelized for BOINC computing by Bryan Youse
+ *
+ *
+ * ========LICENCE========
+ * This file is part of the library LinBox.
+ *
+ * LinBox is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * ========LICENCE======== + */ + + +#pragma once + +#include +#include +#include + +#include "linbox/algorithms/cra-domain.h" +#include "linbox/algorithms/rational-cra.h" +#include "linbox/algorithms/rational-cra-var-prec.h" +#include "linbox/integer.h" +#include "linbox/randiter/random-prime.h" +#include "linbox/solutions/methods.h" +#include "linbox/util/mpicpp.h" +#include "linbox/util/timer.h" + + +#include +#include "linbox/randiter/random-prime.h" + +#include "linbox/algorithms/cra-domain-omp.h" + +#if defined(__LINBOX_HAVE_MPI) +namespace LinBox +{ + + /** + * @fixme This hybrid CRA does no work when lauched with mpirun -np 2. + * This might have been designed so that does not work, but if so, find out why and + * and a comment here. + */ + template + struct HybridChineseRemainder { + typedef typename CRABase::Domain Domain; + typedef typename CRABase::DomainElement DomainElement; + protected: + CRABase Builder_; + Communicator* _commPtr; + unsigned int _numprocs; + double HB;//hadamard bound + + public: + template + HybridChineseRemainder(const Param& b, Communicator *c) : + Builder_(b), _commPtr(c), _numprocs(c->size()) + , HB(b)//Init with hadamard bound + {} + + int getNiter(){ + return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); + } + + /** \brief The CRA loop. + * + * termination condition. + * + * \param Iteration Function object of two arguments, \c + * Iteration(r, p), given prime \c p it outputs residue(s) \c + * r. This loop may be parallelized. \p Iteration must be + * reentrant, thread safe. For example, \p Iteration may be + * returning the coefficients of the minimal polynomial of a + * matrix \c mod \p p. + @warning we won't detect bad primes. + * + * \param primeg RandIter object for generating primes. 
+ * \param[out] res an integer + */ + template + Integer & operator() (Integer& res, Function& Iteration, PrimeIterator& primeg) + { + // defer to standard CRA loop if no parallel usage is desired + if(_commPtr == 0 || _commPtr->size() == 1) { + ChineseRemainder< CRABase > sequential(Builder_); + return sequential(res, Iteration, primeg); + } + + para_compute(res, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(res); + } + else{ + return res; + } + } + + template + Integer & operator() (Integer& num, Integer& den, Function& Iteration, PrimeIterator& primeg) + { + + // defer to standard CRA loop if no parallel usage is desired + if(_commPtr == 0 || _commPtr->size() == 1) { + RationalChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, den, Iteration, primeg); + } + para_compute(num, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(num,den); + } + else{ + return num; + } + } + + + template + Vect & operator() (Vect& num, Integer& den, Function& Iteration, PrimeIterator& primeg) + { + // if there is no communicator or if there is only one process, + // then proceed normally (without parallel) + if(_commPtr == 0 || _commPtr->size() == 1) { + + RationalChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, den, Iteration, primeg); + + } + para_compute(num, Iteration, primeg); + + if(_commPtr->rank() == 0){ + return Builder_.result(num,den); + } + else{ + return num; + } + } + + template + Vect & operator() (Vect& num, Function& Iteration, PrimeIterator& primeg) + { + // if there is no communicator or if there is only one process, + // then proceed normally (without parallel) + if(_commPtr == 0 || _commPtr->size() == 1) { + + ChineseRemainder< CRABase > sequential(Builder_); + return sequential(num, Iteration, primeg); + + } + para_compute(num, Iteration, primeg); + if(_commPtr->rank() == 0){ + return Builder_.result(num); + } + else{ + return num; + } + } + + + template< class Function, class Domain, class ElementContainer> + void solve_with_prime(int m_primeiter, + Function& Iteration, std::vector& VECTORdomains, + ElementContainer& VECTORresidues + ) + { + + VECTORdomains[ omp_get_thread_num()] = Domain(m_primeiter); + + Iteration(VECTORresidues, VECTORdomains[ omp_get_thread_num()] + + ); + + VECTORresidues.push_back(m_primeiter); + + } + + + template + void compute_task(pFunc& pF, std::vector& m_primeiters, + Function& Iteration, std::vector& VECTORdomains, + std::vector& VECTORresidues, size_t Ntask) + { + + int Nthread = Ntask; + +#pragma omp parallel +#pragma omp single + Nthread=omp_get_num_threads(); +//TODO: Replace the OMP directives with Paladin +#pragma omp parallel for num_threads(Nthread) schedule(dynamic,1) + for(auto j=0u;j + void worker_process_task(Function& Iteration, Vect &r) + { + int Ntask=0; + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + ++gen; + _commPtr->recv(Ntask, 0); + + if(Ntask!=0){ + std::unordered_set prime_used; + + size_t Nthread = Ntask; +#pragma omp parallel +{ +#pragma omp single + Nthread=omp_get_num_threads(); +} + + std::vector> VECTORresidues;VECTORresidues.resize(Ntask); + std::vector VECTORdomains;VECTORdomains.resize(Nthread); + std::vector m_primeiters;m_primeiters.reserve(Ntask); + + for(auto j=0;jBuilder_.noncoprime(*gen) ) + ++gen; + m_primeiters.push_back(*gen); + + } + + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORdomains, VECTORresidues, 
Ntask); + + for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); + } + + + }; + + } + + template + void para_compute( Vect& num, Function& Iteration, PrimeIterator& primeg) + { + + Domain D(*primeg); + BlasVector r(D); + +//double starttime; +//double endtime; + + // parent propcess + if(_commPtr->rank() == 0){ + + master_process_task(Iteration, D, r); + + } + // child process + else{ + +//starttime = omp_get_wtime(); + worker_process_task(Iteration, r); +//endtime = MPI_Wtime(); +//std::cout<<" process("<<_commPtr->rank()<<") used total CPU time (seconds): " << endtime-starttime< + void master_recv_residues(Vect &r, int &pp, int &Nrecv) + { + r.resize (r.size()+1); + + //receive the beginnin and end of a vector in heapspace + _commPtr->recv(r.begin(), r.end(), MPI_ANY_SOURCE, 0); + + //Update the number of iterations for the next step + Nrecv--; + + //Store the corresponding prime number + pp = r[r.size()-1]; + + //Restructure the vector without added prime number + r.resize (r.size()-1); + + } + + template + void master_compute(Vect &r) + { + + int pp; + +#ifdef __Detailed_Time_Measurement + Timer chrono; +#endif + int Nrecv=this->getNiter(); + + while(Nrecv > 0 ){ + + master_recv_residues(r, pp, Nrecv); + + Domain D(pp); + +#ifdef __Detailed_Time_Measurement + chrono.start(); +#endif + Builder_.progress(D, r); +#ifdef __Detailed_Time_Measurement + chrono.stop(); + std::cout<<"Builder_.progress(D, r) in the manager process used CPU time (seconds): " < + void master_process_task(Function& Iteration, Domain &D, Vect &r) + { + int vNtask_per_proc[_commPtr->size() - 1]; + + master_init(vNtask_per_proc, Iteration, D, r); + + master_compute(r); + + } + + template + void master_init(int *vNtask_per_proc, Function& Iteration, Domain &D, Vect &r) + { + int procs = _commPtr->size(); + + int Niter=this->getNiter(); + + //Compute nb of tasks ought to be realized for each process + if(Niter<(procs-1)){ + + for(long i=1; isend(vNtask_per_proc[i - 1], i); + } + for(long i=Niter+1; isend(vNtask_per_proc[i - 1], i); + } + + }else{ + for(long i=1; isend(vNtask_per_proc[i - 1], i); + } + for(long i=Niter%(procs-1)+1; isend(vNtask_per_proc[i - 1], i); + } + } + + //Initialize the buider and the receiver vector r + Builder_.initialize( D, Iteration(r, D) ); + } + + }; + +} + +#endif + +// Local Variables: +// mode: C++ +// tab-width: 4 +// indent-tabs-mode: nil +// c-basic-offset: 4 +// End: +// vim:sts=4:sw=4:ts=4:et:sr:cino=>s,f0,{0,g0,(0,\:0,t0,+0,=s diff --git a/linbox/solutions/solve.h b/linbox/solutions/solve.h index 709086a248..28ff6f5f89 100644 --- a/linbox/solutions/solve.h +++ b/linbox/solutions/solve.h @@ -169,13 +169,12 @@ namespace LinBox { Element xDen; solve(xNum, xDen, A, b, tag, m); - + if (m.master()) { // The denominator being zero means computation failure if (b.field().isZero(xDen)) { throw LinboxError("Rational solve failed."); } - // Copy result back to RatVector auto iXNum = xNum.begin(); for (auto iX = x.begin(); iX != x.end(); ++iX) { diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 67accd8016..95be521ffc 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -97,10 +97,10 @@ bool check_result(ResultVector& x, Matrix& A, Vector& b, ResultMatrix& RA, Resul { ResultVector RAx(RA.field(), Rb.size()); RA.apply(RAx, x); - - std::cout << "RA " << RA << std::endl; - std::cout << "Rb " << Rb << std::endl; - std::cout << "x " << x << std::endl; + + //std::cout << "RA " << RA << std::endl; + //std::cout << "Rb " << Rb << std::endl; 
+ //std::cout << "x " << x << std::endl; VectorDomain VD(RA.field()); if (!VD.areEqual(RAx, Rb)) { @@ -273,7 +273,6 @@ int main(int argc, char** argv) bool ok = true; do { // ----- Rational Auto - #if 0 ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // @fixme Dixon does not compile @@ -285,9 +284,7 @@ int main(int argc, char** argv) // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails - #endif ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); - #if 0 ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -360,7 +357,6 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); - #endif if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; From bacc3b57e01443a439db1abdb75af76f96851e9e Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 3 Jul 2019 14:21:03 +0200 Subject: [PATCH 03/21] Tested hybrid after having merged with master branch --- benchmarks/benchmark-dense-solve.C | 1 + linbox/algorithms/cra-hybrid.h | 49 ++++++++++++++++++++---------- tests/test-solve-full.C | 8 +++-- 3 files changed, 39 insertions(+), 19 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 504cb69e06..329458a42a 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -164,6 +164,7 @@ int main(int argc, char** argv) if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; + else if (args.dispatchString == "Combined") method.dispatch = Dispatch::Combined; else method.dispatch = Dispatch::Auto; // Real benchmark diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index fc83d801e0..fa1eba9d4b 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -50,11 +50,6 @@ namespace LinBox { - /** - * @fixme This hybrid CRA does no work when lauched with mpirun -np 2. - * This might have been designed so that does not work, but if so, find out why and - * and a comment here. 
- */ template struct HybridChineseRemainder { typedef typename CRABase::Domain Domain; @@ -176,12 +171,11 @@ namespace LinBox ElementContainer& VECTORresidues ) { - +//std::cout<<"Thread("<& VECTORresidues, size_t Ntask) { - int Nthread = Ntask; +#if 1 + int Nthread = Ntask; #pragma omp parallel -#pragma omp single +{ Nthread=omp_get_num_threads(); -//TODO: Replace the OMP directives with Paladin -#pragma omp parallel for num_threads(Nthread) schedule(dynamic,1) + omp_set_num_threads(Nthread); +#pragma omp for //num_threads(Nthread) schedule(dynamic,1) for(auto j=0u;j Date: Thu, 4 Jul 2019 09:22:28 +0200 Subject: [PATCH 04/21] Instant clean up --- linbox/algorithms/cra-hybrid.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index fa1eba9d4b..a74283904e 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -276,8 +276,10 @@ namespace LinBox Domain D(*primeg); BlasVector r(D); -//double starttime; -//double endtime; +#ifdef __Detailed_Time_Measurement + double starttime; + double endtime; +#endif // parent propcess if(_commPtr->rank() == 0){ @@ -287,11 +289,14 @@ namespace LinBox } // child process else{ - -//starttime = omp_get_wtime(); +#ifdef __Detailed_Time_Measurement + chrono.start(); +#endif worker_process_task(Iteration, r); -//endtime = MPI_Wtime(); -//std::cout<<" process("<<_commPtr->rank()<<") used total CPU time (seconds): " << endtime-starttime<rank()<<") used total CPU time (seconds): " << endtime-starttime< Date: Thu, 4 Jul 2019 11:54:29 +0200 Subject: [PATCH 05/21] cleanup unused param --- linbox/algorithms/cra-hybrid.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index a74283904e..e165dae663 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -167,15 +167,16 @@ namespace LinBox template< class Function, class Domain, class ElementContainer> void solve_with_prime(int m_primeiter, - Function& Iteration, std::vector& VECTORdomains, + Function& Iteration, ElementContainer& VECTORresidues ) { //std::cout<<"Thread("< void compute_task(pFunc& pF, std::vector& m_primeiters, - Function& Iteration, std::vector& VECTORdomains, + Function& Iteration, std::vector& VECTORresidues, size_t Ntask) { @@ -198,7 +199,7 @@ namespace LinBox #pragma omp for //num_threads(Nthread) schedule(dynamic,1) for(auto j=0u;j> VECTORresidues;VECTORresidues.resize(Ntask); - std::vector VECTORdomains;VECTORdomains.resize(Nthread); std::vector m_primeiters;m_primeiters.reserve(Ntask); for(auto j=0;jBuilder_), m_primeiters, Iteration, VECTORdomains, VECTORresidues, Ntask); + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); From 4dccfc3c6adbb0660fb7f304a780e0af60c946eb Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 4 Jul 2019 17:11:24 +0200 Subject: [PATCH 06/21] Taken off critical section once compiled with commentator disabled and no more segmentation fault but still not yet clean enough for PR --- linbox/algorithms/cra-hybrid.h | 37 ++++++---------------------------- tests/test-solve-full.C | 4 ++-- 2 files changed, 8 insertions(+), 33 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index a74283904e..ca4cd91d9b 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ 
-171,14 +171,11 @@ namespace LinBox ElementContainer& VECTORresidues ) { -//std::cout<<"Thread("<& VECTORresidues, size_t Ntask) { - -#if 1 - int Nthread = Ntask; -#pragma omp parallel -{ - Nthread=omp_get_num_threads(); - omp_set_num_threads(Nthread); -#pragma omp for //num_threads(Nthread) schedule(dynamic,1) - for(auto j=0u;j Date: Thu, 4 Jul 2019 17:50:00 +0200 Subject: [PATCH 07/21] Taken off critical section once compiled with commentator disabled and cleaned up --- linbox/algorithms/cra-hybrid.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 921ceb5521..8590451e18 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -1,8 +1,4 @@ /* Copyright (C) 2007 LinBox - * Updated by Hongguang ZHU - * Written by bds and zw - * author: B. David Saunders and Zhendong Wan - * parallelized for BOINC computing by Bryan Youse * * * ========LICENCE======== @@ -174,7 +170,6 @@ namespace LinBox Domain D(m_primeiter); //@fixme: The commentator within the following function call to other functions will crash if not disable the commentator while compiling Iteration(VECTORresidues, D ); - VECTORresidues.push_back(m_primeiter); } @@ -192,7 +187,7 @@ namespace LinBox TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ for(auto j=iter.begin(); j!=iter.end(); ++j) { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); + solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); } }) }); From 012bb285be6831fa17a49d33af75e0e70deaf10e Mon Sep 17 00:00:00 2001 From: ZHG Date: Fri, 5 Jul 2019 10:52:09 +0200 Subject: [PATCH 08/21] Cleaned up for validation/benchmark on server --- linbox/algorithms/cra-hybrid.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 8590451e18..c2bc8ce6c9 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -20,7 +20,6 @@ * ========LICENCE======== */ - #pragma once #include @@ -36,7 +35,6 @@ #include "linbox/util/mpicpp.h" #include "linbox/util/timer.h" - #include #include "linbox/randiter/random-prime.h" @@ -45,7 +43,13 @@ #if defined(__LINBOX_HAVE_MPI) namespace LinBox { - + /* + * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading + * will not be available. + * Multithreading is only active if launched with more than 2 processes if each process is mapped to one node. 
+ * To use multithreading, user needs to set the environment useing export for the number of cores/threads on + * each node if each process is mapped to a node + */ template struct HybridChineseRemainder { typedef typename CRABase::Domain Domain; @@ -168,7 +172,7 @@ namespace LinBox ) { Domain D(m_primeiter); - //@fixme: The commentator within the following function call to other functions will crash if not disable the commentator while compiling + //@fixme: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled Iteration(VECTORresidues, D ); VECTORresidues.push_back(m_primeiter); } From 794f93cd9b5de5e350f99d0a1f6609aa4e807e23 Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 10 Jul 2019 13:44:35 +0200 Subject: [PATCH 09/21] Use MPI_Wtime for multiprocessing timing --- benchmarks/benchmark-dense-solve.C | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index af030346b8..9c1f1a3072 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -84,8 +84,12 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met DenseMatrix A(F, args.n, args.n); DenseVector B(F, A.rowdim()); Timer chrono; - + double t1=0.0,t2=0.0; if (method.master()) { + + if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ + t1 = MPI_Wtime(); + } chrono.start(); PAR_BLOCK { FFLAS::pfrand(F, randIter, args.n, args.n, A.getPointer(), args.n); } chrono.stop(); @@ -129,7 +133,10 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met chrono.stop(); timebits[0] = chrono.usertime(); - timebits[1] = chrono.realtime(); + if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ + t2 = MPI_Wtime(); + timebits[1] = t2 - t1; + }else{ timebits[1] = chrono.realtime(); } setBitsize(timebits[2], args.q, X); } } @@ -209,4 +216,4 @@ int main(int argc, char** argv) } return 0; -} \ No newline at end of file +} From a849b2cea589ab04b1859638a8f58add61508d0a Mon Sep 17 00:00:00 2001 From: ZHG Date: Fri, 12 Jul 2019 09:06:49 +0200 Subject: [PATCH 10/21] Fall back to previous timing method as no big differece --- benchmarks/benchmark-dense-solve.C | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 9c1f1a3072..3e1f32ecc3 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -84,12 +84,9 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met DenseMatrix A(F, args.n, args.n); DenseVector B(F, A.rowdim()); Timer chrono; - double t1=0.0,t2=0.0; + if (method.master()) { - if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ - t1 = MPI_Wtime(); - } chrono.start(); PAR_BLOCK { FFLAS::pfrand(F, randIter, args.n, args.n, A.getPointer(), args.n); } chrono.stop(); @@ -133,10 +130,7 @@ void benchmark(std::array& timebits, Arguments& args, MethodBase& met chrono.stop(); timebits[0] = chrono.usertime(); - if(args.dispatchString == "Distributed" || args.dispatchString == "Combined"){ - t2 = MPI_Wtime(); - timebits[1] = t2 - t1; - }else{ timebits[1] = chrono.realtime(); } + timebits[1] = chrono.realtime(); setBitsize(timebits[2], args.q, X); } } From 124f947fa22ef3cdfbbb857c244ba4b5221b754f Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 18 Jul 2019 14:40:54 +0200 Subject: 
[PATCH 11/21] cleaned up before PR --- linbox/algorithms/cra-hybrid.h | 42 +++++++++++++++++----------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index c2bc8ce6c9..072b12cca0 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -183,20 +183,20 @@ namespace LinBox Function& Iteration, std::vector& VECTORresidues, size_t Ntask) { - - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); - SYNCH_GROUP({ - FORBLOCK1D(iter, Ntask, sp,{ - TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) - { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); - } - }) - }); - }); - } + //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + SYNCH_GROUP({ + FORBLOCK1D(iter, Ntask, sp,{ + TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ + for(auto j=iter.begin(); j!=iter.end(); ++j) + { + solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); + } + }) + }); + }); + } } @@ -205,26 +205,26 @@ namespace LinBox void worker_process_task(Function& Iteration, Vect &r) { int Ntask=0; - //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); - LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); ++gen; _commPtr->recv(Ntask, 0); if(Ntask!=0){ std::unordered_set prime_used; - std::vector> VECTORresidues;VECTORresidues.resize(Ntask); - std::vector m_primeiters;m_primeiters.reserve(Ntask); + std::vector> VECTORresidues;VECTORresidues.resize(Ntask); + std::vector m_primeiters;m_primeiters.reserve(Ntask); for(auto j=0;jBuilder_.noncoprime(*gen) ) + while(this->Builder_.noncoprime(*gen) || prime_used.find(*gen)!=prime_used.end()) ++gen; m_primeiters.push_back(*gen); - + prime_used.insert(*gen); } - compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); + compute_task( (this->Builder_), m_primeiters, Iteration, VECTORresidues, Ntask); for(long i=0; isend(VECTORresidues[i].begin(), VECTORresidues[i].end(), 0, 0); From 651fcf2656fd047d6da4060b063dd6df7a60e8ab Mon Sep 17 00:00:00 2001 From: ZHG Date: Mon, 29 Jul 2019 11:54:04 +0200 Subject: [PATCH 12/21] Added more explicit comments --- linbox/algorithms/cra-hybrid.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 072b12cca0..ba7d55bae4 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -43,10 +43,10 @@ #if defined(__LINBOX_HAVE_MPI) namespace LinBox { - /* + /* * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading * will not be available. - * Multithreading is only active if launched with more than 2 processes if each process is mapped to one node. + * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node. 
* To use multithreading, user needs to set the environment useing export for the number of cores/threads on * each node if each process is mapped to a node */ @@ -127,7 +127,7 @@ namespace LinBox Vect & operator() (Vect& num, Integer& den, Function& Iteration, PrimeIterator& primeg) { // if there is no communicator or if there is only one process, - // then proceed normally (without parallel) + // then proceed sequentially (without parallel) if(_commPtr == 0 || _commPtr->size() == 1) { RationalChineseRemainder< CRABase > sequential(Builder_); @@ -148,7 +148,7 @@ namespace LinBox Vect & operator() (Vect& num, Function& Iteration, PrimeIterator& primeg) { // if there is no communicator or if there is only one process, - // then proceed normally (without parallel) + // then proceed sequentially (without parallel) if(_commPtr == 0 || _commPtr->size() == 1) { ChineseRemainder< CRABase > sequential(Builder_); @@ -172,7 +172,7 @@ namespace LinBox ) { Domain D(m_primeiter); - //@fixme: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled + //@warning: The commentator within the following function's function call hierarchy will raise a crash if the whole program is not compiled with all commentator disabled as the commentator is not thread safe Iteration(VECTORresidues, D ); VECTORresidues.push_back(m_primeiter); } @@ -334,7 +334,7 @@ namespace LinBox int Niter=this->getNiter(); - //Compute nb of tasks ought to be realized for each process + //Compute and send nb of tasks ought to be realized for each worker process if(Niter<(procs-1)){ for(long i=1; i Date: Mon, 29 Jul 2019 11:56:50 +0200 Subject: [PATCH 13/21] Cleaned up for code review --- linbox/algorithms/cra-hybrid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index ba7d55bae4..a34cdb047a 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -44,10 +44,10 @@ namespace LinBox { /* - * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading + * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading * will not be available. * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node. 
- * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
+ * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
 * each node if each process is mapped to a node
 */
 template

From 888c037edb9aba1b32daab592fd78fce300fd68f Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:55:55 +0200
Subject: [PATCH 14/21] Fast fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index 203f456fa9..cd602d6b3f 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -137,6 +137,13 @@ namespace LinBox {
 unserialize(value, bytes);
 }
 }
+
+ template <> void Communicator::bcast(bool& value, int src)
+ {
+
+ MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);
+
+ }
 }

 // Local Variables:

From f79947cc7332cb27367da639684378bcaf8c598a Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:56:50 +0200
Subject: [PATCH 15/21] Fast fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index cd602d6b3f..06204058a9 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -138,9 +138,9 @@ namespace LinBox {
 }
 }
+ //Specialization of bcast for a single boolean value
 template <> void Communicator::bcast(bool& value, int src)
 {
-
 MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);

 }

From af0c44307614982d5e731958db4275ca2ee3153f Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 11:56:50 +0200
Subject: [PATCH 16/21] Quick fix for the benchmark-solve-cra compile-time error, as the wrapped MPI Bcast did not handle simple bool input data
---
 linbox/util/mpicpp.inl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/util/mpicpp.inl b/linbox/util/mpicpp.inl
index cd602d6b3f..06204058a9 100644
--- a/linbox/util/mpicpp.inl
+++ b/linbox/util/mpicpp.inl
@@ -138,9 +138,9 @@ namespace LinBox {
 }
 }
+ //Specialization of bcast for a single boolean value
 template <> void Communicator::bcast(bool& value, int src)
 {
-
 MPI_Bcast(&value, 1, MPI::BOOL, src, _comm);

 }

From d0c279be129a17f4dd57ea44912cc9ac96728da7 Mon Sep 17 00:00:00 2001
From: ZHG
Date: Mon, 5 Aug 2019 12:20:45 +0200
Subject: [PATCH 17/21] Quick fix for the compile-time error in the minpoly example program, as the communicatorp() method for Blackbox no longer exists
---
 examples/minpoly.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/minpoly.C b/examples/minpoly.C
index 395caa76f9..0e0579071d 100644
--- a/examples/minpoly.C
+++ b/examples/minpoly.C
@@ -75,7 +75,7 @@ int main (int argc, char **argv)
 #ifdef __LINBOX_HAVE_MPI
 Communicator C(&argc, &argv);
 process = C.rank();
- M.communicatorp(&C);
+ //M.communicatorp(&C);
 #endif
 Givaro::ZRing ZZ;

From b4ad8ba6ca47ecab7559f6d1ae803ea373473c9c Mon Sep 17 00:00:00 2001
From: Alexis Breust
Date: Tue, 20 Aug 2019 10:54:57 +0200
Subject: [PATCH 18/21] As Hadamard Bound is now expressed in log2, reduced hybrid iterations.
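
This drops the log2(e) = 1.442695040889 factor from getNiter(): the Hadamard bound handed to the builder is now already expressed in log2 rather than as a natural logarithm, so no extra conversion to a bit count is needed. A minimal standalone sketch of the iteration-count formula, assuming hadamardLogBound holds log2 of the Hadamard bound and primeBits is the bit size reported by the masked prime iterator (the helper name below is illustrative only, not LinBox API):

    #include <cmath>

    // Number of CRA iterations needed so that the product of the sampled primes
    // exceeds the Hadamard bound H: each prime contributes at least (primeBits - 1)
    // bits, so ceil(log2(H) / (primeBits - 1)) primes are sufficient.
    inline int niterFromLog2Bound(double hadamardLogBound, unsigned int primeBits)
    {
        return (int)std::ceil(hadamardLogBound / (double)(primeBits - 1));
    }

    // With a natural-log bound (the previous convention), the same count is
    // ceil(log2(e) * ln(H) / (primeBits - 1)), which is where the old
    // 1.442695040889 factor came from.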
---
 linbox/algorithms/cra-hybrid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
index a34cdb047a..d2fe748d6d 100644
--- a/linbox/algorithms/cra-hybrid.h
+++ b/linbox/algorithms/cra-hybrid.h
@@ -68,7 +68,7 @@ namespace LinBox
 {}
 int getNiter(){
- return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
+ return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
 }
 /** \brief The CRA loop.

From cae625b47840a40a5a1fd498da48ed30f4ba5b17 Mon Sep 17 00:00:00 2001
From: ZHG
Date: Wed, 21 Aug 2019 11:33:44 +0200
Subject: [PATCH 19/21] Fall back to the previous implementation: the termination estimate using the log of the Hadamard bound is incorrect, and the multithreaded worker process does not use the prime numbers in the same order, so the residues may not follow the iteration order through the prime number vector
---
 linbox/algorithms/cra-hybrid.h | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h
index d2fe748d6d..0530b25bbc 100644
--- a/linbox/algorithms/cra-hybrid.h
+++ b/linbox/algorithms/cra-hybrid.h
@@ -43,12 +43,17 @@
 #if defined(__LINBOX_HAVE_MPI)
 namespace LinBox
 {
- /*
+ /* @Warning /!\
 * The MPI standard imposes that if the following program is launched with only 2 processes, multithreading
 * will not be available.
- * Multithreading is only active if launched with more than 2 processes while each process is mapped to one node.
+ * Multithreading is only active if launched with more than 2 processes while each process is mapped to
+ * one node.
 * To use multithreading, user needs to set the environment useing export for the number of cores/threads on
- * each node if each process is mapped to a node
+ * each node if each process is mapped to a node however this only works for one run so this will not work
+ * for multiple executions where only the -t option is needed to set the number of threads
+ *
+ * For the execution on several nodes, each node needs to have at least 3 processes mapped otherwise no
+ * multithreading will be available according to the MPI standards
 */
 template
 struct HybridChineseRemainder {
 typedef typename CRABase::Domain Domain;
 typedef typename CRABase::DomainElement DomainElement;
@@ -68,7 +73,7 @@ namespace LinBox
 {}
 int getNiter(){
- return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
+ return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1));
 }
 /** \brief The CRA loop.
@@ -83,8 +88,6 @@ namespace LinBox
 * matrix \c mod \p p.
 @warning we won't detect bad primes.
 *
- * \param primeg RandIter object for generating primes.
- * \param[out] res an integer */ template Integer & operator() (Integer& res, Function& Iteration, PrimeIterator& primeg) @@ -205,8 +208,8 @@ namespace LinBox void worker_process_task(Function& Iteration, Vect &r) { int Ntask=0; - LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); - //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + //LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); + LinBox::MaskedPrimeIterator gen(_commPtr->rank(),_commPtr->size()); ++gen; _commPtr->recv(Ntask, 0); From b09a5a625f2a0feeb847262876602c458b44f25b Mon Sep 17 00:00:00 2001 From: ZHG Date: Wed, 28 Aug 2019 11:39:41 +0200 Subject: [PATCH 20/21] Adopted the FOR1D with MODE --- linbox/algorithms/cra-hybrid.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/linbox/algorithms/cra-hybrid.h b/linbox/algorithms/cra-hybrid.h index 0530b25bbc..62babd6afa 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -187,6 +187,7 @@ namespace LinBox std::vector& VECTORresidues, size_t Ntask) { //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t +#if 0 PAR_BLOCK{ auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); SYNCH_GROUP({ @@ -200,7 +201,16 @@ namespace LinBox }); }); } - +#else + PAR_BLOCK{ + auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); + SYNCH_GROUP({ + FOR1D(iter, Ntask, sp,MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ + solve_with_prime(m_primeiters[iter], Iteration, VECTORresidues[iter]); + }); + }); + } +#endif } From 95d1775951a748df0fc34c4b0d52e7e47b06dfba Mon Sep 17 00:00:00 2001 From: ZHG Date: Thu, 29 Aug 2019 10:46:01 +0200 Subject: [PATCH 21/21] Taken off the factor of 1.40 for the Niter and cleaned up again --- benchmarks/benchmark-dense-solve.C | 3 +-- linbox/algorithms/cra-hybrid.h | 18 +----------------- linbox/solutions/methods.h | 1 - tests/test-solve-full.C | 16 +++++++--------- 4 files changed, 9 insertions(+), 29 deletions(-) diff --git a/benchmarks/benchmark-dense-solve.C b/benchmarks/benchmark-dense-solve.C index 3e1f32ecc3..926ad843be 100644 --- a/benchmarks/benchmark-dense-solve.C +++ b/benchmarks/benchmark-dense-solve.C @@ -145,7 +145,7 @@ int main(int argc, char** argv) {'n', "-n", "Set the matrix dimension.", TYPE_INT, &args.n}, {'b', "-b", "bit size", TYPE_INT, &args.bits}, {'s', "-s", "Seed for randomness.", TYPE_INT, &args.seed}, - {'d', "-d", "Dispatch mode (any of: Auto, Sequential, SMP, Distributed).", TYPE_STR, &args.dispatchString}, + {'d', "-d", "Dispatch mode (any of: Auto, Sequential, Combined or Distributed).", TYPE_STR, &args.dispatchString}, {'t', "-t", "Number of threads.", TYPE_INT, &numThreads }, {'M', "-M", "Choose the solve method (any of: Auto, Elimination, DenseElimination, SparseElimination, " @@ -173,7 +173,6 @@ int main(int argc, char** argv) MethodBase method; method.pCommunicator = &communicator; if (args.dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (args.dispatchString == "SMP") method.dispatch = Dispatch::SMP; else if (args.dispatchString == "Distributed") method.dispatch = Dispatch::Distributed; else if (args.dispatchString == "Combined") method.dispatch = Dispatch::Combined; else method.dispatch = Dispatch::Auto; diff --git a/linbox/algorithms/cra-hybrid.h 
b/linbox/algorithms/cra-hybrid.h index 62babd6afa..25dd61410e 100644 --- a/linbox/algorithms/cra-hybrid.h +++ b/linbox/algorithms/cra-hybrid.h @@ -73,7 +73,7 @@ namespace LinBox {} int getNiter(){ - return std::ceil(1.442695040889*HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); + return std::ceil(HB/(double)(LinBox::MaskedPrimeIterator(0,_commPtr->size()).getBits()-1)); } /** \brief The CRA loop. @@ -187,21 +187,6 @@ namespace LinBox std::vector& VECTORresidues, size_t Ntask) { //@fixme: cannot use export OMP_NUM_THREADS to set the desired number of threads for more than one iterations and only omp_set_num_thread() can be used for this purpose, whick takes the option -t -#if 0 - PAR_BLOCK{ - auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); - SYNCH_GROUP({ - FORBLOCK1D(iter, Ntask, sp,{ - TASK(MODE(CONSTREFERENCE(m_primeiters,Iteration,VECTORresidues)),{ - for(auto j=iter.begin(); j!=iter.end(); ++j) - { - solve_with_prime(m_primeiters[j], Iteration, VECTORresidues[j]); - } - }) - }); - }); - } -#else PAR_BLOCK{ auto sp=SPLITTER(NUM_THREADS,FFLAS::CuttingStrategy::Row,FFLAS::StrategyParameter::Threads); SYNCH_GROUP({ @@ -210,7 +195,6 @@ namespace LinBox }); }); } -#endif } diff --git a/linbox/solutions/methods.h b/linbox/solutions/methods.h index 881de19599..ee75eb0cb0 100644 --- a/linbox/solutions/methods.h +++ b/linbox/solutions/methods.h @@ -106,7 +106,6 @@ namespace LinBox { enum class Dispatch { Auto, //!< Let implementation decide what to use. Sequential, //!< All sub-computations are done sequentially. - SMP, //!< Use symmetric multiprocessing (Paladin) to do sub-computations. Distributed, //!< Use MPI to distribute sub-computations accross nodes. Combined, //!< Use MPI then Paladin on each node. 
}; diff --git a/tests/test-solve-full.C b/tests/test-solve-full.C index 2c83f648dd..948f3e862a 100644 --- a/tests/test-solve-full.C +++ b/tests/test-solve-full.C @@ -229,7 +229,7 @@ int main(int argc, char** argv) {'B', "-B", "Vector bit size for rational solve tests (defaults to -b if not specified).", TYPE_INT, &vectorBitSize}, {'m', "-m", "Row dimension of matrices.", TYPE_INT, &m}, {'n', "-n", "Column dimension of matrices.", TYPE_INT, &n}, - {'d', "-d", "Dispatch mode (either Auto, Sequential, SMP or Distributed).", TYPE_STR, &dispatchString}, + {'d', "-d", "Dispatch mode (either Auto, Sequential, Combined or Distributed).", TYPE_STR, &dispatchString}, END_OF_ARGUMENTS}; parseArguments(argc, argv, args); @@ -247,10 +247,8 @@ int main(int argc, char** argv) method.dispatch = Dispatch::Combined; else if (dispatchString == "Sequential") method.dispatch = Dispatch::Sequential; - else if (dispatchString == "SMP") - method.dispatch = Dispatch::SMP; else if (dispatchString != "Auto") { - std::cerr << "-d Dispatch mode should be either Auto, Sequential, SMP or Distributed" << std::endl; + std::cerr << "-d Dispatch mode should be either Auto, Sequential, Combined or Distributed" << std::endl; return EXIT_FAILURE; } @@ -272,7 +270,7 @@ int main(int argc, char** argv) bool ok = true; do { -/* + // ----- Rational Auto ok = ok && test_dense_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -282,14 +280,14 @@ int main(int argc, char** argv) ok = ok && test_dense_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::Auto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -*/ + // ----- Rational CRA // @fixme @bug When bitSize = 5 and vectorBitSize = 50, CRA fails ok = ok && test_dense_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_sparse_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), ZZ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); -/* - //@fixme: Distributed and Combined(ie ybrid) method will not work for QQ field + + //@fixme: Distributed and Combined(ie hybrid) method will not work for QQ field ok = ok && test_dense_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); ok = ok && test_sparse_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); // ok = ok && test_blackbox_solve(Method::CRAAuto(method), QQ, QQ, m, n, bitSize, vectorBitSize, seed, verbose); @@ -359,7 +357,7 @@ int main(int argc, char** argv) // ok = ok && test_dense_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_sparse_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); // ok = ok && test_blackbox_solve(Method::Coppersmith(method), F, F, m, n, 0, 0, seed, verbose); -*/ + if (!ok) { std::cerr << "Failed with seed: " << seed << std::endl; }