diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
index 958489e09fd..441be04d842 100644
--- a/.github/workflows/osx.yml
+++ b/.github/workflows/osx.yml
@@ -55,7 +55,7 @@ jobs:
         mkdir install
         export INSTALL_PREFIX=`pwd`/install
         cd build
-        cmake .. -DCMAKE_CXX_FLAGS=-Wpedantic -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }}
+        cmake .. -DCMAKE_PREFIX_PATH=/opt/homebrew/opt/libomp/ -DCMAKE_CXX_FLAGS=-Wpedantic -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }}
         make -j8
         ctest -j10 --output-on-failure
 
diff --git a/.github/workflows/windows-msvc-cuda.yml b/.github/workflows/windows-msvc-cuda.yml
index 1bf6a7bee85..b1df1aaf4ed 100644
--- a/.github/workflows/windows-msvc-cuda.yml
+++ b/.github/workflows/windows-msvc-cuda.yml
@@ -29,7 +29,7 @@ jobs:
         config:
         - {version: "latest", name: "cuda-latest/release/shared", "mixed": "ON"}
     name: msvc/${{ matrix.config.name }} (only compile)
-    runs-on: [windows-latest]
+    runs-on: [windows-2019]
 
     steps:
     - name: Checkout the latest code (shallow clone)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a812ad44f80..f3cecee4b71 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -633,7 +633,7 @@ warnings:
   variables:
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
-    CXX_FLAGS: "-Werror=pedantic;-pedantic-errors"
+    CXX_FLAGS: "-Werror=pedantic -pedantic-errors"
   allow_failure: yes
 
 # Ensure kernel modules do not depend on core
@@ -699,6 +699,11 @@ sonarqube_cov_:
     - .quick_test_short_lived_condition
     - .before_script_template
     - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019
+  tags:
+    - private_ci
+    - controller
+    - cpu
+    - nla-gpu
   script:
     - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}"
       | jq '.items[0].number')
@@ -730,6 +735,11 @@ sonarqube_cov:
     - .deploy_condition
     - .before_script_template
     - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019
+  tags:
+    - private_ci
+    - controller
+    - cpu
+    - nla-gpu
   script:
     - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE
       -DGINKGO_SONARQUBE_TEST=ON
diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index a26d8572068..c93704532b5 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -11,22 +11,29 @@ repository](https://github.com/ginkgo-project/ginkgo-data/). These results can
 also be used for performance comparison in order to ensure that you get similar
 performance as what is published on this repository.
 
-To compile the benchmarks, the flag `-GINKGO_BUILD_BENCHMARKS=ON` has to be set
+To compile the benchmarks, the flag `-DGINKGO_BUILD_BENCHMARKS=ON` has to be set
 during the `cmake` step. In addition, the [`ssget` command-line
 utility](https://github.com/ginkgo-project/ssget) has to be installed on the
 system. The purpose of this file is to explain in detail the capacities of this
 benchmarking suite as well as how to properly setup everything.
 
+There are two ways to benchmark Ginkgo. When compiling the benchmark suite,
+executables are generated for collecting matrix statistics and for running
+sparse matrix-vector product and solver (possibly distributed) benchmarks. The
+other way to run benchmarks is the convenience script `run_all_benchmarks.sh`,
+but not all features are exposed through this tool!
+
 Here is a short description of the content of this file:
 1. Ginkgo setup and best practice guidelines
 2. Installing and using the `ssget` tool to fetch the [SuiteSparse
    matrices](https://sparse.tamu.edu/).
-3. Benchmarking overview and how to run them in a simple way.
-4. How to publish the benchmark results online and use the [Ginkgo Performance
+3. Running benchmarks manually
+4. Benchmarking with the script utility
+5. How to publish the benchmark results online and use the [Ginkgo Performance
    Explorer (GPE)](https://ginkgo-project.github.io/gpe/) for performance
    analysis (optional).
-5. Using the benchmark suite for performance debugging thanks to the loggers.
-6. All available benchmark customization options.
+6. Using the benchmark suite for performance debugging thanks to the loggers.
+7. Available benchmark customization options with the script utility.
 
 
 ### 1: Ginkgo setup and best practice guidelines
@@ -55,13 +62,16 @@ In addition, the following specific options can be considered:
    `overhead` LinOp. If your purpose is to check Ginkgo's overhead, make sure to
    try this mode.
 
+
 ### 2: Using ssget to fetch the matrices
 
-The benchmark suite tests Ginkgo's performance using the [SuiteSparse matrix
-collection](https://sparse.tamu.edu/) and artificially generated matrices. The
-suite sparse collection will be downloaded automatically when the benchmarks are
-run. This is done thanks to the [`ssget` command-line
-utility](https://github.com/ginkgo-project/ssget).
+To benchmark `ginkgo`, matrices need to be provided as input in the `Matrix
+Market` format. A convenient way is to run the benchmarks with the [SuiteSparse
+matrix collection](https://sparse.tamu.edu/). A helper tool, the [`ssget`
+command-line utility](https://github.com/ginkgo-project/ssget), can be used to
+facilitate downloading and extracting matrices from the SuiteSparse collection.
+When running the benchmarks with the helper script `run_all_benchmarks.sh` (or
+calling `make benchmark`), the `ssget` tool is required.
 
 To install `ssget`, access the repository and copy the file `ssget` into a
 directory present in your `PATH` variable as per the tool's `README.md`
@@ -107,7 +117,84 @@ for i in $(seq 0 $(ssget -n)); do
 done
 ```
 
-### 3: Benchmarking overview
+### 3: Running benchmarks manually
+When compiling Ginkgo with the flag `-DGINKGO_BUILD_BENCHMARKS=ON`, a suite of
+executables will be generated depending on the CMake configuration. These
+executables are the backbone of the benchmarking suite. Note that all of these
+executables describe the available options and the required input format when
+running them with the `--help` option. All executables have multiple variants
+depending on the precision; by default `double` precision is used for the type
+of values, but variants with `single` and `complex` (single and double) value
+types are also available. Here is a non-exhaustive list of the available
+benchmarks:
+
++ `blas/blas`: supports benchmarking many of Ginkgo's BLAS operations: dot
+    products, axpy, copy, etc.
++ `conversion/conversion`: conversion between matrix formats.
++ `matrix_generator/matrix_generator`: mostly allows generating block diagonal
+    matrices (to benchmark the block-jacobi preconditioner).
++ `matrix_statistics/matrix_statistics`: computes size and other matrix
+    statistics (such as variance, load imbalance, ...).
++ `preconditioner/preconditioner`: benchmarks most of Ginkgo's preconditioners.
++ `solver/solver`: benchmarks most of Ginkgo's solvers in a non-distributed
+    setting.
++ `sparse_blas/sparse_blas`: benchmarks Sparse BLAS operations, such as SpGEMM,
+    SpGEAM, transpose.
++ `spmv/spmv`: benchmarks Ginkgo's matrix formats (Sparse-Matrix Vector
+    product).
+
+
+Optionally when compiling with MPI support:
++ `blas/distributed/multi_vector`: measures BLAS performance on (distributed)
+    multi-vectors.
++ `solver/distributed/solver`: distributed solver benchmarks.
++ `spmv/distributed/spmv`: distributed matrix Sparse-Matrix Vector (SpMV)
+    product benchmark.
+
+
+All benchmarks require input data in `JSON` format. The JSON file has to
+consist of exactly one array, and within that array the test cases are defined.
+The exact syntax can change between executables; the `--help` option will
+explain the necessary `JSON` input format. For example, for the `spmv` benchmark
+and many other benchmarks, the following minimal input should be provided:
+
+```
+[
+  {
+    "filename": "path/to/your/matrix",
+    "rhs": "path/to/your/rhs"
+  },
+  { ... }
+]
+```
+The files have to be in Matrix Market format.
+
+Some benchmarks require some extra fields. For example, the solver benchmark
+requires the field `"optimal": {"spmv": "matrix format (such as csr)"}`. This is
+automatically populated when running the `spmv` benchmark which finds the
+optimal (fastest) format among all requested formats.
+
+After writing the necessary data in a JSON file, the benchmark can be called by
+passing in the input via stdin, i.e.
+
+```
+./solver < input.json
+```
+
+The output of our benchmarks is again JSON, and it is printed to stdout, while
+our status messages are printed to stderr. So, the output can be stored with
+
+```
+./solver < input.json > output.json
+```
+
+Note that in most cases, the JSON output by our benchmarks is compatible with
+other benchmarks, therefore it is possible to first call the `spmv` benchmark,
+use the resulting output JSON as input to the `solver` benchmark, and finally
+use the resulting solver JSON output as input to the `preconditioner` benchmark.
+
+
+### 4: Benchmarking overview
 
 The benchmark suite is invoked using the `make benchmark` command in the build
 directory. Under the hood, this command simply calls the script
@@ -169,7 +256,7 @@ benchmark options). Here are the most important options:
     thermal2
     ```
 
-### 4: Publishing the results on Github and analyze the results with the GPE (optional)
+### 5: Publishing the results on Github and analyze the results with the GPE (optional)
 
 The previous experiments generated json files for each matrices, each containing
 timing, iteration count, achieved precision, ... depending on the type of
@@ -223,7 +310,7 @@ For the generating the plots in the GPE, here are the steps to go through:
    tabs allow to access the result of the processed data after invoking the
    processing script.
 
-### 5: Detailed performance analysis and debugging
+### 6: Detailed performance analysis and debugging
 
 Detailed performance analysis can be ran by passing the environment variable
 `DETAILED=1` to the benchmarking script. This detailed run is available for
@@ -233,12 +320,12 @@ log the time taken by all operations. These features are also available in the
 to analyze Ginkgo's performance.
 
 These features are implemented thanks to the loggers located in the file
-`${ginkgo_src_dir}/benchmark/utils/loggers.hpp`. Ginkgo possesses hooks at all important code
-location points which can be inspected thanks to the logger. In this fashion, it
-is easy to use these loggers also for tracking memory allocation sizes and other
-important library aspects.
+`${ginkgo_src_dir}/benchmark/utils/loggers.hpp`. Ginkgo possesses hooks at all
+important code location points which can be inspected thanks to the logger. In
+this fashion, it is easy to use these loggers also for tracking memory
+allocation sizes and other important library aspects.
 
-### 6: Available benchmark options
+### 7: Available benchmark options
 
 There are a set amount of options available for benchmarking. Most important
 options can be configured through the benchmarking script itself thanks to
@@ -311,8 +398,9 @@ The supported environment variables are described in the following list:
 * `SOLVERS_INITIAL_GUESS={rhs,0,random}` - the initial guess generation of the
     solvers. `rhs` uses the right-hand side, `0` uses a zero vector and `random`
     generates a random vector as the initial guess.
-* `DETAILED={0,1}` - selects whether detailed benchmarks should be ran for the
-    solver benchmarks, can be either `0` (off) or `1` (on). The default is `0`.
+* `DETAILED={0,1}` - selects whether detailed benchmarks should be run. This
+    generally provides extra, verbose information at the cost of one or more
+    extra benchmark runs. It can be either `0` (off) or `1` (on).
 * `GPU_TIMER={true, false}` - If set to `true`, use the gpu timer, which is
     valid for cuda/hip executor, to measure the timing. Default is `false`.
 * `SOLVERS_JACOBI_MAX_BS` - sets the maximum block size for the Jacobi
diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
index 7fdbe7d4e53..0aa93a3b141 100644
--- a/cmake/create_test.cmake
+++ b/cmake/create_test.cmake
@@ -91,13 +91,21 @@ function(ginkgo_add_test test_name test_target_name)
     endif()
     set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_binary_name})
     if (add_test_MPI_SIZE)
-        add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
-                 COMMAND
-                     ${MPIEXEC_EXECUTABLE}
-                     ${MPIEXEC_NUMPROC_FLAG}
-                     ${add_test_MPI_SIZE}
-                     "$<TARGET_FILE:${test_target_name}>"
-                 WORKING_DIRECTORY "$<TARGET_FILE_DIR:ginkgo>")
+        if (add_test_MPI_SIZE LESS_EQUAL MPIEXEC_MAX_NUMPROCS)
+            add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
+                     COMMAND
+                         ${MPIEXEC_EXECUTABLE}
+                         ${MPIEXEC_NUMPROC_FLAG}
+                         ${add_test_MPI_SIZE}
+                         "$<TARGET_FILE:${test_target_name}>"
+                     WORKING_DIRECTORY "$<TARGET_FILE_DIR:ginkgo>")
+        else()
+            message(
+                    WARNING
+                        "Disabling test: ${test_target_name}. Only ${MPIEXEC_MAX_NUMPROCS} "
+                        "MPI processes available but ${add_test_MPI_SIZE} processes required."
+                   )
+        endif()
     else()
         add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
                  COMMAND ${test_target_name}
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 0ae740b87cf..14ae6ce6592 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -1,7 +1,22 @@
 add_subdirectory(device_hooks) # placeholders for disabled modules
 
-add_library(ginkgo "")
-target_sources(ginkgo
+set(config_source 
+    config/factorization_config.cpp
+    config/multigrid_config.cpp
+    config/preconditioner_config.cpp
+    config/registry.cpp
+    config/solver_config.cpp
+)
+# MSVC: to work around LNK1189, the library is split into two parts.
+# To keep ginkgo as the main library, the original target is named ginkgo_core in MSVC/shared builds.
+# TODO: consider another way to solve this, such as dllexport or a .def file
+set(ginkgo_core "ginkgo")
+if(MSVC AND BUILD_SHARED_LIBS)
+    set(ginkgo_core "ginkgo_core")
+endif()
+
+add_library(${ginkgo_core} "")
+target_sources(${ginkgo_core}
     PRIVATE
     base/array.cpp
     base/batch_multi_vector.cpp
@@ -21,12 +36,7 @@ target_sources(ginkgo
     base/version.cpp
     config/config.cpp
     config/config_helper.cpp
-    config/factorization_config.cpp
-    config/preconditioner_config.cpp
     config/property_tree.cpp
-    config/registry.cpp
-    config/multigrid_config.cpp
-    config/solver_config.cpp
     config/stop_config.cpp
     config/type_descriptor.cpp
     distributed/index_map.cpp
@@ -108,15 +118,15 @@ target_sources(ginkgo
     )
 
 if(GINKGO_HAVE_PAPI_SDE)
-    target_sources(ginkgo PRIVATE log/papi.cpp)
+    target_sources(${ginkgo_core} PRIVATE log/papi.cpp)
 endif()
 
 if(GINKGO_HAVE_METIS)
-    target_sources(ginkgo PRIVATE reorder/nested_dissection.cpp)
+    target_sources(${ginkgo_core} PRIVATE reorder/nested_dissection.cpp)
 endif()
 
 if(GINKGO_BUILD_MPI)
-    target_sources(ginkgo
+    target_sources(${ginkgo_core}
         PRIVATE
         mpi/exception.cpp
         distributed/matrix.cpp
@@ -125,38 +135,55 @@ if(GINKGO_BUILD_MPI)
         distributed/preconditioner/schwarz.cpp)
 endif()
 
-ginkgo_compile_features(ginkgo)
+# MSVC/shared: keep ginkgo as the main library (it holds the config sources)
+if(MSVC AND BUILD_SHARED_LIBS)
+    add_library(ginkgo "")
+    target_sources(ginkgo PRIVATE ${config_source})
+    ginkgo_compile_features(ginkgo)
+    ginkgo_default_includes(ginkgo)
+    ginkgo_install_library(ginkgo)
+    if(GINKGO_CHECK_CIRCULAR_DEPS)
+        ginkgo_check_headers(ginkgo "")
+    endif()
+else() 
+    target_sources(${ginkgo_core} PRIVATE ${config_source})
+endif()
+
+ginkgo_compile_features(${ginkgo_core})
 
 # add a namespace alias so Ginkgo can always be included as Ginkgo::ginkgo
 # regardless of whether it is installed or added as a subdirectory
 add_library(Ginkgo::ginkgo ALIAS ginkgo)
-target_link_libraries(ginkgo
+if(MSVC AND BUILD_SHARED_LIBS)
+    target_link_libraries(ginkgo PUBLIC ${ginkgo_core})
+endif()
+target_link_libraries(${ginkgo_core}
     PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
 if(GINKGO_HAVE_PAPI_SDE)
-    target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE)
+    target_link_libraries(${ginkgo_core} PUBLIC PAPI::PAPI_SDE)
 endif()
 
 if(GINKGO_HAVE_TAU)
-    target_link_libraries(ginkgo PRIVATE perfstubs)
+    target_link_libraries(${ginkgo_core} PRIVATE perfstubs)
 endif()
 
 if(GINKGO_HAVE_VTUNE)
-    target_link_libraries(ginkgo PRIVATE VTune::ITT)
+    target_link_libraries(${ginkgo_core} PRIVATE VTune::ITT)
 endif()
 
 if(GINKGO_HAVE_METIS)
-    target_link_libraries(ginkgo PRIVATE METIS::METIS)
+    target_link_libraries(${ginkgo_core} PRIVATE METIS::METIS)
 endif()
 
 if(GINKGO_BUILD_MPI)
-    target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX)
+    target_link_libraries(${ginkgo_core} PUBLIC MPI::MPI_CXX)
 endif()
 
-ginkgo_default_includes(ginkgo)
-ginkgo_install_library(ginkgo)
+ginkgo_default_includes(${ginkgo_core})
+ginkgo_install_library(${ginkgo_core})
 
 if(GINKGO_CHECK_CIRCULAR_DEPS)
-    ginkgo_check_headers(ginkgo "")
+    ginkgo_check_headers(${ginkgo_core} "")
 endif()
 
 if(GINKGO_BUILD_TESTS)
@@ -165,7 +192,7 @@ endif()
 
 if(GINKGO_DEVEL_TOOLS AND CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BUILD_SHARED_LIBS)
     # Copy pretty-printer next to library
-    add_custom_command(TARGET ginkgo POST_BUILD
+    add_custom_command(TARGET ${ginkgo_core} POST_BUILD
         COMMAND "${CMAKE_COMMAND}" -E copy
         "${Ginkgo_SOURCE_DIR}/dev_tools/scripts/gdb-ginkgo.py"
         "$<TARGET_FILE:ginkgo>-gdb.py"
diff --git a/core/reorder/amd.cpp b/core/reorder/amd.cpp
index 79b44abf536..1b3198b248f 100644
--- a/core/reorder/amd.cpp
+++ b/core/reorder/amd.cpp
@@ -157,7 +157,8 @@ std::unique_ptr<LinOp> Amd<IndexType>::generate_impl(
     // row workspace
     const auto col_idxs_plus_workspace_size = nnz + nnz / 5 + 2 * num_rows;
     array<IndexType> col_idxs_plus_workspace{
-        host_exec, col_idxs_plus_workspace_size + 6 * num_rows};
+        host_exec,
+        static_cast<size_type>(col_idxs_plus_workspace_size + 6 * num_rows)};
     host_exec->copy_from(exec, nnz, pattern->get_const_col_idxs(),
                          col_idxs_plus_workspace.get_data());
 
diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt
index cdfc67fafdf..56f83181375 100644
--- a/core/test/gtest/CMakeLists.txt
+++ b/core/test/gtest/CMakeLists.txt
@@ -9,6 +9,12 @@ function(add_gtest_main suffix definitions)
         target_compile_definitions(ginkgo_gtest_main_mpi${suffix} PRIVATE ${definitions})
         ginkgo_compile_features(ginkgo_gtest_main_mpi${suffix})
     endif()
+    if (GINKGO_BUILD_OMP)
+        target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC OpenMP::OpenMP_CXX)
+        if (GINKGO_BUILD_MPI)
+            target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC OpenMP::OpenMP_CXX)
+        endif()
+    endif()
 endfunction()
 
 add_gtest_main("" "")
diff --git a/third_party/gtest/CMakeLists.txt b/third_party/gtest/CMakeLists.txt
index 378a7cdc705..fb0407ba215 100644
--- a/third_party/gtest/CMakeLists.txt
+++ b/third_party/gtest/CMakeLists.txt
@@ -3,7 +3,7 @@ include(FetchContent)
 FetchContent_Declare(
     googletest
     GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG        release-1.12.1
+    GIT_TAG        v1.14.0
 )
 # need to set the variables in CACHE due to CMP0077
 set(gtest_disable_pthreads ON CACHE INTERNAL "")
diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt
index 6f413e458b9..dbc43660bbe 100644
--- a/third_party/nlohmann_json/CMakeLists.txt
+++ b/third_party/nlohmann_json/CMakeLists.txt
@@ -3,7 +3,7 @@ include(FetchContent)
 FetchContent_Declare(
     nlohmann_json
     GIT_REPOSITORY https://github.com/nlohmann/json.git
-    GIT_TAG        v3.9.1
+    GIT_TAG        v3.11.3
 )
 set(JSON_BuildTests OFF CACHE INTERNAL "")
 set(JSON_Install OFF CACHE INTERNAL "")