diff --git a/.github/workflows/openmp-macos-13.yaml b/.github/workflows/openmp-macos-13.yaml new file mode 100644 index 00000000..4cd576c3 --- /dev/null +++ b/.github/workflows/openmp-macos-13.yaml @@ -0,0 +1,55 @@ +name: openmp-macos-13 +on: + pull_request: + workflow_dispatch: + +jobs: + build: + runs-on: macos-13 + steps: + - uses: actions/checkout@v2 + + - name: configure OS + run: | + # os level stuff + set -x + brew install googletest llvm + + - name: install BLAS++ + run: | + cd .. + git clone https://github.com/icl-utk-edu/blaspp.git + mkdir blaspp-build + cd blaspp-build + cmake \ + -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ \ + -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=`pwd`/../blaspp-install \ + -Dbuild_tests=OFF \ + ../blaspp + make -j2 install + + - name: install Random123 + run: | + cd .. + git clone https://github.com/DEShawResearch/Random123.git + cd Random123/ + mkdir -p `pwd`/../Random123-install/include + cp -rp include/Random123 `pwd`/../Random123-install/include/ + + - name: build and test RandBLAS (release) + run: | + cd .. 
+ mkdir RandBLAS-build + cd RandBLAS-build + cmake \ + -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ \ + -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang \ + -DCMAKE_BUILD_TYPE=Release \ + -Dblaspp_DIR=`pwd`/../blaspp-install/lib/cmake/blaspp/ \ + -DRandom123_DIR=`pwd`/../Random123-install/include/ \ + -DCMAKE_INSTALL_PREFIX=`pwd`/../RandBLAS-install \ + ../RandBLAS + make -j2 install + ctest --output-on-failure diff --git a/.github/workflows/openmp-macos.yaml b/.github/workflows/openmp-macos.yaml index bcb29d97..fba16aa6 100644 --- a/.github/workflows/openmp-macos.yaml +++ b/.github/workflows/openmp-macos.yaml @@ -1,4 +1,4 @@ -name: openmp-macos +name: openmp-macos-latest on: pull_request: workflow_dispatch: @@ -22,8 +22,8 @@ jobs: mkdir blaspp-build cd blaspp-build cmake \ - -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ \ - -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ \ + -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=`pwd`/../blaspp-install \ -Dbuild_tests=OFF \ @@ -44,8 +44,8 @@ jobs: mkdir RandBLAS-build cd RandBLAS-build cmake \ - -DCMAKE_CXX_COMPILER=/usr/local/opt/llvm/bin/clang++ \ - -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang \ + -DCMAKE_CXX_COMPILER=/opt/homebrew/opt/llvm/bin/clang++ \ + -DCMAKE_C_COMPILER=/opt/homebrew/opt/llvm/bin/clang \ -DCMAKE_BUILD_TYPE=Release \ -Dblaspp_DIR=`pwd`/../blaspp-install/lib/cmake/blaspp/ \ -DRandom123_DIR=`pwd`/../Random123-install/include/ \ diff --git a/RandBLAS/dense_skops.hh b/RandBLAS/dense_skops.hh index a79c9549..d84f8914 100644 --- a/RandBLAS/dense_skops.hh +++ b/RandBLAS/dense_skops.hh @@ -332,38 +332,40 @@ static RNGState fill_dense_submat_impl( } randblas_require(n_cols >= n_scols); RNG rng; - typename RNG::ctr_type c = seed.counter; - typename RNG::key_type k = seed.key; + using CTR_t = typename RNG::ctr_type; + using KEY_t = typename RNG::key_type; + CTR_t c = 
seed.counter; + KEY_t k = seed.key; int64_t pad = 0; // ^ computed such that n_cols+pad is divisible by RNG::static_size - if (n_cols % RNG::ctr_type::static_size != 0) { - pad = RNG::ctr_type::static_size - n_cols % RNG::ctr_type::static_size; + if (n_cols % CTR_t::static_size != 0) { + pad = CTR_t::static_size - n_cols % CTR_t::static_size; } int64_t n_cols_padded = n_cols + pad; - // ^ smallest number of columns, greater than or equal to n_cols, that would be divisible by RNG::ctr_type::static_size + // ^ smallest number of columns, greater than or equal to n_cols, that would be divisible by CTR_t::static_size int64_t ptr_padded = ptr + ptr / n_cols * pad; // ^ ptr corresponding to the padded matrix - int64_t r0_padded = ptr_padded / RNG::ctr_type::static_size; + int64_t r0_padded = ptr_padded / CTR_t::static_size; // ^ starting counter corresponding to ptr_padded - int64_t r1_padded = (ptr_padded + n_scols - 1) / RNG::ctr_type::static_size; + int64_t r1_padded = (ptr_padded + n_scols - 1) / CTR_t::static_size; // ^ ending counter corresponding to ptr of the last element of the row - int64_t ctr_gap = n_cols_padded / RNG::ctr_type::static_size; + int64_t ctr_gap = n_cols_padded / CTR_t::static_size; // ^ number of counters between the first counter of the row to the first counter of the next row; - int64_t s0 = ptr_padded % RNG::ctr_type::static_size; - int64_t e1 = (ptr_padded + n_scols - 1) % RNG::ctr_type::static_size; + int64_t s0 = ptr_padded % CTR_t::static_size; + int64_t e1 = (ptr_padded + n_scols - 1) % CTR_t::static_size; int64_t num_thrds = 1; -#if defined(RandBLAS_HAS_OpenMP) + #if defined(RandBLAS_HAS_OpenMP) #pragma omp parallel { num_thrds = omp_get_num_threads(); } -#endif + #endif //Instead of using thrd_arr just initialize ctr_arr to be zero counters; - typename RNG::ctr_type ctr_arr[num_thrds]; + CTR_t *ctr_arr = new CTR_t[num_thrds]; for (int i = 0; i < num_thrds; i++) { ctr_arr[i] = c; } @@ -381,9 +383,9 @@ static RNGState 
fill_dense_submat_impl( #pragma omp for for (int row = 0; row < n_srows; row++) { - #if defined(RandBLAS_HAS_OpenMP) - thrd = omp_get_thread_num(); - #endif + #if defined(RandBLAS_HAS_OpenMP) + thrd = omp_get_thread_num(); + #endif ind = 0; r0 = r0_padded + ctr_gap*row; @@ -392,7 +394,7 @@ static RNGState fill_dense_submat_impl( cc.incr(r0 - prev); prev = r0; auto rv = OP::generate(rng, cc, k); - int64_t range = (r1 > r0)? RNG::ctr_type::static_size-1 : e1; + int64_t range = (r1 > r0)? CTR_t::static_size - 1 : e1; for (i = s0; i <= range; i++) { smat[ind + row * lda] = rv[i]; ind++; @@ -403,7 +405,7 @@ static RNGState fill_dense_submat_impl( cc.incr(); prev++; rv = OP::generate(rng, cc, k); - for (i = 0; i < RNG::ctr_type::static_size; i++) { + for (i = 0; i < CTR_t::static_size; i++) { smat[ind + row * lda] = rv[i]; ind++; } @@ -426,12 +428,13 @@ static RNGState fill_dense_submat_impl( } //finds the largest counter in the counter array - typename RNG::ctr_type max_c = ctr_arr[0]; + CTR_t max_c = ctr_arr[0]; for (int i = 1; i < num_thrds; i++) { if (compare_ctr(ctr_arr[i], max_c)) { max_c = ctr_arr[i]; } } + delete [] ctr_arr; max_c.incr(); return RNGState {max_c, k}; diff --git a/test/test_dense_skops/test_construction.cc b/test/test_dense_skops/test_construction.cc index a6005a8a..5520a5c4 100644 --- a/test/test_dense_skops/test_construction.cc +++ b/test/test_dense_skops/test_construction.cc @@ -461,7 +461,7 @@ class TestStateUpdate : public ::testing::Test RandBLAS::DenseDistName dn ) { int total = 0; - int buff[n_rows*n_cols]; + int *buff = new int[n_rows*n_cols]; auto state = RandBLAS::RNGState(key); auto state_copy = RandBLAS::RNGState(key); @@ -486,6 +486,8 @@ class TestStateUpdate : public ::testing::Test } + // Free buff before ASSERT_TRUE: a failing ASSERT_* returns immediately and would skip this cleanup. + delete [] buff; ASSERT_TRUE(total == 0); } };