[sdk] removed limits.cuh in favor of libcu++'s limits

NVIDIA-Genomics-Research · ahehn-nv · Jun 14, 2021 · Jun 14, 2021 · 8be627b0ea8386fcf6c6e34cd5ab15e0edf94f13
commit 8be627b0ea8386fcf6c6e34cd5ab15e0edf94f13
diff --git a/common/base/include/claraparabricks/genomeworks/utils/limits.cuh b/common/base/include/claraparabricks/genomeworks/utils/limits.cuh
diff --git a/cudaaligner/src/batched_device_matrices.cuh b/cudaaligner/src/batched_device_matrices.cuh
@@ -19,7 +19,7 @@
 #include "matrix_cpu.hpp"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 #include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
 #include <claraparabricks/genomeworks/utils/device_buffer.hpp>
 #include <claraparabricks/genomeworks/utils/pinned_host_vector.hpp>
@@ -131,7 +131,7 @@ public:
         {
             assert(id < n_matrices_);
             assert(offsets_[id + 1] - offsets_[id] >= 0);
-            assert(offsets_[id + 1] - offsets_[id] <= numeric_limits<int32_t>::max());
+            assert(offsets_[id + 1] - offsets_[id] <= cuda::std::numeric_limits<int32_t>::max());
             return offsets_[id + 1] - offsets_[id];
         }
 

diff --git a/cudaaligner/src/hirschberg_myers_gpu.cu b/cudaaligner/src/hirschberg_myers_gpu.cu
@@ -20,7 +20,7 @@
 #include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 #include <claraparabricks/genomeworks/utils/mathutils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 #include <cstring>
 
 namespace claraparabricks
@@ -455,7 +455,7 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp(
 
     const int32_t target_size = (target_end - target_begin);
     int32_t midpoint          = 0;
-    nw_score_t cur_min        = numeric_limits<nw_score_t>::max();
+    nw_score_t cur_min        = cuda::std::numeric_limits<nw_score_t>::max();
     for (int32_t t = threadIdx.x; t <= target_size; t += warp_size)
     {
         nw_score_t sum = score(t, 0) + score(target_size - t, 1);

diff --git a/cudaaligner/src/myers_gpu.cu b/cudaaligner/src/myers_gpu.cu
@@ -19,14 +19,13 @@
 
 #include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
 #include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
 #include <claraparabricks/genomeworks/utils/mathutils.hpp>
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
 #include <claraparabricks/genomeworks/utils/allocator.hpp>
 #include <claraparabricks/genomeworks/utils/device_buffer.hpp>
 
 #include <cassert>
-#include <climits>
+#include <cuda/std/limits>
 #include <vector>
 #include <numeric>
 #pragma GCC diagnostic push
@@ -429,7 +428,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, int32_t* const path_coun
 {
     assert(threadIdx.x == 0);
     using nw_score_t                    = int32_t;
-    GW_CONSTEXPR nw_score_t out_of_band = numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
+    GW_CONSTEXPR nw_score_t out_of_band = cuda::std::numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
     assert(pv.num_rows() == score.num_rows());
     assert(mv.num_rows() == score.num_rows());
     assert(pv.num_cols() == score.num_cols());

diff --git a/cudaaligner/src/ukkonen_gpu.cu b/cudaaligner/src/ukkonen_gpu.cu
@@ -17,7 +17,7 @@
 #include "ukkonen_gpu.cuh"
 #include "batched_device_matrices.cuh"
 #include <claraparabricks/genomeworks/cudaaligner/cudaaligner.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
 #include <limits>
 #include <cstdint>
@@ -77,7 +77,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register
     if (id >= n_alignments)
         return;
 
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
 
     int32_t m        = sequence_lengths_d[2 * id] + 1;
     int32_t n        = sequence_lengths_d[2 * id + 1] + 1;
@@ -153,7 +153,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register
 
 __device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < kmax)
     {
         int32_t const lmin = abs(2 * k + 1 - p);
@@ -173,7 +173,7 @@ __device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>&
 
 __device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < kmax)
     {
         int32_t const lmin = abs(2 * k - p);
@@ -193,7 +193,7 @@ __device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>
 
 __device__ void ukkonen_init_score_matrix(device_matrix_view<nw_score_t>& scores, int32_t k, int32_t p)
 {
-    GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+    GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
     while (k < scores.num_rows())
     {
         for (int32_t l = 0; l < scores.num_cols(); ++l)

diff --git a/cudapoa/src/cudapoa_nw.cuh b/cudapoa/src/cudapoa_nw.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -172,7 +172,7 @@ __device__ __forceinline__
 
     static_assert(CPT == 4, "implementation currently supports only 4 cells per thread");
 
-    GW_CONSTEXPR ScoreT score_type_min_limit = numeric_limits<ScoreT>::min();
+    GW_CONSTEXPR ScoreT score_type_min_limit = cuda::std::numeric_limits<ScoreT>::min();
 
     int16_t lane_idx = threadIdx.x % WARP_SIZE;
     int64_t score_index;

diff --git a/cudapoa/src/cudapoa_nw_banded.cuh b/cudapoa/src/cudapoa_nw_banded.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -199,7 +199,7 @@ __device__ __forceinline__
                           int32_t match_score,
                           int32_t rerun)
 {
-    const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+    constexpr ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;
 
     int32_t lane_idx = threadIdx.x % WARP_SIZE;
 

diff --git a/cudapoa/src/cudapoa_nw_tb_banded.cuh b/cudapoa/src/cudapoa_nw_tb_banded.cuh
@@ -19,9 +19,9 @@
 #include "cudapoa_structs.cuh"
 
 #include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
 
-#include <stdio.h>
+#include <cstdio>
 
 namespace claraparabricks
 {
@@ -289,7 +289,7 @@ __device__ __forceinline__
                                    int32_t match_score,
                                    int32_t rerun)
 {
-    const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+    constexpr ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;
 
     int32_t lane_idx = threadIdx.x % WARP_SIZE;