Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[sdk] removed limits.cuh in favor of libcu++'s limits #654

Draft
wants to merge 1 commit into
base: dev
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
[sdk] removed limits.cuh in favor of libcu++'s limits
ahehn-nv committed Jun 14, 2021
commit 8be627b0ea8386fcf6c6e34cd5ab15e0edf94f13
52 changes: 0 additions & 52 deletions common/base/include/claraparabricks/genomeworks/utils/limits.cuh

This file was deleted.

4 changes: 2 additions & 2 deletions cudaaligner/src/batched_device_matrices.cuh
Original file line number Diff line number Diff line change
@@ -19,7 +19,7 @@
#include "matrix_cpu.hpp"

#include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
#include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
#include <claraparabricks/genomeworks/utils/device_buffer.hpp>
#include <claraparabricks/genomeworks/utils/pinned_host_vector.hpp>
@@ -131,7 +131,7 @@ public:
{
assert(id < n_matrices_);
assert(offsets_[id + 1] - offsets_[id] >= 0);
-assert(offsets_[id + 1] - offsets_[id] <= numeric_limits<int32_t>::max());
+assert(offsets_[id + 1] - offsets_[id] <= cuda::std::numeric_limits<int32_t>::max());
return offsets_[id + 1] - offsets_[id];
}

4 changes: 2 additions & 2 deletions cudaaligner/src/hirschberg_myers_gpu.cu
Original file line number Diff line number Diff line change
@@ -20,7 +20,7 @@
#include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
#include <claraparabricks/genomeworks/utils/cudautils.hpp>
#include <claraparabricks/genomeworks/utils/mathutils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>
#include <cstring>

namespace claraparabricks
@@ -455,7 +455,7 @@ __device__ const char* hirschberg_myers_compute_target_mid_warp(

const int32_t target_size = (target_end - target_begin);
int32_t midpoint = 0;
-nw_score_t cur_min = numeric_limits<nw_score_t>::max();
+nw_score_t cur_min = cuda::std::numeric_limits<nw_score_t>::max();
for (int32_t t = threadIdx.x; t <= target_size; t += warp_size)
{
nw_score_t sum = score(t, 0) + score(target_size - t, 1);
5 changes: 2 additions & 3 deletions cudaaligner/src/myers_gpu.cu
Original file line number Diff line number Diff line change
@@ -19,14 +19,13 @@

#include <claraparabricks/genomeworks/cudaaligner/aligner.hpp>
#include <claraparabricks/genomeworks/utils/signed_integer_utils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
#include <claraparabricks/genomeworks/utils/mathutils.hpp>
#include <claraparabricks/genomeworks/utils/cudautils.hpp>
#include <claraparabricks/genomeworks/utils/allocator.hpp>
#include <claraparabricks/genomeworks/utils/device_buffer.hpp>

#include <cassert>
#include <climits>
+#include <cuda/std/limits>
#include <vector>
#include <numeric>
#pragma GCC diagnostic push
@@ -429,7 +428,7 @@ __device__ int32_t myers_backtrace_banded(int8_t* path, int32_t* const path_coun
{
assert(threadIdx.x == 0);
using nw_score_t = int32_t;
-GW_CONSTEXPR nw_score_t out_of_band = numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
+GW_CONSTEXPR nw_score_t out_of_band = cuda::std::numeric_limits<nw_score_t>::max() - 1; // -1 to avoid integer overflow further down.
assert(pv.num_rows() == score.num_rows());
assert(mv.num_rows() == score.num_rows());
assert(pv.num_cols() == score.num_cols());
10 changes: 5 additions & 5 deletions cudaaligner/src/ukkonen_gpu.cu
Original file line number Diff line number Diff line change
@@ -17,7 +17,7 @@
#include "ukkonen_gpu.cuh"
#include "batched_device_matrices.cuh"
#include <claraparabricks/genomeworks/cudaaligner/cudaaligner.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>

#include <limits>
#include <cstdint>
@@ -77,7 +77,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register
if (id >= n_alignments)
return;

-GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;

int32_t m = sequence_lengths_d[2 * id] + 1;
int32_t n = sequence_lengths_d[2 * id + 1] + 1;
@@ -153,7 +153,7 @@ __launch_bounds__(GW_UKKONEN_MAX_THREADS_PER_BLOCK) // Workaround for a register

__device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
{
-GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
while (k < kmax)
{
int32_t const lmin = abs(2 * k + 1 - p);
@@ -173,7 +173,7 @@ __device__ void ukkonen_compute_score_matrix_odd(device_matrix_view<nw_score_t>&

__device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>& scores, int32_t kmax, int32_t k, int32_t m, int32_t n, char const* query, char const* target, int32_t max_target_query_length, int32_t p, int32_t l)
{
-GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
while (k < kmax)
{
int32_t const lmin = abs(2 * k - p);
@@ -193,7 +193,7 @@ __device__ void ukkonen_compute_score_matrix_even(device_matrix_view<nw_score_t>

__device__ void ukkonen_init_score_matrix(device_matrix_view<nw_score_t>& scores, int32_t k, int32_t p)
{
-GW_CONSTEXPR nw_score_t max = numeric_limits<nw_score_t>::max() - 1;
+GW_CONSTEXPR nw_score_t max = cuda::std::numeric_limits<nw_score_t>::max() - 1;
while (k < scores.num_rows())
{
for (int32_t l = 0; l < scores.num_cols(); ++l)
6 changes: 3 additions & 3 deletions cudapoa/src/cudapoa_nw.cuh
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@
#include "cudapoa_structs.cuh"

#include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>

-#include <stdio.h>
+#include <cstdio>

namespace claraparabricks
{
@@ -172,7 +172,7 @@ __device__ __forceinline__

static_assert(CPT == 4, "implementation currently supports only 4 cells per thread");

-GW_CONSTEXPR ScoreT score_type_min_limit = numeric_limits<ScoreT>::min();
+GW_CONSTEXPR ScoreT score_type_min_limit = cuda::std::numeric_limits<ScoreT>::min();

int16_t lane_idx = threadIdx.x % WARP_SIZE;
int64_t score_index;
6 changes: 3 additions & 3 deletions cudapoa/src/cudapoa_nw_banded.cuh
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@
#include "cudapoa_structs.cuh"

#include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>

-#include <stdio.h>
+#include <cstdio>

namespace claraparabricks
{
@@ -199,7 +199,7 @@ __device__ __forceinline__
int32_t match_score,
int32_t rerun)
{
-const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+constexpr ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;

int32_t lane_idx = threadIdx.x % WARP_SIZE;

6 changes: 3 additions & 3 deletions cudapoa/src/cudapoa_nw_tb_banded.cuh
Original file line number Diff line number Diff line change
@@ -19,9 +19,9 @@
#include "cudapoa_structs.cuh"

#include <claraparabricks/genomeworks/utils/cudautils.hpp>
-#include <claraparabricks/genomeworks/utils/limits.cuh>
+#include <cuda/std/limits>

-#include <stdio.h>
+#include <cstdio>

namespace claraparabricks
{
@@ -289,7 +289,7 @@ __device__ __forceinline__
int32_t match_score,
int32_t rerun)
{
-const ScoreT min_score_value = numeric_limits<ScoreT>::min() / 2;
+constexpr ScoreT min_score_value = cuda::std::numeric_limits<ScoreT>::min() / 2;

int32_t lane_idx = threadIdx.x % WARP_SIZE;