diff --git a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/ball_query.cpp b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/ball_query.cpp
index 6c70258..3845592 100644
--- a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/ball_query.cpp
+++ b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/ball_query.cpp
@@ -7,8 +7,8 @@
 
 extern THCState *state;
 
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x, " must be a CUDAtensor ") /* query the tensor directly; Tensor::type() is deprecated */
+#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 #define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
 
 int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample, 
@@ -19,7 +19,7 @@ int ball_query_wrapper_fast(int b, int n, int m, float radius, int nsample,
     const float *xyz = xyz_tensor.data<float>();
     int *idx = idx_tensor.data<int>();
     
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     ball_query_kernel_launcher_fast(b, n, m, radius, nsample, new_xyz, xyz, idx, stream);
     return 1;
 }
@@ -33,7 +33,7 @@ int ball_query_dilated_wrapper_fast(int b, int n, int m, float max_radius, float
     const float *xyz = xyz_tensor.data<float>();
     int *idx = idx_tensor.data<int>();
     
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     ball_query_dilated_kernel_launcher_fast(b, n, m, max_radius, min_radius, nsample, new_xyz, xyz, idx, stream);
     return 1;
 }
\ No newline at end of file
diff --git a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/group_points.cpp b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/group_points.cpp
index 6bb577d..430dc77 100644
--- a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/group_points.cpp
+++ b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/group_points.cpp
@@ -15,7 +15,7 @@ int group_points_grad_wrapper_fast(int b, int c, int n, int npoints, int nsample
     const int *idx = idx_tensor.data<int>();
     const float *grad_out = grad_out_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
     group_points_grad_kernel_launcher_fast(b, c, n, npoints, nsample, grad_out, idx, grad_points, stream);
     return 1;
@@ -29,7 +29,7 @@ int group_points_wrapper_fast(int b, int c, int n, int npoints, int nsample,
     const int *idx = idx_tensor.data<int>();
     float *out = out_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
     group_points_kernel_launcher_fast(b, c, n, npoints, nsample, points, idx, out, stream);
     return 1;
diff --git a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/interpolate.cpp b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/interpolate.cpp
index 6c1724c..b50fdbb 100644
--- a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/interpolate.cpp
+++ b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/interpolate.cpp
@@ -18,7 +18,7 @@ void three_nn_wrapper_fast(int b, int n, int m, at::Tensor unknown_tensor,
     float *dist2 = dist2_tensor.data<float>();
     int *idx = idx_tensor.data<int>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     three_nn_kernel_launcher_fast(b, n, m, unknown, known, dist2, idx, stream);
 }
 
@@ -34,7 +34,7 @@ void three_interpolate_wrapper_fast(int b, int c, int m, int n,
     float *out = out_tensor.data<float>();
     const int *idx = idx_tensor.data<int>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     three_interpolate_kernel_launcher_fast(b, c, m, n, points, idx, weight, out, stream);
 }
 
@@ -49,6 +49,6 @@ void three_interpolate_grad_wrapper_fast(int b, int c, int n, int m,
     float *grad_points = grad_points_tensor.data<float>();
     const int *idx = idx_tensor.data<int>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     three_interpolate_grad_kernel_launcher_fast(b, c, n, m, grad_out, idx, weight, grad_points, stream);
 }
\ No newline at end of file
diff --git a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/sampling.cpp b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/sampling.cpp
index f29e15f..f275909 100644
--- a/pcdet/ops/pointnet2/pointnet2_3DSSD/src/sampling.cpp
+++ b/pcdet/ops/pointnet2/pointnet2_3DSSD/src/sampling.cpp
@@ -14,7 +14,7 @@ int gather_points_wrapper_fast(int b, int c, int n, int npoints,
     const int *idx = idx_tensor.data<int>();
     float *out = out_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     gather_points_kernel_launcher_fast(b, c, n, npoints, points, idx, out, stream);
     return 1;
 }
@@ -27,7 +27,7 @@ int gather_points_grad_wrapper_fast(int b, int c, int n, int npoints,
     const int *idx = idx_tensor.data<int>();
     float *grad_points = grad_points_tensor.data<float>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     gather_points_grad_kernel_launcher_fast(b, c, n, npoints, grad_out, idx, grad_points, stream);
     return 1;
 }
@@ -40,7 +40,7 @@ int furthest_point_sampling_wrapper(int b, int n, int m,
     float *temp = temp_tensor.data<float>();
     int *idx = idx_tensor.data<int>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     furthest_point_sampling_kernel_launcher(b, n, m, points, temp, idx, stream);
     return 0;
 }
@@ -52,7 +52,7 @@ int furthest_point_sampling_with_dist_wrapper(int b, int n, int m,
     float *temp = temp_tensor.data<float>();
     int *idx = idx_tensor.data<int>();
 
-    cudaStream_t stream = THCState_getCurrentStream(state);
+    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
     furthest_point_sampling_with_dist_kernel_launcher(b, n, m, points, temp, idx, stream);
     return 2;
 }
diff --git a/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp b/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
index 00edfef..e18fb69 100644
--- a/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
+++ b/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
@@ -11,8 +11,8 @@ All Rights Reserved 2019-2020.
 #include <assert.h>
 
 
-//#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-//#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+//#define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
+//#define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
 //#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
 
 
diff --git a/pcdet/version.py b/pcdet/version.py
index 6e26f2c..60850ac 100644
--- a/pcdet/version.py
+++ b/pcdet/version.py
@@ -1 +1 @@
-__version__ = "0.3.0+a7cf536"
+__version__ = "0.3.0+e5d6188"