Commit
support ninja compile
gyzhou2000 committed Apr 11, 2024
1 parent 6279fd5 commit 8ae745d
Showing 5 changed files with 21 additions and 77 deletions.
18 changes: 18 additions & 0 deletions clean.sh
@@ -0,0 +1,18 @@
#!/bin/bash

# Set the path to the GammaGL directory
# GAMMAGL_DIR="/home/zgy/GammaGL"
GAMMAGL_DIR="/home/zgy/operator0309/operator0411/GammaGL"

# Remove the build, dist, and gammagl.egg-info directories
rm -rf "$GAMMAGL_DIR/build" "$GAMMAGL_DIR/dist" "$GAMMAGL_DIR/gammagl.egg-info"

# Remove compiled extension files (.so and .pyd) under gammagl/mpops/torch_ext and gammagl/ops
find "$GAMMAGL_DIR/gammagl/mpops/torch_ext" -name "*.so" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/ops" -name "*.so" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/mpops/torch_ext" -name "*.pyd" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/ops" -name "*.pyd" -type f -exec rm -f {} +

echo "Cleanup completed."

pip uninstall gammagl
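Since the script also clears Windows build artifacts (.pyd), a rough cross-platform sketch of the same cleanup in Python may be handy where bash is unavailable (clean.py is a hypothetical helper, not part of this commit; it assumes it sits in the repository root):

import shutil
from pathlib import Path

root = Path(__file__).resolve().parent  # assumed: the GammaGL repository root

# Remove build output directories, ignoring ones that do not exist.
for d in ("build", "dist", "gammagl.egg-info"):
    shutil.rmtree(root / d, ignore_errors=True)

# Remove compiled extension binaries (.so on Linux/macOS, .pyd on Windows).
for sub in ("gammagl/mpops/torch_ext", "gammagl/ops"):
    for pattern in ("*.so", "*.pyd"):
        for f in (root / sub).rglob(pattern):
            f.unlink()

print("Cleanup completed.")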
32 changes: 0 additions & 32 deletions gammagl/mpops/torch_ext/cuda/segment_max_cuda.cu
@@ -96,18 +96,6 @@ __global__ void arg_segment_max_cuda_forward_kernel(
// }
}
}

// if (thread_idx < numel) {
// int64_t idx = index_data[e];
// scalar_t current_max = out_data[idx * K + k];
// scalar_t current_val = x_data[thread_idx];

// if (current_val == current_max) {
// // atomicCAS(&arg_out_data[idx * K + k], N, e);
// atomicMax(reinterpret_cast<unsigned int*>(&arg_out_data[idx * K + k]),
// static_cast<unsigned int>(e));
// }
// }
}

std::tuple<torch::Tensor, torch::Tensor> segment_max_cuda_forward(
@@ -145,26 +133,6 @@ std::tuple<torch::Tensor, torch::Tensor> segment_max_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// segment_max_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, E, K, N, x.numel());

// // out.masked_fill_(out == std::numeric_limits<int64_t>::lowest(),
// // (scalar_t)0);

// arg_segment_max_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, arg_out_data, E, K, N, x.numel(),
// out.size(0));
// });


if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
if (x.dtype() == torch::kInt8){
out.fill_(std::numeric_limits<int8_t>::lowest());
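For reference, the semantics this kernel implements can be sketched in plain PyTorch (a reference model only, not the shipped CUDA path; segment_max_ref is a hypothetical name, and the argmax tie-break shown is one reasonable convention):

import torch

def segment_max_ref(x, index, N):
    # out[n, k] = max over rows e with index[e] == n; empty segments keep the lowest value.
    E, K = x.shape
    lowest = (torch.finfo(x.dtype).min if x.dtype.is_floating_point
              else torch.iinfo(x.dtype).min)
    out = torch.full((N, K), lowest, dtype=x.dtype)
    idx = index.unsqueeze(-1).expand(E, K)
    out.scatter_reduce_(0, idx, x, reduce="amax", include_self=True)
    # arg_out: smallest source row attaining the segment max; E marks empty segments.
    e_ids = torch.arange(E).unsqueeze(-1).expand(E, K)
    cand = torch.where(x == out[index], e_ids, torch.full_like(e_ids, E))
    arg_out = torch.full((N, K), E, dtype=torch.int64)
    arg_out.scatter_reduce_(0, idx, cand, reduce="amin", include_self=True)
    return out, arg_out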
29 changes: 0 additions & 29 deletions gammagl/mpops/torch_ext/cuda/segment_mean_cuda.cu
@@ -19,16 +19,6 @@ using torch::autograd::variable_list;
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS

// inline __device__ void atomic_max_float(float *addr, float value) {
// int *addr_as_i = (int *)addr;
// int old = *addr_as_i;
// int assumed;
// do{
// assumed = old;
// old = atomicCAS(addr_as_i, assumed,
// __float_as_int(max(value, __int_as_float(assumed))));
// } while (assumed != old);
// }

template <typename scalar_t>
__global__ void segment_mean_cuda_forward_kernel(
@@ -99,25 +89,6 @@ torch::Tensor segment_mean_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// torch::Tensor count = torch::full_like(out, 0.0, x.options());
// scalar_t *count_data = count.data_ptr<scalar_t>();

// segment_mean_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, count_data, E, K, N, x.numel());

// arg_segment_mean_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, arg_out_data, count_data, E, K, N,
// x.numel());
// });

if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
auto type = x.dtype();
using scalar_t = int;
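The mean kernel follows the same pattern with a per-segment counter; its reference semantics in plain PyTorch (segment_mean_ref is a hypothetical name, assuming floating-point x):

import torch

def segment_mean_ref(x, index, N):
    # Per-segment sum divided by per-segment element count.
    out = torch.zeros((N, x.shape[1]), dtype=x.dtype)
    out.index_add_(0, index, x)
    count = torch.bincount(index, minlength=N).clamp(min=1)  # guard empty segments
    return out / count.unsqueeze(-1).to(x.dtype)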
13 changes: 0 additions & 13 deletions gammagl/mpops/torch_ext/cuda/segment_sum_cuda.cu
@@ -66,26 +66,13 @@ torch::Tensor segment_sum_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// using scalar_t = x.scalar_type(); // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// segment_sum_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, E, K, N, x.numel());
// });

if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
auto type = x.dtype();
using scalar_t = int;
if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt64) {
x = x.to(torch::kInt32);
out = out.to(torch::kInt32);
}
std::cout << x.dtype() << std::endl;
auto x_data = x.data_ptr<scalar_t>();
auto out_data = out.data_ptr<scalar_t>();
auto index_data = index.data_ptr<int64_t>();
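Reference semantics for the sum path, mirroring the integer handling visible above, where narrow and 64-bit ints are accumulated in int32 (segment_sum_ref is a hypothetical name):

import torch

def segment_sum_ref(x, index, N):
    orig = x.dtype
    if orig in (torch.int8, torch.int16, torch.int64):
        x = x.to(torch.int32)  # same widening/narrowing the CUDA path applies
    out = torch.zeros((N, x.shape[1]), dtype=x.dtype)
    out.index_add_(0, index, x)  # out[index[e]] += x[e]
    return out.to(orig)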
6 changes: 3 additions & 3 deletions setup.py
@@ -94,14 +94,14 @@ def load_ops_extensions():
extensions.append(PyCPUExtension(
name=osp.join(ops_dir, f'_{ops_prefix}').replace(osp.sep, "."),
sources=[osp.join(src_dir, f) for f in src_files],
include_dirs=[osp.join('third_party', d) for d in ops_third_party_deps[i]],
include_dirs=[osp.abspath(osp.join('third_party', d)) for d in ops_third_party_deps[i]],
extra_compile_args=['-std=c++17']
))
else:
extensions.append(PyCudaExtension(
name=osp.join(ops_dir, f'_{ops_prefix}_cuda').replace(osp.sep, "."),
sources=[osp.join(src_dir, f) for f in src_files],
include_dirs=[osp.join('third_party', d) for d in ops_third_party_deps[i]],
include_dirs=[osp.abspath(osp.join('third_party', d)) for d in ops_third_party_deps[i]],
extra_compile_args=['-std=c++17']
))

@@ -115,7 +115,7 @@ def load_extensions():
return extensions

install_requires = ['numpy', 'pandas', 'numba', 'scipy', 'protobuf', 'pyparsing', 'rdkit',
'tensorboardx', 'pytest', 'tensorlayerx', 'rich', 'tqdm', 'pybind11', 'panda']
'tensorboardx', 'pytest', 'tensorlayerx', 'rich', 'tqdm', 'pybind11', 'ninja']

classifiers = [
'Development Status :: 3 - Alpha',
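Adding ninja to install_requires lets torch's extension builder compile sources in parallel instead of falling back to the slower distutils path. A quick sanity check that ninja is picked up, using standard torch.utils.cpp_extension helpers (shown for illustration, not part of this commit):

from torch.utils.cpp_extension import is_ninja_available, verify_ninja_availability

print("ninja available:", is_ninja_available())  # False -> builds fall back to distutils
verify_ninja_availability()  # raises RuntimeError if ninja cannot be run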
