Commit
support ninja compile
gyzhou2000 committed Apr 11, 2024
1 parent 6279fd5 commit 8ae745d
Showing 5 changed files with 21 additions and 77 deletions.
18 changes: 18 additions & 0 deletions clean.sh
@@ -0,0 +1,18 @@
#!/bin/bash

# Set the path to the GammaGL directory
# GAMMAGL_DIR="/home/zgy/GammaGL"
GAMMAGL_DIR="/home/zgy/operator0309/operator0411/GammaGL"

# Remove the build, dist, and gammagl.egg-info directories
rm -rf "$GAMMAGL_DIR/build" "$GAMMAGL_DIR/dist" "$GAMMAGL_DIR/gammagl.egg-info"

# Remove compiled extension files (.so and .pyd) under gammagl/mpops/torch_ext and gammagl/ops
find "$GAMMAGL_DIR/gammagl/mpops/torch_ext" -name "*.so" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/ops" -name "*.so" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/mpops/torch_ext" -name "*.pyd" -type f -exec rm -f {} +
find "$GAMMAGL_DIR/gammagl/ops" -name "*.pyd" -type f -exec rm -f {} +

echo "Cleanup completed."

pip uninstall gammagl
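Since the script also clears Windows build artifacts (.pyd), a rough cross-platform sketch of the same cleanup in Python may be handy where bash is unavailable (clean.py is a hypothetical helper, not part of this commit; it assumes it sits in the repository root):

import shutil
from pathlib import Path

root = Path(__file__).resolve().parent  # assumed: the GammaGL repository root

# Remove build output directories, ignoring ones that do not exist.
for d in ("build", "dist", "gammagl.egg-info"):
    shutil.rmtree(root / d, ignore_errors=True)

# Remove compiled extension binaries (.so on Linux/macOS, .pyd on Windows).
for sub in ("gammagl/mpops/torch_ext", "gammagl/ops"):
    for pattern in ("*.so", "*.pyd"):
        for f in (root / sub).rglob(pattern):
            f.unlink()

print("Cleanup completed.")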
32 changes: 0 additions & 32 deletions gammagl/mpops/torch_ext/cuda/segment_max_cuda.cu
@@ -96,18 +96,6 @@ __global__ void arg_segment_max_cuda_forward_kernel(
// }
}
}

// if (thread_idx < numel) {
// int64_t idx = index_data[e];
// scalar_t current_max = out_data[idx * K + k];
// scalar_t current_val = x_data[thread_idx];

// if (current_val == current_max) {
// // atomicCAS(&arg_out_data[idx * K + k], N, e);
// atomicMax(reinterpret_cast<unsigned int*>(&arg_out_data[idx * K + k]),
// static_cast<unsigned int>(e));
// }
// }
}

std::tuple<torch::Tensor, torch::Tensor> segment_max_cuda_forward(
@@ -145,26 +133,6 @@ std::tuple<torch::Tensor, torch::Tensor> segment_max_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// segment_max_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, E, K, N, x.numel());

// // out.masked_fill_(out == std::numeric_limits<int64_t>::lowest(),
// // (scalar_t)0);

// arg_segment_max_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, arg_out_data, E, K, N, x.numel(),
// out.size(0));
// });


if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
if (x.dtype() == torch::kInt8){
out.fill_(std::numeric_limits<int8_t>::lowest());
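For reference, the semantics this kernel implements can be sketched in plain PyTorch (a reference model only, not the shipped CUDA path; segment_max_ref is a hypothetical name, and the argmax tie-break shown is one reasonable convention):

import torch

def segment_max_ref(x, index, N):
    # out[n, k] = max over rows e with index[e] == n; empty segments keep the lowest value.
    E, K = x.shape
    lowest = (torch.finfo(x.dtype).min if x.dtype.is_floating_point
              else torch.iinfo(x.dtype).min)
    out = torch.full((N, K), lowest, dtype=x.dtype)
    idx = index.unsqueeze(-1).expand(E, K)
    out.scatter_reduce_(0, idx, x, reduce="amax", include_self=True)
    # arg_out: smallest source row attaining the segment max; E marks empty segments.
    e_ids = torch.arange(E).unsqueeze(-1).expand(E, K)
    cand = torch.where(x == out[index], e_ids, torch.full_like(e_ids, E))
    arg_out = torch.full((N, K), E, dtype=torch.int64)
    arg_out.scatter_reduce_(0, idx, cand, reduce="amin", include_self=True)
    return out, arg_out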
29 changes: 0 additions & 29 deletions gammagl/mpops/torch_ext/cuda/segment_mean_cuda.cu
@@ -19,16 +19,6 @@ using torch::autograd::variable_list;
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS

// inline __device__ void atomic_max_float(float *addr, float value) {
// int *addr_as_i = (int *)addr;
// int old = *addr_as_i;
// int assumed;
// do{
// assumed = old;
// old = atomicCAS(addr_as_i, assumed,
// __float_as_int(max(value, __int_as_float(assumed))));
// } while (assumed != old);
// }

template <typename scalar_t>
__global__ void segment_mean_cuda_forward_kernel(
@@ -99,25 +89,6 @@ torch::Tensor segment_mean_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// torch::Tensor count = torch::full_like(out, 0.0, x.options());
// scalar_t *count_data = count.data_ptr<scalar_t>();

// segment_mean_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, count_data, E, K, N, x.numel());

// arg_segment_mean_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, arg_out_data, count_data, E, K, N,
// x.numel());
// });

if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
auto type = x.dtype();
using scalar_t = int;
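The mean kernel follows the same pattern with a per-segment counter; its reference semantics in plain PyTorch (segment_mean_ref is a hypothetical name, assuming floating-point x):

import torch

def segment_mean_ref(x, index, N):
    # Per-segment sum divided by per-segment element count.
    out = torch.zeros((N, x.shape[1]), dtype=x.dtype)
    out.index_add_(0, index, x)
    count = torch.bincount(index, minlength=N).clamp(min=1)  # guard empty segments
    return out / count.unsqueeze(-1).to(x.dtype)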
13 changes: 0 additions & 13 deletions gammagl/mpops/torch_ext/cuda/segment_sum_cuda.cu
@@ -66,26 +66,13 @@ torch::Tensor segment_sum_cuda_forward(
auto K = x.numel() / x.size(0);
auto stream = at::cuda::getCurrentCUDAStream();

// AT_DISPATCH_ALL_TYPES(x.scalar_type(), "__ops_name", [&] {
// using scalar_t = float; // temporary usage, delete later
// using scalar_t = x.scalar_type(); // temporary usage, delete later
// auto x_data = x.data_ptr<scalar_t>();
// auto out_data = out.data_ptr<scalar_t>();
// auto index_data = index.data_ptr<int64_t>();

// segment_sum_cuda_forward_kernel<scalar_t>
// <<<BLOCKS(x.numel()), THREADS, 0, stream>>>(
// x_data, index_data, out_data, E, K, N, x.numel());
// });

if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt32 || x.dtype() == torch::kInt64) {
auto type = x.dtype();
using scalar_t = int;
if (x.dtype() == torch::kInt8 || x.dtype() == torch::kInt16 || x.dtype() == torch::kInt64) {
x = x.to(torch::kInt32);
out = out.to(torch::kInt32);
}
std::cout << x.dtype() << std::endl;
auto x_data = x.data_ptr<scalar_t>();
auto out_data = out.data_ptr<scalar_t>();
auto index_data = index.data_ptr<int64_t>();
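Reference semantics for the sum path, mirroring the integer handling visible above, where narrow and 64-bit ints are accumulated in int32 (segment_sum_ref is a hypothetical name):

import torch

def segment_sum_ref(x, index, N):
    orig = x.dtype
    if orig in (torch.int8, torch.int16, torch.int64):
        x = x.to(torch.int32)  # same widening/narrowing the CUDA path applies
    out = torch.zeros((N, x.shape[1]), dtype=x.dtype)
    out.index_add_(0, index, x)  # out[index[e]] += x[e]
    return out.to(orig)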
6 changes: 3 additions & 3 deletions setup.py
@@ -94,14 +94,14 @@ def load_ops_extensions():
extensions.append(PyCPUExtension(
name=osp.join(ops_dir, f'_{ops_prefix}').replace(osp.sep, "."),
sources=[osp.join(src_dir, f) for f in src_files],
include_dirs=[osp.join('third_party', d) for d in ops_third_party_deps[i]],
include_dirs=[osp.abspath(osp.join('third_party', d)) for d in ops_third_party_deps[i]],
extra_compile_args=['-std=c++17']
))
else:
extensions.append(PyCudaExtension(
name=osp.join(ops_dir, f'_{ops_prefix}_cuda').replace(osp.sep, "."),
sources=[osp.join(src_dir, f) for f in src_files],
include_dirs=[osp.join('third_party', d) for d in ops_third_party_deps[i]],
include_dirs=[osp.abspath(osp.join('third_party', d)) for d in ops_third_party_deps[i]],
extra_compile_args=['-std=c++17']
))

@@ -115,7 +115,7 @@ def load_extensions():
return extensions

install_requires = ['numpy', 'pandas', 'numba', 'scipy', 'protobuf', 'pyparsing', 'rdkit',
'tensorboardx', 'pytest', 'tensorlayerx', 'rich', 'tqdm', 'pybind11', 'panda']
'tensorboardx', 'pytest', 'tensorlayerx', 'rich', 'tqdm', 'pybind11', 'ninja']

classifiers = [
'Development Status :: 3 - Alpha',
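Adding ninja to install_requires lets torch's extension builder compile sources in parallel instead of falling back to the slower distutils path. A quick sanity check that ninja is picked up, using standard torch.utils.cpp_extension helpers (shown for illustration, not part of this commit):

from torch.utils.cpp_extension import is_ninja_available, verify_ninja_availability

print("ninja available:", is_ninja_available())  # False -> builds fall back to distutils
verify_ninja_availability()  # raises RuntimeError if ninja cannot be run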
