forked from beiwang2003/strip_clustering_gpu
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
79 lines (67 loc) · 2.54 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# Host name of the build machine; the host-specific conditionals in this file
# match against it. Simply expanded (:=) so that `hostname` is executed once
# at parse time rather than on every reference to $(SYSTEMS).
SYSTEMS := $(shell hostname)
# Host compiler family. The flag sections in this file recognise `gnu` and
# `intel`; override on the command line, e.g. `make COMPILER=gnu`.
COMPILER = intel
# CUDA_PATH should be set in the calling shell if CMSSW tools are not used.
# Host-specific settings, selected by matching the host name in $(SYSTEMS).
# CUBROOT is passed to the compilers as an include directory (-I) and
# GPUARCH is the value given to nvcc's -arch option.

# Princeton: tigergpu
# (cub source: git clone https://github.com/NVlabs/cub.git)
ifneq ($(findstring tigergpu,$(SYSTEMS)),)
  CUBROOT := /home/beiwang/clustering/cub-1.8.0
  GPUARCH := sm_60
endif

# Cornell: lnx7188 and steve-t4. The host name is split on '.' so that each
# component is compared as a whole word.
ifneq ($(filter lnx7188 steve-t4,$(subst ., ,$(SYSTEMS))),)
  CUBROOT := ../cub-1.8.0
  GPUARCH := sm_70
endif

# UCSD: phi3. Only GPUARCH is set here; CUBROOT is not assigned for this host.
ifneq ($(findstring phi3,$(SYSTEMS)),)
  GPUARCH := sm_70
endif
# When CMSSW_BASE is non-empty, query scram (run from inside $(CMSSW_BASE))
# for the cub include directory and the CUDA base directory. These
# assignments replace any value given to CUBROOT / CUDA_PATH earlier in
# this file.
ifneq ($(CMSSW_BASE),)
  CUBROOT   := $(shell cd $(CMSSW_BASE) && scram tool tag cub INCLUDE)
  CUDA_PATH := $(shell cd $(CMSSW_BASE) && scram tool tag cuda CUDA_BASE)
  # Library-directory lookup, currently disabled:
  #CUDALIBDIR := $(shell cd $(CMSSW_BASE) && scram tool tag cuda LIBDIR)
endif
# Host toolchain settings used when COMPILER is `gnu`.
# Note that CC (not CXX) holds the C++ compiler throughout this file.
ifeq ($(COMPILER),gnu)
  CC = g++

  # Compile flags: language standard, optimisation, OpenMP, vectorisation, debug info.
  CXXFLAGS += -std=c++17 -O3 -fopenmp -march=native
  CXXFLAGS += -mprefer-vector-width=512 -fopt-info-vec -g
  # Header search paths for CUDA and cub.
  CXXFLAGS += -I$(CUDA_PATH)/include -I$(CUBROOT)
  # Enabled feature macros. Available but switched off: -DNUMA_FT -DOUTPUT -DCPU_DEBUG
  CXXFLAGS += -DUSE_GPU -DCACHE_ALLOC

  # The link step receives the same code-generation options as compilation.
  LDFLAGS += -std=c++17 -O3 -fopenmp -march=native
  LDFLAGS += -mprefer-vector-width=512 -fopt-info-vec -g
endif
# Host toolchain settings used when COMPILER is `intel`.
# Note that CC (not CXX) holds the C++ compiler throughout this file.
ifeq ($(COMPILER),intel)
  CC = icpc

  # Compile flags: language standard, optimisation, OpenMP, vectorisation reports.
  CXXFLAGS += -std=c++17 -O3 -qopenmp -xHost
  CXXFLAGS += -qopt-zmm-usage=high -qopt-report=5
  # Header search paths for CUDA and cub, plus debug info.
  CXXFLAGS += -I$(CUDA_PATH)/include -I$(CUBROOT) -g
  # Enabled feature macro. Available but switched off: -DUSE_GPU -DOUTPUT -DCPU_DEBUG
  # NOTE(review): unlike the gnu configuration, -DUSE_GPU and -DCACHE_ALLOC are
  # not defined here -- confirm this is intended.
  CXXFLAGS += -DNUMA_FT

  # The link step receives the same code-generation options as compilation.
  LDFLAGS += -std=c++17 -O3 -qopenmp -xHost
  LDFLAGS += -qopt-zmm-usage=high -qopt-report=5 -g
endif
# CUDA compiler driver used for the .cu source.
NVCC = nvcc

# GPUARCH is only assigned for the hosts this file recognises by name. On any
# other machine it is empty and nvcc would be handed a bare `-arch=`; emit a
# parse-time warning that names the missing variable so the cause is obvious.
ifeq ($(strip $(GPUARCH)),)
  $(warning GPUARCH is not set for host '$(SYSTEMS)' - pass it explicitly as in: make GPUARCH=sm_70)
endif

# Device compile flags.
# Available but switched off: -DCOPY_ADC -DGPU_TIMER -DUSE_TEXTURE -DGPU_DEBUG -DCUB_STDERR
CUDAFLAGS += -std=c++14 -O3 --default-stream per-thread -arch=$(GPUARCH) \
             -I$(CUBROOT) --ptxas-options=-v -lineinfo \
             -DCACHE_ALLOC
# Note: -arch=sm_60 == -gencode=arch=compute_60,code=\"sm_60,compute_60\"

# CUDA runtime library and its search directory, used by the link step.
CUDALDFLAGS += -lcudart -L$(CUDA_PATH)/lib64

# With the Intel toolchain, make nvcc use icpc as its host compiler.
ifeq ($(COMPILER),intel)
  CUDAFLAGS += -ccbin=icpc
endif
# Link the executable from the host and device object files.
# $(CUDALDFLAGS) (which carries -lcudart) is placed AFTER the objects: linkers
# resolve libraries left to right, so a library named before the objects that
# need it can be dropped (e.g. with --as-needed or static archives), leaving
# undefined references.
strip-cluster: strip-cluster.o cluster.o clusterGPU.o \
               allocate_host.o allocate_device.o
	$(CC) $(LDFLAGS) -o $@ $^ $(CUDALDFLAGS)
# Host objects whose sources are listed as depending on cluster.h:
# each X.o is built from X.cc with the host compiler.
strip-cluster.o cluster.o: %.o: %.cc cluster.h
	$(CC) $(CXXFLAGS) -o $@ -c $<

# Host objects with no header prerequisite listed.
allocate_host.o allocate_device.o: %.o: %.cc
	$(CC) $(CXXFLAGS) -o $@ -c $<

# Device object: compiled with nvcc from the single .cu source.
clusterGPU.o: clusterGPU.cu
	$(NVCC) $(CUDAFLAGS) -o $@ -c $<
# Remove the executable, all object files and any *.optrpt files
# (presumably the reports produced by the Intel -qopt-report option).
# Declared phony so that a file or directory named `clean` can never make
# this target appear up to date.
.PHONY: clean
clean:
	$(RM) -r strip-cluster *.o *.optrpt