-
Notifications
You must be signed in to change notification settings - Fork 0
/
Makefile.nvcc
75 lines (58 loc) · 2.93 KB
/
Makefile.nvcc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# When building a library with CUDA code we should support diverse scenarios,
# where the CUDA code in the library may be used by:
# - host code in the library itself (i.e. a kernel defined and used within the library)
# - host code in a user program or library (i.e. a kernel defined in the library)
# - CUDA code in a user program or library (i.e. __device__ code in the library)
.PHONY: all clean
BUILD:=build$(suffix $(word 1, $(MAKEFILE_LIST)))
CUDA_BASE=/usr/local/cuda-9.2
CUDA_ARCH=sm_35 sm_50 sm_61
CUDA_CXXFLAGS=$(foreach ARCH,$(CUDA_ARCH),-gencode arch=$(ARCH:sm_%=compute_%),code=$(ARCH))
HOST_CXX=g++
HOST_CXXFLAGS=-fPIC -Wall
CXXFLAGS=-std=c++14 -O2 -I$(CUDA_BASE)/include -I.
LDFLAGS=-L$(BUILD)
CUDA_LDFLAGS=-L$(CUDA_BASE)/lib64/stubs -L$(CUDA_BASE)/lib64 -lcudadevrt -lcudart -lrt -lpthread -ldl
PARTICLE_OBJECTS = $(BUILD)/particle/particle.o $(BUILD)/particle/v3.o
PROPAGATE_OBJECTS = $(BUILD)/propagate/propagate.o
all: $(BUILD)/app
clean:
rm -rf $(BUILD)
$(BUILD)/%.o: %.cc
@mkdir -p $(dir $@)
# compile individual host-only files
@#$(CUDA_BASE)/bin/nvcc -x c++ $(CXXFLAGS) $(CUDA_CXXFLAGS) -ccbin $(HOST_CXX) -Xcompiler '$(HOST_CXXFLAGS)' -c $< -o $@
$(HOST_CXX) $(CXXFLAGS) $(HOST_CXXFLAGS) -c $< -o $@
$(BUILD)/%.o $(BUILD)/%.cuda.o: %.cu
@mkdir -p $(dir $@)
# compile individual host/device files
$(CUDA_BASE)/bin/nvcc -x cu $(CXXFLAGS) $(CUDA_CXXFLAGS) -ccbin $(HOST_CXX) -Xcompiler '$(HOST_CXXFLAGS)' -dc $< -o $@
# extract CUDA fatbin to a separate object file
objcopy -j '.nv*' -j '__nv*' $(BUILD)/$*.o $(BUILD)/$*.cuda.o
$(BUILD)/libparticle.cuda.a: $(PARTICLE_OBJECTS:.o=.cuda.o)
@mkdir -p $(dir $@)
# build a static library with the device code
ar crs $@ $^
$(BUILD)/libparticle.so: $(PARTICLE_OBJECTS)
@mkdir -p $(dir $@)
# build a shared library with the host and device code
@$(eval TMPDIR:=$(shell mktemp -d))
$(CUDA_BASE)/bin/nvcc $(CUDA_CXXFLAGS) -Xcompiler '$(HOST_CXXFLAGS)' -dlink $(filter %.o,$^) -o $(TMPDIR)/$(basename $(@F)).o
$(HOST_CXX) -shared $(HOST_CXXFLAGS) $^ $(TMPDIR)/$(basename $(@F)).o -o $@
@rm -rf $(TMPDIR)
$(BUILD)/libpropagate.cuda.a: $(PROPAGATE_OBJECTS:.o=.cuda.o)
@mkdir -p $(dir $@)
# build a static library with the device code
ar crs $@ $^
$(BUILD)/libpropagate.so: $(PROPAGATE_OBJECTS) $(BUILD)/libparticle.cuda.a $(BUILD)/libparticle.so
@mkdir -p $(dir $@)
# build a shared library with the host and device code
@$(eval TMPDIR:=$(shell mktemp -d))
$(CUDA_BASE)/bin/nvcc $(CUDA_CXXFLAGS) -Xcompiler '$(HOST_CXXFLAGS)' -dlink $(filter %.o,$^) $(LDFLAGS) -lparticle.cuda -o $(TMPDIR)/$(basename $(@F)).o
$(HOST_CXX) -shared $(HOST_CXXFLAGS) $^ $(LDFLAGS) -lparticle $(TMPDIR)/$(basename $(@F)).o -o $@
@rm -rf $(TMPDIR)
# link the main executable
$(BUILD)/app: $(BUILD)/main.o $(BUILD)/libparticle.so $(BUILD)/libpropagate.so
@mkdir -p $(dir $@)
# call the host linker to build the executable
$(HOST_CXX) $(BUILD)/main.o $(LDFLAGS) -lpropagate -lparticle $(CUDA_LDFLAGS) -o $@