Skip to content

Commit

Permalink
initial version
Browse files Browse the repository at this point in the history
  • Loading branch information
drossetti committed Oct 5, 2016
1 parent 5e2eb07 commit e407cdb
Show file tree
Hide file tree
Showing 23 changed files with 4,584 additions and 0 deletions.
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
*~
configure
config.h.in
config.log
Makefile.in
autom4te.cache
aclocal.m4
.libs
.deps
config/*
libgdsync.spec
libtool

# Object files
*.o
*.ko
Expand Down
48 changes: 48 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Automake input for libgdsync: builds the libtool library from src/ and,
# when the TEST_ENABLE conditional is on, the programs under tests/.

CUDA_PATH = @CUDA_PATH@

AM_CPPFLAGS  = -I$(srcdir)
AM_CPPFLAGS += -I$(srcdir)/include
AM_CPPFLAGS += -I$(srcdir)/src
AM_CPPFLAGS += -I$(CUDA_PATH)/include
AM_CPPFLAGS += -D__STDC_FORMAT_MACROS

AM_LDFLAGS = -L$(CUDA_PATH)/lib64

lib_LTLIBRARIES = src/libgdsync.la

ACLOCAL_AMFLAGS = -Iconfig
AM_CFLAGS = -g -Wall
# Every library source is C++, so the debug/warning flags must also be
# given to the C++ compiler; AM_CFLAGS alone only reaches the C test files.
AM_CXXFLAGS = -g -Wall

EXTRA_DIST  = autogen.sh
EXTRA_DIST += scripts/expose_libmlx5_headers/libmlx_expose_headers scripts/expose_libmlx5_headers/defines.txt scripts/expose_libmlx5_headers/structures.txt scripts/expose_libmlx5_headers/enumerations.txt

src_libgdsync_la_CXXFLAGS = $(AM_CXXFLAGS)
src_libgdsync_la_SOURCES = src/gdsync.cpp src/memmgr.cpp src/mem.cpp src/objs.cpp src/apis.cpp src/mlx5.cpp
# A per-target _LDFLAGS replaces AM_LDFLAGS, so AM_LDFLAGS is repeated explicitly.
src_libgdsync_la_LDFLAGS = $(AM_LDFLAGS) -version-info 1

noinst_HEADERS = src/mem.hpp src/memmgr.hpp src/objs.hpp src/rangeset.hpp src/utils.hpp

# if enabled at configure time

if TEST_ENABLE

bin_PROGRAMS =
noinst_PROGRAMS = tests/gds_kernel_latency tests/rstest

tests_gds_kernel_latency_SOURCES = tests/gds_kernel_latency.c tests/gpu.c tests/gpu_kernels.cu tests/pingpong.c tests/cycles.c
tests_gds_kernel_latency_LDADD = $(top_builddir)/src/libgdsync.la -lgdrapi -lmpi -lcuda -lcudart

tests_rstest_SOURCES = tests/rstest.cpp
tests_rstest_LDADD =

# Teach automake/make how to turn CUDA sources into objects with nvcc.
SUFFIXES = .cu

.cu.o:
	$(NVCC) $(NVCCFLAGS) -c -o $@ $<

.cu.lo:
	$(LIBTOOL) --tag=CC --mode=compile $(NVCC) -o $@ -c $< $(NVCCFLAGS)

endif
53 changes: 53 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
GPUDirect Sync
========

Introduction
===

GPUDirect Sync (aka PeerSync) is all about moving control logic from
third-party devices to the GPU.

The CPU is taken off the control path, replaced by the GPU which is now
able to schedule both computation and network communication tasks
seamlessly. There are substantial improvements for both time-to-solution
(40% less latency) and power-to-solution (45% less CPU load) scenarios.


Requirements
===

This prototype has been tested on RHEL 6.x only.

A recent display driver, i.e. r361, r367 or later, is required.

A recent CUDA Toolkit is required, minimally 8.0, because of the CUDA driver MemOP APIs.

Mellanox OFED 2.5 or newer is required, because of the peer-direct verbs extensions.

The GDRCopy library (https://github.com/drossetti/gdrcopy) is necessary to
create CPU-side user-space mappings of GPU memory, currently used when
allocating a CQ on GPU memory.



Caveats
===

Tests have been done using Mellanox Connect-IB. Any HCA driven by the mlx5
driver should work.

Kepler and Maxwell Tesla/Quadro GPUs are required for RDMA.

A special HCA firmware is currently necessary in combination with GPUs
prior to Pascal.


Build
===

The Git repository does not include the generated autotools files. After a
fresh checkout, run autogen.sh once to create them before running configure.

As an example, the build.sh script is provided. You should modify it
according to the desired destination paths as well as the location
of the dependencies.
7 changes: 7 additions & 0 deletions autogen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Regenerate the autotools build system (configure, Makefile.in, config.h.in).
# Run once after a fresh checkout, before running configure.

# -e: stop at the first failing step, -x: echo each command.
# (The former "-E" shebang flag is a bash extension that other /bin/sh
# implementations reject, and no traps are used here.)
set -ex

# aclocal and automake are pointed at config/, which is not tracked in git,
# so make sure it exists before they run.
mkdir -p config

aclocal -I config
libtoolize --force --copy
autoheader
automake --foreign --add-missing --copy
autoconf
21 changes: 21 additions & 0 deletions build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Example out-of-tree build: generates configure if needed, configures into
# ./build on the first run, then rebuilds everything.
#
# Environment variables read:
#   PREFIX    install prefix, also used as the libibverbs and gdrcopy location
#   CUDA      CUDA toolkit directory (falls back to /usr/local/cuda)
#   MPI_HOME  MPI installation directory

# Abort on the first failing step instead of running make in a broken tree.
set -e

mkdir -p config

[ -e configure ] || ./autogen.sh

mkdir -p build
cd build

if [ ! -e Makefile ]; then
    echo "configuring..."
    ../configure \
        --prefix="$PREFIX" \
        --with-libibverbs="$PREFIX" \
        --with-cuda="${CUDA:-/usr/local/cuda}" \
        --with-gdrcopy="$PREFIX" \
        --with-mpi="$MPI_HOME"
fi

make clean all
123 changes: 123 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
dnl Process this file with autoconf to produce a configure script.

dnl Package identity and build-system setup. Auxiliary build files are
dnl placed in the config/ directory.
AC_PREREQ(2.57)
AC_INIT(libgdsync, 1.0.0, [email protected])
AC_CONFIG_SRCDIR([src/mem.hpp])
AC_CONFIG_AUX_DIR(config)
AC_CONFIG_HEADER(config.h)
dnl subdir-objects: Makefile.am names its sources by their src/ and tests/ paths.
AM_INIT_AUTOMAKE([1.10 foreign tar-ustar silent-rules subdir-objects])
dnl Enable silent rules only when the installed automake provides the macro.
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])

dnl Toolchain: C and C++ compilers, ln -s, libtool.
AC_PROG_CC
AC_PROG_CXX
AC_GNU_SOURCE
AC_PROG_LN_S
AC_PROG_LIBTOOL

dnl NOTE(review): AC_PROG_LIBTOOL above and LT_INIT both initialise libtool;
dnl presumably only one of them is needed -- confirm before removing either.
LT_INIT

dnl --enable-test: records the choice in enable_test (default no) and exposes
dnl it to Makefile.am as the TEST_ENABLE automake conditional.
AC_ARG_ENABLE(
[test],
[AC_HELP_STRING([--enable-test],
[Build test programs (default=no)])],
[enable_test=$enableval],
[enable_test=no])
AM_CONDITIONAL(TEST_ENABLE, test x$enable_test = xyes)

dnl Optional dependency prefixes. For every --with-PKG=DIR whose DIR exists,
dnl DIR/include is appended to CPPFLAGS and DIR/lib plus DIR/lib64 to LDFLAGS.
dnl want_PKG records whether a usable value was given at all.
dnl The option values are quoted in the test commands so that a value
dnl containing whitespace cannot break them.

AC_ARG_WITH([libibverbs],
    AC_HELP_STRING([--with-libibverbs], [ Set path to libibverbs installation ]))
if test "x$with_libibverbs" = x || test "x$with_libibverbs" = xno; then
    want_libibverbs=no
else
    want_libibverbs=yes
    if test -d "$with_libibverbs"; then
        CPPFLAGS="$CPPFLAGS -I$with_libibverbs/include"
        LDFLAGS="$LDFLAGS -L$with_libibverbs/lib -L$with_libibverbs/lib64"
    fi
fi

AC_ARG_WITH([gdrcopy],
    AC_HELP_STRING([--with-gdrcopy], [ Set path to gdrcopy installation ]))
if test "x$with_gdrcopy" = x || test "x$with_gdrcopy" = xno; then
    want_gdrcopy=no
else
    want_gdrcopy=yes
    if test -d "$with_gdrcopy"; then
        CPPFLAGS="$CPPFLAGS -I$with_gdrcopy/include"
        LDFLAGS="$LDFLAGS -L$with_gdrcopy/lib -L$with_gdrcopy/lib64"
    fi
fi

dnl For MPI the compiler wrappers under DIR/bin are recorded as well.
AC_ARG_WITH([mpi],
    AC_HELP_STRING([--with-mpi], [ Set path to mpi installation ]))
if test "x$with_mpi" = x || test "x$with_mpi" = xno; then
    want_mpi=no
else
    want_mpi=yes
    if test -d "$with_mpi"; then
        MPICC=$with_mpi/bin/mpicc
        MPICXX=$with_mpi/bin/mpic++
        CPPFLAGS="$CPPFLAGS -I$with_mpi/include"
        LDFLAGS="$LDFLAGS -L$with_mpi/lib -L$with_mpi/lib64"
    fi
fi

dnl Specify CUDA Location
AC_ARG_WITH(cuda,
    AC_HELP_STRING([--with-cuda=CUDADIR], [ Specify CUDA installation directory (default: /usr/local/cuda)]),
    [ cuda_home=${withval} ],
    [ cuda_home=/usr/local/cuda ]
)
dnl A bare --with-cuda yields the literal value yes and an empty argument
dnl yields an empty string; neither is a directory, so use the default.
dnl CUDA is mandatory, so a negated option is rejected outright.
case "x$cuda_home" in
    x|xyes)
        cuda_home=/usr/local/cuda
        ;;
    xno)
        AC_MSG_ERROR([CUDA is required to build libgdsync; it cannot be disabled.])
        ;;
esac

dnl Specify GPU Arch
AC_ARG_ENABLE(gpu-arch,
    AC_HELP_STRING([--enable-gpu-arch=arch], [ Set GPU arch: sm_20, sm_21, sm_30, sm_35, sm_50, sm_52 (default: sm_35)]),
    [ gpu_arch=${enableval} ],
    [ gpu_arch="sm_35" ]
)
dnl Same treatment for the option given without an architecture name:
dnl yes, no and the empty string are not architectures, keep the default.
case "x$gpu_arch" in
    x|xyes|xno)
        gpu_arch="sm_35"
        ;;
esac


dnl Checks for header files.
dnl The compiler check and the standard-header check each appeared twice in
dnl this part of the file; the compiler is already probed near the top of
dnl configure.ac, and one standard-header check is enough.
AC_HEADER_STDC

dnl Checks for Verbs support
AC_CHECK_LIB(ibverbs, ibv_get_device_list, [],
    AC_MSG_ERROR([ibv_get_device_list() not found. libgdsync requires libibverbs.]))

dnl The message used to sit in the action-if-found slot, so configure aborted
dnl exactly when ibv_register_driver_ext WAS present. It now runs only when
dnl the symbol is missing. It is a warning rather than an error so that
dnl installations which configured successfully before -- necessarily ones
dnl without the symbol -- keep working; the peer_ops.h check below remains
dnl the hard requirement. The no-op found-action avoids adding the library
dnl to LIBS a second time.
AC_CHECK_LIB(ibverbs, ibv_register_driver_ext, [:],
    AC_MSG_WARN([ibv_register_driver_ext not found. libgdsync requires verbs extension support.]))

AC_CHECK_HEADER(infiniband/peer_ops.h, [],
    AC_MSG_ERROR([<infiniband/peer_ops.h> not found. libgdsync requires verbs peer-direct support.]))

dnl Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST
AC_CHECK_SIZEOF(long)

dnl Output Substitutions
AC_MSG_NOTICE([Setting CUDA_PATH = ${cuda_home} ])
AC_SUBST( CUDA_PATH, [${cuda_home} ])

AC_MSG_NOTICE([Setting GPU_ARCH = ${gpu_arch} ])
AC_SUBST( GPU_ARCH, [${gpu_arch}] )

dnl mpi_home was never assigned anywhere, so MPI_PATH was always substituted
dnl as an empty string. Derive it from the MPI option when one was given.
if test "x$mpi_home" = x && test "x$want_mpi" = xyes; then
    mpi_home=$with_mpi
fi
AC_MSG_NOTICE([Setting MPI_PATH = ${mpi_home} ])
AC_SUBST( MPI_PATH, [${mpi_home} ])
AC_SUBST( MPICC, [${MPICC} ])
AC_SUBST( MPICXX, [${MPICXX} ])

dnl Flags handed to nvcc by the .cu rules in Makefile.am. Any NVCCFLAGS and
dnl CUDA_CFLAGS supplied by the user are kept in front.
dnl The library directory used to be built from CUDA_INSTALL_PATH, which this
dnl script never sets; both paths now come from the selected CUDA directory.
CUDA_CFLAGS="$CUDA_CFLAGS -arch=${gpu_arch}"
CUDA_CFLAGS="$CUDA_CFLAGS -I${cuda_home}/include"
CUDA_LDFLAGS="-L${cuda_home}/lib64"
CUDA_LIBS="-lcuda -lcudart -lcufft"
NVCCFLAGS="$NVCCFLAGS $CUDA_CFLAGS $CUDA_LDFLAGS $CUDA_LIBS"
AC_SUBST(NVCC, [nvcc])
AC_SUBST(NVCCFLAGS)

AC_CONFIG_FILES([Makefile libgdsync.spec])
AC_OUTPUT
116 changes: 116 additions & 0 deletions include/gdsync.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

/*
* Dependencies & Verbs adaptation layer
*/

#include <infiniband/verbs.h>
#include <infiniband/verbs_exp.h>
#include <infiniband/peer_ops.h>

/*
 * Alias the unprefixed peer-direct name to its ibv_exp_* counterpart.
 * This is a plain preprocessor alias, so it also takes effect in any
 * translation unit that includes this header.
 */
#define ibv_peer_commit ibv_exp_peer_commit

This comment has been minimized.

Copy link
@haggaie

haggaie Apr 30, 2017

Contributor

Why do you rename the libibverbs function here? If I understand correctly this is a header file the libgdsync user is intended to include, so this will not only rename the functions inside libgdsync, but also for the user.

We try to leave the ibv_* functions for the upstream community (rdma-core) and keep MLNX OFED specific verbs with the experimental prefix (ibv_exp_*) until they are accepted upstream.

This comment has been minimized.

Copy link
@drossetti

drossetti May 5, 2017

Author Contributor

agreed, I'll use official verbs API names.

This comment has been minimized.

Copy link
@drossetti

drossetti May 19, 2017

Author Contributor

tracked by #9

/*
 * Verbs adaptation layer (continued): every macro below maps an unprefixed
 * ibv_* / IBV_* identifier onto the ibv_exp_* / IBV_EXP_* identifier of the
 * same name. Being preprocessor aliases, they apply to all code that
 * includes this header.
 */
#define ibv_peer_commit_qp ibv_exp_peer_commit_qp

/* QP / CQ creation entry point and attribute types */
#define ibv_create_qp_ex ibv_exp_create_qp
#define ibv_qp_init_attr_ex ibv_exp_qp_init_attr
#define ibv_create_cq_attr_ex ibv_exp_cq_init_attr

/* QP init-attribute flags */
#define IBV_QP_INIT_ATTR_PD IBV_EXP_QP_INIT_ATTR_PD
#define IBV_QP_INIT_ATTR_PEER_DIRECT IBV_EXP_QP_INIT_ATTR_PEER_DIRECT

/* CQ init-attribute flag */
#define IBV_CREATE_CQ_ATTR_PEER_DIRECT IBV_EXP_CQ_INIT_ATTR_PEER_DIRECT

/* Peer operation codes */
#define IBV_PEER_OP_FENCE IBV_EXP_PEER_OP_FENCE
#define IBV_PEER_OP_STORE_DWORD IBV_EXP_PEER_OP_STORE_DWORD
#define IBV_PEER_OP_STORE_QWORD IBV_EXP_PEER_OP_STORE_QWORD
#define IBV_PEER_OP_POLL_AND_DWORD IBV_EXP_PEER_OP_POLL_AND_DWORD
#define IBV_PEER_OP_POLL_NOR_DWORD IBV_EXP_PEER_OP_POLL_NOR_DWORD
#define IBV_PEER_OP_POLL_GEQ_DWORD IBV_EXP_PEER_OP_POLL_GEQ_DWORD
#define IBV_PEER_OP_COPY_BLOCK IBV_EXP_PEER_OP_COPY_BLOCK

/* Peer operation capability flags */
#define IBV_PEER_OP_FENCE_CAP IBV_EXP_PEER_OP_FENCE_CAP
#define IBV_PEER_OP_STORE_DWORD_CAP IBV_EXP_PEER_OP_STORE_DWORD_CAP
#define IBV_PEER_OP_STORE_QWORD_CAP IBV_EXP_PEER_OP_STORE_QWORD_CAP
#define IBV_PEER_OP_COPY_BLOCK_CAP IBV_EXP_PEER_OP_COPY_BLOCK_CAP
#define IBV_PEER_OP_POLL_AND_DWORD_CAP IBV_EXP_PEER_OP_POLL_AND_DWORD_CAP
#define IBV_PEER_OP_POLL_NOR_DWORD_CAP IBV_EXP_PEER_OP_POLL_NOR_DWORD_CAP

/* Peer fence flags */
#define IBV_PEER_FENCE_OP_READ IBV_EXP_PEER_FENCE_OP_READ
#define IBV_PEER_FENCE_OP_WRITE IBV_EXP_PEER_FENCE_OP_WRITE
#define IBV_PEER_FENCE_FROM_CPU IBV_EXP_PEER_FENCE_FROM_CPU
#define IBV_PEER_FENCE_FROM_HCA IBV_EXP_PEER_FENCE_FROM_HCA
#define IBV_PEER_FENCE_MEM_SYS IBV_EXP_PEER_FENCE_MEM_SYS
#define IBV_PEER_FENCE_MEM_PEER IBV_EXP_PEER_FENCE_MEM_PEER

/* Peer-direct type names */
#define ibv_peer_direct_attr ibv_exp_peer_direct_attr
#define ibv_peer_direction ibv_exp_peer_direction
#define ibv_peer_op ibv_exp_peer_op

/* Rollback flags */
#define IBV_ROLLBACK_ABORT_UNCOMMITED IBV_EXP_ROLLBACK_ABORT_UNCOMMITED
#define IBV_ROLLBACK_ABORT_LATE IBV_EXP_ROLLBACK_ABORT_LATE

/* Rollback and peek identifiers */
#define ibv_rollback_ctx ibv_exp_rollback_ctx
#define ibv_rollback_qp ibv_exp_rollback_qp
#define ibv_peer_peek ibv_exp_peer_peek
#define ibv_peer_peek_cq ibv_exp_peer_peek_cq
#define ibv_peer_abort_peek ibv_exp_peer_abort_peek
#define ibv_peer_abort_peek_cq ibv_exp_peer_abort_peek_cq

/* Peer direction flags */
#define IBV_PEER_DIRECTION_FROM_CPU IBV_EXP_PEER_DIRECTION_FROM_CPU
#define IBV_PEER_DIRECTION_FROM_HCA IBV_EXP_PEER_DIRECTION_FROM_HCA
#define IBV_PEER_DIRECTION_FROM_PEER IBV_EXP_PEER_DIRECTION_FROM_PEER
#define IBV_PEER_DIRECTION_TO_CPU IBV_EXP_PEER_DIRECTION_TO_CPU
#define IBV_PEER_DIRECTION_TO_HCA IBV_EXP_PEER_DIRECTION_TO_HCA
#define IBV_PEER_DIRECTION_TO_PEER IBV_EXP_PEER_DIRECTION_TO_PEER

/* Peer buffer type names */
#define ibv_peer_buf ibv_exp_peer_buf
#define ibv_peer_buf_alloc_attr ibv_exp_peer_buf_alloc_attr

/*
 * Function-like wrapper: forwards ctx, n, ch and attr to ibv_exp_create_cq()
 * in that function's argument order, passing NULL and 0 for its third and
 * fifth arguments (presumably the CQ context and completion vector --
 * confirm against verbs_exp.h).
 */
#define ibv_create_cq_ex_(ctx, attr, n, ch) \
ibv_exp_create_cq(ctx, n, NULL, ch, 0, attr)

#include <cuda.h>
#include <gdrapi.h>

/*
 * GDS_BEGIN_DECLS / GDS_END_DECLS open and close an extern "C" block when
 * the header is compiled as C++ and expand to nothing otherwise.
 */
#ifdef __cplusplus
# define GDS_BEGIN_DECLS extern "C" {
# define GDS_END_DECLS }
#else
# define GDS_BEGIN_DECLS
# define GDS_END_DECLS
#endif

GDS_BEGIN_DECLS

/* The gdsync sub-headers are pulled in inside the extern "C" region. */
#include <gdsync/core.h>
#include <gdsync/tools.h>
#include <gdsync/mlx5.h>

GDS_END_DECLS
Loading

0 comments on commit e407cdb

Please sign in to comment.