From 87cc55c1a5c70a034cd9d2838a4552c7715fde3a Mon Sep 17 00:00:00 2001 From: ksang Date: Tue, 16 May 2017 17:34:18 +0800 Subject: [PATCH 1/3] remove duplicate definition in tests --- tests/test_utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.h b/tests/test_utils.h index 618ffac..707ef26 100644 --- a/tests/test_utils.h +++ b/tests/test_utils.h @@ -1,5 +1,5 @@ #pragma once - +/* #ifndef USE_PROF struct prof { }; @@ -14,7 +14,7 @@ static inline int prof_enabled(struct prof *p) { return 0; } static inline void prof_disable(struct prof *p) {} static inline void prof_reset(struct prof *p) {} #endif - +*/ typedef int64_t gds_us_t; static inline gds_us_t gds_get_time_us() { @@ -43,9 +43,9 @@ static void gds_cpu_relax(void) } static void gds_wmb(void) __attribute__((unused)) ; -static void gds_wmb(void) +static void gds_wmb(void) { - asm volatile("sync") ; + asm volatile("sync") ; } #else #error "platform not supported" From b6b2313f73840b8f02908e1ce9c2c9437a532368 Mon Sep 17 00:00:00 2001 From: ksang Date: Thu, 18 May 2017 18:07:28 +0800 Subject: [PATCH 2/3] make test working with RoCE --- tests/gds_kernel_latency.c | 56 ++++++++++++++++++++--------- tests/gds_kernel_loopback_latency.c | 42 ++++++++++++++-------- 2 files changed, 67 insertions(+), 31 deletions(-) diff --git a/tests/gds_kernel_latency.c b/tests/gds_kernel_latency.c index 7641280..6929641 100644 --- a/tests/gds_kernel_latency.c +++ b/tests/gds_kernel_latency.c @@ -1,9 +1,9 @@ /* * GPUDirect Async latency benchmark - * + * * * based on OFED libibverbs ud_pingpong test. - * minimally changed to use MPI for bootstrapping, + * minimally changed to use MPI for bootstrapping, */ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. @@ -193,8 +193,22 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { - fprintf(stderr, "Failed to create AH\n"); - return 1; + union ibv_gid dgid; + if (ibv_query_gid(ctx->context, port, 0, &dgid)) { + fprintf(stderr, "Failed to query interface gid\n"); + return 1; + } + + ah_attr.is_global = 1; + ah_attr.grh.hop_limit = 1; + ah_attr.grh.dgid = dgid; + ah_attr.grh.sgid_index = 0; + + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); + if (!ctx->ah) { + fprintf(stderr, "Failed to create AH\n"); + return 1; + } } return 0; @@ -249,7 +263,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, ctx->rx_flag = memalign(page_size, alloc_size); if (!ctx->rx_flag) { - fprintf(stderr, "Couldn't allocate rx_flag buf\n"); + fprintf(stderr, "Couldn't allocate rx_flag buf\n"); goto clean_ctx; } @@ -367,8 +381,8 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, clean_buffer: if (ctx->gpu_id >= 0) - gpu_free(ctx->buf); - else + gpu_free(ctx->buf); + else free(ctx->buf); clean_ctx: @@ -408,8 +422,8 @@ int pp_close_ctx(struct pingpong_context *ctx) } if (ctx->gpu_id >= 0) - gpu_free(ctx->buf); - else + gpu_free(ctx->buf); + else free(ctx->buf); if (ctx->gpu_id >= 0) @@ -509,7 +523,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin posted_recv = pp_post_recv(ctx, n_posts); if (posted_recv < 0) { - fprintf(stderr,"ERROR: can't post recv (%d) n_posts=%d is_client=%d\n", + fprintf(stderr,"ERROR: can't post recv (%d) n_posts=%d is_client=%d\n", posted_recv, n_posts, is_client); exit(EXIT_FAILURE); return 0; @@ -518,7 +532,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin if (!posted_recv) return 0; } - + PROF(&prof, prof_idx++); for (i = 0; i < posted_recv; ++i) { @@ -630,7 +644,7 @@ int main(int argc, char *argv[]) MPI_CHECK(MPI_Comm_size(MPI_COMM_WORLD, &comm_size)); MPI_CHECK(MPI_Comm_rank(MPI_COMM_WORLD, &my_rank)); - if (comm_size != 2) { + if (comm_size != 2) { fprintf(stderr, "this test requires exactly two processes \n"); MPI_Abort(MPI_COMM_WORLD, -1); } @@ -785,7 +799,7 @@ int main(int argc, char *argv[]) MPI_CHECK(MPI_Get_processor_name(hostnames[my_rank], &name_len)); assert(name_len < MPI_MAX_PROCESSOR_NAME); - MPI_CHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, + MPI_CHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, hostnames, MPI_MAX_PROCESSOR_NAME, MPI_CHAR, MPI_COMM_WORLD)); if (my_rank == 1) { @@ -815,7 +829,7 @@ int main(int argc, char *argv[]) if (!ib_devname) { // old env var, for compatibility - const char *value = getenv("USE_IB_HCA"); + const char *value = getenv("USE_IB_HCA"); if (value != NULL) { printf("[%d] USE_IB_HCA: <%s>\n", my_rank, value); ib_devname = value; @@ -876,7 +890,7 @@ int main(int argc, char *argv[]) struct pingpong_dest all_dest[4] = {{0,}}; all_dest[my_rank] = my_dest; - MPI_CHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, + MPI_CHECK(MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_dest, sizeof(all_dest[0]), MPI_CHAR, MPI_COMM_WORLD)); rem_dest = &all_dest[my_rank?0:1]; inet_ntop(AF_INET6, &rem_dest->gid, gid, sizeof gid); @@ -918,8 +932,16 @@ int main(int argc, char *argv[]) ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { - fprintf(stderr, "Failed to create AH\n"); - return 1; + ah_attr.is_global = 1; + ah_attr.grh.hop_limit = 1; + ah_attr.grh.dgid = my_dest.gid; + ah_attr.grh.sgid_index = 0; + + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); + if (!ctx->ah) { + fprintf(stderr, "Failed to create AH\n"); + return 1; + } } } diff --git a/tests/gds_kernel_loopback_latency.c b/tests/gds_kernel_loopback_latency.c index 34ab332..f67fbbd 100644 --- a/tests/gds_kernel_loopback_latency.c +++ b/tests/gds_kernel_loopback_latency.c @@ -1,6 +1,6 @@ /* * GPUDirect Async loopback latency benchmark - * + * * * based on OFED libibverbs ud_pingpong test. */ @@ -177,8 +177,22 @@ static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn, ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); if (!ctx->ah) { - fprintf(stderr, "Failed to create AH\n"); - return 1; + union ibv_gid dgid; + if (ibv_query_gid(ctx->context, port, 0, &dgid)) { + fprintf(stderr, "Failed to query interface gid\n"); + return 1; + } + + ah_attr.is_global = 1; + ah_attr.grh.hop_limit = 1; + ah_attr.grh.dgid = dgid; + ah_attr.grh.sgid_index = 0; + + ctx->ah = ibv_create_ah(ctx->pd, &ah_attr); + if (!ctx->ah) { + fprintf(stderr, "Failed to create AH\n"); + return 1; + } } return 0; @@ -198,7 +212,7 @@ static struct pingpong_dest *pp_client_exch_dest(const char *servername, int por int sockfd = -1; struct pingpong_dest *rem_dest = NULL; char gid[33]; - + fprintf(stderr, "%04x:%06x:%06x:%s\n", my_dest->lid, my_dest->qpn, my_dest->psn, (char *)&my_dest->gid); rem_dest = malloc(sizeof *rem_dest); @@ -267,7 +281,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, ctx->rx_flag = memalign(page_size, alloc_size); if (!ctx->rx_flag) { - fprintf(stderr, "Couldn't allocate rx_flag buf\n"); + fprintf(stderr, "Couldn't allocate rx_flag buf\n"); goto clean_ctx; } @@ -341,7 +355,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, }, .qp_type = IBV_QPT_UD, }; - + //why? if (my_rank == 1) { printf("sleeping 2s\n"); @@ -395,8 +409,8 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, clean_buffer: if (ctx->gpumem) - gpu_free(ctx->buf); - else + gpu_free(ctx->buf); + else free(ctx->buf); clean_ctx: @@ -436,8 +450,8 @@ int pp_close_ctx(struct pingpong_context *ctx) } if (ctx->gpumem) - gpu_free(ctx->buf); - else + gpu_free(ctx->buf); + else free(ctx->buf); if (ctx->gpu_id >= 0) @@ -539,7 +553,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin posted_recv = pp_post_recv(ctx, n_posts); if (posted_recv < 0) { - fprintf(stderr,"ERROR: can't post recv (%d) n_posts=%d is_client=%d\n", + fprintf(stderr,"ERROR: can't post recv (%d) n_posts=%d is_client=%d\n", posted_recv, n_posts, is_client); exit(EXIT_FAILURE); return 0; @@ -548,7 +562,7 @@ static int pp_post_work(struct pingpong_context *ctx, int n_posts, int rcnt, uin if (!posted_recv) return 0; } - + PROF(&prof, prof_idx++); for (i = 0; i < posted_recv; ++i) { @@ -897,7 +911,7 @@ int main(int argc, char *argv[]) inet_ntop(AF_INET6, &my_dest.gid, gid, sizeof gid); printf(" local address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x: GID %s\n", my_dest.lid, my_dest.qpn, my_dest.psn, gid); - + rem_dest = pp_client_exch_dest(servername, port, &my_dest); if (!rem_dest) { @@ -1153,7 +1167,7 @@ int main(int argc, char *argv[]) prof_destroy(&prof); //ibv_ack_cq_events(ctx->cq, num_cq_events); - + return 0; From 83457e356b1ecc73645d8fa4904d0ac92e35aff8 Mon Sep 17 00:00:00 2001 From: Ying Zhi Date: Fri, 19 May 2017 13:38:24 +0800 Subject: [PATCH 3/3] Revert "remove duplicate definition in tests" This reverts commit 87cc55c1a5c70a034cd9d2838a4552c7715fde3a. --- tests/test_utils.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_utils.h b/tests/test_utils.h index 707ef26..618ffac 100644 --- a/tests/test_utils.h +++ b/tests/test_utils.h @@ -1,5 +1,5 @@ #pragma once -/* + #ifndef USE_PROF struct prof { }; @@ -14,7 +14,7 @@ static inline int prof_enabled(struct prof *p) { return 0; } static inline void prof_disable(struct prof *p) {} static inline void prof_reset(struct prof *p) {} #endif -*/ + typedef int64_t gds_us_t; static inline gds_us_t gds_get_time_us() { @@ -43,9 +43,9 @@ static void gds_cpu_relax(void) } static void gds_wmb(void) __attribute__((unused)) ; -static void gds_wmb(void) +static void gds_wmb(void) { - asm volatile("sync") ; + asm volatile("sync") ; } #else #error "platform not supported"