[openib-general] [PATCH] enhancement to rdma_bw and rdma_lat to utilize the RDMA CM
Steve Wise
swise at opengridcomputing.com
Tue May 16 06:59:39 PDT 2006
I don't know who maintains src/userspace/perftest, but here is a patch
set that enables rdma_bw and rdma_lat to use the RDMA_CM with the
addition of the -c or --cma flag.
The rkey/addr info is exchanged in the private data, and SEND/RECV's are
used to sync the client/server before and after execution.
Also, I added -P or --poll to rdma_bw to select how it waits for completion
events when none are ready: if you omit -P, it will block when no
completion is available; with -P it will spin.
Signed-off-by: Steve Wise <swise at opengridcomputing.com>
Index: rdma_lat.c
===================================================================
--- rdma_lat.c (revision 7050)
+++ rdma_lat.c (working copy)
@@ -53,6 +53,7 @@
#include <time.h>
#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
#include "get_clock.h"
@@ -71,7 +72,8 @@
struct ibv_context *context;
struct ibv_pd *pd;
struct ibv_mr *mr;
- struct ibv_cq *cq;
+ struct ibv_cq *scq;
+ struct ibv_cq *rcq;
struct ibv_qp *qp;
void *buf;
volatile char *post_buf;
@@ -80,6 +82,7 @@
int tx_depth;
struct ibv_sge list;
struct ibv_send_wr wr;
+ struct rdma_cm_id *cm_id;
};
struct pingpong_dest {
@@ -323,16 +326,22 @@
return NULL;
}
- ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
- if (!ctx->cq) {
+ ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+ if (!ctx->rcq) {
fprintf(stderr, "Couldn't create CQ\n");
return NULL;
}
+ ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
+ if (!ctx->scq) {
+ fprintf(stderr, "Couldn't create CQ\n");
+ return NULL;
+ }
+
{
struct ibv_qp_init_attr attr = {
- .send_cq = ctx->cq,
- .recv_cq = ctx->cq,
+ .send_cq = ctx->scq,
+ .recv_cq = ctx->rcq,
.cap = {
.max_send_wr = tx_depth,
/* Work around: driver doesnt support
@@ -370,13 +379,6 @@
}
}
- ctx->wr.wr_id = PINGPONG_RDMA_WRID;
- ctx->wr.sg_list = &ctx->list;
- ctx->wr.num_sge = 1;
- ctx->wr.opcode = IBV_WR_RDMA_WRITE;
- ctx->wr.send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
- ctx->wr.next = NULL;
-
return ctx;
}
@@ -489,6 +491,467 @@
return 0;
}
+/* CMA STUFF */
+
+/*
+ * Post a single receive work request on ctx->qp.
+ *
+ * The receive covers one byte at the start of ctx->buf (keyed by
+ * ctx->mr->lkey) and is tagged with wr_id 0xdeadbeef, the value that
+ * pp_wait_for_start() and pp_wait_for_done() compare completions against.
+ * A failed ibv_post_recv() is only reported on stderr; the function
+ * returns nothing to the caller.
+ */
+static void pp_post_recv(struct pingpong_context *ctx)
+{
+ struct ibv_sge list;
+ struct ibv_recv_wr wr, *bad_wr;
+ int rc;
+
+ list.addr = (uintptr_t) ctx->buf;
+ list.length = 1;
+ list.lkey = ctx->mr->lkey;
+ wr.next = NULL;
+ wr.wr_id = 0xdeadbeef;
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+
+ rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
+ if (rc) {
+ /* NOTE(review): perror() reports errno; confirm ibv_post_recv() sets it. */
+ perror("ibv_post_recv");
+ fprintf(stderr, "%s ibv_post_recv failed %d\n", __FUNCTION__, rc);
+ }
+}
+
+/*
+ * Build a pingpong_context on top of an already-bound RDMA CM id.
+ *
+ * cm_id:    CM id whose verbs context (cm_id->verbs) is used for all
+ *           verbs objects; it must be non-NULL there or the call fails.
+ * size:     message size; the work buffer is 2 * size bytes.
+ * tx_depth: depth of the send CQ and max_send_wr of the QP.
+ * port:     not referenced anywhere in this function.
+ *
+ * Steps, in order: allocate and zero the page-aligned buffer, allocate a
+ * PD, register the whole buffer as an MR, create a 1-entry receive CQ and
+ * a tx_depth-entry send CQ, create an RC QP on cm_id via rdma_create_qp(),
+ * and post one receive with pp_post_recv().
+ *
+ * Returns the new context, or NULL on any failure.
+ *
+ * NOTE(review): every failure return leaves the objects acquired so far
+ * (ctx, buf, PD, MR, CQs) unreleased.
+ * NOTE(review): ctx comes from malloc(), so fields not assigned here
+ * (e.g. ctx->wr, ctx->list) hold indeterminate values until a caller
+ * fills them in.
+ */
+static struct pingpong_context *pp_init_cma_ctx(struct rdma_cm_id *cm_id,
+ unsigned size,
+ int tx_depth, int port)
+{
+ struct pingpong_context *ctx;
+
+ ctx = malloc(sizeof *ctx);
+ if (!ctx)
+ return NULL;
+
+ ctx->size = size;
+ ctx->tx_depth = tx_depth;
+
+ ctx->buf = memalign(page_size, size * 2);
+ if (!ctx->buf) {
+ fprintf(stderr, "Couldn't allocate work buf.\n");
+ return NULL;
+ }
+
+ memset(ctx->buf, 0, size * 2);
+
+ /* post_buf is the last byte of the first half, poll_buf the last
+ * byte of the second half of the buffer. */
+ ctx->post_buf = (char*)ctx->buf + (size - 1);
+ ctx->poll_buf = (char*)ctx->buf + (2 * size - 1);
+
+ ctx->cm_id = cm_id;
+ ctx->context = cm_id->verbs;
+ if (!ctx->context) {
+ fprintf(stderr, "%s Unbound cm_id!!\n", __FUNCTION__);
+ return NULL;
+ }
+
+ ctx->pd = ibv_alloc_pd(ctx->context);
+ if (!ctx->pd) {
+ fprintf(stderr, "Couldn't allocate PD\n");
+ return NULL;
+ }
+
+ /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+ * The Consumer is not allowed to assign Remote Write or Remote Atomic to
+ * a Memory Region that has not been assigned Local Write. */
+ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+ IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+ if (!ctx->mr) {
+ fprintf(stderr, "Couldn't allocate MR\n");
+ return NULL;
+ }
+
+ /* Receive CQ needs a single entry: only one receive is ever posted
+ * at a time (max_recv_wr is 1 below). */
+ ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+ if (!ctx->rcq) {
+ fprintf(stderr, "Couldn't create RCQ\n");
+ return NULL;
+ }
+
+ ctx->scq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
+ if (!ctx->scq) {
+ fprintf(stderr, "Couldn't create SCQ\n");
+ return NULL;
+ }
+
+ {
+ struct ibv_qp_init_attr attr = {
+ .send_cq = ctx->scq,
+ .recv_cq = ctx->rcq,
+ .cap = {
+ .max_send_wr = tx_depth,
+ .max_recv_wr = 1,
+ .max_send_sge = 1,
+ .max_recv_sge = 1,
+ .max_inline_data = size
+ },
+ .qp_type = IBV_QPT_RC
+ };
+
+ /* The QP is created through the CM id and read back from it. */
+ if (rdma_create_qp(ctx->cm_id, ctx->pd, &attr)) {
+ fprintf(stderr, "Couldn't create QP\n");
+ return NULL;
+ }
+ ctx->qp = ctx->cm_id->qp;
+ }
+
+ /* Pre-post the receive that will catch the peer's first SEND. */
+ pp_post_recv(ctx);
+
+ return ctx;
+}
+
+/*
+ * Tear down the RDMA CM connection held in ctx->cm_id.
+ *
+ * ctx:        context whose cm_id is disconnected and destroyed.
+ * servername: non-NULL on the side that connected as a client; that side
+ *             initiates the disconnect. The other side only waits for
+ *             the resulting CM event.
+ *
+ * After the optional rdma_disconnect(), one CM event is fetched and is
+ * expected to be RDMA_CM_EVENT_DISCONNECTED; anything else is reported.
+ * If rdma_disconnect() fails the function returns without destroying
+ * the id, as before.
+ */
+static void pp_close_cma(struct pingpong_context *ctx, const char *servername)
+{
+	struct rdma_cm_event *event;
+	int rc;
+
+	if (servername) {
+		rc = rdma_disconnect(ctx->cm_id);
+		if (rc) {
+			perror("rdma_disconnect");
+			return;
+		}
+	}
+
+	/* event is only valid when rdma_get_cm_event() succeeds, so it must
+	 * not be inspected or acked after a failure. */
+	rc = rdma_get_cm_event(&event);
+	if (rc) {
+		fprintf(stderr, "%s rdma_get_cm_event failed %d\n", __FUNCTION__, rc);
+	} else {
+		if (event->event != RDMA_CM_EVENT_DISCONNECTED)
+			printf("unexpected event during disconnect %d\n", event->event);
+		rdma_ack_cm_event(event);
+	}
+	rdma_destroy_id(ctx->cm_id);
+}
+
+/*
+ * Server side of the RDMA CM connection setup.
+ *
+ * port:     port to listen on (bound to address 0, i.e. any).
+ * size:     message size handed to pp_init_cma_ctx().
+ * tx_depth: send queue depth handed to pp_init_cma_ctx().
+ * my_dest:  filled in here (qpn 0, psn 0xbb, rkey, vaddr of the second
+ *           half of the buffer) and sent to the client as the accept
+ *           private data.
+ * rem_dest: filled from the private data of the client's connect request.
+ *
+ * Sequence: create listen id, bind, listen, wait for CONNECT_REQUEST,
+ * copy the peer's pingpong_dest, build the context on the child id,
+ * rdma_accept(), then wait for ESTABLISHED.
+ *
+ * Returns the connected context, or NULL after logging the failure.
+ *
+ * NOTE(review): only sin_addr, sin_family and sin_port of `sin` are
+ * assigned; the rest of the struct is left uninitialized.
+ * NOTE(review): listen_id is not destroyed on the success path, and a
+ * ctx obtained from pp_init_cma_ctx() is not released on later failures.
+ */
+static struct pingpong_context *pp_server_connect_cma(unsigned short port, int size, int tx_depth,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
+{
+ struct rdma_cm_id *listen_id;
+ struct rdma_cm_event *event;
+ struct rdma_conn_param conn_param;
+ int ret;
+ struct sockaddr_in sin;
+ struct rdma_cm_id *child_cm_id;
+ struct pingpong_context *ctx;
+
+ printf("%s starting server\n", __FUNCTION__);
+ ret = rdma_create_id(&listen_id, NULL);
+ if (ret) {
+ fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, ret);
+ return NULL;
+ }
+
+ sin.sin_addr.s_addr = 0;
+ sin.sin_family = PF_INET;
+ sin.sin_port = htons(port);
+ ret = rdma_bind_addr(listen_id, (struct sockaddr *)&sin);
+ if (ret) {
+ fprintf(stderr,"%s rdma_bind_addr failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_listen(listen_id, 0);
+ if (ret) {
+ fprintf(stderr,"%s rdma_listen failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
+ fprintf(stderr,"%s bad event waiting for connect request %d\n", __FUNCTION__,
+ event->event);
+ goto err1;
+ }
+
+ /* The client ships its pingpong_dest as connect private data; reject
+ * requests that carry less than one full struct. */
+ if (!event->private_data || (event->private_data_len < sizeof(*rem_dest))) {
+ ret = 1;
+ fprintf(stderr,"%s bad private data len %d\n", __FUNCTION__,
+ event->private_data_len);
+ goto err1;
+ }
+
+ memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+ /* The connect-request event carries the id for the new connection. */
+ child_cm_id = (struct rdma_cm_id *)event->id;
+ ctx = pp_init_cma_ctx(child_cm_id, size, tx_depth, port);
+
+ if (!ctx) {
+ fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+ goto err0;
+ }
+
+ my_dest->qpn = 0;
+ my_dest->psn = 0xbb;
+ my_dest->rkey = ctx->mr->rkey;
+ /* Advertise the second half of the buffer as the remote write target. */
+ my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+ memset(&conn_param, 0, sizeof conn_param);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.private_data = my_dest;
+ conn_param.private_data_len = sizeof(*my_dest);
+ ret = rdma_accept(ctx->cm_id, &conn_param);
+ if (ret) {
+ fprintf(stderr,"%s rdma_accept failed %d\n", __FUNCTION__, ret);
+ goto err0;
+ }
+ /* Done with the connect-request event; the next one should be
+ * ESTABLISHED. */
+ rdma_ack_cm_event(event);
+ ret = rdma_get_cm_event(&event);
+ if (ret) {
+ fprintf(stderr,"rdma_get_cm_event error %d\n", ret);
+ /* No event to ack here, so skip err0/err1 and clean up by hand. */
+ rdma_destroy_id(child_cm_id);
+ goto err2;
+ }
+ if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+ fprintf(stderr,"%s bad event waiting for established %d\n", __FUNCTION__,
+ event->event);
+ goto err0;
+ }
+ rdma_ack_cm_event(event);
+ fprintf(stderr,"%s connected!\n", __FUNCTION__);
+ return ctx;
+/* Cleanup ladder: err0 drops the child id, err1 acks the pending event,
+ * err2 drops the listen id.
+ * NOTE(review): err0 destroys the child id before the event that
+ * references it is acked -- confirm this order is safe. */
+err0:
+ rdma_destroy_id(child_cm_id);
+err1:
+ rdma_ack_cm_event(event);
+err2:
+ rdma_destroy_id(listen_id);
+ fprintf(stderr,"%s NOT connected!\n", __FUNCTION__);
+ return NULL;
+}
+
+/*
+ * Resolve dst (a hostname or an IPv4 address string) to an IPv4 address.
+ *
+ * Returns the sin_addr.s_addr value of the first result, ready to be
+ * stored in a struct sockaddr_in, or 0 when the lookup fails or yields
+ * no IPv4 address. Callers treat 0 as "no address".
+ *
+ * The lookup is restricted to PF_INET so that a host which also has
+ * IPv6 addresses cannot return an IPv6 entry first and make the call
+ * fail. The result list is released on every path.
+ */
+static unsigned get_dst_addr(const char *dst)
+{
+	struct addrinfo hints = { .ai_family = PF_INET };
+	struct addrinfo *res;
+	unsigned addr = 0;
+	int ret;
+
+	ret = getaddrinfo(dst, NULL, &hints, &res);
+	if (ret) {
+		fprintf(stderr, "%s getaddrinfo failed - invalid hostname or IP address\n",
+			__FUNCTION__);
+		return 0;
+	}
+
+	if (res->ai_family == PF_INET)
+		addr = ((struct sockaddr_in *)res->ai_addr)->sin_addr.s_addr;
+
+	/* Previously the non-IPv4 return skipped this and leaked the list. */
+	freeaddrinfo(res);
+	return addr;
+}
+
+/*
+ * Client side of the RDMA CM connection setup.
+ *
+ * servername: host to connect to, resolved with get_dst_addr().
+ * port:       remote port.
+ * size:       message size handed to pp_init_cma_ctx().
+ * tx_depth:   send queue depth handed to pp_init_cma_ctx().
+ * my_dest:    filled in here (qpn 0, psn 0xaa, rkey, vaddr of the second
+ *             half of the buffer) and sent as connect private data.
+ * rem_dest:   filled from the private data of the ESTABLISHED event.
+ *
+ * Sequence: create id, resolve address, resolve route, build the context,
+ * rdma_connect(), wait for ESTABLISHED. Each CM call is followed by a
+ * wait for exactly the event it is expected to produce.
+ *
+ * Returns the connected context, or NULL after logging the failure.
+ *
+ * NOTE(review): only sin_addr, sin_family and sin_port of `sin` are
+ * assigned; the rest of the struct is left uninitialized.
+ * NOTE(review): a ctx obtained from pp_init_cma_ctx() is not released
+ * on the later failure paths.
+ */
+static struct pingpong_context *pp_client_connect_cma(const char *servername,
+ unsigned short port, int size, int tx_depth,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
+{
+ struct rdma_cm_event *event;
+ struct rdma_conn_param conn_param;
+ int ret;
+ struct sockaddr_in sin;
+ struct rdma_cm_id *cm_id;
+ struct pingpong_context *ctx;
+
+ fprintf(stderr,"%s starting client\n", __FUNCTION__);
+ /* get_dst_addr() returns 0 when it cannot produce an IPv4 address. */
+ sin.sin_addr.s_addr = get_dst_addr(servername);
+ if (!sin.sin_addr.s_addr) {
+ return NULL;
+ }
+
+ ret = rdma_create_id(&cm_id, NULL);
+ if (ret) {
+ fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, ret);
+ return NULL;
+ }
+
+ sin.sin_family = PF_INET;
+ sin.sin_port = htons(port);
+ /* 2000 is the resolve timeout; presumably milliseconds -- confirm
+ * against the librdmacm documentation. */
+ ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&sin, 2000);
+ if (ret) {
+ fprintf(stderr,"%s rdma_resolve_addr failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ rdma_ack_cm_event(event);
+
+ ret = rdma_resolve_route(cm_id, 2000);
+ if (ret) {
+ fprintf(stderr,"%s rdma_resolve_route failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ rdma_ack_cm_event(event);
+
+ /* The context is built only after route resolution; pp_init_cma_ctx()
+ * requires cm_id->verbs to be set. */
+ ctx = pp_init_cma_ctx(cm_id, size, tx_depth, port);
+
+ if (!ctx) {
+ fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+ goto err2;
+ }
+
+ my_dest->qpn = 0;
+ my_dest->psn = 0xaa;
+ my_dest->rkey = ctx->mr->rkey;
+ /* Advertise the second half of the buffer as the remote write target. */
+ my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+ memset(&conn_param, 0, sizeof conn_param);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = 5;
+ conn_param.private_data = my_dest;
+ conn_param.private_data_len = sizeof(*my_dest);
+ ret = rdma_connect(ctx->cm_id, &conn_param);
+ if (ret) {
+ fprintf(stderr,"%s rdma_connect failure %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ /* The server returns its pingpong_dest as accept private data. */
+ if (!event->private_data || (event->private_data_len < sizeof(*rem_dest))) {
+ fprintf(stderr,"%s bad private data ptr %p len %d\n", __FUNCTION__,
+ event->private_data, event->private_data_len);
+ goto err1;
+ }
+
+ memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+ rdma_ack_cm_event(event);
+ fprintf(stderr,"connected!\n");
+ return ctx;
+/* err1 acks the event still held, err2 drops the CM id. */
+err1:
+ rdma_ack_cm_event(event);
+err2:
+ fprintf(stderr,"NOT connected!\n");
+ rdma_destroy_id(cm_id);
+ return NULL;
+}
+
+/*
+ * Wait for the peer's one-byte "done" SEND to arrive.
+ *
+ * Polls ctx->rcq, sleeping 1000 us before each poll, until ibv_poll_cq()
+ * returns something other than 0. A successful completion is checked
+ * for a zero status, a receive opcode and the wr_id 0xdeadbeef used by
+ * pp_post_recv(); mismatches are reported on stderr only.
+ *
+ * If the poll itself fails (negative return) the error is reported and
+ * the completion is not examined, because wc was never filled in.
+ */
+static void pp_wait_for_done(struct pingpong_context *ctx)
+{
+	struct ibv_wc wc;
+	int ne;
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (!(wc.opcode & IBV_WC_RECV))
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xdeadbeef)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+}
+
+/*
+ * Send the one-byte "done" message and wait for its send completion.
+ *
+ * Reuses ctx->list and ctx->wr (overwriting whatever they held) to build
+ * a signaled IBV_WR_SEND of the first byte of ctx->buf, tagged with
+ * wr_id 0xcafebabe, and posts it on ctx->qp. It then polls ctx->scq,
+ * sleeping 1000 us before each poll, until a completion is returned.
+ * Unexpected status, opcode or wr_id values are reported on stderr.
+ * After a completed send the function sleeps one more second.
+ *
+ * If the post fails, or the CQ poll returns an error, the problem is
+ * reported and the function returns at once: a request that was never
+ * queued produces no completion to wait for, and a failed poll leaves
+ * wc unset.
+ */
+static void pp_send_done(struct pingpong_context *ctx)
+{
+	struct ibv_send_wr *bad_wr;
+	int rc;
+	struct ibv_wc wc;
+	int ne;
+
+	ctx->list.addr = (uintptr_t) ctx->buf;
+	ctx->list.length = 1;
+	ctx->list.lkey = ctx->mr->lkey;
+	ctx->wr.wr_id = 0xcafebabe;
+	ctx->wr.sg_list = &ctx->list;
+	ctx->wr.num_sge = 1;
+	ctx->wr.opcode = IBV_WR_SEND;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	ctx->wr.next = NULL;
+	rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+	if (rc != 0) {
+		fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+		return;
+	}
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->scq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (wc.opcode != IBV_WC_SEND)
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xcafebabe)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+	sleep(1);
+}
+
+/*
+ * Wait for the peer's one-byte "start" SEND, then re-arm the receive.
+ *
+ * Polls ctx->rcq, sleeping 1000 us before each poll, until ibv_poll_cq()
+ * returns something other than 0. A successful completion is checked
+ * for a zero status, a receive opcode and the wr_id 0xdeadbeef used by
+ * pp_post_recv(); mismatches are reported on stderr only.
+ *
+ * If the poll itself fails (negative return) the error is reported and
+ * the completion is not examined, because wc was never filled in.
+ * In every case a fresh receive is posted before returning, so a later
+ * message from the peer has a buffer to land in.
+ */
+static void pp_wait_for_start(struct pingpong_context *ctx)
+{
+	struct ibv_wc wc;
+	int ne;
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+	} else {
+		if (wc.status)
+			fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+		if (!(wc.opcode & IBV_WC_RECV))
+			fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+		if (wc.wr_id != 0xdeadbeef)
+			fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+	}
+	pp_post_recv(ctx);
+}
+
+/*
+ * Send the one-byte "start" message and wait for its send completion.
+ *
+ * Reuses ctx->list and ctx->wr (overwriting whatever they held) to build
+ * a signaled IBV_WR_SEND of the first byte of ctx->buf, tagged with
+ * wr_id 0xabbaabba, and posts it on ctx->qp. It then polls ctx->scq,
+ * sleeping 1000 us before each poll, until a completion is returned.
+ * Unexpected status, opcode or wr_id values are reported on stderr.
+ *
+ * If the post fails, or the CQ poll returns an error, the problem is
+ * reported and the function returns at once: a request that was never
+ * queued produces no completion to wait for, and a failed poll leaves
+ * wc unset.
+ */
+static void pp_send_start(struct pingpong_context *ctx)
+{
+	struct ibv_send_wr *bad_wr;
+	int rc;
+	struct ibv_wc wc;
+	int ne;
+
+	ctx->list.addr = (uintptr_t) ctx->buf;
+	ctx->list.length = 1;
+	ctx->list.lkey = ctx->mr->lkey;
+	ctx->wr.wr_id = 0xabbaabba;
+	ctx->wr.sg_list = &ctx->list;
+	ctx->wr.num_sge = 1;
+	ctx->wr.opcode = IBV_WR_SEND;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	ctx->wr.next = NULL;
+	rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+	if (rc != 0) {
+		fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+		return;
+	}
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->scq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (wc.opcode != IBV_WC_SEND)
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xabbaabba)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+}
+
static void usage(const char *argv0)
{
printf("Usage:\n");
@@ -505,6 +968,7 @@
printf(" -C, --report-cycles report times in cpu cycle units (default microseconds)\n");
printf(" -H, --report-histogram print out all results (default print summary only)\n");
printf(" -U, --report-unsorted (implies -H) print out unsorted results (default sorted)\n");
+ printf(" -c, --cma Use the RDMA CMA to setup the RDMA connection\n");
}
/*
@@ -599,6 +1063,7 @@
struct ibv_send_wr *wr;
volatile char *poll_buf;
volatile char *post_buf;
+ int use_cma=0;
int scnt, rcnt, ccnt;
@@ -618,14 +1083,19 @@
{ .name = "report-cycles", .has_arg = 0, .val = 'C' },
{ .name = "report-histogram",.has_arg = 0, .val = 'H' },
{ .name = "report-unsorted",.has_arg = 0, .val = 'U' },
+ { .name = "cma", .has_arg = 0, .val = 'c' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:n:t:CHU", long_options, NULL);
+ c = getopt_long(argc, argv, "cp:d:i:s:n:t:CHU", long_options, NULL);
if (c == -1)
break;
switch (c) {
+ case 'c':
+ use_cma = 1;
+ break;
+
case 'p':
port = strtol(optarg, NULL, 0);
if (port < 0 || port > 65535) {
@@ -697,23 +1167,62 @@
srand48(getpid() * time(NULL));
page_size = sysconf(_SC_PAGESIZE);
- ib_dev = pp_find_dev(ib_devname);
- if (!ib_dev)
- return 7;
+ if (use_cma) {
+ int rc;
+ struct pingpong_dest my_dest;
+
+ memset(&rem_dest, 0, sizeof(rem_dest));
- ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
- if (!ctx)
- return 8;
+ if (servername)
+ ctx = pp_client_connect_cma(servername, port, size, tx_depth, &my_dest,
+ &rem_dest);
+ else
+ ctx = pp_server_connect_cma(port, size, tx_depth, &my_dest, &rem_dest);
+ if (!ctx) {
+ fprintf(stderr, "pp_connect_cma(%s,%d) failed!\n", servername, port);
+ return rc;
+ }
- if (pp_open_port(ctx, servername, ib_port, port, &rem_dest))
- return 9;
+ /*
+ * Synch up and force the server to wait for the client to send
+ * the first message (MPA requirement).
+ */
+ if (servername) {
+ sleep(1);
+ pp_send_start(ctx);
+ } else {
+ pp_wait_for_start(ctx);
+ }
+ printf(" local address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+ my_dest.psn, my_dest.rkey, my_dest.vaddr);
+ printf(" remote address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+ rem_dest.psn, rem_dest.rkey, rem_dest.vaddr);
+ } else {
+ ib_dev = pp_find_dev(ib_devname);
+ if (!ib_dev)
+ return 7;
+
+ ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
+ if (!ctx)
+ return 8;
+
+ if (pp_open_port(ctx, servername, ib_port, port, &rem_dest))
+ return 9;
+ }
+
wr = &ctx->wr;
ctx->list.addr = (uintptr_t) ctx->buf;
ctx->list.length = ctx->size;
ctx->list.lkey = ctx->mr->lkey;
wr->wr.rdma.remote_addr = rem_dest.vaddr;
wr->wr.rdma.rkey = rem_dest.rkey;
+ wr->wr_id = PINGPONG_RDMA_WRID;
+ wr->sg_list = &ctx->list;
+ wr->num_sge = 1;
+ wr->opcode = IBV_WR_RDMA_WRITE;
+ wr->send_flags = IBV_SEND_SIGNALED | IBV_SEND_INLINE;
+ wr->next = NULL;
scnt = 0;
rcnt = 0;
@@ -757,9 +1266,10 @@
if (ccnt < iters) {
struct ibv_wc wc;
int ne;
+
++ccnt;
do {
- ne = ibv_poll_cq(ctx->cq, 1, &wc);
+ ne = ibv_poll_cq(ctx->scq, 1, &wc);
} while (ne == 0);
if (ne < 0) {
@@ -778,6 +1288,12 @@
}
}
+ if (use_cma) {
+ pp_send_done(ctx);
+ pp_wait_for_done(ctx);
+ pp_close_cma(ctx, servername);
+ }
+
print_report(&report, iters, tstamp);
return 0;
}
Index: rdma_bw.c
===================================================================
--- rdma_bw.c (revision 7050)
+++ rdma_bw.c (working copy)
@@ -53,6 +53,7 @@
#include <time.h>
#include <infiniband/verbs.h>
+#include <rdma/rdma_cma.h>
#include "get_clock.h"
@@ -64,13 +65,16 @@
struct ibv_context *context;
struct ibv_pd *pd;
struct ibv_mr *mr;
- struct ibv_cq *cq;
+ struct ibv_cq *rcq;
+ struct ibv_cq *scq;
+ struct ibv_comp_channel *ch;
struct ibv_qp *qp;
void *buf;
unsigned size;
int tx_depth;
struct ibv_sge list;
struct ibv_send_wr wr;
+ struct rdma_cm_id *cm_id;
};
struct pingpong_dest {
@@ -308,16 +312,27 @@
return NULL;
}
- ctx->cq = ibv_create_cq(ctx->context, tx_depth, NULL, NULL, 0);
- if (!ctx->cq) {
+ ctx->ch = ibv_create_comp_channel(ctx->context);
+ if (!ctx->ch) {
+ fprintf(stderr, "Couldn't create comp channel\n");
+ return NULL;
+ }
+ ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+ if (!ctx->rcq) {
fprintf(stderr, "Couldn't create CQ\n");
return NULL;
}
+ ctx->scq = ibv_create_cq(ctx->context, tx_depth, ctx, ctx->ch, 0);
+ if (!ctx->scq) {
+ fprintf(stderr, "Couldn't create CQ\n");
+ return NULL;
+ }
+
{
struct ibv_qp_init_attr attr = {
- .send_cq = ctx->cq,
- .recv_cq = ctx->cq,
+ .send_cq = ctx->scq,
+ .recv_cq = ctx->rcq,
.cap = {
.max_send_wr = tx_depth,
/* Work around: driver doesnt support
@@ -407,6 +422,469 @@
return 0;
}
+/* CMA STUFF */
+
+/*
+ * Post a single receive work request on ctx->qp.
+ *
+ * The receive covers one byte at the start of ctx->buf (keyed by
+ * ctx->mr->lkey) and is tagged with wr_id 0xdeadbeef, the value that
+ * pp_wait_for_start() and pp_wait_for_done() compare completions against.
+ * A failed ibv_post_recv() is only reported on stderr; the function
+ * returns nothing to the caller.
+ */
+static void pp_post_recv(struct pingpong_context *ctx)
+{
+ struct ibv_sge list;
+ struct ibv_recv_wr wr, *bad_wr;
+ int rc;
+
+ list.addr = (uintptr_t) ctx->buf;
+ list.length = 1;
+ list.lkey = ctx->mr->lkey;
+ wr.next = NULL;
+ wr.wr_id = 0xdeadbeef;
+ wr.sg_list = &list;
+ wr.num_sge = 1;
+
+ rc = ibv_post_recv(ctx->qp, &wr, &bad_wr);
+ if (rc) {
+ /* NOTE(review): perror() reports errno; confirm ibv_post_recv() sets it. */
+ perror("ibv_post_recv");
+ fprintf(stderr, "%s ibv_post_recv failed %d\n", __FUNCTION__, rc);
+ }
+}
+
+/*
+ * Build a pingpong_context on top of an already-bound RDMA CM id.
+ *
+ * cm_id:    CM id whose verbs context (cm_id->verbs) is used for all
+ *           verbs objects; it must be non-NULL there or the call fails.
+ * size:     message size; the work buffer is 2 * size bytes.
+ * tx_depth: depth of the send CQ and max_send_wr of the QP.
+ * port:     not referenced anywhere in this function.
+ *
+ * Steps, in order: allocate and zero the page-aligned buffer, allocate a
+ * PD, register the whole buffer as an MR, create a completion channel,
+ * create a 1-entry receive CQ and a tx_depth-entry send CQ (the send CQ
+ * is attached to the channel), create an RC QP on cm_id via
+ * rdma_create_qp(), and post one receive with pp_post_recv().
+ *
+ * Returns the new context, or NULL on any failure.
+ *
+ * NOTE(review): every failure return leaves the objects acquired so far
+ * (ctx, buf, PD, MR, channel, CQs) unreleased.
+ * NOTE(review): ctx comes from malloc(), so fields not assigned here
+ * (e.g. ctx->wr, ctx->list) hold indeterminate values until a caller
+ * fills them in.
+ */
+static struct pingpong_context *pp_init_cma_ctx(struct rdma_cm_id *cm_id,
+ unsigned size,
+ int tx_depth, int port)
+{
+ struct pingpong_context *ctx;
+
+ ctx = malloc(sizeof *ctx);
+ if (!ctx)
+ return NULL;
+
+ ctx->size = size;
+ ctx->tx_depth = tx_depth;
+
+ ctx->buf = memalign(page_size, size * 2);
+ if (!ctx->buf) {
+ fprintf(stderr, "Couldn't allocate work buf.\n");
+ return NULL;
+ }
+
+ memset(ctx->buf, 0, size * 2);
+
+ ctx->cm_id = cm_id;
+ ctx->context = cm_id->verbs;
+ if (!ctx->context) {
+ fprintf(stderr, "%s Unbound cm_id!!\n", __FUNCTION__);
+ return NULL;
+ }
+
+ ctx->pd = ibv_alloc_pd(ctx->context);
+ if (!ctx->pd) {
+ fprintf(stderr, "Couldn't allocate PD\n");
+ return NULL;
+ }
+
+ /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
+ * The Consumer is not allowed to assign Remote Write or Remote Atomic to
+ * a Memory Region that has not been assigned Local Write. */
+ ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size * 2,
+ IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_LOCAL_WRITE);
+ if (!ctx->mr) {
+ fprintf(stderr, "Couldn't allocate MR\n");
+ return NULL;
+ }
+
+ /* Completion channel used by the send CQ below. */
+ ctx->ch = ibv_create_comp_channel(ctx->context);
+ if (!ctx->ch) {
+ fprintf(stderr, "Couldn't create comp channel\n");
+ return NULL;
+ }
+
+ /* Receive CQ needs a single entry: only one receive is ever posted
+ * at a time (max_recv_wr is 1 below). It has no channel attached. */
+ ctx->rcq = ibv_create_cq(ctx->context, 1, NULL, NULL, 0);
+ if (!ctx->rcq) {
+ fprintf(stderr, "Couldn't create RCQ\n");
+ return NULL;
+ }
+
+ /* Send CQ carries ctx as its cq_context and is bound to ctx->ch. */
+ ctx->scq = ibv_create_cq(ctx->context, tx_depth, ctx, ctx->ch, 0);
+ if (!ctx->scq) {
+ fprintf(stderr, "Couldn't create SCQ\n");
+ return NULL;
+ }
+
+ {
+ struct ibv_qp_init_attr attr = {
+ .send_cq = ctx->scq,
+ .recv_cq = ctx->rcq,
+ .cap = {
+ .max_send_wr = tx_depth,
+ .max_recv_wr = 1,
+ .max_send_sge = 1,
+ .max_recv_sge = 1,
+ .max_inline_data = 0
+ },
+ .qp_type = IBV_QPT_RC
+ };
+ /* The QP is created through the CM id and read back from it. */
+ if (rdma_create_qp(ctx->cm_id, ctx->pd, &attr)) {
+ fprintf(stderr, "Couldn't create QP\n");
+ return NULL;
+ }
+ ctx->qp = ctx->cm_id->qp;
+ }
+
+ /* Pre-post the receive that will catch the peer's first SEND. */
+ pp_post_recv(ctx);
+
+ return ctx;
+}
+
+/*
+ * Tear down the RDMA CM connection held in ctx->cm_id.
+ *
+ * ctx:        context whose cm_id is disconnected and destroyed.
+ * servername: non-NULL on the side that connected as a client; that side
+ *             initiates the disconnect. The other side only waits for
+ *             the resulting CM event.
+ *
+ * After the optional rdma_disconnect(), one CM event is fetched and is
+ * expected to be RDMA_CM_EVENT_DISCONNECTED; anything else is reported.
+ * If rdma_disconnect() fails the function returns without destroying
+ * the id, as before.
+ */
+static void pp_close_cma(struct pingpong_context *ctx, const char *servername)
+{
+	struct rdma_cm_event *event;
+	int rc;
+
+	if (servername) {
+		rc = rdma_disconnect(ctx->cm_id);
+		if (rc) {
+			perror("rdma_disconnect");
+			return;
+		}
+	}
+
+	/* event is only valid when rdma_get_cm_event() succeeds, so it must
+	 * not be inspected or acked after a failure. */
+	rc = rdma_get_cm_event(&event);
+	if (rc) {
+		fprintf(stderr, "%s rdma_get_cm_event failed %d\n", __FUNCTION__, rc);
+	} else {
+		if (event->event != RDMA_CM_EVENT_DISCONNECTED)
+			printf("unexpected event during disconnect %d\n", event->event);
+		rdma_ack_cm_event(event);
+	}
+	rdma_destroy_id(ctx->cm_id);
+}
+
+/*
+ * Server side of the RDMA CM connection setup.
+ *
+ * port:     port to listen on (bound to address 0, i.e. any).
+ * size:     message size handed to pp_init_cma_ctx().
+ * tx_depth: send queue depth handed to pp_init_cma_ctx().
+ * my_dest:  filled in here (qpn 0, psn 0xbb, rkey, vaddr of the second
+ *           half of the buffer) and sent to the client as the accept
+ *           private data.
+ * rem_dest: filled from the private data of the client's connect request.
+ *
+ * Sequence: create listen id, bind, listen, wait for CONNECT_REQUEST,
+ * copy the peer's pingpong_dest, build the context on the child id,
+ * rdma_accept(), then wait for ESTABLISHED.
+ *
+ * Returns the connected context, or NULL after logging the failure.
+ *
+ * NOTE(review): only sin_addr, sin_family and sin_port of `sin` are
+ * assigned; the rest of the struct is left uninitialized.
+ * NOTE(review): listen_id is not destroyed on the success path, and a
+ * ctx obtained from pp_init_cma_ctx() is not released on later failures.
+ */
+static struct pingpong_context *pp_server_connect_cma(unsigned short port, int size, int tx_depth,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
+{
+ struct rdma_cm_id *listen_id;
+ struct rdma_cm_event *event;
+ struct rdma_conn_param conn_param;
+ int ret;
+ struct sockaddr_in sin;
+ struct rdma_cm_id *child_cm_id;
+ struct pingpong_context *ctx;
+
+ printf("%s starting server\n", __FUNCTION__);
+ ret = rdma_create_id(&listen_id, NULL);
+ if (ret) {
+ fprintf(stderr, "%s rdma_create_id failed %d\n", __FUNCTION__, ret);
+ return NULL;
+ }
+
+ sin.sin_addr.s_addr = 0;
+ sin.sin_family = PF_INET;
+ sin.sin_port = htons(port);
+ ret = rdma_bind_addr(listen_id, (struct sockaddr *)&sin);
+ if (ret) {
+ fprintf(stderr,"%s rdma_bind_addr failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_listen(listen_id, 0);
+ if (ret) {
+ fprintf(stderr,"%s rdma_listen failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
+ fprintf(stderr,"%s bad event waiting for connect request %d\n", __FUNCTION__,
+ event->event);
+ goto err1;
+ }
+
+ /* The client ships its pingpong_dest as connect private data; reject
+ * requests that carry less than one full struct. */
+ if (!event->private_data || (event->private_data_len < sizeof(*rem_dest))) {
+ ret = 1;
+ fprintf(stderr,"%s bad private data len %d\n", __FUNCTION__,
+ event->private_data_len);
+ goto err1;
+ }
+
+ memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+ /* The connect-request event carries the id for the new connection. */
+ child_cm_id = (struct rdma_cm_id *)event->id;
+ ctx = pp_init_cma_ctx(child_cm_id, size, tx_depth, port);
+
+ if (!ctx) {
+ fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+ goto err0;
+ }
+
+ my_dest->qpn = 0;
+ my_dest->psn = 0xbb;
+ my_dest->rkey = ctx->mr->rkey;
+ /* Advertise the second half of the buffer as the remote write target. */
+ my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+ memset(&conn_param, 0, sizeof conn_param);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.private_data = my_dest;
+ conn_param.private_data_len = sizeof(*my_dest);
+ ret = rdma_accept(ctx->cm_id, &conn_param);
+ if (ret) {
+ fprintf(stderr,"%s rdma_accept failed %d\n", __FUNCTION__, ret);
+ goto err0;
+ }
+ /* Done with the connect-request event; the next one should be
+ * ESTABLISHED. */
+ rdma_ack_cm_event(event);
+ ret = rdma_get_cm_event(&event);
+ if (ret) {
+ fprintf(stderr,"rdma_get_cm_event error %d\n", ret);
+ /* No event to ack here, so skip err0/err1 and clean up by hand. */
+ rdma_destroy_id(child_cm_id);
+ goto err2;
+ }
+ if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+ fprintf(stderr,"%s bad event waiting for established %d\n", __FUNCTION__,
+ event->event);
+ goto err0;
+ }
+ rdma_ack_cm_event(event);
+ fprintf(stderr,"%s connected!\n", __FUNCTION__);
+ return ctx;
+/* Cleanup ladder: err0 drops the child id, err1 acks the pending event,
+ * err2 drops the listen id.
+ * NOTE(review): err0 destroys the child id before the event that
+ * references it is acked -- confirm this order is safe. */
+err0:
+ rdma_destroy_id(child_cm_id);
+err1:
+ rdma_ack_cm_event(event);
+err2:
+ rdma_destroy_id(listen_id);
+ fprintf(stderr,"%s NOT connected!\n", __FUNCTION__);
+ return NULL;
+}
+
+/*
+ * Resolve dst (a hostname or an IPv4 address string) to an IPv4 address.
+ *
+ * Returns the sin_addr.s_addr value of the first result, ready to be
+ * stored in a struct sockaddr_in, or 0 when the lookup fails or yields
+ * no IPv4 address. Callers treat 0 as "no address".
+ *
+ * The lookup is restricted to PF_INET so that a host which also has
+ * IPv6 addresses cannot return an IPv6 entry first and make the call
+ * fail. The result list is released on every path.
+ */
+static unsigned get_dst_addr(const char *dst)
+{
+	struct addrinfo hints = { .ai_family = PF_INET };
+	struct addrinfo *res;
+	unsigned addr = 0;
+	int ret;
+
+	ret = getaddrinfo(dst, NULL, &hints, &res);
+	if (ret) {
+		fprintf(stderr, "%s getaddrinfo failed - invalid hostname or IP address\n",
+			__FUNCTION__);
+		return 0;
+	}
+
+	if (res->ai_family == PF_INET)
+		addr = ((struct sockaddr_in *)res->ai_addr)->sin_addr.s_addr;
+
+	/* Previously the non-IPv4 return skipped this and leaked the list. */
+	freeaddrinfo(res);
+	return addr;
+}
+
+/*
+ * Client side of the RDMA CM connection setup.
+ *
+ * servername: host to connect to, resolved with get_dst_addr().
+ * port:       remote port.
+ * size:       message size handed to pp_init_cma_ctx().
+ * tx_depth:   send queue depth handed to pp_init_cma_ctx().
+ * my_dest:    filled in here (qpn 0, psn 0xaa, rkey, vaddr of the second
+ *             half of the buffer) and sent as connect private data.
+ * rem_dest:   filled from the private data of the ESTABLISHED event.
+ *
+ * Sequence: create id, resolve address, resolve route, build the context,
+ * rdma_connect(), wait for ESTABLISHED. Each CM call is followed by a
+ * wait for exactly the event it is expected to produce.
+ *
+ * Returns the connected context, or NULL after logging the failure.
+ *
+ * NOTE(review): only sin_addr, sin_family and sin_port of `sin` are
+ * assigned; the rest of the struct is left uninitialized.
+ * NOTE(review): a ctx obtained from pp_init_cma_ctx() is not released
+ * on the later failure paths.
+ */
+static struct pingpong_context *pp_client_connect_cma(const char *servername,
+ unsigned short port, int size, int tx_depth,
+ struct pingpong_dest *my_dest,
+ struct pingpong_dest *rem_dest)
+{
+ struct rdma_cm_event *event;
+ struct rdma_conn_param conn_param;
+ int ret;
+ struct sockaddr_in sin;
+ struct rdma_cm_id *cm_id;
+ struct pingpong_context *ctx;
+
+ fprintf(stderr,"%s starting client\n", __FUNCTION__);
+ /* get_dst_addr() returns 0 when it cannot produce an IPv4 address. */
+ sin.sin_addr.s_addr = get_dst_addr(servername);
+ if (!sin.sin_addr.s_addr) {
+ return NULL;
+ }
+
+ ret = rdma_create_id(&cm_id, NULL);
+ if (ret) {
+ fprintf(stderr,"%s rdma_create_id failed %d\n", __FUNCTION__, ret);
+ return NULL;
+ }
+
+ sin.sin_family = PF_INET;
+ sin.sin_port = htons(port);
+ /* 2000 is the resolve timeout; presumably milliseconds -- confirm
+ * against the librdmacm documentation. */
+ ret = rdma_resolve_addr(cm_id, NULL, (struct sockaddr *)&sin, 2000);
+ if (ret) {
+ fprintf(stderr,"%s rdma_resolve_addr failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ADDR_RESOLVED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ rdma_ack_cm_event(event);
+
+ ret = rdma_resolve_route(cm_id, 2000);
+ if (ret) {
+ fprintf(stderr,"%s rdma_resolve_route failed %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ROUTE_RESOLVED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ rdma_ack_cm_event(event);
+
+ /* The context is built only after route resolution; pp_init_cma_ctx()
+ * requires cm_id->verbs to be set. */
+ ctx = pp_init_cma_ctx(cm_id, size, tx_depth, port);
+
+ if (!ctx) {
+ fprintf(stderr,"%s pp_init_cma_ctx failed\n", __FUNCTION__);
+ goto err2;
+ }
+
+ my_dest->qpn = 0;
+ my_dest->psn = 0xaa;
+ my_dest->rkey = ctx->mr->rkey;
+ /* Advertise the second half of the buffer as the remote write target. */
+ my_dest->vaddr = (uintptr_t)ctx->buf + ctx->size;
+ memset(&conn_param, 0, sizeof conn_param);
+ conn_param.responder_resources = 1;
+ conn_param.initiator_depth = 1;
+ conn_param.retry_count = 5;
+ conn_param.private_data = my_dest;
+ conn_param.private_data_len = sizeof(*my_dest);
+ ret = rdma_connect(ctx->cm_id, &conn_param);
+ if (ret) {
+ fprintf(stderr,"%s rdma_connect failure %d\n", __FUNCTION__, ret);
+ goto err2;
+ }
+
+ ret = rdma_get_cm_event(&event);
+ if (ret)
+ goto err2;
+
+ if (event->event != RDMA_CM_EVENT_ESTABLISHED) {
+ fprintf(stderr,"%s/%d unexpected CM event %d\n", __FUNCTION__, __LINE__,
+ event->event);
+ goto err1;
+ }
+ /* The server returns its pingpong_dest as accept private data. */
+ if (!event->private_data || (event->private_data_len < sizeof(*rem_dest))) {
+ fprintf(stderr,"%s bad private data ptr %p len %d\n", __FUNCTION__,
+ event->private_data, event->private_data_len);
+ goto err1;
+ }
+ memcpy(rem_dest, event->private_data, sizeof(*rem_dest));
+ rdma_ack_cm_event(event);
+ fprintf(stderr,"connected!\n");
+ return ctx;
+/* err1 acks the event still held, err2 drops the CM id. */
+err1:
+ rdma_ack_cm_event(event);
+err2:
+ fprintf(stderr,"NOT connected!\n");
+ rdma_destroy_id(cm_id);
+ return NULL;
+}
+
+/*
+ * Wait for the peer's one-byte "done" SEND to arrive.
+ *
+ * Polls ctx->rcq, sleeping 1000 us before each poll, until ibv_poll_cq()
+ * returns something other than 0. A successful completion is checked
+ * for a zero status, a receive opcode and the wr_id 0xdeadbeef used by
+ * pp_post_recv(); mismatches are reported on stderr only.
+ *
+ * If the poll itself fails (negative return) the error is reported and
+ * the completion is not examined, because wc was never filled in.
+ */
+static void pp_wait_for_done(struct pingpong_context *ctx)
+{
+	struct ibv_wc wc;
+	int ne;
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (!(wc.opcode & IBV_WC_RECV))
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xdeadbeef)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+}
+
+/*
+ * Send the one-byte "done" message and wait for its send completion.
+ *
+ * Reuses ctx->list and ctx->wr (overwriting whatever they held) to build
+ * a signaled IBV_WR_SEND of the first byte of ctx->buf, tagged with
+ * wr_id 0xcafebabe, and posts it on ctx->qp. It then polls ctx->scq,
+ * sleeping 1000 us before each poll, until a completion is returned.
+ * Unexpected status, opcode or wr_id values are reported on stderr.
+ * After a completed send the function sleeps one more second.
+ *
+ * If the post fails, or the CQ poll returns an error, the problem is
+ * reported and the function returns at once: a request that was never
+ * queued produces no completion to wait for, and a failed poll leaves
+ * wc unset.
+ */
+static void pp_send_done(struct pingpong_context *ctx)
+{
+	struct ibv_send_wr *bad_wr;
+	int rc;
+	struct ibv_wc wc;
+	int ne;
+
+	ctx->list.addr = (uintptr_t) ctx->buf;
+	ctx->list.length = 1;
+	ctx->list.lkey = ctx->mr->lkey;
+	ctx->wr.wr_id = 0xcafebabe;
+	ctx->wr.sg_list = &ctx->list;
+	ctx->wr.num_sge = 1;
+	ctx->wr.opcode = IBV_WR_SEND;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	ctx->wr.next = NULL;
+	rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+	if (rc != 0) {
+		fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+		return;
+	}
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->scq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (wc.opcode != IBV_WC_SEND)
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xcafebabe)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+	sleep(1);
+}
+
+/*
+ * Wait for the peer's one-byte "start" SEND, then re-arm the receive.
+ *
+ * Polls ctx->rcq, sleeping 1000 us before each poll, until ibv_poll_cq()
+ * returns something other than 0. A successful completion is checked
+ * for a zero status, a receive opcode and the wr_id 0xdeadbeef used by
+ * pp_post_recv(); mismatches are reported on stderr only.
+ *
+ * If the poll itself fails (negative return) the error is reported and
+ * the completion is not examined, because wc was never filled in.
+ * In every case a fresh receive is posted before returning, so a later
+ * message from the peer has a buffer to land in.
+ */
+static void pp_wait_for_start(struct pingpong_context *ctx)
+{
+	struct ibv_wc wc;
+	int ne;
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->rcq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+	} else {
+		if (wc.status)
+			fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+		if (!(wc.opcode & IBV_WC_RECV))
+			fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+		if (wc.wr_id != 0xdeadbeef)
+			fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+	}
+	pp_post_recv(ctx);
+}
+
+/*
+ * Send the one-byte "start" message and wait for its send completion.
+ *
+ * Reuses ctx->list and ctx->wr (overwriting whatever they held) to build
+ * a signaled IBV_WR_SEND of the first byte of ctx->buf, tagged with
+ * wr_id 0xabbaabba, and posts it on ctx->qp. It then polls ctx->scq,
+ * sleeping 1000 us before each poll, until a completion is returned.
+ * Unexpected status, opcode or wr_id values are reported on stderr, and
+ * "start sent" is printed once a completion has been seen.
+ *
+ * If the post fails, or the CQ poll returns an error, the problem is
+ * reported and the function returns at once without printing
+ * "start sent": a request that was never queued produces no completion
+ * to wait for, and a failed poll leaves wc unset.
+ */
+static void pp_send_start(struct pingpong_context *ctx)
+{
+	struct ibv_send_wr *bad_wr;
+	int rc;
+	struct ibv_wc wc;
+	int ne;
+
+	ctx->list.addr = (uintptr_t) ctx->buf;
+	ctx->list.length = 1;
+	ctx->list.lkey = ctx->mr->lkey;
+	ctx->wr.wr_id = 0xabbaabba;
+	ctx->wr.sg_list = &ctx->list;
+	ctx->wr.num_sge = 1;
+	ctx->wr.opcode = IBV_WR_SEND;
+	ctx->wr.send_flags = IBV_SEND_SIGNALED;
+	ctx->wr.next = NULL;
+	rc = ibv_post_send(ctx->qp, &ctx->wr, &bad_wr);
+	if (rc != 0) {
+		fprintf(stderr, "%s ibv_post_send failed %d!\n", __FUNCTION__, rc);
+		return;
+	}
+
+	do {
+		usleep(1000);
+		ne = ibv_poll_cq(ctx->scq, 1, &wc);
+	} while (ne == 0);
+
+	if (ne < 0) {
+		fprintf(stderr, "%s poll CQ failed %d\n", __FUNCTION__, ne);
+		return;
+	}
+
+	if (wc.status)
+		fprintf(stderr, "%s bad wc status %d\n", __FUNCTION__, wc.status);
+	if (wc.opcode != IBV_WC_SEND)
+		fprintf(stderr, "%s bad wc opcode %d\n", __FUNCTION__, wc.opcode);
+	if (wc.wr_id != 0xabbaabba)
+		fprintf(stderr, "%s bad wc wr_id 0x%x\n", __FUNCTION__, (int)wc.wr_id);
+	printf("start sent\n");
+}
+
static void usage(const char *argv0)
{
printf("Usage:\n");
@@ -421,6 +899,7 @@
printf(" -t, --tx-depth=<dep> size of tx queue (default 100)\n");
printf(" -n, --iters=<iters> number of exchanges (at least 2, default 1000)\n");
printf(" -b, --bidirectional measure bidirectional bandwidth (default unidirectional)\n");
+ printf(" -c, --cma Use the RDMA CMA to setup the RDMA connection\n");
}
static void print_report(unsigned int iters, unsigned size, int duplex,
@@ -438,14 +917,15 @@
/* Find the peak bandwidth */
for (i = 0; i < iters; ++i)
- for (j = i; j < iters; ++j) {
- t = (tcompleted[j] - tposted[i]) / (j - i + 1);
- if (t < opt_delta) {
- opt_delta = t;
- opt_posted = i;
- opt_completed = j;
+ if ((i+200) < iters)
+ for (j = i; j < i+200; ++j) {
+ t = (tcompleted[j] - tposted[i]) / (j - i + 1);
+ if (t < opt_delta) {
+ opt_delta = t;
+ opt_posted = i;
+ opt_completed = j;
+ }
}
- }
cycles_to_units = get_cpu_mhz() * 1000000;
@@ -486,10 +966,16 @@
int sockfd;
int duplex = 0;
struct ibv_qp *qp;
+ int use_cma=0;
+ int poll = 0;
+ cycles_t *tposted;
+ cycles_t *tcompleted;
+ struct ibv_wc wc;
+ int ne;
+ struct ibv_cq *ev_cq;
+ void *ev_ctx;
+ int blocks = 0;
- cycles_t *tposted;
- cycles_t *tcompleted;
-
/* Parameter parsing. */
while (1) {
int c;
@@ -502,14 +988,24 @@
{ .name = "iters", .has_arg = 1, .val = 'n' },
{ .name = "tx-depth", .has_arg = 1, .val = 't' },
{ .name = "bidirectional", .has_arg = 0, .val = 'b' },
+ { .name = "cma", .has_arg = 0, .val = 'c' },
+ { .name = "poll", .has_arg = 0, .val = 'P' },
{ 0 }
};
- c = getopt_long(argc, argv, "p:d:i:s:n:t:b", long_options, NULL);
+ c = getopt_long(argc, argv, "p:d:i:s:n:t:bcP", long_options, NULL);
if (c == -1)
break;
switch (c) {
+ case 'c':
+ use_cma = 1;
+ break;
+
+ case 'P':
+ poll = 1;
+ break;
+
case 'p':
port = strtol(optarg, NULL, 0);
if (port < 0 || port > 65535) {
@@ -552,15 +1048,6 @@
break;
- case 'l':
- tx_depth = strtol(optarg, NULL, 0);
- if (tx_depth < 1) {
- usage(argv[0]);
- return 1;
- }
-
- break;
-
case 'b':
duplex = 1;
break;
@@ -585,82 +1072,125 @@
page_size = sysconf(_SC_PAGESIZE);
- dev_list = ibv_get_device_list(NULL);
-
- if (!ib_devname) {
- ib_dev = dev_list[0];
- if (!ib_dev) {
- fprintf(stderr, "No IB devices found\n");
+ if (use_cma) {
+ /* CMA path: rkey/addr arrive in the CM private data, so rem_dest is allocated here. */
+ rem_dest = malloc(sizeof *rem_dest);
+ if (!rem_dest) {
+ fprintf(stderr,"%s cannot malloc rem_dest!\n", __FUNCTION__);
return 1;
}
+
+ memset(rem_dest, 0, sizeof(*rem_dest));
+
+ if (servername)
+ ctx = pp_client_connect_cma(servername, port, size, tx_depth, &my_dest,
+ rem_dest);
+ else
+ ctx = pp_server_connect_cma(port, size, tx_depth, &my_dest, rem_dest);
+ if (!ctx) { /* servername is NULL in server mode, so it must not reach %s unguarded */
+ fprintf(stderr, "pp_connect_cma(%s,%d) failed!\n", servername ? servername : "(null)", port);
+ return 1; /* same failure status as every other setup error in main */
+ }
+
+ /*
+ * Synch up and force the server to wait for the client to send
+ * the first message (MPA requirement).
+ */
+ if (servername) {
+ sleep(1);
+ pp_send_start(ctx);
+ } else {
+ pp_wait_for_start(ctx);
+ }
+
+ printf(" local address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+ my_dest.psn, my_dest.rkey, my_dest.vaddr);
+ printf(" remote address: PSN %#06x RKey %#08x VAddr %#016Lx\n",
+ rem_dest->psn, rem_dest->rkey, rem_dest->vaddr);
} else {
- for (; (ib_dev = *dev_list); ++dev_list)
- if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
- break;
- if (!ib_dev) {
- fprintf(stderr, "IB device %s not found\n", ib_devname);
- return 1;
+ dev_list = ibv_get_device_list(NULL);
+
+ if (!ib_devname) {
+ ib_dev = dev_list[0];
+ if (!ib_dev) {
+ fprintf(stderr, "No IB devices found\n");
+ return 1;
+ }
+ } else {
+ for (; (ib_dev = *dev_list); ++dev_list)
+ if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+ break;
+ if (!ib_dev) {
+ fprintf(stderr, "IB device %s not found\n", ib_devname);
+ return 1;
+ }
}
- }
- ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
- if (!ctx)
- return 1;
+ ctx = pp_init_ctx(ib_dev, size, tx_depth, ib_port);
+ if (!ctx)
+ return 1;
- /* Create connection between client and server.
- * We do it by exchanging data over a TCP socket connection. */
+ /* Create connection between client and server.
+ * We do it by exchanging data over a TCP socket connection. */
- my_dest.lid = pp_get_local_lid(ctx, ib_port);
- my_dest.qpn = ctx->qp->qp_num;
- my_dest.psn = lrand48() & 0xffffff;
- if (!my_dest.lid) {
- fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
- return 1;
- }
- my_dest.rkey = ctx->mr->rkey;
- my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
+ my_dest.lid = pp_get_local_lid(ctx, ib_port);
+ my_dest.qpn = ctx->qp->qp_num;
+ my_dest.psn = lrand48() & 0xffffff;
+ if (!my_dest.lid) {
+ fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
+ return 1;
+ }
+ my_dest.rkey = ctx->mr->rkey;
+ my_dest.vaddr = (uintptr_t)ctx->buf + ctx->size;
- printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
- "RKey %#08x VAddr %#016Lx\n",
- my_dest.lid, my_dest.qpn, my_dest.psn,
- my_dest.rkey, my_dest.vaddr);
+ printf(" local address: LID %#04x, QPN %#06x, PSN %#06x "
+ "RKey %#08x VAddr %#016Lx\n",
+ my_dest.lid, my_dest.qpn, my_dest.psn,
+ my_dest.rkey, my_dest.vaddr);
- if (servername) {
- sockfd = pp_client_connect(servername, port);
- if (sockfd < 0)
+ if (servername) {
+ sockfd = pp_client_connect(servername, port);
+ if (sockfd < 0)
+ return 1;
+ rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+ } else {
+ sockfd = pp_server_connect(port);
+ if (sockfd < 0)
+ return 1;
+ rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ }
+
+ if (!rem_dest)
return 1;
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- sockfd = pp_server_connect(port);
- if (sockfd < 0)
- return 1;
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- }
- if (!rem_dest)
- return 1;
+ printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
+ "RKey %#08x VAddr %#016Lx\n",
+ rem_dest->lid, rem_dest->qpn, rem_dest->psn,
+ rem_dest->rkey, rem_dest->vaddr);
- printf(" remote address: LID %#04x, QPN %#06x, PSN %#06x, "
- "RKey %#08x VAddr %#016Lx\n",
- rem_dest->lid, rem_dest->qpn, rem_dest->psn,
- rem_dest->rkey, rem_dest->vaddr);
+ if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
+ return 1;
- if (pp_connect_ctx(ctx, ib_port, my_dest.psn, rem_dest))
- return 1;
-
- /* An additional handshake is required *after* moving qp to RTR.
- Arbitrarily reuse exch_dest for this purpose. */
- if (servername) {
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
- } else {
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ /* An additional handshake is required *after* moving qp to RTR.
+ Arbitrarily reuse exch_dest for this purpose. */
+ if (servername) {
+ rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+ } else {
+ rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ }
}
/* For half duplex tests, server just waits for client to exit */
if (!servername && !duplex) {
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
- write(sockfd, "done", sizeof "done");
- close(sockfd);
+ if (use_cma) {
+ pp_wait_for_done(ctx);
+ pp_send_done(ctx);
+ pp_close_cma(ctx, servername);
+ } else {
+ rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ write(sockfd, "done", sizeof "done");
+ close(sockfd);
+ }
return 0;
}
@@ -695,6 +1225,11 @@
return 1;
}
+ if (!poll && ibv_req_notify_cq(ctx->scq, 0)) {
+ fprintf(stderr, "ibv_req_notify failed!\n");
+ return 1;
+ }
+
/* Done with setup. Start the test. */
while (scnt < iters || ccnt < iters) {
@@ -712,10 +1247,22 @@
}
if (ccnt < iters) {
- struct ibv_wc wc;
- int ne;
+
do {
- ne = ibv_poll_cq(ctx->cq, 1, &wc);
+ ne = ibv_poll_cq(ctx->scq, 1, &wc);
+ if (!poll && ne == 0) {
+ blocks++;
+ if (ibv_get_cq_event(ctx->ch, &ev_cq, &ev_ctx)) {
+ fprintf(stderr, "Failed to get cq event!\n");
+ return 1;
+ }
+ if (ev_cq != ctx->scq) {
+ fprintf(stderr, "Unkown CQ!\n");
+ return 1;
+ }
+ ibv_ack_cq_events(ctx->scq, 1);
+ ibv_req_notify_cq(ctx->scq, 0);
+ }
} while (ne == 0);
tcompleted[ccnt] = get_cycles();
@@ -737,15 +1284,20 @@
}
}
- if (servername) {
- rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+ if (use_cma) {
+ pp_send_done(ctx);
+ pp_wait_for_done(ctx);
+ pp_close_cma(ctx, servername);
} else {
- rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ if (servername) {
+ rem_dest = pp_client_exch_dest(sockfd, &my_dest);
+ } else {
+ rem_dest = pp_server_exch_dest(sockfd, &my_dest);
+ }
+ write(sockfd, "done", sizeof "done");
+ close(sockfd);
}
- write(sockfd, "done", sizeof "done");
- close(sockfd);
-
print_report(iters, size, duplex, tposted, tcompleted);
free(tposted);
Index: Makefile
===================================================================
--- Makefile (revision 7050)
+++ Makefile (working copy)
@@ -10,7 +10,7 @@
LOADLIBES +=
LDFLAGS +=
-${TESTS}: LOADLIBES += -libverbs
+${TESTS}: LOADLIBES += -libverbs -lrdmacm
${TESTS} ${UTILS}: %: %.c ${EXTRA_FILES} ${EXTRA_HEADERS}
$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) $< ${EXTRA_FILES} $(LOADLIBES) $(LDLIBS) -o $@
More information about the general
mailing list