[openib-general] [PATCH 2/4] SRQs for libibverbs

Roland Dreier rolandd at cisco.com
Tue Aug 9 20:55:39 PDT 2005


--- libibverbs/include/infiniband/driver.h	(revision 3041)
+++ libibverbs/include/infiniband/driver.h	(working copy)
@@ -92,6 +92,12 @@ extern int ibv_cmd_create_cq(struct ibv_
 			     struct ibv_create_cq_resp *resp, size_t resp_size);
 extern int ibv_cmd_destroy_cq(struct ibv_cq *cq);
 
+extern int ibv_cmd_create_srq(struct ibv_pd *pd,
+			      struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+			      struct ibv_create_srq *cmd, size_t cmd_size,
+			      struct ibv_create_srq_resp *resp, size_t resp_size);
+extern int ibv_cmd_destroy_srq(struct ibv_srq *srq);
+
 extern int ibv_cmd_create_qp(struct ibv_pd *pd,
 			     struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
 			     struct ibv_create_qp *cmd, size_t cmd_size);
--- libibverbs/include/infiniband/verbs.h	(revision 3041)
+++ libibverbs/include/infiniband/verbs.h	(working copy)
@@ -185,16 +185,20 @@ enum ibv_event_type {
 	IBV_EVENT_PORT_ERR,
 	IBV_EVENT_LID_CHANGE,
 	IBV_EVENT_PKEY_CHANGE,
-	IBV_EVENT_SM_CHANGE
+	IBV_EVENT_SM_CHANGE,
+	IBV_EVENT_SRQ_ERR,
+	IBV_EVENT_SRQ_LIMIT_REACHED,
+	IBV_EVENT_QP_LAST_WQE_REACHED
 };
 
 struct ibv_async_event {
 	union {
-		struct ibv_cq *cq;
-		struct ibv_qp *qp;
-		int            port_num;
-	}                  element;
-	enum ibv_event_type event_type;
+		struct ibv_cq  *cq;
+		struct ibv_qp  *qp;
+		struct ibv_srq *srq;
+		int		port_num;
+	} element;
+	enum ibv_event_type	event_type;
 };
 
 enum ibv_wc_status {
@@ -297,6 +301,22 @@ struct ibv_ah_attr {
 	uint8_t			port_num;
 };
 
+enum ibv_srq_attr_mask {
+	IBV_SRQ_MAX_WR	= 1 << 0,
+	IBV_SRQ_LIMIT	= 1 << 1,
+};
+
+struct ibv_srq_attr {
+	uint32_t		max_wr;
+	uint32_t		max_sge;
+	uint32_t		srq_limit;
+};
+
+struct ibv_srq_init_attr {
+	void		       *srq_context;
+	struct ibv_srq_attr	attr;
+};
+
 enum ibv_qp_type {
 	IBV_QPT_RC = 2,
 	IBV_QPT_UC,
@@ -446,12 +466,20 @@ struct ibv_recv_wr {
 	int			num_sge;
 };
 
+struct ibv_srq {
+	struct ibv_context     *context;
+	void		       *srq_context;
+	struct ibv_pd	       *pd; 
+	uint32_t		handle;
+};
+
 struct ibv_qp {
 	struct ibv_context     *context;
 	void		       *qp_context;
 	struct ibv_pd	       *pd; 
 	struct ibv_cq	       *send_cq;
 	struct ibv_cq	       *recv_cq;
+	struct ibv_srq	       *srq;
 	uint32_t		handle;
 	uint32_t		qp_num;
 	enum ibv_qp_state       state;
@@ -504,6 +532,15 @@ struct ibv_context_ops {
 	int			(*req_notify_cq)(struct ibv_cq *cq, int solicited);
 	void			(*cq_event)(struct ibv_cq *cq);
 	int			(*destroy_cq)(struct ibv_cq *cq);
+	struct ibv_srq *	(*create_srq)(struct ibv_pd *pd,
+					      struct ibv_srq_init_attr *srq_init_attr);
+	int			(*modify_srq)(struct ibv_srq *srq,
+					      struct ibv_srq_attr *srq_attr,
+					      enum ibv_srq_attr_mask srq_attr_mask);
+	int			(*destroy_srq)(struct ibv_srq *srq);
+	int			(*post_srq_recv)(struct ibv_srq *srq,
+						 struct ibv_recv_wr *recv_wr,
+						 struct ibv_recv_wr **bad_recv_wr);
 	struct ibv_qp *		(*create_qp)(struct ibv_pd *pd, struct ibv_qp_init_attr *attr);
 	int			(*modify_qp)(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 					     enum ibv_qp_attr_mask attr_mask);
@@ -650,6 +687,56 @@ static inline int ibv_req_notify_cq(stru
 }
 
 /**
+ * ibv_create_srq - Creates a SRQ associated with the specified protection
+ *   domain.
+ * @pd: The protection domain associated with the SRQ.
+ * @srq_init_attr: A list of initial attributes required to create the SRQ.
+ *
+ * srq_init_attr->attr.max_wr and srq_init_attr->attr.max_sge are read
+ * to determine the requested size of the SRQ, and set to the actual
+ * values allocated on return.  If ibv_create_srq() succeeds, max_wr and
+ * max_sge will always be at least as large as the requested values.
+ */
+struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
+			       struct ibv_srq_init_attr *srq_init_attr);
+
+/**
+ * ibv_modify_srq - Modifies the attributes for the specified SRQ.
+ * @srq: The SRQ to modify.
+ * @srq_attr: On input, specifies the SRQ attributes to modify.  On output,
+ *   the current values of selected SRQ attributes are returned.
+ * @srq_attr_mask: A bit-mask used to specify which attributes of the SRQ
+ *   are being modified.
+ *
+ * The mask may contain IBV_SRQ_MAX_WR to resize the SRQ and/or
+ * IBV_SRQ_LIMIT to set the SRQ's limit and request notification when
+ * the number of receives queued drops below the limit.
+ */
+int ibv_modify_srq(struct ibv_srq *srq,
+		   struct ibv_srq_attr *srq_attr,
+		   enum ibv_srq_attr_mask srq_attr_mask);
+
+/**
+ * ibv_destroy_srq - Destroys the specified SRQ.
+ * @srq: The SRQ to destroy.
+ */
+int ibv_destroy_srq(struct ibv_srq *srq);
+
+/**
+ * ibv_post_srq_recv - Posts a list of work requests to the specified SRQ.
+ * @srq: The SRQ to post the work request on.
+ * @recv_wr: A list of work requests to post on the receive queue.
+ * @bad_recv_wr: On an immediate failure, this parameter will reference
+ *   the work request that failed to be posted on the SRQ.
+ */
+static inline int ibv_post_srq_recv(struct ibv_srq *srq,
+				    struct ibv_recv_wr *recv_wr,
+				    struct ibv_recv_wr **bad_recv_wr)
+{
+	return srq->context->ops.post_srq_recv(srq, recv_wr, bad_recv_wr);
+}
+
+/**
  * ibv_create_qp - Create a queue pair.
  */
 extern struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
--- libibverbs/include/infiniband/kern-abi.h	(revision 3041)
+++ libibverbs/include/infiniband/kern-abi.h	(working copy)
@@ -83,7 +83,12 @@ enum {
 	IB_USER_VERBS_CMD_POST_SEND,
 	IB_USER_VERBS_CMD_POST_RECV,
 	IB_USER_VERBS_CMD_ATTACH_MCAST,
-	IB_USER_VERBS_CMD_DETACH_MCAST
+	IB_USER_VERBS_CMD_DETACH_MCAST,
+	IB_USER_VERBS_CMD_CREATE_SRQ,
+	IB_USER_VERBS_CMD_MODIFY_SRQ,
+	IB_USER_VERBS_CMD_QUERY_SRQ,
+	IB_USER_VERBS_CMD_DESTROY_SRQ,
+	IB_USER_VERBS_CMD_POST_SRQ_RECV
 };
 
 /*
@@ -425,4 +430,41 @@ struct ibv_detach_mcast {
 	__u64 driver_data[0];
 };
 
+struct ibv_create_srq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u64 response;
+	__u64 user_handle;
+	__u32 pd_handle;
+	__u32 max_wr;
+	__u32 max_sge;
+	__u32 srq_limit;
+	__u64 driver_data[0];
+};
+
+struct ibv_create_srq_resp {
+	__u32 srq_handle;
+};
+
+struct ibv_modify_srq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 srq_handle;
+	__u32 attr_mask;
+	__u32 max_wr;
+	__u32 max_sge;
+	__u32 srq_limit;
+	__u32 reserved;
+	__u64 driver_data[0];
+};
+
+struct ibv_destroy_srq {
+	__u32 command;
+	__u16 in_words;
+	__u16 out_words;
+	__u32 srq_handle;
+};
+
 #endif /* KERN_ABI_H */
--- libibverbs/src/libibverbs.map	(revision 3041)
+++ libibverbs/src/libibverbs.map	(working copy)
@@ -17,6 +17,9 @@ IBVERBS_1.0 {
 		ibv_create_cq;
 		ibv_destroy_cq;
 		ibv_get_cq_event;
+		ibv_create_srq;
+		ibv_modify_srq;
+		ibv_destroy_srq;
 		ibv_create_qp;
 		ibv_modify_qp;
 		ibv_destroy_qp;
@@ -35,6 +38,9 @@ IBVERBS_1.0 {
 		ibv_cmd_dereg_mr;
 		ibv_cmd_create_cq;
 		ibv_cmd_destroy_cq;
+		ibv_cmd_create_srq;
+		ibv_cmd_modify_srq;
+		ibv_cmd_destroy_srq;
 		ibv_cmd_create_qp;
 		ibv_cmd_modify_qp;
 		ibv_cmd_destroy_qp;
--- libibverbs/src/verbs.c	(revision 3041)
+++ libibverbs/src/verbs.c	(working copy)
@@ -140,6 +140,32 @@ int ibv_get_cq_event(struct ibv_context 
 	return 0;
 }
 
+struct ibv_srq *ibv_create_srq(struct ibv_pd *pd,
+			       struct ibv_srq_init_attr *srq_init_attr)
+{
+	struct ibv_srq *srq = pd->context->ops.create_srq(pd, srq_init_attr);
+
+	if (srq) {
+		srq->context     = pd->context;
+		srq->srq_context = srq_init_attr->srq_context;
+		srq->pd          = pd;
+	}
+
+	return srq;
+}
+
+int ibv_modify_srq(struct ibv_srq *srq,
+		   struct ibv_srq_attr *srq_attr,
+		   enum ibv_srq_attr_mask srq_attr_mask)
+{
+	return srq->context->ops.modify_srq(srq, srq_attr, srq_attr_mask);
+}
+
+int ibv_destroy_srq(struct ibv_srq *srq)
+{
+	return srq->context->ops.destroy_srq(srq);
+}
+
 struct ibv_qp *ibv_create_qp(struct ibv_pd *pd,
 			     struct ibv_qp_init_attr *qp_init_attr)
 {
@@ -151,10 +177,12 @@ struct ibv_qp *ibv_create_qp(struct ibv_
 		qp->pd         = pd;
 		qp->send_cq    = qp_init_attr->send_cq;
 		qp->recv_cq    = qp_init_attr->recv_cq;
+		qp->srq        = qp_init_attr->srq;
 	}
 
 	return qp;
 }
+
 int ibv_modify_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
 		  enum ibv_qp_attr_mask attr_mask)
 {
--- libibverbs/src/cmd.c	(revision 3041)
+++ libibverbs/src/cmd.c	(working copy)
@@ -288,6 +288,39 @@ int ibv_cmd_destroy_cq(struct ibv_cq *cq
 	return 0;
 }
 
+int ibv_cmd_create_srq(struct ibv_pd *pd,
+		       struct ibv_srq *srq, struct ibv_srq_init_attr *attr,
+		       struct ibv_create_srq *cmd, size_t cmd_size,
+		       struct ibv_create_srq_resp *resp, size_t resp_size)
+{
+	IBV_INIT_CMD_RESP(cmd, cmd_size, CREATE_SRQ, resp, resp_size);
+	cmd->user_handle = (uintptr_t) srq;
+	cmd->pd_handle 	 = pd->handle;
+	cmd->max_wr      = attr->attr.max_wr;
+	cmd->max_sge     = attr->attr.max_sge;
+	cmd->srq_limit   = attr->attr.srq_limit;
+
+	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
+		return errno;
+
+	srq->handle = resp->srq_handle;
+
+	return 0;
+}
+
+int ibv_cmd_destroy_srq(struct ibv_srq *srq)
+{
+	struct ibv_destroy_srq cmd;
+
+	IBV_INIT_CMD(&cmd, sizeof cmd, DESTROY_SRQ);
+	cmd.srq_handle = srq->handle;
+
+	if (write(srq->context->cmd_fd, &cmd, sizeof cmd) != sizeof cmd)
+		return errno;
+
+	return 0;
+}
+
 int ibv_cmd_create_qp(struct ibv_pd *pd,
 		      struct ibv_qp *qp, struct ibv_qp_init_attr *attr,
 		      struct ibv_create_qp *cmd, size_t cmd_size)
@@ -299,6 +332,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
 	cmd->pd_handle 	     = pd->handle;
 	cmd->send_cq_handle  = attr->send_cq->handle;
 	cmd->recv_cq_handle  = attr->recv_cq->handle;
+	cmd->srq_handle      = attr->srq ? attr->srq->handle : 0;
 	cmd->max_send_wr     = attr->cap.max_send_wr;
 	cmd->max_recv_wr     = attr->cap.max_recv_wr;
 	cmd->max_send_sge    = attr->cap.max_send_sge;
@@ -306,7 +340,7 @@ int ibv_cmd_create_qp(struct ibv_pd *pd,
 	cmd->max_inline_data = attr->cap.max_inline_data;
 	cmd->sq_sig_all	     = attr->sq_sig_all;
 	cmd->qp_type 	     = attr->qp_type;
-	cmd->is_srq 	     = 0;
+	cmd->is_srq 	     = !!attr->srq;
 
 	if (write(pd->context->cmd_fd, cmd, cmd_size) != cmd_size)
 		return errno;
--- libibverbs/Makefile.am	(revision 3041)
+++ libibverbs/Makefile.am	(working copy)
@@ -21,7 +21,7 @@ src_libibverbs_la_DEPENDENCIES = $(srcdi
 
 bin_PROGRAMS = examples/ibv_devices examples/ibv_devinfo \
     examples/ibv_asyncwatch examples/ibv_rc_pingpong examples/ibv_uc_pingpong \
-    examples/ibv_ud_pingpong
+    examples/ibv_ud_pingpong examples/ibv_srq_pingpong
 examples_ibv_devices_SOURCES = examples/device_list.c
 examples_ibv_devices_LDADD = $(top_builddir)/src/libibverbs.la
 examples_ibv_devinfo_SOURCES = examples/devinfo.c
@@ -32,6 +32,8 @@ examples_ibv_uc_pingpong_SOURCES = examp
 examples_ibv_uc_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
 examples_ibv_ud_pingpong_SOURCES = examples/ud_pingpong.c
 examples_ibv_ud_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
+examples_ibv_srq_pingpong_SOURCES = examples/srq_pingpong.c
+examples_ibv_srq_pingpong_LDADD = $(top_builddir)/src/libibverbs.la
 examples_ibv_asyncwatch_SOURCES = examples/asyncwatch.c
 examples_ibv_asyncwatch_LDADD = $(top_builddir)/src/libibverbs.la
 
--- libibverbs/examples/srq_pingpong.c	(revision 0)
+++ libibverbs/examples/srq_pingpong.c	(revision 0)
@@ -0,0 +1,772 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <netdb.h>
+#include <malloc.h>
+#include <getopt.h>
+#include <arpa/inet.h>
+#include <time.h>
+
+#include <sysfs/libsysfs.h>
+
+#include <infiniband/verbs.h>
+
+enum {
+	PINGPONG_RECV_WRID = 1,
+	PINGPONG_SEND_WRID = 2,
+
+	MAX_QP             = 256,
+};
+
+static int page_size;
+
+struct pingpong_context {
+	struct ibv_context *context;
+	struct ibv_pd      *pd;
+	struct ibv_mr      *mr;
+	struct ibv_cq      *cq;
+	struct ibv_srq     *srq;
+	struct ibv_qp      *qp[MAX_QP];
+	void               *buf;
+	int                 size;
+	int                 num_qp;
+	int                 rx_depth;
+};
+
+struct pingpong_dest {
+	int lid;
+	int qpn;
+	int psn;
+};
+
+static uint16_t pp_get_local_lid(struct pingpong_context *ctx, int port)
+{
+	struct ibv_port_attr attr;
+
+	if (ibv_query_port(ctx->context, port, &attr))
+		return 0;
+
+	return attr.lid;
+}
+
+static int pp_connect_ctx(struct pingpong_context *ctx, int port,
+			  const struct pingpong_dest *my_dest,
+			  const struct pingpong_dest *dest)
+{
+	int i;
+
+	for (i = 0; i < ctx->num_qp; ++i) {
+		struct ibv_qp_attr attr = {
+			.qp_state		= IBV_QPS_RTR,
+			.path_mtu		= IBV_MTU_1024,
+			.dest_qp_num		= dest[i].qpn,
+			.rq_psn 		= dest[i].psn,
+			.max_dest_rd_atomic	= 1,
+			.min_rnr_timer		= 12,
+			.ah_attr		= {
+				.is_global	= 0,
+				.dlid		= dest[i].lid,
+				.sl		= 0,
+				.src_path_bits	= 0,
+				.port_num	= port
+			}
+		};
+		if (ibv_modify_qp(ctx->qp[i], &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_AV                 |
+				  IBV_QP_PATH_MTU           |
+				  IBV_QP_DEST_QPN           |
+				  IBV_QP_RQ_PSN             |
+				  IBV_QP_MAX_DEST_RD_ATOMIC |
+				  IBV_QP_MIN_RNR_TIMER)) {
+			fprintf(stderr, "Failed to modify QP[%d] to RTR\n", i);
+			return 1;
+		}
+
+		attr.qp_state 	    = IBV_QPS_RTS;
+		attr.timeout 	    = 14;
+		attr.retry_cnt 	    = 7;
+		attr.rnr_retry 	    = 7;
+		attr.sq_psn 	    = my_dest[i].psn;
+		attr.max_rd_atomic  = 1;
+		if (ibv_modify_qp(ctx->qp[i], &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_TIMEOUT            |
+				  IBV_QP_RETRY_CNT          |
+				  IBV_QP_RNR_RETRY          |
+				  IBV_QP_SQ_PSN             |
+				  IBV_QP_MAX_QP_RD_ATOMIC)) {
+			fprintf(stderr, "Failed to modify QP[%d] to RTS\n", i);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+static struct pingpong_dest *pp_client_exch_dest(const char *servername, int port,
+						 const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[ sizeof "0000:000000:000000"];
+	int n;
+	int r;
+	int i;
+	int sockfd = -1;
+	struct pingpong_dest *rem_dest = NULL;
+
+	asprintf(&service, "%d", port);
+	n = getaddrinfo(servername, service, &hints, &res);
+
+	if (n < 0) {
+		fprintf(stderr, "%s for %s:%d\n", gai_strerror(n), servername, port);
+		return NULL;
+	}
+
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			if (!connect(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't connect to %s:%d\n", servername, port);
+		return NULL;
+	}
+
+	for (i = 0; i < MAX_QP; ++i) {
+		sprintf(msg, "%04x:%06x:%06x", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn);
+		if (write(sockfd, msg, sizeof msg) != sizeof msg) {
+			fprintf(stderr, "Couldn't send local address\n");
+			goto out;
+		}
+	}
+
+	rem_dest = malloc(MAX_QP * sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	for (i = 0; i < MAX_QP; ++i) {
+		n = 0;
+		while (n < sizeof msg) {
+			r = read(sockfd, msg + n, sizeof msg - n);
+			if (r < 0) {
+				perror("client read");
+				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
+					n, (int) sizeof msg, i);
+				goto out;
+			}
+			n += r;
+		}
+
+		sscanf(msg, "%x:%x:%x",
+		       &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn);
+	}
+
+	write(sockfd, "done", sizeof "done");
+
+out:
+	close(sockfd);
+	return rem_dest;
+}
+
+static struct pingpong_dest *pp_server_exch_dest(struct pingpong_context *ctx,
+						 int ib_port, int port,
+						 const struct pingpong_dest *my_dest)
+{
+	struct addrinfo *res, *t;
+	struct addrinfo hints = {
+		.ai_flags    = AI_PASSIVE,
+		.ai_family   = AF_UNSPEC,
+		.ai_socktype = SOCK_STREAM
+	};
+	char *service;
+	char msg[ sizeof "0000:000000:000000"];
+	int n;
+	int r;
+	int i;
+	int sockfd = -1, connfd;
+	struct pingpong_dest *rem_dest = NULL;
+
+	asprintf(&service, "%d", port);
+	n = getaddrinfo(NULL, service, &hints, &res);
+
+	if (n < 0) {
+		fprintf(stderr, "%s for port %d\n", gai_strerror(n), port);
+		return NULL;
+	}
+
+	for (t = res; t; t = t->ai_next) {
+		sockfd = socket(t->ai_family, t->ai_socktype, t->ai_protocol);
+		if (sockfd >= 0) {
+			n = 1;
+
+			setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &n, sizeof n);
+
+			if (!bind(sockfd, t->ai_addr, t->ai_addrlen))
+				break;
+			close(sockfd);
+			sockfd = -1;
+		}
+	}
+
+	freeaddrinfo(res);
+
+	if (sockfd < 0) {
+		fprintf(stderr, "Couldn't listen to port %d\n", port);
+		return NULL;
+	}
+
+	listen(sockfd, 1);
+	connfd = accept(sockfd, NULL, 0);
+	close(sockfd);
+	if (connfd < 0) {
+		fprintf(stderr, "accept() failed\n");
+		return NULL;
+	}
+
+	rem_dest = malloc(MAX_QP *sizeof *rem_dest);
+	if (!rem_dest)
+		goto out;
+
+	for (i = 0; i < MAX_QP; ++i) {
+		n = 0;
+		while (n < sizeof msg) {
+			r = read(connfd, msg + n, sizeof msg - n);
+			if (r < 0) {
+				perror("server read");
+				fprintf(stderr, "%d/%d: Couldn't read remote address [%d]\n",
+					n, (int) sizeof msg, i);
+				goto out;
+			}
+			n += r;
+		}
+
+		sscanf(msg, "%x:%x:%x",
+		       &rem_dest[i].lid, &rem_dest[i].qpn, &rem_dest[i].psn);
+	}
+
+	if (pp_connect_ctx(ctx, ib_port, my_dest, rem_dest)) {
+		fprintf(stderr, "Couldn't connect to remote QP\n");
+		free(rem_dest);
+		rem_dest = NULL;
+		goto out;
+	}
+
+	for (i = 0; i < MAX_QP; ++i) {
+		sprintf(msg, "%04x:%06x:%06x", my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn);
+		if (write(connfd, msg, sizeof msg) != sizeof msg) {
+			fprintf(stderr, "Couldn't send local address\n");
+			free(rem_dest);
+			rem_dest = NULL;
+			goto out;
+		}
+	}
+
+	read(connfd, msg, sizeof msg);
+
+out:
+	close(connfd);
+	return rem_dest;
+}
+
+static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size,
+					    int num_qp, int rx_depth, int port)
+{
+	struct pingpong_context *ctx;
+	int i;
+
+	ctx = malloc(sizeof *ctx);
+	if (!ctx)
+		return NULL;
+
+	ctx->size     = size;
+	ctx->num_qp   = num_qp;
+	ctx->rx_depth = rx_depth;
+
+	ctx->buf = memalign(page_size, size);
+	if (!ctx->buf) {
+		fprintf(stderr, "Couldn't allocate work buf.\n");
+		return NULL;
+	}
+
+	memset(ctx->buf, 0, size);
+
+	ctx->context = ibv_open_device(ib_dev);
+	if (!ctx->context) {
+		fprintf(stderr, "Couldn't get context for %s\n",
+			ibv_get_device_name(ib_dev));
+		return NULL;
+	}
+
+	ctx->pd = ibv_alloc_pd(ctx->context);
+	if (!ctx->pd) {
+		fprintf(stderr, "Couldn't allocate PD\n");
+		return NULL;
+	}
+
+	ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, IBV_ACCESS_LOCAL_WRITE);
+	if (!ctx->mr) {
+		fprintf(stderr, "Couldn't allocate MR\n");
+		return NULL;
+	}
+
+	ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL);
+	if (!ctx->cq) {
+		fprintf(stderr, "Couldn't create CQ\n");
+		return NULL;
+	}
+
+	{
+		struct ibv_srq_init_attr attr = {
+			.attr = {
+				.max_wr  = rx_depth,
+				.max_sge = 1
+			}
+		};
+
+		ctx->srq = ibv_create_srq(ctx->pd, &attr);
+		if (!ctx->srq)  {
+			fprintf(stderr, "Couldn't create SRQ\n");
+			return NULL;
+		}
+	}
+
+	for (i = 0; i < num_qp; ++i) {
+		struct ibv_qp_init_attr attr = {
+			.send_cq = ctx->cq,
+			.recv_cq = ctx->cq,
+			.srq     = ctx->srq,
+			.cap     = {
+				.max_send_wr  = 4,
+				.max_send_sge = 1,
+			},
+			.qp_type = IBV_QPT_RC
+		};
+
+		ctx->qp[i] = ibv_create_qp(ctx->pd, &attr);
+		if (!ctx->qp[i])  {
+			fprintf(stderr, "Couldn't create QP[%d]\n", i);
+			return NULL;
+		}
+	}
+
+	for (i = 0; i < num_qp; ++i) {
+		struct ibv_qp_attr attr;
+
+		attr.qp_state        = IBV_QPS_INIT;
+		attr.pkey_index      = 0;
+		attr.port_num        = port;
+		attr.qp_access_flags = 0;
+
+		if (ibv_modify_qp(ctx->qp[i], &attr,
+				  IBV_QP_STATE              |
+				  IBV_QP_PKEY_INDEX         |
+				  IBV_QP_PORT               |
+				  IBV_QP_ACCESS_FLAGS)) {
+			fprintf(stderr, "Failed to modify QP[%d] to INIT\n", i);
+			return NULL;
+		}
+	}
+
+	return ctx;
+}
+
+static int pp_post_recv(struct pingpong_context *ctx, int n)
+{
+	struct ibv_sge list = {
+		.addr 	= (uintptr_t) ctx->buf,
+		.length = ctx->size,
+		.lkey 	= ctx->mr->lkey
+	};
+	struct ibv_recv_wr wr = {
+		.wr_id 	    = PINGPONG_RECV_WRID,
+		.sg_list    = &list,
+		.num_sge    = 1,
+	};
+	struct ibv_recv_wr *bad_wr;
+	int i;
+
+	for (i = 0; i < n; ++i)
+		if (ibv_post_srq_recv(ctx->srq, &wr, &bad_wr))
+			break;
+
+	return i;
+}
+
+static int pp_post_send(struct pingpong_context *ctx, int qp_index)
+{
+	struct ibv_sge list = {
+		.addr 	= (uintptr_t) ctx->buf,
+		.length = ctx->size,
+		.lkey 	= ctx->mr->lkey
+	};
+	struct ibv_send_wr wr = {
+		.wr_id 	    = PINGPONG_SEND_WRID,
+		.sg_list    = &list,
+		.num_sge    = 1,
+		.opcode     = IBV_WR_SEND,
+		.send_flags = IBV_SEND_SIGNALED,
+	};
+	struct ibv_send_wr *bad_wr;
+
+	return ibv_post_send(ctx->qp[qp_index], &wr, &bad_wr);
+}
+
+static int find_qp(int qpn, struct pingpong_context *ctx, int num_qp)
+{
+	int i;
+
+	for (i = 0; i < num_qp; ++i)
+		if (ctx->qp[i]->qp_num == qpn)
+			return i;
+
+	return -1;
+}
+
+static void usage(const char *argv0)
+{
+	printf("Usage:\n");
+	printf("  %s            start a server and wait for connection\n", argv0);
+	printf("  %s <host>     connect to server at <host>\n", argv0);
+	printf("\n");
+	printf("Options:\n");
+	printf("  -p, --port=<port>      listen on/connect to port <port> (default 18515)\n");
+	printf("  -d, --ib-dev=<dev>     use IB device <dev> (default first device found)\n");
+	printf("  -i, --ib-port=<port>   use port <port> of IB device (default 1)\n");
+	printf("  -s, --size=<size>      size of message to exchange (default 4096)\n");
+	printf("  -q, --num-qp=<num>     number of QPs to use (default 16)\n");
+	printf("  -r, --rx-depth=<dep>   number of receives to post at a time (default 500)\n");
+	printf("  -n, --iters=<iters>    number of exchanges per QP(default 1000)\n");
+	printf("  -e, --events           sleep on CQ events (default poll)\n");
+}
+
+int main(int argc, char *argv[])
+{
+	struct dlist 	  	*dev_list;
+	struct ibv_device 	*ib_dev;
+	struct pingpong_context *ctx;
+	struct pingpong_dest     my_dest[MAX_QP];
+	struct pingpong_dest    *rem_dest;
+	struct timeval           start, end;
+	char                    *ib_devname = NULL;
+	char                    *servername = NULL;
+	int                      port = 18515;
+	int                      ib_port = 1;
+	int                      size = 4096;
+	int                      num_qp = 16;
+	int                      rx_depth = 500;
+	int                      iters = 1000;
+	int                      use_event = 0;
+	int                      routs;
+	int                      rcnt, scnt;
+	int                      i;
+
+	srand48(getpid() * time(NULL));
+
+	while (1) {
+		int c;
+
+		static struct option long_options[] = {
+			{ .name = "port",     .has_arg = 1, .val = 'p' },
+			{ .name = "ib-dev",   .has_arg = 1, .val = 'd' },
+			{ .name = "ib-port",  .has_arg = 1, .val = 'i' },
+			{ .name = "size",     .has_arg = 1, .val = 's' },
+			{ .name = "num-qp",   .has_arg = 1, .val = 'q' },
+			{ .name = "rx-depth", .has_arg = 1, .val = 'r' },
+			{ .name = "iters",    .has_arg = 1, .val = 'n' },
+			{ .name = "events",   .has_arg = 0, .val = 'e' },
+			{ 0 }
+		};
+
+		c = getopt_long(argc, argv, "p:d:i:s:q:r:n:e", long_options, NULL);
+		if (c == -1)
+			break;
+
+		switch (c) {
+		case 'p':
+			port = strtol(optarg, NULL, 0);
+			if (port < 0 || port > 65535) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 'd':
+			ib_devname = strdupa(optarg);
+			break;
+
+		case 'i':
+			ib_port = strtol(optarg, NULL, 0);
+			if (ib_port < 0) {
+				usage(argv[0]);
+				return 1;
+			}
+			break;
+
+		case 's':
+			size = strtol(optarg, NULL, 0);
+			break;
+
+		case 'q':
+			num_qp = strtol(optarg, NULL, 0);
+			break;
+
+		case 'r':
+			rx_depth = strtol(optarg, NULL, 0);
+			break;
+
+		case 'n':
+			iters = strtol(optarg, NULL, 0);
+			break;
+
+		case 'e':
+			++use_event;
+			break;
+
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	if (optind == argc - 1)
+		servername = strdupa(argv[optind]);
+	else if (optind < argc) {
+		usage(argv[0]);
+		return 1;
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	dev_list = ibv_get_devices();
+
+	dlist_start(dev_list);
+	if (!ib_devname) {
+		ib_dev = dlist_next(dev_list);
+		if (!ib_dev) {
+			fprintf(stderr, "No IB devices found\n");
+			return 1;
+		}
+	} else {
+		dlist_for_each_data(dev_list, ib_dev, struct ibv_device)
+			if (!strcmp(ibv_get_device_name(ib_dev), ib_devname))
+				break;
+		if (!ib_dev) {
+			fprintf(stderr, "IB device %s not found\n", ib_devname);
+			return 1;
+		}
+	}
+
+	ctx = pp_init_ctx(ib_dev, size, num_qp, rx_depth, ib_port);
+	if (!ctx)
+		return 1;
+
+	routs = pp_post_recv(ctx, ctx->rx_depth);
+	if (routs < ctx->rx_depth) {
+		fprintf(stderr, "Couldn't post receive (%d)\n", routs);
+		return 1;
+	}
+
+	for (i = 0; i < num_qp; ++i) {
+		my_dest[i].qpn = ctx->qp[i]->qp_num;
+		my_dest[i].psn = lrand48() & 0xffffff;
+		my_dest[i].lid = pp_get_local_lid(ctx, ib_port);
+		if (!my_dest[i].lid) {
+			fprintf(stderr, "Couldn't get local LID\n");
+			return 1;
+		}
+
+		printf("  local address:  LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+		       my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn);
+	}
+
+	if (servername)
+		rem_dest = pp_client_exch_dest(servername, port, my_dest);
+	else
+		rem_dest = pp_server_exch_dest(ctx, ib_port, port, my_dest);
+
+	if (!rem_dest)
+		return 1;
+
+	for (i = 0; i < num_qp; ++i)
+		printf("  remote address: LID 0x%04x, QPN 0x%06x, PSN 0x%06x\n",
+		       rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn);
+
+	if (servername)
+		if (pp_connect_ctx(ctx, ib_port, my_dest, rem_dest))
+			return 1;
+
+	if (use_event)
+		if (ibv_req_notify_cq(ctx->cq, 0)) {
+			fprintf(stderr, "Couldn't request CQ notification\n");
+			return 1;
+		}
+
+	if (servername)
+		for (i = 0; i < num_qp; ++i)
+			if (pp_post_send(ctx, i)) {
+				fprintf(stderr, "Couldn't post send\n");
+				return 1;
+			}
+
+	if (gettimeofday(&start, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	rcnt = scnt = 0;
+	while (rcnt < iters || scnt < iters) {
+		if (use_event) {
+			struct ibv_cq *ev_cq;
+			void          *ev_ctx;
+
+			if (ibv_get_cq_event(ctx->context, 0, &ev_cq, &ev_ctx)) {
+				fprintf(stderr, "Failed to get cq_event\n");
+				return 1;
+			}
+
+			if (ev_cq != ctx->cq) {
+				fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
+				return 1;
+			}
+
+			if (ibv_req_notify_cq(ctx->cq, 0)) {
+				fprintf(stderr, "Couldn't request CQ notification\n");
+				return 1;
+			}
+		}
+
+		{
+			struct ibv_wc wc[2];
+			int ne, j;
+
+			do {
+				ne = ibv_poll_cq(ctx->cq, 2, wc);
+			} while (!use_event && ne < 1);
+
+			if (ne < 0) {
+				fprintf(stderr, "poll CQ failed %d\n", ne);
+				return 1;
+			}
+
+			for (i = 0; i < ne; ++i) {
+				if (wc[i].status != IBV_WC_SUCCESS) {
+					fprintf(stderr, "Failed status %d for wr_id %d\n",
+						wc[i].status, (int) wc[i].wr_id);
+					return 1;
+				}
+
+				switch ((int) wc[i].wr_id) {
+				case PINGPONG_SEND_WRID:
+					++scnt;
+					break;
+
+				case PINGPONG_RECV_WRID:
+					if (--routs <= 1) {
+						routs += pp_post_recv(ctx, ctx->rx_depth - routs);
+						if (routs < ctx->rx_depth) {
+							fprintf(stderr,
+								"Couldn't post receive (%d)\n",
+								routs);
+							return 1;
+						}
+					}
+
+					if (scnt < iters) {
+						j = find_qp(wc[i].qp_num, ctx, num_qp);
+						if (j < 0) {
+							fprintf(stderr, "Couldn't find QPN %06x\n",
+								wc[i].qp_num);
+							return 1;
+						}
+
+						if (pp_post_send(ctx, j)) {
+							fprintf(stderr, "Couldn't post send\n");
+							return 1;
+						}
+					}
+
+					++rcnt;
+					break;
+
+				default:
+					fprintf(stderr, "Completion for unknown wr_id %d\n",
+						(int) wc[i].wr_id);
+					return 1;
+				}
+			}
+		}
+	}
+
+	if (gettimeofday(&end, NULL)) {
+		perror("gettimeofday");
+		return 1;
+	}
+
+	{
+		float usec = (end.tv_sec - start.tv_sec) * 1000000 +
+			(end.tv_usec - start.tv_usec);
+		long long bytes = (long long) size * iters * 2;
+
+		printf("%lld bytes in %.2f seconds = %.2f Mbit/sec\n",
+		       bytes, usec / 1000000., bytes * 8. / usec);
+		printf("%d iters in %.2f seconds = %.2f usec/iter\n",
+		       iters, usec / 1000000., usec / iters);
+	}
+
+	return 0;
+}

Property changes on: libibverbs/examples/srq_pingpong.c
___________________________________________________________________
Name: svn:keywords
   + Id




More information about the general mailing list