[openib-general] [PATCH 2/2] librdmacm: add UD QP support for userspace clients

Sean Hefty sean.hefty at intel.com
Tue Jun 6 20:15:43 PDT 2006


Add support for UD QPs to the RDMA CM library, along with a goofy test program.

Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Index: include/rdma/rdma_cma_ib.h
===================================================================
--- include/rdma/rdma_cma_ib.h	(revision 7743)
+++ include/rdma/rdma_cma_ib.h	(working copy)
@@ -44,4 +44,19 @@ struct ib_cm_req_opt {
 	uint8_t		max_cm_retries;
 };
 
+/**
+ * rdma_get_dst_attr - Retrieve information about a UDP destination.
+ * @id: Connection identifier associated with the request.
+ * @addr: Address of remote destination to retrieve information about.
+ * @ah_attr: Address handle attributes.  A caller uses these attributes to
+ *   create an address handle when communicating with the destination.
+ * @remote_qpn: The remote QP number associated with the UDP address.
+ * @remote_qkey: The QKey of the remote QP.
+ *
+ * Users must have called rdma_connect() to resolve the destination information.
+ */
+int rdma_get_dst_attr(struct rdma_cm_id *id, struct sockaddr *addr,
+		      struct ibv_ah_attr *ah_attr, uint32_t *remote_qpn,
+		      uint32_t *remote_qkey);
+
 #endif /* RDMA_CMA_IB_H */
Index: include/rdma/rdma_cma_abi.h
===================================================================
--- include/rdma/rdma_cma_abi.h	(revision 7636)
+++ include/rdma/rdma_cma_abi.h	(working copy)
@@ -40,7 +40,7 @@
  */
 
 #define RDMA_USER_CM_MIN_ABI_VERSION	1
-#define RDMA_USER_CM_MAX_ABI_VERSION	1
+#define RDMA_USER_CM_MAX_ABI_VERSION	2
 
 #define RDMA_MAX_PRIVATE_DATA		256
 
@@ -60,6 +60,7 @@ enum {
 	UCMA_CMD_GET_EVENT,
 	UCMA_CMD_GET_OPTION,
 	UCMA_CMD_SET_OPTION,
+	UCMA_CMD_GET_DST_ATTR
 };
 
 struct ucma_abi_cmd_hdr {
@@ -68,9 +69,16 @@ struct ucma_abi_cmd_hdr {
 	__u16 out;
 };
 
+struct ucma_abi_create_id_v1 {
+	__u64 uid;
+	__u64 response;
+};
+
 struct ucma_abi_create_id {
 	__u64 uid;
 	__u64 response;
+	__u16 ps;
+	__u8  reserved[6];
 };
 
 struct ucma_abi_create_id_resp {
@@ -170,6 +178,18 @@ struct ucma_abi_init_qp_attr {
 	__u32 qp_state;
 };
 
+struct ucma_abi_dst_attr_resp {
+	__u32 remote_qpn;
+	__u32 remote_qkey;
+	struct ibv_kern_ah_attr ah_attr;
+};
+
+struct ucma_abi_get_dst_attr {
+	__u64 response;
+	struct sockaddr_in6 addr;
+	__u32 id;
+};
+
 struct ucma_abi_get_event {
 	__u64 response;
 };
Index: include/rdma/rdma_cma.h
===================================================================
--- include/rdma/rdma_cma.h	(revision 7743)
+++ include/rdma/rdma_cma.h	(working copy)
@@ -54,6 +54,11 @@ enum rdma_cm_event_type {
 	RDMA_CM_EVENT_DEVICE_REMOVAL,
 };
 
+enum rdma_port_space {
+	RDMA_PS_TCP  = 0x0106,
+	RDMA_PS_UDP  = 0x0111,
+};
+
 /* Protocol levels for get/set options. */
 enum {
 	RDMA_PROTO_IP = 0,
@@ -90,6 +95,7 @@ struct rdma_cm_id {
 	void			*context;
 	struct ibv_qp		*qp;
 	struct rdma_route	 route;
+	enum rdma_port_space	 ps;
 	uint8_t			 port_num;
 };
 
@@ -121,9 +127,11 @@ void rdma_destroy_event_channel(struct r
  * @id: A reference where the allocated communication identifier will be
  *   returned.
  * @context: User specified context associated with the rdma_cm_id.
+ * @ps: RDMA port space of the identifier (RDMA_PS_TCP or RDMA_PS_UDP).
  */
 int rdma_create_id(struct rdma_event_channel *channel,
-		   struct rdma_cm_id **id, void *context);
+		   struct rdma_cm_id **id, void *context,
+		   enum rdma_port_space ps);
 
 /**
  * rdma_destroy_id - Release a communication identifier.
@@ -194,6 +202,10 @@ struct rdma_conn_param {
 	uint8_t flow_control;
 	uint8_t retry_count;		/* ignored when accepting */
 	uint8_t rnr_retry_count;
+	/* Fields below ignored if a QP is created on the rdma_cm_id. */
+	uint8_t srq;
+	uint32_t qp_num;
+	enum ibv_qp_type qp_type;
 };
 
 /**
@@ -227,7 +239,8 @@ int rdma_reject(struct rdma_cm_id *id, c
 		uint8_t private_data_len);
 
 /**
- * rdma_disconnect - This function disconnects the associated QP.
+ * rdma_disconnect - This function disconnects the associated QP and
+ *   transitions it into the error state.
  */
 int rdma_disconnect(struct rdma_cm_id *id);
 
@@ -278,4 +291,18 @@ int rdma_get_option(struct rdma_cm_id *i
 int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
 		    void *optval, size_t optlen);
 
+static inline uint16_t rdma_get_src_port(struct rdma_cm_id *id)
+{
+	return	id->route.addr.src_addr.sin6_family == PF_INET6 ?
+		id->route.addr.src_addr.sin6_port :
+		((struct sockaddr_in *) &id->route.addr.src_addr)->sin_port;
+}
+
+static inline uint16_t rdma_get_dst_port(struct rdma_cm_id *id)
+{
+	return	id->route.addr.dst_addr.sin6_family == PF_INET6 ?
+		id->route.addr.dst_addr.sin6_port :
+		((struct sockaddr_in *) &id->route.addr.dst_addr)->sin_port;
+}
+
 #endif /* RDMA_CMA_H */
Index: src/cma.c
===================================================================
--- src/cma.c	(revision 7636)
+++ src/cma.c	(working copy)
@@ -54,6 +54,7 @@
 #include <infiniband/marshall.h>
 #include <rdma/rdma_cma.h>
 #include <rdma/rdma_cma_abi.h>
+#include <rdma/rdma_cma_ib.h>
 
 #define PFX "librdmacm: "
 
@@ -203,7 +204,7 @@ static int ucma_init(void)
 
 	dev_list = ibv_get_device_list(NULL);
 	if (!dev_list) {
-		printf("CMA: unable to get RDMA device liste\n");
+		printf("CMA: unable to get RDMA device list\n");
 		ret = -ENODEV;
 		goto err;
 	}
@@ -301,7 +302,8 @@ static void ucma_free_id(struct cma_id_p
 }
 
 static struct cma_id_private *ucma_alloc_id(struct rdma_event_channel *channel,
-					    void *context)
+					    void *context,
+					    enum rdma_port_space ps)
 {
 	struct cma_id_private *id_priv;
 
@@ -311,6 +313,7 @@ static struct cma_id_private *ucma_alloc
 
 	memset(id_priv, 0, sizeof *id_priv);
 	id_priv->id.context = context;
+	id_priv->id.ps = ps;
 	id_priv->id.channel = channel;
 	pthread_mutex_init(&id_priv->mut, NULL);
 	if (pthread_cond_init(&id_priv->cond, NULL))
@@ -322,8 +325,44 @@ err:	ucma_free_id(id_priv);
 	return NULL;
 }
 
+static int ucma_create_id_v1(struct rdma_event_channel *channel,
+			     struct rdma_cm_id **id, void *context,
+			     enum rdma_port_space ps)
+{
+	struct ucma_abi_create_id_resp *resp;
+	struct ucma_abi_create_id_v1 *cmd;
+	struct cma_id_private *id_priv;
+	void *msg;
+	int ret, size;
+
+	if (ps != RDMA_PS_TCP) {
+		fprintf(stderr, "librdmacm: Kernel ABI does not support "
+				"requested port space.\n");
+		return -EPROTONOSUPPORT;
+	}
+
+	id_priv = ucma_alloc_id(channel, context, ps);
+	if (!id_priv)
+		return -ENOMEM;
+
+	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_CREATE_ID, size);
+	cmd->uid = (uintptr_t) id_priv;
+
+	ret = write(channel->fd, msg, size);
+	if (ret != size)
+		goto err;
+
+	id_priv->handle = resp->id;
+	*id = &id_priv->id;
+	return 0;
+
+err:	ucma_free_id(id_priv);
+	return ret;
+}
+
 int rdma_create_id(struct rdma_event_channel *channel,
-		   struct rdma_cm_id **id, void *context)
+		   struct rdma_cm_id **id, void *context,
+		   enum rdma_port_space ps)
 {
 	struct ucma_abi_create_id_resp *resp;
 	struct ucma_abi_create_id *cmd;
@@ -335,12 +374,16 @@ int rdma_create_id(struct rdma_event_cha
 	if (ret)
 		return ret;
 
-	id_priv = ucma_alloc_id(channel, context);
+	if (abi_ver == 1)
+		return ucma_create_id_v1(channel, id, context, ps);
+
+	id_priv = ucma_alloc_id(channel, context, ps);
 	if (!id_priv)
 		return -ENOMEM;
 
 	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_CREATE_ID, size);
 	cmd->uid = (uintptr_t) id_priv;
+	cmd->ps = ps;
 
 	ret = write(channel->fd, msg, size);
 	if (ret != size)
@@ -637,6 +680,36 @@ static int ucma_init_ib_qp(struct cma_id
 					   IBV_QP_PKEY_INDEX | IBV_QP_PORT);
 }
 
+static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)
+{
+	struct ibv_qp_attr qp_attr;
+	struct ib_addr *ibaddr;
+	int ret;
+
+	ibaddr = &id_priv->id.route.addr.addr.ibaddr;
+	ret = ucma_find_pkey(id_priv->cma_dev, id_priv->id.port_num,
+			     ibaddr->pkey, &qp_attr.pkey_index);
+	if (ret)
+		return ret;
+
+	qp_attr.port_num = id_priv->id.port_num;
+	qp_attr.qp_state = IBV_QPS_INIT;
+	qp_attr.qkey = ntohs(rdma_get_src_port(&id_priv->id));
+	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_PKEY_INDEX |
+					  IBV_QP_PORT | IBV_QP_QKEY);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IBV_QPS_RTR;
+	ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);
+	if (ret)
+		return ret;
+
+	qp_attr.qp_state = IBV_QPS_RTS;
+	qp_attr.sq_psn = 0;
+	return ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE | IBV_QP_SQ_PSN);
+}
+
 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
 		   struct ibv_qp_init_attr *qp_init_attr)
 {
@@ -652,7 +725,10 @@ int rdma_create_qp(struct rdma_cm_id *id
 	if (!qp)
 		return -ENOMEM;
 
-	ret = ucma_init_ib_qp(id_priv, qp);
+	if (id->ps == RDMA_PS_UDP)
+		ret = ucma_init_ud_qp(id_priv, qp);
+	else
+		ret = ucma_init_ib_qp(id_priv, qp);
 	if (ret)
 		goto err;
 
@@ -670,11 +746,12 @@ void rdma_destroy_qp(struct rdma_cm_id *
 
 static void ucma_copy_conn_param_to_kern(struct ucma_abi_conn_param *dst,
 					 struct rdma_conn_param *src,
-					 struct ibv_qp *qp)
+					 uint32_t qp_num,
+					 enum ibv_qp_type qp_type, uint8_t srq)
 {
-	dst->qp_num = qp->qp_num;
-	dst->qp_type = qp->qp_type;
-	dst->srq = (qp->srq != NULL);
+	dst->qp_num = qp_num;
+	dst->qp_type = qp_type;
+	dst->srq = srq;
 	dst->responder_resources = src->responder_resources;
 	dst->initiator_depth = src->initiator_depth;
 	dst->flow_control = src->flow_control;
@@ -700,7 +777,15 @@ int rdma_connect(struct rdma_cm_id *id, 
 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_CONNECT, size);
 	id_priv = container_of(id, struct cma_id_private, id);
 	cmd->id = id_priv->handle;
-	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param, id->qp);
+	if (id->qp)
+		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
+					     id->qp->qp_num, id->qp->qp_type,
+					     (id->qp->srq != NULL));
+	else
+		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
+					     conn_param->qp_num,
+					     conn_param->qp_type,
+					     conn_param->srq);
 
 	ret = write(id->channel->fd, msg, size);
 	if (ret != size)
@@ -735,15 +820,25 @@ int rdma_accept(struct rdma_cm_id *id, s
 	void *msg;
 	int ret, size;
 
-	ret = ucma_modify_qp_rtr(id);
-	if (ret)
-		return ret;
+	if (id->ps != RDMA_PS_UDP) {
+		ret = ucma_modify_qp_rtr(id);
+		if (ret)
+			return ret;
+	}
 
 	CMA_CREATE_MSG_CMD(msg, cmd, UCMA_CMD_ACCEPT, size);
 	id_priv = container_of(id, struct cma_id_private, id);
 	cmd->id = id_priv->handle;
 	cmd->uid = (uintptr_t) id_priv;
-	ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param, id->qp);
+	if (id->qp)
+		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
+					     id->qp->qp_num, id->qp->qp_type,
+					     (id->qp->srq != NULL));
+	else
+		ucma_copy_conn_param_to_kern(&cmd->conn_param, conn_param,
+					     conn_param->qp_num,
+					     conn_param->qp_type,
+					     conn_param->srq);
 
 	ret = write(id->channel->fd, msg, size);
 	if (ret != size) {
@@ -845,7 +940,8 @@ static int ucma_process_conn_req(struct 
 	int ret;
 
 	listen_id_priv = container_of(event->id, struct cma_id_private, id);
-	id_priv = ucma_alloc_id(event->id->channel, event->id->context);
+	id_priv = ucma_alloc_id(event->id->channel, event->id->context,
+				event->id->ps);
 	if (!id_priv) {
 		ucma_destroy_kern_id(event->id->channel->fd, handle);
 		ret = -ENOMEM;
@@ -967,6 +1063,9 @@ retry:
 		}
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
+		if (id_priv->id.ps == RDMA_PS_UDP)
+			break;
+
 		evt->status = ucma_process_establish(&id_priv->id);
 		if (evt->status) {
 			evt->event = RDMA_CM_EVENT_CONNECT_ERROR;
@@ -1041,3 +1140,32 @@ int rdma_set_option(struct rdma_cm_id *i
 
 	return 0;
 }
+
+int rdma_get_dst_attr(struct rdma_cm_id *id, struct sockaddr *addr,
+		      struct ibv_ah_attr *ah_attr, uint32_t *remote_qpn,
+		      uint32_t *remote_qkey)
+{
+	struct ucma_abi_dst_attr_resp *resp;
+	struct ucma_abi_get_dst_attr *cmd;
+	struct cma_id_private *id_priv;
+	void *msg;
+	int ret, size, addrlen;
+	
+	addrlen = ucma_addrlen(addr);
+	if (!addrlen)
+		return -EINVAL;
+
+	CMA_CREATE_MSG_CMD_RESP(msg, cmd, resp, UCMA_CMD_GET_DST_ATTR, size);
+	id_priv = container_of(id, struct cma_id_private, id);
+	cmd->id = id_priv->handle;
+	memcpy(&cmd->addr, addr, addrlen);
+
+	ret = write(id->channel->fd, msg, size);
+	if (ret != size)
+		return (ret > 0) ? -ENODATA : ret;
+
+	ibv_copy_ah_attr_from_kern(ah_attr, &resp->ah_attr);
+	*remote_qpn = resp->remote_qpn;
+	*remote_qkey = resp->remote_qkey;
+	return 0;
+}
Index: src/librdmacm.map
===================================================================
--- src/librdmacm.map	(revision 7636)
+++ src/librdmacm.map	(working copy)
@@ -18,5 +18,6 @@ RDMACM_1.0 {
 		rdma_ack_cm_event;
 		rdma_get_option;
 		rdma_set_option;
+		rdma_get_dst_attr;
 	local: *;
 };
Index: librdmacm.spec.in
===================================================================
--- librdmacm.spec.in	(revision 7636)
+++ librdmacm.spec.in	(working copy)
@@ -66,3 +66,4 @@ rm -rf $RPM_BUILD_ROOT
 %defattr(-,root,root)
 %{_bindir}/rping
 %{_bindir}/ucmatose
+%{_bindir}/udaddy
Index: Makefile.am
===================================================================
--- Makefile.am	(revision 7743)
+++ Makefile.am	(working copy)
@@ -18,11 +18,13 @@ endif
 src_librdmacm_la_SOURCES = src/cma.c
 src_librdmacm_la_LDFLAGS = -avoid-version $(rdmacm_version_script)
 
-bin_PROGRAMS = examples/ucmatose examples/rping
+bin_PROGRAMS = examples/ucmatose examples/rping examples/udaddy
 examples_ucmatose_SOURCES = examples/cmatose.c
 examples_ucmatose_LDADD = $(top_builddir)/src/librdmacm.la
 examples_rping_SOURCES = examples/rping.c
 examples_rping_LDADD = $(top_builddir)/src/librdmacm.la
+examples_udaddy_SOURCES = examples/udaddy.c
+examples_udaddy_LDADD = $(top_builddir)/src/librdmacm.la
 
 librdmacmincludedir = $(includedir)/rdma
 
Index: examples/rping.c
===================================================================
--- examples/rping.c	(revision 7636)
+++ examples/rping.c	(working copy)
@@ -1028,7 +1028,7 @@ int main(int argc, char *argv[])
 		goto out;
 	}
 
-	ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb);
+	ret = rdma_create_id(cb->cm_channel, &cb->cm_id, cb, RDMA_PS_TCP);
 	if (ret) {
 		ret = errno;
 		fprintf(stderr, "rdma_create_id error %d\n", ret);
Index: examples/udaddy.c
===================================================================
--- examples/udaddy.c	(revision 0)
+++ examples/udaddy.c	(revision 0)
@@ -0,0 +1,636 @@
+/*
+ * Copyright (c) 2005 Intel Corporation.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <netdb.h>
+#include <byteswap.h>
+
+#include <rdma/rdma_cma.h>
+#include <rdma/rdma_cma_ib.h>
+
+/*
+ * To execute:
+ * Server: udaddy
+ * Client: udaddy server_addr [src_addr]
+ */
+
+struct cmatest_node {
+	int			id;
+	struct rdma_cm_id	*cma_id;
+	int			connected;
+	struct ibv_pd		*pd;
+	struct ibv_cq		*cq;
+	struct ibv_mr		*mr;
+	struct ibv_ah		*ah;
+	uint32_t		remote_qpn;
+	uint32_t		remote_qkey;
+	void			*mem;
+};
+
+struct cmatest {
+	struct rdma_event_channel *channel;
+	struct cmatest_node	*nodes;
+	int			conn_index;
+	int			connects_left;
+
+	struct sockaddr_in	dst_in;
+	struct sockaddr		*dst_addr;
+	struct sockaddr_in	src_in;
+	struct sockaddr		*src_addr;
+};
+
+static struct cmatest test;
+static int connections = 1;
+static int message_size = 100;
+static int message_count = 10;
+static int is_server;
+
+static int create_message(struct cmatest_node *node)
+{
+	if (!message_size)
+		message_count = 0;
+
+	if (!message_count)
+		return 0;
+
+	node->mem = malloc(message_size + sizeof(struct ibv_grh));
+	if (!node->mem) {
+		printf("failed message allocation\n");
+		return -1;
+	}
+	node->mr = ibv_reg_mr(node->pd, node->mem,
+			      message_size + sizeof(struct ibv_grh),
+			      IBV_ACCESS_LOCAL_WRITE);
+	if (!node->mr) {
+		printf("failed to reg MR\n");
+		goto err;
+	}
+	return 0;
+err:
+	free(node->mem);
+	return -1;
+}
+
+static int init_node(struct cmatest_node *node)
+{
+	struct ibv_qp_init_attr init_qp_attr;
+	int cqe, ret;
+
+	node->pd = ibv_alloc_pd(node->cma_id->verbs);
+	if (!node->pd) {
+		ret = -ENOMEM;
+		printf("cmatose: unable to allocate PD\n");
+		goto out;
+	}
+
+	cqe = message_count ? message_count * 2 : 2;
+	node->cq = ibv_create_cq(node->cma_id->verbs, cqe, node, 0, 0);
+	if (!node->cq) {
+		ret = -ENOMEM;
+		printf("cmatose: unable to create CQ\n");
+		goto out;
+	}
+
+	memset(&init_qp_attr, 0, sizeof init_qp_attr);
+	init_qp_attr.cap.max_send_wr = message_count ? message_count : 1;
+	init_qp_attr.cap.max_recv_wr = message_count ? message_count : 1;
+	init_qp_attr.cap.max_send_sge = 1;
+	init_qp_attr.cap.max_recv_sge = 1;
+	init_qp_attr.qp_context = node;
+	init_qp_attr.sq_sig_all = 0;
+	init_qp_attr.qp_type = IBV_QPT_UD;
+	init_qp_attr.send_cq = node->cq;
+	init_qp_attr.recv_cq = node->cq;
+	ret = rdma_create_qp(node->cma_id, node->pd, &init_qp_attr);
+	if (ret) {
+		printf("cmatose: unable to create QP: %d\n", ret);
+		goto out;
+	}
+
+	ret = create_message(node);
+	if (ret) {
+		printf("cmatose: failed to create messages: %d\n", ret);
+		goto out;
+	}
+out:
+	return ret;
+}
+
+static int post_recvs(struct cmatest_node *node)
+{
+	struct ibv_recv_wr recv_wr, *recv_failure;
+	struct ibv_sge sge;
+	int i, ret = 0;
+
+	if (!message_count)
+		return 0;
+
+	recv_wr.next = NULL;
+	recv_wr.sg_list = &sge;
+	recv_wr.num_sge = 1;
+	recv_wr.wr_id = (uintptr_t) node;
+
+	sge.length = message_size + sizeof(struct ibv_grh);
+	sge.lkey = node->mr->lkey;
+	sge.addr = (uintptr_t) node->mem;
+
+	for (i = 0; i < message_count && !ret; i++ ) {
+		ret = ibv_post_recv(node->cma_id->qp, &recv_wr, &recv_failure);
+		if (ret) {
+			printf("failed to post receives: %d\n", ret);
+			break;
+		}
+	}
+	return ret;
+}
+
+static int post_sends(struct cmatest_node *node, int signal_flag)
+{
+	struct ibv_send_wr send_wr, *bad_send_wr;
+	struct ibv_sge sge;
+	int i, ret = 0;
+
+	if (!node->connected || !message_count)
+		return 0;
+
+	send_wr.next = NULL;
+	send_wr.sg_list = &sge;
+	send_wr.num_sge = 1;
+	send_wr.opcode = IBV_WR_SEND_WITH_IMM;
+	send_wr.send_flags = IBV_SEND_INLINE | signal_flag;
+	send_wr.wr_id = (unsigned long)node;
+	send_wr.imm_data = htonl(node->cma_id->qp->qp_num);
+
+	send_wr.wr.ud.ah = node->ah;
+	send_wr.wr.ud.remote_qpn = node->remote_qpn;
+	send_wr.wr.ud.remote_qkey = node->remote_qkey;
+
+	sge.length = message_size - sizeof(struct ibv_grh);
+	sge.lkey = node->mr->lkey;
+	sge.addr = (uintptr_t) node->mem;
+
+	for (i = 0; i < message_count && !ret; i++) {
+		ret = ibv_post_send(node->cma_id->qp, &send_wr, &bad_send_wr);
+		if (ret) 
+			printf("failed to post sends: %d\n", ret);
+	}
+	return ret;
+}
+
+static void connect_error(void)
+{
+	test.connects_left--;
+}
+
+static int addr_handler(struct cmatest_node *node)
+{
+	int ret;
+
+	ret = rdma_resolve_route(node->cma_id, 2000);
+	if (ret) {
+		printf("cmatose: resolve route failed: %d\n", ret);
+		connect_error();
+	}
+	return ret;
+}
+
+static int route_handler(struct cmatest_node *node)
+{
+	struct rdma_conn_param conn_param;
+	int ret;
+
+	ret = init_node(node);
+	if (ret)
+		goto err;
+
+	ret = post_recvs(node);
+	if (ret)
+		goto err;
+
+	memset(&conn_param, 0, sizeof conn_param);
+	conn_param.qp_num = node->cma_id->qp->qp_num;
+	conn_param.qp_type = node->cma_id->qp->qp_type;
+	conn_param.retry_count = 5;
+	ret = rdma_connect(node->cma_id, &conn_param);
+	if (ret) {
+		printf("cmatose: failure connecting: %d\n", ret);
+		goto err;
+	}
+	return 0;
+err:
+	connect_error();
+	return ret;
+}
+
+static int connect_handler(struct rdma_cm_id *cma_id)
+{
+	struct cmatest_node *node;
+	struct rdma_conn_param conn_param;
+	int ret;
+
+	if (test.conn_index == connections) {
+		ret = -ENOMEM;
+		goto err1;
+	}
+	node = &test.nodes[test.conn_index++];
+
+	node->cma_id = cma_id;
+	cma_id->context = node;
+
+	ret = init_node(node);
+	if (ret)
+		goto err2;
+
+	ret = post_recvs(node);
+	if (ret)
+		goto err2;
+
+	memset(&conn_param, 0, sizeof conn_param);
+	conn_param.qp_num = node->cma_id->qp->qp_num;
+	conn_param.qp_type = node->cma_id->qp->qp_type;
+	ret = rdma_accept(node->cma_id, &conn_param);
+	if (ret) {
+		printf("cmatose: failure accepting: %d\n", ret);
+		goto err2;
+	}
+	node->connected = 1;
+	test.connects_left--;
+	return 0;
+
+err2:
+	node->cma_id = NULL;
+	connect_error();
+err1:
+	printf("cmatose: failing connection request\n");
+	rdma_reject(cma_id, NULL, 0);
+	return ret;
+}
+
+static int resolved_handler(struct cmatest_node *node)
+{
+	struct ibv_ah_attr ah_attr;
+	int ret;
+
+	ret = rdma_get_dst_attr(node->cma_id, test.dst_addr, &ah_attr,
+				&node->remote_qpn, &node->remote_qkey);
+	if (ret) {
+		printf("udaddy: failure getting destination attributes\n");
+		goto err;
+	}
+
+	node->ah = ibv_create_ah(node->pd, &ah_attr);
+	if (!node->ah) {
+		printf("udaddy: failure creating address handle\n");
+		goto err;
+	}
+
+	node->connected = 1;
+	test.connects_left--;
+	return 0;
+err:
+	connect_error();
+	return ret;
+}
+
+static int cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
+{
+	int ret = 0;
+
+	switch (event->event) {
+	case RDMA_CM_EVENT_ADDR_RESOLVED:
+		ret = addr_handler(cma_id->context);
+		break;
+	case RDMA_CM_EVENT_ROUTE_RESOLVED:
+		ret = route_handler(cma_id->context);
+		break;
+	case RDMA_CM_EVENT_CONNECT_REQUEST:
+		ret = connect_handler(cma_id);
+		break;
+	case RDMA_CM_EVENT_ESTABLISHED:
+		ret = resolved_handler(cma_id->context);
+		break;
+	case RDMA_CM_EVENT_ADDR_ERROR:
+	case RDMA_CM_EVENT_ROUTE_ERROR:
+	case RDMA_CM_EVENT_CONNECT_ERROR:
+	case RDMA_CM_EVENT_UNREACHABLE:
+	case RDMA_CM_EVENT_REJECTED:
+		printf("cmatose: event: %d, error: %d\n", event->event,
+			event->status);
+		connect_error();
+		ret = event->status;
+		break;
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
+		/* Cleanup will occur after test completes. */
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
+
+static void destroy_node(struct cmatest_node *node)
+{
+	if (!node->cma_id)
+		return;
+
+	if (node->ah)
+		ibv_destroy_ah(node->ah);
+
+	if (node->cma_id->qp)
+		rdma_destroy_qp(node->cma_id);
+
+	if (node->cq)
+		ibv_destroy_cq(node->cq);
+
+	if (node->mem) {
+		ibv_dereg_mr(node->mr);
+		free(node->mem);
+	}
+
+	if (node->pd)
+		ibv_dealloc_pd(node->pd);
+
+	/* Destroy the RDMA ID after all device resources */
+	rdma_destroy_id(node->cma_id);
+}
+
+static int alloc_nodes(void)
+{
+	int ret, i;
+
+	test.nodes = malloc(sizeof *test.nodes * connections);
+	if (!test.nodes) {
+		printf("cmatose: unable to allocate memory for test nodes\n");
+		return -ENOMEM;
+	}
+	memset(test.nodes, 0, sizeof *test.nodes * connections);
+
+	for (i = 0; i < connections; i++) {
+		test.nodes[i].id = i;
+		if (!is_server) {
+			ret = rdma_create_id(test.channel,
+					     &test.nodes[i].cma_id,
+					     &test.nodes[i], RDMA_PS_UDP);
+			if (ret)
+				goto err;
+		}
+	}
+	return 0;
+err:
+	while (--i >= 0)
+		rdma_destroy_id(test.nodes[i].cma_id);
+	free(test.nodes);
+	return ret;
+}
+
+static void destroy_nodes(void)
+{
+	int i;
+
+	for (i = 0; i < connections; i++)
+		destroy_node(&test.nodes[i]);
+	free(test.nodes);
+}
+
+static void create_reply_ah(struct cmatest_node *node, struct ibv_wc *wc)
+{
+	node->ah = ibv_create_ah_from_wc(node->pd, wc, node->mem,
+					 node->cma_id->port_num);
+	node->remote_qpn = ntohl(wc->imm_data);
+	node->remote_qkey = ntohs(rdma_get_dst_port(node->cma_id));
+}
+
+static int poll_cqs(void)
+{
+	struct ibv_wc wc[8];
+	int done, i, ret;
+
+	for (i = 0; i < connections; i++) {
+		if (!test.nodes[i].connected)
+			continue;
+
+		for (done = 0; done < message_count; done += ret) {
+			ret = ibv_poll_cq(test.nodes[i].cq, 8, wc);
+			if (ret < 0) {
+				printf("cmatose: failed polling CQ: %d\n", ret);
+				return ret;
+			}
+
+			if (ret && !test.nodes[i].ah)
+				create_reply_ah(&test.nodes[i], wc);
+		}
+	}
+	return 0;
+}
+
+static int connect_events(void)
+{
+	struct rdma_cm_event *event;
+	int ret = 0;
+
+	while (test.connects_left && !ret) {
+		ret = rdma_get_cm_event(test.channel, &event);
+		if (!ret) {
+			ret = cma_handler(event->id, event);
+			rdma_ack_cm_event(event);
+		}
+	}
+	return ret;
+}
+
+static int run_server(void)
+{
+	struct rdma_cm_id *listen_id;
+	int i, ret;
+
+	printf("cmatose: starting server\n");
+	ret = rdma_create_id(test.channel, &listen_id, &test, RDMA_PS_UDP);
+	if (ret) {
+		printf("cmatose: listen request failed\n");
+		return ret;
+	}
+
+	test.src_in.sin_family = PF_INET;
+	test.src_in.sin_port = 7174;
+	ret = rdma_bind_addr(listen_id, test.src_addr);
+	if (ret) {
+		printf("cmatose: bind address failed: %d\n", ret);
+		return ret;
+	}
+
+	ret = rdma_listen(listen_id, 0);
+	if (ret) {
+		printf("cmatose: failure trying to listen: %d\n", ret);
+		goto out;
+	}
+
+	connect_events();
+
+	if (message_count) {
+		printf("receiving data transfers\n");
+		ret = poll_cqs();
+		if (ret)
+			goto out;
+
+		printf("sending replies\n");
+		for (i = 0; i < connections; i++) {
+			ret = post_sends(&test.nodes[i], IBV_SEND_SIGNALED);
+			if (ret)
+				goto out;
+		}
+
+		ret = poll_cqs();
+		if (ret)
+			goto out;
+		printf("data transfers complete\n");
+	}
+out:
+	rdma_destroy_id(listen_id);
+	return ret;
+}
+
+static int get_addr(char *dst, struct sockaddr_in *addr)
+{
+	struct addrinfo *res;
+	int ret;
+
+	ret = getaddrinfo(dst, NULL, NULL, &res);
+	if (ret) {
+		printf("getaddrinfo failed - invalid hostname or IP address\n");
+		return ret;
+	}
+
+	if (res->ai_family != PF_INET) {
+		ret = -1;
+		goto out;
+	}
+
+	*addr = *(struct sockaddr_in *) res->ai_addr;
+out:
+	freeaddrinfo(res);
+	return ret;
+}
+
+static int run_client(char *dst, char *src)
+{
+	int i, ret;
+
+	printf("cmatose: starting client\n");
+	if (src) {
+		ret = get_addr(src, &test.src_in);
+		if (ret)
+			return ret;
+	}
+
+	ret = get_addr(dst, &test.dst_in);
+	if (ret)
+		return ret;
+
+	test.dst_in.sin_port = 7174;
+
+	printf("cmatose: connecting\n");
+	for (i = 0; i < connections; i++) {
+		ret = rdma_resolve_addr(test.nodes[i].cma_id,
+					src ? test.src_addr : NULL,
+					test.dst_addr, 2000);
+		if (ret) {
+			printf("cmatose: failure getting addr: %d\n", ret);
+			connect_error();
+			return ret;
+		}
+	}
+
+	ret = connect_events();
+	if (ret)
+		goto out;
+
+	if (message_count) {
+		printf("initiating data transfers\n");
+		for (i = 0; i < connections; i++) {
+			ret = post_sends(&test.nodes[i], 0);
+			if (ret)
+				goto out;
+		}
+		printf("receiving data transfers\n");
+		ret = poll_cqs();
+		if (ret)
+			goto out;
+
+		printf("data transfers complete\n");
+	}
+out:
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	int ret;
+
+	if (argc > 3) {
+		printf("usage: %s [server_addr [src_addr]]\n", argv[0]);
+		exit(1);
+	}
+	is_server = (argc == 1);
+
+	test.dst_addr = (struct sockaddr *) &test.dst_in;
+	test.src_addr = (struct sockaddr *) &test.src_in;
+	test.connects_left = connections;
+
+	test.channel = rdma_create_event_channel();
+	if (!test.channel) {
+		printf("failed to create event channel\n");
+		exit(1);
+	}
+
+	if (alloc_nodes())
+		exit(1);
+
+	if (is_server)
+		ret = run_server();
+	else
+		ret = run_client(argv[1], (argc == 3) ? argv[2] : NULL);
+
+	printf("test complete\n");
+	destroy_nodes();
+	rdma_destroy_event_channel(test.channel);
+
+	printf("return status %d\n", ret);
+	return ret;
+}
Index: examples/cmatose.c
===================================================================
--- examples/cmatose.c	(revision 7636)
+++ examples/cmatose.c	(working copy)
@@ -380,7 +380,7 @@ static int alloc_nodes(void)
 		if (!is_server) {
 			ret = rdma_create_id(test.channel,
 					     &test.nodes[i].cma_id,
-					     &test.nodes[i]);
+					     &test.nodes[i], RDMA_PS_TCP);
 			if (ret)
 				goto err;
 		}
@@ -466,7 +466,7 @@ static int run_server(void)
 	int i, ret;
 
 	printf("cmatose: starting server\n");
-	ret = rdma_create_id(test.channel, &listen_id, &test);
+	ret = rdma_create_id(test.channel, &listen_id, &test, RDMA_PS_TCP);
 	if (ret) {
 		printf("cmatose: listen request failed\n");
 		return ret;





More information about the general mailing list