[ewg] [PATCH 1/9] [RFC] Adds the Xsigo unified API for IB and CM access used by the Xsigo virtual (v*) drivers like vnic and vhba

Hal Rosenstock hrosenstock at xsigo.com
Fri Apr 4 06:12:53 PDT 2008


This patch adds the Xsigo unified API for IB and CM access used by the
Xsigo virtual (v*) drivers like vnic and vhba.

This is common code used by core services such as XDS and XCPM, as well
as by the Xsigo virtual (v*) drivers. It combines both CM and verbs
access behind a single connection-handle interface.
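
To aid review, here is a rough, untested sketch of how a client such as
vnic or vhba is expected to drive the connection side of this API, based
on the declarations in xsigoib.h below.  The names my_connect_cb,
my_send_comp, my_recv_comp, peer_gid and hello are placeholders and the
element counts are arbitrary; the connect_info struct is copied by
xsigo_ib_connect(), so an on-stack instance is sufficient.

static u8 hello[64];			/* placeholder payload */

/* completion handlers of type ib_comp_handler (placeholders) */
static void my_send_comp(struct ib_cq *cq, void *cq_context);
static void my_recv_comp(struct ib_cq *cq, void *cq_context);

/* connect-status callback; runs from the xscoreibwq workqueue */
static void my_connect_cb(u32 handle, void *context,
			  enum xsigo_ib_connect_status status)
{
	if (status == XSIGO_IB_SUCCESS)
		xsigo_ib_send_msg(handle, hello, sizeof(hello));
}

static u32 start_connection(struct ib_device *device, int port,
			    struct ib_pd *pd, struct ib_mr *mr,
			    union ib_gid peer_gid)
{
	struct xsigo_ib_connect_info info = {
		.device			= device,
		.port_num		= port,
		.service_id		= SERVICE_ID,
		.pd			= pd,
		.mr			= mr,
		.sgid			= xsigo_ib_get_port_gid(device, port),
		.dgid			= peer_gid,
		.num_send_elements	= 8,
		.num_recv_elements	= 8,
		.recv_buf_size		= 256,
		.send_handler		= my_send_comp,
		.recv_handler		= my_recv_comp,
		.callback		= my_connect_cb,
	};
	u32 handle = xsigo_ib_connect(&info);

	if (handle == XSIGO_IB_ERROR_HANDLE)
		return handle;	/* no free handle or QP/CQ setup failed */

	/* ... when the connection is no longer needed ... */
	/* xsigo_ib_disconnect(handle); */
	return handle;
}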

Signed-off-by: Hal Rosenstock <hal at xsigo.com>
---
 drivers/infiniband/ulp/xsigo/xscore/xsigoib.c | 2082 +++++++++++++++++++++++++
 drivers/infiniband/ulp/xsigo/xscore/xsigoib.h |  267 ++++
 2 files changed, 2349 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/ulp/xsigo/xscore/xsigoib.c
 create mode 100644 drivers/infiniband/ulp/xsigo/xscore/xsigoib.h

diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsigoib.c b/drivers/infiniband/ulp/xsigo/xscore/xsigoib.c
new file mode 100644
index 0000000..fd9baf3
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/xsigoib.c
@@ -0,0 +1,2082 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+/*
+ * XsigoIB provides a layer of abstraction over the IB verbs and CM
+ * APIs to enable most IB operations using a simple unified API
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/proc_fs.h>
+#include <linux/version.h>
+#include <linux/utsname.h>
+
+#include "xsigoib.h"
+#include "xsigoib_stats.h"
+#include "ib_if_xds.h"
+#include "xs_core.h"
+
+#define PFX "xscore/xsigoib: "
+
+#define MAX_CONN_HANDLES (192 * 6)
+#define MAX_LIST_HANDLES 192
+#define MAX_QINFO_HANDLES 256
+#define MAX_SERVER_CONNECTS 256
+
+#define MAD_TIMEOUT_MS  2000
+
+#ifdef CONFIG_XSCORE_DEBUG
+#define xsigoib_debug(level, fmt, args...)				\
+	do {								\
+		if (xsigoib_debug_level > 0)				\
+			printk(level "<xscore:xsigoib> %s: " fmt, __func__, ## args); \
+	} while (0)
+#else
+#define xsigoib_debug(level, fmt, args...)
+#endif
+
+#define MAX_NUM_LISTS	6
+#define LIST_SIZE	192
+
+static int xsigoib_debug_level = 0;
+module_param(xsigoib_debug_level, int, 0);
+
+static int remote_cm_response_timeout = 22;
+module_param(remote_cm_response_timeout, int, 0);
+
+static int local_cm_response_timeout = 22;
+module_param(local_cm_response_timeout, int, 0);
+
+static int retry_count = 3;
+module_param(retry_count, int, 0);
+
+static int rnr_retry_count = 7;
+module_param(rnr_retry_count, int, 0);
+
+static int max_cm_retries = 3;
+module_param(max_cm_retries, int, 0);
+
+static int responder_resources = 64;
+module_param(responder_resources, int, 0);
+
+static int min_rnr_timer = IB_RNR_TIMER_000_16;
+module_param(min_rnr_timer, int, 0);
+
+static struct ib_sa_client sa_client;
+
+struct buf_info {
+	void *vaddr;
+	u64 dma_addr;
+	int length;
+	int posted;
+};
+
+static spinlock_t handle_lock;
+
+struct xsigo_ib_connect_info_pool {
+	struct xsigo_ib_connect_info *connect_info_list[MAX_NUM_LISTS];
+};
+
+static struct xsigo_ib_connect_info_pool connect_info_pool;
+
+static void delivery_handler(struct work_struct *work);
+static DECLARE_WORK(deliver_work, delivery_handler);
+static void remove_qps_and_cqs(u32 handle);
+
+static struct xsigo_ib_query_info *query_info_list;
+
+static struct workqueue_struct *xsigoib_wq = NULL;
+static wait_queue_head_t xsigoib_wait;
+
+u32 num_connects = 0;
+u32 num_disconnects = 0;
+
+
+static inline struct xsigo_ib_connect_info *get_connect_info(int handle)
+{
+	int list_index = (handle / LIST_SIZE);
+	int handle_index = (handle % LIST_SIZE);
+
+	struct xsigo_ib_connect_info *list =
+				connect_info_pool.connect_info_list[list_index];
+
+	return &list[handle_index];
+}
+
+/* Obtain a new location for a connection handle */
+static u32 find_new_conn_handle(void)
+{
+	u32 count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&handle_lock, flags);
+
+	for (count = 0; count < MAX_CONN_HANDLES; count++) {
+		struct xsigo_ib_connect_info *connect_info =
+						get_connect_info(count);
+
+		if (connect_info->used == 0) {
+			connect_info->used = 1;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&handle_lock, flags);
+
+	return count;
+}
+
+/* Obtain a new location for a query_info connection handle */
+static u32 find_new_query_handle(void)
+{
+	u32 count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&handle_lock, flags);
+
+	for (count = 0; count < MAX_QINFO_HANDLES; count++)
+		if (query_info_list[count].used == 0) {
+			query_info_list[count].used = 1;
+			break;
+		}
+
+	spin_unlock_irqrestore(&handle_lock, flags);
+
+	return count;
+}
+
+/* Deliver events to the connection holders */
+static void delivery_handler(struct work_struct *work)
+{
+	int handle;
+
+	/* Deliver connection responses */
+	for (handle = 0; handle < MAX_CONN_HANDLES; handle++) {
+		struct xsigo_ib_connect_info *connect_info =
+						get_connect_info(handle);
+		int count;
+
+		if (!connect_info->active || !connect_info->deliver)
+			continue;
+
+		for (count = 0; count < MAX_NUM_STATUS; count++)
+			if (connect_info->status[count] != -1) {
+				(connect_info->callback)(handle,
+							 connect_info->context,
+							 connect_info->status[count]);
+				connect_info->status[count] = -1;
+			}
+
+		connect_info->deliver = 0;
+	}
+
+	/* Deliver XDS responses */
+	for (handle = 0; handle < MAX_QINFO_HANDLES; handle++) {
+		struct xsigo_ib_query_info *query_info =
+						&query_info_list[handle];
+
+		if (!query_info->used || !query_info->deliver)
+			continue;
+
+		query_info->deliver = 0;
+
+		if (query_info->mad_recv_wc) {
+			(*query_info_list[handle].callback)(handle,
+							    query_info->context,
+							    query_info->status,
+							    (struct ib_xds_mad *) query_info->mad_recv_wc->recv_buf.mad);
+
+			/* callback does not need the MAD after return */
+			ib_free_recv_mad(query_info->mad_recv_wc);
+		} else
+			(*query_info_list[handle].callback)(handle,
+							    query_info->context,
+							    query_info->status,
+							    NULL);
+	}
+}
+
+static void schedule_delivery(u32 handle, int status)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int count;
+
+	if (!connect_info->active) {
+		xsigoib_debug(KERN_WARNING, "Incorrect handle %d\n", handle);
+		return;
+	}
+
+	for (count = 0; count < MAX_NUM_STATUS; count++)
+		if (connect_info->status[count] == -1)
+			break;
+
+	if (count == MAX_NUM_STATUS) {
+		xsigoib_debug(KERN_ERR,
+			      "Warning: cannot queue status delivery, queue full\n");
+		return;
+	}
+
+	connect_info->status[count] = status;
+	connect_info->deliver = 1;
+	queue_work(xsigoib_wq, &deliver_work);
+}
+
+static void schedule_xds_resp_delivery(u32 handle, int status,
+				       struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct xsigo_ib_query_info *query_info = &query_info_list[handle];
+
+	if (!query_info->used) {
+		xsigoib_debug(KERN_WARNING, "Incorrect handle %d\n", handle);
+		return;
+	}
+
+	query_info->status = status;
+	query_info->deliver = 1;
+	query_info->mad_recv_wc = mad_recv_wc;
+	queue_work(xsigoib_wq, &deliver_work);
+}
+
+/* Setup the QP and the completion queues for the connection handle */
+static int setup_qps(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct ib_device *device = connect_info->device;
+	struct ib_qp_init_attr init_attr = {
+		.cap = {
+			.max_send_wr = connect_info->num_send_elements,
+			.max_recv_wr = connect_info->num_recv_elements,
+			.max_send_sge = 1,
+			.max_recv_sge = 1 },
+		.sq_sig_type = IB_SIGNAL_ALL_WR,
+		.qp_type = IB_QPT_RC,
+		.qp_context = (void *)(unsigned long) handle
+	};
+	int ret;
+
+	connect_info->recv_cq = ib_create_cq(device,
+					     connect_info->recv_handler, NULL,
+					     (void *)(unsigned long) handle,
+					     connect_info->num_recv_elements + 1, 0);
+	if (IS_ERR(connect_info->recv_cq)) {
+		ret = PTR_ERR(connect_info->recv_cq);
+		goto setup_qps_exit;
+	}
+
+	ib_req_notify_cq(connect_info->recv_cq, IB_CQ_NEXT_COMP);
+
+	connect_info->send_cq =
+			ib_create_cq(device, connect_info->send_handler, NULL,
+				     (void *)(unsigned long) handle,
+				     connect_info->num_send_elements + 1, 0);
+	if (IS_ERR(connect_info->send_cq)) {
+		ib_destroy_cq(connect_info->recv_cq);
+		ret = PTR_ERR(connect_info->send_cq);
+		goto setup_qps_exit;
+	}
+
+	ib_req_notify_cq(connect_info->send_cq, IB_CQ_NEXT_COMP);
+
+	init_attr.send_cq = connect_info->send_cq;
+	init_attr.recv_cq = connect_info->recv_cq;
+	if (connect_info->qp_type == IB_QPT_UC)
+		init_attr.qp_type = IB_QPT_UC;
+
+	/* Initialize the queue pair for the messages on the link */
+	connect_info->qp = ib_create_qp(connect_info->pd, &init_attr);
+
+	if (IS_ERR(connect_info->qp)) {
+		ib_destroy_cq(connect_info->send_cq);
+		ib_destroy_cq(connect_info->recv_cq);
+		ret = PTR_ERR(connect_info->qp);
+		goto setup_qps_exit;
+	}
+
+	xsigoib_debug(KERN_INFO, "Handle: %d, qp_num: 0x%x\n",
+		      handle, connect_info->qp->qp_num);
+	ret = 0;
+
+setup_qps_exit:
+	return ret;
+}
+
+/* Change the queue pair state to the one specified */
+static int change_qp_state(u32 handle, struct ib_cm_id *cmid, int qpstate)
+{
+	int ret;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct ib_qp *qp = connect_info->qp;
+	struct ib_qp_attr qp_attr;
+	int qp_mask = 0;
+
+	memset(&qp_attr, 0, sizeof(qp_attr));
+
+	qp_attr.qp_state = qpstate;
+
+	if ((ret = ib_cm_init_qp_attr(cmid, &qp_attr, &qp_mask))) {
+		printk(KERN_ERR PFX "ib_cm_init_qp_attr error %d\n", ret);
+		goto change_qp_state_exit;
+	}
+
+	if (qpstate == IB_QPS_RTR && qp->qp_type == IB_QPT_RC) {
+		if (!connect_info->no_rdma) {
+			qp_mask |= IB_QP_MAX_DEST_RD_ATOMIC;
+			qp_attr.max_dest_rd_atomic = responder_resources;
+		}
+		qp_mask |= IB_QP_MIN_RNR_TIMER;
+		qp_attr.min_rnr_timer = min_rnr_timer;
+	}
+
+	if ((ret = ib_modify_qp(qp, &qp_attr, qp_mask)))
+		printk(KERN_ERR PFX "ib_modify_qp error %d new state %d\n",
+		       ret, qpstate);
+
+change_qp_state_exit:
+	return ret;
+}
+
+/* Initialize the send buffer structures */
+static int init_send_bufs(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int num_send_elements = connect_info->num_send_elements;
+
+	connect_info->send_bufs =
+		kmalloc(sizeof(*connect_info->send_bufs) * num_send_elements,
+			GFP_ATOMIC);
+
+	if (!connect_info->send_bufs)
+		return -ENOMEM;
+
+	memset(connect_info->send_bufs, 0,
+	       sizeof(*connect_info->send_bufs) * num_send_elements);
+
+	return 0;
+}
+
+/* Deallocate send buffer structures */
+static int dealloc_send_bufs(u32 handle)
+{
+	int count, ret;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+
+	if (!connect_info->used) {
+		xsigoib_debug(KERN_WARNING, "Warning: handle %d already deleted\n",
+			      handle);
+		ret = -EINVAL;
+		goto dealloc_send_bufs_exit;
+	}
+
+	/* If never allocated */
+	if (!connect_info->send_bufs) {
+		ret = -EINVAL;
+		goto dealloc_send_bufs_exit;
+	}
+
+	for (count = 0; count < connect_info->num_send_elements; count++) {
+		struct buf_info *pbuf = &connect_info->send_bufs[count];
+
+		if (pbuf->posted)
+			continue;
+
+		pbuf->vaddr = NULL;
+		pbuf->dma_addr = 0;
+		pbuf->length = 0;
+	}
+
+	if (connect_info->send_bufs) {
+		kfree(connect_info->send_bufs);
+		connect_info->send_bufs = NULL;
+	}
+
+	ret = 0;
+
+dealloc_send_bufs_exit:
+	return ret;
+}
+
+/* Send a message on an existing connection */
+int xsigo_ib_send_msg(u32 handle, u8 *data, int length)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	int num_send_elements;
+	int ret;
+	struct ib_sge list = {
+		.length = length,
+		.lkey = 0
+	};
+	struct ib_send_wr *bad_param, send_param = {
+		.next = NULL,
+		.wr_id = 0,	/* Filled in later */
+		.sg_list = &list,
+		.num_sge = 1,
+		.opcode = IB_WR_SEND
+	};
+	struct buf_info *pbuf;
+	int count;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto send_msg_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		ret = -ENOTCONN;
+		goto send_msg_exit;
+	}
+
+	num_send_elements = connect_info->num_send_elements;
+
+	for (count = 0; count < num_send_elements; count++)
+		if (connect_info->send_bufs[count].posted == 0)
+			break;
+
+	if (count == num_send_elements) {
+		ret = -EBUSY;
+		goto send_msg_exit;
+	}
+
+	list.lkey = connect_info->mr->lkey;
+
+	send_param.wr_id = count;	/* Index of the buffer */
+	pbuf = &connect_info->send_bufs[count];
+
+	/* Use the 'data' buffer passed as the send buffer */
+	pbuf->vaddr = (void *) data;
+	pbuf->length = length;
+	pbuf->dma_addr = dma_map_single(connect_info->device->dma_device,
+					pbuf->vaddr, pbuf->length, DMA_TO_DEVICE);
+
+	/*
+	 * The buffer is now owned by the HCA/IB stack; the flag is
+	 * cleared by the send completion handler.
+	 */
+	pbuf->posted = 1;
+	list.addr = pbuf->dma_addr;
+	ib_req_notify_cq(connect_info->send_cq, IB_CQ_NEXT_COMP);
+	ret = ib_post_send(connect_info->qp, &send_param, &bad_param);
+
+send_msg_exit:
+	atomic_dec(&connect_info->refcount);
+send_msg_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_send_msg);
+
+int xsigo_ib_send_msg_index(u32 handle, u8 *data, int length, int index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	int num_send_elements;
+	struct buf_info *pbuf;
+	int ret;
+	struct ib_sge list = {
+		.length = length,
+		.lkey = 0
+	};
+	struct ib_send_wr *bad_param, send_param = {
+		.next = NULL,
+		.wr_id = 0,	/* Filled in later */
+		.sg_list = &list,
+		.num_sge = 1,
+		.opcode = IB_WR_SEND
+	};
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		ret = -ENOTCONN;
+		goto send_msg_exit;
+	}
+
+	num_send_elements = connect_info->num_send_elements;
+
+	if (index < 0 || index >= num_send_elements) {
+		ret = -EINVAL;
+		goto send_msg_exit;
+	}
+
+	if (connect_info->send_bufs[index].posted) {
+		ret = -EBUSY;
+		goto send_msg_exit;
+	}
+
+	list.lkey = connect_info->mr->lkey;
+
+	send_param.wr_id = index;	/* Index of the buffer */
+	pbuf = &connect_info->send_bufs[index];
+
+	/* Use the 'data' buffer passed as the send buffer */
+	pbuf->vaddr = (void *) data;
+	pbuf->length = length;
+	pbuf->dma_addr = dma_map_single(connect_info->device->dma_device,
+					pbuf->vaddr, pbuf->length, DMA_TO_DEVICE);
+
+	/*
+	 * The buffer is now owned by the HCA/IB stack; the flag is
+	 * cleared by the send completion handler.
+	 */
+	pbuf->posted = 1;
+	list.addr = pbuf->dma_addr;
+	ret = ib_post_send(connect_info->qp, &send_param, &bad_param);
+
+send_msg_exit:
+	atomic_dec(&connect_info->refcount);
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_send_msg_index);
+
+static int ib_rdma_read_write(u32 handle, u8 *data, int length,
+			      u64 remote_addr, u32 rkey, int read_write)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	int num_send_elements;
+	int ret;
+	struct ib_sge list = {
+		.length = length,
+		.lkey = 0
+	};
+	struct ib_send_wr *bad_param, send_param = {
+		.next = NULL,
+		.wr_id = 0,	/* Filled in later */
+		.sg_list = &list,
+		.num_sge = 1,
+		.opcode = (read_write == 0) ? IB_WR_RDMA_READ :
+					      IB_WR_RDMA_WRITE,
+		.wr = {
+		       .rdma = {
+				.remote_addr = remote_addr,
+				.rkey = rkey }
+		       }
+	};
+	struct buf_info *pbuf;
+	int count;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto rdma_rw_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		ret = -ENOTCONN;
+		goto rdma_rw_exit;
+	}
+
+	num_send_elements = connect_info->num_send_elements;
+
+	/* Find the slot that we can use */
+	for (count = 0; count < num_send_elements; count++)
+		if (connect_info->send_bufs[count].posted == 0)
+			break;
+
+	if (count == num_send_elements) {
+		ret = -EBUSY;
+		goto rdma_rw_exit;
+	}
+
+	list.lkey = connect_info->mr->lkey;
+
+	send_param.wr_id = count;	/* Index of the buffer */
+	pbuf = &connect_info->send_bufs[count];
+
+	/* Use the 'data' buffer passed as the send buffer */
+	pbuf->vaddr = (void *) data;
+	pbuf->length = length;
+	pbuf->dma_addr = dma_map_single(connect_info->device->dma_device,
+					pbuf->vaddr, pbuf->length, DMA_BIDIRECTIONAL);
+
+	/*
+	 * The buffer is now owned by the HCA/IB stack; the flag is
+	 * cleared by the send completion handler.
+	 */
+	pbuf->posted = 1;
+	list.addr = pbuf->dma_addr;
+	ib_req_notify_cq(connect_info->send_cq, IB_CQ_NEXT_COMP);
+	ret = ib_post_send(connect_info->qp, &send_param, &bad_param);
+
+rdma_rw_exit:
+	atomic_dec(&connect_info->refcount);
+rdma_rw_exit2:
+	return ret;
+}
+
+int xsigo_ib_rdma_read(u32 handle, u8 *data, int length, u64 remote_addr,
+		       u32 rkey)
+{
+	return ib_rdma_read_write(handle, data, length, remote_addr, rkey, 0);
+}
+EXPORT_SYMBOL(xsigo_ib_rdma_read);
+
+int xsigo_ib_rdma_write(u32 handle, u8 *data, int length, u64 remote_addr,
+			u32 rkey)
+{
+	return ib_rdma_read_write(handle, data, length, remote_addr, rkey, 1);
+}
+EXPORT_SYMBOL(xsigo_ib_rdma_write);
+
+/* Unmap a send buffer in order to proceed with processing the data */
+int xsigo_ib_unmap_send_buf(u32 handle, u16 index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	int ret;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto unmap_send_buf_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto unmap_send_buf_exit;
+	}
+
+	if (index >= connect_info->num_send_elements) {
+		ret = -EBUSY;
+		goto unmap_send_buf_exit;
+	}
+
+	pbuf = &connect_info->send_bufs[index];
+
+	dma_unmap_single(connect_info->device->dma_device, pbuf->dma_addr,
+			 pbuf->length, DMA_TO_DEVICE);
+
+	pbuf->posted = 0;
+	ret = 0;
+
+unmap_send_buf_exit:
+	atomic_dec(&connect_info->refcount);
+unmap_send_buf_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_unmap_send_buf);
+
+/* Obtain the address of the send buffer with the given index */
+void *xsigo_ib_get_send_buf_address(u32 handle, u16 index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	void *buf_addr = NULL;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto send_buf_addr_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto send_buf_addr_exit;
+	}
+
+	if (index >= connect_info->num_send_elements)
+		goto send_buf_addr_exit;
+
+	pbuf = &connect_info->send_bufs[index];
+	buf_addr = pbuf->vaddr;
+
+send_buf_addr_exit:
+	atomic_dec(&connect_info->refcount);
+send_buf_addr_exit2:
+	return buf_addr;
+}
+EXPORT_SYMBOL(xsigo_ib_get_send_buf_address);
+
+int xsigo_ib_send_buf_posted(u32 handle, u16 index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	int ret = 0;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto send_buf_posted_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active) {
+		xsigoib_debug(KERN_WARNING, "Connection not active\n");
+		goto send_buf_posted_exit;
+	}
+	if (!connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto send_buf_posted_exit;
+	}
+
+	if (index >= connect_info->num_send_elements)
+		goto send_buf_posted_exit;
+
+	if (!connect_info->send_bufs)
+		goto send_buf_posted_exit;
+
+	pbuf = &connect_info->send_bufs[index];
+	ret = pbuf->posted;
+
+send_buf_posted_exit:
+	atomic_dec(&connect_info->refcount);
+send_buf_posted_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_send_buf_posted);
+
+/*
+ * Enqueue the receive buffer to receive further messages
+ * This includes doing a dma mapping
+ * When a buffer is dequeued, the mapping is removed,
+ * but the buffer is still allocated
+ */
+static int _xsigo_ib_post_receive(u32 handle, int index, int check_state)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	struct ib_recv_wr *bad_wr;
+	int ret;
+	struct ib_sge list;
+	struct ib_recv_wr param = {
+		.wr_id = index,
+		.sg_list = &list,
+		.num_sge = 1,
+	};
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto ib_post_receive_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active ||
+	    (check_state && !connect_info->connected)) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto ib_post_receive_exit;
+	}
+
+	pbuf = &connect_info->recv_bufs[index];
+
+	list.length = pbuf->length;
+	list.lkey = connect_info->mr->lkey;
+
+	pbuf->dma_addr = dma_map_single(connect_info->device->dma_device,
+					pbuf->vaddr, pbuf->length, DMA_FROM_DEVICE);
+
+	list.addr = pbuf->dma_addr;
+
+	pbuf->posted = 1;	/* The buffer is with the HCA/IB stack */
+	ret = ib_post_recv(connect_info->qp, &param, &bad_wr);
+
+ib_post_receive_exit:
+	atomic_dec(&connect_info->refcount);
+ib_post_receive_exit2:
+	return ret;
+}
+
+int xsigo_ib_post_receive(u32 handle, int index)
+{
+	return _xsigo_ib_post_receive(handle, index, 1);
+}
+EXPORT_SYMBOL(xsigo_ib_post_receive);
+
+int xsigo_ib_set_receive(u32 handle, int index, void *buf)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	struct ib_recv_wr *bad_wr;
+	int ret;
+	struct ib_sge list;
+	struct ib_recv_wr param = {
+		.wr_id = index,
+		.sg_list = &list,
+		.num_sge = 1,
+	};
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto ib_set_receive_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto ib_set_receive_exit;
+	}
+
+	pbuf = &connect_info->recv_bufs[index];
+
+	pbuf->vaddr = buf;
+	pbuf->length = connect_info->recv_buf_size;
+
+	list.length = pbuf->length;
+	list.lkey = connect_info->mr->lkey;
+
+	pbuf->dma_addr = dma_map_single(connect_info->device->dma_device,
+					pbuf->vaddr, pbuf->length, DMA_FROM_DEVICE);
+
+	list.addr = pbuf->dma_addr;
+
+	pbuf->posted = 1;	/* The buffer is with the HCA/IB stack */
+	ret = ib_post_recv(connect_info->qp, &param, &bad_wr);
+
+ib_set_receive_exit:
+	atomic_dec(&connect_info->refcount);
+ib_set_receive_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_set_receive);
+
+/* Allocate and post a receive buffer */
+static int alloc_and_post_recv(u32 handle, int index)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct buf_info *pbuf = &connect_info->recv_bufs[index];
+	int recv_buf_size = connect_info->recv_buf_size;
+
+	pbuf->vaddr = kmalloc(recv_buf_size, GFP_ATOMIC);
+	if (!pbuf->vaddr)
+		return -ENOMEM;
+
+	pbuf->length = recv_buf_size;
+	memset(pbuf->vaddr, 0, pbuf->length);
+
+	return _xsigo_ib_post_receive(handle, index, 0);
+}
+
+/*
+ * Allocate and post receive buffers for a receive queue
+ * This is done once during the initialization phase
+ */
+static int post_recv_bufs(u32 handle)
+{
+	int count, ret;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int num_recv_elements = connect_info->num_recv_elements;
+
+	connect_info->recv_bufs =
+		kmalloc(sizeof(*connect_info->recv_bufs) * num_recv_elements,
+			GFP_ATOMIC);
+
+	if (!connect_info->recv_bufs) {
+		xsigoib_debug(KERN_ERR, "connect_info->recv_bufs not allocated\n");
+		ret = -ENOMEM;
+		goto post_recv_bufs_exit;
+	}
+
+	memset(connect_info->recv_bufs, 0,
+	       sizeof(*connect_info->recv_bufs) * num_recv_elements);
+
+	if (!connect_info->client_recv_bufs) {
+		for (count = 0; count < num_recv_elements; count++)
+			if ((ret = alloc_and_post_recv(handle, count))) {
+				xsigoib_debug(KERN_ERR,
+					      "Alloc and post recv failed %d\n",
+					      ret);
+				goto post_recv_bufs_exit;
+			}
+	}
+
+	ib_req_notify_cq(connect_info->recv_cq, IB_CQ_NEXT_COMP);
+
+	ret = 0;
+
+post_recv_bufs_exit:
+	return ret;
+}
+
+/* Deallocate receive buffers */
+static int dealloc_recv_bufs(u32 handle)
+{
+	int count, ret;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+
+	if (!connect_info->used) {
+		xsigoib_debug(KERN_WARNING, "Warning: handle %d already deleted\n",
+			      handle);
+		ret = -EINVAL;
+		goto dealloc_recv_bufs_exit;
+	}
+
+	/* If never allocated */
+	if (!connect_info->recv_bufs) {
+		ret = -EINVAL;
+		goto dealloc_recv_bufs_exit;
+	}
+
+	if (!connect_info->client_recv_bufs) {
+		for (count = 0; count < connect_info->num_recv_elements; count++) {
+			struct buf_info *pbuf = &connect_info->recv_bufs[count];
+
+			if (!pbuf->posted)
+				xsigoib_debug(KERN_WARNING,
+					      "Buffer not posted during recv "
+					      "deallocation\n");
+
+			dma_unmap_single(connect_info->device->dma_device,
+					 pbuf->dma_addr, pbuf->length,
+					 DMA_FROM_DEVICE);
+
+			if (pbuf->vaddr) {
+				kfree(pbuf->vaddr);
+
+				pbuf->posted = 0;
+				pbuf->vaddr = NULL;
+				pbuf->dma_addr = 0;
+			}
+		}
+	}
+
+	if (connect_info->recv_bufs) {
+		kfree(connect_info->recv_bufs);
+		connect_info->recv_bufs = NULL;
+	}
+
+	ret = 0;
+
+dealloc_recv_bufs_exit:
+	return ret;
+}
+
+/* Obtain the address of the receive buffer with the given index and unmap it */
+int xsigo_ib_unmap_recv_buf(u32 handle, u16 index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	int ret;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto unmap_recv_buf_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto unmap_recv_buf_exit;
+	}
+
+	if (index >= connect_info->num_recv_elements) {
+		ret = -EINVAL;
+		goto unmap_recv_buf_exit;
+	}
+
+	pbuf = &connect_info->recv_bufs[index];
+
+	dma_unmap_single(connect_info->device->dma_device, pbuf->dma_addr,
+			 pbuf->length, DMA_FROM_DEVICE);
+
+	pbuf->posted = 0;
+	ret = 0;
+
+unmap_recv_buf_exit:
+	atomic_dec(&connect_info->refcount);
+unmap_recv_buf_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_unmap_recv_buf);
+
+/* Obtain the address of the send buffer with the given index */
+void *xsigo_ib_get_recv_buf_address(u32 handle, u16 index)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct buf_info *pbuf;
+	void *buf_addr = NULL;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto get_recv_buf_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+
+	if (!connect_info->active || !connect_info->connected) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto get_recv_buf_exit;
+	}
+
+	if (index >= connect_info->num_recv_elements)
+		goto get_recv_buf_exit;
+
+	pbuf = &connect_info->recv_bufs[index];
+	buf_addr = pbuf->vaddr;
+
+get_recv_buf_exit:
+	atomic_dec(&connect_info->refcount);
+get_recv_buf_exit2:
+	return buf_addr;
+}
+EXPORT_SYMBOL(xsigo_ib_get_recv_buf_address);
+
+/* Obtain the pending completions for a completion queue */
+int xsigo_ib_get_completions(u32 handle, int cq_type,
+			     int num_entries, struct ib_wc *wc)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct ib_cq *cq;
+	int ret;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto ib_get_completions_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (!connect_info->active) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto ib_get_completions_exit;
+	}
+
+	cq = (cq_type == RECV_CQ) ? connect_info->recv_cq :
+				    connect_info->send_cq;
+	ret = ib_poll_cq(cq, num_entries, wc);
+
+ib_get_completions_exit:
+	atomic_dec(&connect_info->refcount);
+ib_get_completions_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_get_completions);
+
+/* Arm a completion queue to receive further completion notifications */
+int xsigo_ib_arm_cq(u32 handle, int cq_type)
+{
+	struct xsigo_ib_connect_info *connect_info;
+	struct ib_cq *cq;
+	int ret;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto ib_arm_cq_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (!connect_info->active) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto ib_arm_cq_exit;
+	}
+
+	cq = (cq_type == RECV_CQ) ?
+	     connect_info->recv_cq : connect_info->send_cq;
+	ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+ib_arm_cq_exit:
+	atomic_dec(&connect_info->refcount);
+ib_arm_cq_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_arm_cq);
+
+void *xsigo_ib_get_handle_context(u32 handle)
+{
+	void *context = NULL;
+	struct xsigo_ib_connect_info *connect_info;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto ib_get_handle_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (connect_info->active == 0) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto ib_get_handle_exit;
+	}
+
+	context = connect_info->context;
+
+ib_get_handle_exit:
+	atomic_dec(&connect_info->refcount);
+ib_get_handle_exit2:
+	return context;
+}
+EXPORT_SYMBOL(xsigo_ib_get_handle_context);
+
+int xsigo_ib_set_handle_context(u32 handle, void *context)
+{
+	int ret;
+	struct xsigo_ib_connect_info *connect_info;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		ret = -EINVAL;
+		goto ib_get_handle_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (connect_info->active == 0) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		ret = -ENOTCONN;
+		goto ib_get_handle_exit;
+	}
+
+	connect_info->context = context;
+	ret = 0;
+
+ib_get_handle_exit:
+	atomic_dec(&connect_info->refcount);
+ib_get_handle_exit2:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_set_handle_context);
+
+static struct ib_cq *xsigo_ib_get_cq(u32 handle, int recv_cq)
+{
+	struct ib_cq *cq = NULL;
+	struct xsigo_ib_connect_info *connect_info;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto ib_get_cq_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (connect_info->active == 0) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto ib_get_cq_exit;
+	}
+
+	if (recv_cq)
+		cq = connect_info->recv_cq;
+	else
+		cq = connect_info->send_cq;
+
+ib_get_cq_exit:
+	atomic_dec(&connect_info->refcount);
+ib_get_cq_exit2:
+	return cq;
+}
+
+struct ib_cq *xsigo_ib_get_recv_cq(u32 handle)
+{
+	return xsigo_ib_get_cq(handle, 1);
+}
+EXPORT_SYMBOL(xsigo_ib_get_recv_cq);
+
+struct ib_cq *xsigo_ib_get_send_cq(u32 handle)
+{
+	return xsigo_ib_get_cq(handle, 0);
+}
+EXPORT_SYMBOL(xsigo_ib_get_send_cq);
+
+struct ib_qp *xsigo_ib_get_qp(u32 handle)
+{
+	struct ib_qp *qp = NULL;
+	struct xsigo_ib_connect_info *connect_info;
+
+	if (handle >= MAX_CONN_HANDLES) {
+		xsigoib_debug(KERN_WARNING, "Handle %d is invalid\n", handle);
+		goto ib_get_qp_exit2;
+	}
+
+	connect_info = get_connect_info(handle);
+
+	atomic_inc(&connect_info->refcount);
+	if (connect_info->active == 0) {
+		xsigoib_debug(KERN_WARNING, "Null connection context\n");
+		goto ib_get_qp_exit;
+	}
+
+	qp = connect_info->qp;
+
+ib_get_qp_exit:
+	atomic_dec(&connect_info->refcount);
+ib_get_qp_exit2:
+	return qp;
+}
+EXPORT_SYMBOL(xsigo_ib_get_qp);
+
+union ib_gid xsigo_ib_get_port_gid(struct ib_device *device, int port_num)
+{
+	int ret;
+	union ib_gid gid = {
+		.global = {
+			.interface_id = 0,
+			.subnet_prefix = 0
+		}
+	};
+
+	if (!device)
+		return gid;
+
+	ret = ib_query_gid(device, port_num, 0, &gid);
+	if (ret)
+		printk(KERN_ERR PFX "ib_query_gid failed %d\n", ret);
+
+	return gid;
+}
+EXPORT_SYMBOL(xsigo_ib_get_port_gid);
+
+u16 xsigo_ib_get_port_lid(struct ib_device *device, int port_num)
+{
+	int ret;
+	struct ib_port_attr port_attr;
+
+	if (!device)
+		return 0;
+
+	port_attr.lid = 0;
+	ret = ib_query_port(device, port_num, &port_attr);
+	if (ret)
+		printk(KERN_ERR PFX "ib_query_port failed %d\n", ret);
+
+	return port_attr.lid;
+}
+EXPORT_SYMBOL(xsigo_ib_get_port_lid);
+
+struct ib_port_attr xsigo_ib_get_port_attr(struct ib_device *device,
+					   int port_num)
+{
+	int ret;
+	struct ib_port_attr port_attr;
+
+	memset(&port_attr, 0, sizeof(port_attr));
+
+	if (device) {
+		ret = ib_query_port(device, port_num, &port_attr);
+		if (ret)
+			printk(KERN_ERR PFX "ib_query_port failed %d\n", ret);
+	}
+
+	return port_attr;
+}
+EXPORT_SYMBOL(xsigo_ib_get_port_attr);
+
+/* CM handler for a client-side CM connection */
+static int cm_client_handler(struct ib_cm_id *cmid, struct ib_cm_event *event)
+{
+	u32 handle = (u32)(unsigned long) cmid->context;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int ret = 0;
+
+	if (!connect_info->active) {
+		xsigoib_debug(KERN_WARNING, "Invalid or non-existent context\n");
+		goto cm_client_handler_exit;
+	}
+
+	if (event->event == IB_CM_REP_RECEIVED) {
+		xsigoib_debug(KERN_INFO, "CM REP received for handle %d\n", handle);
+		if ((ret = post_recv_bufs(handle))) {
+			xsigoib_debug(KERN_ERR,
+				      "post_recv_bufs error %d\n", ret);
+			schedule_delivery(handle, XSIGO_IB_MEM_ALLOC_ERROR);
+			goto cm_client_handler_exit;
+		}
+
+		if ((ret = change_qp_state(handle, cmid, IB_QPS_RTR))) {
+			printk(KERN_ERR PFX "change_qp_state could not change "
+			       "state to IB_QPS_RTR\n");
+			schedule_delivery(handle,
+					  XSIGO_IB_CHANGE_QP_STATE_FAILED);
+			goto cm_client_handler_exit;
+		}
+
+		if ((ret = init_send_bufs(handle))) {
+			xsigoib_debug(KERN_ERR, "init_send_bufs error %d\n",
+				      ret);
+			schedule_delivery(handle, XSIGO_IB_MEM_ALLOC_ERROR);
+			goto cm_client_handler_exit;
+		}
+
+		if ((ret = change_qp_state(handle, cmid, IB_QPS_RTS))) {
+			printk(KERN_ERR PFX "change_qp_state could not change "
+			       "state to IB_QPS_RTS\n");
+			schedule_delivery(handle,
+					  XSIGO_IB_CHANGE_QP_STATE_FAILED);
+			goto cm_client_handler_exit;
+		}
+
+		if ((ret = ib_send_cm_rtu(cmid, NULL, 0))) {
+			printk(KERN_ERR PFX "CM RTU failed for handle %d %d\n",
+			       handle, ret);
+			goto cm_client_handler_exit;
+		}
+
+		connect_info->connected = 1;
+		schedule_delivery(handle, XSIGO_IB_SUCCESS);
+	} else if (event->event == IB_CM_DREQ_RECEIVED) {
+		xsigoib_debug(KERN_INFO, "CM DREQ received for handle %d\n",
+			      handle);
+		if ((ret = ib_send_cm_drep(cmid, NULL, 0)))
+			printk(KERN_ERR PFX "CM DREP failed for handle %d %d\n",
+			       handle, ret);
+
+		connect_info->connected = 0;
+		schedule_delivery(handle, XSIGO_IB_DISCONNECT_RECEIVED);
+	} else if (event->event == IB_CM_REQ_ERROR ||
+		   event->event == IB_CM_REJ_RECEIVED) {
+		xsigoib_debug(KERN_INFO,
+			      "CM response timeout or error (%d) for handle %d\n",
+			      event->event, handle);
+		ret = -ECONNRESET;
+		schedule_delivery(handle, XSIGO_IB_NO_REP_RECEIVED);
+	} else
+		xsigoib_debug(KERN_INFO, "CM event '%d' for handle %d\n",
+			      event->event, handle);
+
+cm_client_handler_exit:
+	if (ret)
+		connect_info->cm_id = NULL;
+	return ret;
+}
+
+
+/* The main CM handler */
+static int cm_handler(struct ib_cm_id *cmid, struct ib_cm_event *event)
+{
+	u32 handle = (u32)(unsigned long) cmid->context;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+
+	if (!connect_info->used) {
+		xsigoib_debug(KERN_WARNING,
+			      "CM event on non-existent handle %d, ignoring...\n",
+			      handle);
+		return 0;
+	}
+
+	return cm_client_handler(cmid, event);
+}
+
+/* Send CM REQ after hearing back from the SA */
+static int cm_connect(u32 handle, struct ib_sa_path_rec *path_rec)
+{
+	struct ib_cm_req_param req_param;
+	int ret;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct ib_cm_id *cm_id;
+
+	if (!path_rec || !connect_info->qp) {
+		xsigoib_debug(KERN_WARNING, "Null or invalid path record\n");
+		schedule_delivery(handle, XSIGO_IB_PATH_REC_GET_FAILED);
+		ret = -EINVAL;
+		goto cm_connect_exit;
+	}
+
+	cm_id = ib_create_cm_id(connect_info->device, &cm_handler,
+				(void *)(unsigned long) handle);
+	if (IS_ERR(cm_id)) {
+		printk(KERN_ERR PFX "could not create CM id\n");
+		schedule_delivery(handle, XSIGO_IB_CM_CREATE_FAILED);
+		ret = PTR_ERR(cm_id);
+		goto cm_connect_exit;
+	}
+	connect_info->cm_id = cm_id;
+
+	xsigoib_debug(KERN_INFO, "cm_id created for handle %d\n", handle);
+
+	connect_info->conn_cm_id = NULL;
+
+	memset(&req_param, 0, sizeof(req_param));
+	req_param.primary_path = path_rec;
+	req_param.alternate_path = NULL;
+	req_param.service_id = connect_info->service_id;
+	req_param.qp_num = connect_info->qp->qp_num;
+	req_param.qp_type = connect_info->qp->qp_type;
+	req_param.private_data = connect_info->private_data;
+	req_param.private_data_len = connect_info->private_data_len;
+	req_param.peer_to_peer = 0;
+	if (!connect_info->no_rdma)
+		req_param.responder_resources = responder_resources;
+	req_param.initiator_depth = 0;
+	req_param.remote_cm_response_timeout = remote_cm_response_timeout;
+	req_param.flow_control = 1;
+	req_param.local_cm_response_timeout = local_cm_response_timeout;
+	req_param.retry_count = retry_count;
+	req_param.rnr_retry_count = rnr_retry_count;
+	req_param.max_cm_retries = max_cm_retries;
+	req_param.starting_psn = 0;
+
+	if ((ret = ib_send_cm_req(cm_id, &req_param))) {
+		printk(KERN_ERR PFX "CM REQ failed: %d for handle %d\n",
+		       ret, handle);
+		ib_destroy_cm_id(cm_id);
+		connect_info->cm_id = NULL;
+		schedule_delivery(handle, XSIGO_IB_SEND_CM_REQ_FAILED);
+		goto cm_connect_exit;
+	}
+
+	if ((ret = change_qp_state(handle, cm_id, IB_QPS_INIT))) {
+		schedule_delivery(handle, XSIGO_IB_CHANGE_QP_STATE_FAILED);
+		goto cm_connect_exit;
+	}
+
+	xsigoib_debug(KERN_INFO, "CM REQ sent for handle %d...\n", handle);
+
+cm_connect_exit:
+	return ret;
+}
+
+/* Callback for 'ib_sa_path_rec_get' */
+static void path_rec_complete(int status, struct ib_sa_path_rec *resp,
+			      void *context)
+{
+	u32 handle = (u32)(unsigned long)context;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int count;
+
+	/* Sanity check, do we really have a context here */
+	if (connect_info->active == 0) {
+		xsigoib_debug(KERN_WARNING, "No context for handle %d\n", handle);
+		goto path_rec_complete_exit;
+	}
+
+	/* Did we have a failure by chance */
+	for (count = 0; count < MAX_NUM_STATUS; count++) {
+		if (connect_info->status[count] == XSIGO_IB_PATH_REC_GET_FAILED) {
+			xsigoib_debug(KERN_ERR, "path_rec_get failed earlier\n");
+			goto path_rec_complete_exit;
+		}
+	}
+
+	if (cm_connect(handle, resp))
+		xsigoib_debug(KERN_WARNING, "Connect failed (handle %d)\n", handle);
+
+path_rec_complete_exit:
+	atomic_dec(&connect_info->refcount);
+}
+
+/* Query/fill the 'path_rec' parameters and query the SA */
+static int query_and_connect(u32 handle)
+{
+	struct ib_sa_path_rec path_rec;
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct ib_device *device = connect_info->device;
+	int port = connect_info->port_num;
+	struct ib_sa_query *query;
+	int ret;
+	u16 pkey;
+
+	path_rec.sgid = connect_info->sgid;
+	path_rec.dgid = connect_info->dgid;
+
+	ib_query_pkey(device, port, 0, &pkey);
+	path_rec.pkey = cpu_to_be16(pkey);
+
+	path_rec.numb_path = 1;
+
+	connect_info->connected = 0;
+
+	atomic_inc(&connect_info->refcount);
+	if ((ret = ib_sa_path_rec_get(&sa_client, device, port, &path_rec,
+				      IB_SA_PATH_REC_DGID |
+				      IB_SA_PATH_REC_SGID |
+				      IB_SA_PATH_REC_PKEY |
+				      IB_SA_PATH_REC_NUMB_PATH, 1000,
+				      GFP_ATOMIC, &path_rec_complete,
+				      (void *)(unsigned long) handle, &query)) < 0) {
+		printk(KERN_ERR PFX "Path rec failed: %d (handle 0x%x)\n",
+		       ret, handle);
+		schedule_delivery(handle, XSIGO_IB_PATH_REC_GET_FAILED);
+		atomic_dec(&connect_info->refcount);
+	}
+
+	return 0;
+}
+
+/* Initialize a CM connection */
+/* Called for an active CM connection */
+u32 cm_connect_client(struct xsigo_ib_connect_info *connect_info)
+{
+	u32 handle = find_new_conn_handle();
+	struct xsigo_ib_connect_info *connect_info_new;
+	int count;
+
+	if (handle == MAX_CONN_HANDLES)
+		goto leave_err_1;
+
+	connect_info_new = get_connect_info(handle);
+	if (connect_info == NULL)
+		goto leave_err;
+
+	connect_info->used = 1;	/* Avoid losing the handle */
+	*connect_info_new = *connect_info;
+
+	if (setup_qps(handle))
+		goto leave_err;
+
+	atomic_set(&connect_info_new->refcount, 0);
+	if (query_and_connect(handle))
+		goto rem_leave_err;
+	connect_info_new->deliver = 0;
+
+	/* Set the status fields to invalid */
+	for (count = 0; count < MAX_NUM_STATUS; count++)
+		connect_info_new->status[count] = -1;
+
+	connect_info_new->active = 1;
+
+	*connect_info = *connect_info_new;
+
+	goto exit;
+
+rem_leave_err:
+	remove_qps_and_cqs(handle);
+leave_err:
+	connect_info_new->used = 0;
+leave_err_1:
+	handle = XSIGO_IB_ERROR_HANDLE;
+exit:
+	return handle;
+}
+
+/* Connect to a remote CM - active connection request */
+u32 xsigo_ib_connect(struct xsigo_ib_connect_info *connect_info)
+{
+	if (!connect_info->device) {
+		xsigoib_debug(KERN_WARNING, "Null device\n");
+		return XSIGO_IB_ERROR_HANDLE;
+	}
+	num_connects++;
+	return cm_connect_client(connect_info);
+}
+EXPORT_SYMBOL(xsigo_ib_connect);
+
+/* Disconnect a connection via CM */
+static void cm_disconnect(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	struct ib_cm_id *cm_id;
+	struct ib_cm_id *conn_cm_id;
+
+	if (!connect_info->used) {
+		xsigoib_debug(KERN_WARNING, "Invalid handle %d or already deleted\n",
+			      handle);
+		return;
+	}
+
+	cm_id = connect_info->cm_id;
+	conn_cm_id = connect_info->conn_cm_id;
+
+	if (cm_id) {
+		xsigoib_debug(KERN_INFO, "Destroying cm_id for handle %d...\n",
+			      handle);
+
+		/*
+		 * The order is important, we want other closely
+		 * following requests to be warded off
+		 */
+		connect_info->cm_id = NULL;
+		ib_destroy_cm_id(cm_id);
+		xsigoib_debug(KERN_INFO, "cm_id destroyed for handle %d\n",
+			      handle);
+	}
+}
+
+/* Remove the QPs and CQs for a connection handle */
+static void remove_qps_and_cqs(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+	int ret;
+
+	if (!connect_info->used) {
+		xsigoib_debug(KERN_WARNING, "Warning: handle %d already deleted\n",
+			      handle);
+		return;
+	}
+
+	xsigoib_debug(KERN_INFO, "Destroying CQs and QP (handle %d)...\n",
+		      handle);
+
+	/*
+	 * In the reverse order of creation:
+	 * QP destruction decrements CQ reference counts
+	 */
+	if (connect_info->qp && (ret = ib_destroy_qp(connect_info->qp)))
+		printk(KERN_WARNING PFX "Error %d deleting qp\n", ret);
+	connect_info->qp = NULL;
+
+	if (connect_info->send_cq && (ret = ib_destroy_cq(connect_info->send_cq)))
+		printk(KERN_WARNING PFX "Error %d deleting send cq\n", ret);
+	connect_info->send_cq = NULL;
+
+	if (connect_info->recv_cq && (ret = ib_destroy_cq(connect_info->recv_cq)))
+		printk(KERN_WARNING PFX "Error %d deleting recv cq\n", ret);
+	connect_info->recv_cq = NULL;
+
+	xsigoib_debug(KERN_INFO, "CQs and QP deleted (handle %d)...\n", handle);
+}
+
+/* Release a connection handle */
+static void release_conn_handle(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info = get_connect_info(handle);
+
+	if (!connect_info->used)
+		xsigoib_debug(KERN_WARNING, "Warning: handle %d already deleted\n",
+			      handle);
+	else
+		connect_info->used = 0;
+}
+
+/* Disconnect a connection */
+void xsigo_ib_disconnect(u32 handle)
+{
+	struct xsigo_ib_connect_info *connect_info;
+
+	if (handle == XSIGO_IB_ERROR_HANDLE)
+		goto xsigo_ib_disconnect_exit2;
+
+	connect_info = get_connect_info(handle);
+
+	if (!connect_info->used)
+		goto xsigo_ib_disconnect_exit;
+
+	xsigoib_debug(KERN_INFO, "xsigo_ib_disconnect called for handle %d\n",
+		      handle);
+
+	connect_info->active = 0; /* Don't make any more calls to this handle */
+	wait_event_timeout(xsigoib_wait, !atomic_read(&connect_info->refcount),
+			   5 * HZ);
+
+	cm_disconnect(handle);
+	dealloc_recv_bufs(handle);
+	dealloc_send_bufs(handle);
+	remove_qps_and_cqs(handle);
+	release_conn_handle(handle);
+
+xsigo_ib_disconnect_exit:
+	xsigoib_debug(KERN_INFO, "xsigo_ib_disconnect complete for handle %d\n",
+		      handle);
+xsigo_ib_disconnect_exit2:
+	num_disconnects++;
+}
+EXPORT_SYMBOL(xsigo_ib_disconnect);
+
+/* XDS query API */
+
+/*
+ * The handler for MAD send events
+ * Called once after our MADs to the XDS are posted
+ */
+static void ib_if_xds_send_handler(struct ib_mad_agent *agent,
+				   struct ib_mad_send_wc *mad_send_wc)
+{
+	/* The query handle was passed as the MAD agent context */
+	u32 handle = (u32)(unsigned long) agent->context;
+	struct xsigo_ib_query_info *q_info = &query_info_list[handle];
+
+	if (!q_info->used) {
+		xsigoib_debug(KERN_WARNING, "No query context for handle %d\n",
+			      handle);
+		return;
+	}
+
+	switch (mad_send_wc->status) {
+	case IB_WC_SUCCESS:
+	{
+		xsigoib_debug(KERN_INFO, "Send MAD success (handle %d)\n",
+			      handle);
+		break;
+	}
+
+	default:
+	{
+		xsigoib_debug(KERN_ERR, "Error sending a MAD to the XDS (handle %d)," 
+			      " status: 0x%x\n", handle, mad_send_wc->status);
+		schedule_xds_resp_delivery(handle, mad_send_wc->status, NULL);
+	}
+	}
+
+	dma_unmap_single(q_info->device->dma_device, q_info->dma_addr,
+			 sizeof(struct ib_mad), DMA_TO_DEVICE);
+
+	if (q_info->send_buf) {
+		ib_free_send_mad(q_info->send_buf);
+		q_info->send_buf = NULL;
+	}
+
+	if (!IS_ERR(q_info->ah))
+		ib_destroy_ah(q_info->ah);
+}
+
+/*
+ * The handler for MAD receive events
+ * Called once when an XCM list arrives from the XDS
+ * Notify through callbacks that the list has been received
+ */
+static void ib_if_xds_recv_handler(struct ib_mad_agent *mad_agent,
+				   struct ib_mad_recv_wc *mad_recv_wc)
+{
+	u32 handle = (u32)(unsigned long) mad_agent->context;
+
+	/*
+	 * If we get a successful MAD back, then use the
+	 * information to allocate links
+	 */
+	if (mad_recv_wc->wc->status == IB_WC_SUCCESS) {
+		xsigoib_debug(KERN_INFO, "MAD received successfully for handle %d\n",
+			      handle);
+		schedule_xds_resp_delivery(handle, mad_recv_wc->wc->status,
+					   mad_recv_wc);
+	} else {
+		xsigoib_debug(KERN_ERR,
+			      "Error in receiving a MAD, id: %d, handle %d\n",
+			       (int) mad_recv_wc->wc->wr_id, handle);
+		schedule_xds_resp_delivery(handle, mad_recv_wc->wc->status, NULL);
+	}
+}
+
+struct ib_ah *xsigo_ib_create_ah(struct xsigo_ib_query_info *q_info, u16 dlid)
+{
+	struct ib_ah_attr ah_attr;
+	struct ib_port_attr port_attr =
+		xsigo_ib_get_port_attr(q_info->device, q_info->port_num);
+
+	memset(&ah_attr, 0, sizeof(ah_attr));
+	ah_attr.dlid = port_attr.sm_lid;
+	ah_attr.sl = port_attr.sm_sl;
+	ah_attr.port_num = q_info->port_num;
+
+	if (dlid)
+		ah_attr.dlid = dlid;
+
+	return ib_create_ah(q_info->agent->qp->pd, &ah_attr);
+}
+
+u32 xsigo_ib_create_xds_context(struct xsigo_ib_query_info *query_info)
+{
+	u32 handle = find_new_query_handle();
+	struct xsigo_ib_query_info *q_info;
+	struct ib_mad_reg_req mad_reg_req;
+
+	if (handle == MAX_QINFO_HANDLES) {
+		handle = XSIGO_IB_ERROR_HANDLE;
+		goto create_xds_context_exit;
+	}
+
+	if (query_info == NULL) {
+		handle = XSIGO_IB_ERROR_HANDLE;
+		goto create_xds_context_exit;
+	}
+
+	query_info->used = 1;
+	query_info_list[handle] = *query_info;
+
+	q_info = &query_info_list[handle];
+	q_info->deliver = 0;
+
+	mad_reg_req.mgmt_class = q_info->mgmt_class;
+	mad_reg_req.mgmt_class_version = q_info->mgmt_class_version;
+
+	set_bit(IB_MGMT_METHOD_GET_RESP, mad_reg_req.method_mask);
+	q_info->agent = ib_register_mad_agent(q_info->device,
+					      q_info->port_num, IB_QPT_GSI,
+					      &mad_reg_req, 0,
+					      ib_if_xds_send_handler,
+					      ib_if_xds_recv_handler,
+					      (void *)(unsigned long) handle);
+	if (IS_ERR(q_info->agent)) {
+		handle = XSIGO_IB_ERROR_HANDLE;
+		goto create_xds_context_exit;
+	}
+
+	/* Defer the ah to be created later, potentially with a new dlid */
+	q_info->ah = NULL;
+
+create_xds_context_exit:
+	return handle;
+}
+EXPORT_SYMBOL(xsigo_ib_create_xds_context);
+
+/* Prepare a Xsigo MAD that requests the XCMs for an HCA port */
+static void prepare_xds_mad(struct ib_xds_mad *xds_mad,
+			    int port_index,
+			    struct xsigo_ib_query_info *q_info,
+			    u8 *data, int length)
+{
+	struct ib_mad_hdr mad_hdr = {
+		.base_version = IB_MGMT_BASE_VERSION,
+		.mgmt_class = q_info->mgmt_class,
+		.class_version = q_info->mgmt_class_version,
+		.method = IB_MGMT_METHOD_GET,
+		.attr_id = q_info->attr_id,
+		.status = 0,
+		.class_specific = 0,
+		.resv = 0,
+		.attr_mod = 0,
+		.tid = 0,
+	};
+
+	memset(xds_mad, 0, sizeof(*xds_mad));
+
+	mad_hdr.tid = q_info->agent->hi_tid;
+	mad_hdr.tid <<= 32;
+	mad_hdr.tid |= q_info->port_num;
+	mad_hdr.tid = cpu_to_be64(mad_hdr.tid);
+
+	xds_mad->mad_hdr = mad_hdr;
+
+	memcpy(xds_mad->data, data, length);
+}
+
+/*
+ * Generate a query to the XDS asking the whereabouts of the XCMs
+ * that we want to connect to
+ */
+int xsigo_ib_query_xds(u32 handle, u8 *data, int length, u16 dlid)
+{
+	int ret;
+	struct xsigo_ib_query_info *q_info;
+
+	if (handle >= MAX_QINFO_HANDLES || !query_info_list[handle].used) {
+		xsigoib_debug(KERN_WARNING, "Invalid query handle %d\n",
+			      handle);
+		ret = -EINVAL;
+		goto xsigo_ib_query_xds_exit;
+	}
+
+	q_info = &query_info_list[handle];
+
+	q_info->send_buf = ib_create_send_mad(q_info->agent, 1, 0, 0,
+					      IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+					      GFP_ATOMIC);
+	if (IS_ERR(q_info->send_buf)) {
+		ret = PTR_ERR(q_info->send_buf);
+		printk(KERN_ERR PFX "ib_create_send_mad failed, error %d\n", ret);
+		goto xsigo_ib_query_xds_exit;
+	}
+
+	prepare_xds_mad(q_info->send_buf->mad, q_info->port_num, q_info,
+			data, length);
+
+	/* Create address handles dynamically before sending a query */
+	q_info->ah = xsigo_ib_create_ah(q_info, dlid);
+	if (IS_ERR(q_info->ah)) {
+		ib_free_send_mad(q_info->send_buf);
+		ret = PTR_ERR(q_info->ah);
+		printk(KERN_ERR PFX "could not create ah error %d\n", ret);
+		goto xsigo_ib_query_xds_exit;
+	}
+
+	q_info->send_buf->ah = q_info->ah;
+
+	q_info->dma_addr = dma_map_single(q_info->device->dma_device,
+					  q_info->send_buf->mad,
+					  sizeof(struct ib_mad), DMA_TO_DEVICE);
+
+	q_info->send_buf->retries = max_cm_retries;
+	q_info->send_buf->timeout_ms = MAD_TIMEOUT_MS;
+
+	ret = ib_post_send_mad(q_info->send_buf, NULL);
+	if (ret) {
+		dma_unmap_single(q_info->device->dma_device, q_info->dma_addr,
+				 sizeof(struct ib_mad), DMA_TO_DEVICE);
+		ib_free_send_mad(q_info->send_buf);
+		ib_destroy_ah(q_info->ah);
+		q_info->send_buf = NULL;
+	}
+
+xsigo_ib_query_xds_exit:
+	return ret;
+}
+EXPORT_SYMBOL(xsigo_ib_query_xds);
+
+void xsigo_ib_delete_xds_context(u32 handle)
+{
+	struct xsigo_ib_query_info *q_info;
+
+	if (handle >= MAX_QINFO_HANDLES) {
+		xsigoib_debug(KERN_WARNING,
+			      "Deleting invalid query context (handle %d)\n",
+			      handle);
+		return;
+	}
+
+	q_info = &query_info_list[handle];
+	if (!q_info->used) {
+		xsigoib_debug(KERN_WARNING,
+			      "Deleting unallocated query context (handle %d)\n",
+			      handle);
+		return;
+	}
+
+	if (!IS_ERR(q_info->agent))
+		ib_unregister_mad_agent(q_info->agent);
+
+	query_info_list[handle].used = 0;
+}
+EXPORT_SYMBOL(xsigo_ib_delete_xds_context);
+
+int xsigoib_init(void)
+{
+	int count;
+	int list_count;
+	int ret = -ENOMEM;
+
+	for (list_count = 0; list_count < MAX_NUM_LISTS; list_count++) {
+		connect_info_pool.connect_info_list[list_count] =
+				kmalloc(sizeof(struct xsigo_ib_connect_info) *
+					MAX_LIST_HANDLES, GFP_KERNEL);
+
+		if (!connect_info_pool.connect_info_list[list_count]) {
+			xsigoib_debug(KERN_ERR, "Low on memory, exiting...\n");
+			goto xsigo_ib_init_exit1;
+		}
+
+		for (count = 0; count < MAX_LIST_HANDLES; count++) {
+			struct xsigo_ib_connect_info *connect_info_list =
+			    connect_info_pool.connect_info_list[list_count];
+
+			memset(&connect_info_list[count], 0,
+			       sizeof(*connect_info_list));
+		}
+	}
+
+	query_info_list = kmalloc(sizeof(*query_info_list) * MAX_QINFO_HANDLES,
+				  GFP_KERNEL);
+	if (!query_info_list) {
+		xsigoib_debug(KERN_ERR, "Low on memory, exiting...\n");
+		goto xsigo_ib_init_exit1;
+	}
+
+	for (count = 0; count < MAX_QINFO_HANDLES; count++)
+		memset(&query_info_list[count], 0,
+		       sizeof(query_info_list[count]));
+
+	spin_lock_init(&handle_lock);
+
+	xsigoib_wq = create_singlethread_workqueue("xscoreibwq");
+	if (!xsigoib_wq)
+		goto xsigo_ib_init_exit2;
+
+	init_waitqueue_head(&xsigoib_wait);
+
+	ret = xsigo_ib_register_sysfs();
+	if (ret)
+		goto xsigo_ib_init_exit3;
+
+	ib_sa_register_client(&sa_client);
+	ret = 0;
+	goto xsigo_ib_init_exit;
+
+xsigo_ib_init_exit3:
+	destroy_workqueue(xsigoib_wq);
+xsigo_ib_init_exit2:
+	kfree(query_info_list);
+xsigo_ib_init_exit1:
+	for (list_count = 0; list_count < MAX_NUM_LISTS; list_count++)
+		if (connect_info_pool.connect_info_list[list_count])
+			kfree(connect_info_pool.connect_info_list[list_count]);
+xsigo_ib_init_exit:
+	return ret;
+}
+
+void xsigoib_exit(void)
+{
+	int count;
+	int list_count;
+
+	ib_sa_unregister_client(&sa_client);
+
+	xsigo_ib_unregister_sysfs();
+
+	/*
+	 * Make sure we disconnect and cleanup if people did not
+	 * Especially important to prevent rampant timers
+	 */
+	for (count = 0; count < MAX_CONN_HANDLES; count++) {
+		struct xsigo_ib_connect_info *connect_info =
+							get_connect_info(count);
+
+		if (connect_info && connect_info->used) {
+			/*
+			 * No spinlock contention: if we are unloading,
+			 * then the users have already
+			 */
+			xsigo_ib_disconnect(count);
+			connect_info->used = 0;
+		}
+	}
+
+	if (xsigoib_wq) {
+		flush_workqueue(xsigoib_wq);
+		destroy_workqueue(xsigoib_wq);
+	}
+
+	kfree(query_info_list);
+	for (list_count = 0; list_count < MAX_NUM_LISTS; list_count++)
+		kfree(connect_info_pool.connect_info_list[list_count]);
+
+	msleep(2000);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/xsigoib.h b/drivers/infiniband/ulp/xsigo/xscore/xsigoib.h
new file mode 100644
index 0000000..6ab067e
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/xsigoib.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __XSIGOIB_H__
+#define __XSIGOIB_H__
+
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_cm.h>
+#include <rdma/ib_sa.h>
+
+#define RECV_CQ 1
+#define SEND_CQ 2
+
+#define DEFAULT_SUBNET_PREFIX 0xfe80000000000000ULL
+#define SERVICE_ID 0x02139701
+
+#define XSIGO_IB_ERROR_HANDLE	((u32) -1)
+
+#define MAX_NUM_STATUS  16
+
+enum xsigo_ib_connect_status {
+	XSIGO_IB_SUCCESS = 0,
+	XSIGO_IB_PATH_REC_GET_FAILED,
+	XSIGO_IB_CM_CREATE_FAILED,
+	XSIGO_IB_CM_LISTEN_FAILED,
+	XSIGO_IB_SEND_CM_REQ_FAILED,
+	XSIGO_IB_CHANGE_QP_STATE_FAILED,
+	XSIGO_IB_NO_REP_RECEIVED,
+	XSIGO_IB_DISCONNECT_RECEIVED,
+	XSIGO_IB_NEW_HANDLE,
+	XSIGO_IB_MEM_ALLOC_ERROR
+};
+
+struct ib_xds_mad;
+
+/* Callback invoked by XsigoIB when the connection request succeeds or fails */
+typedef void (*xsigo_connect_callback) (u32 handle, void *context,
+					enum xsigo_ib_connect_status status);
+
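+/*
+ * Illustrative sketch only (not part of this patch): a client callback of
+ * this type might look roughly like the following; 'my_conn_done' and
+ * 'struct my_client' are hypothetical client-side names.  The callback
+ * just records the status and lets the client react to it elsewhere:
+ *
+ *	static void my_conn_done(u32 handle, void *context,
+ *				 enum xsigo_ib_connect_status status)
+ *	{
+ *		struct my_client *clt = context;
+ *
+ *		clt->last_status = status;
+ *	}
+ */
+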
+/*
+ * Structure filled in (partially) and passed by a client during a
+ * connection request
+ */
+struct xsigo_ib_connect_info {
+	/* Passed in by the client */
+	struct ib_device *device;
+	u16 port_num;
+	u64 service_id;
+	struct ib_pd *pd;
+	struct ib_mr *mr;
+	u16 slid;
+	union ib_gid sgid;
+	u16 dlid;
+	union ib_gid dgid;
+	u16 num_send_elements;
+	u16 num_recv_elements;
+	u16 recv_buf_size;
+	ib_comp_handler send_handler;
+	ib_comp_handler recv_handler;
+	void *private_data;
+	u8 private_data_len;
+	xsigo_connect_callback callback;
+	void *context;
+	int client_recv_bufs;	/* Will receive buffers be set by the client? */
+	int no_rdma;
+	u8 sl;
+
+	/* Set and returned by XsigoIB */
+	struct ib_qp *qp;
+	struct ib_cq *send_cq;
+	struct ib_cq *recv_cq;
+
+	/* Internal use by XsigoIB - clients do not read/write */
+	struct ib_cm_id *cm_id;
+	struct buf_info *recv_bufs;
+	struct buf_info *send_bufs;
+	int connected;
+	struct ib_cm_id *conn_cm_id;
+	int deliver;
+	enum xsigo_ib_connect_status status[MAX_NUM_STATUS];
+	spinlock_t handle_lock;
+	int used;		/* In use */
+
+	atomic_t refcount;	/* Reference count of the handle */
+	int active;
+
+	int qp_type;		/* RC or UC */
+};
+
+/* XsigoIB API */
+
+/* Connection establishment */
+
+/*
+ * Connect to a remote CM - active connection request
+ * XSIGO_IB_ERROR_HANDLE is returned in case of error
+ * Otherwise a valid handle is returned
+ */
+u32 xsigo_ib_connect(struct xsigo_ib_connect_info *connect_info);
+
+
+/* Disconnect a connection or cancel a listen request */
+void xsigo_ib_disconnect(u32 conn_handle);
+
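+/*
+ * Illustrative sketch only (not part of this patch): a caller might set up
+ * an active connection roughly as follows.  'dev', 'pd', 'mr', the path
+ * fields, 'my_send_handler', 'my_recv_handler', 'my_conn_done' and 'clt'
+ * are hypothetical objects the client already owns:
+ *
+ *	struct xsigo_ib_connect_info cinfo;
+ *	u32 handle;
+ *
+ *	memset(&cinfo, 0, sizeof(cinfo));
+ *	cinfo.device = dev;
+ *	cinfo.port_num = 1;
+ *	cinfo.service_id = SERVICE_ID;
+ *	cinfo.pd = pd;
+ *	cinfo.mr = mr;
+ *	cinfo.slid = local_lid;
+ *	cinfo.sgid = local_gid;
+ *	cinfo.dlid = remote_lid;
+ *	cinfo.dgid = remote_gid;
+ *	cinfo.num_send_elements = 64;
+ *	cinfo.num_recv_elements = 64;
+ *	cinfo.recv_buf_size = 4096;
+ *	cinfo.send_handler = my_send_handler;
+ *	cinfo.recv_handler = my_recv_handler;
+ *	cinfo.callback = my_conn_done;
+ *	cinfo.context = clt;
+ *
+ *	handle = xsigo_ib_connect(&cinfo);
+ *	if (handle == XSIGO_IB_ERROR_HANDLE)
+ *		return -EIO;
+ *
+ * The final outcome (XSIGO_IB_SUCCESS or a failure code) is then reported
+ * through 'my_conn_done'; xsigo_ib_disconnect(handle) tears the connection
+ * down again.
+ */
+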
+/* Receive */
+
+/*
+ * Unmap a receive buffer with index 'index'. The received data must not
+ * be touched before the buffer has been unmapped
+ */
+int xsigo_ib_unmap_recv_buf(u32 handle, u16 index);
+
+/*
+ * Enqueue a receive buffer after the data is used, so that further
+ * messages can be received on it
+ */
+int xsigo_ib_post_receive(u32 handle, int index);
+
+/* Set the receive buffer and then enqueue it */
+int xsigo_ib_set_receive(u32 handle, int index, void *buf);
+
+/*
+ * Obtain the address of the receive buffer with the given index
+ * Returns NULL on error
+ */
+void *xsigo_ib_get_recv_buf_address(u32 handle, u16 index);
+
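+/*
+ * Illustrative receive cycle (a sketch, not part of this patch): when the
+ * client's recv_handler sees a completion for the receive buffer 'index',
+ * it would typically unmap the buffer, look at the data and then repost it:
+ *
+ *	void *buf;
+ *
+ *	xsigo_ib_unmap_recv_buf(handle, index);
+ *	buf = xsigo_ib_get_recv_buf_address(handle, index);
+ *	if (buf)
+ *		my_process_message(buf);
+ *	xsigo_ib_post_receive(handle, index);
+ *
+ * 'my_process_message' is a hypothetical client routine; a client that
+ * manages its own buffers would presumably use xsigo_ib_set_receive()
+ * instead of xsigo_ib_post_receive() to supply a fresh buffer.
+ */
+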
+/* Send */
+
+/* Send a message on an existing connection */
+int xsigo_ib_send_msg(u32 handle, u8 *data, int length);
+int xsigo_ib_send_msg_index(u32 handle, u8 *data, int length, int index);
+
+/*
+ * Unmap a send buffer with index 'index'. The buffer must not be reused
+ * before it has been unmapped
+ */
+int xsigo_ib_unmap_send_buf(u32 handle, u16 index);
+
+/*
+ * Obtain the address of the send buffer with the given index
+ * Returns NULL on error
+ */
+void *xsigo_ib_get_send_buf_address(u32 handle, u16 index);
+
+/*
+ * Find out if the send buffer is still posted:
+ * dma mapped and marked as used by the HCA
+ */
+int xsigo_ib_send_buf_posted(u32 handle, u16 index);
+
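+/*
+ * Illustrative send path (sketch only, not part of this patch): 'handle'
+ * is assumed to come from a successful xsigo_ib_connect(), 'msg'/'len'
+ * describe a caller-owned buffer, and the usual 0-on-success return
+ * convention is assumed:
+ *
+ *	if (xsigo_ib_send_msg(handle, msg, len))
+ *		return -EIO;
+ *
+ * When the send completion for slot 'index' shows up in the client's
+ * send_handler, the slot is released with:
+ *
+ *	xsigo_ib_unmap_send_buf(handle, index);
+ *
+ * xsigo_ib_send_buf_posted() can be used to check whether a given slot is
+ * still owned by the HCA before reusing it.
+ */
+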
+/* RDMA */
+
+/*
+ * RDMA read
+ * 'data' and 'length' are of the local buffer to write into
+ * 'remote_addr' and 'rkey' specify the remote location to read from
+ */
+int xsigo_ib_rdma_read(u32 handle, u8 *data, int length, u64 remote_addr,
+		       u32 rkey);
+
+/*
+ * RDMA write
+ * 'data' and 'length' are of the local buffer to read from
+ * 'remote_addr' and 'rkey' specify the remote location to write into
+ */
+int xsigo_ib_rdma_write(u32 handle, u8 *data, int length, u64 remote_addr,
+			u32 rkey);
+
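+/*
+ * Illustrative sketch (not part of this patch): assuming the peer has
+ * advertised a memory region at 'remote_addr' with key 'rkey' (for example
+ * in an earlier message), a local buffer 'local_buf' of 'len' bytes can be
+ * transferred in either direction:
+ *
+ *	xsigo_ib_rdma_read(handle, local_buf, len, remote_addr, rkey);
+ *	xsigo_ib_rdma_write(handle, local_buf, len, remote_addr, rkey);
+ *
+ * Both calls presumably require a connection created without 'no_rdma' set.
+ */
+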
+/*
+ * Obtain pending completions for a completion queue
+ * 'cq_type' is either RECV_CQ or SEND_CQ
+ * 'num_entries' is the maximum number of completions requested
+ * 'wc' is an allocated array of 'struct ib_wc'
+ * There should be at least 'num_entries' entries in 'wc'
+ */
+int xsigo_ib_get_completions(u32 handle, int cq_type, int num_entries,
+			     struct ib_wc *wc);
+
+/* Arm a completion queue to receive further completion notifications */
+int xsigo_ib_arm_cq(u32 handle, int cq_type);
+
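+/*
+ * Illustrative polling sketch (not part of this patch): a recv_handler
+ * might drain and re-arm its CQ roughly as follows; depending on the CQ
+ * event semantics a client may want to poll once more after re-arming so
+ * that completions arriving in between are not missed:
+ *
+ *	struct ib_wc wc[8];
+ *	int n, i;
+ *
+ *	while ((n = xsigo_ib_get_completions(handle, RECV_CQ, 8, wc)) > 0) {
+ *		for (i = 0; i < n; i++)
+ *			my_handle_completion(handle, &wc[i]);
+ *	}
+ *
+ *	xsigo_ib_arm_cq(handle, RECV_CQ);
+ *
+ * 'my_handle_completion' is a hypothetical client routine.
+ */
+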
+/* Miscellaneous */
+
+/* Obtain the 'context' passed for the handle */
+void *xsigo_ib_get_handle_context(u32 handle);
+int xsigo_ib_set_handle_context(u32 handle, void *context);
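+
+/*
+ * Illustrative sketch (not part of this patch): a client that stored its
+ * per-connection state as 'context' at connect time can recover it later
+ * from just the handle, e.g. in a timeout or error path:
+ *
+ *	struct my_client *clt = xsigo_ib_get_handle_context(handle);
+ *
+ * 'struct my_client' is hypothetical.
+ */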
+
+struct ib_cq *xsigo_ib_get_recv_cq(u32 handle);
+struct ib_cq *xsigo_ib_get_send_cq(u32 handle);
+struct ib_qp *xsigo_ib_get_qp(u32 handle);
+
+/* Obtain device and port attributes */
+union ib_gid xsigo_ib_get_port_gid(struct ib_device *device, int port_num);
+u16 xsigo_ib_get_port_lid(struct ib_device *device, int port_num);
+struct ib_port_attr xsigo_ib_get_port_attr(struct ib_device *device,
+					   int port_num);
+
+
+/* XDS query API */
+
+/*
+ * Callback invoked by XsigoIB when the XDS query succeeds or fails
+ * 'xds_mad' returned is NULL unless the status is '0' (success)
+ */
+typedef void (*xsigo_query_callback) (u32 handle, void *context,
+				      int status, struct ib_xds_mad *xds_mad);
+
+struct xsigo_ib_query_info {
+	struct ib_device *device;
+	u8 port_num;
+	u8 mgmt_class;
+	u8 mgmt_class_version;
+	u16 attr_id;		/* Network order */
+	void *context;
+	xsigo_query_callback callback;
+
+	/* Internal use by XsigoIB - clients do not read/write */
+	struct ib_mad_agent *agent;
+	struct ib_ah *ah;
+	struct ib_mad_send_buf *send_buf;
+	struct ib_mad_recv_wc *mad_recv_wc;
+	u64 dma_addr;
+	int deliver;
+	int status;
+	int used;
+};
+
+/* XDS communication */
+u32 xsigo_ib_create_xds_context(struct xsigo_ib_query_info *query_info);
+int xsigo_ib_query_xds(u32 handle, u8 *data, int length, u16 dlid);
+void xsigo_ib_delete_xds_context(u32 handle);
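+
+/*
+ * Illustrative XDS query lifecycle (sketch only, not part of this patch):
+ * 'dev', 'MY_MGMT_CLASS', 'MY_ATTR_ID', 'my_xds_done', 'clt', 'req'/'len'
+ * and 'xds_lid' are hypothetical caller-supplied values; the error check
+ * assumes the same XSIGO_IB_ERROR_HANDLE convention as xsigo_ib_connect():
+ *
+ *	struct xsigo_ib_query_info qinfo;
+ *	u32 qhandle;
+ *
+ *	memset(&qinfo, 0, sizeof(qinfo));
+ *	qinfo.device = dev;
+ *	qinfo.port_num = 1;
+ *	qinfo.mgmt_class = MY_MGMT_CLASS;
+ *	qinfo.mgmt_class_version = 1;
+ *	qinfo.attr_id = cpu_to_be16(MY_ATTR_ID);
+ *	qinfo.callback = my_xds_done;
+ *	qinfo.context = clt;
+ *
+ *	qhandle = xsigo_ib_create_xds_context(&qinfo);
+ *	if (qhandle == XSIGO_IB_ERROR_HANDLE)
+ *		return -EIO;
+ *
+ *	xsigo_ib_query_xds(qhandle, req, len, xds_lid);
+ *
+ * The reply (or a failure status) is delivered through 'my_xds_done', an
+ * xsigo_query_callback; the context is released afterwards with
+ * xsigo_ib_delete_xds_context(qhandle).
+ */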
+
+#endif	/* __XSIGOIB_H__ */
-- 
1.5.2