[ewg] [PATCH 4/9] [RFC] Add support for Xsigo logical "links"

Hal Rosenstock hrosenstock at xsigo.com
Fri Apr 4 06:18:48 PDT 2008


This patch adds support for Xsigo logical "links". This includes
support for querying the Xsigo Directory Service (XDS). From the
host's point of view, the XDS holds the list of XCMs (Xsigo
Configuration Managers) assigned to it on a given IB port. The XDS
itself is first located via a standard SA ServiceRecord query.

Signed-off-by: Hal Rosenstock <hal at xsigo.com>
---
 drivers/infiniband/ulp/xsigo/xscore/ib_if.c     |  837 +++++++++++++++++++++++
 drivers/infiniband/ulp/xsigo/xscore/ib_if.h     |  119 ++++
 drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h |   82 +++
 3 files changed, 1038 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.c
 create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.h
 create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
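
Note for reviewers (not part of the commit message): a minimal sketch of
how the interfaces added below are meant to be driven. The XCPM core
that actually does this lives in other patches of the series, so the
function name example_port_bringup() is purely illustrative:

	/* Assumes the xscore headers from this series (ib_if.h et al.) */
	static void example_port_bringup(struct ib_device *device, int port_num,
					 struct ib_pd *pd, struct ib_mr *mr,
					 struct ib_port_info *ib_port)
	{
		u32 xds_handle = XSIGO_IB_ERROR_HANDLE;

		/* Cache GUID/LID/device attributes and create the per-port
		 * XDS MAD query context. */
		if (ib_if_port_init(device, port_num, pd, mr, ib_port,
				    &xds_handle))
			return;

		/*
		 * Locate the XDS via a standard SA ServiceRecord query for
		 * "XSIGOXDS".  service_rec_callback() then posts the vendor
		 * MAD carrying the host GUID and hostname; on a reply,
		 * xds_query_callback() hands the parsed XCM list to the core,
		 * which is expected to set up one link per XCM via
		 * ib_if_link_init() and ib_if_link_connect().
		 */
		ib_if_sa_query_xds(ib_port);
	}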

diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.c b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c
new file mode 100644
index 0000000..52f1c13
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <rdma/ib_cache.h>
+
+#include "xs_core.h"
+#include "ib_if.h"
+#include "xcpm_export.h"
+#include "xcpm_priv.h"
+#include "xsmp.h"
+
+static struct kmem_cache *ib_if_cachep;
+extern struct kmem_cache *xsmp_cachep;
+
+/* Used to track outstanding reads and writes at the time of a disconnect */
+static atomic_t msg_refcount;
+
+extern struct workqueue_struct *xcpm_wq;
+extern struct xcpm_info *xcpm;
+
+static int ib_if_port_setup(struct ib_device *device, int port_num,
+			    struct ib_pd *pd, struct ib_mr *mr,
+			    struct ib_port_info *ib_port, u32 *xds_handle);
+static void xds_query_callback(u32 handle, void *context,
+			       int status, struct ib_xds_mad *xds_mad);
+static void recv_comp_handler(struct ib_cq *cq, void *cq_context);
+static void send_comp_handler(struct ib_cq *cq, void *cq_context);
+
+static struct ib_sa_client sa_client;
+
+int ib_if_init(struct ib_client *ibclient)
+{
+	int ret;
+
+	ib_sa_register_client(&sa_client);
+
+	ret = ib_register_client(ibclient);
+	if (!ret)
+		atomic_set(&msg_refcount, 0);
+
+	return ret;
+}
+
+void ib_if_exit(struct ib_client *ibclient)
+{
+	ib_unregister_client(ibclient);
+	ib_sa_unregister_client(&sa_client);
+}
+
+/*
+ * Post a query to the XDS to obtain a list of XCMs assigned to the
+ * server on this port
+ */
+int ib_if_query_xds(struct ib_port_info *ib_port)
+{
+	struct ib_port_attr port_attr;
+	struct xds_request request;
+	int ret = 0;
+	int xds_dlid;
+
+	atomic_inc(&ib_port->refcount);
+
+	if (ib_port->port_down) {
+		ret = -EAGAIN;
+		goto ib_if_query_exit2;
+	}
+
+	xcpm_debug(KERN_INFO, "Querying XDS on port %d\n", ib_port->port_num);
+
+	/* Determine the XDS lid to use for the query */
+	xds_dlid = ib_port->xds_dlid;
+	xcpm_debug(KERN_INFO, "XDS query to lid %d\n", xds_dlid);
+
+	/* Mark the port as having queried the XDS (the query follows below) */
+	ib_port->queried = 1;
+
+	port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num);
+	if (port_attr.state != IB_PORT_ACTIVE) {
+		xcpm_debug(KERN_WARNING, "Port %d not active\n",
+			   ib_port->port_num);
+		ret = -EAGAIN;
+		goto ib_if_query_exit2;
+	}
+
+	if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) {
+		xsigo_ib_delete_xds_context(ib_port->xds_handle);
+		ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE;
+	}
+
+	/* Re-initialize the port, in case any parameters changed */
+	ib_if_port_setup(ib_port->device, ib_port->port_num,
+			 ib_port->pd, ib_port->mr, ib_port,
+			 &ib_port->xds_handle);
+
+
+	memset(&request, 0, sizeof(request));
+
+	if (ib_port->xds_handle == XSIGO_IB_ERROR_HANDLE) {
+		xcpm_debug(KERN_ERR, "Trying to query on an uninitialized "
+			   "query context\n");
+		ret = -EINVAL;
+		goto ib_if_query_exit2;
+	}
+
+	request.server_record.vm_id = 0;
+	request.server_record.port_id = cpu_to_be64(ib_port->guid);
+
+	strncpy(request.hostname, init_utsname()->nodename,
+		XSIGO_MAX_HOSTNAME);
+
+	ret = xsigo_ib_query_xds(ib_port->xds_handle, (u8 *) &request,
+				 sizeof(request), xds_dlid);
+	if (!ret)
+		goto ib_if_query_exit;
+
+ib_if_query_exit2:
+	if (!atomic_dec_return(&ib_port->refcount))
+		wake_up(&xcpm_wait);
+ib_if_query_exit:
+	return ret;
+}
+
+/* Parse the XCM records and send out an XCM list */
+static int parse_and_dispatch_xcmlist(struct ib_port_info *ib_port,
+				      struct ib_xds_mad *xds_mad)
+{
+	u8 *data = xds_mad->data;
+	struct server_info sinfo;
+	int count;
+	struct xcm_list list;
+
+	/* Get the server info record */
+	memcpy((u8 *) &sinfo, data, sizeof(sinfo));
+	data += sizeof(sinfo);
+	sinfo.port_id = be64_to_cpu(sinfo.port_id);
+
+	/* List of XCFMs */
+	memcpy((u8 *) &list, data, sizeof(list));
+
+	if (list.count * sizeof(struct xcfm_record) >
+	    ((u8 *) xds_mad + sizeof(struct ib_xds_mad) - data) ||
+	    list.count > MAX_XCFM_COUNT) {
+		printk(KERN_ERR PFX "Number of XCFM records '%d' is too high '%d'\n",
+			   list.count, MAX_XCFM_COUNT);
+		goto dispatch_exit;
+	}
+
+	if (list.count && list.xcm_version != XCM_REC_VERSION) {
+		printk(KERN_ERR PFX "xcm_version '%d' mismatch, expected '%d'\n",
+		       list.xcm_version, XCM_REC_VERSION);
+		goto dispatch_exit;
+	}
+
+	xcpm_debug(KERN_INFO,
+		   "SA response: %d records (port %d) xcm_version %d\n",
+		   list.count, ib_port->port_num, list.xcm_version);
+
+	for (count = 0; count < list.count; count++) {
+		/* Convert the lid to host order; keep the GIDs in network byte order */
+		list.xcms[count].xcm_lid = be16_to_cpu(list.xcms[count].xcm_lid);
+
+		xcpm_debug(KERN_INFO, "Count %d, xcm_lid %d, port_id 0x%Lx\n",
+			   count, list.xcms[count].xcm_lid,
+			   be64_to_cpu(list.xcms[count].port_id));
+	}
+
+	allocate_port_and_links(ib_port, &list);
+
+dispatch_exit:
+	return list.count;
+}
+
+static void xds_query_callback(u32 handle, void *context,
+			       int status, struct ib_xds_mad *xds_mad)
+{
+	struct ib_port_info *ib_port = (struct ib_port_info *) context;
+
+	if (status == IB_WC_SUCCESS) {
+		if (parse_and_dispatch_xcmlist(ib_port, xds_mad) == 0)
+			ib_port->fast_poll = 1;
+		else
+			ib_port->fast_poll = 0;
+	} else {
+		xcpm_debug(KERN_ERR,
+			   "Error sending query XDS MAD, status %d (port %d)\n",
+			   status, ib_port->port_num);
+		ib_port->fast_poll = 0;
+	}
+
+	if (!atomic_dec_return(&ib_port->refcount))
+		wake_up(&xcpm_wait);
+}
+
+/*
+ * Port sweeping: check all ports periodically to see if
+ * any of the links need to be reconnected
+ */
+static void port_sweep_handler(struct work_struct *work)
+{
+	struct ib_port_info *ib_port = container_of(work, struct ib_port_info,
+						    port_sweep_work.work);
+
+	/* Query the SA only if we have not done so already */
+	if (ib_port->queried)
+		return;
+
+	ib_port->queried = 1;
+
+	ib_if_sa_query_xds(ib_port);
+}
+
+static int ib_if_port_setup(struct ib_device *device, int port_num,
+			    struct ib_pd *pd, struct ib_mr *mr,
+			    struct ib_port_info *ib_port, u32 *xds_handle)
+{
+	int ret;
+	struct xsigo_ib_query_info query_info = {
+		.device = device,
+		.port_num = port_num,
+		.mgmt_class = XSIGO_MGMT_CLASS,
+		.mgmt_class_version = XSIGO_MGMT_CLASS_VERSION,
+		.attr_id = __constant_cpu_to_be16(IB_MAD_ATTR_XCM_REQUEST),
+		.context = ib_port,
+		.callback = &xds_query_callback
+	};
+	struct ib_device_attr dev_attr;
+
+	xcpm_debug(KERN_INFO, "ib_if_port_init port %d...\n", port_num);
+
+	ib_port->device = device;
+	ib_port->port_num = port_num;
+	ib_port->pd = pd;
+	ib_port->mr = mr;
+
+	ib_port->gid = xsigo_ib_get_port_gid(device, port_num);
+
+	/* The port 'guid' is stored in host byte order, like all other fields */
+	ib_port->guid = be64_to_cpu(ib_port->gid.global.interface_id);
+	ib_port->lid = xsigo_ib_get_port_lid(device, port_num);
+
+	ret = ib_query_device(device, &dev_attr);
+	if (ret) {
+		printk(KERN_ERR PFX "ib_query_device %s failed %d\n",
+		       device->name, ret);
+		ib_port->fw_ver = 0;
+		ib_port->hw_ver = 0;
+		ib_port->vendor_part_id = 0;
+		ret = 0;
+	} else {
+		ib_port->fw_ver = dev_attr.fw_ver;
+		ib_port->hw_ver = dev_attr.hw_ver;
+		ib_port->vendor_part_id = dev_attr.vendor_part_id;
+	}
+
+	*xds_handle = xsigo_ib_create_xds_context(&query_info);
+	if (*xds_handle == XSIGO_IB_ERROR_HANDLE) {
+		printk(KERN_ERR PFX "xsigo_ib_create_xds_context failed on %s port %d\n",
+		       device->name, port_num);
+		ret = -EINVAL;
+		goto port_setup_exit;
+	}
+
+port_setup_exit:
+	return ret;
+}
+
+/* Locating the XDS via an SA ServiceRecord query */
+void service_rec_callback(int status, struct ib_sa_service_rec *resp,
+			  void *context)
+{
+	struct ib_port_info *ib_port = (struct ib_port_info *) context;
+
+	xcpm_debug(KERN_INFO, "Service rec callback, resp: %p, status: %d\n",
+		   resp, status);
+
+	if (!resp || status) {
+		xcpm_debug(KERN_WARNING, "Error %d during SA XDS query\n", status);
+		ib_port->fast_poll = 0;
+	} else {
+		ib_port->xds_dlid = cpu_to_be16(resp->data16[0]);
+
+		xcpm_debug(KERN_INFO, "XDS lid %d\n", ib_port->xds_dlid);
+
+		ib_if_query_xds(ib_port);
+	}
+
+	schedule_port_sweep(ib_port, ib_port->fast_poll);
+
+	if (!atomic_dec_return(&ib_port->refcount))
+		wake_up(&xcpm_wait);
+}
+
+int ib_if_sa_query_xds(struct ib_port_info *ib_port)
+{
+	struct ib_sa_service_rec service_rec;
+	struct ib_sa_query *query;
+	struct ib_port_attr port_attr;
+	int ret;
+
+	xcpm_debug(KERN_INFO, "SA XDS query on port %d\n", ib_port->port_num);
+
+	atomic_inc(&ib_port->refcount);
+
+	port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num);
+	if (port_attr.state != IB_PORT_ACTIVE) {
+		xcpm_debug(KERN_WARNING, "Port %d not active\n",
+			   ib_port->port_num);
+		ret = -EAGAIN;
+		goto xds_query_exit;
+	}
+
+	memset(&service_rec, 0, sizeof(service_rec));
+	strcpy(service_rec.name, XSIGO_XDS_STRING);
+
+	ret = ib_sa_service_rec_query(&sa_client, ib_port->device, ib_port->port_num,
+				      IB_MGMT_METHOD_GET, &service_rec,
+				      IB_SA_SERVICE_REC_SERVICE_NAME, 1000,
+				      GFP_ATOMIC, &service_rec_callback,
+				      ib_port, &query);
+
+	xcpm_debug(KERN_INFO, "ib_sa_service_rec_query, return value: %d\n", ret);
+
+xds_query_exit:
+	if (ret < 0) {
+		if (!atomic_dec_return(&ib_port->refcount))
+			wake_up(&xcpm_wait);
+		schedule_port_sweep(ib_port, ib_port->fast_poll);
+	}
+
+	return ret;
+}
+
+/*
+ * Initialize the link: the IB specific part:
+ * setup ib_link_info: queue pair, CQ
+ * Initialize the 'ib_port_info' structure
+ */
+int ib_if_port_init(struct ib_device *device, int port_num, struct ib_pd *pd,
+		    struct ib_mr *mr, struct ib_port_info *ib_port,
+		    u32 *xds_handle)
+{
+	ib_port->port_down = 0;
+
+	INIT_DELAYED_WORK(&ib_port->port_sweep_work, &port_sweep_handler);
+
+	atomic_set(&ib_port->refcount, 0);
+
+	ib_port->fast_poll = 0;
+
+	return ib_if_port_setup(device, port_num, pd, mr, ib_port, xds_handle);
+}
+
+void ib_if_port_exit(struct ib_port_info *ib_port)
+{
+	xcpm_debug(KERN_INFO, "ib_if_port_exit...\n");
+
+	ib_port->port_down = 1;
+
+	/* Wait for in-flight queries and connects to drop their port references */
+	if (!wait_event_timeout(xcpm_wait,
+				!atomic_read(&ib_port->refcount), 10 * HZ))
+		xcpm_debug(KERN_WARNING,
+			   "Warning: Timed out waiting for the reference count\n");
+
+	if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) {
+		xsigo_ib_delete_xds_context(ib_port->xds_handle);
+		ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE;
+	}
+
+	/* No more port sweeping */
+	cancel_delayed_work(&ib_port->port_sweep_work);
+	flush_workqueue(xcpm_wq);
+	cancel_delayed_work(&ib_port->port_sweep_work);
+}
+
+int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink)
+{
+	if (pxcm == NULL || iblink == NULL) {
+		xcpm_debug(KERN_WARNING, "Null params\n");
+		return 0;
+	}
+
+	return (pxcm->xcm_lid == iblink->link_xcm.xcm_lid &&
+		pxcm->port_id == iblink->link_xcm.port_id);
+}
+
+/*
+ * Initialize the details of the logical link
+ * Initialize the 'ib_link_info' structure
+ */
+void ib_if_link_init(int link_index, struct xcfm_record *pxcm,
+		     struct ib_port_info *pport, struct ib_link_info *iblink)
+{
+	/* Assign HCA and port and all other parameters of the XCM */
+	iblink->link_xcm = *pxcm;
+
+	/* The port on which the link exists */
+	iblink->port = pport;
+
+	iblink->link_index = link_index;
+	iblink->handle = XSIGO_IB_ERROR_HANDLE;
+	iblink->connected = 0;
+}
+
+/* Free any buffers for sends that did not complete */
+static void ib_if_free_posted_bufs(u32 handle)
+{
+	int count;
+
+	if (handle == XSIGO_IB_ERROR_HANDLE) {
+		xcpm_debug(KERN_INFO, "Incorrect handle\n");
+		return;
+	}
+
+	for (count = 0; count < NUM_ELEMENTS; count++) {
+		if (xsigo_ib_send_buf_posted(handle, count)) {
+			void *buf = xsigo_ib_get_send_buf_address(handle, count);
+
+			xsigo_ib_unmap_send_buf(handle, count);
+			xcpm_debug(KERN_WARNING,
+				   "Freeing posted buffer: handle %d, index %d\n",
+				   handle, count);
+
+			if (buf) {
+				if (xsmp_is_local_msg(buf))
+					kmem_cache_free(xsmp_cachep, buf);
+				else
+					kfree(buf);
+			}
+		}
+	}
+}
+
+void ib_if_link_exit(struct ib_link_info *iblink)
+{
+	int time_rem;
+
+	xcpm_debug(KERN_INFO, "enter (handle %d)...\n", iblink->handle);
+
+	if (iblink->handle != XSIGO_IB_ERROR_HANDLE) {
+		iblink->connected = 0;
+
+		time_rem = wait_event_timeout(xcpm_wait,
+					      !atomic_read(&msg_refcount),
+					      10 * HZ);
+		if (!time_rem)
+			xcpm_debug(KERN_WARNING,
+				   "Timed out waiting for the reference count, "
+				   "value %d\n", atomic_read(&msg_refcount));
+
+		ib_if_free_posted_bufs(iblink->handle);
+
+		xsigo_ib_disconnect(iblink->handle);
+		iblink->handle = XSIGO_IB_ERROR_HANDLE;
+	}
+
+	xcpm_debug(KERN_INFO, "exit...\n");
+}
+
+static void ib_if_connect_callback(u32 handle, void *context,
+				   enum xsigo_ib_connect_status status)
+{
+	struct ib_link_info *iblink = (struct ib_link_info *) context;
+	int link_index;
+	struct ib_port_info *ib_port;
+
+	xcpm_debug(KERN_INFO, "Connection status: %d\n", status);
+
+	if (!iblink) {
+		xcpm_debug(KERN_WARNING, "null context\n");
+		goto callback_exit;
+	}
+
+	ib_port = iblink->port;
+	if (!ib_port) {
+		xcpm_debug(KERN_WARNING, "Uninitialized link\n");
+		goto callback_exit;
+	}
+
+	link_index = iblink->link_index;
+
+	if (status != XSIGO_IB_SUCCESS) {
+		iblink->connected = 0;
+		xcpm_debug(KERN_WARNING, "Link %d not connected (status 0x%x)\n",
+			   link_index, status);
+
+		/*
+		 * If the other side disconnected, let the link time out
+		 * so that the service resources are cleaned up as well
+		 */
+		if (status != XSIGO_IB_DISCONNECT_RECEIVED)
+			bring_down_link(link_index);
+
+		goto callback_end;
+	}
+
+	xcpm_debug(KERN_INFO, "Link %d: IB connection established, "
+		   "starting XSMP connection sequence...\n", link_index);
+	iblink->connected = 1;
+	startup_link(link_index, ib_port->fw_ver, ib_port->hw_ver,
+		     ib_port->vendor_part_id);
+
+callback_end:
+	atomic_dec(&iblink->port->refcount);
+	wake_up(&xcpm_wait);
+callback_exit:
+	return;
+}
+
+/* Connect to the XCM and update the corresponding link_info structure */
+int ib_if_link_connect(int link_index, struct ib_link_info *iblink)
+{
+	u32 handle = XSIGO_IB_ERROR_HANDLE;
+	u16 port_lid;
+	int ret;
+	struct xsigo_ib_connect_info connect_info;
+	union ib_gid gid;
+
+	memset(&connect_info, 0, sizeof(connect_info));
+
+	connect_info.device = iblink->port->device;
+	connect_info.port_num = iblink->port->port_num;
+	connect_info.num_recv_elements = NUM_ELEMENTS;
+	connect_info.num_send_elements = NUM_ELEMENTS;
+	connect_info.recv_buf_size = MAX_BUF_SIZE;
+	connect_info.send_handler = &send_comp_handler;
+	connect_info.recv_handler = &recv_comp_handler;
+	connect_info.private_data = NULL;
+	connect_info.private_data_len = 0;
+	connect_info.pd = iblink->port->pd;
+	connect_info.mr = iblink->port->mr;
+	connect_info.callback = &ib_if_connect_callback;
+	connect_info.context = (void *) iblink;
+	connect_info.no_rdma = 1;
+
+	connect_info.dgid.global.interface_id = iblink->link_xcm.port_id;
+	connect_info.sgid = iblink->port->gid;
+	connect_info.dlid = cpu_to_be16(iblink->link_xcm.xcm_lid);
+	port_lid = iblink->port->lid;
+	connect_info.slid = cpu_to_be16(port_lid);
+
+	ret = ib_get_cached_gid(iblink->port->device, iblink->port->port_num,
+				0, &gid);
+	if (ret)
+		connect_info.dgid.global.subnet_prefix = cpu_to_be64(DEFAULT_SUBNET_PREFIX);
+	else
+		connect_info.dgid.global.subnet_prefix = gid.global.subnet_prefix;
+
+	connect_info.service_id = cpu_to_be64(SERVICE_ID);
+
+	xcpm_debug(KERN_INFO, "GIDs: local <0x%Lx 0x%Lx>, "
+		   "remote <0x%Lx 0x%Lx> service_id <0x%llx>\n",
+		   be64_to_cpu(connect_info.sgid.global.subnet_prefix),
+		   be64_to_cpu(connect_info.sgid.global.interface_id),
+		   be64_to_cpu(connect_info.dgid.global.subnet_prefix),
+		   be64_to_cpu(connect_info.dgid.global.interface_id),
+		   be64_to_cpu(connect_info.service_id));
+
+	xcpm_debug(KERN_INFO, "LIDs: local <0x%x>, remote <0x%x>\n",
+		   be16_to_cpu(connect_info.slid),
+		   be16_to_cpu(connect_info.dlid));
+
+	atomic_inc(&iblink->port->refcount);
+
+	handle = xsigo_ib_connect(&connect_info);
+	iblink->handle = handle;
+	if (handle == XSIGO_IB_ERROR_HANDLE) {
+		xcpm_debug(KERN_ERR,
+			   "Error getting a handle for link %d\n",
+			   link_index);
+		if (!atomic_dec_return(&iblink->port->refcount))
+			wake_up(&xcpm_wait);
+		ret = -EINVAL;
+	} else {
+		xcpm_debug(KERN_INFO,
+			   "Retrieved a handle <0x%x> for link %d\n",
+			   handle, link_index);
+
+		ret = 0;
+	}
+
+	return ret;
+}
+
+struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link)
+{
+	if (!link->connected)
+		return NULL;
+
+	return xsigo_ib_get_recv_cq(link->handle);
+}
+
+/* Receive message handling */
+
+/*
+ * Receive completion handler shared by the queue pairs of all links.
+ * 'cq_context' is the XsigoIB connection handle of the link the
+ * message arrived on; 'cq' is the CQ associated with that link.
+ */
+void recv_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+	int handle = (unsigned long) cq_context;
+	struct ib_link_info *ib_link = xsigo_ib_get_handle_context(handle);
+	int link;
+
+	if (!ib_link)
+		return;
+
+	link = ib_link->link_index;
+
+	/* Schedule 'work' to handle the events if the link is up */
+	if (xcpm->links[link].link_state != LINK_DOWN &&
+	    xcpm->links[link].link_state != LINK_DEAD &&
+	    !atomic_read(&xcpm->xcpm_down))
+		queue_work(xcpm_wq, &xcpm->links[link].msg_dispatch_work);
+}
+
+/* Actually handle the messages received from the XCM */
+void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq)
+{
+	struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC);
+	u32 conn_handle;
+	int count, num_completions;
+
+	if (!wcp) {
+		printk(KERN_ERR PFX "%s cache allocation failed\n", __FUNCTION__);
+		return;
+	}
+
+	atomic_inc(&msg_refcount);
+
+	if (!iblink) {
+		xcpm_debug(KERN_WARNING, "Warning: iblink is null\n");
+		goto recv_exit;
+	}
+
+	if (!cq) {
+		xcpm_debug(KERN_WARNING, "Null CQ, probably a deleted handle\n");
+		goto recv_exit;
+	}
+
+	/* Check the link first before handling messages */
+	if (!iblink->connected) {
+		xcpm_debug(KERN_WARNING,
+			   "Message received for a non-existent link (probably "
+			   "a phantom CQ), ignoring...\n");
+		goto recv_exit;
+	}
+
+	conn_handle = iblink->handle;
+
+	num_completions = xsigo_ib_get_completions(conn_handle, RECV_CQ,
+						   NUM_ELEMENTS, wcp);
+
+	for (count = 0; count < num_completions; count++) {
+		int index = wcp[count].wr_id;
+		struct ib_wc *wc = &wcp[count];
+		u8 *buf;
+		int length;
+
+		if (wc->status != IB_WC_SUCCESS) {
+			if (wc->status != IB_WC_WR_FLUSH_ERR)
+				xcpm_debug(KERN_ERR,
+					   "Receive error: id %d, "
+					   "status %d (handle %d)\n",
+					   index, wc->status, conn_handle);
+			continue;
+		}
+
+		if (iblink->connected) {
+			buf = xsigo_ib_get_recv_buf_address(conn_handle, index);
+			length = wc->byte_len;
+			xsigo_ib_unmap_recv_buf(conn_handle, index);
+			process_incoming_msg(iblink->link_index, buf, length);
+		}
+
+		if (iblink->connected) {
+			/*
+			 * Re-post the receive buffer so that further
+			 * messages can be received on this connection
+			 */
+			xsigo_ib_post_receive(conn_handle, index);
+		} else {
+			xcpm_debug(KERN_WARNING,
+				   "Handle %d disconnected, skipping buffer "
+				   "posting\n", conn_handle);
+			continue;
+		}
+	}
+
+	if (iblink->connected)
+		xsigo_ib_arm_cq(conn_handle, RECV_CQ);
+	else
+		xcpm_debug(KERN_WARNING,
+			   "Handle %d disconnected, skipping CQ arming\n",
+			   conn_handle);
+
+recv_exit:
+	atomic_dec(&msg_refcount);
+	wake_up(&xcpm_wait);
+	kmem_cache_free(ib_if_cachep, wcp);
+}
+
+/* End - handling of receive messages */
+
+/* Handling sends */
+
+/* Send message completion handler */
+static void send_comp_handler(struct ib_cq *send_cq, void *cq_context)
+{
+	struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC);
+	u32 conn_handle = (u32)(unsigned long) cq_context;
+	struct ib_link_info *iblink;
+	int count = 0, num_completions;
+
+	if (!wcp) {
+		printk(KERN_ERR PFX "%s cache allocation failed\n", __FUNCTION__);
+		return;
+	}
+
+	iblink = xsigo_ib_get_handle_context(conn_handle);
+	if (!iblink) {
+		xcpm_debug(KERN_WARNING, "Warning: iblink is null\n");
+		goto send_exit;
+	}
+
+	num_completions = xsigo_ib_get_completions(conn_handle, SEND_CQ,
+						   NUM_ELEMENTS, wcp);
+
+	for (count = 0; count < num_completions; count++) {
+		int index = wcp[count].wr_id;
+		struct ib_wc *wc = &wcp[count];
+		void *buf;
+
+		if (wc->status != IB_WC_SUCCESS) {
+			if (wc->status != IB_WC_WR_FLUSH_ERR)
+				xcpm_debug(KERN_ERR,
+					   "Send error: id %d, "
+					   "status %d (handle %d)\n",
+					   index, wc->status, conn_handle);
+		}
+
+		buf = xsigo_ib_get_send_buf_address(conn_handle, index);
+		xsigo_ib_unmap_send_buf(conn_handle, index);
+
+		if (buf) {
+			if (xsmp_is_local_msg(buf))
+				kmem_cache_free(xsmp_cachep, buf);
+			else
+				kfree(buf);
+		}
+
+		atomic_dec(&msg_refcount);
+		wake_up(&xcpm_wait);
+	}
+
+	/*
+	 * Accept further notifications
+	 * This is unconditional unlike the recv completion handler
+	 */
+	xsigo_ib_arm_cq(conn_handle, SEND_CQ);
+
+send_exit:
+	kmem_cache_free(ib_if_cachep, wcp);
+}
+
+/* Send out a message on the queue pair */
+int ib_if_send_msg(struct ib_link_info *iblink, u8 *data, int length)
+{
+	int ret;
+
+	if (!iblink->connected) {
+		xcpm_debug(KERN_WARNING, "Link not connected, discarding send\n");
+		ret = -EINVAL;
+		goto ib_if_send_msg_exit;
+	}
+
+	xcpm_debug(KERN_DEBUG, "Dispatching a send (handle %d, length %d)...\n",
+		   iblink->handle, length);
+
+	atomic_inc(&msg_refcount);
+
+	/* The link status has already been checked at the top of this function */
+	ret = xsigo_ib_send_msg(iblink->handle, data, length);
+
+ib_if_send_msg_exit:
+	return ret;
+}
+
+/* Memory pools */
+int alloc_ib_if_mem_pool(void)
+{
+	ib_if_cachep = kmem_cache_create("xscore_xcpm_ib_if_cache",
+					 sizeof(struct ib_wc) * NUM_ELEMENTS, 0,
+					 0, NULL);
+
+	return ib_if_cachep ? 0 : -ENOMEM;
+}
+
+void dealloc_ib_if_mem_pool(void)
+{
+	if (ib_if_cachep)
+		kmem_cache_destroy(ib_if_cachep);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h
new file mode 100644
index 0000000..8a6cb56
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __IB_IF_H__
+#define __IB_IF_H__
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+#include <rdma/ib_cm.h>
+#include <asm/byteorder.h>
+
+#include "xsigoib.h"
+#include "ib_if_xds.h"
+
+#define MAD_TIMEOUT_MS 1000
+
+#define NUM_ELEMENTS 288
+
+#define MAX_BUF_SIZE 1024
+#define XSIGO_XDS_STRING "XSIGOXDS"
+
+struct link_msg_buf {
+	void *vaddr;
+	u64 dma_addr;
+	int length;
+	int posted;
+};
+
+/* Parameters for the logical link */
+struct ib_link_info {
+	struct ib_port_info *port;	/* The port used on the HCA */
+	struct xcfm_record link_xcm;	/* Remote endpoint: the XCM */
+	u32 link_index;			/* Index referenced by the XCPM core */
+	u32 handle;			/* XsigoIB connection handle */
+
+	/*
+	 * State of the IB connection
+	 * We need this separate from the handle value because
+	 * sometimes we get messages on stale handle values
+	 */
+	int connected;
+};
+
+struct ib_port_info {
+	u64 guid;		/* Host byte order */
+	u16 lid;		/* Host byte order */
+	struct ib_device *device;	/* The HCA that the port belongs to */
+	int port_num;
+	struct ib_pd *pd;
+	struct ib_mr *mr;
+	union ib_gid gid;
+	u32 xds_handle;
+	int queried;		/* 0 => Query the XDS */
+	u64 fw_ver;
+	u32 vendor_part_id;
+	u32 hw_ver;
+	struct delayed_work port_sweep_work;
+	int used;
+	atomic_t refcount;
+	int port_down;
+	u16 xds_dlid;
+	int fast_poll;
+};
+
+int ib_if_port_init(struct ib_device *device, int port_num,
+		    struct ib_pd *pd, struct ib_mr *mr,
+		    struct ib_port_info *ib_port, u32 *xds_handle);
+void ib_if_port_exit(struct ib_port_info *ib_port);
+void ib_if_link_init(int link_index, struct xcfm_record *pxcm,
+		     struct ib_port_info *pport, struct ib_link_info *plink);
+void ib_if_link_exit(struct ib_link_info *iblink);
+int ib_if_link_connect(int port_index, struct ib_link_info *iblink);
+int ib_if_post_receive(struct ib_link_info *ib_link, int id);
+int ib_if_send_msg(struct ib_link_info *ib_link, u8 *data, int length);
+int ib_if_init(struct ib_client *ibclient);
+void ib_if_exit(struct ib_client *ibclient);
+void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq);
+struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link);
+int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink);
+int alloc_ib_if_mem_pool(void);
+void dealloc_ib_if_mem_pool(void);
+int ib_if_sa_query_xds(struct ib_port_info *);
+
+#endif	/* __IB_IF_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
new file mode 100644
index 0000000..3baa603
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __IB_IF_XDS_H__
+#define __IB_IF_XDS_H__
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+
+#define XCM_REC_VERSION 1
+#define MAX_XCFM_COUNT	8
+
+#define XSIGO_MGMT_CLASS	 0x0B
+#define XSIGO_MGMT_CLASS_VERSION 0x02
+
+#define IB_MAD_ATTR_XCM_REQUEST	 0xB002
+
+#define XSIGO_MGMT_METHOD_GET	IB_MGMT_METHOD_GET
+#define XSIGO_MGMT_METHOD_SET	IB_MGMT_METHOD_SET
+
+#define XSIGO_MAX_HOSTNAME 65
+
+struct xcfm_record {
+	u64 port_id;
+	u16 xcm_lid;     /* lid of the XCM port */
+	u8 reserved[10];
+} __attribute__ ((packed));
+
+struct xcm_list {
+	u8 count;
+	u8 xcm_version;
+	u8 reserved[2];
+	struct xcfm_record xcms[MAX_XCFM_COUNT];
+};
+
+struct server_info {
+	u32 vm_id;
+	u64 port_id;
+} __attribute__ ((packed));
+
+struct xds_request {
+	struct server_info server_record;
+	char hostname[XSIGO_MAX_HOSTNAME];
+} __attribute__ ((packed));
+
+struct ib_xds_mad {
+	struct ib_mad_hdr mad_hdr;
+	u8 reserved[IB_MGMT_SA_HDR - IB_MGMT_MAD_HDR];
+	u8 data[IB_MGMT_SA_DATA];
+} __attribute__ ((packed));
+
+#endif /*__IB_IF_XDS_H__ */
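
Reviewer note, outside the diff: the XDS reply layout implied by the
structures above is a server_info record followed by an xcm_list in
the MAD data area, which is what parse_and_dispatch_xcmlist() in
ib_if.c relies on. A stand-alone sketch of that parsing, using the
hypothetical helper name example_parse_xcm_list(), for reference:

	static int example_parse_xcm_list(const struct ib_xds_mad *mad,
					  struct xcm_list *list)
	{
		const u8 *data = mad->data;

		/* The reply starts with the echoed server_info record */
		data += sizeof(struct server_info);

		/* The XCM list follows; lids arrive in network byte order */
		memcpy(list, data, sizeof(*list));

		if (list->count > MAX_XCFM_COUNT ||
		    (list->count && list->xcm_version != XCM_REC_VERSION))
			return -EINVAL;

		return list->count;
	}
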
-- 
1.5.2