[ewg] [PATCH 4/9] [RFC] Add support for Xsigo logical "links"
Hal Rosenstock
hrosenstock at xsigo.com
Fri Apr 4 06:18:48 PDT 2008
This patch adds support for Xsigo logical "links", including support
for the Xsigo Directory Service (XDS). From the host's point of view,
the XDS holds the list of XCMs (Xsigo Configuration Managers) assigned
to the host for a given IB port. The XDS is first located via a
standard SA ServiceRecord query; the XCM list is then retrieved from
it with a MAD query (management class 0x0B, attribute XCM_REQUEST).
Signed-off-by: Hal Rosenstock <hal at xsigo.com>
---
drivers/infiniband/ulp/xsigo/xscore/ib_if.c | 837 +++++++++++++++++++++++
drivers/infiniband/ulp/xsigo/xscore/ib_if.h | 117 ++++
drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h | 82 +++
3 files changed, 1036 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.c
create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if.h
create mode 100644 drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
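
[Reviewer note, not part of the patch] The XCM discovery implemented in
ib_if.c is a two-step flow: an SA ServiceRecord query for the well-known
service name "XSIGOXDS" returns the LID of the XDS, and a Xsigo
management-class MAD carrying an xds_request is then sent to that LID;
the reply data holds a server_info record followed by an xcm_list. Below
is a minimal sketch of step 1 only, built on the same
ib_sa_service_rec_query() kernel API that ib_if_sa_query_xds() uses; the
sketch_* names are illustrative, error paths and refcounting are omitted,
and ib_if_query_xds() is the step-2 entry point defined in ib_if.c:

    /* Sketch only -- mirrors ib_if_sa_query_xds()/service_rec_callback() */
    #include <rdma/ib_sa.h>
    #include "ib_if.h"

    static void sketch_xds_located(int status, struct ib_sa_service_rec *resp,
                                   void *context)
    {
            struct ib_port_info *port = context;

            if (status || !resp)
                    return;         /* SA query failed; give up in this sketch */

            /* data16[0] of the ServiceRecord carries the LID of the XDS */
            port->xds_dlid = be16_to_cpu(resp->data16[0]);

            /* step 2: send the XCM_REQUEST MAD to the XDS */
            ib_if_query_xds(port);
    }

    static int sketch_locate_xds(struct ib_sa_client *sa_client,
                                 struct ib_port_info *port)
    {
            struct ib_sa_service_rec rec;
            struct ib_sa_query *query;

            memset(&rec, 0, sizeof(rec));
            strcpy(rec.name, XSIGO_XDS_STRING);     /* "XSIGOXDS" */

            return ib_sa_service_rec_query(sa_client, port->device,
                                           port->port_num, IB_MGMT_METHOD_GET,
                                           &rec, IB_SA_SERVICE_REC_SERVICE_NAME,
                                           MAD_TIMEOUT_MS, GFP_KERNEL,
                                           sketch_xds_located, port, &query);
    }

Step 2 (building the XCM_REQUEST MAD and parsing the returned xcm_list) is
implemented in ib_if_query_xds() and parse_and_dispatch_xcmlist() in the
patch below.
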
diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.c b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c
new file mode 100644
index 0000000..52f1c13
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.c
@@ -0,0 +1,837 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <rdma/ib_cache.h>
+
+#include "xs_core.h"
+#include "ib_if.h"
+#include "xcpm_export.h"
+#include "xcpm_priv.h"
+#include "xsmp.h"
+
+static struct kmem_cache *ib_if_cachep;
+extern struct kmem_cache *xsmp_cachep;
+
+/* Used to track outstanding reads and writes at the time of a disconnect */
+static atomic_t msg_refcount;
+
+extern struct workqueue_struct *xcpm_wq;
+extern struct xcpm_info *xcpm;
+
+static int ib_if_port_setup(struct ib_device *device, int port_num,
+ struct ib_pd *pd, struct ib_mr *mr,
+ struct ib_port_info *ib_port, u32 *xds_handle);
+static void xds_query_callback(u32 handle, void *context,
+ int status, struct ib_xds_mad *xds_mad);
+static void recv_comp_handler(struct ib_cq *cq, void *cq_context);
+static void send_comp_handler(struct ib_cq *cq, void *cq_context);
+
+static struct ib_sa_client sa_client;
+
+int ib_if_init(struct ib_client *ibclient)
+{
+ int ret;
+
+ ib_sa_register_client(&sa_client);
+
+ ret = ib_register_client(ibclient);
+ if (!ret)
+ atomic_set(&msg_refcount, 0);
+
+ return ret;
+}
+
+void ib_if_exit(struct ib_client *ibclient)
+{
+ ib_unregister_client(ibclient);
+ ib_sa_unregister_client(&sa_client);
+}
+
+/*
+ * Post a query to the XDS to obtain a list of XCMs assigned to the
+ * server on this port
+ */
+int ib_if_query_xds(struct ib_port_info *ib_port)
+{
+ struct ib_port_attr port_attr;
+ struct xds_request request;
+ int ret = 0;
+ int xds_dlid;
+
+ atomic_inc(&ib_port->refcount);
+
+ if (ib_port->port_down) {
+ ret = -EAGAIN;
+ goto ib_if_query_exit2;
+ }
+
+ xcpm_debug(KERN_INFO, "Querying XDS on port %d\n", ib_port->port_num);
+
+ /* Determine the XDS lid to use for the query */
+ xds_dlid = ib_port->xds_dlid;
+ xcpm_debug(KERN_INFO, "XDS query to lid %d\n", xds_dlid);
+
+ /* We have communicated with the XDS (just about to) */
+ ib_port->queried = 1;
+
+ port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num);
+ if (port_attr.state != IB_PORT_ACTIVE) {
+ xcpm_debug(KERN_WARNING, "Port %d not active\n",
+ ib_port->port_num);
+ ret = -EAGAIN;
+ goto ib_if_query_exit2;
+ }
+
+ if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) {
+ xsigo_ib_delete_xds_context(ib_port->xds_handle);
+ ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE;
+ }
+
+ /* Re-initialize the port, in case any parameters changed */
+ ib_if_port_setup(ib_port->device, ib_port->port_num,
+ ib_port->pd, ib_port->mr, ib_port,
+ &ib_port->xds_handle);
+
+
+ memset(&request, 0, sizeof(request));
+
+ if (ib_port->xds_handle == XSIGO_IB_ERROR_HANDLE) {
+ xcpm_debug(KERN_ERR, "Trying to query on an uninitialized "
+ "query context\n");
+ ret = -EINVAL;
+ goto ib_if_query_exit2;
+ }
+
+ request.server_record.vm_id = 0;
+ request.server_record.port_id = cpu_to_be64(ib_port->guid);
+
+ strncpy(request.hostname, init_utsname()->nodename,
+ XSIGO_MAX_HOSTNAME);
+
+ ret = xsigo_ib_query_xds(ib_port->xds_handle, (u8 *) &request,
+ sizeof(request), xds_dlid);
+ if (!ret)
+ goto ib_if_query_exit;
+
+ib_if_query_exit2:
+ if (!atomic_dec_return(&ib_port->refcount))
+ wake_up(&xcpm_wait);
+ib_if_query_exit:
+ return ret;
+}
+
+/* Parse the XCM records and send out an XCM list */
+static int parse_and_dispatch_xcmlist(struct ib_port_info *ib_port,
+ struct ib_xds_mad *xds_mad)
+{
+ u8 *data = xds_mad->data;
+ struct server_info sinfo;
+ int count;
+ struct xcm_list list;
+
+ /* Get the server info record */
+ memcpy((u8 *) &sinfo, data, sizeof(sinfo));
+ data += sizeof(sinfo);
+	sinfo.port_id = be64_to_cpu(sinfo.port_id);
+
+ /* List of XCFMs */
+ memcpy((u8 *) &list, data, sizeof(list));
+
+ if (list.count * sizeof(struct xcfm_record) >
+ ((u8 *) xds_mad + sizeof(struct ib_xds_mad) - data) ||
+ list.count > MAX_XCFM_COUNT) {
+		printk(KERN_ERR PFX "Number of XCFM records %d exceeds the maximum %d\n",
+		       list.count, MAX_XCFM_COUNT);
+ goto dispatch_exit;
+ }
+
+ if (list.count && list.xcm_version != XCM_REC_VERSION) {
+ printk(KERN_ERR PFX "xcm_version '%d' mismatch, expected '%d'\n",
+ list.xcm_version, XCM_REC_VERSION);
+ goto dispatch_exit;
+ }
+
+ xcpm_debug(KERN_INFO,
+ "SA response: %d records (port %d) xcm_version %d\n",
+ list.count, ib_port->port_num, list.xcm_version);
+
+ for (count = 0; count < list.count; count++) {
+		list.xcms[count].xcm_lid = be16_to_cpu(list.xcms[count].xcm_lid);
+		/* Keep the GID components in network byte order */
+
+		xcpm_debug(KERN_INFO, "Count %d, xcm_lid %d, port_id 0x%llx\n",
+			count, list.xcms[count].xcm_lid,
+			be64_to_cpu(list.xcms[count].port_id));
+ }
+
+ allocate_port_and_links(ib_port, &list);
+
+dispatch_exit:
+ return list.count;
+}
+
+static void xds_query_callback(u32 handle, void *context,
+ int status, struct ib_xds_mad *xds_mad)
+{
+ struct ib_port_info *ib_port = (struct ib_port_info *) context;
+
+ if (status == IB_WC_SUCCESS) {
+ if (parse_and_dispatch_xcmlist(ib_port, xds_mad) == 0)
+ ib_port->fast_poll = 1;
+ else
+ ib_port->fast_poll = 0;
+ } else {
+ xcpm_debug(KERN_ERR,
+ "Error sending query XDS MAD, status %d (port %d)\n",
+ status, ib_port->port_num);
+ ib_port->fast_poll = 0;
+ }
+
+ if (!atomic_dec_return(&ib_port->refcount))
+ wake_up(&xcpm_wait);
+}
+
+/*
+ * Port sweeping: check all ports periodically to see if
+ * any of the links need to be reconnected
+ */
+static void port_sweep_handler(struct work_struct *work)
+{
+ struct ib_port_info *ib_port = container_of(work, struct ib_port_info,
+ port_sweep_work.work);
+
+ /* Whether we need to query the SA */
+ if (ib_port->queried)
+ return;
+
+ ib_port->queried = 1;
+
+ ib_if_sa_query_xds(ib_port);
+}
+
+static int ib_if_port_setup(struct ib_device *device, int port_num,
+ struct ib_pd *pd, struct ib_mr *mr,
+ struct ib_port_info *ib_port, u32 *xds_handle)
+{
+ int ret;
+ struct xsigo_ib_query_info query_info = {
+ .device = device,
+ .port_num = port_num,
+ .mgmt_class = XSIGO_MGMT_CLASS,
+ .mgmt_class_version = XSIGO_MGMT_CLASS_VERSION,
+ .attr_id = __constant_cpu_to_be16(IB_MAD_ATTR_XCM_REQUEST),
+ .context = ib_port,
+ .callback = &xds_query_callback
+ };
+ struct ib_device_attr dev_attr;
+
+ xcpm_debug(KERN_INFO, "ib_if_port_init port %d...\n", port_num);
+
+ ib_port->device = device;
+ ib_port->port_num = port_num;
+ ib_port->pd = pd;
+ ib_port->mr = mr;
+
+ ib_port->gid = xsigo_ib_get_port_gid(device, port_num);
+
+ /* Link 'guid' stored in host byte order like all other fields */
+ ib_port->guid = be64_to_cpu(ib_port->gid.global.interface_id);
+ ib_port->lid = xsigo_ib_get_port_lid(device, port_num);
+
+ ret = ib_query_device(device, &dev_attr);
+ if (ret) {
+ printk(KERN_ERR PFX "ib_query_device %s failed %d\n",
+ device->name, ret);
+ ib_port->fw_ver = 0;
+ ib_port->hw_ver = 0;
+ ib_port->vendor_part_id = 0;
+ ret = 0;
+ } else {
+ ib_port->fw_ver = dev_attr.fw_ver;
+ ib_port->hw_ver = dev_attr.hw_ver;
+ ib_port->vendor_part_id = dev_attr.vendor_part_id;
+ }
+
+ *xds_handle = xsigo_ib_create_xds_context(&query_info);
+ if (*xds_handle == XSIGO_IB_ERROR_HANDLE) {
+ printk(KERN_ERR PFX "xsigo_ib_create_xds_context failed on %s port %d\n",
+ device->name, port_num);
+ ret = -EINVAL;
+ goto port_setup_exit;
+ }
+
+port_setup_exit:
+ return ret;
+}
+
+/* XDS query implementation */
+void service_rec_callback(int status, struct ib_sa_service_rec *resp,
+ void *context)
+{
+ struct ib_port_info *ib_port = (struct ib_port_info *) context;
+
+ xcpm_debug(KERN_INFO, "Service rec callback, resp: %p, status: %d\n",
+ resp, status);
+
+ if (!resp || status) {
+ xcpm_debug(KERN_WARNING, "Error %d during SA XDS query\n", status);
+ ib_port->fast_poll = 0;
+ } else {
+		ib_port->xds_dlid = be16_to_cpu(resp->data16[0]);
+
+ xcpm_debug(KERN_INFO, "XDS lid %d\n", ib_port->xds_dlid);
+
+ ib_if_query_xds(ib_port);
+ }
+
+ schedule_port_sweep(ib_port, ib_port->fast_poll);
+
+ if (!atomic_dec_return(&ib_port->refcount))
+ wake_up(&xcpm_wait);
+}
+
+int ib_if_sa_query_xds(struct ib_port_info *ib_port)
+{
+ struct ib_sa_service_rec service_rec;
+ struct ib_sa_query *query;
+ struct ib_port_attr port_attr;
+ int ret;
+
+ xcpm_debug(KERN_INFO, "SA XDS query on port %d\n", ib_port->port_num);
+
+ atomic_inc(&ib_port->refcount);
+
+ port_attr = xsigo_ib_get_port_attr(ib_port->device, ib_port->port_num);
+ if (port_attr.state != IB_PORT_ACTIVE) {
+ xcpm_debug(KERN_WARNING, "Port %d not active\n",
+ ib_port->port_num);
+ ret = -EAGAIN;
+ goto xds_query_exit;
+ }
+
+ memset(&service_rec, 0, sizeof(service_rec));
+ strcpy(service_rec.name, XSIGO_XDS_STRING);
+
+ ret = ib_sa_service_rec_query(&sa_client, ib_port->device, ib_port->port_num,
+ IB_MGMT_METHOD_GET, &service_rec,
+ IB_SA_SERVICE_REC_SERVICE_NAME, 1000,
+ GFP_ATOMIC, &service_rec_callback,
+ ib_port, &query);
+
+ xcpm_debug(KERN_INFO, "ib_sa_service_rec_query, return value: %d\n", ret);
+
+xds_query_exit:
+ if (ret) {
+ if (!atomic_dec_return(&ib_port->refcount))
+ wake_up(&xcpm_wait);
+ schedule_port_sweep(ib_port, ib_port->fast_poll);
+ }
+
+ return ret;
+}
+
+/*
+ * Initialize the link: the IB specific part:
+ * setup ib_link_info: queue pair, CQ
+ * Initialize the 'ib_port_info' structure
+ */
+int ib_if_port_init(struct ib_device *device, int port_num, struct ib_pd *pd,
+ struct ib_mr *mr, struct ib_port_info *ib_port,
+ u32 *xds_handle)
+{
+ ib_port->port_down = 0;
+
+ INIT_DELAYED_WORK(&ib_port->port_sweep_work, &port_sweep_handler);
+
+ atomic_set(&ib_port->refcount, 0);
+
+ ib_port->fast_poll = 0;
+
+ return ib_if_port_setup(device, port_num, pd, mr, ib_port, xds_handle);
+}
+
+void ib_if_port_exit(struct ib_port_info *ib_port)
+{
+ xcpm_debug(KERN_INFO, "ib_if_port_exit...\n");
+
+ ib_port->port_down = 1;
+
+ /* Wait if we are in the init stage */
+ if (!wait_event_timeout(xcpm_wait,
+ !atomic_read(&ib_port->refcount), 10 * HZ))
+ xcpm_debug(KERN_WARNING,
+ "Warning: Timed out waiting for the reference count\n");
+
+ if (ib_port->xds_handle != XSIGO_IB_ERROR_HANDLE) {
+ xsigo_ib_delete_xds_context(ib_port->xds_handle);
+ ib_port->xds_handle = XSIGO_IB_ERROR_HANDLE;
+ }
+
+ /* No more port sweeping */
+ cancel_delayed_work(&ib_port->port_sweep_work);
+ flush_workqueue(xcpm_wq);
+ cancel_delayed_work(&ib_port->port_sweep_work);
+}
+
+int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink)
+{
+ if (pxcm == NULL || iblink == NULL) {
+ xcpm_debug(KERN_WARNING, "Null params\n");
+ return 0;
+ }
+
+ return (pxcm->xcm_lid == iblink->link_xcm.xcm_lid &&
+ pxcm->port_id == iblink->link_xcm.port_id);
+}
+
+/*
+ * Initialize the details of the logical link
+ * Initialize the 'ib_link_info' structure
+ */
+void ib_if_link_init(int link_index, struct xcfm_record *pxcm,
+ struct ib_port_info *pport, struct ib_link_info *iblink)
+{
+ /* Assign HCA and port and all other parameters of the XCM */
+ iblink->link_xcm = *pxcm;
+
+ /* The port on which the link exists */
+ iblink->port = pport;
+
+ iblink->link_index = link_index;
+ iblink->handle = XSIGO_IB_ERROR_HANDLE;
+ iblink->connected = 0;
+}
+
+/* Free any buffers for sends that did not complete */
+static void ib_if_free_posted_bufs(u32 handle)
+{
+ int count;
+
+ if (handle == XSIGO_IB_ERROR_HANDLE) {
+ xcpm_debug(KERN_INFO, "Incorrect handle\n");
+ return;
+ }
+
+ for (count = 0; count < NUM_ELEMENTS; count++) {
+ if (xsigo_ib_send_buf_posted(handle, count)) {
+ void *buf = xsigo_ib_get_send_buf_address(handle, count);
+
+ xsigo_ib_unmap_send_buf(handle, count);
+ xcpm_debug(KERN_WARNING,
+ "Freeing posted buffer: handle %d, index %d\n",
+ handle, count);
+
+ if (buf) {
+ if (xsmp_is_local_msg(buf))
+ kmem_cache_free(xsmp_cachep, buf);
+ else
+ kfree(buf);
+ }
+ }
+ }
+}
+
+void ib_if_link_exit(struct ib_link_info *iblink)
+{
+ int time_rem;
+
+ xcpm_debug(KERN_INFO, "enter (handle %d)...\n", iblink->handle);
+
+ if (iblink->handle != XSIGO_IB_ERROR_HANDLE) {
+ iblink->connected = 0;
+
+ time_rem = wait_event_timeout(xcpm_wait,
+ !atomic_read(&msg_refcount),
+ 10 * HZ);
+ if (!time_rem)
+ xcpm_debug(KERN_WARNING,
+ "Timed out waiting for the reference count, "
+ "value %d\n", atomic_read(&msg_refcount));
+
+ ib_if_free_posted_bufs(iblink->handle);
+
+ xsigo_ib_disconnect(iblink->handle);
+ iblink->handle = XSIGO_IB_ERROR_HANDLE;
+ }
+
+ xcpm_debug(KERN_INFO, "exit...\n");
+}
+
+static void ib_if_connect_callback(u32 handle, void *context,
+ enum xsigo_ib_connect_status status)
+{
+ struct ib_link_info *iblink = (struct ib_link_info *) context;
+ int link_index;
+ struct ib_port_info *ib_port;
+
+ xcpm_debug(KERN_INFO, "Connection status: %d\n", status);
+
+ if (!iblink) {
+ xcpm_debug(KERN_WARNING, "null context\n");
+ goto callback_exit;
+ }
+
+ ib_port = iblink->port;
+ if (!ib_port) {
+ xcpm_debug(KERN_WARNING, "Uninitialized link\n");
+ goto callback_exit;
+ }
+
+ link_index = iblink->link_index;
+
+ if (status != XSIGO_IB_SUCCESS) {
+ iblink->connected = 0;
+ xcpm_debug(KERN_WARNING, "Link %d not connected (status 0x%x)\n",
+ link_index, status);
+
+ /*
+ * We want to time out on the link if the other side
+ * disconnects and cleanup the service resources also
+ */
+ if (status != XSIGO_IB_DISCONNECT_RECEIVED)
+ bring_down_link(link_index);
+
+ goto callback_end;
+ }
+
+ xcpm_debug(KERN_INFO, "Link %d: IB connection established, "
+ "starting XSMP connection sequence...\n", link_index);
+ iblink->connected = 1;
+ startup_link(link_index, ib_port->fw_ver, ib_port->hw_ver,
+ ib_port->vendor_part_id);
+
+callback_end:
+ atomic_dec(&iblink->port->refcount);
+ wake_up(&xcpm_wait);
+callback_exit:
+ return;
+}
+
+/* Connect to the XCM and update the corresponding link_info structure */
+int ib_if_link_connect(int link_index, struct ib_link_info *iblink)
+{
+ u32 handle = XSIGO_IB_ERROR_HANDLE;
+ u16 port_lid;
+ int ret;
+ struct xsigo_ib_connect_info connect_info;
+ union ib_gid gid;
+
+ memset(&connect_info, 0, sizeof(connect_info));
+
+ connect_info.device = iblink->port->device;
+ connect_info.port_num = iblink->port->port_num;
+ connect_info.num_recv_elements = NUM_ELEMENTS;
+ connect_info.num_send_elements = NUM_ELEMENTS;
+ connect_info.recv_buf_size = MAX_BUF_SIZE;
+ connect_info.send_handler = &send_comp_handler;
+ connect_info.recv_handler = &recv_comp_handler;
+ connect_info.private_data = NULL;
+ connect_info.private_data_len = 0;
+ connect_info.pd = iblink->port->pd;
+ connect_info.mr = iblink->port->mr;
+ connect_info.callback = &ib_if_connect_callback;
+ connect_info.context = (void *) iblink;
+ connect_info.no_rdma = 1;
+
+ connect_info.dgid.global.interface_id = iblink->link_xcm.port_id;
+ connect_info.sgid = iblink->port->gid;
+ connect_info.dlid = cpu_to_be16(iblink->link_xcm.xcm_lid);
+ port_lid = iblink->port->lid;
+ connect_info.slid = cpu_to_be16(port_lid);
+
+ ret = ib_get_cached_gid(iblink->port->device, iblink->port->port_num,
+ 0, &gid);
+ if (ret)
+ connect_info.dgid.global.subnet_prefix = cpu_to_be64(DEFAULT_SUBNET_PREFIX);
+ else
+ connect_info.dgid.global.subnet_prefix = gid.global.subnet_prefix;
+
+ connect_info.service_id = cpu_to_be64(SERVICE_ID);
+
+	xcpm_debug(KERN_INFO, "GIDs: local <0x%llx 0x%llx>, "
+		"remote <0x%llx 0x%llx> service_id <0x%llx>\n",
+ be64_to_cpu(connect_info.sgid.global.subnet_prefix),
+ be64_to_cpu(connect_info.sgid.global.interface_id),
+ be64_to_cpu(connect_info.dgid.global.subnet_prefix),
+ be64_to_cpu(connect_info.dgid.global.interface_id),
+ be64_to_cpu(connect_info.service_id));
+
+ xcpm_debug(KERN_INFO, "LIDs: local <0x%x>, remote <0x%x>\n",
+ be16_to_cpu(connect_info.slid),
+ be16_to_cpu(connect_info.dlid));
+
+ atomic_inc(&iblink->port->refcount);
+
+ handle = xsigo_ib_connect(&connect_info);
+ iblink->handle = handle;
+ if (handle == XSIGO_IB_ERROR_HANDLE) {
+ xcpm_debug(KERN_ERR,
+ "Error getting a handle for link %d\n",
+ link_index);
+ if (!atomic_dec_return(&iblink->port->refcount))
+ wake_up(&xcpm_wait);
+ ret = -EINVAL;
+ } else {
+ xcpm_debug(KERN_INFO,
+ "Retrieved a handle <0x%x> for link %d\n",
+ handle, link_index);
+
+ ret = 0;
+ }
+
+ return ret;
+}
+
+struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link)
+{
+ if (!link->connected)
+ return NULL;
+
+ return xsigo_ib_get_recv_cq(link->handle);
+}
+
+/* Receive message handling */
+
+/*
+ * Completion queue handler for all queue pairs for all links
+ * 'cq_context' is the link number on which the message came on
+ * 'cq' is the completion queue associated with the link
+ */
+void recv_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+ int handle = (unsigned long) cq_context;
+ struct ib_link_info *ib_link = xsigo_ib_get_handle_context(handle);
+ int link;
+
+ if (!ib_link)
+ return;
+
+ link = ib_link->link_index;
+
+ /* Schedule 'work' to handle the events if the link is up */
+ if (xcpm->links[link].link_state != LINK_DOWN &&
+ xcpm->links[link].link_state != LINK_DEAD &&
+ !atomic_read(&xcpm->xcpm_down))
+ queue_work(xcpm_wq, &xcpm->links[link].msg_dispatch_work);
+}
+
+/* Actually handle the messages received from the XCM */
+void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq)
+{
+ struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC);
+ u32 conn_handle;
+ int count, num_completions;
+
+ if (!wcp) {
+		printk(KERN_ERR PFX "%s cache allocation failed\n", __func__);
+ return;
+ }
+
+ atomic_inc(&msg_refcount);
+
+ if (!iblink) {
+ xcpm_debug(KERN_WARNING, "Warning: iblink is null\n");
+ goto recv_exit;
+ }
+
+ if (!cq) {
+ xcpm_debug(KERN_WARNING, "Null CQ, probably a deleted handle\n");
+ goto recv_exit;
+ }
+
+ /* Check the link first before handling messages */
+ if (!iblink->connected) {
+ xcpm_debug(KERN_WARNING,
+ "Message received for a non-existent link (probably "
+ "a phantom CQ), ignoring...\n");
+ goto recv_exit;
+ }
+
+ conn_handle = iblink->handle;
+
+ num_completions = xsigo_ib_get_completions(conn_handle, RECV_CQ,
+ NUM_ELEMENTS, wcp);
+
+ for (count = 0; count < num_completions; count++) {
+ int index = wcp[count].wr_id;
+ struct ib_wc *wc = &wcp[count];
+ u8 *buf;
+ int length;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ xcpm_debug(KERN_ERR,
+ "Receive error: id %d, "
+ "status %d (handle %d)\n",
+ index, wc->status, conn_handle);
+ continue;
+ }
+
+ if (iblink->connected) {
+ buf = xsigo_ib_get_recv_buf_address(conn_handle, index);
+ length = wc->byte_len;
+ xsigo_ib_unmap_recv_buf(conn_handle, index);
+ process_incoming_msg(iblink->link_index, buf, length);
+ }
+
+		if (iblink->connected) {
+			/*
+			 * Enqueue the receive buffer back to
+			 * get further messages
+			 */
+			xsigo_ib_post_receive(conn_handle, index);
+		} else {
+ xcpm_debug(KERN_WARNING,
+ "Handle %d disconnected, skipping buffer "
+ "posting\n", conn_handle);
+ continue;
+ }
+ }
+
+ if (iblink->connected)
+ xsigo_ib_arm_cq(conn_handle, RECV_CQ);
+ else
+ xcpm_debug(KERN_WARNING,
+ "Handle %d disconnected, skipping CQ arming\n",
+ conn_handle);
+
+recv_exit:
+ atomic_dec(&msg_refcount);
+ wake_up(&xcpm_wait);
+ kmem_cache_free(ib_if_cachep, wcp);
+}
+
+/* End - handling of receive messages */
+
+/* Handling sends */
+
+/* Send message completion handler */
+static void send_comp_handler(struct ib_cq *send_cq, void *cq_context)
+{
+ struct ib_wc *wcp = kmem_cache_alloc(ib_if_cachep, GFP_ATOMIC);
+ u32 conn_handle = (u32)(unsigned long) cq_context;
+ struct ib_link_info *iblink;
+ int count = 0, num_completions;
+
+ if (!wcp) {
+		printk(KERN_ERR PFX "%s cache allocation failed\n", __func__);
+ return;
+ }
+
+ iblink = xsigo_ib_get_handle_context(conn_handle);
+ if (!iblink) {
+ xcpm_debug(KERN_WARNING, "Warning: iblink is null\n");
+ goto send_exit;
+ }
+
+ num_completions = xsigo_ib_get_completions(conn_handle, SEND_CQ,
+ NUM_ELEMENTS, wcp);
+
+ for (count = 0; count < num_completions; count++) {
+ int index = wcp[count].wr_id;
+ struct ib_wc *wc = &wcp[count];
+ void *buf;
+
+ if (wc->status != IB_WC_SUCCESS) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ xcpm_debug(KERN_ERR,
+ "Send error: id %d, "
+ "status %d (handle %d)\n",
+ index, wc->status, conn_handle);
+ }
+
+ buf = xsigo_ib_get_send_buf_address(conn_handle, index);
+ xsigo_ib_unmap_send_buf(conn_handle, index);
+
+ if (buf) {
+ if (xsmp_is_local_msg(buf))
+ kmem_cache_free(xsmp_cachep, buf);
+ else
+ kfree(buf);
+ }
+
+ atomic_dec(&msg_refcount);
+ wake_up(&xcpm_wait);
+ }
+
+ /*
+ * Accept further notifications
+ * This is unconditional unlike the recv completion handler
+ */
+ xsigo_ib_arm_cq(conn_handle, SEND_CQ);
+
+send_exit:
+ kmem_cache_free(ib_if_cachep, wcp);
+}
+
+/* Send out a message on the queue pair */
+int ib_if_send_msg(struct ib_link_info *iblink, u8 *data, int length)
+{
+ int ret;
+
+ if (!iblink->connected) {
+ xcpm_debug(KERN_WARNING, "Link not connected, discarding send\n");
+ ret = -EINVAL;
+ goto ib_if_send_msg_exit;
+ }
+
+ xcpm_debug(KERN_DEBUG, "Dispatching a send (handle %d, length %d)...\n",
+ iblink->handle, length);
+
+ atomic_inc(&msg_refcount);
+
+ /* A check should have been made for the link status before this point */
+ ret = xsigo_ib_send_msg(iblink->handle, data, length);
+
+ib_if_send_msg_exit:
+ return ret;
+}
+
+/* Memory pools */
+int alloc_ib_if_mem_pool(void)
+{
+ ib_if_cachep = kmem_cache_create("xscore_xcpm_ib_if_cache",
+ sizeof(struct ib_wc) * NUM_ELEMENTS, 0,
+ 0, NULL);
+
+ return (ib_if_cachep == NULL) ? -1 : 0;
+}
+
+void dealloc_ib_if_mem_pool(void)
+{
+ if (ib_if_cachep)
+ kmem_cache_destroy(ib_if_cachep);
+}
diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h
new file mode 100644
index 0000000..8a6cb56
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __IB_IF_H__
+#define __IB_IF_H__
+
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <linux/types.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>
+#include <asm/byteorder.h>
+
+#include "xsigoib.h"
+#include "ib_if_xds.h"
+
+#define MAD_TIMEOUT_MS 1000
+
+#define NUM_ELEMENTS 288
+
+#define MAX_BUF_SIZE 1024
+#define XSIGO_XDS_STRING "XSIGOXDS"
+
+struct link_msg_buf {
+ void *vaddr;
+ u64 dma_addr;
+ int length;
+ int posted;
+};
+
+/* Parameters for the logical link */
+struct ib_link_info {
+ struct ib_port_info *port; /* The port used on the HCA */
+ struct xcfm_record link_xcm; /* Remote endpoint: the XCM */
+ u32 link_index; /* Index referenced by the XCPM core */
+ u32 handle; /* XsigoIB connection handle */
+
+ /*
+ * State of the IB connection
+ * We need this separate from the handle value because
+ * sometimes we get messages on stale handle values
+ */
+ int connected;
+};
+
+struct ib_port_info {
+ u64 guid; /* Host byte order */
+ u16 lid; /* Host byte order */
+ struct ib_device *device; /* The HCA that the port belongs to */
+ int port_num;
+ struct ib_pd *pd;
+ struct ib_mr *mr;
+ union ib_gid gid;
+ u32 xds_handle;
+ int queried; /* 0 => Query the XDS */
+ u64 fw_ver;
+ u32 vendor_part_id;
+ u32 hw_ver;
+ struct delayed_work port_sweep_work;
+ int used;
+ atomic_t refcount;
+ int port_down;
+ u16 xds_dlid;
+ int fast_poll;
+};
+
+int ib_if_port_init(struct ib_device *device, int port_num,
+ struct ib_pd *pd, struct ib_mr *mr,
+ struct ib_port_info *ib_port, u32 *xds_handle);
+void ib_if_port_exit(struct ib_port_info *ib_port);
+void ib_if_link_init(int link_index, struct xcfm_record *pxcm,
+ struct ib_port_info *pport, struct ib_link_info *plink);
+void ib_if_link_exit(struct ib_link_info *iblink);
+int ib_if_link_connect(int link_index, struct ib_link_info *iblink);
+int ib_if_post_receive(struct ib_link_info *ib_link, int id);
+int ib_if_send_msg(struct ib_link_info *ib_link, u8 *data, int length);
+int ib_if_init(struct ib_client *ibclient);
+void ib_if_exit(struct ib_client *ibclient);
+void ib_if_recv_comp_handler(struct ib_link_info *iblink, struct ib_cq *cq);
+struct ib_cq *ib_if_get_recv_cq(struct ib_link_info *link);
+int ib_if_link_match(struct xcfm_record *pxcm, struct ib_link_info *iblink);
+int alloc_ib_if_mem_pool(void);
+void dealloc_ib_if_mem_pool(void);
+int ib_if_sa_query_xds(struct ib_port_info *);
+
+#endif /* __IB_IF_H__ */
diff --git a/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
new file mode 100644
index 0000000..3baa603
--- /dev/null
+++ b/drivers/infiniband/ulp/xsigo/xscore/ib_if_xds.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2006-2008 Xsigo Systems Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef __IB_IF_XDS_H__
+#define __IB_IF_XDS_H__
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+
+#define XCM_REC_VERSION 1
+#define MAX_XCFM_COUNT 8
+
+#define XSIGO_MGMT_CLASS 0x0B
+#define XSIGO_MGMT_CLASS_VERSION 0x02
+
+#define IB_MAD_ATTR_XCM_REQUEST 0xB002
+
+#define XSIGO_MGMT_METHOD_GET IB_MGMT_METHOD_GET
+#define XSIGO_MGMT_METHOD_SET IB_MGMT_METHOD_SET
+
+#define XSIGO_MAX_HOSTNAME 65
+
+struct xcfm_record {
+ u64 port_id;
+ u16 xcm_lid; /* lid of the XCM port */
+ u8 reserved[10];
+} __attribute__ ((packed));
+
+struct xcm_list {
+ u8 count;
+ u8 xcm_version;
+ u8 reserved[2];
+ struct xcfm_record xcms[MAX_XCFM_COUNT];
+};
+
+struct server_info {
+ u32 vm_id;
+ u64 port_id;
+} __attribute__ ((packed));
+
+struct xds_request {
+ struct server_info server_record;
+ char hostname[XSIGO_MAX_HOSTNAME];
+} __attribute__ ((packed));
+
+struct ib_xds_mad {
+ struct ib_mad_hdr mad_hdr;
+ u8 reserved[IB_MGMT_SA_HDR - IB_MGMT_MAD_HDR];
+ u8 data[IB_MGMT_SA_DATA];
+} __attribute__ ((packed));
+
+#endif /* __IB_IF_XDS_H__ */
--
1.5.2