[openib-general] [PATCH] CMA mods for iWARP
Tom Tucker
tom at opengridcomputing.com
Thu Jan 5 10:57:12 PST 2006
This patch is for CMA changes to support iWARP and is relative to the
trunk. It includes the latest ib_addr generalizations that allowed for
some simplification in the rdma_resolve_addr implementation. This patch
needs the include file patch to compile.
I tested this on 2.6.14.5 with the AMSO1100 iWARP and Voltaire IB
adapters.
Please review and comment as appropriate. I would love to get this in
the trunk -- the merges are killing me.
Thanks,
Signed-off-by: Tom Tucker <tom at opengridcomputing.com>
Index: cm.c
===================================================================
--- cm.c (revision 4748)
+++ cm.c (working copy)
@@ -3261,6 +3261,9 @@
int ret;
u8 i;
+ if (device->node_type == IB_NODE_RNIC)
+ return;
+
cm_dev = kmalloc(sizeof(*cm_dev) + sizeof(*port) *
device->phys_port_cnt, GFP_KERNEL);
if (!cm_dev)
Index: iwcm.c
===================================================================
--- iwcm.c (revision 0)
+++ iwcm.c (revision 0)
@@ -0,0 +1,648 @@
+/*
+ * Copyright (c) 2004, 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2004 Topspin Corporation. All rights reserved.
+ * Copyright (c) 2004, 2005 Voltaire Corporation. All rights reserved.
+ * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ * Copyright (c) 2005 Network Appliance, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include <rdma/ib_cache.h>
+#include <rdma/ib_cm.h>
+#include <rdma/iw_cm.h>
+
+#include "cm_msgs.h"
+
+MODULE_AUTHOR("Tom Tucker");
+MODULE_DESCRIPTION("iWARP CM");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* Per-device callbacks invoked through iwcm_client; defined further down. */
+static void iwcm_add_one(struct ib_device *device);
+static void iwcm_remove_one(struct ib_device *device);
+struct iwcm_id_private;
+
+/* Client descriptor passed to ib_register_client()/ib_unregister_client(). */
+static struct ib_client iwcm_client = {
+ .name = "iwcm",
+ .add = iwcm_add_one,
+ .remove = iwcm_remove_one
+};
+
+/* Module-wide state; zeroed and initialised in iw_cm_init(). */
+static struct {
+ spinlock_t lock; /* initialised in iw_cm_init(); no user in the visible code */
+ struct list_head device_list; /* iwcm_device entries added by iwcm_add_one() */
+ rwlock_t device_lock; /* write-locked around device_list updates */
+ struct workqueue_struct* wq; /* cm_event_handler() queues iwcm_work items here */
+} iwcm;
+
+struct iwcm_device;
+/* Per-port bookkeeping; one entry per physical port of an iwcm_device. */
+struct iwcm_port {
+ struct iwcm_device *iwcm_dev; /* back-pointer to the owning device entry */
+ struct sockaddr_in local_addr; /* not written anywhere in the visible code */
+ u8 port_num; /* 1-based port number, set in iwcm_add_one() */
+};
+
+/*
+ * Per-RNIC entry created by iwcm_add_one() and linked on iwcm.device_list.
+ * Allocated with room for phys_port_cnt trailing iwcm_port elements.
+ */
+struct iwcm_device {
+ struct list_head list; /* link on iwcm.device_list */
+ struct ib_device *device; /* the ib_device this entry was created for */
+ struct iwcm_port port[0]; /* trailing array, indexed by port_num - 1 */
+};
+
+/*
+ * Private wrapper around the public iw_cm_id. Functions in this file
+ * recover it from an iw_cm_id pointer with container_of(..., id).
+ */
+struct iwcm_id_private {
+ struct iw_cm_id id; /* public part handed back by iw_create_cm_id() */
+
+ spinlock_t lock; /* taken around id.state checks and transitions */
+ wait_queue_head_t wait; /* iw_destroy_cm_id() sleeps here until refcount is 0 */
+ atomic_t refcount; /* starts at 1 in iw_create_cm_id() */
+
+ struct rb_node listen_node; /* not referenced in the visible code */
+
+ struct list_head work_list; /* not referenced in the visible code */
+ atomic_t work_count; /* not referenced in the visible code */
+};
+
+/*
+ * Deferred event: allocated and queued by cm_event_handler(), consumed by
+ * cm_work_handler() and the per-event handlers it dispatches to.
+ */
+struct iwcm_work {
+ struct work_struct work; /* set up with INIT_WORK() in cm_event_handler() */
+ struct iwcm_id_private* cm_id; /* id the event was reported against */
+ struct iw_cm_event event; /* by-value copy of the reported event */
+};
+
+/* Take one additional reference on the given cm_id. */
+static inline void iwcm_addref_id(struct iwcm_id_private *cm_id_priv)
+{
+	atomic_t *refs = &cm_id_priv->refcount;
+
+	atomic_inc(refs);
+}
+
+/*
+ * Drop one reference on the given cm_id. When the count reaches zero,
+ * wake any sleeper on the id's wait queue (iw_destroy_cm_id() waits
+ * there for the count to drain).
+ */
+static inline void iwcm_deref_id(struct iwcm_id_private *cm_id_priv)
+{
+	if (!atomic_dec_and_test(&cm_id_priv->refcount))
+		return;
+
+	wake_up(&cm_id_priv->wait);
+}
+
+static void cm_event_handler(struct iw_cm_id* cm_id, struct iw_cm_event* event);
+
+/*
+ * Allocate a new iw_cm_id for @device.
+ *
+ * The id starts in IW_CM_STATE_IDLE with a reference count of one.
+ * @cm_handler and @context are stored for later client callbacks, and
+ * the id's event_handler is pointed at cm_event_handler().
+ *
+ * Returns the new id, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+struct iw_cm_id *iw_create_cm_id(struct ib_device *device,
+				 iw_cm_handler cm_handler,
+				 void *context)
+{
+	struct iwcm_id_private *priv;
+	struct iw_cm_id *id;
+
+	priv = kmalloc(sizeof *priv, GFP_KERNEL);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+	memset(priv, 0, sizeof *priv);
+
+	/* Public part. */
+	id = &priv->id;
+	id->state = IW_CM_STATE_IDLE;
+	id->device = device;
+	id->cm_handler = cm_handler;
+	id->context = context;
+	id->event_handler = cm_event_handler;
+
+	/* Private bookkeeping. */
+	spin_lock_init(&priv->lock);
+	init_waitqueue_head(&priv->wait);
+	atomic_set(&priv->refcount, 1);
+
+	return id;
+}
+EXPORT_SYMBOL(iw_create_cm_id);
+
+/*
+ * Tear down a cm_id and free it.
+ *
+ * A listening id has its provider listen destroyed; an id in CONN_RECV,
+ * CONN_SENT or ESTABLISHED is disconnected through the provider. In both
+ * cases the id is moved to IDLE under the lock before the provider is
+ * called. The caller's reference is then dropped and the function sleeps
+ * until the reference count reaches zero before freeing the memory.
+ *
+ * The state is sampled once while the lock is held, so the error message
+ * for an unexpected state reports the value that was actually acted upon
+ * rather than re-reading it after the lock has been released.
+ */
+void iw_destroy_cm_id(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *iwcm_id_priv;
+	unsigned long flags;
+	int state;
+
+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	state = cm_id->state;
+	switch (state) {
+	case IW_CM_STATE_LISTEN:
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id->state = IW_CM_STATE_IDLE;
+		break;
+	default:
+		break;
+	}
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	/* Provider calls are made without the lock held, as before. */
+	switch (state) {
+	case IW_CM_STATE_LISTEN:
+		cm_id->device->iwcm->destroy_listen(cm_id);
+		break;
+
+	case IW_CM_STATE_CONN_RECV:
+	case IW_CM_STATE_CONN_SENT:
+	case IW_CM_STATE_ESTABLISHED:
+		cm_id->device->iwcm->disconnect(cm_id, 1);
+		break;
+
+	case IW_CM_STATE_IDLE:
+		break;
+
+	default:
+		printk(KERN_ERR "%s:%s:%u Illegal state %d for iw_cm_id.\n",
+		       __FILE__, __FUNCTION__, __LINE__, state);
+		break;
+	}
+
+	/* Drop our reference and wait for any remaining holders. */
+	atomic_dec(&iwcm_id_priv->refcount);
+	wait_event(iwcm_id_priv->wait, !atomic_read(&iwcm_id_priv->refcount));
+
+	kfree(iwcm_id_priv);
+}
+EXPORT_SYMBOL(iw_destroy_cm_id);
+
+/*
+ * Move an idle cm_id to LISTEN and ask the provider to start listening
+ * with the given @backlog.
+ *
+ * Returns 0 on success, -EINVAL if the id has no device or the device
+ * has no iwcm operations, -EBUSY if the id is not idle, or the error
+ * returned by the provider's create_listen(). On provider failure the
+ * id is put back into IDLE; that rollback is done under the id lock so
+ * it cannot race with other state checks made under the same lock.
+ */
+int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
+{
+	struct iwcm_id_private *iwcm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	if (!cm_id->device || !cm_id->device->iwcm)
+		return -EINVAL;
+
+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	if (cm_id->state != IW_CM_STATE_IDLE) {
+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+		return -EBUSY;
+	}
+	cm_id->state = IW_CM_STATE_LISTEN;
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+	if (ret) {
+		spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+		cm_id->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_listen);
+
+/*
+ * Ask the provider for the local and remote addresses of an established
+ * connection.
+ *
+ * Returns -EINVAL if the id has no device or the device has no iwcm
+ * operations, -ENOTCONN if the id is not in the ESTABLISHED state, and
+ * otherwise whatever the provider's getpeername() returns.
+ */
+int iw_cm_getpeername(struct iw_cm_id *cm_id,
+		      struct sockaddr_in* local_addr,
+		      struct sockaddr_in* remote_addr)
+{
+	struct ib_device *dev = cm_id->device;
+
+	if (!dev || !dev->iwcm)
+		return -EINVAL;
+
+	/* Only meaningful while a connection exists. */
+	if (cm_id->state != IW_CM_STATE_ESTABLISHED)
+		return -ENOTCONN;
+
+	return dev->iwcm->getpeername(cm_id, local_addr, remote_addr);
+}
+EXPORT_SYMBOL(iw_cm_getpeername);
+
+/*
+ * Reject a pending inbound connection request.
+ *
+ * Only valid while the id is in CONN_RECV. The provider's reject() is
+ * called without the lock held, and the id is then moved to IDLE
+ * regardless of the provider's result. That state change is made under
+ * the id lock so it is serialised with other state checks.
+ *
+ * Returns -EINVAL if the id has no device, the device has no iwcm
+ * operations, or the id is not in CONN_RECV; otherwise the provider's
+ * return value.
+ */
+int iw_cm_reject(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *iwcm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	if (!cm_id->device || !cm_id->device->iwcm)
+		return -EINVAL;
+
+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	if (cm_id->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->reject(cm_id, private_data,
+					  private_data_len);
+
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	cm_id->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_reject);
+
+/*
+ * Accept a pending inbound connection request.
+ *
+ * Only valid while the id is in CONN_RECV. If the provider's accept()
+ * succeeds, an IW_CM_EVENT_ESTABLISHED event is queued through
+ * cm_event_handler() so that the transition to ESTABLISHED and the
+ * client callback happen on the work queue.
+ *
+ * The local event is zeroed before it is filled in: cm_event_handler()
+ * copies the whole structure into the work item, so no field may be
+ * left holding stack garbage.
+ *
+ * Returns -EINVAL if the id has no device, the device has no iwcm
+ * operations, or the id is not in CONN_RECV; otherwise the provider's
+ * return value.
+ */
+int iw_cm_accept(struct iw_cm_id *cm_id,
+		 const void *private_data,
+		 u8 private_data_len)
+{
+	struct iwcm_id_private *iwcm_id_priv;
+	struct iw_cm_event event;
+	unsigned long flags;
+	int ret;
+
+	if (!cm_id->device || !cm_id->device->iwcm)
+		return -EINVAL;
+
+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	if (cm_id->state != IW_CM_STATE_CONN_RECV) {
+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->accept(cm_id, private_data,
+					  private_data_len);
+	if (ret)
+		return ret;
+
+	memset(&event, 0, sizeof event);
+	event.event = IW_CM_EVENT_ESTABLISHED;
+	event.provider_id = cm_id->provider_id;
+	event.status = 0;
+	event.local_addr = cm_id->local_addr;
+	event.remote_addr = cm_id->remote_addr;
+	event.private_data = 0;
+	event.private_data_len = 0;
+	cm_event_handler(cm_id, &event);
+
+	return 0;
+}
+EXPORT_SYMBOL(iw_cm_accept);
+
+/*
+ * Record @qp as the queue pair associated with @cm_id.
+ *
+ * Returns 0 on success or -EINVAL when @cm_id is NULL.
+ */
+int iw_cm_bind_qp(struct iw_cm_id* cm_id, struct ib_qp* qp)
+{
+	if (!cm_id)
+		return -EINVAL;
+
+	cm_id->qp = qp;
+	return 0;
+}
+EXPORT_SYMBOL(iw_cm_bind_qp);
+
+/*
+ * Start an active connection attempt on an idle cm_id.
+ *
+ * The id is moved to CONN_SENT under the lock and the provider's
+ * connect() is called with the caller's private data. If the provider
+ * fails, the id is returned to IDLE; that rollback is done under the
+ * id lock so it is serialised with other state checks.
+ *
+ * Returns 0 on success, -EINVAL if the id has no device or the device
+ * has no iwcm operations, -EBUSY if the id is not idle, or the
+ * provider's error.
+ */
+int iw_cm_connect(struct iw_cm_id *cm_id,
+		  const void* pdata, u8 pdata_len)
+{
+	struct iwcm_id_private *cm_id_priv;
+	unsigned long flags;
+	int ret;
+
+	if (!cm_id->device || !cm_id->device->iwcm)
+		return -EINVAL;
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id->state != IW_CM_STATE_IDLE) {
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		return -EBUSY;
+	}
+	cm_id->state = IW_CM_STATE_CONN_SENT;
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->connect(cm_id, pdata, pdata_len);
+	if (ret) {
+		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		cm_id->state = IW_CM_STATE_IDLE;
+		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(iw_cm_connect);
+
+/*
+ * Disconnect an established connection.
+ *
+ * Only valid while the id is in ESTABLISHED and has a QP bound. The id
+ * is moved to IDLE under the lock, the provider's disconnect() is
+ * called, and on success an IW_CM_EVENT_LLP_DISCONNECT event is queued
+ * through cm_event_handler() so the client is notified from the work
+ * queue.
+ *
+ * The local event is zeroed before it is filled in: cm_event_handler()
+ * copies the whole structure into the work item, so no field may be
+ * left holding stack garbage.
+ *
+ * Returns -EINVAL if the id has no device, no iwcm operations, no QP,
+ * or is not ESTABLISHED; otherwise the provider's return value.
+ */
+int iw_cm_disconnect(struct iw_cm_id *cm_id)
+{
+	struct iwcm_id_private *iwcm_id_priv;
+	struct iw_cm_event event;
+	unsigned long flags;
+	int ret;
+
+	if (!cm_id->device || !cm_id->device->iwcm || !cm_id->qp)
+		return -EINVAL;
+
+	iwcm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	spin_lock_irqsave(&iwcm_id_priv->lock, flags);
+	if (cm_id->state != IW_CM_STATE_ESTABLISHED) {
+		spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+		return -EINVAL;
+	}
+	cm_id->state = IW_CM_STATE_IDLE;
+	spin_unlock_irqrestore(&iwcm_id_priv->lock, flags);
+
+	ret = cm_id->device->iwcm->disconnect(cm_id, 1);
+	if (ret)
+		return ret;
+
+	memset(&event, 0, sizeof event);
+	event.event = IW_CM_EVENT_LLP_DISCONNECT;
+	event.provider_id = cm_id->provider_id;
+	event.status = 0;
+	event.local_addr = cm_id->local_addr;
+	event.remote_addr = cm_id->remote_addr;
+	event.private_data = 0;
+	event.private_data_len = 0;
+	cm_event_handler(cm_id, &event);
+
+	return 0;
+}
+EXPORT_SYMBOL(iw_cm_disconnect);
+
+/*
+ * ib_client "add" callback: create the per-device iwcm bookkeeping for
+ * an RNIC and link it on iwcm.device_list. Devices of any other node
+ * type are ignored, and an allocation failure silently leaves the
+ * device untracked.
+ *
+ * The allocation is zeroed so the per-port fields that are not set
+ * here (local_addr) do not contain uninitialised heap contents. The
+ * port loop uses an int-sized index: with a u8 counter, the condition
+ * "i <= phys_port_cnt" can never become false when the count is 255.
+ */
+static void iwcm_add_one(struct ib_device *device)
+{
+	struct iwcm_device *iwcm_dev;
+	struct iwcm_port *port;
+	unsigned long flags;
+	unsigned int i;
+	size_t size;
+
+	if (device->node_type != IB_NODE_RNIC)
+		return;
+
+	size = sizeof(*iwcm_dev) + sizeof(*port) * device->phys_port_cnt;
+	iwcm_dev = kmalloc(size, GFP_KERNEL);
+	if (!iwcm_dev)
+		return;
+	memset(iwcm_dev, 0, size);
+
+	iwcm_dev->device = device;
+
+	/* Port numbers are 1-based; the array is 0-based. */
+	for (i = 0; i < device->phys_port_cnt; i++) {
+		port = &iwcm_dev->port[i];
+		port->iwcm_dev = iwcm_dev;
+		port->port_num = i + 1;
+	}
+
+	ib_set_client_data(device, &iwcm_client, iwcm_dev);
+
+	write_lock_irqsave(&iwcm.device_lock, flags);
+	list_add_tail(&iwcm_dev->list, &iwcm.device_list);
+	write_unlock_irqrestore(&iwcm.device_lock, flags);
+}
+
+/*
+ * ib_client "remove" callback: unlink and free the bookkeeping that
+ * iwcm_add_one() attached to @device, if any.
+ */
+static void iwcm_remove_one(struct ib_device *device)
+{
+	struct iwcm_device *entry = ib_get_client_data(device, &iwcm_client);
+	unsigned long irq_flags;
+
+	if (entry) {
+		write_lock_irqsave(&iwcm.device_lock, irq_flags);
+		list_del(&entry->list);
+		write_unlock_irqrestore(&iwcm.device_lock, irq_flags);
+
+		kfree(entry);
+	}
+}
+
+/*
+ * Handles an inbound connect request.
+ *
+ * A new iw_cm_id is created to represent the connection. It inherits
+ * the device, client cm_handler and context from the listening cm_id
+ * carried in the work item, takes its addresses and provider_id from
+ * the event, and starts in CONN_RECV. The client handler is then called
+ * with the new id; if it returns non-zero the new id is destroyed.
+ *
+ * The work item is always freed here, including on the early error
+ * paths (bad event status, failure to create the new id).
+ *
+ * Returns the event status or the allocation error on those early
+ * paths, and 0 once the client handler has been called.
+ */
+static int cm_conn_req_handler(struct iwcm_work* work)
+{
+	struct iw_cm_id *cm_id;
+	struct iwcm_id_private *cm_id_priv;
+	int ret = 0;
+
+	/* If the status was not successful, ignore request */
+	if (work->event.status) {
+		printk(KERN_ERR "%s:%d Bad status=%d for connection request ... "
+		       "should be filtered by provider\n",
+		       __FUNCTION__, __LINE__,
+		       work->event.status);
+		ret = work->event.status;
+		goto out;
+	}
+
+	cm_id = iw_create_cm_id(work->cm_id->id.device,
+				work->cm_id->id.cm_handler,
+				work->cm_id->id.context);
+	if (IS_ERR(cm_id)) {
+		ret = PTR_ERR(cm_id);
+		goto out;
+	}
+
+	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
+	cm_id_priv->id.local_addr = work->event.local_addr;
+	cm_id_priv->id.remote_addr = work->event.remote_addr;
+	cm_id_priv->id.provider_id = work->event.provider_id;
+	cm_id_priv->id.state = IW_CM_STATE_CONN_RECV;
+
+	/* Call the client CM handler */
+	if (cm_id->cm_handler(cm_id, &work->event)) {
+		cm_id->state = IW_CM_STATE_IDLE;
+		iw_destroy_cm_id(cm_id);
+	}
+
+out:
+	kfree(work);
+	return ret;
+}
+
+/*
+ * Passive side: handle the ESTABLISHED event for a cm_id that is
+ * waiting in CONN_RECV.
+ *
+ * An event arriving in any other state is logged and dropped with
+ * -EINVAL. Otherwise a zero status records the addresses and moves the
+ * id to ESTABLISHED, a non-zero status moves it to IDLE, and the event
+ * is delivered to the client. A non-zero return from the client
+ * handler destroys the id. The work item is freed in every case.
+ */
+static int cm_conn_est_handler(struct iwcm_work* work)
+{
+	struct iwcm_id_private *priv = work->cm_id;
+	unsigned long irq_flags;
+	int rc;
+
+	spin_lock_irqsave(&priv->lock, irq_flags);
+	if (priv->id.state == IW_CM_STATE_CONN_RECV) {
+		if (work->event.status != 0) {
+			priv->id.state = IW_CM_STATE_IDLE;
+		} else {
+			priv->id.local_addr = work->event.local_addr;
+			priv->id.remote_addr = work->event.remote_addr;
+			priv->id.state = IW_CM_STATE_ESTABLISHED;
+		}
+		spin_unlock_irqrestore(&priv->lock, irq_flags);
+
+		/* Call the client CM handler */
+		rc = priv->id.cm_handler(&priv->id, &work->event);
+		if (rc) {
+			priv->id.state = IW_CM_STATE_IDLE;
+			iw_destroy_cm_id(&priv->id);
+		}
+	} else {
+		spin_unlock_irqrestore(&priv->lock, irq_flags);
+		printk(KERN_ERR "%s:%d Invalid cm_id state=%d for established event\n",
+		       __FUNCTION__, __LINE__, priv->id.state);
+		rc = -EINVAL;
+	}
+
+	kfree(work);
+	return rc;
+}
+
+/*
+ * Active side: handle the reply to our connect request.
+ *
+ *  - If the cm_id is not in CONN_SENT when the event is delivered, the
+ *    event is logged and dropped with -EINVAL.
+ *  - If the peer accepted (status == 0), the addresses from the event
+ *    are stored in the cm_id, the id moves to ESTABLISHED and the event
+ *    is delivered to the client.
+ *  - If the peer rejected or the attempt failed (status != 0), the id
+ *    moves to IDLE and the event is delivered to the client.
+ *
+ * A non-zero return from the client handler destroys the id. The work
+ * item is freed in every case.
+ */
+static int cm_conn_rep_handler(struct iwcm_work* work)
+{
+	struct iwcm_id_private *priv = work->cm_id;
+	unsigned long irq_flags;
+	int rc;
+
+	spin_lock_irqsave(&priv->lock, irq_flags);
+	if (priv->id.state == IW_CM_STATE_CONN_SENT) {
+		if (work->event.status != 0) {
+			priv->id.state = IW_CM_STATE_IDLE;
+		} else {
+			priv->id.local_addr = work->event.local_addr;
+			priv->id.remote_addr = work->event.remote_addr;
+			priv->id.state = IW_CM_STATE_ESTABLISHED;
+		}
+		spin_unlock_irqrestore(&priv->lock, irq_flags);
+
+		/* Call the client CM handler */
+		rc = priv->id.cm_handler(&priv->id, &work->event);
+		if (rc) {
+			priv->id.state = IW_CM_STATE_IDLE;
+			iw_destroy_cm_id(&priv->id);
+		}
+	} else {
+		spin_unlock_irqrestore(&priv->lock, irq_flags);
+		printk(KERN_ERR "%s:%d Invalid cm_id state=%d for connect reply event\n",
+		       __FUNCTION__, __LINE__, priv->id.state);
+		rc = -EINVAL;
+	}
+
+	kfree(work);
+	return rc;
+}
+
+/*
+ * Handle a disconnect-class event: mark the cm_id IDLE, deliver the
+ * event to the client, and destroy the id if the client handler
+ * returns non-zero. The work item is freed before returning the
+ * handler's result.
+ */
+static int cm_disconnect_handler(struct iwcm_work* work)
+{
+	struct iw_cm_id *id = &work->cm_id->id;
+	int rc;
+
+	id->state = IW_CM_STATE_IDLE;
+
+	/* Call the client CM handler */
+	rc = id->cm_handler(id, &work->event);
+	if (rc != 0)
+		iw_destroy_cm_id(id);
+
+	kfree(work);
+	return rc;
+}
+
+/*
+ * Work queue entry point: dispatch a queued iwcm_work item to the
+ * handler for its event type.
+ *
+ * Each per-event handler frees the work item itself. An event type
+ * with no handler is logged and the work item is freed here so that it
+ * is not leaked.
+ */
+static void cm_work_handler(void* arg)
+{
+	struct iwcm_work *work = arg;
+
+	switch (work->event.event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		cm_conn_req_handler(work);
+		break;
+	case IW_CM_EVENT_CONNECT_REPLY:
+		cm_conn_rep_handler(work);
+		break;
+	case IW_CM_EVENT_ESTABLISHED:
+		cm_conn_est_handler(work);
+		break;
+	case IW_CM_EVENT_LLP_DISCONNECT:
+	case IW_CM_EVENT_LLP_TIMEOUT:
+	case IW_CM_EVENT_LLP_RESET:
+	case IW_CM_EVENT_CLOSE:
+		cm_disconnect_handler(work);
+		break;
+	default:
+		printk(KERN_ERR "%s:%d Unhandled iw_cm event %d dropped\n",
+		       __FUNCTION__, __LINE__, work->event.event);
+		kfree(work);
+		break;
+	}
+}
+
+/*
+ * IW CM provider event callback. Providers call this from interrupt
+ * context, hence the GFP_ATOMIC allocation. The event is copied into a
+ * work item and queued on iwcm.wq so that the client's CM callback runs
+ * on a work queue thread and is allowed to block.
+ *
+ * If the work item cannot be allocated the event is dropped silently.
+ *
+ * NOTE(review): no reference is taken on the cm_id while the work item
+ * is queued - confirm the id cannot be freed before the work runs.
+ */
+static void cm_event_handler(struct iw_cm_id* cm_id,
+			     struct iw_cm_event* event)
+{
+	struct iwcm_work *item = kmalloc(sizeof *item, GFP_ATOMIC);
+
+	if (!item)
+		return;
+
+	item->event = *event;
+	item->cm_id = container_of(cm_id, struct iwcm_id_private, id);
+	INIT_WORK(&item->work, cm_work_handler, item);
+	queue_work(iwcm.wq, &item->work);
+}
+
+/*
+ * Module init: reset the global state, create the "iw_cm" work queue
+ * and register with the IB core as a client.
+ *
+ * Returns -ENOMEM if the work queue cannot be created, or the result
+ * of ib_register_client(). If registration fails the work queue is
+ * destroyed again so a failed load does not leak it.
+ */
+static int __init iw_cm_init(void)
+{
+	int ret;
+
+	memset(&iwcm, 0, sizeof iwcm);
+	INIT_LIST_HEAD(&iwcm.device_list);
+	rwlock_init(&iwcm.device_lock);
+	spin_lock_init(&iwcm.lock);
+
+	iwcm.wq = create_workqueue("iw_cm");
+	if (!iwcm.wq)
+		return -ENOMEM;
+
+	ret = ib_register_client(&iwcm_client);
+	if (ret) {
+		destroy_workqueue(iwcm.wq);
+		iwcm.wq = NULL;
+	}
+
+	return ret;
+}
+
+/*
+ * Module exit: unregister from the IB core, then destroy the work
+ * queue created in iw_cm_init() so it is not leaked on unload.
+ */
+static void __exit iw_cm_cleanup(void)
+{
+	ib_unregister_client(&iwcm_client);
+	destroy_workqueue(iwcm.wq);
+}
+
+module_init(iw_cm_init);
+module_exit(iw_cm_cleanup);
+
Index: addr.c
===================================================================
--- addr.c (revision 4748)
+++ addr.c (working copy)
@@ -65,6 +65,9 @@
case ARPHRD_INFINIBAND:
dev_addr->dev_type = IB_NODE_CA;
break;
+ case ARPHRD_ETHER:
+ dev_addr->dev_type = IB_NODE_RNIC;
+ break;
default:
return -EADDRNOTAVAIL;
}
Index: Makefile
===================================================================
--- Makefile (revision 4748)
+++ Makefile (working copy)
@@ -1,6 +1,6 @@
EXTRA_CFLAGS += -Idrivers/infiniband/include -Idrivers/infiniband/ulp/ipoib
-obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_ping.o ib_cm.o \
+obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_ping.o ib_cm.o iw_cm.o \
ib_sa.o ib_at.o ib_addr.o rdma_cm.o
obj-$(CONFIG_INFINIBAND_USER_MAD) += ib_umad.o
obj-$(CONFIG_INFINIBAND_USER_ACCESS) += ib_uverbs.o ib_ucm.o ib_uat.o rdma_ucm.o
@@ -14,6 +14,8 @@
ib_cm-y := cm.o
+iw_cm-y := iwcm.o
+
rdma_cm-y := cma.o
rdma_ucm-y := ucma.o
Index: cma.c
===================================================================
--- cma.c (revision 4748)
+++ cma.c (working copy)
@@ -3,6 +3,7 @@
* Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
* Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2005 Intel Corporation. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
*
* This Software is licensed under one of the following licenses:
*
@@ -31,9 +32,14 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/random.h>
+#include <linux/inetdevice.h>
+#include <net/route.h>
+#include <net/arp.h>
+#include <net/neighbour.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
+#include <rdma/iw_cm.h>
#include <rdma/ib_sa.h>
MODULE_AUTHOR("Guy German");
@@ -102,8 +108,12 @@
int timeout_ms;
struct ib_sa_query *query;
int query_id;
- struct ib_cm_id *cm_id;
+ union {
+ struct ib_cm_id *ib;
+ struct iw_cm_id *iw;
+ } cm_id;
+
u32 seq_num;
u32 qp_num;
enum ib_qp_type qp_type;
@@ -239,11 +249,40 @@
return ret;
}
+/*
+ * Find the iWARP device that matches the resolved source device
+ * address of @id_priv and attach the id to it.
+ *
+ * The device list is walked under the global mutex. A device matches
+ * when its node_guid equals the leading bytes of src_dev_addr and its
+ * node type is IB_NODE_RNIC.
+ *
+ * Returns 0 if a device was attached, -ENOENT otherwise.
+ */
+static int cma_acquire_iw_dev(struct rdma_id_private* id_priv)
+{
+	struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
+	struct cma_device *cur;
+	int rc = -ENOENT;
+
+	down(&mutex);
+	list_for_each_entry(cur, &dev_list, list) {
+		if (memcmp(addr->src_dev_addr, &cur->node_guid,
+			   sizeof(cur->node_guid)) != 0)
+			continue;
+
+		/* Address matches; only attach if this is an iWARP device. */
+		if (cur->device->node_type != IB_NODE_RNIC)
+			continue;
+
+		cma_attach_to_dev(id_priv, cur);
+		rc = 0;
+		break;
+	}
+	up(&mutex);
+
+	return rc;
+}
+
static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
switch (id_priv->id.route.addr.dev_addr.dev_type) {
case IB_NODE_CA:
return cma_acquire_ib_dev(id_priv);
+ case IB_NODE_RNIC:
+ return cma_acquire_iw_dev(id_priv);
default:
return -ENODEV;
}
@@ -306,6 +345,16 @@
IB_QP_PKEY_INDEX | IB_QP_PORT);
}
+/*
+ * Move a freshly created iWARP QP to the INIT state with local write
+ * access. Only the state and access-flag attributes are passed to
+ * ib_modify_qp(); @id_priv is not used.
+ */
+static int cma_init_iw_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
+{
+	const int attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
+	struct ib_qp_attr attr;
+
+	attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE;
+	attr.qp_state = IB_QPS_INIT;
+
+	return ib_modify_qp(qp, &attr, attr_mask);
+}
+
int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
struct ib_qp_init_attr *qp_init_attr)
{
@@ -325,6 +374,9 @@
case IB_NODE_CA:
ret = cma_init_ib_qp(id_priv, qp);
break;
+ case IB_NODE_RNIC:
+ ret = cma_init_iw_qp(id_priv, qp);
+ break;
default:
ret = -ENOSYS;
break;
@@ -412,7 +464,7 @@
id_priv = container_of(id, struct rdma_id_private, id);
switch (id_priv->id.device->node_type) {
case IB_NODE_CA:
- ret = ib_cm_init_qp_attr(id_priv->cm_id, qp_attr,
+ ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
qp_attr_mask);
if (qp_attr->qp_state == IB_QPS_RTR)
qp_attr->rq_psn = id_priv->seq_num;
@@ -567,8 +619,8 @@
{
cma_exch(id_priv, CMA_DESTROYING);
- if (id_priv->cm_id && !IS_ERR(id_priv->cm_id))
- ib_destroy_cm_id(id_priv->cm_id);
+ if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
+ ib_destroy_cm_id(id_priv->cm_id.ib);
list_del(&id_priv->listen_list);
if (id_priv->cma_dev)
@@ -624,9 +676,20 @@
state = cma_exch(id_priv, CMA_DESTROYING);
cma_cancel_operation(id_priv, state);
- if (id_priv->cm_id && !IS_ERR(id_priv->cm_id))
- ib_destroy_cm_id(id_priv->cm_id);
+ if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib)) {
+ switch (id->device->node_type) {
+ case IB_NODE_RNIC:
+ iw_destroy_cm_id(id_priv->cm_id.iw);
+ break;
+ default:
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ break;
+ }
+
+ id_priv->cm_id.ib = NULL;
+ }
+
if (id_priv->cma_dev) {
down(&mutex);
cma_detach_from_dev(id_priv);
@@ -652,15 +715,15 @@
ret = cma_modify_qp_rts(&id_priv->id);
if (ret)
goto reject;
-
- ret = ib_send_cm_rtu(id_priv->cm_id, NULL, 0);
+
+ ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
if (ret)
goto reject;
return 0;
reject:
cma_modify_qp_err(&id_priv->id);
- ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
}
@@ -676,7 +739,7 @@
return 0;
reject:
cma_modify_qp_err(&id_priv->id);
- ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, NULL, 0);
return ret;
}
@@ -737,7 +800,7 @@
private_data_len);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
- id_priv->cm_id = NULL;
+ id_priv->cm_id.ib = NULL;
cma_exch(id_priv, CMA_DESTROYING);
cma_release_remove(id_priv);
rdma_destroy_id(&id_priv->id);
@@ -819,7 +882,7 @@
goto out;
}
- conn_id->cm_id = cm_id;
+ conn_id->cm_id.ib = cm_id;
cm_id->context = conn_id;
cm_id->cm_handler = cma_ib_handler;
@@ -829,7 +892,7 @@
IB_CM_REQ_PRIVATE_DATA_SIZE - offset);
if (ret) {
/* Destroy the CM ID by returning a non-zero value. */
- conn_id->cm_id = NULL;
+ conn_id->cm_id.ib = NULL;
cma_exch(conn_id, CMA_DESTROYING);
cma_release_remove(conn_id);
rdma_destroy_id(&conn_id->id);
@@ -874,6 +937,115 @@
}
}
+/*
+ * iWARP CM callback for connected (non-listening) ids: translate the
+ * iw_cm event into an RDMA CM event and deliver it to the user.
+ *
+ * Disconnect-class events map to RDMA_CM_EVENT_DISCONNECTED; a connect
+ * reply maps to REJECTED or ESTABLISHED depending on its status; an
+ * ESTABLISHED event maps to ESTABLISHED. A connect request is never
+ * expected here and triggers BUG_ON().
+ *
+ * If the user callback returns non-zero, the rdma id is destroyed and
+ * the non-zero value is returned so the caller destroys the iw_cm_id.
+ */
+static int cma_iw_handler(struct iw_cm_id* iw_id, struct iw_cm_event* event)
+{
+	struct rdma_id_private *id_priv = iw_id->context;
+	enum rdma_cm_event_type rdma_event = 0;
+	int rc;
+
+	atomic_inc(&id_priv->dev_remove);
+
+	switch (event->event) {
+	case IW_CM_EVENT_CONNECT_REQUEST:
+		BUG_ON(1);
+		break;
+
+	case IW_CM_EVENT_CONNECT_REPLY:
+		rdma_event = event->status ? RDMA_CM_EVENT_REJECTED :
+					     RDMA_CM_EVENT_ESTABLISHED;
+		break;
+
+	case IW_CM_EVENT_ESTABLISHED:
+		rdma_event = RDMA_CM_EVENT_ESTABLISHED;
+		break;
+
+	case IW_CM_EVENT_LLP_DISCONNECT:
+	case IW_CM_EVENT_LLP_RESET:
+	case IW_CM_EVENT_LLP_TIMEOUT:
+	case IW_CM_EVENT_CLOSE:
+		rdma_event = RDMA_CM_EVENT_DISCONNECTED;
+		break;
+	}
+
+	rc = cma_notify_user(id_priv, rdma_event, event->status,
+			     event->private_data, event->private_data_len);
+	if (!rc) {
+		cma_release_remove(id_priv);
+		return rc;
+	}
+
+	/* Destroy the CM ID by returning a non-zero value. */
+	id_priv->cm_id.iw = NULL;
+	cma_exch(id_priv, CMA_DESTROYING);
+	cma_release_remove(id_priv);
+	rdma_destroy_id(&id_priv->id);
+	return rc;
+}
+
+/*
+ * iWARP CM callback installed on listening ids by cma_iw_listen().
+ * Called with the iw_cm_id for an inbound connect request; at that point
+ * the iw_cm_id's context is still the listening rdma id.
+ *
+ * Creates a new rdma id for the connection, attaches it to the
+ * listener's device, rewires the iw_cm_id's context and handler to the
+ * new id, copies the addresses from the event, and reports
+ * RDMA_CM_EVENT_CONNECT_REQUEST to the user.
+ *
+ * Returns 0 on success; a non-zero return tells the caller to destroy
+ * the iw_cm_id.
+ */
+static int iw_conn_req_handler(struct iw_cm_id *cm_id,
+ struct iw_cm_event *iw_event)
+{
+ struct rdma_cm_id* new_cm_id;
+ struct rdma_id_private *listen_id, *conn_id;
+ struct sockaddr_in* sin;
+ int ret;
+
+ listen_id = cm_id->context;
+ atomic_inc(&listen_id->dev_remove);
+ /* Refuse the request if the listener is no longer in CMA_LISTEN. */
+ if (!cma_comp(listen_id, CMA_LISTEN)) {
+ ret = -ECONNABORTED;
+ goto out;
+ }
+
+ /* Create a new RDMA id for the new IW CM ID */
+ new_cm_id = rdma_create_id(listen_id->id.event_handler,
+ listen_id->id.context,
+ RDMA_PS_TCP);
+ /* NOTE(review): assumes rdma_create_id() returns NULL on failure;
+  * confirm it does not return an ERR_PTR() value. */
+ if (!new_cm_id) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ conn_id = container_of(new_cm_id, struct rdma_id_private, id);
+ atomic_inc(&conn_id->dev_remove);
+ conn_id->state = CMA_CONNECT;
+
+ /* New connection inherits device from parent */
+ down(&mutex);
+ cma_attach_to_dev(conn_id, listen_id->cma_dev);
+ up(&mutex);
+
+ /* From here on, events for this iw_cm_id go to the new rdma id. */
+ conn_id->cm_id.iw = cm_id;
+ cm_id->context = conn_id;
+ cm_id->cm_handler = cma_iw_handler;
+
+ sin = (struct sockaddr_in*)&new_cm_id->route.addr.src_addr;
+ *sin = iw_event->local_addr;
+
+ sin = (struct sockaddr_in*)&new_cm_id->route.addr.dst_addr;
+ *sin = iw_event->remote_addr;
+
+ ret = cma_notify_user(conn_id, RDMA_CM_EVENT_CONNECT_REQUEST, 0,
+ iw_event->private_data,
+ iw_event->private_data_len);
+ if (ret) {
+ /* Destroy the CM ID by returning a non-zero value. */
+ conn_id->cm_id.iw = NULL;
+ cma_exch(conn_id, CMA_DESTROYING);
+ cma_release_remove(conn_id);
+ rdma_destroy_id(&conn_id->id);
+ }
+ /* NOTE(review): on success conn_id->dev_remove is not released in
+  * this function - confirm that is intended. */
+
+out:
+ cma_release_remove(listen_id);
+ return ret;
+}
+
static int cma_ib_listen(struct rdma_id_private *id_priv)
{
struct ib_cm_private_data_compare compare_data;
@@ -881,28 +1053,52 @@
__be64 svc_id;
int ret;
- id_priv->cm_id = ib_create_cm_id(id_priv->id.device, cma_req_handler,
+ id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
id_priv);
- if (IS_ERR(id_priv->cm_id))
- return PTR_ERR(id_priv->cm_id);
+ if (IS_ERR(id_priv->cm_id.ib))
+ return PTR_ERR(id_priv->cm_id.ib);
addr = &id_priv->id.route.addr.src_addr;
svc_id = cma_get_service_id(id_priv->id.ps, addr);
if (cma_any_addr(addr))
- ret = ib_cm_listen(id_priv->cm_id, svc_id, 0, NULL);
+ ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
else {
cma_set_compare_data(addr, &compare_data);
- ret = ib_cm_listen(id_priv->cm_id, svc_id, 0, &compare_data);
+ ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
}
if (ret) {
- ib_destroy_cm_id(id_priv->cm_id);
- id_priv->cm_id = NULL;
+ ib_destroy_cm_id(id_priv->cm_id.ib);
+ id_priv->cm_id.ib = NULL;
}
return ret;
}
+/*
+ * Start listening on an iWARP device: create an iw_cm_id whose handler
+ * is iw_conn_req_handler(), set its local address from the rdma id's
+ * source address, and put it into the listen state with a backlog of
+ * 10.
+ *
+ * Returns 0 on success or a negative errno. If iw_cm_listen() fails,
+ * the iw_cm_id is destroyed and the stored pointer is cleared.
+ */
+static int cma_iw_listen(struct rdma_id_private *id_priv)
+{
+	struct iw_cm_id *listener;
+	int rc;
+
+	listener = iw_create_cm_id(id_priv->id.device, iw_conn_req_handler,
+				   id_priv);
+	id_priv->cm_id.iw = listener;
+	if (IS_ERR(listener))
+		return PTR_ERR(listener);
+
+	listener->local_addr =
+		*(struct sockaddr_in*)&id_priv->id.route.addr.src_addr;
+
+	rc = iw_cm_listen(listener, 10 /* backlog */);
+	if (rc) {
+		iw_destroy_cm_id(listener);
+		id_priv->cm_id.iw = NULL;
+	}
+
+	return rc;
+}
+
static int cma_duplicate_listen(struct rdma_id_private *id_priv)
{
struct rdma_id_private *cur_id_priv;
@@ -988,6 +1184,9 @@
case IB_NODE_CA:
ret = cma_ib_listen(id_priv);
break;
+ case IB_NODE_RNIC:
+ ret = cma_iw_listen(id_priv);
+ break;
default:
ret = -ENOSYS;
break;
@@ -1067,6 +1266,45 @@
return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
+/*
+ * Work handler queued by cma_resolve_iw_route(). No route query is
+ * performed here: the id is moved from CMA_ROUTE_QUERY to
+ * CMA_ROUTE_RESOLVED and the user is notified. If the id is no longer
+ * in CMA_ROUTE_QUERY, nothing is reported.
+ *
+ * The work item is freed first. The dev_remove hold taken here and one
+ * reference on the id are dropped on every path; if the user callback
+ * returns non-zero, the id is destroyed as well.
+ */
+static void iw_route_handler(void* data)
+{
+	struct cma_work *item = data;
+	struct rdma_id_private *id_priv = item->id;
+	int destroy = 0;
+
+	kfree(item);
+
+	atomic_inc(&id_priv->dev_remove);
+
+	if (cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ROUTE_RESOLVED) &&
+	    cma_notify_user(id_priv, RDMA_CM_EVENT_ROUTE_RESOLVED, 0, NULL, 0)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		destroy = 1;
+	}
+
+	cma_release_remove(id_priv);
+	cma_deref_id(id_priv);
+	if (destroy)
+		rdma_destroy_id(&id_priv->id);
+}
+
+/*
+ * Route resolution for iWARP: queue iw_route_handler() on rdma_wq so
+ * the result is reported to the user from work queue context.
+ * @timeout_ms is not used.
+ *
+ * Returns 0 once the work is queued, or -ENOMEM if the work item
+ * cannot be allocated.
+ */
+static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
+{
+	struct cma_work *item = kmalloc(sizeof *item, GFP_KERNEL);
+
+	if (!item)
+		return -ENOMEM;
+
+	item->id = id_priv;
+	INIT_WORK(&item->work, iw_route_handler, item);
+	queue_work(rdma_wq, &item->work);
+
+	return 0;
+}
+
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
struct rdma_id_private *id_priv;
@@ -1081,6 +1319,9 @@
case IB_NODE_CA:
ret = cma_resolve_ib_route(id_priv, timeout_ms);
break;
+ case IB_NODE_RNIC:
+ ret = cma_resolve_iw_route(id_priv, timeout_ms);
+ break;
default:
ret = -ENOSYS;
break;
@@ -1221,12 +1462,36 @@
return ret;
}
+/*
+ * Work handler that completes address resolution for an iWARP id: the
+ * id is moved from CMA_ADDR_QUERY to CMA_ADDR_RESOLVED and the user is
+ * notified. If the id is no longer in CMA_ADDR_QUERY, nothing is
+ * reported.
+ *
+ * The work item is freed first. The dev_remove hold taken here and one
+ * reference on the id are dropped on every path; if the user callback
+ * returns non-zero, the id is destroyed as well.
+ */
+static void iw_addr_handler(void* data)
+{
+	struct cma_work *item = data;
+	struct rdma_id_private *id_priv = item->id;
+	int destroy = 0;
+
+	kfree(item);
+
+	atomic_inc(&id_priv->dev_remove);
+
+	if (cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED) &&
+	    cma_notify_user(id_priv, RDMA_CM_EVENT_ADDR_RESOLVED, 0, NULL, 0)) {
+		cma_exch(id_priv, CMA_DESTROYING);
+		destroy = 1;
+	}
+
+	cma_release_remove(id_priv);
+	cma_deref_id(id_priv);
+	if (destroy)
+		rdma_destroy_id(&id_priv->id);
+}
+
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
struct sockaddr *dst_addr, int timeout_ms)
{
struct rdma_id_private *id_priv;
enum cma_state expected_state;
- int ret;
+ int ret = 0;
id_priv = container_of(id, struct rdma_id_private, id);
if (id_priv->cma_dev) {
@@ -1341,10 +1606,10 @@
memcpy(private_data + offset, conn_param->private_data,
conn_param->private_data_len);
- id_priv->cm_id = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
+ id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
id_priv);
- if (IS_ERR(id_priv->cm_id)) {
- ret = PTR_ERR(id_priv->cm_id);
+ if (IS_ERR(id_priv->cm_id.ib)) {
+ ret = PTR_ERR(id_priv->cm_id.ib);
goto out;
}
@@ -1371,12 +1636,45 @@
req.max_cm_retries = CMA_MAX_CM_RETRIES;
req.srq = id_priv->srq ? 1 : 0;
- ret = ib_send_cm_req(id_priv->cm_id, &req);
+ ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
kfree(private_data);
return ret;
}
+/*
+ * Active-side connect for iWARP: create an iw_cm_id whose handler is
+ * cma_iw_handler(), copy the source and destination addresses from the
+ * rdma id, bind the id's QP, and start the connection with the caller's
+ * private data.
+ *
+ * Returns -EINVAL if no QP has been created on the id, the error from
+ * iw_create_cm_id() if that fails, and otherwise the result of
+ * iw_cm_connect().
+ */
+static int cma_connect_iw(struct rdma_id_private *id_priv,
+			  struct rdma_conn_param *conn_param)
+{
+	struct rdma_cm_id *id = &id_priv->id;
+	struct iw_cm_id *cm_id;
+
+	if (!id->qp)
+		return -EINVAL;
+
+	cm_id = iw_create_cm_id(id->device, cma_iw_handler, id_priv);
+	if (IS_ERR(cm_id))
+		return PTR_ERR(cm_id);
+
+	id_priv->cm_id.iw = cm_id;
+
+	cm_id->local_addr = *(struct sockaddr_in*)&id->route.addr.src_addr;
+	cm_id->remote_addr = *(struct sockaddr_in*)&id->route.addr.dst_addr;
+
+	iw_cm_bind_qp(cm_id, id->qp);
+
+	return iw_cm_connect(cm_id, conn_param->private_data,
+			     conn_param->private_data_len);
+}
+
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
struct rdma_id_private *id_priv;
@@ -1396,6 +1694,9 @@
case IB_NODE_CA:
ret = cma_connect_ib(id_priv, conn_param);
break;
+ case IB_NODE_RNIC:
+ ret = cma_connect_iw(id_priv, conn_param);
+ break;
default:
ret = -ENOSYS;
break;
@@ -1433,7 +1734,7 @@
rep.rnr_retry_count = conn_param->rnr_retry_count;
rep.srq = id_priv->srq ? 1 : 0;
- return ib_send_cm_rep(id_priv->cm_id, &rep);
+ return ib_send_cm_rep(id_priv->cm_id.ib, &rep);
}
int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
@@ -1458,6 +1759,12 @@
else
ret = cma_rep_recv(id_priv);
break;
+ case IB_NODE_RNIC: {
+ iw_cm_bind_qp(id_priv->cm_id.iw, id_priv->id.qp);
+ ret = iw_cm_accept(id_priv->cm_id.iw, conn_param->private_data,
+ conn_param->private_data_len);
+ break;
+ }
default:
ret = -ENOSYS;
break;
@@ -1486,9 +1793,15 @@
switch (id->device->node_type) {
case IB_NODE_CA:
- ret = ib_send_cm_rej(id_priv->cm_id, IB_CM_REJ_CONSUMER_DEFINED,
+ ret = ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
NULL, 0, private_data, private_data_len);
break;
+
+ case IB_NODE_RNIC:
+ ret = iw_cm_reject(id_priv->cm_id.iw,
+ private_data, private_data_len);
+ break;
+
default:
ret = -ENOSYS;
break;
@@ -1513,9 +1826,12 @@
switch (id->device->node_type) {
case IB_NODE_CA:
/* Initiate or respond to a disconnect. */
- if (ib_send_cm_dreq(id_priv->cm_id, NULL, 0))
- ib_send_cm_drep(id_priv->cm_id, NULL, 0);
+ if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
+ ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
break;
+ case IB_NODE_RNIC:
+ ret = iw_cm_disconnect(id_priv->cm_id.iw);
+ break;
default:
break;
}
Index: mad.c
===================================================================
--- mad.c (revision 4748)
+++ mad.c (working copy)
@@ -2655,7 +2655,9 @@
{
int start, end, i;
- if (device->node_type == IB_NODE_SWITCH) {
+ if (device->node_type == IB_NODE_RNIC)
+ return;
+ else if (device->node_type == IB_NODE_SWITCH) {
start = 0;
end = 0;
} else {
@@ -2702,7 +2704,9 @@
{
int i, num_ports, cur_port;
- if (device->node_type == IB_NODE_SWITCH) {
+ if (device->node_type == IB_NODE_RNIC)
+ return;
+ else if (device->node_type == IB_NODE_SWITCH) {
num_ports = 1;
cur_port = 0;
} else {
More information about the general
mailing list