[ofa-general] [PATCH 1/2 V2] ib/sa: Add InformInfo/Notice support
Jack Morgenstein
jackm at dev.mellanox.co.il
Tue Feb 5 23:23:18 PST 2008
Add SA client support for notice/trap registration using InformInfo.
Clients can use the ib_sa interface to register for SA events based
on trap numbers and to receive SA event notifications. This allows
clients to be notified of events such as a GID going in or out of service.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
drivers/infiniband/core/Makefile | 2
drivers/infiniband/core/notice.c | 749 ++++++++++++++++++++++++++++++++++++
drivers/infiniband/core/sa.h | 16 +
drivers/infiniband/core/sa_query.c | 316 +++++++++++++++
include/rdma/ib_sa.h | 171 ++++++++
5 files changed, 1251 insertions(+), 3 deletions(-)
V2:
Modified ib_sa_informinfo_query() to use alloc_mad(), per the changes
in commit 2aec5c602c6a44e2a3a173339a9ab94549658e4b.
This change is also required for anyone using the InfiniBand driver
built into kernels 2.6.23 and above.
Signed-off-by: Jack Morgenstein <jackm at dev.mellanox.co.il>
Index: ofa_1_3_dev_kernel/drivers/infiniband/core/Makefile
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/Makefile 2008-02-05 08:30:21.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/core/Makefile 2008-02-05 15:10:53.000000000 +0200
@@ -13,7 +13,7 @@ ib_core-$(CONFIG_INFINIBAND_USER_MEM) +=
ib_mad-y := mad.o smi.o agent.o mad_rmpp.o
-ib_sa-y := sa_query.o multicast.o
+ib_sa-y := sa_query.o multicast.o notice.o
ib_cm-y := cm.o
Index: ofa_1_3_dev_kernel/drivers/infiniband/core/notice.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ ofa_1_3_dev_kernel/drivers/infiniband/core/notice.c 2008-02-05 14:57:05.000000000 +0200
@@ -0,0 +1,749 @@
+/*
+ * Copyright (c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/completion.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/bitops.h>
+#include <linux/random.h>
+
+#include "sa.h"
+
+MODULE_AUTHOR("Sean Hefty");
+MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
+MODULE_LICENSE("Dual BSD/GPL");
+
+/* IB client callbacks; defined near the end of this file. */
+static void inform_add_one(struct ib_device *device);
+static void inform_remove_one(struct ib_device *device);
+
+/* IB client through which this file gets device add/remove callbacks. */
+static struct ib_client inform_client = {
+ .name = "ib_notice",
+ .add = inform_add_one,
+ .remove = inform_remove_one
+};
+
+/* SA client handle passed to ib_sa_informinfo_query() for our own queries. */
+static struct ib_sa_client sa_client;
+/* Single-threaded workqueue on which the per-group work items run. */
+static struct workqueue_struct *inform_wq;
+
+struct inform_device;
+
+/* Per-port state: the trap groups registered on one port of a device. */
+struct inform_port {
+ struct inform_device *dev;
+ spinlock_t lock; /* held around lookups/updates of table */
+ struct rb_root table; /* inform_group nodes keyed by trap_number */
+ atomic_t refcount; /* starts at 1; +1 for each group in table */
+ struct completion comp; /* completed when refcount reaches zero */
+ u8 port_num;
+};
+
+/* Per-device state, stored as this client's data on the ib_device. */
+struct inform_device {
+ struct ib_device *device;
+ struct ib_event_handler event_handler;
+ int start_port; /* 0 for switches, 1 otherwise */
+ int end_port; /* 0 for switches, phys_port_cnt otherwise */
+ struct inform_port port[0]; /* indexed by port_num - start_port */
+};
+
+/*
+ * State values shared by groups and members. As used in this file:
+ * group->state is IDLE, BUSY or ERROR; group->join_state is IDLE or MEMBER;
+ * member->state is REGISTERING, MEMBER or ERROR.
+ */
+enum inform_state {
+ INFORM_IDLE,
+ INFORM_REGISTERING,
+ INFORM_MEMBER,
+ INFORM_BUSY,
+ INFORM_ERROR
+};
+
+struct inform_member;
+
+/*
+ * One group per (port, trap number). All local subscribers to the same trap
+ * share a single group, and the group tracks whether it is registered with
+ * the SA.
+ */
+struct inform_group {
+ u16 trap_number; /* key in port->table */
+ struct rb_node node;
+ struct inform_port *port;
+ spinlock_t lock; /* held around list and state updates */
+ struct work_struct work; /* runs inform_work_handler() */
+ struct list_head pending_list; /* members waiting to be registered */
+ struct list_head active_list; /* members that receive notices */
+ struct list_head notice_list; /* queued struct inform_notice entries */
+ struct inform_member *last_join; /* member named in the last send_reg() */
+ int members; /* number of members in INFORM_MEMBER state */
+ enum inform_state join_state; /* State relative to SA */
+ atomic_t refcount; /* group is erased and freed when this hits 0 */
+ enum inform_state state; /* IDLE, BUSY or ERROR */
+ struct ib_sa_query *query; /* filled in by ib_sa_informinfo_query() */
+ int query_id; /* id returned by ib_sa_informinfo_query() */
+};
+
+/*
+ * One registration made through ib_sa_register_inform_info(). The embedded
+ * info structure is what the caller sees; container_of() maps it back.
+ */
+struct inform_member {
+ struct ib_inform_info info;
+ struct ib_sa_client *client;
+ struct inform_group *group;
+ struct list_head list; /* on group->pending_list or ->active_list */
+ enum inform_state state;
+ atomic_t refcount; /* starts at 1; raised around each callback */
+ struct completion comp; /* completed when refcount reaches zero */
+};
+
+/* A copy of a received notice, queued on inform_group.notice_list. */
+struct inform_notice {
+ struct list_head list;
+ struct ib_sa_notice notice;
+};
+
+static void reg_handler(int status, struct ib_sa_inform *inform,
+ void *context);
+static void unreg_handler(int status, struct ib_sa_inform *inform,
+ void *context);
+
+/*
+ * Search @port's red-black tree for the group keyed by @trap_number.
+ * Returns the group, or NULL when no such group exists. Every caller in
+ * this file holds port->lock across the call.
+ */
+static struct inform_group *inform_find(struct inform_port *port,
+					u16 trap_number)
+{
+	struct rb_node *cur;
+
+	for (cur = port->table.rb_node; cur; ) {
+		struct inform_group *entry =
+			rb_entry(cur, struct inform_group, node);
+
+		if (trap_number == entry->trap_number)
+			return entry;
+
+		cur = (trap_number < entry->trap_number) ?
+			cur->rb_left : cur->rb_right;
+	}
+	return NULL;
+}
+
+/*
+ * Insert @group into @port's tree, keyed by its trap number.
+ * Returns NULL when the group was inserted. If a group with the same trap
+ * number is already present, nothing is inserted and that existing group is
+ * returned instead.
+ */
+static struct inform_group *inform_insert(struct inform_port *port,
+					  struct inform_group *group)
+{
+	struct rb_node **slot = &port->table.rb_node;
+	struct rb_node *above = NULL;
+
+	while (*slot) {
+		struct inform_group *entry;
+
+		above = *slot;
+		entry = rb_entry(above, struct inform_group, node);
+		if (group->trap_number == entry->trap_number)
+			return entry;
+
+		if (group->trap_number < entry->trap_number)
+			slot = &above->rb_left;
+		else
+			slot = &above->rb_right;
+	}
+	rb_link_node(&group->node, above, slot);
+	rb_insert_color(&group->node, &port->table);
+	return NULL;
+}
+
+/* Drop one port reference; the last one signals port->comp. */
+static void deref_port(struct inform_port *port)
+{
+	if (!atomic_dec_and_test(&port->refcount))
+		return;
+
+	complete(&port->comp);
+}
+
+/*
+ * Drop one group reference. The final reference removes the group from its
+ * port's tree (under port->lock), frees it, and drops the port reference
+ * that the group held.
+ */
+static void release_group(struct inform_group *group)
+{
+	struct inform_port *owner = group->port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&owner->lock, flags);
+	if (!atomic_dec_and_test(&group->refcount)) {
+		spin_unlock_irqrestore(&owner->lock, flags);
+		return;
+	}
+
+	rb_erase(&group->node, &owner->table);
+	spin_unlock_irqrestore(&owner->lock, flags);
+	kfree(group);
+	deref_port(owner);
+}
+
+/* Drop one member reference; the last one signals member->comp. */
+static void deref_member(struct inform_member *member)
+{
+	if (!atomic_dec_and_test(&member->refcount))
+		return;
+
+	complete(&member->comp);
+}
+
+/*
+ * Queue a member's registration request on its group and start the group's
+ * work item if the group is idle.
+ *
+ * Requests are appended to the tail of pending_list so they are serviced in
+ * FIFO order. The work handler always services the entry at the head of the
+ * list, and process_join_error() only fails a request that is still at the
+ * head; appending keeps the in-flight request at the head while later
+ * requests wait behind it.
+ */
+static void queue_reg(struct inform_member *member)
+{
+	struct inform_group *group = member->group;
+	unsigned long flags;
+
+	spin_lock_irqsave(&group->lock, flags);
+	list_add_tail(&member->list, &group->pending_list);
+	if (group->state == INFORM_IDLE) {
+		group->state = INFORM_BUSY;
+		/* Reference held on behalf of the queued work item. */
+		atomic_inc(&group->refcount);
+		queue_work(inform_wq, &group->work);
+	}
+	spin_unlock_irqrestore(&group->lock, flags);
+}
+
+/*
+ * Send an InformInfo subscription to the SA for @member's trap number and
+ * remember @member as the group's last_join.
+ * Returns 0 once the query has been issued (its id is kept in
+ * group->query_id), or the negative error from ib_sa_informinfo_query().
+ */
+static int send_reg(struct inform_group *group, struct inform_member *member)
+{
+	struct inform_port *port = group->port;
+	struct ib_sa_inform req;
+	int id;
+
+	memset(&req, 0, sizeof req);
+	req.lid_range_begin = cpu_to_be16(0xFFFF);
+	req.is_generic = 1;
+	req.subscribe = 1;
+	req.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
+	req.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
+	req.trap.generic.resp_time = 19;
+	req.trap.generic.producer_type =
+		cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
+
+	group->last_join = member;
+	id = ib_sa_informinfo_query(&sa_client, port->dev->device,
+				    port->port_num, &req, 3000, GFP_KERNEL,
+				    reg_handler, group, &group->query);
+	if (id < 0)
+		return id;
+
+	group->query_id = id;
+	return 0;
+}
+
+/*
+ * Send an InformInfo request with the subscribe flag left clear for the
+ * group's trap number, i.e. cancel the group's subscription with the SA.
+ * Returns 0 once the query has been issued (its id is kept in
+ * group->query_id), or the negative error from ib_sa_informinfo_query().
+ */
+static int send_unreg(struct inform_group *group)
+{
+	struct inform_port *port = group->port;
+	struct ib_sa_inform req;
+	int id;
+
+	memset(&req, 0, sizeof req);
+	req.lid_range_begin = cpu_to_be16(0xFFFF);
+	req.is_generic = 1;
+	req.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
+	req.trap.generic.trap_num = cpu_to_be16(group->trap_number);
+	req.trap.generic.qpn = IB_QP1;
+	req.trap.generic.resp_time = 19;
+	req.trap.generic.producer_type =
+		cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
+
+	id = ib_sa_informinfo_query(&sa_client, port->dev->device,
+				    port->port_num, &req, 3000, GFP_KERNEL,
+				    unreg_handler, group, &group->query);
+	if (id < 0)
+		return id;
+
+	group->query_id = id;
+	return 0;
+}
+
+/*
+ * Make @member an active member of @group: move it to the active list,
+ * count it, and mark it INFORM_MEMBER. The caller in this file holds
+ * group->lock.
+ */
+static void join_group(struct inform_group *group, struct inform_member *member)
+{
+	list_move(&member->list, &group->active_list);
+	group->members++;
+	member->state = INFORM_MEMBER;
+}
+
+/*
+ * Take @member off whatever group list it is on and report @status to its
+ * callback. Returns the callback's return value.
+ */
+static int fail_join(struct inform_group *group, struct inform_member *member,
+		     int status)
+{
+	int ret;
+
+	spin_lock_irq(&group->lock);
+	list_del_init(&member->list);
+	spin_unlock_irq(&group->lock);
+
+	ret = member->info.callback(status, &member->info, NULL);
+	return ret;
+}
+
+/*
+ * Handle a group that is in the INFORM_ERROR state: every active member is
+ * removed from the active list, marked INFORM_ERROR and told -ENETRESET
+ * through its callback. A member whose callback returns non-zero is
+ * unregistered. On return the group is no longer considered registered with
+ * the SA (join_state is INFORM_IDLE) and its state is INFORM_BUSY.
+ */
+static void process_group_error(struct inform_group *group)
+{
+ struct inform_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct inform_member, list);
+ /* Keep the member alive while its callback runs unlocked. */
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ group->members--;
+ member->state = INFORM_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->info.callback(-ENETRESET, &member->info, NULL);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->join_state = INFORM_IDLE;
+ group->state = INFORM_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
+/*
+ * Report a notice to all active subscribers. We use a temporary list to
+ * handle unsubscription requests while the notice is being reported, which
+ * avoids holding the group lock while in the user's callback.
+ *
+ * A subscriber whose callback returns non-zero is unregistered.
+ */
+static void process_notice(struct inform_group *group,
+ struct inform_notice *info_notice)
+{
+ struct inform_member *member;
+ struct list_head list;
+ int ret;
+
+ INIT_LIST_HEAD(&list);
+
+ spin_lock_irq(&group->lock);
+ /* Move all active members to the private list, emptying active_list. */
+ list_splice_init(&group->active_list, &list);
+ while (!list_empty(&list)) {
+
+ member = list_entry(list.next, struct inform_member, list);
+ /* Keep the member alive while its callback runs unlocked. */
+ atomic_inc(&member->refcount);
+ /* Put it back on active_list before dropping the lock. */
+ list_move(&member->list, &group->active_list);
+ spin_unlock_irq(&group->lock);
+
+ ret = member->info.callback(0, &member->info,
+ &info_notice->notice);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+ spin_unlock_irq(&group->lock);
+}
+
+/*
+ * Per-group work function. Besides running from inform_wq, it is called
+ * directly from reg_handler(), unreg_handler() and notice_dispatch().
+ *
+ * Loops while the group is in the error state or has queued notices or
+ * pending registrations, handling them in that order of priority. A pending
+ * member is made active at once if the group is already registered with the
+ * SA; otherwise a registration query is sent and the function returns, to
+ * be continued from reg_handler(). When nothing is left to do, it either
+ * sends an unregistration (no members remain but the group is still
+ * registered) or marks the group idle and drops a group reference.
+ */
+static void inform_work_handler(struct work_struct *work)
+{
+ struct inform_group *group;
+ struct inform_member *member;
+ struct ib_inform_info *info;
+ struct inform_notice *info_notice;
+ int status, ret;
+
+ group = container_of(work, typeof(*group), work);
+retest:
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->pending_list) ||
+ !list_empty(&group->notice_list) ||
+ (group->state == INFORM_ERROR)) {
+
+ if (group->state == INFORM_ERROR) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
+ if (!list_empty(&group->notice_list)) {
+ info_notice = list_entry(group->notice_list.next,
+ struct inform_notice, list);
+ list_del(&info_notice->list);
+ spin_unlock_irq(&group->lock);
+ process_notice(group, info_notice);
+ kfree(info_notice);
+ goto retest;
+ }
+
+ /* Only pending registrations remain: service the head entry. */
+ member = list_entry(group->pending_list.next,
+ struct inform_member, list);
+ info = &member->info;
+ atomic_inc(&member->refcount);
+
+ if (group->join_state == INFORM_MEMBER) {
+ join_group(group, member);
+ spin_unlock_irq(&group->lock);
+ ret = info->callback(0, info, NULL);
+ } else {
+ spin_unlock_irq(&group->lock);
+ status = send_reg(group, member);
+ if (!status) {
+ /* Query in flight; reg_handler() re-enters us. */
+ deref_member(member);
+ return;
+ }
+ ret = fail_join(group, member, status);
+ }
+
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ spin_lock_irq(&group->lock);
+ }
+
+ if (!group->members && (group->join_state == INFORM_MEMBER)) {
+ group->join_state = INFORM_IDLE;
+ spin_unlock_irq(&group->lock);
+ /* On success unreg_handler() re-enters us; on failure retry now. */
+ if (send_unreg(group))
+ goto retest;
+ } else {
+ group->state = INFORM_IDLE;
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+}
+
+/*
+ * Fail a join request if it is still active - at the head of the pending queue.
+ *
+ * The head of pending_list is compared with group->last_join (set by
+ * send_reg()); only on a match is the member removed and its callback
+ * invoked with @status. A non-zero callback return unregisters the member.
+ */
+static void process_join_error(struct inform_group *group, int status)
+{
+ struct inform_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ /*
+ * No emptiness check: the pointer is only compared with last_join
+ * and is dereferenced only when the two match.
+ */
+ member = list_entry(group->pending_list.next,
+ struct inform_member, list);
+ if (group->last_join == member) {
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ spin_unlock_irq(&group->lock);
+ ret = member->info.callback(status, &member->info, NULL);
+ deref_member(member);
+ if (ret)
+ ib_sa_unregister_inform_info(&member->info);
+ } else
+ spin_unlock_irq(&group->lock);
+}
+
+/*
+ * Completion callback for the query issued by send_reg(). A zero status
+ * marks the group as registered with the SA; any other status fails the
+ * request that triggered the query. Either way the group's work function is
+ * then run to continue processing.
+ */
+static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
+{
+	struct inform_group *group = context;
+
+	if (!status)
+		group->join_state = INFORM_MEMBER;
+	else
+		process_join_error(group, status);
+
+	inform_work_handler(&group->work);
+}
+
+/*
+ * Completion callback for the query issued by send_unreg(). The status and
+ * response are ignored; the group's work function is simply run again.
+ */
+static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
+{
+ struct inform_group *group = context;
+
+ inform_work_handler(&group->work);
+}
+
+/*
+ * Hand a received notice to the group registered for its trap number.
+ *
+ * A copy of @notice is appended to the group's notice list, so queued
+ * notices are delivered to subscribers in the order they arrived. If the
+ * group is idle its work function is run directly from here; otherwise the
+ * already-running work function picks the notice up.
+ *
+ * Returns 0 if the notice was queued or if nobody is registered for it
+ * (no client data for @device, or no group for the trap number), and
+ * -ENOMEM if the copy of the notice could not be allocated.
+ */
+int notice_dispatch(struct ib_device *device, u8 port_num,
+		    struct ib_sa_notice *notice)
+{
+	struct inform_device *dev;
+	struct inform_port *port;
+	struct inform_group *group;
+	struct inform_notice *info_notice;
+
+	dev = ib_get_client_data(device, &inform_client);
+	if (!dev)
+		return 0; /* No one to give notice to. */
+
+	port = &dev->port[port_num - dev->start_port];
+	spin_lock_irq(&port->lock);
+	group = inform_find(port, __be16_to_cpu(notice->trap.
+						generic.trap_num));
+	if (!group) {
+		spin_unlock_irq(&port->lock);
+		return 0;
+	}
+
+	/* Pin the group before dropping the port lock. */
+	atomic_inc(&group->refcount);
+	spin_unlock_irq(&port->lock);
+
+	info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
+	if (!info_notice) {
+		release_group(group);
+		return -ENOMEM;
+	}
+
+	info_notice->notice = *notice;
+
+	spin_lock_irq(&group->lock);
+	/* Tail insertion: the work handler consumes from the head (FIFO). */
+	list_add_tail(&info_notice->list, &group->notice_list);
+	if (group->state == INFORM_IDLE) {
+		group->state = INFORM_BUSY;
+		spin_unlock_irq(&group->lock);
+		/* Our reference is handed to the work function. */
+		inform_work_handler(&group->work);
+	} else {
+		spin_unlock_irq(&group->lock);
+		release_group(group);
+	}
+
+	return 0;
+}
+
+/*
+ * Find the group for @trap_number on @port, creating and inserting it if it
+ * does not exist yet, and take a reference on it.
+ * Returns the group, or NULL if a new group could not be allocated.
+ */
+static struct inform_group *acquire_group(struct inform_port *port,
+ u16 trap_number, gfp_t gfp_mask)
+{
+ struct inform_group *group, *cur_group;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ group = inform_find(port, trap_number);
+ if (group)
+ goto found;
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ /* Allocate with the port lock dropped. */
+ group = kzalloc(sizeof *group, gfp_mask);
+ if (!group)
+ return NULL;
+
+ group->port = port;
+ group->trap_number = trap_number;
+ INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
+ INIT_LIST_HEAD(&group->notice_list);
+ INIT_WORK(&group->work, inform_work_handler);
+ spin_lock_init(&group->lock);
+
+ spin_lock_irqsave(&port->lock, flags);
+ cur_group = inform_insert(port, group);
+ if (cur_group) {
+ /* Someone inserted the same trap number meanwhile; use theirs. */
+ kfree(group);
+ group = cur_group;
+ } else
+ /* Each group in the tree holds a reference on its port. */
+ atomic_inc(&port->refcount);
+found:
+ atomic_inc(&group->refcount);
+ spin_unlock_irqrestore(&port->lock, flags);
+ return group;
+}
+
+/*
+ * ib_sa_register_inform_info - register to receive notices for a trap number.
+ *
+ * Allocates a member, attaches it to the group for (@device, @port_num,
+ * @trap_number) and queues the registration. The result of the
+ * registration, and later each notice, is reported through @callback.
+ *
+ * Returns the ib_inform_info to pass to ib_sa_unregister_inform_info(), or
+ * ERR_PTR(-ENODEV) if @device has no data for this client, or
+ * ERR_PTR(-ENOMEM) on allocation failure.
+ *
+ * We serialize all join requests to a single group to make our lives much
+ * easier. Otherwise, two users could try to join the same group
+ * simultaneously, with different configurations, one could leave while the
+ * join is in progress, etc., which makes locking around error recovery
+ * difficult.
+ */
+struct ib_inform_info *
+ib_sa_register_inform_info(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u16 trap_number, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_inform_info *info,
+ struct ib_sa_notice *notice),
+ void *context)
+{
+ struct inform_device *dev;
+ struct inform_member *member;
+ struct ib_inform_info *info;
+ int ret;
+
+ dev = ib_get_client_data(device, &inform_client);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ member = kzalloc(sizeof *member, gfp_mask);
+ if (!member)
+ return ERR_PTR(-ENOMEM);
+
+ ib_sa_client_get(client);
+ member->client = client;
+ member->info.trap_number = trap_number;
+ member->info.callback = callback;
+ member->info.context = context;
+ init_completion(&member->comp);
+ atomic_set(&member->refcount, 1);
+ member->state = INFORM_REGISTERING;
+
+ /* NOTE(review): port_num is not range-checked before indexing. */
+ member->group = acquire_group(&dev->port[port_num - dev->start_port],
+ trap_number, gfp_mask);
+ if (!member->group) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /*
+ * The user will get the info structure in their callback. They
+ * could then free the info structure before we can return from
+ * this routine. So we save the pointer to return before queuing
+ * any callback.
+ */
+ info = &member->info;
+ queue_reg(member);
+ return info;
+
+err:
+ ib_sa_client_put(member->client);
+ kfree(member);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_sa_register_inform_info);
+
+/*
+ * ib_sa_unregister_inform_info - cancel a registration and free it.
+ *
+ * Removes the member from its group, lets the group's work function decide
+ * whether the SA subscription is still needed, then waits for every other
+ * reference on the member to be dropped before freeing it. @info must not
+ * be used after this returns.
+ */
+void ib_sa_unregister_inform_info(struct ib_inform_info *info)
+{
+ struct inform_member *member;
+ struct inform_group *group;
+
+ member = container_of(info, struct inform_member, info);
+ group = member->group;
+
+ spin_lock_irq(&group->lock);
+ if (member->state == INFORM_MEMBER)
+ group->members--;
+
+ list_del_init(&member->list);
+
+ if (group->state == INFORM_IDLE) {
+ group->state = INFORM_BUSY;
+ spin_unlock_irq(&group->lock);
+ /* Continue to hold reference on group until callback */
+ queue_work(inform_wq, &group->work);
+ } else {
+ spin_unlock_irq(&group->lock);
+ release_group(group);
+ }
+
+ /* Drop the initial reference, then wait for any remaining holders. */
+ deref_member(member);
+ wait_for_completion(&member->comp);
+ ib_sa_client_put(member->client);
+ kfree(member);
+}
+EXPORT_SYMBOL(ib_sa_unregister_inform_info);
+
+/*
+ * Put every group on @port into the INFORM_ERROR state. A group that was
+ * idle additionally gets a reference and has its work queued, so that the
+ * error is processed; a non-idle group is only marked.
+ */
+static void inform_groups_lost(struct inform_port *port)
+{
+ struct inform_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct inform_group, node);
+ /* Interrupts are already off via the port lock above. */
+ spin_lock(&group->lock);
+ if (group->state == INFORM_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(inform_wq, &group->work);
+ }
+ group->state = INFORM_ERROR;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/*
+ * IB event callback. Port error, LID change, SM change and client
+ * re-register events put all groups on the affected port into the error
+ * state; every other event is ignored.
+ */
+static void inform_event_handler(struct ib_event_handler *handler,
+				 struct ib_event *event)
+{
+	struct inform_device *dev =
+		container_of(handler, struct inform_device, event_handler);
+	struct inform_port *port;
+
+	switch (event->event) {
+	case IB_EVENT_PORT_ERR:
+	case IB_EVENT_LID_CHANGE:
+	case IB_EVENT_SM_CHANGE:
+	case IB_EVENT_CLIENT_REREGISTER:
+		break;
+	default:
+		return;
+	}
+
+	port = &dev->port[event->element.port_num - dev->start_port];
+	inform_groups_lost(port);
+}
+
+/*
+ * IB client "add" callback: for an IB-transport device, allocate the
+ * per-device state with one inform_port per port, attach it as client data
+ * and register for asynchronous events. Non-IB devices and allocation
+ * failures are silently skipped.
+ */
+static void inform_add_one(struct ib_device *device)
+{
+ struct inform_device *dev;
+ struct inform_port *port;
+ int i;
+
+ if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
+ return;
+
+ /* Room for phys_port_cnt ports in the trailing port[] array. */
+ dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
+ GFP_KERNEL);
+ if (!dev)
+ return;
+
+ /* Switches use the single port number 0; others use 1..phys_port_cnt. */
+ if (device->node_type == RDMA_NODE_IB_SWITCH)
+ dev->start_port = dev->end_port = 0;
+ else {
+ dev->start_port = 1;
+ dev->end_port = device->phys_port_cnt;
+ }
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ port->dev = dev;
+ port->port_num = dev->start_port + i;
+ spin_lock_init(&port->lock);
+ port->table = RB_ROOT;
+ init_completion(&port->comp);
+ /* Initial reference, dropped in inform_remove_one(). */
+ atomic_set(&port->refcount, 1);
+ }
+
+ dev->device = device;
+ ib_set_client_data(device, &inform_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
+ ib_register_event_handler(&dev->event_handler);
+}
+
+/*
+ * IB client "remove" callback: stop event delivery, flush queued group
+ * work, then for each port drop the initial reference and wait until all
+ * groups on it have gone before freeing the per-device state.
+ */
+static void inform_remove_one(struct ib_device *device)
+{
+ struct inform_device *dev;
+ struct inform_port *port;
+ int i;
+
+ dev = ib_get_client_data(device, &inform_client);
+ if (!dev)
+ return;
+
+ ib_unregister_event_handler(&dev->event_handler);
+ flush_workqueue(inform_wq);
+
+ for (i = 0; i <= dev->end_port - dev->start_port; i++) {
+ port = &dev->port[i];
+ deref_port(port);
+ /* Blocks until every group on this port has been released. */
+ wait_for_completion(&port->comp);
+ }
+
+ kfree(dev);
+}
+
+/*
+ * Set up notice handling: create the workqueue, register the SA client and
+ * register the IB client. Returns 0 on success, -ENOMEM if the workqueue
+ * could not be created, or the error from ib_register_client().
+ */
+int notice_init(void)
+{
+	int ret;
+
+	inform_wq = create_singlethread_workqueue("ib_inform");
+	if (!inform_wq)
+		return -ENOMEM;
+
+	ib_sa_register_client(&sa_client);
+
+	ret = ib_register_client(&inform_client);
+	if (!ret)
+		return 0;
+
+	/* Undo the two steps above in reverse order. */
+	ib_sa_unregister_client(&sa_client);
+	destroy_workqueue(inform_wq);
+	return ret;
+}
+
+/* Tear down everything notice_init() set up, in reverse order. */
+void notice_cleanup(void)
+{
+ ib_unregister_client(&inform_client);
+ ib_sa_unregister_client(&sa_client);
+ destroy_workqueue(inform_wq);
+}
Index: ofa_1_3_dev_kernel/drivers/infiniband/core/sa.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/sa.h 2008-02-05 08:30:21.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/core/sa.h 2008-02-05 15:10:53.000000000 +0200
@@ -63,4 +63,20 @@ int ib_sa_mcmember_rec_query(struct ib_s
int mcast_init(void);
void mcast_cleanup(void);
+/* Send an InformInfo request to the SA; implemented in sa_query.c. */
+int ib_sa_informinfo_query(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ struct ib_sa_inform *rec,
+ int timeout_ms, gfp_t gfp_mask,
+ void (*callback)(int status,
+ struct ib_sa_inform *resp,
+ void *context),
+ void *context,
+ struct ib_sa_query **sa_query);
+
+/* Deliver a received notice to registered clients; implemented in notice.c. */
+int notice_dispatch(struct ib_device *device, u8 port_num,
+ struct ib_sa_notice *notice);
+
+/* Notice subsystem setup/teardown; implemented in notice.c. */
+int notice_init(void);
+void notice_cleanup(void);
+
#endif /* SA_H */
Index: ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/core/sa_query.c 2008-02-05 08:30:21.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/core/sa_query.c 2008-02-05 15:11:24.000000000 +0200
@@ -62,10 +62,12 @@ struct ib_sa_sm_ah {
struct ib_sa_port {
struct ib_mad_agent *agent;
+ struct ib_mad_agent *notice_agent;
struct ib_sa_sm_ah *sm_ah;
struct work_struct update_task;
spinlock_t ah_lock;
u8 port_num;
+ struct ib_device *device;
};
struct ib_sa_device {
@@ -102,6 +104,12 @@ struct ib_sa_mcmember_query {
struct ib_sa_query sa_query;
};
+/* An InformInfo query: the caller's callback/context plus the generic query. */
+struct ib_sa_inform_query {
+ void (*callback)(int, struct ib_sa_inform *, void *);
+ void *context;
+ struct ib_sa_query sa_query;
+};
+
static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);
@@ -349,6 +357,110 @@ static const struct ib_field service_rec
.size_bits = 2*64 },
};
+/* Describes one member of struct ib_sa_inform for the pack/unpack tables. */
+#define INFORM_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_inform, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_inform *) 0)->field, \
+ .field_name = "sa_inform:" #field
+
+/*
+ * Field layout used by ib_pack()/ib_unpack() to convert between
+ * struct ib_sa_inform and the InformInfo attribute in a MAD. Each entry
+ * gives the position as a 32-bit word offset plus a bit offset and size.
+ */
+static const struct ib_field inform_table[] = {
+ { INFORM_FIELD(gid),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 128 },
+ { INFORM_FIELD(lid_range_begin),
+ .offset_words = 4,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(lid_range_end),
+ .offset_words = 4,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { RESERVED,
+ .offset_words = 5,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(is_generic),
+ .offset_words = 5,
+ .offset_bits = 16,
+ .size_bits = 8 },
+ { INFORM_FIELD(subscribe),
+ .offset_words = 5,
+ .offset_bits = 24,
+ .size_bits = 8 },
+ { INFORM_FIELD(type),
+ .offset_words = 6,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { INFORM_FIELD(trap.generic.trap_num),
+ .offset_words = 6,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { INFORM_FIELD(trap.generic.qpn),
+ .offset_words = 7,
+ .offset_bits = 0,
+ .size_bits = 24 },
+ { RESERVED,
+ .offset_words = 7,
+ .offset_bits = 24,
+ .size_bits = 3 },
+ { INFORM_FIELD(trap.generic.resp_time),
+ .offset_words = 7,
+ .offset_bits = 27,
+ .size_bits = 5 },
+ { RESERVED,
+ .offset_words = 8,
+ .offset_bits = 0,
+ .size_bits = 8 },
+ { INFORM_FIELD(trap.generic.producer_type),
+ .offset_words = 8,
+ .offset_bits = 8,
+ .size_bits = 24 },
+};
+
+/* Describes one member of struct ib_sa_notice for the unpack table. */
+#define NOTICE_FIELD(field) \
+ .struct_offset_bytes = offsetof(struct ib_sa_notice, field), \
+ .struct_size_bytes = sizeof ((struct ib_sa_notice *) 0)->field, \
+ .field_name = "sa_notice:" #field
+
+/*
+ * Field layout used by ib_unpack() to convert the Notice attribute of a
+ * received MAD into struct ib_sa_notice. Each entry gives the position as a
+ * 32-bit word offset plus a bit offset and size.
+ */
+static const struct ib_field notice_table[] = {
+ { NOTICE_FIELD(is_generic),
+ .offset_words = 0,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { NOTICE_FIELD(type),
+ .offset_words = 0,
+ .offset_bits = 1,
+ .size_bits = 7 },
+ { NOTICE_FIELD(trap.generic.producer_type),
+ .offset_words = 0,
+ .offset_bits = 8,
+ .size_bits = 24 },
+ { NOTICE_FIELD(trap.generic.trap_num),
+ .offset_words = 1,
+ .offset_bits = 0,
+ .size_bits = 16 },
+ { NOTICE_FIELD(issuer_lid),
+ .offset_words = 1,
+ .offset_bits = 16,
+ .size_bits = 16 },
+ { NOTICE_FIELD(notice_toggle),
+ .offset_words = 2,
+ .offset_bits = 0,
+ .size_bits = 1 },
+ { NOTICE_FIELD(notice_count),
+ .offset_words = 2,
+ .offset_bits = 1,
+ .size_bits = 15 },
+ { NOTICE_FIELD(data_details),
+ .offset_words = 2,
+ .offset_bits = 16,
+ .size_bits = 432 },
+ { NOTICE_FIELD(issuer_gid),
+ .offset_words = 16,
+ .offset_bits = 0,
+ .size_bits = 128 },
+};
+
static void free_sm_ah(struct kref *kref)
{
struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);
@@ -923,6 +1035,150 @@ err1:
return ret;
}
+/*
+ * Generic-query completion hook for InformInfo queries. When a response MAD
+ * is present it is unpacked into a struct ib_sa_inform and passed to the
+ * caller's callback; otherwise the callback gets a NULL response.
+ */
+static void ib_sa_inform_callback(struct ib_sa_query *sa_query,
+				  int status,
+				  struct ib_sa_mad *mad)
+{
+	struct ib_sa_inform_query *query;
+	struct ib_sa_inform rec;
+
+	query = container_of(sa_query, struct ib_sa_inform_query, sa_query);
+	if (!mad) {
+		query->callback(status, NULL, query->context);
+		return;
+	}
+
+	ib_unpack(inform_table, ARRAY_SIZE(inform_table), mad->data, &rec);
+	query->callback(status, &rec, query->context);
+}
+
+/* Release hook for InformInfo queries: free the enclosing query object. */
+static void ib_sa_inform_release(struct ib_sa_query *sa_query)
+{
+	struct ib_sa_inform_query *query;
+
+	query = container_of(sa_query, struct ib_sa_inform_query, sa_query);
+	kfree(query);
+}
+
+/**
+ * ib_sa_informinfo_query - Start an InformInfo registration.
+ * @client: SA client
+ * @device: device to send query on
+ * @port_num: port number to send query on
+ * @rec: Inform record to send in query
+ * @timeout_ms: time to wait for response
+ * @gfp_mask: GFP mask to use for internal allocations
+ * @callback: function called when notice handler registration completes,
+ * times out or is canceled
+ * @context: opaque user context passed to callback
+ * @sa_query: query context, used to cancel query
+ *
+ * This function sends inform info to register with SA to receive
+ * in-service notice.
+ * The callback function will be called when the query completes (or
+ * fails); status is 0 for a successful response, -EINTR if the query
+ * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
+ * occurred sending the query. The resp parameter of the callback is
+ * only valid if status is 0.
+ *
+ * If the return value of ib_sa_informinfo_query() is negative, it is an
+ * error code. Otherwise it is a query ID that can be used to cancel
+ * the query.
+ */
+int ib_sa_informinfo_query(struct ib_sa_client *client,
+			   struct ib_device *device, u8 port_num,
+			   struct ib_sa_inform *rec,
+			   int timeout_ms, gfp_t gfp_mask,
+			   void (*callback)(int status,
+					    struct ib_sa_inform *resp,
+					    void *context),
+			   void *context,
+			   struct ib_sa_query **sa_query)
+{
+	struct ib_sa_inform_query *query;
+	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
+	struct ib_sa_port *port;
+	struct ib_mad_agent *agent;
+	struct ib_sa_mad *mad;
+	int ret;
+
+	if (!sa_dev)
+		return -ENODEV;
+
+	port = &sa_dev->port[port_num - sa_dev->start_port];
+	agent = port->agent;
+
+	query = kmalloc(sizeof *query, gfp_mask);
+	if (!query)
+		return -ENOMEM;
+
+	/* alloc_mad() needs the port, so set it before the call. */
+	query->sa_query.port = port;
+	ret = alloc_mad(&query->sa_query, gfp_mask);
+	if (ret)
+		goto err1;
+
+	ib_sa_client_get(client);
+	query->sa_query.client = client;
+	query->callback = callback;
+	query->context = context;
+
+	mad = query->sa_query.mad_buf->mad;
+	init_mad(mad, agent);
+
+	query->sa_query.callback = callback ? ib_sa_inform_callback : NULL;
+	query->sa_query.release = ib_sa_inform_release;
+	mad->mad_hdr.method = IB_MGMT_METHOD_SET;
+	mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_INFORM_INFO);
+
+	ib_pack(inform_table, ARRAY_SIZE(inform_table), rec, mad->data);
+
+	*sa_query = &query->sa_query;
+	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
+	if (ret < 0)
+		goto err2;
+
+	return ret;
+
+err2:
+	*sa_query = NULL;
+	ib_sa_client_put(query->sa_query.client);
+	/*
+	 * Undo alloc_mad() with its counterpart rather than freeing only the
+	 * send buffer, so that anything else alloc_mad() acquired is released
+	 * as well.
+	 * NOTE(review): free_mad() is the helper added alongside alloc_mad()
+	 * by the upstream commit this patch was adapted to; confirm that it
+	 * is present in this tree.
+	 */
+	free_mad(&query->sa_query);
+err1:
+	kfree(query);
+	return ret;
+}
+
+/*
+ * Acknowledge a received notice: send a copy of the received MAD back with
+ * the method changed to REPORT_RESP, addressed via the port's current SM
+ * address handle. Failures are silent; nothing is sent if the send buffer
+ * cannot be allocated or no SM address handle is available.
+ */
+static void ib_sa_notice_resp(struct ib_sa_port *port,
+			      struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_mad_send_buf *mad_buf;
+	struct ib_sa_mad *mad;
+	int ret;
+
+	mad_buf = ib_create_send_mad(port->notice_agent, 1, 0, 0,
+				     IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+				     GFP_KERNEL);
+	if (IS_ERR(mad_buf))
+		return;
+
+	mad = mad_buf->mad;
+	memcpy(mad, mad_recv_wc->recv_buf.mad, sizeof *mad);
+	mad->mad_hdr.method = IB_MGMT_METHOD_REPORT_RESP;
+
+	spin_lock_irq(&port->ah_lock);
+	if (!port->sm_ah) {
+		/* No address handle to send with; do not dereference NULL. */
+		spin_unlock_irq(&port->ah_lock);
+		ib_free_send_mad(mad_buf);
+		return;
+	}
+	/* Hold the address handle until notice_resp_handler() drops it. */
+	kref_get(&port->sm_ah->ref);
+	mad_buf->context[0] = &port->sm_ah->ref;
+	mad_buf->ah = port->sm_ah->ah;
+	spin_unlock_irq(&port->ah_lock);
+
+	ret = ib_post_send_mad(mad_buf, NULL);
+	if (ret) {
+		kref_put(mad_buf->context[0], free_sm_ah);
+		ib_free_send_mad(mad_buf);
+	}
+}
+
static void send_handler(struct ib_mad_agent *agent,
struct ib_mad_send_wc *mad_send_wc)
{
@@ -976,9 +1232,36 @@ static void recv_handler(struct ib_mad_a
ib_free_recv_mad(mad_recv_wc);
}
+/*
+ * Send-completion handler for the notice agent: drop the address-handle
+ * reference stored in context[0] by ib_sa_notice_resp() and free the buffer.
+ */
+static void notice_resp_handler(struct ib_mad_agent *agent,
+ struct ib_mad_send_wc *mad_send_wc)
+{
+ kref_put(mad_send_wc->send_buf->context[0], free_sm_ah);
+ ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+/*
+ * Receive handler for the notice agent: unpack the Notice attribute from
+ * the received MAD, hand it to notice_dispatch(), and send a response only
+ * when dispatch returned 0. The receive buffer is always freed.
+ */
+static void notice_handler(struct ib_mad_agent *mad_agent,
+			   struct ib_mad_recv_wc *mad_recv_wc)
+{
+	struct ib_sa_port *port;
+	struct ib_sa_mad *mad;
+	struct ib_sa_notice notice;
+
+	port = mad_agent->context;
+	mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
+	ib_unpack(notice_table, ARRAY_SIZE(notice_table), mad->data, &notice);
+
+	if (!notice_dispatch(port->device, port->port_num, &notice))
+		ib_sa_notice_resp(port, mad_recv_wc);
+	ib_free_recv_mad(mad_recv_wc);
+}
+
static void ib_sa_add_one(struct ib_device *device)
{
struct ib_sa_device *sa_dev;
+ struct ib_mad_reg_req reg_req = {
+ .mgmt_class = IB_MGMT_CLASS_SUBN_ADM,
+ .mgmt_class_version = 2
+ };
int s, e, i;
if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
@@ -1012,6 +1295,16 @@ static void ib_sa_add_one(struct ib_devi
if (IS_ERR(sa_dev->port[i].agent))
goto err;
+ sa_dev->port[i].device = device;
+ set_bit(IB_MGMT_METHOD_REPORT, reg_req.method_mask);
+ sa_dev->port[i].notice_agent =
+ ib_register_mad_agent(device, i + s, IB_QPT_GSI,
+ &reg_req, 0, notice_resp_handler,
+ notice_handler, &sa_dev->port[i]);
+
+ if (IS_ERR(sa_dev->port[i].notice_agent))
+ goto err;
+
INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
}
@@ -1034,8 +1327,14 @@ static void ib_sa_add_one(struct ib_devi
return;
err:
- while (--i >= 0)
- ib_unregister_mad_agent(sa_dev->port[i].agent);
+ while (--i >= 0) {
+ if (!IS_ERR(sa_dev->port[i].notice_agent)) {
+ ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
+ }
+ if (!IS_ERR(sa_dev->port[i].agent)) {
+ ib_unregister_mad_agent(sa_dev->port[i].agent);
+ }
+ }
kfree(sa_dev);
@@ -1055,6 +1354,7 @@ static void ib_sa_remove_one(struct ib_d
flush_scheduled_work();
for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
+ ib_unregister_mad_agent(sa_dev->port[i].notice_agent);
ib_unregister_mad_agent(sa_dev->port[i].agent);
kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
}
@@ -1083,7 +1383,15 @@ static int __init ib_sa_init(void)
goto err2;
}
+ ret = notice_init();
+ if (ret) {
+ printk(KERN_ERR "Couldn't initialize notice handling\n");
+ goto err3;
+ }
+
return 0;
+err3:
+ mcast_cleanup();
err2:
ib_unregister_client(&sa_client);
err1:
@@ -1093,6 +1401,7 @@ err1:
static void __exit ib_sa_cleanup(void)
{
mcast_cleanup();
+ notice_cleanup();
ib_unregister_client(&sa_client);
idr_destroy(&query_idr);
}
Index: ofa_1_3_dev_kernel/include/rdma/ib_sa.h
===================================================================
--- ofa_1_3_dev_kernel.orig/include/rdma/ib_sa.h 2008-02-05 08:30:21.000000000 +0200
+++ ofa_1_3_dev_kernel/include/rdma/ib_sa.h 2008-02-05 10:07:01.000000000 +0200
@@ -253,6 +253,127 @@ struct ib_sa_service_rec {
u64 data64[2];
};
+enum {
+ IB_SA_EVENT_TYPE_FATAL = 0x0,
+ IB_SA_EVENT_TYPE_URGENT = 0x1,
+ IB_SA_EVENT_TYPE_SECURITY = 0x2,
+ IB_SA_EVENT_TYPE_SM = 0x3,
+ IB_SA_EVENT_TYPE_INFO = 0x4,
+ IB_SA_EVENT_TYPE_EMPTY = 0x7F,
+ IB_SA_EVENT_TYPE_ALL = 0xFFFF
+};
+
+enum {
+ IB_SA_EVENT_PRODUCER_TYPE_CA = 0x1,
+ IB_SA_EVENT_PRODUCER_TYPE_SWITCH = 0x2,
+ IB_SA_EVENT_PRODUCER_TYPE_ROUTER = 0x3,
+ IB_SA_EVENT_PRODUCER_TYPE_CLASS_MANAGER = 0x4,
+ IB_SA_EVENT_PRODUCER_TYPE_ALL = 0xFFFFFF
+};
+
+enum {
+ IB_SA_SM_TRAP_GID_IN_SERVICE = 64,
+ IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65,
+ IB_SA_SM_TRAP_CREATE_MC_GROUP = 66,
+ IB_SA_SM_TRAP_DELETE_MC_GROUP = 67,
+ IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128,
+ IB_SA_SM_TRAP_LINK_INTEGRITY = 129,
+ IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130,
+ IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131,
+ IB_SA_SM_TRAP_BAD_M_KEY = 256,
+ IB_SA_SM_TRAP_BAD_P_KEY = 257,
+ IB_SA_SM_TRAP_BAD_Q_KEY = 258,
+ IB_SA_SM_TRAP_SWITCH_BAD_P_KEY = 259,
+ IB_SA_SM_TRAP_ALL = 0xFFFF
+};
+
+struct ib_sa_inform {
+ union ib_gid gid;
+ __be16 lid_range_begin;
+ __be16 lid_range_end;
+ u8 is_generic;
+ u8 subscribe;
+ __be16 type;
+ union {
+ struct {
+ __be16 trap_num;
+ __be32 qpn;
+ u8 resp_time;
+ __be32 producer_type;
+ } generic;
+ struct {
+ __be16 device_id;
+ __be32 qpn;
+ u8 resp_time;
+ __be32 vendor_id;
+ } vendor;
+ } trap;
+};
+
+struct ib_sa_notice {
+ u8 is_generic;
+ u8 type;
+ union {
+ struct {
+ __be32 producer_type;
+ __be16 trap_num;
+ } generic;
+ struct {
+ __be32 vendor_id;
+ __be16 device_id;
+ } vendor;
+ } trap;
+ __be16 issuer_lid;
+ __be16 notice_count;
+ u8 notice_toggle;
+ /*
+ * Align data 16 bits off 64 bit field to match InformInfo definition.
+ * Data contained within this field will then align properly.
+ * See IB spec 1.2, sections 13.4.8.2 and 14.2.5.1.
+ */
+ u8 reserved[5];
+ u8 data_details[54];
+ union ib_gid issuer_gid;
+};
+
+/*
+ * SM notice data details for:
+ *
+ * IB_SA_SM_TRAP_GID_IN_SERVICE = 64
+ * IB_SA_SM_TRAP_GID_OUT_OF_SERVICE = 65
+ * IB_SA_SM_TRAP_CREATE_MC_GROUP = 66
+ * IB_SA_SM_TRAP_DELETE_MC_GROUP = 67
+ */
+struct ib_sa_notice_data_gid {
+ u8 reserved[6];
+ u8 gid[16];
+ u8 padding[32];
+};
+
+/*
+ * SM notice data details for:
+ *
+ * IB_SA_SM_TRAP_PORT_CHANGE_STATE = 128
+ */
+struct ib_sa_notice_data_port_change {
+ __be16 lid;
+ u8 padding[52];
+};
+
+/*
+ * SM notice data details for:
+ *
+ * IB_SA_SM_TRAP_LINK_INTEGRITY = 129
+ * IB_SA_SM_TRAP_EXCESSIVE_BUFFER_OVERRUN = 130
+ * IB_SA_SM_TRAP_FLOW_CONTROL_UPDATE_EXPIRED = 131
+ */
+struct ib_sa_notice_data_port_error {
+ u8 reserved[2];
+ __be16 lid;
+ u8 port_num;
+ u8 padding[49];
+};
+
struct ib_sa_client {
atomic_t users;
struct completion comp;
@@ -381,4 +502,54 @@ int ib_init_ah_from_path(struct ib_devic
struct ib_sa_path_rec *rec,
struct ib_ah_attr *ah_attr);
+struct ib_inform_info {
+ void *context;
+ int (*callback)(int status,
+ struct ib_inform_info *info,
+ struct ib_sa_notice *notice);
+ u16 trap_number;
+};
+
+/**
+ * ib_sa_register_inform_info - Registers to receive notice events.
+ * @device: Device associated with the registration.
+ * @port_num: Port on the specified device to associate with the registration.
+ * @trap_number: InformInfo trap number to register for.
+ * @gfp_mask: GFP mask for memory allocations.
+ * @callback: User callback invoked once the registration completes and to
+ * report noticed events.
+ * @context: User specified context stored with the ib_inform_info structure.
+ *
+ * This call initiates a registration request with the SA for the specified
+ * trap number. If the operation is started successfully, it returns
+ * an ib_inform_info structure that is used to track the registration operation.
+ * Users must free this structure by calling ib_sa_unregister_inform_info,
+ * even if the operation later fails. (The callback status is non-zero.)
+ *
+ * If the registration fails, the callback status will be non-zero. If the registration
+ * succeeds, the callback status will be zero, but the notice parameter will
+ * be NULL. If the notice parameter is not NULL, a trap or notice is being
+ * reported to the user.
+ *
+ * A status of -ENETRESET indicates that an error occurred which requires
+ * reregistration.
+ */
+struct ib_inform_info *
+ib_sa_register_inform_info(struct ib_sa_client *client,
+ struct ib_device *device, u8 port_num,
+ u16 trap_number, gfp_t gfp_mask,
+ int (*callback)(int status,
+ struct ib_inform_info *info,
+ struct ib_sa_notice *notice),
+ void *context);
+
+/**
+ * ib_sa_unregister_inform_info - Releases an InformInfo registration.
+ * @info: InformInfo registration tracking structure.
+ *
+ * This call blocks until the registration request is destroyed. It may
+ * not be called from within the registration callback.
+ */
+void ib_sa_unregister_inform_info(struct ib_inform_info *info);
+
#endif /* IB_SA_H */
More information about the general
mailing list