[openib-general] [PATCH 1/2] multicast: notify users on membership errors
Sean Hefty
sean.hefty at intel.com
Thu Jun 8 21:38:07 PDT 2006
Modify the ib_multicast module to detect events that require clients to rejoin
multicast groups. Add tracking of clients that are members of any group, and
provide notification to those clients when such an event occurs.
This patch tracks all active members of a group. When an event occurs that
requires clients to rejoin a multicast group, the active members are moved
into an error state and the clients are notified with a network reset error.
The group is then reset so that subsequent join requests generate new
requests to the SA.
Signed-off-by: Sean Hefty <sean.hefty at intel.com>
---
Hal, can you apply these patches and see if they fix the issues that you
are experiencing? They should eliminate any races caused by ipoib leaving
and then quickly re-joining a group as a result of an event.
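
As an aside, here is a rough sketch of what handling the new -ENETRESET
notification could look like on the client side. This is illustrative only and
not part of the patch: it assumes the callback signature and the context
pointer from the ib_multicast.h interface in this series, and my_mcast_ctx /
rejoin_work are made-up client pieces that would re-issue the join from
process context.

#include <linux/workqueue.h>
#include <rdma/ib_multicast.h>

/* Hypothetical per-group client state; not part of this patch. */
struct my_mcast_ctx {
	struct ib_multicast *multicast;	/* handle returned by the join call */
	struct work_struct rejoin_work;	/* client helper that re-issues the join */
};

static int my_mcast_callback(int status, struct ib_multicast *multicast)
{
	struct my_mcast_ctx *ctx = multicast->context;

	if (status == -ENETRESET) {
		/*
		 * Membership was lost due to a port error, LID change,
		 * SM change, or client reregister event.  Schedule a
		 * rejoin from process context; returning non-zero lets
		 * the core free this ib_multicast for us (see
		 * process_group_error() below).
		 */
		ctx->multicast = NULL;
		schedule_work(&ctx->rejoin_work);
		return 1;
	}

	if (status) {
		/* Normal join failure; error handling omitted. */
		return 0;
	}

	/* Join completed; multicast->rec holds the joined MCMemberRecord. */
	return 0;
}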
Index: multicast.c
===================================================================
--- multicast.c (revision 7805)
+++ multicast.c (working copy)
@@ -61,6 +61,7 @@ static struct ib_client mcast_client = {
.remove = mcast_remove_one
};
+static struct ib_event_handler event_handler;
static struct workqueue_struct *mcast_wq;
struct mcast_device;
@@ -86,6 +87,7 @@ enum mcast_state {
MCAST_JOINING,
MCAST_MEMBER,
MCAST_BUSY,
+ MCAST_ERROR
};
struct mcast_member;
@@ -97,6 +99,7 @@ struct mcast_group {
spinlock_t lock;
struct work_struct work;
struct list_head pending_list;
+ struct list_head active_list;
struct mcast_member *last_join;
int members[3];
atomic_t refcount;
@@ -338,6 +341,8 @@ static void join_group(struct mcast_grou
group->rec.join_state |= join_state;
member->multicast.rec = group->rec;
member->multicast.rec.join_state = join_state;
+ list_del(&member->list);
+ list_add(&member->list, &group->active_list);
}
static int fail_join(struct mcast_group *group, struct mcast_member *member,
@@ -349,6 +354,34 @@ static int fail_join(struct mcast_group
return member->multicast.callback(status, &member->multicast);
}
+static void process_group_error(struct mcast_group *group)
+{
+ struct mcast_member *member;
+ int ret;
+
+ spin_lock_irq(&group->lock);
+ while (!list_empty(&group->active_list)) {
+ member = list_entry(group->active_list.next,
+ struct mcast_member, list);
+ atomic_inc(&member->refcount);
+ list_del_init(&member->list);
+ adjust_membership(group, member->multicast.rec.join_state, -1);
+ member->state = MCAST_ERROR;
+ spin_unlock_irq(&group->lock);
+
+ ret = member->multicast.callback(-ENETRESET,
+ &member->multicast);
+ deref_member(member);
+ if (ret)
+ ib_free_multicast(&member->multicast);
+ spin_lock_irq(&group->lock);
+ }
+
+ group->rec.join_state = 0;
+ group->state = MCAST_BUSY;
+ spin_unlock_irq(&group->lock);
+}
+
static void mcast_work_handler(void *data)
{
struct mcast_group *group = data;
@@ -359,6 +392,12 @@ static void mcast_work_handler(void *dat
retest:
spin_lock_irq(&group->lock);
+ if (group->state == MCAST_ERROR) {
+ spin_unlock_irq(&group->lock);
+ process_group_error(group);
+ goto retest;
+ }
+
while (!list_empty(&group->pending_list)) {
member = list_entry(group->pending_list.next,
struct mcast_member, list);
@@ -371,8 +410,8 @@ retest:
multicast->comp_mask);
if (!status)
join_group(group, member, join_state);
-
- list_del_init(&member->list);
+ else
+ list_del_init(&member->list);
spin_unlock_irq(&group->lock);
ret = multicast->callback(status, multicast);
} else {
@@ -467,6 +506,7 @@ static struct mcast_group *acquire_group
group->port = port;
group->rec.mgid = *mgid;
INIT_LIST_HEAD(&group->pending_list);
+ INIT_LIST_HEAD(&group->active_list);
INIT_WORK(&group->work, mcast_work_handler, group);
spin_lock_init(&group->lock);
@@ -551,16 +591,10 @@ void ib_free_multicast(struct ib_multica
group = member->group;
spin_lock_irq(&group->lock);
- switch (member->state) {
- case MCAST_MEMBER:
+ if (member->state == MCAST_MEMBER)
adjust_membership(group, multicast->rec.join_state, -1);
- break;
- case MCAST_JOINING:
- list_del_init(&member->list);
- break;
- default:
- break;
- }
+
+ list_del_init(&member->list);
if (group->state == MCAST_IDLE) {
group->state = MCAST_BUSY;
@@ -578,6 +612,48 @@ void ib_free_multicast(struct ib_multica
}
EXPORT_SYMBOL(ib_free_multicast);
+static void mcast_groups_lost(struct mcast_port *port)
+{
+ struct mcast_group *group;
+ struct rb_node *node;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ for (node = rb_first(&port->table); node; node = rb_next(node)) {
+ group = rb_entry(node, struct mcast_group, node);
+ spin_lock(&group->lock);
+ if (group->state == MCAST_IDLE) {
+ atomic_inc(&group->refcount);
+ queue_work(mcast_wq, &group->work);
+ }
+ group->state = MCAST_ERROR;
+ spin_unlock(&group->lock);
+ }
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mcast_event_handler(struct ib_event_handler *handler,
+ struct ib_event *event)
+{
+ struct mcast_device *dev;
+
+ dev = ib_get_client_data(event->device, &mcast_client);
+ if (!dev)
+ return;
+
+ switch (event->event) {
+ case IB_EVENT_PORT_ERR:
+ case IB_EVENT_LID_CHANGE:
+ case IB_EVENT_SM_CHANGE:
+ case IB_EVENT_CLIENT_REREGISTER:
+ mcast_groups_lost(&dev->port[event->element.port_num -
+ dev->start_port]);
+ break;
+ default:
+ break;
+ }
+}
+
static void mcast_add_one(struct ib_device *device)
{
struct mcast_device *dev;
@@ -611,6 +687,9 @@ static void mcast_add_one(struct ib_devi
dev->device = device;
ib_set_client_data(device, &mcast_client, dev);
+
+ INIT_IB_EVENT_HANDLER(&event_handler, device, mcast_event_handler);
+ ib_register_event_handler(&event_handler);
}
static void mcast_remove_one(struct ib_device *device)
@@ -623,6 +702,7 @@ static void mcast_remove_one(struct ib_d
if (!dev)
return;
+ ib_unregister_event_handler(&event_handler);
flush_workqueue(mcast_wq);
for (i = 0; i < dev->end_port - dev->start_port; i++) {