[ofa-general] [PATCH] opensm: multicast group create/delete notification fix

Sasha Khapyorsky sashak at voltaire.com
Wed Sep 17 11:32:58 PDT 2008


Now OpenSM is not sending multicast group delete notification when last
full member leaves the group. But according to IBA it should:

o15-0.2-1.9: If SA supports UD multicast, then if SA receives a valid MC
deletion MAD that removes the last full member, SA shall delete all
records belonging to that multicast group. If this port was the last full
member, SA may release all resources associated with it, including resources
that may exist in the fabric itself. The exact time at which this operation
is done is vendor-specific. When the last full member leaves the
group, SA shall forward Trap 67 (see 14.4.10 Multicast Group Create/Delete
Traps on page 891) to subscribing endports.

This patch fixes this bug: Now OpenSM will track a number of full members
for each multicast group and will send MCG creation notification when
first full member joins the group (so it will work for "well known"
groups as well) and MCG deletion notification when last full member
leaves. Routing will not be calculated for groups w/out full members
(including "well known") and such group will be deleted at end of
routing cycle (except "well known" as it is now).

As side effect of the patch osm_mgrp_port_remove() is not called from
osm_sm_mcgrp_leave() anymore and we can cleanup locking there against SA
MCR processor.

The patch potentially could be dangerous, it changes the current
behavior, so any kind of testing will be appreciated.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 opensm/include/opensm/osm_multicast.h  |    9 ++-
 opensm/opensm/osm_drop_mgr.c           |    2 +-
 opensm/opensm/osm_mcast_mgr.c          |   18 ++----
 opensm/opensm/osm_multicast.c          |   95 ++++++++++++++++++++++---------
 opensm/opensm/osm_sa.c                 |    3 +-
 opensm/opensm/osm_sa_mcmember_record.c |   51 ++++-------------
 opensm/opensm/osm_sm.c                 |    6 +-
 7 files changed, 98 insertions(+), 86 deletions(-)

diff --git a/opensm/include/opensm/osm_multicast.h b/opensm/include/opensm/osm_multicast.h
index c860d4a..a0eab16 100644
--- a/opensm/include/opensm/osm_multicast.h
+++ b/opensm/include/opensm/osm_multicast.h
@@ -139,6 +139,7 @@ typedef struct osm_mgrp {
 	boolean_t to_be_deleted;
 	uint32_t last_change_id;
 	uint32_t last_tree_id;
+	unsigned full_members;
 } osm_mgrp_t;
 /*
 * FIELDS
@@ -364,7 +365,8 @@ static inline ib_net16_t osm_mgrp_get_mlid(IN const osm_mgrp_t * const p_mgrp)
 *
 * SYNOPSIS
 */
-osm_mcm_port_t *osm_mgrp_add_port(IN osm_mgrp_t * const p_mgrp,
+osm_mcm_port_t *osm_mgrp_add_port(osm_subn_t *subn, osm_log_t *log,
+				  IN osm_mgrp_t * const p_mgrp,
 				  IN const ib_gid_t * const p_port_gid,
 				  IN const uint8_t join_state,
 				  IN boolean_t proxy_join);
@@ -433,7 +435,7 @@ osm_mgrp_is_port_present(IN const osm_mgrp_t * const p_mgrp,
 * SYNOPSIS
 */
 void
-osm_mgrp_remove_port(IN osm_subn_t * const p_subn,
+osm_mgrp_delete_port(IN osm_subn_t * const p_subn,
 		     IN osm_log_t * const p_log,
 		     IN osm_mgrp_t * const p_mgrp,
 		     IN const ib_net64_t port_guid);
@@ -460,6 +462,9 @@ osm_mgrp_remove_port(IN osm_subn_t * const p_subn,
 * SEE ALSO
 *********/
 
+int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t *log, osm_mgrp_t *mgrp,
+			 osm_mcm_port_t *mcm, uint8_t join_state);
+
 /****f* OpenSM: Multicast Group/osm_mgrp_apply_func
 * NAME
 *	osm_mgrp_apply_func
diff --git a/opensm/opensm/osm_drop_mgr.c b/opensm/opensm/osm_drop_mgr.c
index 1aeb172..e827c26 100644
--- a/opensm/opensm/osm_drop_mgr.c
+++ b/opensm/opensm/osm_drop_mgr.c
@@ -209,7 +209,7 @@ static void __osm_drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
 	while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
 		p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid);
 		if (p_mgrp) {
-			osm_mgrp_remove_port(sm->p_subn, sm->p_log,
+			osm_mgrp_delete_port(sm->p_subn, sm->p_log,
 					     p_mgrp, p_port->guid);
 			osm_mcm_info_delete((osm_mcm_info_t *) p_mcm);
 		}
diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
index 78eed50..fc8533d 100644
--- a/opensm/opensm/osm_mcast_mgr.c
+++ b/opensm/opensm/osm_mcast_mgr.c
@@ -1073,6 +1073,9 @@ osm_mcast_mgr_process_tree(osm_sm_t * sm,
 	 */
 	__osm_mcast_mgr_clear(sm, p_mgrp);
 
+	if (!p_mgrp->full_members)
+		goto Exit;
+
 	status = __osm_mcast_mgr_build_spanning_tree(sm, p_mgrp);
 	if (status != IB_SUCCESS) {
 		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: "
@@ -1109,20 +1112,11 @@ mcast_mgr_process_mgrp(osm_sm_t * sm,
 	}
 	p_mgrp->last_tree_id = p_mgrp->last_change_id;
 
-	/* Remove MGRP only if osm_mcm_port_t count is 0 and
-	 * Not a well known group
-	 */
-	if (cl_qmap_count(&p_mgrp->mcm_port_tbl) == 0 && !p_mgrp->well_known) {
+	/* remove MCGRP if it is marked for deletion */
+	if (p_mgrp->to_be_deleted) {
 		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
-			"Destroying mgrp with lid:0x%X\n",
+			"Destroying mgrp with lid:0x%x\n",
 			cl_ntoh16(p_mgrp->mlid));
-		if (p_mgrp->to_be_deleted == FALSE) {
-			p_mgrp->to_be_deleted = TRUE;
-			/* Send a Report to any InformInfo registered for
-			   Trap 67 : MCGroup delete */
-			osm_mgrp_send_delete_notice(sm->p_subn, sm->p_log,
-						    p_mgrp);
-		}
 		sm->p_subn->mgroups[cl_ntoh16(p_mgrp->mlid) - IB_LID_MCAST_START_HO] = NULL;
 		osm_mgrp_delete(p_mgrp);
 	}
diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
index b810630..83c0399 100644
--- a/opensm/opensm/osm_multicast.c
+++ b/opensm/opensm/osm_multicast.c
@@ -95,7 +95,8 @@ osm_mgrp_t *osm_mgrp_new(IN const ib_net16_t mlid)
 
 /**********************************************************************
  **********************************************************************/
-osm_mcm_port_t *osm_mgrp_add_port(IN osm_mgrp_t * const p_mgrp,
+osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t *subn, osm_log_t *log,
+				  IN osm_mgrp_t * const p_mgrp,
 				  IN const ib_gid_t * const p_port_gid,
 				  IN const uint8_t join_state,
 				  IN boolean_t proxy_join)
@@ -103,7 +104,7 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_mgrp_t * const p_mgrp,
 	ib_net64_t port_guid;
 	osm_mcm_port_t *p_mcm_port;
 	cl_map_item_t *prev_item;
-	uint8_t prev_join_state;
+	uint8_t prev_join_state = 0;
 	uint8_t prev_scope;
 
 	p_mcm_port = osm_mcm_port_new(p_port_gid, join_state, proxy_join);
@@ -142,43 +143,81 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_mgrp_t * const p_mgrp,
 		p_mgrp->last_change_id++;
 	}
 
+	if ((join_state ^ prev_join_state) & IB_JOIN_STATE_FULL) {
+		if (join_state & IB_JOIN_STATE_FULL) {
+			if (++p_mgrp->full_members == 1) {
+				osm_mgrp_send_create_notice(subn, log, p_mgrp);
+				p_mgrp->to_be_deleted = 0;
+			}
+		} else if (--p_mgrp->full_members == 0) {
+			osm_mgrp_send_delete_notice(subn, log, p_mgrp);
+			if (!p_mgrp->well_known)
+				p_mgrp->to_be_deleted = 1;
+		}
+	}
+
 	return (p_mcm_port);
 }
 
 /**********************************************************************
  **********************************************************************/
-void
-osm_mgrp_remove_port(IN osm_subn_t * const p_subn,
-		     IN osm_log_t * const p_log,
-		     IN osm_mgrp_t * const p_mgrp,
-		     IN const ib_net64_t port_guid)
+int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t *log, osm_mgrp_t *mgrp,
+			 osm_mcm_port_t *mcm, uint8_t join_state)
 {
-	cl_map_item_t *p_map_item;
-
-	CL_ASSERT(p_mgrp);
-
-	p_map_item = cl_qmap_get(&p_mgrp->mcm_port_tbl, port_guid);
-
-	if (p_map_item != cl_qmap_end(&p_mgrp->mcm_port_tbl)) {
-		cl_qmap_remove_item(&p_mgrp->mcm_port_tbl, p_map_item);
-		osm_mcm_port_delete((osm_mcm_port_t *) p_map_item);
-
-		/* track the fact we modified the group */
-		p_mgrp->last_change_id++;
-	}
+	int ret;
+	uint8_t port_join_state;
+	uint8_t new_join_state;
 
 	/*
-	   no more ports so the group will be deleted after re-route
-	   but only if it is not a well known group and not already deleted
+	 * according to the same o15-0.1.14 we get the stored
+	 * JoinState and the request JoinState and they must be
+	 * opposite to leave - otherwise just update it
 	 */
-	if ((cl_is_qmap_empty(&p_mgrp->mcm_port_tbl)) &&
-	    (p_mgrp->well_known == FALSE) && (p_mgrp->to_be_deleted == FALSE)) {
-		p_mgrp->to_be_deleted = TRUE;
+	port_join_state = mcm->scope_state & 0x0F;
+	new_join_state = port_join_state & ~join_state;
+
+	if (new_join_state) {
+		mcm->scope_state = new_join_state | (mcm->scope_state & 0xf0);
+		OSM_LOG(log, OSM_LOG_DEBUG,
+			"updating port 0x%" PRIx64 " JoinState 0x%x -> 0x%x\n",
+			cl_ntoh64(mcm->port_gid.unicast.interface_id),
+			port_join_state, new_join_state);
+		ret = 0;
+	} else {
+		cl_qmap_remove_item(&mgrp->mcm_port_tbl, &mcm->map_item);
+		OSM_LOG(log, OSM_LOG_DEBUG, "removing port 0x%" PRIx64 "\n",
+			cl_ntoh64(mcm->port_gid.unicast.interface_id));
+		osm_mcm_port_delete(mcm);
+		/* track the fact we modified the group */
+		mgrp->last_change_id++;
+		ret = 1;
+	}
 
-		/* Send a Report to any InformInfo registered for
-		   Trap 67 : MCGroup delete */
-		osm_mgrp_send_delete_notice(p_subn, p_log, p_mgrp);
+	/* no more full members so the group will be deleted after re-route
+	   but only if it is not a well known group */
+	if ((port_join_state ^ new_join_state) & IB_JOIN_STATE_FULL) {
+		if (port_join_state & IB_JOIN_STATE_FULL) {
+			if (--mgrp->full_members == 0) {
+				osm_mgrp_send_delete_notice(subn, log, mgrp);
+				if (!mgrp->well_known)
+					mgrp->to_be_deleted = 1;
+			}
+		} else if (++mgrp->full_members == 1) {
+			osm_mgrp_send_create_notice(subn, log, mgrp);
+			mgrp->to_be_deleted = 0;
+		}
 	}
+
+	return ret;
+}
+
+void osm_mgrp_delete_port(osm_subn_t *subn, osm_log_t *log, osm_mgrp_t *mgrp,
+			  ib_net64_t port_guid)
+{
+	cl_map_item_t *item = cl_qmap_get(&mgrp->mcm_port_tbl, port_guid);
+
+	if (item != cl_qmap_end(&mgrp->mcm_port_tbl))
+		osm_mgrp_remove_port(subn, log, mgrp, (osm_mcm_port_t *)item, 0xf);
 }
 
 /**********************************************************************
diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c
index 37750eb..670deae 100644
--- a/opensm/opensm/osm_sa.c
+++ b/opensm/opensm/osm_sa.c
@@ -1007,7 +1007,8 @@ int osm_sa_db_file_load(osm_opensm_t * p_osm)
 			if (cl_qmap_get(&p_mgrp->mcm_port_tbl,
 					port_gid.unicast.interface_id) ==
 			    cl_qmap_end(&p_mgrp->mcm_port_tbl))
-				osm_mgrp_add_port(p_mgrp, &port_gid,
+				osm_mgrp_add_port(&p_osm->subn, &p_osm->log,
+						  p_mgrp, &port_gid,
 						  scope_state, proxy_join);
 		} else if (!strncmp(p, "Service Record:", 15)) {
 			ib_service_record_t s_rec;
diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c
index c4f9d48..3b4b435 100644
--- a/opensm/opensm/osm_sa_mcmember_record.c
+++ b/opensm/opensm/osm_sa_mcmember_record.c
@@ -206,7 +206,7 @@ __add_new_mgrp_port(IN osm_sa_t * sa,
 			"Create new port with proxy_join TRUE\n");
 	}
 
-	*pp_mcmr_port = osm_mgrp_add_port(p_mgrp,
+	*pp_mcmr_port = osm_mgrp_add_port(sa->p_subn, sa->p_log, p_mgrp,
 					  &p_recvd_mcmember_rec->port_gid,
 					  p_recvd_mcmember_rec->scope_state,
 					  proxy_join);
@@ -951,10 +951,6 @@ osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa,
 
 	sa->p_subn->mgroups[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO] = *pp_mgrp;
 
-	/* Send a Report to any InformInfo registered for
-	   Trap 66: MCGroup create */
-	osm_mgrp_send_create_notice(sa->p_subn, sa->p_log, *pp_mgrp);
-
 Exit:
 	OSM_LOG_EXIT(sa->p_log);
 	return status;
@@ -1055,8 +1051,7 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa,
 	ib_net16_t mlid;
 	ib_net64_t portguid;
 	osm_mcm_port_t *p_mcm_port;
-	uint8_t port_join_state;
-	uint8_t new_join_state;
+	int removed;
 
 	OSM_LOG_ENTER(sa->p_log);
 
@@ -1104,37 +1099,19 @@ __osm_mcmr_rcv_leave_mgrp(IN osm_sa_t * sa,
 		goto Exit;
 	}
 
-	/*
-	 * according to the same o15-0.1.14 we get the stored
-	 * JoinState and the request JoinState and they must be
-	 * opposite to leave - otherwise just update it
-	 */
-	port_join_state = p_mcm_port->scope_state & 0x0F;
-	new_join_state =
-	    port_join_state & ~(p_recvd_mcmember_rec->scope_state & 0x0F);
-	if (new_join_state) {
-		/* Just update the result JoinState */
-		p_mcm_port->scope_state =
-		    new_join_state | (p_mcm_port->scope_state & 0xf0);
-
+	mcmember_rec.scope_state = p_mcm_port->scope_state;
+	/* remove port or update join state */
+	removed = osm_mgrp_remove_port(sa->p_subn, sa->p_log, p_mgrp, p_mcm_port,
+				       p_recvd_mcmember_rec->scope_state&0x0F);
+	if (removed)
 		mcmember_rec.scope_state = p_mcm_port->scope_state;
 
-		CL_PLOCK_RELEASE(sa->p_lock);
-
-		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
-			"After update JoinState != 0. "
-			"Updating from 0x%X to 0x%X\n",
-			port_join_state, new_join_state);
-	} else {
-		/* we need to return the stored scope state */
-		mcmember_rec.scope_state = p_mcm_port->scope_state;
+	CL_PLOCK_RELEASE(sa->p_lock);
 
-		/* OK we can leave */
-		/* note: osm_sm_mcgrp_leave() will release sa->p_lock */
-		if (osm_sm_mcgrp_leave(sa->sm, mlid, portguid))
-			OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B09: "
-				"osm_sm_mcgrp_leave failed\n");
-	}
+	/* we can leave if port was deleted from MCG */
+	if (removed && osm_sm_mcgrp_leave(sa->sm, mlid, portguid))
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B09: "
+			"osm_sm_mcgrp_leave failed\n");
 
 	/* Send an SA response */
 	__osm_mcmr_rcv_respond(sa, p_madw, &mcmember_rec);
@@ -1382,9 +1359,7 @@ __osm_mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * const p_madw)
 		/* the request for routing failed so we need to remove the port */
 		p_mgrp = osm_get_mgrp_by_mlid(sa->p_subn, mlid);
 		if (p_mgrp != NULL) {
-			osm_mgrp_remove_port(sa->p_subn,
-					     sa->p_log,
-					     p_mgrp,
+			osm_mgrp_delete_port(sa->p_subn, sa->p_log, p_mgrp,
 					     p_recvd_mcmember_rec->port_gid.
 					     unicast.interface_id);
 			__cleanup_mgrp(sa, mlid);
diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
index 292eb05..9b7810c 100644
--- a/opensm/opensm/osm_sm.c
+++ b/opensm/opensm/osm_sm.c
@@ -622,8 +622,8 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm,
 	/*
 	 * Acquire the port object for the port leaving this group.
 	 */
-	/* note: p_sm->p_lock is locked by caller, but will be released later
-	   this function */
+	CL_PLOCK_EXCL_ACQUIRE(p_sm->p_lock);
+
 	p_port = osm_get_port_by_guid(p_sm->p_subn, port_guid);
 	if (!p_port) {
 		CL_PLOCK_RELEASE(p_sm->p_lock);
@@ -649,8 +649,6 @@ osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm,
 	/*
 	 * Walk the list of ports in the group, and remove the appropriate one.
 	 */
-	osm_mgrp_remove_port(p_sm->p_subn, p_sm->p_log, p_mgrp, port_guid);
-
 	osm_port_remove_mgrp(p_port, mlid);
 
 	__osm_sm_mgrp_disconnect(p_sm, p_mgrp, port_guid);
-- 
1.5.4.rc2.60.gb2e62




More information about the general mailing list