[ofa-general] [PATCH] opensm/multicast: consolidate port addition/removing code

Sasha Khapyorsky sashak at voltaire.com
Fri Sep 18 07:10:39 PDT 2009


Consolidate port addition/removing to multicast groups code and mlid
rerouting requests so that SA MCMember join and leave requests processor
will use just a single call (osm_mgrp_add_port() and
osm_mgrp_remove_port() respectively) to update multicast group related
data and to request (schedule) multicast routing (MFTs) recalculation.

This also fixes a bug already reported (by me) on the list when due to
race between different SA requests processors during MCMember leave
storming multicast group cleanup code can double free an objects and
crash.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
 opensm/include/opensm/osm_multicast.h  |    4 +-
 opensm/include/opensm/osm_sm.h         |   55 ++--------------
 opensm/opensm/osm_multicast.c          |   51 +++++++++++----
 opensm/opensm/osm_sa_mcmember_record.c |  108 ++++---------------------------
 opensm/opensm/osm_sm.c                 |  102 +-----------------------------
 5 files changed, 64 insertions(+), 256 deletions(-)

diff --git a/opensm/include/opensm/osm_multicast.h b/opensm/include/opensm/osm_multicast.h
index 825be8e..32bcb78 100644
--- a/opensm/include/opensm/osm_multicast.h
+++ b/opensm/include/opensm/osm_multicast.h
@@ -378,8 +378,8 @@ void osm_mgrp_delete_port(IN osm_subn_t * subn, IN osm_log_t * log,
 * SEE ALSO
 *********/
 
-int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
-			 osm_mcm_port_t * mcm_port, ib_member_rec_t * mcmr);
+void osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
+			  osm_mcm_port_t * mcm_port, ib_member_rec_t * mcmr);
 void osm_mgrp_cleanup(osm_subn_t * subn, osm_mgrp_t * mpgr);
 
 END_C_DECLS
diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h
index 986143a..de3f098 100644
--- a/opensm/include/opensm/osm_sm.h
+++ b/opensm/include/opensm/osm_sm.h
@@ -525,65 +525,24 @@ osm_resp_send(IN osm_sm_t * sm,
 *
 *********/
 
-/****f* OpenSM: SM/osm_sm_mcgrp_join
+/****f* OpenSM: SM/osm_sm_reroute_mlid
 * NAME
-*	osm_sm_mcgrp_join
+*	osm_sm_reroute_mlid
 *
 * DESCRIPTION
-*	Adds a port to the multicast group.  Creates the multicast group
-*	if necessary.
-*
-*	This function is called by the SA.
+*	Requests (schedules) MLID rerouting
 *
 * SYNOPSIS
 */
-ib_api_status_t
-osm_sm_mcgrp_join(IN osm_sm_t * const p_sm,
-		  IN osm_mgrp_t *mgrp,
-		  IN const ib_net64_t port_guid);
-/*
-* PARAMETERS
-*	p_sm
-*		[in] Pointer to an osm_sm_t object.
-*
-*	mgrp
-*		[in] Pointer to multicast group to join
-*
-*	port_guid
-*		[in] Port GUID to add to the group.
-*
-* RETURN VALUES
-*	None
-*
-* NOTES
-*
-* SEE ALSO
-*********/
+void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid);
 
-/****f* OpenSM: SM/osm_sm_mcgrp_leave
-* NAME
-*	osm_sm_mcgrp_leave
-*
-* DESCRIPTION
-*	Removes a port from the multicast group.
-*
-*	This function is called by the SA.
-*
-* SYNOPSIS
-*/
-ib_api_status_t
-osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm,
-		   IN osm_mgrp_t *mgrp, IN const ib_net64_t port_guid);
 /*
 * PARAMETERS
-*	p_sm
+*	sm
 *		[in] Pointer to an osm_sm_t object.
 *
-*	mgrp
-*		[in] Poniter to multicast group to leave
-*
-*	port_guid
-*		[in] Port GUID to remove from the group.
+*	mlid
+*		[in] MLID value
 *
 * RETURN VALUES
 *	None
diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
index f548990..b03af48 100644
--- a/opensm/opensm/osm_multicast.c
+++ b/opensm/opensm/osm_multicast.c
@@ -44,10 +44,13 @@
 
 #include <stdlib.h>
 #include <string.h>
+#include <arpa/inet.h>
 #include <opensm/osm_multicast.h>
 #include <opensm/osm_mcm_port.h>
 #include <opensm/osm_mtree.h>
 #include <opensm/osm_inform.h>
+#include <opensm/osm_opensm.h>
+#include <opensm/osm_mcm_info.h>
 
 /**********************************************************************
  **********************************************************************/
@@ -139,6 +142,15 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log,
 	uint8_t prev_join_state = 0, join_state = mcmr->scope_state;
 	uint8_t prev_scope;
 
+	if (osm_log_is_active(log, OSM_LOG_VERBOSE)) {
+		char gid_str[INET6_ADDRSTRLEN];
+		OSM_LOG(log, OSM_LOG_VERBOSE, "Port 0x%016" PRIx64 " joining "
+			"MC group %s (mlid 0x%x)\n", cl_ntoh64(port->guid),
+			inet_ntop(AF_INET6, mgrp->mcmember_rec.mgid.raw,
+				  gid_str, sizeof(gid_str)),
+			cl_ntoh16(mgrp->mlid));
+	}
+
 	mcm_port = osm_mcm_port_new(port, mcmr, proxy);
 	if (!mcm_port)
 		return NULL;
@@ -158,18 +170,22 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log,
 		osm_mcm_port_delete(mcm_port);
 		mcm_port = (osm_mcm_port_t *) prev_item;
 
-		/*
-		   o15.0.1.11
-		   Join state of the end port should be the or of the
-		   previous setting with the current one
-		 */
 		ib_member_get_scope_state(mcm_port->scope_state, &prev_scope,
 					  &prev_join_state);
 		mcm_port->scope_state =
 		    ib_member_set_scope_state(prev_scope,
 					      prev_join_state | join_state);
+	} else {
+		if (osm_port_add_mgrp(port, mgrp)) {
+			osm_mcm_port_delete(mcm_port);
+			return NULL;
+		}
+		osm_sm_reroute_mlid(&subn->p_osm->sm, mgrp->mlid);
 	}
 
+	/* o15.0.1.11: copy the join state */
+	mcmr->scope_state = mcm_port->scope_state;
+
 	if ((join_state & IB_JOIN_STATE_FULL) &&
 	    !(prev_join_state & IB_JOIN_STATE_FULL) &&
 	    ++mgrp->full_members == 1)
@@ -180,10 +196,9 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log,
 
 /**********************************************************************
  **********************************************************************/
-int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
-			 osm_mcm_port_t * mcm_port, ib_member_rec_t *mcmr)
+void osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
+			  osm_mcm_port_t * mcm_port, ib_member_rec_t *mcmr)
 {
-	int ret;
 	uint8_t join_state = mcmr->scope_state & 0xf;
 	uint8_t port_join_state, new_join_state;
 
@@ -195,22 +210,32 @@ int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
 	port_join_state = mcm_port->scope_state & 0x0F;
 	new_join_state = port_join_state & ~join_state;
 
+	if (osm_log_is_active(log, OSM_LOG_VERBOSE)) {
+		char gid_str[INET6_ADDRSTRLEN];
+		OSM_LOG(log, OSM_LOG_VERBOSE,
+			"Port 0x%" PRIx64 " leaving MC group %s (mlid 0x%x)\n",
+			cl_ntoh64(mcm_port->port->guid),
+			inet_ntop(AF_INET6, mgrp->mcmember_rec.mgid.raw,
+				  gid_str, sizeof(gid_str)),
+			cl_ntoh16(mgrp->mlid));
+	}
+
 	if (new_join_state) {
 		mcm_port->scope_state =
 		    new_join_state | (mcm_port->scope_state & 0xf0);
 		OSM_LOG(log, OSM_LOG_DEBUG,
 			"updating port 0x%" PRIx64 " JoinState 0x%x -> 0x%x\n",
-			cl_ntoh64(mcm_port->port_gid.unicast.interface_id),
+			cl_ntoh64(mcm_port->port->guid),
 			port_join_state, new_join_state);
 		mcmr->scope_state = mcm_port->scope_state;
-		ret = 0;
 	} else {
 		mcmr->scope_state = mcm_port->scope_state;
 		cl_qmap_remove_item(&mgrp->mcm_port_tbl, &mcm_port->map_item);
 		OSM_LOG(log, OSM_LOG_DEBUG, "removing port 0x%" PRIx64 "\n",
-			cl_ntoh64(mcm_port->port_gid.unicast.interface_id));
+			cl_ntoh64(mcm_port->port->guid));
+		osm_port_remove_mgrp(mcm_port->port, mgrp);
 		osm_mcm_port_delete(mcm_port);
-		ret = 1;
+		osm_sm_reroute_mlid(&subn->p_osm->sm, mgrp->mlid);
 	}
 
 	/* no more full members so the group will be deleted after re-route
@@ -218,8 +243,6 @@ int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
 	if ((port_join_state & IB_JOIN_STATE_FULL) &&
 	    !(new_join_state & IB_JOIN_STATE_FULL) && --mgrp->full_members == 0)
 		mgrp_send_notice(subn, log, mgrp, 67);
-
-	return ret;
 }
 
 void osm_mgrp_delete_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c
index a51c839..f291bf0 100644
--- a/opensm/opensm/osm_sa_mcmember_record.c
+++ b/opensm/opensm/osm_sa_mcmember_record.c
@@ -132,59 +132,6 @@ static ib_net16_t get_new_mlid(osm_sa_t * sa, ib_net16_t requested_mlid)
 	return 0;
 }
 
-/*********************************************************************
- Add a port to the group. Calculating its PROXY_JOIN by the Port and
- requester gids.
-**********************************************************************/
-static ib_api_status_t add_new_mgrp_port(osm_sa_t * sa, IN osm_mgrp_t * p_mgrp,
-					 IN osm_port_t *port,
-					 IN ib_member_rec_t *
-					 p_recvd_mcmember_rec,
-					 IN osm_mad_addr_t * p_mad_addr,
-					 OUT osm_mcm_port_t ** pp_mcmr_port)
-{
-	boolean_t proxy_join;
-	ib_gid_t requester_gid;
-	ib_api_status_t res;
-
-	/* set the proxy_join if the requester gid is not identical to the
-	   joined gid */
-	res = osm_get_gid_by_mad_addr(sa->p_log, sa->p_subn, p_mad_addr,
-				      &requester_gid);
-	if (res != IB_SUCCESS) {
-		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B29: "
-			"Could not find GID for requester\n");
-
-		return IB_INVALID_PARAMETER;
-	}
-
-	if (!memcmp(&p_recvd_mcmember_rec->port_gid, &requester_gid,
-		    sizeof(ib_gid_t))) {
-		proxy_join = FALSE;
-		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
-			"Create new port with proxy_join FALSE\n");
-	} else {
-		/* The port is not the one specified in PortGID.
-		   The check that the requester is in the same partition as
-		   the PortGID is done before - just need to update
-		   the proxy_join. */
-		proxy_join = TRUE;
-		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
-			"Create new port with proxy_join TRUE\n");
-	}
-
-	*pp_mcmr_port = osm_mgrp_add_port(sa->p_subn, sa->p_log, p_mgrp, port,
-					  p_recvd_mcmember_rec, proxy_join);
-	if (*pp_mcmr_port == NULL) {
-		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B06: "
-			"osm_mgrp_add_port failed\n");
-
-		return IB_INSUFFICIENT_MEMORY;
-	}
-
-	return IB_SUCCESS;
-}
-
 /**********************************************************************
  **********************************************************************/
 static inline boolean_t check_join_comp_mask(ib_net64_t comp_mask)
@@ -968,7 +915,6 @@ static void mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 	ib_member_rec_t mcmember_rec;
 	ib_net64_t portguid;
 	osm_mcm_port_t *p_mcm_port;
-	int removed;
 
 	OSM_LOG_ENTER(sa->p_log);
 
@@ -1016,20 +962,13 @@ static void mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 	}
 
 	/* remove port and/or update join state */
-	removed = osm_mgrp_remove_port(sa->p_subn, sa->p_log, p_mgrp,
-				       p_mcm_port, &mcmember_rec);
+	osm_mgrp_remove_port(sa->p_subn, sa->p_log, p_mgrp, p_mcm_port,
+			     &mcmember_rec);
+	osm_mgrp_cleanup(sa->p_subn, p_mgrp);
 	CL_PLOCK_RELEASE(sa->p_lock);
 
-	/* we can leave if port was deleted from MCG */
-	if (removed && osm_sm_mcgrp_leave(sa->sm, p_mgrp, portguid))
-		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B09: "
-			"osm_sm_mcgrp_leave failed\n");
-
 	mcmr_rcv_respond(sa, p_madw, &mcmember_rec);
 
-	CL_PLOCK_EXCL_ACQUIRE(sa->p_lock);
-	osm_mgrp_cleanup(sa->p_subn, p_mgrp);
-	CL_PLOCK_RELEASE(sa->p_lock);
 Exit:
 	OSM_LOG_EXIT(sa->p_log);
 }
@@ -1051,6 +990,7 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 	osm_physp_t *p_request_physp;
 	uint8_t is_new_group;	/* TRUE = there is a need to create a group */
 	uint8_t join_state;
+	boolean_t proxy;
 
 	OSM_LOG_ENTER(sa->p_log);
 
@@ -1098,6 +1038,8 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 		goto Exit;
 	}
 
+	proxy = (p_physp != p_request_physp);
+
 	ib_member_get_scope_state(p_recvd_mcmember_rec->scope_state, NULL,
 				  &join_state);
 
@@ -1214,11 +1156,15 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 		goto Exit;
 	}
 
+	/* copy qkey mlid tclass pkey sl_flow_hop mtu rate pkt_life sl_flow_hop */
+	copy_from_create_mc_rec(&mcmember_rec, &p_mgrp->mcmember_rec);
+
 	/* create or update existing port (join-state will be updated) */
-	status = add_new_mgrp_port(sa, p_mgrp, p_port, p_recvd_mcmember_rec,
-				   osm_madw_get_mad_addr_ptr(p_madw),
-				   &p_mcmr_port);
-	if (status != IB_SUCCESS) {
+	p_mcmr_port = osm_mgrp_add_port(sa->p_subn, sa->p_log, p_mgrp, p_port,
+					&mcmember_rec, proxy);
+	if (!p_mcmr_port) {
+		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B06: "
+			"osm_mgrp_add_port failed\n");
 		/* we fail to add the port so we might need to delete the group */
 		osm_mgrp_cleanup(sa->p_subn, p_mgrp);
 		CL_PLOCK_RELEASE(sa->p_lock);
@@ -1228,35 +1174,9 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
 		goto Exit;
 	}
 
-	/* o15.0.1.11: copy the join state */
-	mcmember_rec.scope_state = p_mcmr_port->scope_state;
-
-	/* copy qkey mlid tclass pkey sl_flow_hop mtu rate pkt_life sl_flow_hop */
-	copy_from_create_mc_rec(&mcmember_rec, &p_mgrp->mcmember_rec);
-
 	/* Release the lock as we don't need it. */
 	CL_PLOCK_RELEASE(sa->p_lock);
 
-	/* do the actual routing (actually schedule the update) */
-	status = osm_sm_mcgrp_join(sa->sm, p_mgrp,
-				   p_recvd_mcmember_rec->port_gid.unicast.
-				   interface_id);
-	if (status != IB_SUCCESS) {
-		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B14: "
-			"osm_sm_mcgrp_join failed from port 0x%016" PRIx64
-			" (%s), " "sending IB_SA_MAD_STATUS_NO_RESOURCES\n",
-			cl_ntoh64(portguid), p_port->p_node->print_desc);
-		CL_PLOCK_EXCL_ACQUIRE(sa->p_lock);
-		/* the request for routing failed so we need to remove the port */
-		osm_mgrp_delete_port(sa->p_subn, sa->p_log, p_mgrp,
-				     p_recvd_mcmember_rec->port_gid.
-				     unicast.interface_id);
-		CL_PLOCK_RELEASE(sa->p_lock);
-		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES);
-		goto Exit;
-
-	}
-	/* failed to route */
 	if (osm_log_is_active(sa->p_log, OSM_LOG_DEBUG))
 		osm_dump_mc_record(sa->p_log, &mcmember_rec, OSM_LOG_DEBUG);
 
diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
index f3fa7f4..88f5ebe 100644
--- a/opensm/opensm/osm_sm.c
+++ b/opensm/opensm/osm_sm.c
@@ -443,109 +443,15 @@ Exit:
 
 /**********************************************************************
  **********************************************************************/
-static void request_mlid(osm_sm_t * sm, uint16_t mlid)
+void osm_sm_reroute_mlid(osm_sm_t * sm, ib_net16_t mlid)
 {
-	mlid -= IB_LID_MCAST_START_HO;
+	mlid = cl_ntoh16(mlid) - IB_LID_MCAST_START_HO;
 	sm->mlids_req[mlid] = 1;
 	if (sm->mlids_req_max < mlid)
 		sm->mlids_req_max = mlid;
 	osm_sm_signal(sm, OSM_SIGNAL_IDLE_TIME_PROCESS_REQUEST);
-}
-
-ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN osm_mgrp_t *mgrp,
-				  IN const ib_net64_t port_guid)
-{
-	osm_port_t *p_port;
-	ib_api_status_t status = IB_SUCCESS;
-	osm_mcm_info_t *p_mcm;
-
-	OSM_LOG_ENTER(p_sm->p_log);
-
-	OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE,
-		"Port 0x%016" PRIx64 " joining MLID 0x%X\n",
-		cl_ntoh64(port_guid), cl_ntoh16(mgrp->mlid));
-
-	/*
-	 * Acquire the port object for the port joining this group.
-	 */
-	CL_PLOCK_EXCL_ACQUIRE(p_sm->p_lock);
-	p_port = osm_get_port_by_guid(p_sm->p_subn, port_guid);
-	if (!p_port) {
-		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E05: "
-			"No port object for port 0x%016" PRIx64 "\n",
-			cl_ntoh64(port_guid));
-		status = IB_INVALID_PARAMETER;
-		goto Exit;
-	}
-
-	/*
-	 * Check if the object (according to mlid) already exists on this port.
-	 * If it does - then no need to update it again, and no need to
-	 * create the mc tree again. Just goto Exit.
-	 */
-	p_mcm = (osm_mcm_info_t *) cl_qlist_head(&p_port->mcm_list);
-	while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
-		if (p_mcm->mgrp == mgrp) {
-			OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
-				"Found mlid object for Port:"
-				"0x%016" PRIx64 " lid:0x%X\n",
-				cl_ntoh64(port_guid), cl_ntoh16(mgrp->mlid));
-			goto Exit;
-		}
-		p_mcm = (osm_mcm_info_t *) cl_qlist_next(&p_mcm->list_item);
-	}
-
-	status = osm_port_add_mgrp(p_port, mgrp);
-	if (status != IB_SUCCESS) {
-		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E03: "
-			"Unable to associate port 0x%" PRIx64 " to mlid 0x%X\n",
-			cl_ntoh64(osm_port_get_guid(p_port)),
-			cl_ntoh16(osm_mgrp_get_mlid(mgrp)));
-		goto Exit;
-	}
-
-	request_mlid(p_sm, cl_ntoh16(mgrp->mlid));
-Exit:
-	CL_PLOCK_RELEASE(p_sm->p_lock);
-	OSM_LOG_EXIT(p_sm->p_log);
-
-	return status;
-}
-
-ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN osm_mgrp_t *mgrp,
-				   IN const ib_net64_t port_guid)
-{
-	osm_port_t *p_port;
-	ib_api_status_t status = IB_SUCCESS;
-
-	OSM_LOG_ENTER(p_sm->p_log);
-
-	OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE,
-		"Port 0x%" PRIx64 " leaving MLID 0x%X\n",
-		cl_ntoh64(port_guid), cl_ntoh16(mgrp->mlid));
-
-	/*
-	 * Acquire the port object for the port leaving this group.
-	 */
-	CL_PLOCK_EXCL_ACQUIRE(p_sm->p_lock);
-
-	p_port = osm_get_port_by_guid(p_sm->p_subn, port_guid);
-	if (!p_port) {
-		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E04: "
-			"No port object for port 0x%" PRIx64 "\n",
-			cl_ntoh64(port_guid));
-		status = IB_INVALID_PARAMETER;
-		goto Exit;
-	}
-
-	osm_port_remove_mgrp(p_port, mgrp);
-
-	request_mlid(p_sm, cl_hton16(mgrp->mlid));
-Exit:
-	CL_PLOCK_RELEASE(p_sm->p_lock);
-	OSM_LOG_EXIT(p_sm->p_log);
-
-	return status;
+	OSM_LOG(sm->p_log, OSM_LOG_DEBUG, "rerouting requested for MLID 0x%x\n",
+		mlid + IB_LID_MCAST_START_HO);
 }
 
 void osm_set_sm_priority(osm_sm_t * sm, uint8_t priority)
-- 
1.6.5.rc1




More information about the general mailing list