[ofa-general] Re: [PATCH 1/2 v3] opensm: Storage organization for multicast groups

Sasha Khapyorsky sashak at voltaire.com
Sun Sep 6 08:25:05 PDT 2009


Hi Slava,

On 16:47 Wed 05 Aug     , Slava Strebkov wrote:
> 
> Subject: [PATCH 1/2] Storage organization for multicast groups
> 
> Main purpose is to prepare infrastructure for (many) mgids
> to one mlid compression. Proposed the following changes:
> 1. Element in mlid array is now a multicast group holder.
> 2. mgrp_holder keeps a list of mgroups sharing same mlid.
>         With introduction of compression, there will be many
>         multicast groups per mlid. Current implementation keeps
>         one mgid to one mlid ratio.
> 3. mgrp_holder has a map of ports sharing same mlid. Ports sorted
>         by port guid. Port map is necessary for building spanning
>         tree per mgroup_holder, not just for single mgroup.
> 4. Element in port map keeps a list of mgroups opened by this port.
>         This allows quick deletion of mgroups when port changes
>          state to DOWN.
> 5. Multicast processing functions use mgroup_holder object instead
>         of mgroup.
> 
> Signed-off-by: Slava Strebkov <slavas at voltaire.com>
> ---
>  opensm/include/opensm/osm_multicast.h  |  343 +++++++++++++++++++++++++++++---
>  opensm/include/opensm/osm_sm.h         |   10 +-
>  opensm/include/opensm/osm_subnet.h     |   38 ++--
>  opensm/opensm/osm_drop_mgr.c           |   14 +-
>  opensm/opensm/osm_mcast_mgr.c          |  228 +++++++++++++---------
>  opensm/opensm/osm_multicast.c          |  198 +++++++++++++++++--
>  opensm/opensm/osm_qos_policy.c         |   38 ++--
>  opensm/opensm/osm_sa.c                 |   31 +--
>  opensm/opensm/osm_sa_mcmember_record.c |   94 +++++----
>  opensm/opensm/osm_sa_path_record.c     |   13 +-
>  opensm/opensm/osm_sm.c                 |   81 +++++++-
>  opensm/opensm/osm_subnet.c             |   31 +++-
>  12 files changed, 855 insertions(+), 264 deletions(-)
> 
> diff --git a/opensm/include/opensm/osm_multicast.h b/opensm/include/opensm/osm_multicast.h
> index 9a47de5..61d1ba6 100644
> --- a/opensm/include/opensm/osm_multicast.h
> +++ b/opensm/include/opensm/osm_multicast.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   *
> @@ -107,6 +107,82 @@ typedef struct osm_mcast_mgr_ctxt {
>  *
>  * SEE ALSO
>  *********/
> +/****s* OpenSM: Multicast Group Holder/osm_mgrp_holder_t
> +* NAME
> +*       osm_mgrp_holder_t
> +*
> +* DESCRIPTION
> +*       Holder for mgroups.
> +*
> +*       The osm_mgrp_t object should be treated as opaque and should
> +*       be manipulated only through the provided functions.
> +*
> +* SYNOPSIS
> +*/
> +
> +typedef struct osm_mgrp_holder {
> +	cl_qmap_t mgrp_port_map;
> +	cl_qlist_t mgrp_list;
> +	osm_mtree_node_t *p_root;
> +	ib_net16_t mlid;
> +	boolean_t to_be_deleted;
> +	uint32_t last_tree_id;
> +	uint32_t last_change_id;
> +} osm_mgrp_holder_t;

I remember we discussed (off-list) that using shorter name would be
better. You proposed then 'osm_mgrp_box' then...

> +
> +/*
> +* FIELDS
> +*	mgrp_port_map
> +*		Map of  all ports joined same mlid
> +*
> +*	mgrp_list
> +*		List of mgroups having same mlid
> +*
> +*	p_root
> +*		Pointer to the root "tree node" in the single spanning tree
> +*		for this multicast group holder.The nodes of the tree represent
> +*		switches.  Member ports are not represented in the tree.
> +*
> +*	mlid
> +*		mlid of current group holder
> +*
> +*	to_be_deleted
> +*		Since holders  are deleted when there are no mgroups in.
> +*
> +*	last_change_id
> +*		a counter for the number of changes applied to the group in this holder.
> +*		This counter shuold be incremented on any modification
> +*		to the group: joining or leaving of ports.
> +*
> +*	last_tree_id
> +*		the last change id used for building the current tree.
> +*/
> + /****s* OpenSM: Multicast group Port /osm_mgrp_port _t
> +* NAME
> +*	osm_mgrp_port _t
> +*
> +* DESCRIPTION
> +*	Holder for pointers to mgroups and port guid.
> +*
> +*
> +* SYNOPSIS
> +*/
> +typedef struct _osm_mgrp_port {

No leading underscore is needed in public structure definitions.

> +	cl_map_item_t guid_item;
> +	cl_qlist_t mgroups;

What is the purpose of this 'groups' list?

> +	ib_net64_t port_guid;
> +} osm_mgrp_port_t;
> +/*
> +* FIELDS
> +*	guid_item
> +*		Map for ports. Must be first element
> +*
> +*	mgroups
> +*		List  of  mgroups opened by this port.
> +*
> +*	portguid
> +*		guid of  port representing current structure
> +*/
>  
>  /****s* OpenSM: Multicast Group/osm_mgrp_t
>  * NAME
> @@ -122,14 +198,13 @@ typedef struct osm_mcast_mgr_ctxt {
>  */
>  typedef struct osm_mgrp {
>  	cl_fmap_item_t map_item;
> +	cl_list_item_t mlid_item;
> +	cl_list_item_t port_item;

And respectively this 'port_item' field?

Well, I see below what you are trying to do - it serves introduced
holder_port_add/delete() APIs. For me it looks that much simple approach
is possible - see a comments there.

>  	ib_net16_t mlid;
> -	osm_mtree_node_t *p_root;
>  	cl_qmap_t mcm_port_tbl;
>  	ib_member_rec_t mcmember_rec;
>  	boolean_t well_known;
>  	boolean_t to_be_deleted;
> -	uint32_t last_change_id;
> -	uint32_t last_tree_id;
>  	unsigned full_members;
>  } osm_mgrp_t;
>  /*
> @@ -141,10 +216,11 @@ typedef struct osm_mgrp {
>  *		The network ordered LID of this Multicast Group (must be
>  *		>= 0xC000).
>  *
> -*	p_root
> -*		Pointer to the root "tree node" in the single spanning tree
> -*		for this multicast group.  The nodes of the tree represent
> -*		switches.  Member ports are not represented in the tree.
> +*	mlid_item
> +*		List item for groups with same MLID
> +*
> +*	port_item
> +*		List item for groups opened on same port
>  *
>  *	mcm_port_tbl
>  *		Table (sorted by port GUID) of osm_mcm_port_t objects
> @@ -163,14 +239,6 @@ typedef struct osm_mgrp {
>  *		track the fact the group is about to be deleted so we can
>  *		track the fact a new join is actually a create request.
>  *
> -*	last_change_id
> -*		a counter for the number of changes applied to the group.
> -*		This counter shuold be incremented on any modification
> -*		to the group: joining or leaving of ports.
> -*
> -*	last_tree_id
> -*		the last change id used for building the current tree.
> -*
>  * SEE ALSO
>  *********/
>  
> @@ -456,30 +524,111 @@ osm_mgrp_delete_port(IN osm_subn_t * const p_subn,
>  int osm_mgrp_remove_port(osm_subn_t *subn, osm_log_t *log, osm_mgrp_t *mgrp,
>  			 osm_mcm_port_t *mcm, uint8_t join_state);
>  
> -/****f* OpenSM: Multicast Group/osm_mgrp_apply_func
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_new
>  * NAME
> -*	osm_mgrp_apply_func
> +*	osm_mgrp_holder_new
>  *
>  * DESCRIPTION
> -*	Calls the specified function for each element in the tree.
> -*	Elements are passed to the callback function in no particular order.
> +*	Allocates and initializes a Multicast Group Holder for use.
>  *
>  * SYNOPSIS
>  */
> -void
> -osm_mgrp_apply_func(const osm_mgrp_t * const p_mgrp,
> -		    osm_mgrp_func_t p_func, void *context);
> +osm_mgrp_holder_t *osm_mgrp_holder_new(IN osm_subn_t * p_subn,
> +					IN ib_net16_t mlid);
> +/*
> +* PARAMETERS
> +*	p_subn
> +*		(in) pointer to osm_subnet
> +*	mlid
> +*		[in] Multicast LID for this multicast group holder.
> +*
> +* RETURN VALUES
> +*	pointer to initialized osm_mgrp_holder_t
> +*	or NULL, if unsuccessful
> +*
> +* SEE ALSO
> +*	Multicast Group Holder, osm_mgrp_holder_delete
> +*********/
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_delete
> +* NAME
> +*	osm_mgrp_holder_delete
> +*
> +* DESCRIPTION
> +*	Removes  entry from  array of holders
> +*	Removes port from mgroup port list
> +*
> +* SYNOPSIS
> +*/
> +void osm_mgrp_holder_delete(IN osm_subn_t * p_subn,
> +				IN ib_net16_t mlid);
> +
>  /*
>  * PARAMETERS
> +*
> +*	p_subn
> +*		[in] Pointer to  osm_subnet
> +*
> +*	mlid
> +*		[in] holder's mlid
> +*
> +* RETURN VALUES
> +*	None.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*
> +*********/
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_add_mgrp_port
> +* NAME
> +*	osm_mgrp_holder_port_add_mgrp
> +*
> +* DESCRIPTION
> +*	Allocates  osm_mgrp_port_t for new port joined to mgroup with mlid of this holder,
> +*	and adds mgroup to mgroup map of  existed osm_mgrp_port_t object.
> +*
> +* SYNOPSIS
> +*/
> +ib_api_status_t osm_mgrp_holder_port_add_mgrp(IN osm_mgrp_holder_t *
> +						p_mgrp_holder,
> +						IN osm_mgrp_t * p_mgrp,
> +						IN ib_net64_t port_guid);

I don't think that you need such APIs, but instead can use existing
osm_mgrp_add_port() and osm_mgrp_remove_port() - both have pointer to
it mgrp as parameter which "knows" its holder (mlid), so just need to
add holder's port map addition there and continue use existing stuff
as is.

> +/*
> +* PARAMETERS
> +*	p_mgrp_holder
> +*		(in) pointer to osm_mgrp_holder_t
>  *	p_mgrp
> -*		[in] Pointer to an osm_mgrp_t object.
> +*		(in)  pointer to  osm_mgrp_t
>  *
> -*	p_func
> -*		[in] Pointer to the users callback function.
> +* RETURN VALUES
> +*	IB_SUCCESS or
> +*	IB_INSUFFICIENT_MEMORY
>  *
> -*	context
> -*		[in] User context passed to the callback function.
> +* SEE ALSO
> +*	Multicast Group Holder, osm_mgrp_holder_delete_mgrp_port
> +*********/
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_delete_mgrp_port
> +* NAME
> +*	osm_mgrp_holder_port_delete_mgrp
>  *
> +* DESCRIPTION
> +*	Deletes  osm_mgrp_port_t for specified port
> +*
> +* SYNOPSIS
> +*/
> +void osm_mgrp_holder_port_delete_mgrp(IN osm_mgrp_holder_t * p_mgrp_holder,
> +					IN osm_mgrp_t * p_mgrp,
> +					IN ib_net64_t port_guid);

Same as above.

> +/*
> +* PARAMETERS
> +*	p_mgrp_holder
> +*		[in] Pointer to an osm_mgrp_holder_t object.
> +*
> +*	p_mgrp
> +*		(in) Pointer to osm_mgrp_t object
> +*
> +*	port_guid
> +*		[in] Port guid of the departing port.
>  *
>  * RETURN VALUES
>  *	None.
> @@ -487,8 +636,144 @@ osm_mgrp_apply_func(const osm_mgrp_t * const p_mgrp,
>  * NOTES
>  *
>  * SEE ALSO
> -*	Multicast Group
> +Multicast Group Holder,osm_holder_add_mgrp_port

Bad formatting (here and in many other places).

> +*********/
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_add_mgrp
> +* NAME
> +*	osm_mgrp_holder_add_mgrp
> +*
> +* DESCRIPTION
> +*	Adds mgroup to holder according to its mgid
> +*
> +*
> +* SYNOPSIS
> +*/
> +void osm_mgrp_holder_add_mgrp(IN osm_mgrp_holder_t * p_mgrp_holder,
> +				IN osm_mgrp_t * p_mgrp,
> +				IN osm_log_t * const p_log);
> +/*
> +* PARAMETERS
> +*
> +*	p_mgrp_holder
> +*		[in] Pointer to an osm_mgrp_holder_t object.
> +*
> +*	p_mgrp
> +*		[in] mgroup to add.
> +*
> +* RETURN VALUES
> +*	None.
> +*
> +* NOTES
> +* Updates common_mgid when holder is being reused
> +* SEE ALSO
> +*	Multicast Group Holder,osm_mgrp_holder_delete_mgrp
> +*********/
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_delete_mgrp
> +* NAME
> +*	osm_mgrp_holder_delete_mgrp
> +*
> +* DESCRIPTION
> +*	Deletes mgroup from holder according to its mgid
> +*
> +*
> +* SYNOPSIS
> +*/
> +void osm_mgrp_holder_delete_mgrp(IN osm_mgrp_holder_t * p_mgrp_holder,
> +					IN osm_mgrp_t * p_mgrp);

Do you really need 'holder' parameter here?

> +/*
> +* PARAMETERS
> +*
> +*	p_mgrp_holder
> +*		[in] Pointer to an osm_mgrp_holder_t object.
> +*
> +*	p_mgrp
> +*		[in] mgroup to delete.
> +*
> +* RETURN VALUES
> +*	None.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*	Multicast Group Holder,osm_mgrp_holder_add_mgrp
>  *********/
>  
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_remove_port
> +* NAME
> +*	osm_mgrp_holder_remove_port
> +*
> +* DESCRIPTION
> +*	Removes  osm_mgrp_port_t from mgrp_port_map of holder
> +*	Removes port from mgroup port list
> +*
> +* SYNOPSIS
> +*/
> +void osm_mgrp_holder_remove_port(IN osm_subn_t * const p_subn,
> +				IN osm_log_t * const p_log,
> +				IN osm_mgrp_holder_t * const p_mgrp_holder,
> +				IN const ib_net64_t port_guid);
> +/*
> +* PARAMETERS
> +*
> +*	p_subn
> +*		[in] Pointer to the subnet object
> +*
> +*	p_log
> +*		[in] The log object pointer
> +*
> +*	p_mgrp_holder
> +*		[in] Pointer to an osm_mgrp_holder_t object.
> +*
> +*	port_guid
> +*		[in] Port guid of the departing port.
> +*
> +* RETURN VALUES
> +*	None.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*
> +*********/
> +/****f* OpenSM: Subnet/osm_get_mgrp_by_mlid
> +* NAME
> +*	osm_get_mgrp_by_mlid
> +*
> +* DESCRIPTION
> +*	The looks for the given multicast group in the subnet table by mlid.
> +*	NOTE: this code is not thread safe. Need to grab the lock before
> +*	calling it.
> +*
> +* SYNOPSIS
> +*/
> +static inline struct osm_mgrp_holder *osm_get_mgrp_holder_by_mlid(osm_subn_t const
> +									*p_subn,
> +									ib_net16_t mlid)
> +{
> +	return p_subn->mgroup_holders[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO];
> +}

Why is this function in this file? All 'osm_get_*_by_*()' helpers are
located in osm_sudnet.[ch].

> +/*
> +* PARAMETERS
> +*	p_subn
> +*		[in] Pointer to an osm_subn_t object
> +*
> +*	mlid
> +*		[in] The multicast group mlid in network order
> +*
> +* RETURN VALUES
> +*	The multicast group structure pointer if found. NULL otherwise.
> +*********/
> +static inline ib_net16_t osm_mgrp_holder_get_mlid(IN osm_mgrp_holder_t *
> +							const p_mgrp_holder)
> +{
> +	return (p_mgrp_holder->mlid);
> +}
> +
> +static inline boolean_t osm_mgrp_holder_is_empty(IN const osm_mgrp_holder_t *
> +							const p_mgrp_holder)
> +{
> +	return (cl_qmap_count(&p_mgrp_holder->mgrp_port_map) == 0);
> +}

Where is this function used really? I didn't find.

> +
>  END_C_DECLS
>  #endif				/* _OSM_MULTICAST_H_ */
> diff --git a/opensm/include/opensm/osm_sm.h b/opensm/include/opensm/osm_sm.h
> index cc8321d..7f898ad 100644
> --- a/opensm/include/opensm/osm_sm.h
> +++ b/opensm/include/opensm/osm_sm.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   *
> @@ -61,6 +61,7 @@
>  #include <opensm/osm_port.h>
>  #include <opensm/osm_db.h>
>  #include <opensm/osm_remote_sm.h>
> +#include <opensm/osm_multicast.h>
>  
>  #ifdef __cplusplus
>  #  define BEGIN_C_DECLS extern "C" {
> @@ -539,7 +540,8 @@ osm_resp_send(IN osm_sm_t * sm,
>  ib_api_status_t
>  osm_sm_mcgrp_join(IN osm_sm_t * const p_sm,
>  		  IN const ib_net16_t mlid,
> -		  IN const ib_net64_t port_guid);
> +		  IN const ib_net64_t port_guid,
> +		  IN const ib_gid_t * p_mgid);
>  /*
>  * PARAMETERS
>  *	p_sm
> @@ -551,6 +553,8 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm,
>  *	port_guid
>  *		[in] Port GUID to add to the group.
>  *
> +* 	p_mgid
> +*		[in] MGID to add to the group holder.
>  * RETURN VALUES
>  *	None
>  *
> @@ -572,7 +576,7 @@ osm_sm_mcgrp_join(IN osm_sm_t * const p_sm,
>  */
>  ib_api_status_t
>  osm_sm_mcgrp_leave(IN osm_sm_t * const p_sm,
> -		   IN const ib_net16_t mlid, IN const ib_net64_t port_guid);
> +		   IN osm_mgrp_t * p_mgrp, IN ib_net64_t port_guid);
>  /*
>  * PARAMETERS
>  *	p_sm
> diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
> index 6c20de8..fad8780 100644
> --- a/opensm/include/opensm/osm_subnet.h
> +++ b/opensm/include/opensm/osm_subnet.h
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -513,7 +513,7 @@ typedef struct osm_subn {
>  	boolean_t coming_out_of_standby;
>  	unsigned need_update;
>  	cl_fmap_t mgrp_mgid_tbl;
> -	void *mgroups[IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 1];
> +	void *mgroup_holders[IB_LID_MCAST_END_HO - IB_LID_MCAST_START_HO + 1];
>  } osm_subn_t;
>  /*
>  * FIELDS
> @@ -634,8 +634,8 @@ typedef struct osm_subn {
>  *		This flag should be on during first non-master heavy
>  *		(including pre-master discovery stage)
>  *
> -*	mgroups
> -*		Array of pointers to all Multicast Group objects in the subnet.
> +*	mgroup_holders
> +*		Array of pointers to all Multicast Group Holder objects in the subnet.
>  *		Indexed by MLID offset from base MLID.
>  *
>  * SEE ALSO
> @@ -935,32 +935,34 @@ struct osm_port *osm_get_port_by_guid(IN osm_subn_t const *p_subn,
>  *	osm_port_t
>  *********/
>  
> -/****f* OpenSM: Subnet/osm_get_mgrp_by_mlid
> +/****f* OpenSM: Multicast Group Holder /osm_mgrp_holder_get_mlid_by_mgid
>  * NAME
> -*	osm_get_mgrp_by_mlid
> +*	osm_mgrp_holder_get_mlid_by_mgid
>  *
>  * DESCRIPTION
> -*	The looks for the given multicast group in the subnet table by mlid.
> -*	NOTE: this code is not thread safe. Need to grab the lock before
> -*	calling it.
> +*	Searches mgroup with given mgid
> +*	Returns mlid of the found mgroup
>  *
>  * SYNOPSIS
>  */
> -static inline
> -struct osm_mgrp *osm_get_mgrp_by_mlid(osm_subn_t const *p_subn, ib_net16_t mlid)
> -{
> -	return p_subn->mgroups[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO];
> -}
> +ib_net16_t osm_mgrp_holder_get_mlid_by_mgid(IN osm_subn_t const *p_subn,
> +					IN const ib_gid_t * const p_mgid);

If you are placing this helper here (in osm_subnet) please keep name
convention, something like:

	osm_get_mgrp_holder_by_mgid();

>  /*
>  * PARAMETERS
> +*
>  *	p_subn
> -*		[in] Pointer to an osm_subn_t object
> +*		[in] Pointer to osm_subn_t object
>  *
> -*	mlid
> -*		[in] The multicast group mlid in network order
> +*	p_mgid
> +*		[in] pointer to mgid
>  *
>  * RETURN VALUES
> -*	The multicast group structure pointer if found. NULL otherwise.
> +*	mlid of found holder, or zero.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*
>  *********/
>  
>  /****f* OpenSM: Helper/osm_get_physp_by_mad_addr
> diff --git a/opensm/opensm/osm_drop_mgr.c b/opensm/opensm/osm_drop_mgr.c
> index c9a4f33..e1f2bd3 100644
> --- a/opensm/opensm/osm_drop_mgr.c
> +++ b/opensm/opensm/osm_drop_mgr.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -158,7 +158,6 @@ static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
>  	osm_port_t *p_port_check;
>  	cl_qmap_t *p_sm_guid_tbl;
>  	osm_mcm_info_t *p_mcm;
> -	osm_mgrp_t *p_mgrp;
>  	cl_ptr_vector_t *p_port_lid_tbl;
>  	uint16_t min_lid_ho;
>  	uint16_t max_lid_ho;
> @@ -168,6 +167,7 @@ static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
>  	ib_gid_t port_gid;
>  	ib_mad_notice_attr_t notice;
>  	ib_api_status_t status;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  
>  	OSM_LOG_ENTER(sm->p_log);
>  
> @@ -212,10 +212,12 @@ static void drop_mgr_remove_port(osm_sm_t * sm, IN osm_port_t * p_port)
>  
>  	p_mcm = (osm_mcm_info_t *) cl_qlist_remove_head(&p_port->mcm_list);
>  	while (p_mcm != (osm_mcm_info_t *) cl_qlist_end(&p_port->mcm_list)) {
> -		p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, p_mcm->mlid);
> -		if (p_mgrp) {
> -			osm_mgrp_delete_port(sm->p_subn, sm->p_log,
> -					     p_mgrp, p_port->guid);
> +		p_mgrp_holder =
> +		    osm_get_mgrp_holder_by_mlid(sm->p_subn, p_mcm->mlid);
> +		if (p_mgrp_holder) {
> +			osm_mgrp_holder_remove_port(sm->p_subn, sm->p_log,
> +						    p_mgrp_holder,
> +						    p_port->guid);
>  			osm_mcm_info_delete((osm_mcm_info_t *) p_mcm);
>  		}
>  		p_mcm =
> diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
> index 4dbbaa0..f506393 100644
> --- a/opensm/opensm/osm_mcast_mgr.c
> +++ b/opensm/opensm/osm_mcast_mgr.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -55,6 +55,7 @@
>  #include <opensm/osm_switch.h>
>  #include <opensm/osm_helper.h>
>  #include <opensm/osm_msgdef.h>
> +#include <arpa/inet.h>
>  
>  /**********************************************************************
>   **********************************************************************/
> @@ -111,14 +112,15 @@ static void mcast_mgr_purge_tree_node(IN osm_mtree_node_t * p_mtn)
>  
>  /**********************************************************************
>   **********************************************************************/
> -static void mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_t * p_mgrp)
> +static void mcast_mgr_purge_tree(osm_sm_t * sm,
> +				 IN osm_mgrp_holder_t * p_mgrp_holder)
>  {
>  	OSM_LOG_ENTER(sm->p_log);
>  
> -	if (p_mgrp->p_root)
> -		mcast_mgr_purge_tree_node(p_mgrp->p_root);
> +	if (p_mgrp_holder->p_root)
> +		mcast_mgr_purge_tree_node(p_mgrp_holder->p_root);
>  
> -	p_mgrp->p_root = NULL;
> +	p_mgrp_holder->p_root = NULL;
>  
>  	OSM_LOG_EXIT(sm->p_log);
>  }
> @@ -126,41 +128,40 @@ static void mcast_mgr_purge_tree(osm_sm_t * sm, IN osm_mgrp_t * p_mgrp)
>  /**********************************************************************
>   **********************************************************************/
>  static float osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm,
> -					    const osm_mgrp_t * p_mgrp,
> +					    const osm_mgrp_holder_t *
> +					    p_mgrp_holder,
>  					    const osm_switch_t * p_sw)
>  {
>  	float avg_hops = 0;
>  	uint32_t hops = 0;
>  	uint32_t num_ports = 0;
>  	const osm_port_t *p_port;
> -	const osm_mcm_port_t *p_mcm_port;
> -	const cl_qmap_t *p_mcm_tbl;
> +	const osm_mgrp_port_t *p_holder_port;
>  
>  	OSM_LOG_ENTER(sm->p_log);
>  
> -	p_mcm_tbl = &p_mgrp->mcm_port_tbl;
>  
>  	/*
>  	   For each member of the multicast group, compute the
>  	   number of hops to its base LID.
>  	 */
> -	for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
> -	     p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
> -	     p_mcm_port =
> -	     (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
> +	for (p_holder_port =
> +	     (osm_mgrp_port_t *) cl_qmap_head(&p_mgrp_holder->mgrp_port_map);
> +	     p_holder_port !=
> +	     (osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map);
> +	     p_holder_port =
> +	     (osm_mgrp_port_t *) cl_qmap_next(&p_holder_port->guid_item)) {
>  		/*
>  		   Acquire the port object for this port guid, then create
>  		   the new worker object to build the list.
>  		 */
>  		p_port = osm_get_port_by_guid(sm->p_subn,
> -					      ib_gid_get_guid(&p_mcm_port->
> -							      port_gid));
> +					      p_holder_port->port_guid);
>  
>  		if (!p_port) {
>  			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A18: "
>  				"No port object for port 0x%016" PRIx64 "\n",
> -				cl_ntoh64(ib_gid_get_guid
> -					  (&p_mcm_port->port_gid)));
> +				cl_ntoh64(p_holder_port->port_guid));
>  			continue;
>  		}
>  
> @@ -185,40 +186,39 @@ static float osm_mcast_mgr_compute_avg_hops(osm_sm_t * sm,
>   of the group HCAs
>   **********************************************************************/
>  static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm,
> -					    const osm_mgrp_t * p_mgrp,
> +					    const osm_mgrp_holder_t *
> +					    p_mgrp_holder,
>  					    const osm_switch_t * p_sw)
>  {
>  	uint32_t max_hops = 0;
>  	uint32_t hops = 0;
>  	const osm_port_t *p_port;
> -	const osm_mcm_port_t *p_mcm_port;
> -	const cl_qmap_t *p_mcm_tbl;
> +	const osm_mgrp_port_t *p_mgrp_holder_port;
>  
>  	OSM_LOG_ENTER(sm->p_log);
>  
> -	p_mcm_tbl = &p_mgrp->mcm_port_tbl;
>  
>  	/*
>  	   For each member of the multicast group, compute the
>  	   number of hops to its base LID.
>  	 */
> -	for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
> -	     p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
> -	     p_mcm_port =
> -	     (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
> +	for (p_mgrp_holder_port =
> +	     (osm_mgrp_port_t *) cl_qmap_head(&p_mgrp_holder->mgrp_port_map);
> +	     p_mgrp_holder_port !=
> +	     (osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map);
> +	     p_mgrp_holder_port =
> +	     (osm_mgrp_port_t *) cl_qmap_next(&p_mgrp_holder_port->guid_item)) {
>  		/*
>  		   Acquire the port object for this port guid, then create
>  		   the new worker object to build the list.
>  		 */
>  		p_port = osm_get_port_by_guid(sm->p_subn,
> -					      ib_gid_get_guid(&p_mcm_port->
> -							      port_gid));
> +					      p_mgrp_holder_port->port_guid);
>  
>  		if (!p_port) {
>  			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A1A: "
>  				"No port object for port 0x%016" PRIx64 "\n",
> -				cl_ntoh64(ib_gid_get_guid
> -					  (&p_mcm_port->port_gid)));
> +				cl_ntoh64(p_mgrp_holder_port->port_guid));
>  			continue;
>  		}
>  
> @@ -244,7 +244,8 @@ static float osm_mcast_mgr_compute_max_hops(osm_sm_t * sm,
>     of the multicast group.
>  **********************************************************************/
>  static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
> -						   const osm_mgrp_t * p_mgrp)
> +						   const osm_mgrp_holder_t *
> +						   p_mgrp_holder)
>  {
>  	cl_qmap_t *p_sw_tbl;
>  	const osm_switch_t *p_sw;
> @@ -252,7 +253,7 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
>  	float hops = 0;
>  	float best_hops = 10000;	/* any big # will do */
>  #ifdef OSM_VENDOR_INTF_ANAFA
> -	boolean_t use_avg_hops = TRUE;	/* anafa2 - bug hca on switch *//* use max hops for root */
> +	boolean_t use_avg_hops = TRUE; /* anafa2 - bug hca on switch *//* use max hops for root */
>  #else
>  	boolean_t use_avg_hops = FALSE;	/* use max hops for root */
>  #endif
> @@ -261,7 +262,7 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
>  
>  	p_sw_tbl = &sm->p_subn->sw_guid_tbl;
>  
> -	CL_ASSERT(!osm_mgrp_is_empty(p_mgrp));
> +	CL_ASSERT(!osm_mgrp_holder_is_empty(p_mgrp_holder));
>  
>  	for (p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
>  	     p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl);
> @@ -270,9 +271,13 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
>  			continue;
>  
>  		if (use_avg_hops)
> -			hops = osm_mcast_mgr_compute_avg_hops(sm, p_mgrp, p_sw);
> +			hops =
> +			    osm_mcast_mgr_compute_avg_hops(sm, p_mgrp_holder,
> +							   p_sw);
>  		else
> -			hops = osm_mcast_mgr_compute_max_hops(sm, p_mgrp, p_sw);
> +			hops =
> +			    osm_mcast_mgr_compute_max_hops(sm, p_mgrp_holder,
> +							   p_sw);
>  
>  		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
>  			"Switch 0x%016" PRIx64 ", hops = %f\n",
> @@ -301,7 +306,8 @@ static osm_switch_t *mcast_mgr_find_optimal_switch(osm_sm_t * sm,
>     This function returns the existing or optimal root swtich for the tree.
>  **********************************************************************/
>  static osm_switch_t *mcast_mgr_find_root_switch(osm_sm_t * sm,
> -						const osm_mgrp_t * p_mgrp)
> +						const osm_mgrp_holder_t *
> +						p_mgrp_holder)
>  {
>  	const osm_switch_t *p_sw = NULL;
>  
> @@ -313,7 +319,7 @@ static osm_switch_t *mcast_mgr_find_root_switch(osm_sm_t * sm,
>  	   the root will be always on the first switch attached to it.
>  	   - Very bad ...
>  	 */
> -	p_sw = mcast_mgr_find_optimal_switch(sm, p_mgrp);
> +	p_sw = mcast_mgr_find_optimal_switch(sm, p_mgrp_holder);
>  
>  	OSM_LOG_EXIT(sm->p_log);
>  	return (osm_switch_t *) p_sw;
> @@ -393,7 +399,8 @@ static int mcast_mgr_set_tbl(osm_sm_t * sm, IN osm_switch_t * p_sw)
>    spanning tree that eminate from this switch.  On input, the p_list
>    contains the group members that must be routed from this switch.
>  **********************************************************************/
> -static void mcast_mgr_subdivide(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
> +static void mcast_mgr_subdivide(osm_sm_t * sm,
> +				osm_mgrp_holder_t * p_mgrp_holder,
>  				osm_switch_t * p_sw, cl_qlist_t * p_list,
>  				cl_qlist_t * list_array, uint8_t array_size)
>  {
> @@ -404,7 +411,7 @@ static void mcast_mgr_subdivide(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
>  
>  	OSM_LOG_ENTER(sm->p_log);
>  
> -	mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
> +	mlid_ho = cl_ntoh16(osm_mgrp_holder_get_mlid(p_mgrp_holder));
>  
>  	/*
>  	   For Multicast Groups, we want not to count on previous
> @@ -494,7 +501,8 @@ static void mcast_mgr_purge_list(osm_sm_t * sm, cl_qlist_t * p_list)
>  
>    The function returns the newly created mtree node element.
>  **********************************************************************/
> -static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
> +static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm,
> +					  osm_mgrp_holder_t * p_mgrp_holder,
>  					  osm_switch_t * p_sw,
>  					  cl_qlist_t * p_list, uint8_t depth,
>  					  uint8_t upstream_port,
> @@ -520,7 +528,7 @@ static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
>  
>  	node_guid = osm_node_get_node_guid(p_sw->p_node);
>  	node_guid_ho = cl_ntoh64(node_guid);
> -	mlid_ho = cl_ntoh16(osm_mgrp_get_mlid(p_mgrp));
> +	mlid_ho = cl_ntoh16(osm_mgrp_holder_get_mlid(p_mgrp_holder));
>  
>  	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>  		"Routing MLID 0x%X through switch 0x%" PRIx64
> @@ -597,7 +605,8 @@ static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
>  	for (i = 0; i < max_children; i++)
>  		cl_qlist_init(&list_array[i]);
>  
> -	mcast_mgr_subdivide(sm, p_mgrp, p_sw, p_list, list_array, max_children);
> +	mcast_mgr_subdivide(sm, p_mgrp_holder, p_sw, p_list, list_array,
> +			    max_children);
>  
>  	p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
>  
> @@ -680,8 +689,9 @@ static osm_mtree_node_t *mcast_mgr_branch(osm_sm_t * sm, osm_mgrp_t * p_mgrp,
>  			CL_ASSERT(p_remote_physp);
>  
>  			p_mtn->child_array[i] =
> -			    mcast_mgr_branch(sm, p_mgrp, p_remote_node->sw,
> -					     p_port_list, depth,
> +			    mcast_mgr_branch(sm, p_mgrp_holder,
> +					     p_remote_node->sw, p_port_list,
> +					     depth,
>  					     osm_physp_get_port_num
>  					     (p_remote_physp), p_max_depth);
>  		} else {
> @@ -716,11 +726,11 @@ Exit:
>  /**********************************************************************
>   **********************************************************************/
>  static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
> -						     osm_mgrp_t * p_mgrp)
> +						     osm_mgrp_holder_t *
> +						     p_mgrp_holder)
>  {
> -	const cl_qmap_t *p_mcm_tbl;
>  	const osm_port_t *p_port;
> -	const osm_mcm_port_t *p_mcm_port;
> +	const osm_mgrp_port_t *p_mgrp_port;
>  	uint32_t num_ports;
>  	cl_qlist_t port_list;
>  	osm_switch_t *p_sw;
> @@ -739,14 +749,13 @@ static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
>  	   on multicast forwarding table information if the user wants to
>  	   preserve existing multicast routes.
>  	 */
> -	mcast_mgr_purge_tree(sm, p_mgrp);
> +	mcast_mgr_purge_tree(sm, p_mgrp_holder);
>  
> -	p_mcm_tbl = &p_mgrp->mcm_port_tbl;
> -	num_ports = cl_qmap_count(p_mcm_tbl);
> +	num_ports = cl_qmap_count(&p_mgrp_holder->mgrp_port_map);
>  	if (num_ports == 0) {
>  		OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>  			"MLID 0x%X has no members - nothing to do\n",
> -			cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)));
> +			cl_ntoh16(osm_mgrp_holder_get_mlid(p_mgrp_holder)));
>  		goto Exit;
>  	}
>  
> @@ -766,11 +775,11 @@ static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
>  	   Locate the switch around which to create the spanning
>  	   tree for this multicast group.
>  	 */
> -	p_sw = mcast_mgr_find_root_switch(sm, p_mgrp);
> +	p_sw = mcast_mgr_find_root_switch(sm, p_mgrp_holder);
>  	if (p_sw == NULL) {
>  		OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A08: "
>  			"Unable to locate a suitable switch for group 0x%X\n",
> -			cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)));
> +			cl_ntoh16(osm_mgrp_holder_get_mlid(p_mgrp_holder)));
>  		status = IB_ERROR;
>  		goto Exit;
>  	}
> @@ -778,22 +787,22 @@ static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
>  	/*
>  	   Build the first "subset" containing all member ports.
>  	 */
> -	for (p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(p_mcm_tbl);
> -	     p_mcm_port != (osm_mcm_port_t *) cl_qmap_end(p_mcm_tbl);
> -	     p_mcm_port =
> -	     (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item)) {
> +	for (p_mgrp_port =
> +	     (osm_mgrp_port_t *) cl_qmap_head(&p_mgrp_holder->mgrp_port_map);
> +	     p_mgrp_port !=
> +	     (osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map);
> +	     p_mgrp_port =
> +	     (osm_mgrp_port_t *) cl_qmap_next(&p_mgrp_port->guid_item)) {
>  		/*
>  		   Acquire the port object for this port guid, then create
>  		   the new worker object to build the list.
>  		 */
> -		p_port = osm_get_port_by_guid(sm->p_subn,
> -					      ib_gid_get_guid(&p_mcm_port->
> -							      port_gid));
> +		p_port =
> +		    osm_get_port_by_guid(sm->p_subn, p_mgrp_port->port_guid);
>  		if (!p_port) {
>  			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A09: "
>  				"No port object for port 0x%016" PRIx64 "\n",
> -				cl_ntoh64(ib_gid_get_guid
> -					  (&p_mcm_port->port_gid)));
> +				cl_ntoh64(p_mgrp_port->port_guid));
>  			continue;
>  		}
>  
> @@ -801,8 +810,7 @@ static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
>  		if (p_wobj == NULL) {
>  			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A10: "
>  				"Insufficient memory to route port 0x%016"
> -				PRIx64 "\n",
> -				cl_ntoh64(osm_port_get_guid(p_port)));
> +				PRIx64 "\n", cl_ntoh64(p_mgrp_port->port_guid));
>  			continue;
>  		}
>  
> @@ -810,12 +818,14 @@ static ib_api_status_t mcast_mgr_build_spanning_tree(osm_sm_t * sm,
>  	}
>  
>  	count = cl_qlist_count(&port_list);
> -	p_mgrp->p_root = mcast_mgr_branch(sm, p_mgrp, p_sw, &port_list, 0, 0,
> -					  &max_depth);
> +	p_mgrp_holder->p_root =
> +	    mcast_mgr_branch(sm, p_mgrp_holder, p_sw, &port_list, 0, 0,
> +			     &max_depth);
>  
>  	OSM_LOG(sm->p_log, OSM_LOG_VERBOSE,
>  		"Configured MLID 0x%X for %u ports, max tree depth = %u\n",
> -		cl_ntoh16(osm_mgrp_get_mlid(p_mgrp)), count, max_depth);
> +		cl_ntoh16(osm_mgrp_holder_get_mlid(p_mgrp_holder)), count,
> +		max_depth);
>  
>  Exit:
>  	OSM_LOG_EXIT(sm->p_log);
> @@ -1023,17 +1033,20 @@ Exit:
>   NOTE : The lock should be held externally!
>   **********************************************************************/
>  static ib_api_status_t mcast_mgr_process_mgrp(osm_sm_t * sm,
> -					      IN osm_mgrp_t * p_mgrp)
> +					      IN osm_mgrp_holder_t * p_mgrp_holder)
>  {
>  	ib_api_status_t status = IB_SUCCESS;
>  	ib_net16_t mlid;
> +	osm_mgrp_t *p_mgrp;
> +	cl_list_item_t *p_item;
> +	unsigned has_full_members = 0;
>  
>  	OSM_LOG_ENTER(sm->p_log);
>  
> -	mlid = osm_mgrp_get_mlid(p_mgrp);
> +	mlid = osm_mgrp_holder_get_mlid(p_mgrp_holder);
>  
>  	OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> -		"Processing multicast group 0x%X\n", cl_ntoh16(mlid));
> +		"Processing multicast group_holder 0x%X\n", cl_ntoh16(mlid));
>  
>  	/*
>  	   Clear the multicast tables to start clean, then build
> @@ -1042,27 +1055,52 @@ static ib_api_status_t mcast_mgr_process_mgrp(osm_sm_t * sm,
>  	 */
>  	mcast_mgr_clear(sm, cl_ntoh16(mlid));
>  
> -	if (p_mgrp->full_members) {
> -		status = mcast_mgr_build_spanning_tree(sm, p_mgrp);
> +	p_item = cl_qlist_head(&p_mgrp_holder->mgrp_list);
> +	while (p_item != cl_qlist_end(&p_mgrp_holder->mgrp_list)) {
> +		char gid_str[INET6_ADDRSTRLEN];
> +		p_mgrp = (osm_mgrp_t *)
> +			PARENT_STRUCT(p_item, osm_mgrp_t, mlid_item);
> +			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> +				"MLID  0x%x has mgrp  %s\n",cl_ntoh16(p_mgrp->mlid),
> +				inet_ntop(AF_INET6,
> +				p_mgrp->mcmember_rec.mgid.raw,
> +				gid_str, sizeof(gid_str)));
> +		p_item = cl_qlist_next(p_item);
> +		if (p_mgrp->to_be_deleted) {
> +					osm_mcm_port_t *p_mcm_port;

Wrong indentation (here and in many other places).

> +					OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> +						"Destroying mgrp  %s with lid:0x%x\n",
> +						inet_ntop(AF_INET6,
> +						p_mgrp->mcmember_rec.mgid.raw,
> +						gid_str, sizeof(gid_str)),
> +						cl_ntoh16(p_mgrp->mlid));
> +					osm_mgrp_holder_delete_mgrp(p_mgrp_holder, p_mgrp);
> +					p_mcm_port = (osm_mcm_port_t *) cl_qmap_head(&p_mgrp->mcm_port_tbl);
> +					while (p_mcm_port !=
> +						(osm_mcm_port_t *) cl_qmap_end(&p_mgrp->mcm_port_tbl)) {
> +						osm_mgrp_holder_port_delete_mgrp(p_mgrp_holder, p_mgrp,
> +							p_mcm_port->port_gid.unicast.interface_id);
> +						p_mcm_port =
> +							(osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item);
> +					}
> +					cl_fmap_remove_item(&sm->p_subn->mgrp_mgid_tbl,
> +						&p_mgrp->map_item);
> +					osm_mgrp_delete(p_mgrp);

I'm not happy with this cleaning block. Actually it removes multicast
group (mgrp) - would be better to consolidate this code in separate
function.

> +		}
> +		else if (!has_full_members)
> +						has_full_members = p_mgrp->full_members;

No need to bother with condition check, something like:

	else
		has_full_members = 1;

should be enough here.

> +	}
> +	if (has_full_members) {
> +		status = mcast_mgr_build_spanning_tree(sm, p_mgrp_holder);
>  		if (status != IB_SUCCESS) {
>  			OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A17: "
>  				"Unable to create spanning tree (%s)\n",
>  				ib_get_err_str(status));
>  			goto Exit;
>  		}
> -	} else  if (p_mgrp->to_be_deleted) {
> -		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> -			"Destroying mgrp with lid:0x%x\n",
> -			cl_ntoh16(p_mgrp->mlid));
> -		sm->p_subn->mgroups[cl_ntoh16(p_mgrp->mlid) -
> -				    IB_LID_MCAST_START_HO] = NULL;
> -		cl_fmap_remove_item(&sm->p_subn->mgrp_mgid_tbl,
> -				    &p_mgrp->map_item);
> -		osm_mgrp_delete(p_mgrp);
> -		goto Exit;
> +	    p_mgrp_holder->last_tree_id = p_mgrp_holder->last_change_id;

And where is holder deletion handled? I don't see (except a final
osm_subnet cleanup). So does it mean that once allocated mlids will be
never released?

>  	}
>  
> -	p_mgrp->last_tree_id = p_mgrp->last_change_id;
>  
>  Exit:
>  	OSM_LOG_EXIT(sm->p_log);
> @@ -1076,7 +1114,7 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
>  	osm_switch_t *p_sw;
>  	cl_qmap_t *p_sw_tbl;
>  	cl_qlist_t *p_list = &sm->mgrp_list;
> -	osm_mgrp_t *p_mgrp;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  	int i, ret = 0;
>  
>  	OSM_LOG_ENTER(sm->p_log);
> @@ -1104,9 +1142,10 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
>  		   of the subnet. Not due to a specific multicast request.
>  		   So the request type is subnet_change and the port guid is 0.
>  		 */
> -		p_mgrp = sm->p_subn->mgroups[i];
> -		if (p_mgrp)
> -			mcast_mgr_process_mgrp(sm, p_mgrp);
> +		p_mgrp_holder = sm->p_subn->mgroup_holders[i];
> +		if (p_mgrp_holder) {
> +			mcast_mgr_process_mgrp(sm, p_mgrp_holder);
> +		}
>  	}
>  
>  	/*
> @@ -1141,7 +1180,7 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>  	cl_qlist_t *p_list = &sm->mgrp_list;
>  	osm_switch_t *p_sw;
>  	cl_qmap_t *p_sw_tbl;
> -	osm_mgrp_t *p_mgrp;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  	ib_net16_t mlid;
>  	osm_mcast_mgr_ctxt_t *ctx;
>  	int ret = 0;
> @@ -1169,24 +1208,25 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>  
>  		/* since we delayed the execution we prefer to pass the
>  		   mlid as the mgrp identifier and then find it or abort */
> -		p_mgrp = osm_get_mgrp_by_mlid(sm->p_subn, mlid);
> -		if (!p_mgrp)
> +		p_mgrp_holder = osm_get_mgrp_holder_by_mlid(sm->p_subn, mlid);
> +		if (!p_mgrp_holder)
>  			continue;
>  
>  		/* if there was no change from the last time
>  		 * we processed the group we can skip doing anything
>  		 */
> -		if (p_mgrp->last_change_id == p_mgrp->last_tree_id) {
> +		if (p_mgrp_holder->last_change_id ==
> +		    p_mgrp_holder->last_tree_id) {
>  			OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> -				"Skip processing mgrp with lid:0x%X change id:%u\n",
> -				cl_ntoh16(mlid), p_mgrp->last_change_id);
> +				"Skip processing p_mgrp_holder with lid:0x%X change id:%u\n",
> +				cl_ntoh16(mlid), p_mgrp_holder->last_change_id);
>  			continue;
>  		}
>  
>  		OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
>  			"Processing mgrp with lid:0x%X change id:%u\n",
> -			cl_ntoh16(mlid), p_mgrp->last_change_id);
> -		mcast_mgr_process_mgrp(sm, p_mgrp);
> +			cl_ntoh16(mlid), p_mgrp_holder->last_change_id);
> +		mcast_mgr_process_mgrp(sm, p_mgrp_holder);
>  	}
>  
>  	/*
> diff --git a/opensm/opensm/osm_multicast.c b/opensm/opensm/osm_multicast.c
> index d2733c4..072b591 100644
> --- a/opensm/opensm/osm_multicast.c
> +++ b/opensm/opensm/osm_multicast.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   *
> @@ -48,6 +48,7 @@
>  #include <opensm/osm_mcm_port.h>
>  #include <opensm/osm_mtree.h>
>  #include <opensm/osm_inform.h>
> +#include <arpa/inet.h>
>  
>  /**********************************************************************
>   **********************************************************************/
> @@ -67,8 +68,6 @@ void osm_mgrp_delete(IN osm_mgrp_t * p_mgrp)
>  		    (osm_mcm_port_t *) cl_qmap_next(&p_mcm_port->map_item);
>  		osm_mcm_port_delete(p_mcm_port);
>  	}
> -	/* destroy the mtree_node structure */
> -	osm_mtree_destroy(p_mgrp->p_root);
>  
>  	free(p_mgrp);
>  }
> @@ -86,9 +85,6 @@ osm_mgrp_t *osm_mgrp_new(IN const ib_net16_t mlid)
>  	memset(p_mgrp, 0, sizeof(*p_mgrp));
>  	cl_qmap_init(&p_mgrp->mcm_port_tbl);
>  	p_mgrp->mlid = mlid;
> -	p_mgrp->last_change_id = 0;
> -	p_mgrp->last_tree_id = 0;
> -	p_mgrp->to_be_deleted = FALSE;
>  
>  	return p_mgrp;
>  }
> @@ -133,6 +129,7 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log,
>  	ib_net64_t port_guid;
>  	osm_mcm_port_t *p_mcm_port;
>  	cl_map_item_t *prev_item;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  	uint8_t prev_join_state = 0;
>  	uint8_t prev_scope;
>  
> @@ -167,9 +164,18 @@ osm_mcm_port_t *osm_mgrp_add_port(IN osm_subn_t * subn, osm_log_t * log,
>  		p_mcm_port->scope_state =
>  		    ib_member_set_scope_state(prev_scope,
>  					      prev_join_state | join_state);
> -	} else {
> -		/* track the fact we modified the group ports */
> -		p_mgrp->last_change_id++;
> +	}
> +
> +	p_mgrp_holder = osm_get_mgrp_holder_by_mlid(subn, p_mgrp->mlid);
> +	if (! p_mgrp_holder ||

Is it legal case?

> +			 (IB_SUCCESS != osm_mgrp_holder_port_add_mgrp(p_mgrp_holder,
> +						p_mgrp, port_guid)) ) {
> +			/*  if  the above failed and added port is new one, remove port also from mcm_port_tbl */
> +			if (! prev_join_state) {

I'm not following. Why is this condition needed for error cleanup?

> +				cl_qmap_remove_item(&p_mgrp->mcm_port_tbl, &p_mcm_port->map_item);
> +				osm_mcm_port_delete(p_mcm_port);
> +			}
> +			return NULL;
>  	}
>  
>  	if ((join_state & IB_JOIN_STATE_FULL) &&
> @@ -212,7 +218,6 @@ int osm_mgrp_remove_port(osm_subn_t * subn, osm_log_t * log, osm_mgrp_t * mgrp,
>  			cl_ntoh64(mcm->port_gid.unicast.interface_id));
>  		osm_mcm_port_delete(mcm);
>  		/* track the fact we modified the group */
> -		mgrp->last_change_id++;
>  		ret = 1;
>  	}
>  
> @@ -285,16 +290,173 @@ static void mgrp_apply_func_sub(const osm_mgrp_t * p_mgrp,
>  
>  /**********************************************************************
>   **********************************************************************/
> -void osm_mgrp_apply_func(const osm_mgrp_t * p_mgrp, osm_mgrp_func_t p_func,
> -			 void *context)

This function seems to be not used and removing this is basically fine,
but please: do it in separate patch, remove its prototypes from header
files, cleanup associated compiler warnings.

> +static osm_mgrp_port_t *osm_mgrp_port_new(ib_net64_t port_guid)
> +{
> +	osm_mgrp_port_t *p_mgrp_port =
> +	(osm_mgrp_port_t *) malloc(sizeof(osm_mgrp_port_t));
> +	if (!p_mgrp_port) {
> +		return NULL;
> +	}
> +	memset(p_mgrp_port, 0, sizeof(*p_mgrp_port));
> +	p_mgrp_port->port_guid = port_guid;
> +	cl_qlist_init(&p_mgrp_port->mgroups);
> +	return p_mgrp_port;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +osm_mgrp_holder_t *osm_mgrp_holder_new(IN osm_subn_t * p_subn,
> +					ib_net16_t mlid)
>  {
> -	osm_mtree_node_t *p_mtn;
> +	osm_mgrp_holder_t *p_mgrp_holder;
> +	p_mgrp_holder =
> +		p_subn->mgroup_holders[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO] =
> +		(osm_mgrp_holder_t *) malloc(sizeof(*p_mgrp_holder));
> +	if (!p_mgrp_holder)
> +		return NULL;
>  
> -	CL_ASSERT(p_mgrp);
> -	CL_ASSERT(p_func);
> +	memset(p_mgrp_holder, 0, sizeof(*p_mgrp_holder));
> +	p_mgrp_holder->mlid = mlid;
> +	cl_qmap_init(&p_mgrp_holder->mgrp_port_map);
> +	cl_qlist_init(&p_mgrp_holder->mgrp_list);
> +	return p_mgrp_holder;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +void osm_mgrp_holder_delete(IN osm_subn_t *p_subn, ib_net16_t mlid)
> +{
> +	osm_mgrp_port_t *p_osm_mgr_port;
> +	cl_map_item_t *p_item;
> +
> +	osm_mgrp_holder_t *p_mgrp_holder =
> +		p_subn->mgroup_holders[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO];
> +	p_item = cl_qmap_head(&p_mgrp_holder->mgrp_port_map);
> +	/* Delete ports shared same MLID */
> +	while (p_item != cl_qmap_end(&p_mgrp_holder->mgrp_port_map)) {
> +		p_osm_mgr_port = (osm_mgrp_port_t *) p_item;
> +		cl_qlist_remove_all(&p_osm_mgr_port->mgroups);
> +		cl_qmap_remove_item(&p_mgrp_holder->mgrp_port_map, p_item);
> +		p_item = cl_qmap_head(&p_mgrp_holder->mgrp_port_map);
> +		free(p_osm_mgr_port);
> +	}
> +	/* Remove mgrp from this MLID */
> +	cl_qlist_remove_all(&p_mgrp_holder->mgrp_list);
> +	/* Destroy the mtree_node structure */
> +	osm_mtree_destroy(p_mgrp_holder->p_root);
> +	p_subn->mgroup_holders[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO] = NULL;
> +	free(p_mgrp_holder);
> +}

As commented already I cannot see where those resources are freed
(except final OpenSM destroying).

> +
> +/**********************************************************************
> + **********************************************************************/
> +void osm_mgrp_holder_remove_port(osm_subn_t * subn, osm_log_t * p_log,
> +				osm_mgrp_holder_t * p_mgrp_holder,
> +				ib_net64_t port_guid)
> +{
> +	osm_mgrp_t *p_mgrp;
> +	cl_list_item_t *p_item;
> +
> +	OSM_LOG_ENTER(p_log);
> +
> +	osm_mgrp_port_t *p_mgrp_port = (osm_mgrp_port_t *)
> +		cl_qmap_remove(&p_mgrp_holder->mgrp_port_map, port_guid);
> +	if (p_mgrp_port !=
> +		(osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map)) {
> +		char gid_str[INET6_ADDRSTRLEN];
> +		OSM_LOG(p_log, OSM_LOG_DEBUG,
> +		"port  0x%" PRIx64 " removed from  mlid 0x%X\n",
> +		port_guid, cl_ntoh16(p_mgrp_holder->mlid));
> +		while ((p_item =
> +			cl_qlist_remove_head(&p_mgrp_port->mgroups)) !=
> +			cl_qlist_end(&p_mgrp_port->mgroups)) {
> +			p_mgrp = (osm_mgrp_t *)
> +				PARENT_STRUCT(p_item, osm_mgrp_t,port_item);
> +			OSM_LOG(p_log, OSM_LOG_DEBUG,
> +				"removing mgrp mgid %s from port  0x%" PRIx64"\n",
> +				 inet_ntop(AF_INET6,p_mgrp->mcmember_rec.mgid.raw,
> +					gid_str, sizeof(gid_str)),
> +					cl_ntoh64(port_guid));
> +			osm_mgrp_delete_port(subn, p_log, p_mgrp, port_guid);
> +		}
> +		free(p_mgrp_port);
> +	}
> +	OSM_LOG_EXIT(p_log);
> +}
>  
> -	p_mtn = p_mgrp->p_root;
> +/**********************************************************************
> + **********************************************************************/
> +void osm_mgrp_holder_add_mgrp(osm_mgrp_holder_t * p_mgrp_holder,
> +				osm_mgrp_t * p_mgrp, osm_log_t *  p_log)
> +{
> +	char gid_str[INET6_ADDRSTRLEN];
> +
> +	OSM_LOG_ENTER(p_log);
> +	p_mgrp_holder->to_be_deleted = 0;
> +	cl_qlist_insert_tail(&p_mgrp_holder->mgrp_list, &p_mgrp->mlid_item);
> +	OSM_LOG(p_log, OSM_LOG_DEBUG,
> +		"mgrp with MGID:%s added to holder with mlid = 0x%X\n",
> +		inet_ntop(AF_INET6, p_mgrp->mcmember_rec.mgid.raw, gid_str,
> +		sizeof(gid_str)), cl_ntoh16(p_mgrp_holder->mlid));
> +	p_mgrp_holder->last_change_id++;
> +	OSM_LOG_EXIT(p_log);
> +}
>  
> -	if (p_mtn)
> -		mgrp_apply_func_sub(p_mgrp, p_mtn, p_func, context);
> +/**********************************************************************
> + **********************************************************************/
> +void osm_mgrp_holder_delete_mgrp(osm_mgrp_holder_t * p_mgrp_holder,
> +				 osm_mgrp_t * p_mgrp)
> +{
> +	p_mgrp->to_be_deleted = 1;
> +	cl_qlist_remove_item(&p_mgrp_holder->mgrp_list, &p_mgrp->mlid_item);
> +	if (0 == cl_qlist_count(&p_mgrp_holder->mgrp_list)) {
> +		/* No more mgroups on this mlid */
> +		p_mgrp_holder->to_be_deleted = 1;
> +		p_mgrp_holder->last_tree_id = 0;
> +		p_mgrp_holder->last_change_id = 0;
> +	}
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +ib_api_status_t osm_mgrp_holder_port_add_mgrp(osm_mgrp_holder_t * p_mgrp_holder,
> +						osm_mgrp_t * p_mgrp,
> +						ib_net64_t port_guid)
> +{
> +	osm_mgrp_port_t *p_mgrp_port = (osm_mgrp_port_t *)
> +		cl_qmap_get(&p_mgrp_holder->mgrp_port_map, port_guid);
> +	if (p_mgrp_port ==
> +		(osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map)) {
> +		/* new port to mlid */
> +		p_mgrp_port = osm_mgrp_port_new(port_guid);
> +		if (!p_mgrp_port) {
> +			return IB_INSUFFICIENT_MEMORY;
> +		}
> +		cl_qmap_insert(&p_mgrp_holder->mgrp_port_map,
> +			p_mgrp_port->port_guid, &p_mgrp_port->guid_item);
> +	}
> +	cl_qlist_insert_tail(&p_mgrp_port->mgroups, &p_mgrp->port_item);

This function (osm_mgrp_holder_port_add_mgrp()) is called from
osm_mgrp_add_port() and as far as I can see will be executed not only on
"pure" port addition but also when port join state in specified MC group
is changing. If so wouldn't this potentially double MC group addition
corrupt this list?

> +    p_mgrp_holder->last_change_id++;

And for same reason, wouldn't last_change_id be updated when actually no
ports were added to MC group?

> +	return IB_SUCCESS;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +void osm_mgrp_holder_port_delete_mgrp(osm_mgrp_holder_t * p_mgrp_holder,
> +				      osm_mgrp_t * p_mgrp,
> +				      ib_net64_t port_guid)
> +{
> +	osm_mgrp_port_t *p_mgrp_port = (osm_mgrp_port_t *)
> +	cl_qmap_get(&p_mgrp_holder->mgrp_port_map, port_guid);
> +	if (p_mgrp_port !=
> +		(osm_mgrp_port_t *) cl_qmap_end(&p_mgrp_holder->mgrp_port_map)) {
> +		cl_qlist_remove_item(&p_mgrp_port->mgroups, &p_mgrp->port_item);
> +		if (0 == cl_qlist_count(&p_mgrp_port->mgroups)) {
> +			/* No mgroups registered on this port for current mlid */
> +			cl_qmap_remove_item(&p_mgrp_holder->mgrp_port_map,
> +			&p_mgrp_port->guid_item);
> +			free(p_mgrp_port);
> +		}
> +	p_mgrp_holder->last_change_id++;
> +	}
>  }
> diff --git a/opensm/opensm/osm_qos_policy.c b/opensm/opensm/osm_qos_policy.c
> index 7826578..041377f 100644
> --- a/opensm/opensm/osm_qos_policy.c
> +++ b/opensm/opensm/osm_qos_policy.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -785,7 +785,9 @@ static void __qos_policy_validate_pkey(
>  	uint8_t sl;
>  	uint32_t flow;
>  	uint8_t hop;
> +	osm_mgrp_holder_t * p_mgrp_holder;
>  	osm_mgrp_t * p_mgrp;
> +	cl_list_item_t *p_item;
>  
>  	if (!p_qos_policy || !p_qos_match_rule || !p_prtn)
>  		return;
> @@ -809,31 +811,35 @@ static void __qos_policy_validate_pkey(
>  	if (!p_prtn->mlid)
>  		return;
>  
> -	p_mgrp = osm_get_mgrp_by_mlid(p_qos_policy->p_subn, p_prtn->mlid);
> -	if (!p_mgrp) {
> +	p_mgrp_holder =
> +		osm_get_mgrp_holder_by_mlid(p_qos_policy->p_subn, p_prtn->mlid);
> +	if (!p_mgrp_holder) {
>  		OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_ERROR,
> -			"ERR AC16: MCast group for partition with "
> -			"pkey 0x%04X not found\n",
> -			cl_ntoh16(p_prtn->pkey));
> +		"ERR AC16: MCast mgrp_holder for partition with pkey 0x%04X not found\n",
> +		cl_ntoh16(p_prtn->pkey));
>  		return;
>  	}
>  
> -	CL_ASSERT((cl_ntoh16(p_mgrp->mcmember_rec.pkey) & 0x7fff) ==
> -		  (cl_ntoh16(p_prtn->pkey) & 0x7fff));
> -
> -	ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
> -				  &sl, &flow, &hop);
> -	if (sl != p_prtn->sl) {
> -		OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG,
> +	p_item = cl_qlist_head(&p_mgrp_holder->mgrp_list);
> +	while (p_item != cl_qlist_end(&p_mgrp_holder->mgrp_list)) {
> +		p_mgrp = (osm_mgrp_t *) PARENT_STRUCT(p_item, osm_mgrp_t,
> +			mlid_item);
> +		p_item = cl_qlist_next(p_item);
> +		CL_ASSERT((cl_ntoh16(p_mgrp->mcmember_rec.pkey) & 0x7fff) ==
> +			(cl_ntoh16(p_prtn->pkey) & 0x7fff));
> +		ib_member_get_sl_flow_hop(p_mgrp->mcmember_rec.sl_flow_hop,
> +			&sl, &flow, &hop);
> +		if (sl != p_prtn->sl) {
> +			OSM_LOG(&p_qos_policy->p_subn->p_osm->log, OSM_LOG_DEBUG,
>  			"Updating MCGroup (MLID 0x%04x) SL to "
>  			"match partition SL (%u)\n",
>  			cl_hton16(p_mgrp->mcmember_rec.mlid),
>  			p_prtn->sl);
> -		p_mgrp->mcmember_rec.sl_flow_hop =
> -			ib_member_set_sl_flow_hop(p_prtn->sl, flow, hop);
> +			p_mgrp->mcmember_rec.sl_flow_hop =
> +				ib_member_set_sl_flow_hop(p_prtn->sl, flow, hop);
> +		}
>  	}
>  }
> -
>  /***************************************************
>   ***************************************************/
>  
> diff --git a/opensm/opensm/osm_sa.c b/opensm/opensm/osm_sa.c
> index fcc3f27..22dd495 100644
> --- a/opensm/opensm/osm_sa.c
> +++ b/opensm/opensm/osm_sa.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -706,17 +706,15 @@ static void sa_dump_all_sa(osm_opensm_t * p_osm, FILE * file)
>  {
>  	struct opensm_dump_context dump_context;
>  	osm_mgrp_t *p_mgrp;
> -	int i;
>  
>  	dump_context.p_osm = p_osm;
>  	dump_context.file = file;
>  	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump multicast\n");
>  	cl_plock_acquire(&p_osm->lock);
> -	for (i = 0; i <= p_osm->subn.max_mcast_lid_ho - IB_LID_MCAST_START_HO;
> -	     i++) {
> -		p_mgrp = p_osm->subn.mgroups[i];
> -		if (p_mgrp)
> -			sa_dump_one_mgrp(p_mgrp, &dump_context);
> +	p_mgrp = (osm_mgrp_t*)cl_fmap_head(&p_osm->subn.mgrp_mgid_tbl);
> +	while (p_mgrp != (osm_mgrp_t*)cl_fmap_end(&p_osm->subn.mgrp_mgid_tbl)) {
> +		sa_dump_one_mgrp(p_mgrp, &dump_context);
> +		p_mgrp = (osm_mgrp_t*) cl_fmap_next(&p_mgrp->map_item);
>  	}
>  	OSM_LOG(&p_osm->log, OSM_LOG_DEBUG, "Dump inform\n");
>  	cl_qlist_apply_func(&p_osm->subn.sa_infr_list,
> @@ -740,23 +738,16 @@ static osm_mgrp_t *load_mcgroup(osm_opensm_t * p_osm, ib_net16_t mlid,
>  				unsigned well_known)
>  {
>  	ib_net64_t comp_mask;
> -	osm_mgrp_t *p_mgrp;
>  
> +	cl_fmap_item_t *p_fitem;
> +	osm_mgrp_t *p_mgrp = NULL;
>  	cl_plock_excl_acquire(&p_osm->lock);
>  
> -	p_mgrp = osm_get_mgrp_by_mlid(&p_osm->subn, mlid);
> -	if (p_mgrp) {
> -		if (!memcmp(&p_mgrp->mcmember_rec.mgid, &p_mcm_rec->mgid,
> -			    sizeof(ib_gid_t))) {
> -			OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
> -				"mgrp %04x is already here.", cl_ntoh16(mlid));
> +	p_fitem = cl_fmap_get(&p_osm->subn.mgrp_mgid_tbl, &p_mcm_rec->mgid);
> +	if (p_fitem != cl_fmap_end(&p_osm->subn.mgrp_mgid_tbl)) {
> +		OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
> +			"mgrp %04x is already here.", cl_ntoh16(mlid));
>  			goto _out;
> -		}
> -		OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> -			"mlid %04x is already used by another MC group. Will "
> -			"request clients reregistration.\n", cl_ntoh16(mlid));
> -		p_mgrp = NULL;
> -		goto _out;
>  	}
>  
>  	comp_mask = IB_MCR_COMPMASK_MTU | IB_MCR_COMPMASK_MTU_SEL
> diff --git a/opensm/opensm/osm_sa_mcmember_record.c b/opensm/opensm/osm_sa_mcmember_record.c
> index a9e0a3b..3838a08 100644
> --- a/opensm/opensm/osm_sa_mcmember_record.c
> +++ b/opensm/opensm/osm_sa_mcmember_record.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -121,14 +121,17 @@ static ib_net16_t get_new_mlid(osm_sa_t * sa, ib_net16_t requested_mlid)
>  
>  	if (requested_mlid && cl_ntoh16(requested_mlid) >= IB_LID_MCAST_START_HO
>  	    && cl_ntoh16(requested_mlid) <= p_subn->max_mcast_lid_ho
> -	    && !osm_get_mgrp_by_mlid(p_subn, requested_mlid))
> +	    && !osm_get_mgrp_holder_by_mlid(p_subn, requested_mlid))
>  		return requested_mlid;
>  
>  	max = p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO + 1;
>  	for (i = 0; i < max; i++) {
> -		osm_mgrp_t *p_mgrp = sa->p_subn->mgroups[i];
> -		if (!p_mgrp || p_mgrp->to_be_deleted)
> -			return cl_hton16(i + IB_LID_MCAST_START_HO);
> +		osm_mgrp_holder_t *p_mgrp_holder = sa->p_subn->mgroup_holders[i];
> +		if (!p_mgrp_holder || p_mgrp_holder->to_be_deleted) {
> +				OSM_LOG(sa->p_log, OSM_LOG_DEBUG, "returning mgrp_holder to_be_deleted =%d\n",
> +						p_mgrp_holder ? p_mgrp_holder->to_be_deleted : 0);

Is such debug message needed permanently (for more than development
uses)? I think that we should avoid adding new debug messages when
possible (as well as to cleanup existing ones) - OpenSM debug log is
overflowed by junks and almost unreadable now.

> +				return cl_hton16(i + IB_LID_MCAST_START_HO);
> +		}
>  	}
>  
>  	return 0;
> @@ -146,8 +149,9 @@ static void cleanup_mgrp(IN osm_sa_t * sa, osm_mgrp_t * mgrp)
>  	/* Remove MGRP only if osm_mcm_port_t count is 0 and
>  	   not a well known group */
>  	if (cl_is_qmap_empty(&mgrp->mcm_port_tbl) && !mgrp->well_known) {
> -		sa->p_subn->mgroups[cl_ntoh16(mgrp->mlid) -
> -				    IB_LID_MCAST_START_HO] = NULL;
> +		osm_mgrp_holder_t *p_mgrp_holder =
> +			osm_get_mgrp_holder_by_mlid(sa->p_subn, mgrp->mlid);
> +		osm_mgrp_holder_delete_mgrp(p_mgrp_holder, mgrp);
>  		cl_fmap_remove_item(&sa->p_subn->mgrp_mgid_tbl,
>  				    &mgrp->map_item);
>  		osm_mgrp_delete(mgrp);
> @@ -802,19 +806,19 @@ static boolean_t mgrp_request_is_realizable(IN osm_sa_t * sa,
>   Call this function to create a new mgrp.
>  **********************************************************************/
>  ib_api_status_t osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa,
> -					     IN ib_net64_t comp_mask,
> -					     IN const ib_member_rec_t *
> -					     const p_recvd_mcmember_rec,
> -					     IN const osm_physp_t * p_physp,
> -					     OUT osm_mgrp_t ** pp_mgrp)
> +						IN ib_net64_t comp_mask,
> +						IN const ib_member_rec_t *
> +						const p_recvd_mcmember_rec,
> +						IN const osm_physp_t * p_physp,
> +						OUT osm_mgrp_t ** pp_mgrp)
>  {
> -	ib_net16_t mlid;
> +	ib_net16_t mlid, existed_mlid;
>  	unsigned zero_mgid, i;
>  	uint8_t scope;
>  	ib_gid_t *p_mgid;
> -	osm_mgrp_t *p_prev_mgrp;
>  	ib_api_status_t status = IB_SUCCESS;
>  	ib_member_rec_t mcm_rec = *p_recvd_mcmember_rec;	/* copy for modifications */
> +	osm_mgrp_holder_t * p_mgrp_holder;
>  
>  	OSM_LOG_ENTER(sa->p_log);
>  
> @@ -890,6 +894,15 @@ ib_api_status_t osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa,
>  		goto Exit;
>  	}
>  
> +	if (0 != (existed_mlid = osm_mgrp_holder_get_mlid_by_mgid(sa->p_subn, p_mgid))) {
> +		char gid_str[INET6_ADDRSTRLEN];
> +		mlid = existed_mlid;
> +		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
> +			"found existed  mlid  0x%04x for mgid %s\n",
> +			cl_ntoh16(mlid), inet_ntop(AF_INET6, p_mgid->raw,
> +						   gid_str, sizeof gid_str));

This debug message is pure duplication.

> +	}
> +
>  	/* create a new MC Group */
>  	*pp_mgrp = osm_mgrp_new(mlid);
>  	if (*pp_mgrp == NULL) {
> @@ -914,25 +927,26 @@ ib_api_status_t osm_mcmr_rcv_create_new_mgrp(IN osm_sa_t * sa,
>  
>  	/* Insert the new group in the data base */
>  
> -	/* since we might have an old group by that mlid
> -	   one whose deletion was delayed for an idle time
> -	   we need to deallocate it first */
> -	p_prev_mgrp = osm_get_mgrp_by_mlid(sa->p_subn, mlid);
> -	if (p_prev_mgrp) {
> +
> +	p_mgrp_holder = osm_get_mgrp_holder_by_mlid(sa->p_subn, mlid);
> +	if (!p_mgrp_holder) {
>  		OSM_LOG(sa->p_log, OSM_LOG_DEBUG,
> -			"Found previous group for mlid:0x%04x - "
> -			"Destroying it first\n", cl_ntoh16(mlid));
> -		sa->p_subn->mgroups[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO] =
> -		    NULL;
> -		cl_fmap_remove_item(&sa->p_subn->mgrp_mgid_tbl,
> -				    &p_prev_mgrp->map_item);
> -		osm_mgrp_delete(p_prev_mgrp);
> +			"Creating new mgrp_holder  for mlid:0x%04x\n",
> +			cl_ntoh16(mlid));
> +		p_mgrp_holder = osm_mgrp_holder_new(sa->p_subn,  mlid);
>  	}
>  
> +	if (!p_mgrp_holder) {
> +		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B08: "
> +			"osm_mgrp_holder_new failed\n");
> +		free_mlid(sa, mlid);
> +		status = IB_INSUFFICIENT_MEMORY;
> +		goto Exit;
> +	}
>  	cl_fmap_insert(&sa->p_subn->mgrp_mgid_tbl,
>  		       &(*pp_mgrp)->mcmember_rec.mgid, &(*pp_mgrp)->map_item);
>  
> -	sa->p_subn->mgroups[cl_ntoh16(mlid) - IB_LID_MCAST_START_HO] = *pp_mgrp;
> +	osm_mgrp_holder_add_mgrp(p_mgrp_holder, *pp_mgrp, sa->p_log);
>  
>  Exit:
>  	OSM_LOG_EXIT(sa->p_log);
> @@ -1074,7 +1088,7 @@ static void mcmr_rcv_leave_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  	CL_PLOCK_RELEASE(sa->p_lock);
>  
>  	/* we can leave if port was deleted from MCG */
> -	if (removed && osm_sm_mcgrp_leave(sa->sm, mlid, portguid))
> +	if (removed && osm_sm_mcgrp_leave(sa->sm, p_mgrp, portguid))
>  		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B09: "
>  			"osm_sm_mcgrp_leave failed\n");
>  
> @@ -1102,6 +1116,7 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  	osm_physp_t *p_request_physp;
>  	uint8_t is_new_group;	/* TRUE = there is a need to create a group */
>  	uint8_t join_state;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  
>  	OSM_LOG_ENTER(sa->p_log);
>  
> @@ -1275,6 +1290,8 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  		goto Exit;
>  	}
>  
> +	p_mgrp_holder = osm_get_mgrp_holder_by_mlid(sa->p_subn, mlid);
> +	CL_ASSERT(p_mgrp_holder);
>  	/* create or update existing port (join-state will be updated) */
>  	status = add_new_mgrp_port(sa, p_mgrp, p_recvd_mcmember_rec,
>  				   osm_madw_get_mad_addr_ptr(p_madw),
> @@ -1282,6 +1299,8 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  
>  	if (status != IB_SUCCESS) {
>  		/* we fail to add the port so we might need to delete the group */
> +		osm_mgrp_holder_port_delete_mgrp(p_mgrp_holder, p_mgrp,
> +					p_recvd_mcmember_rec->port_gid.unicast.interface_id);
>  		cleanup_mgrp(sa, p_mgrp);
>  
>  		CL_PLOCK_RELEASE(sa->p_lock);
> @@ -1304,7 +1323,7 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  	/* do the actual routing (actually schedule the update) */
>  	status = osm_sm_mcgrp_join(sa->sm, mlid,
>  				   p_recvd_mcmember_rec->port_gid.unicast.
> -				   interface_id);
> +				   interface_id, &p_recvd_mcmember_rec->mgid);
>  
>  	if (status != IB_SUCCESS) {
>  		OSM_LOG(sa->p_log, OSM_LOG_ERROR, "ERR 1B14: "
> @@ -1315,9 +1334,10 @@ static void mcmr_rcv_join_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  		CL_PLOCK_EXCL_ACQUIRE(sa->p_lock);
>  
>  		/* the request for routing failed so we need to remove the port */
> +		osm_mgrp_holder_port_delete_mgrp(p_mgrp_holder, p_mgrp,
> +				p_recvd_mcmember_rec->port_gid.unicast.interface_id);
>  		osm_mgrp_delete_port(sa->p_subn, sa->p_log, p_mgrp,
> -				     p_recvd_mcmember_rec->port_gid.
> -				     unicast.interface_id);
> +				p_recvd_mcmember_rec->port_gid.unicast.interface_id);
>  		cleanup_mgrp(sa, p_mgrp);
>  		CL_PLOCK_RELEASE(sa->p_lock);
>  		osm_sa_send_error(sa, p_madw, IB_SA_MAD_STATUS_NO_RESOURCES);
> @@ -1549,7 +1569,6 @@ static void mcmr_query_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  	osm_physp_t *p_req_physp;
>  	boolean_t trusted_req;
>  	osm_mgrp_t *p_mgrp;
> -	int i;
>  
>  	OSM_LOG_ENTER(sa->p_log);
>  
> @@ -1578,12 +1597,11 @@ static void mcmr_query_mgrp(IN osm_sa_t * sa, IN osm_madw_t * p_madw)
>  	CL_PLOCK_ACQUIRE(sa->p_lock);
>  
>  	/* simply go over all MCGs and match */
> -	for (i = 0; i <= sa->p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
> -	     i++) {
> -		p_mgrp = sa->p_subn->mgroups[i];
> -		if (p_mgrp)
> -			mcmr_by_comp_mask(sa, p_rcvd_rec, comp_mask, p_mgrp,
> -					  p_req_physp, trusted_req, &rec_list);
> +	p_mgrp = (osm_mgrp_t *) cl_fmap_head(&sa->p_subn->mgrp_mgid_tbl);
> +	while (p_mgrp != (osm_mgrp_t *) cl_fmap_end(&sa->p_subn->mgrp_mgid_tbl)) {
> +		mcmr_by_comp_mask(sa, p_rcvd_rec, comp_mask, p_mgrp,
> +				  p_req_physp, trusted_req, &rec_list);
> +		p_mgrp = (osm_mgrp_t *) cl_fmap_next(&p_mgrp->map_item);
>  	}
>  
>  	CL_PLOCK_RELEASE(sa->p_lock);
> diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c
> index 75d9516..aa63d78 100644
> --- a/opensm/opensm/osm_sa_path_record.c
> +++ b/opensm/opensm/osm_sa_path_record.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc. All rights reserved.
> @@ -1468,11 +1468,14 @@ static osm_mgrp_t *pr_get_mgrp(IN osm_sa_t * sa, IN const osm_madw_t * p_madw)
>  				mgrp = NULL;
>  				goto Exit;
>  			}
> -		} else
> -		    if (!(mgrp = osm_get_mgrp_by_mlid(sa->p_subn, p_pr->dlid)))
> -			OSM_LOG(sa->p_log, OSM_LOG_ERROR,
> -				"ERR 1F11: " "No MC group found for PathRecord "
> +		} else {
> +			mgrp = osm_get_mgrp_by_mgid(sa, &p_pr->dgid);

Such replacement seems wrong to me - it is under block where SA PR
compmask has DLID (not DGID) bit on.

> +			if (!mgrp)
> +				OSM_LOG(sa->p_log, OSM_LOG_ERROR,
> +				"ERR 1F11: "
> +				"No MC group found for PathRecord "
>  				"destination LID 0x%x\n", p_pr->dlid);
> +		}
>  	}
>  
>  Exit:
> diff --git a/opensm/opensm/osm_sm.c b/opensm/opensm/osm_sm.c
> index b3ce69a..d990450 100644
> --- a/opensm/opensm/osm_sm.c
> +++ b/opensm/opensm/osm_sm.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2005 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -47,6 +47,7 @@
>  
>  #include <stdlib.h>
>  #include <string.h>
> +#include <arpa/inet.h>
>  #include <iba/ib_types.h>
>  #include <complib/cl_qmap.h>
>  #include <complib/cl_passivelock.h>
> @@ -468,12 +469,15 @@ static ib_api_status_t sm_mgrp_process(IN osm_sm_t * p_sm,
>  /**********************************************************************
>   **********************************************************************/
>  ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN const ib_net16_t mlid,
> -				  IN const ib_net64_t port_guid)
> +				  IN const ib_net64_t port_guid,
> +				  IN const ib_gid_t * p_mgid)

Any reason to have 'mlid' as parameter for this function after adding
MGID there?

>  {
> -	osm_mgrp_t *p_mgrp;
> +	osm_mgrp_t *p_mgrp = NULL;
>  	osm_port_t *p_port;
>  	ib_api_status_t status = IB_SUCCESS;
>  	osm_mcm_info_t *p_mcm;
> +	cl_list_item_t *p_item;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  
>  	OSM_LOG_ENTER(p_sm->p_log);
>  
> @@ -497,8 +501,44 @@ ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN const ib_net16_t mlid,
>  	/*
>  	 * If this multicast group does not already exist, create it.
>  	 */
> -	p_mgrp = osm_get_mgrp_by_mlid(p_sm->p_subn, mlid);
> -	if (!p_mgrp || !osm_mgrp_is_guid(p_mgrp, port_guid)) {
> +	p_mgrp_holder = osm_get_mgrp_holder_by_mlid(p_sm->p_subn, mlid);
> +	if (p_mgrp_holder) {
> +		char gid_str[INET6_ADDRSTRLEN];
> +		if (TRUE) {
> +			size_t gr_count = cl_qlist_count(&p_mgrp_holder->mgrp_list);
> +			OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
> +				"mlid 0x%X has  %lu mgroups\n", cl_ntoh16(mlid), gr_count);
> +			if (gr_count) {
> +				p_item =
> +				    cl_qlist_head(&p_mgrp_holder->mgrp_list);
> +				while (p_item !=
> +				       cl_qlist_end(&p_mgrp_holder->mgrp_list)) {
> +					p_mgrp = (osm_mgrp_t *)
> +					    PARENT_STRUCT(p_item, osm_mgrp_t,
> +							  mlid_item);
> +					OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
> +						"mlid  0x%X has mgrp with MGID: %s\n",
> +						cl_ntoh16(mlid),
> +						inet_ntop(AF_INET6,
> +							  p_mgrp->mcmember_rec.
> +							  mgid.raw, gid_str,
> +							  sizeof gid_str));
> +					p_item = cl_qlist_next(p_item);
> +				}
> +			}
> +		}

What does this 'if (TRUE) ...' block? You overwrites resolved p_mgrp in
the line below.

> +		p_mgrp  = (osm_mgrp_t *)cl_fmap_get(&p_sm->p_subn->mgrp_mgid_tbl, p_mgid);
> +		if (p_mgrp == (osm_mgrp_t *)cl_fmap_end(&p_sm->p_subn->mgrp_mgid_tbl)) {
> +			p_mgrp = NULL;
> +			OSM_LOG(p_sm->p_log, OSM_LOG_ERROR,
> +				"group with MGID: %s not found on mlid 0x%X\n",
> +				inet_ntop(AF_INET6,
> +					  p_mgid->raw,
> +					  gid_str, sizeof gid_str),
> +				cl_ntoh16(mlid));
> +		}
> +	}
> +	if (!p_mgrp_holder || !p_mgrp || !osm_mgrp_is_guid(p_mgrp, port_guid)) {
>  		/*
>  		 * The group removed or the port is not a
>  		 * member of the group, then fail immediately.
> @@ -513,6 +553,22 @@ ib_api_status_t osm_sm_mcgrp_join(IN osm_sm_t * p_sm, IN const ib_net16_t mlid,
>  		goto Exit;
>  	}
>  
> +	/* if there was no change from the last time
> +	 * we processed the group we can skip doing anything
> +	 */
> +	if (p_mgrp_holder->last_change_id == p_mgrp_holder->last_tree_id) {
> +		OSM_LOG(p_sm->p_log, OSM_LOG_VERBOSE,
> +			"Skip processing mgrp holder with lid:0x%X last change id:%u\n",
> +			cl_ntoh16(mlid), p_mgrp_holder->last_change_id);
> +		goto Exit;
> +	} else {
> +		OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
> +			"processing mgrp holder with lid:0x%X port: 0x%016"
> +			PRIx64 " last change id:%u tree id:%u\n",
> +			cl_ntoh16(mlid), cl_ntoh64(port_guid),
> +			p_mgrp_holder->last_change_id,
> +			p_mgrp_holder->last_tree_id);
> +	}

How is this part related to the patch?

(And do you need 'holder' struct in this function for any other reason?)

It looks that this is part of Eli's patch where he tried to fix MCG
join/leave bug, which still have some issues, was commented and no new
version was received yet.

Sasha

>  	/*
>  	 * Check if the object (according to mlid) already exists on this port.
>  	 * If it does - then no need to update it again, and no need to
> @@ -549,12 +605,13 @@ Exit:
>  
>  /**********************************************************************
>   **********************************************************************/
> -ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN const ib_net16_t mlid,
> +ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN osm_mgrp_t * p_mgrp,
>  				   IN const ib_net64_t port_guid)
>  {
> -	osm_mgrp_t *p_mgrp;
>  	osm_port_t *p_port;
>  	ib_api_status_t status;
> +	osm_mgrp_holder_t *p_mgrp_holder;
> +	ib_net16_t mlid = p_mgrp->mlid;
>  
>  	OSM_LOG_ENTER(p_sm->p_log);
>  
> @@ -577,21 +634,25 @@ ib_api_status_t osm_sm_mcgrp_leave(IN osm_sm_t * p_sm, IN const ib_net16_t mlid,
>  	}
>  
>  	/*
> -	 * Get the multicast group object for this group.
> +	 * Get the multicast group holder object for this group.
>  	 */
> -	p_mgrp = osm_get_mgrp_by_mlid(p_sm->p_subn, mlid);
> -	if (!p_mgrp) {
> +	p_mgrp_holder = osm_get_mgrp_holder_by_mlid(p_sm->p_subn, mlid);
> +	if (!p_mgrp_holder) {
>  		OSM_LOG(p_sm->p_log, OSM_LOG_ERROR, "ERR 2E08: "
>  			"No multicast group for MLID 0x%X\n", cl_ntoh16(mlid));
>  		status = IB_INVALID_PARAMETER;
>  		goto Exit;
>  	}
>  
> +	osm_mgrp_holder_port_delete_mgrp(p_mgrp_holder, p_mgrp, port_guid);
>  	/*
>  	 * Walk the list of ports in the group, and remove the appropriate one.
>  	 */
>  	osm_port_remove_mgrp(p_port, mlid);
>  
> +	OSM_LOG(p_sm->p_log, OSM_LOG_DEBUG,
> +		" Calling sm_mgrp_process for mgrp with mlid = 0x%X\n",
> +		cl_ntoh16(mlid));
>  	status = sm_mgrp_process(p_sm, p_mgrp);
>  Exit:
>  	CL_PLOCK_RELEASE(p_sm->p_lock);
> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
> index 0d11811..6ed95d4 100644
> --- a/opensm/opensm/osm_subnet.c
> +++ b/opensm/opensm/osm_subnet.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2004-2009 Voltaire, Inc. All rights reserved.
>   * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
> @@ -428,8 +428,9 @@ void osm_subn_destroy(IN osm_subn_t * const p_subn)
>  	osm_switch_t *p_sw, *p_next_sw;
>  	osm_remote_sm_t *p_rsm, *p_next_rsm;
>  	osm_prtn_t *p_prtn, *p_next_prtn;
> -	osm_mgrp_t *p_mgrp;
> +	osm_mgrp_holder_t *p_mgrp_holder;
>  	osm_infr_t *p_infr, *p_next_infr;
> +	osm_mgrp_t *p_mgrp;
>  
>  	/* it might be a good idea to de-allocate all known objects */
>  	p_next_node = (osm_node_t *) cl_qmap_head(&p_subn->node_guid_tbl);
> @@ -471,14 +472,20 @@ void osm_subn_destroy(IN osm_subn_t * const p_subn)
>  		osm_prtn_delete(&p_prtn);
>  	}
>  
> -	cl_fmap_remove_all(&p_subn->mgrp_mgid_tbl);
>  
>  	for (i = 0; i <= p_subn->max_mcast_lid_ho - IB_LID_MCAST_START_HO;
>  	     i++) {
> -		p_mgrp = p_subn->mgroups[i];
> -		p_subn->mgroups[i] = NULL;
> -		if (p_mgrp)
> -			osm_mgrp_delete(p_mgrp);
> +		p_mgrp_holder = p_subn->mgroup_holders[i];
> +		if (p_mgrp_holder){
> +				osm_mgrp_holder_delete(p_subn, p_mgrp_holder->mlid);
> +		}
> +	}
> +
> +	p_mgrp = (osm_mgrp_t*)cl_fmap_head(&p_subn->mgrp_mgid_tbl);
> +	while (p_mgrp != (osm_mgrp_t*)cl_fmap_end(&p_subn->mgrp_mgid_tbl)) {
> +		cl_fmap_remove_item(&p_subn->mgrp_mgid_tbl, (cl_fmap_item_t*)p_mgrp);
> +		osm_mgrp_delete(p_mgrp);
> +		p_mgrp = (osm_mgrp_t*)cl_fmap_head(&p_subn->mgrp_mgid_tbl);
>  	}
>  
>  	p_next_infr = (osm_infr_t *) cl_qlist_head(&p_subn->sa_infr_list);
> @@ -1646,3 +1653,13 @@ int osm_subn_write_conf_file(char *file_name, IN osm_subn_opt_t *const p_opts)
>  
>  	return 0;
>  }
> +
> +ib_net16_t osm_mgrp_holder_get_mlid_by_mgid(IN osm_subn_t const *p_subn,
> +						IN const ib_gid_t * const p_mgid)
> +{
> +	osm_mgrp_t *p_mgrp = (osm_mgrp_t*)cl_fmap_get(&p_subn->mgrp_mgid_tbl, p_mgid);
> +	if (p_mgrp != (osm_mgrp_t*)cl_fmap_end(&p_subn->mgrp_mgid_tbl)) {
> +		return p_mgrp->mlid;
> +	}
> +	return 0;
> +}
> -- 
> 1.6.3.3
> 



More information about the general mailing list