[ofw][IPOIB]bypass_check_bcast_rate usage extension

Alex Estrin alex.estrin at qlogic.com
Mon Apr 23 13:56:24 PDT 2007


Hi Fab,

You are correct if the active SM does have control over group rate limits.
If it doesn't, then there is a real possibility of shutting down the whole
fabric because of one crippled port.
The proposed patch just completes the already-enabled rate check for joining
an MC group by adding a similar check for creating a group.

Thanks,
Alex

> -----Original Message-----
> From: Fab Tillier [mailto:ftillier at windows.microsoft.com]
> Sent: Monday, April 23, 2007 4:17 PM
> To: Alex Estrin; Yossi Leybovich; ofw at lists.openfabrics.org
> Subject: RE: [ofw][IPOIB]bypass_check_bcast_rate usage extension
> 
> Shouldn't minimum MC group rates be controlled at the SM, rather than at
> every system on the fabric?  It seems to me that having a per-port
> variable is a recipe for configuration errors.
> 
> The SM is a single point where policy like minimum rate and default rate
> should be managed.
> 
> Just my $.02
> -Fab
> 
> -----Original Message-----
> From: ofw-bounces at lists.openfabrics.org
> [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Alex Estrin
> Sent: Monday, April 23, 2007 1:08 PM
> To: Yossi Leybovich; ofw at lists.openfabrics.org
> Subject: [ofw][IPOIB]bypass_check_bcast_rate usage extension
> 
> Hi Yossi,
> 
> The proposed patch is intended to complete the 'check bcast rate' behavior
> of joining an existing multicast group (fail to join if the offered rate is
> too low) by also refusing to create such a group with a rate too low for
> other nodes to be able to join.
> Please review.
> 
> Thanks,
> Alex
> 
> 
> Index: kernel/ipoib_driver.c
> ===================================================================
> --- kernel/ipoib_driver.c	(revision 630)
> +++ kernel/ipoib_driver.c	(working copy)
> @@ -316,7 +316,7 @@
>  {
>  	NTSTATUS						status;
>  	/* Remember the terminating entry in the table below. */
> -	RTL_QUERY_REGISTRY_TABLE		table[4];
> +	RTL_QUERY_REGISTRY_TABLE		table[5];
>  	UNICODE_STRING					param_path;
> 
>  	IPOIB_ENTER( IPOIB_DBG_INIT );
> @@ -362,6 +362,13 @@
>  	table[2].DefaultData = &g_ipoib.bypass_check_bcast_rate;
>  	table[2].DefaultLength = sizeof(ULONG);
> 
> +	table[3].Flags = RTL_QUERY_REGISTRY_DIRECT;
> +	table[3].Name = L"min_group_rate";
> +	table[3].EntryContext = &g_ipoib.min_group_rate;
> +	table[3].DefaultType = REG_DWORD;
> +	table[3].DefaultData = &g_ipoib.min_group_rate;
> +	table[3].DefaultLength = sizeof(ULONG);
> +
>  	/* Have at it! */
>  	status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE,
>  		param_path.Buffer, table, NULL, NULL );
> Index: kernel/ipoib_driver.h
> ===================================================================
> --- kernel/ipoib_driver.h	(revision 630)
> +++ kernel/ipoib_driver.h	(working copy)
> @@ -77,7 +77,7 @@
>  	NDIS_HANDLE		h_ibat_dev;
>  	volatile LONG	ibat_ref;
>  	uint32_t		bypass_check_bcast_rate;
> -
> +	uint32_t		min_group_rate;
>  }	ipoib_globals_t;
>  /*
>  * FIELDS
> @@ -95,6 +95,9 @@
>  *
>  *	h_ibat_dev
>  *		Device handle returned by NdisMRegisterDevice.
> +*
> +*	min_group_rate
> +*		minimum port rate allowed to create bcast/mcast group.
> Gbps.
>  *********/
> 
>  extern ipoib_globals_t	g_ipoib;
> Index: kernel/ipoib_log.mc
> ===================================================================
> --- kernel/ipoib_log.mc	(revision 630)
> +++ kernel/ipoib_log.mc	(working copy)
> @@ -281,5 +281,5 @@
>  Severity=Error
>  SymbolicName=EVENT_IPOIB_BCAST_RATE
>  Language=English
> -%2: The local port rate is too slow for the existing broadcast MC
> group.
> +%2: The local port rate is too low to join or create broadcast MC
> group.
>  .
> Index: kernel/ipoib_port.c
> ===================================================================
> --- kernel/ipoib_port.c	(revision 630)
> +++ kernel/ipoib_port.c	(working copy)
> @@ -94,7 +94,14 @@
>  __port_free(
>  	IN				cl_obj_t* const
> p_obj );
> 
> +static inline uint8_t
> +__port_rate_to_Gbps(
> +	IN				uint8_t
> rate );
> 
> +static inline uint8_t
> +__port_rate_from_Gbps(
> +	IN				uint8_t
> rate );
> +
> 
>
/***********************************************************************
> *******
>  *
>  * IB resource manager operations
> @@ -1775,7 +1782,6 @@
>  			status = __endpt_mgr_insert( p_port, mac,
> *pp_src );
>  			if( status != IB_SUCCESS )
>  			{
> -				cl_obj_unlock( &p_port->obj );
>  				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
> IPOIB_DBG_ERROR,
>  					("__endpt_mgr_insert returned
> %s\n",
> 
> p_port->p_adapter->p_ifc->get_err_str( status )) ); @@ -5069,6 +5075,8
> @@  {
>  	ib_api_status_t		status;
>  	ib_mcast_req_t		mcast_req;
> +	uint32_t			mcast_rate_gbps;
> +	uint32_t			port_rate_gbps;
> 
>  	IPOIB_ENTER( IPOIB_DBG_INIT );
> 
> @@ -5106,6 +5114,34 @@
>  	mcast_req.port_guid = p_port->p_adapter->guids.port_guid;
>  	mcast_req.pkey_index = 0;
> 
> +	/* prevent mcast group creating if local port rate
> +	 is too low for other nodes to be able to join */
> +	if( !g_ipoib.bypass_check_bcast_rate )
> +	{
> +		port_rate_gbps = __port_rate_to_Gbps(
> p_port->ib_mgr.rate );
> +
> +		/*if service parameter min_group_rate is not specified
> +		  or 0, or invalid then MIN_DEFAULT_RATE will be
> selected */
> +		mcast_rate_gbps = __port_rate_to_Gbps(
> +			__port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> +
> +		if(	mcast_rate_gbps > port_rate_gbps )
> +		{
> +			IPOIB_PRINT( TRACE_LEVEL_WARNING,
> IPOIB_DBG_INIT,
> +				("Port rate is too low to create Bcast
> group.\n") );
> +
> +			NdisWriteErrorLogEntry(
> p_port->p_adapter->h_adapter,
> +				EVENT_IPOIB_BCAST_RATE, 2,
> +				(uint32_t)p_port->ib_mgr.rate,
> +				(uint32_t)__port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> +
> +			return IB_ERROR;
> +		}
> +		mcast_req.member_rec.rate =
> +			( __port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> +		mcast_req.member_rec.rate |= ( IB_PATH_SELECTOR_EXACTLY
> << 6 );
> +	}
> +
>  	/* reference the object for the multicast join request. */
>  	ipoib_port_ref( p_port, ref_join_bcast );
> 
> @@ -5656,4 +5692,61 @@
>  	IPOIB_EXIT( IPOIB_DBG_MCAST );
>  }
> 
> +static inline uint8_t
> +__port_rate_to_Gbps(
> +	IN			uint8_t			rate )
> +{
> +	switch ( (int)rate )
> +	{
> +	case IB_PATH_RECORD_RATE_2_5_GBS:
> +		return 2;
> +	case IB_PATH_RECORD_RATE_5_GBS:
> +		return 5;
> +	case IB_PATH_RECORD_RATE_10_GBS:
> +		return 10;
> +	case IB_PATH_RECORD_RATE_20_GBS:
> +		return 20;
> +	case IB_PATH_RECORD_RATE_30_GBS:
> +		return 30;
> +	case IB_PATH_RECORD_RATE_40_GBS:
> +		return 40;
> +	case IB_PATH_RECORD_RATE_60_GBS:
> +		return 60;
> +	case IB_PATH_RECORD_RATE_80_GBS:
> +		return 80;
> +	case IB_PATH_RECORD_RATE_120_GBS:
> +		return 120;
> +	default:
> +		return 0;
> +	}
> +}
> 
> +static inline uint8_t
> +__port_rate_from_Gbps(
> +	IN			uint8_t			rate_gbps )
> +{
> +	switch ( (int)rate_gbps )
> +	{
> +	case 2:
> +		return (uint8_t)IB_PATH_RECORD_RATE_2_5_GBS;
> +	case 5:
> +		return (uint8_t)IB_PATH_RECORD_RATE_5_GBS;
> +	case 10:
> +		return (uint8_t)IB_PATH_RECORD_RATE_10_GBS;
> +	case 20:
> +		return (uint8_t)IB_PATH_RECORD_RATE_20_GBS;
> +	case 30:
> +		return (uint8_t)IB_PATH_RECORD_RATE_30_GBS;
> +	case 40:
> +		return (uint8_t)IB_PATH_RECORD_RATE_40_GBS;
> +	case 60:
> +		return (uint8_t)IB_PATH_RECORD_RATE_60_GBS;
> +	case 80:
> +		return (uint8_t)IB_PATH_RECORD_RATE_80_GBS;
> +	case 120:
> +		return (uint8_t)IB_PATH_RECORD_RATE_120_GBS;
> +	default :
> +		return (uint8_t)MIN_DEFAULT_GROUP_RATE;
> +	}
> +}
> +
> Index: kernel/ipoib_port.h
> ===================================================================
> --- kernel/ipoib_port.h	(revision 630)
> +++ kernel/ipoib_port.h	(working copy)
> @@ -54,6 +54,8 @@
>  /* Max send data segment list size. */
>  #define MAX_SEND_SGE	8
> 
> +/* Min port rate allowed to create mcast group */ #define
> +MIN_DEFAULT_GROUP_RATE (IB_PATH_RECORD_RATE_10_GBS)
> 
>  /*
>   * Define to control how transfers are done.  When defined as 1,
causes
> Index: kernel/netipoib.inf
> ===================================================================
> --- kernel/netipoib.inf	(revision 630)
> +++ kernel/netipoib.inf	(working copy)
> @@ -138,7 +138,7 @@
>  HKR,"Parameters","DebugLevel",%REG_DWORD_NO_CLOBBER%,0x00000002
>  HKR,"Parameters","DebugFlags",%REG_DWORD_NO_CLOBBER%,0x00000fff
> 
>
HKR,"Parameters","bypass_check_bcast_rate",%REG_DWORD_NO_CLOBBER%,0x0000
> 0000
> -
> +HKR,"Parameters","min_group_rate",%REG_DWORD_NO_CLOBBER%,
%RATE_10_GBS%
>  [IpoibEventLog]
>  AddReg = IpoibAddEventLogReg
> 
> @@ -194,3 +194,10 @@
>  DIRID_DRIVERS        = 12
>  DIRID_SYSTEM_X86     = 16425
>  REG_DWORD_NO_CLOBBER = 0x00010003
> +RATE_10_GBS         = 10
> +RATE_20_GBS         = 20
> +RATE_30_GBS         = 30
> +RATE_40_GBS         = 40
> +RATE_60_GBS         = 60
> +RATE_80_GBS         = 80
> +RATE_120_GBS        = 120



More information about the ofw mailing list