[ofw][IPOIB]bypass_check_bcast_rate usage extension

Fab Tillier ftillier at windows.microsoft.com
Mon Apr 23 13:16:31 PDT 2007


Shouldn't minimum MC group rates be controlled at the SM, rather than at
every system on the fabric?  It seems to me that having a per-port
variable is a recipe for configuration errors.

The SM is the single point where policies like minimum rate and default
rate should be managed.

Just my $.02
-Fab

-----Original Message-----
From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Alex Estrin
Sent: Monday, April 23, 2007 1:08 PM
To: Yossi Leybovich; ofw at lists.openfabrics.org
Subject: [ofw][IPOIB]bypass_check_bcast_rate usage extension

Hi Yossi,

The proposed patch is intended to complete the 'check bcast rate'
behavior for joining an existing multicast group (fail to join if the
offered rate is too low) by also failing to create such a group with a
rate too low for other nodes to be able to join.
Please review.

Thanks,
Alex


Index: kernel/ipoib_driver.c
===================================================================
--- kernel/ipoib_driver.c	(revision 630)
+++ kernel/ipoib_driver.c	(working copy)
@@ -316,7 +316,7 @@
 {
 	NTSTATUS						status;
 	/* Remember the terminating entry in the table below. */
-	RTL_QUERY_REGISTRY_TABLE		table[4];
+	RTL_QUERY_REGISTRY_TABLE		table[5];
 	UNICODE_STRING					param_path;
 
 	IPOIB_ENTER( IPOIB_DBG_INIT );
@@ -362,6 +362,13 @@
 	table[2].DefaultData = &g_ipoib.bypass_check_bcast_rate;
 	table[2].DefaultLength = sizeof(ULONG);
 
+	table[3].Flags = RTL_QUERY_REGISTRY_DIRECT;
+	table[3].Name = L"min_group_rate";
+	table[3].EntryContext = &g_ipoib.min_group_rate;
+	table[3].DefaultType = REG_DWORD;
+	table[3].DefaultData = &g_ipoib.min_group_rate;
+	table[3].DefaultLength = sizeof(ULONG);
+
 	/* Have at it! */
 	status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE, 
 		param_path.Buffer, table, NULL, NULL );
Index: kernel/ipoib_driver.h
===================================================================
--- kernel/ipoib_driver.h	(revision 630)
+++ kernel/ipoib_driver.h	(working copy)
@@ -77,7 +77,7 @@
 	NDIS_HANDLE		h_ibat_dev;
 	volatile LONG	ibat_ref;
 	uint32_t		bypass_check_bcast_rate;
-
+	uint32_t		min_group_rate;
 }	ipoib_globals_t;
 /*
 * FIELDS
@@ -95,6 +95,9 @@
 *
 *	h_ibat_dev
 *		Device handle returned by NdisMRegisterDevice.
+*
+*	min_group_rate
+*		minimum port rate allowed to create bcast/mcast group.
Gbps.
 *********/
 
 extern ipoib_globals_t	g_ipoib;
Index: kernel/ipoib_log.mc
===================================================================
--- kernel/ipoib_log.mc	(revision 630)
+++ kernel/ipoib_log.mc	(working copy)
@@ -281,5 +281,5 @@
 Severity=Error
 SymbolicName=EVENT_IPOIB_BCAST_RATE
 Language=English
-%2: The local port rate is too slow for the existing broadcast MC
group.
+%2: The local port rate is too low to join or create broadcast MC
group.
 .
Index: kernel/ipoib_port.c
===================================================================
--- kernel/ipoib_port.c	(revision 630)
+++ kernel/ipoib_port.c	(working copy)
@@ -94,7 +94,14 @@
 __port_free(
 	IN				cl_obj_t* const
p_obj );
 
+static inline uint8_t
+__port_rate_to_Gbps(
+	IN				uint8_t
rate );
 
+static inline uint8_t
+__port_rate_from_Gbps(
+	IN				uint8_t
rate );
+
 
/***********************************************************************
*******
 *
 * IB resource manager operations
@@ -1775,7 +1782,6 @@
 			status = __endpt_mgr_insert( p_port, mac,
*pp_src );
 			if( status != IB_SUCCESS )
 			{
-				cl_obj_unlock( &p_port->obj );
 				IPOIB_PRINT_EXIT( TRACE_LEVEL_ERROR,
IPOIB_DBG_ERROR,
 					("__endpt_mgr_insert returned
%s\n",
 
p_port->p_adapter->p_ifc->get_err_str( status )) ); @@ -5069,6 +5075,8
@@  {
 	ib_api_status_t		status;
 	ib_mcast_req_t		mcast_req;
+	uint32_t			mcast_rate_gbps;
+	uint32_t			port_rate_gbps;
 
 	IPOIB_ENTER( IPOIB_DBG_INIT );
 
@@ -5106,6 +5114,34 @@
 	mcast_req.port_guid = p_port->p_adapter->guids.port_guid;
 	mcast_req.pkey_index = 0;
 
+	/* prevent mcast group creating if local port rate
+	 is too low for other nodes to be able to join */
+	if( !g_ipoib.bypass_check_bcast_rate )
+	{
+		port_rate_gbps = __port_rate_to_Gbps(
p_port->ib_mgr.rate );
+		
+		/*if service parameter min_group_rate is not specified 
+		  or 0, or invalid then MIN_DEFAULT_RATE will be
selected */
+		mcast_rate_gbps = __port_rate_to_Gbps(
+			__port_rate_from_Gbps(
(uint8_t)g_ipoib.min_group_rate ) );
+
+		if(	mcast_rate_gbps > port_rate_gbps )
+		{
+			IPOIB_PRINT( TRACE_LEVEL_WARNING,
IPOIB_DBG_INIT,
+				("Port rate is too low to create Bcast
group.\n") );
+
+			NdisWriteErrorLogEntry(
p_port->p_adapter->h_adapter,
+				EVENT_IPOIB_BCAST_RATE, 2, 
+				(uint32_t)p_port->ib_mgr.rate,
+				(uint32_t)__port_rate_from_Gbps(
(uint8_t)g_ipoib.min_group_rate ) );
+
+			return IB_ERROR;
+		}
+		mcast_req.member_rec.rate = 
+			( __port_rate_from_Gbps(
(uint8_t)g_ipoib.min_group_rate ) );
+		mcast_req.member_rec.rate |= ( IB_PATH_SELECTOR_EXACTLY
<< 6 );
+	}
+
 	/* reference the object for the multicast join request. */
 	ipoib_port_ref( p_port, ref_join_bcast );
 
@@ -5656,4 +5692,61 @@
 	IPOIB_EXIT( IPOIB_DBG_MCAST );
 }
 
+static inline uint8_t
+__port_rate_to_Gbps(
+	IN			uint8_t			rate )
+{
+	switch ( (int)rate )
+	{
+	case IB_PATH_RECORD_RATE_2_5_GBS:
+		return 2;
+	case IB_PATH_RECORD_RATE_5_GBS:
+		return 5;
+	case IB_PATH_RECORD_RATE_10_GBS:
+		return 10;
+	case IB_PATH_RECORD_RATE_20_GBS:
+		return 20;
+	case IB_PATH_RECORD_RATE_30_GBS:
+		return 30;
+	case IB_PATH_RECORD_RATE_40_GBS:
+		return 40;
+	case IB_PATH_RECORD_RATE_60_GBS:
+		return 60;
+	case IB_PATH_RECORD_RATE_80_GBS:
+		return 80;
+	case IB_PATH_RECORD_RATE_120_GBS:
+		return 120;
+	default:
+		return 0;
+	}
+}
 
+static inline uint8_t
+__port_rate_from_Gbps(
+	IN			uint8_t			rate_gbps )
+{
+	switch ( (int)rate_gbps )
+	{
+	case 2:
+		return (uint8_t)IB_PATH_RECORD_RATE_2_5_GBS;
+	case 5:
+		return (uint8_t)IB_PATH_RECORD_RATE_5_GBS;
+	case 10:
+		return (uint8_t)IB_PATH_RECORD_RATE_10_GBS;
+	case 20:
+		return (uint8_t)IB_PATH_RECORD_RATE_20_GBS;
+	case 30:
+		return (uint8_t)IB_PATH_RECORD_RATE_30_GBS;
+	case 40:
+		return (uint8_t)IB_PATH_RECORD_RATE_40_GBS;
+	case 60:
+		return (uint8_t)IB_PATH_RECORD_RATE_60_GBS;
+	case 80:
+		return (uint8_t)IB_PATH_RECORD_RATE_80_GBS;
+	case 120:
+		return (uint8_t)IB_PATH_RECORD_RATE_120_GBS;
+	default :
+		return (uint8_t)MIN_DEFAULT_GROUP_RATE;
+	}
+}
+
Index: kernel/ipoib_port.h
===================================================================
--- kernel/ipoib_port.h	(revision 630)
+++ kernel/ipoib_port.h	(working copy)
@@ -54,6 +54,8 @@
 /* Max send data segment list size. */
 #define MAX_SEND_SGE	8
 
+/* Min port rate allowed to create mcast group */ #define 
+MIN_DEFAULT_GROUP_RATE (IB_PATH_RECORD_RATE_10_GBS)
 
 /*
  * Define to control how transfers are done.  When defined as 1, causes
Index: kernel/netipoib.inf
===================================================================
--- kernel/netipoib.inf	(revision 630)
+++ kernel/netipoib.inf	(working copy)
@@ -138,7 +138,7 @@
 HKR,"Parameters","DebugLevel",%REG_DWORD_NO_CLOBBER%,0x00000002
 HKR,"Parameters","DebugFlags",%REG_DWORD_NO_CLOBBER%,0x00000fff
 
HKR,"Parameters","bypass_check_bcast_rate",%REG_DWORD_NO_CLOBBER%,0x0000
0000
-
+HKR,"Parameters","min_group_rate",%REG_DWORD_NO_CLOBBER%, %RATE_10_GBS%
 [IpoibEventLog]
 AddReg = IpoibAddEventLogReg
 
@@ -194,3 +194,10 @@
 DIRID_DRIVERS        = 12
 DIRID_SYSTEM_X86     = 16425
 REG_DWORD_NO_CLOBBER = 0x00010003
+RATE_10_GBS         = 10
+RATE_20_GBS         = 20
+RATE_30_GBS         = 30
+RATE_40_GBS         = 40
+RATE_60_GBS         = 60
+RATE_80_GBS         = 80
+RATE_120_GBS        = 120



More information about the ofw mailing list