[ofw][IPOIB]bypass_check_bcast_rate usage extension
Yossi Leybovich
sleybo at dev.mellanox.co.il
Wed Apr 25 06:52:10 PDT 2007
Hi
Let me see if I got it:
If there is no bcast group in the fabric (which is not the case with
OpenSM),
the code will create the bcast group with the minimum rate that is configured
in the registry (instead of the port rate),
right?
In that case, what will happen on a mixed platform? Does Linux have such a
variable?
Why not create the group at a low rate like 10 Gb/s and let higher-rate ports
join? (I think this is the way OpenSM creates its group.)
If you leave the creation to be configured by the ports, you can still end up
with a crippled port that creates the bcast group with a high rate.
Yossi
> -----Original Message-----
> From: Alex Estrin [mailto:alex.estrin at qlogic.com]
> Sent: Monday, April 23, 2007 11:08 PM
> To: Yossi Leybovich; ofw at lists.openfabrics.org
> Subject: [ofw][IPOIB]bypass_check_bcast_rate usage extension
>
> Hi Yossi,
>
> The proposed patch is intended to complete the 'check bcast rate
> behavior' of joining an existing multicast group (fail to join
> if the offered rate is too low), by failing to create such a
> group with a rate too low for other nodes to be able to join.
> Please review.
>
> Thanks,
> Alex
>
>
> Index: kernel/ipoib_driver.c
> ===================================================================
> --- kernel/ipoib_driver.c (revision 630)
> +++ kernel/ipoib_driver.c (working copy)
> @@ -316,7 +316,7 @@
> {
> NTSTATUS status;
> /* Remember the terminating entry in the table below. */
> - RTL_QUERY_REGISTRY_TABLE table[4];
> + RTL_QUERY_REGISTRY_TABLE table[5];
> UNICODE_STRING param_path;
>
> IPOIB_ENTER( IPOIB_DBG_INIT );
> @@ -362,6 +362,13 @@
> table[2].DefaultData = &g_ipoib.bypass_check_bcast_rate;
> table[2].DefaultLength = sizeof(ULONG);
>
> + table[3].Flags = RTL_QUERY_REGISTRY_DIRECT;
> + table[3].Name = L"min_group_rate";
> + table[3].EntryContext = &g_ipoib.min_group_rate;
> + table[3].DefaultType = REG_DWORD;
> + table[3].DefaultData = &g_ipoib.min_group_rate;
> + table[3].DefaultLength = sizeof(ULONG);
> +
> /* Have at it! */
> status = RtlQueryRegistryValues( RTL_REGISTRY_ABSOLUTE,
> param_path.Buffer, table, NULL, NULL );
> Index: kernel/ipoib_driver.h
> ===================================================================
> --- kernel/ipoib_driver.h (revision 630)
> +++ kernel/ipoib_driver.h (working copy)
> @@ -77,7 +77,7 @@
> NDIS_HANDLE h_ibat_dev;
> volatile LONG ibat_ref;
> uint32_t bypass_check_bcast_rate;
> -
> + uint32_t min_group_rate;
> } ipoib_globals_t;
> /*
> * FIELDS
> @@ -95,6 +95,9 @@
> *
> * h_ibat_dev
> * Device handle returned by NdisMRegisterDevice.
> +*
> +* min_group_rate
> +* minimum port rate allowed to create bcast/mcast group.
> Gbps.
> *********/
>
> extern ipoib_globals_t g_ipoib;
> Index: kernel/ipoib_log.mc
> ===================================================================
> --- kernel/ipoib_log.mc (revision 630)
> +++ kernel/ipoib_log.mc (working copy)
> @@ -281,5 +281,5 @@
> Severity=Error
> SymbolicName=EVENT_IPOIB_BCAST_RATE
> Language=English
> -%2: The local port rate is too slow for the existing
> broadcast MC group.
> +%2: The local port rate is too low to join or create broadcast MC
> group.
> .
> Index: kernel/ipoib_port.c
> ===================================================================
> --- kernel/ipoib_port.c (revision 630)
> +++ kernel/ipoib_port.c (working copy)
> @@ -94,7 +94,14 @@
> __port_free(
> IN cl_obj_t* const
> p_obj );
>
> +static inline uint8_t
> +__port_rate_to_Gbps(
> + IN uint8_t
> rate );
>
> +static inline uint8_t
> +__port_rate_from_Gbps(
> + IN uint8_t
> rate );
> +
>
> /*************************************************************
> **********
> *******
> *
> * IB resource manager operations
> @@ -1775,7 +1782,6 @@
> status = __endpt_mgr_insert( p_port,
> mac, *pp_src );
> if( status != IB_SUCCESS )
> {
> - cl_obj_unlock( &p_port->obj );
> IPOIB_PRINT_EXIT(
> TRACE_LEVEL_ERROR, IPOIB_DBG_ERROR,
> ("__endpt_mgr_insert returned
> %s\n",
>
> p_port->p_adapter->p_ifc->get_err_str( status )) ); @@
> -5069,6 +5075,8 @@ {
> ib_api_status_t status;
> ib_mcast_req_t mcast_req;
> + uint32_t mcast_rate_gbps;
> + uint32_t port_rate_gbps;
>
> IPOIB_ENTER( IPOIB_DBG_INIT );
>
> @@ -5106,6 +5114,34 @@
> mcast_req.port_guid = p_port->p_adapter->guids.port_guid;
> mcast_req.pkey_index = 0;
>
> + /* prevent mcast group creating if local port rate
> + is too low for other nodes to be able to join */
> + if( !g_ipoib.bypass_check_bcast_rate )
> + {
> + port_rate_gbps = __port_rate_to_Gbps(
> p_port->ib_mgr.rate );
> +
> + /*if service parameter min_group_rate is not specified
> + or 0, or invalid then MIN_DEFAULT_RATE will be
> selected */
> + mcast_rate_gbps = __port_rate_to_Gbps(
> + __port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> +
> + if( mcast_rate_gbps > port_rate_gbps )
> + {
> + IPOIB_PRINT( TRACE_LEVEL_WARNING,
> IPOIB_DBG_INIT,
> + ("Port rate is too low to create Bcast
> group.\n") );
> +
> + NdisWriteErrorLogEntry(
> p_port->p_adapter->h_adapter,
> + EVENT_IPOIB_BCAST_RATE, 2,
> + (uint32_t)p_port->ib_mgr.rate,
> + (uint32_t)__port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> +
> + return IB_ERROR;
> + }
> + mcast_req.member_rec.rate =
> + ( __port_rate_from_Gbps(
> (uint8_t)g_ipoib.min_group_rate ) );
> + mcast_req.member_rec.rate |= ( IB_PATH_SELECTOR_EXACTLY
> << 6 );
> + }
> +
> /* reference the object for the multicast join request. */
> ipoib_port_ref( p_port, ref_join_bcast );
>
> @@ -5656,4 +5692,61 @@
> IPOIB_EXIT( IPOIB_DBG_MCAST );
> }
>
> +static inline uint8_t
> +__port_rate_to_Gbps(
> + IN uint8_t rate )
> +{
> + switch ( (int)rate )
> + {
> + case IB_PATH_RECORD_RATE_2_5_GBS:
> + return 2;
> + case IB_PATH_RECORD_RATE_5_GBS:
> + return 5;
> + case IB_PATH_RECORD_RATE_10_GBS:
> + return 10;
> + case IB_PATH_RECORD_RATE_20_GBS:
> + return 20;
> + case IB_PATH_RECORD_RATE_30_GBS:
> + return 30;
> + case IB_PATH_RECORD_RATE_40_GBS:
> + return 40;
> + case IB_PATH_RECORD_RATE_60_GBS:
> + return 60;
> + case IB_PATH_RECORD_RATE_80_GBS:
> + return 80;
> + case IB_PATH_RECORD_RATE_120_GBS:
> + return 120;
> + default:
> + return 0;
> + }
> +}
>
> +static inline uint8_t
> +__port_rate_from_Gbps(
> + IN uint8_t rate_gbps )
> +{
> + switch ( (int)rate_gbps )
> + {
> + case 2:
> + return (uint8_t)IB_PATH_RECORD_RATE_2_5_GBS;
> + case 5:
> + return (uint8_t)IB_PATH_RECORD_RATE_5_GBS;
> + case 10:
> + return (uint8_t)IB_PATH_RECORD_RATE_10_GBS;
> + case 20:
> + return (uint8_t)IB_PATH_RECORD_RATE_20_GBS;
> + case 30:
> + return (uint8_t)IB_PATH_RECORD_RATE_30_GBS;
> + case 40:
> + return (uint8_t)IB_PATH_RECORD_RATE_40_GBS;
> + case 60:
> + return (uint8_t)IB_PATH_RECORD_RATE_60_GBS;
> + case 80:
> + return (uint8_t)IB_PATH_RECORD_RATE_80_GBS;
> + case 120:
> + return (uint8_t)IB_PATH_RECORD_RATE_120_GBS;
> + default :
> + return (uint8_t)MIN_DEFAULT_GROUP_RATE;
> + }
> +}
> +
> Index: kernel/ipoib_port.h
> ===================================================================
> --- kernel/ipoib_port.h (revision 630)
> +++ kernel/ipoib_port.h (working copy)
> @@ -54,6 +54,8 @@
> /* Max send data segment list size. */
> #define MAX_SEND_SGE 8
>
> +/* Min port rate allowed to create mcast group */ #define
> +MIN_DEFAULT_GROUP_RATE (IB_PATH_RECORD_RATE_10_GBS)
>
> /*
> * Define to control how transfers are done. When defined
> as 1, causes
> Index: kernel/netipoib.inf
> ===================================================================
> --- kernel/netipoib.inf (revision 630)
> +++ kernel/netipoib.inf (working copy)
> @@ -138,7 +138,7 @@
> HKR,"Parameters","DebugLevel",%REG_DWORD_NO_CLOBBER%,0x00000002
> HKR,"Parameters","DebugFlags",%REG_DWORD_NO_CLOBBER%,0x00000fff
>
> HKR,"Parameters","bypass_check_bcast_rate",%REG_DWORD_NO_CLOBB
> ER%,0x0000
> 0000
> -
> +HKR,"Parameters","min_group_rate",%REG_DWORD_NO_CLOBBER%,
> %RATE_10_GBS%
> [IpoibEventLog]
> AddReg = IpoibAddEventLogReg
>
> @@ -194,3 +194,10 @@
> DIRID_DRIVERS = 12
> DIRID_SYSTEM_X86 = 16425
> REG_DWORD_NO_CLOBBER = 0x00010003
> +RATE_10_GBS = 10
> +RATE_20_GBS = 20
> +RATE_30_GBS = 30
> +RATE_40_GBS = 40
> +RATE_60_GBS = 60
> +RATE_80_GBS = 80
> +RATE_120_GBS = 120
>
More information about the ofw
mailing list