[openib-general] [PATCH] osm: PathRecord prefer 1K MTU for MT23108 devices

Eitan Zahavi eitan at mellanox.co.il
Fri Sep 15 04:45:35 PDT 2006


Hi Hal

The following patch solves an issue with OpenSM preferring largest MTU 
for PathRecord/MultiPathRecord for paths going to or from MT23108 (Tavor)
devices instead of using a 1K MTU which is best for this device.

Since this is a device specific quirk I have added a configuration option
named enable_quirks which is FALSE by default to enable this functionality.

To summarize the functionality change:
1. Added enable_quirks option 
2. If enable_quirks is FALSE do nothing
3. If a specific MTU is requested (either =2K or >1K) do nothing
4. If either source port or destination port is a Tavor device  
	MTU is limited to 1K (can be further reduced by path traversal) 

Target is both trunk and OFED 1.1

Thanks

Eitan

Signed-off-by:  Eitan Zahavi <eitan at mellanox.co.il>

Index: include/opensm/osm_subnet.h
===================================================================
--- include/opensm/osm_subnet.h	(revision 9493)
+++ include/opensm/osm_subnet.h	(working copy)
@@ -286,6 +286,7 @@ typedef struct _osm_subn_opt
   osm_qos_options_t        qos_sw0_options;
   osm_qos_options_t        qos_swe_options;
   osm_qos_options_t        qos_rtr_options;
+  boolean_t                enable_quirks;
 } osm_subn_opt_t;
 /*
 * FIELDS
@@ -469,6 +470,10 @@ typedef struct _osm_subn_opt
 *	qos_rtr_options
 *		QoS options for router ports
 *
+*  enable_quirks
+*     Enable high risk new features and not fully qualified 
+*     hardware specific work arounds
+*
 * SEE ALSO
 *	Subnet object
 *********/
Index: include/opensm/osm_base.h
===================================================================
--- include/opensm/osm_base.h	(revision 9493)
+++ include/opensm/osm_base.h	(working copy)
@@ -778,6 +778,34 @@ typedef enum _osm_mcast_req_type
 #define MAX_UPDN_GUID_FILE_LINE_LENGTH 120
 /**********/
 
+/****s* OpenSM: Base/VendorOUIs
+* NAME
+*	VendorOUIs
+*
+* DESCRIPTION
+*	Known device vendor ID and GUID OUIs
+*
+* SYNOPSIS
+*/
+#define OSM_VENDOR_ID_INTEL         0x00D0B7
+#define OSM_VENDOR_ID_MELLANOX      0x0002C9
+#define OSM_VENDOR_ID_REDSWITCH     0x000617
+#define OSM_VENDOR_ID_SILVERSTORM   0x00066A
+#define OSM_VENDOR_ID_TOPSPIN       0x0005AD
+#define OSM_VENDOR_ID_FUJITSU       0x00E000
+#define OSM_VENDOR_ID_FUJITSU2      0x000B5D
+#define OSM_VENDOR_ID_VOLTAIRE      0x0008F1
+#define OSM_VENDOR_ID_YOTTAYOTTA    0x000453
+#define OSM_VENDOR_ID_PATHSCALE     0x001175
+#define OSM_VENDOR_ID_IBM           0x000255
+#define OSM_VENDOR_ID_DIVERGENET    0x00084E
+#define OSM_VENDOR_ID_FLEXTRONICS   0x000B8C
+#define OSM_VENDOR_ID_AGILENT       0x0030D3
+#define OSM_VENDOR_ID_OBSIDIAN      0x001777
+#define OSM_VENDOR_ID_BAYMICRO      0x000BC1
+#define OSM_VENDOR_ID_LSILOGIC      0x00A0B8
+/**********/
+
 END_C_DECLS
 
 #endif	/* _OSM_BASE_H_ */
Index: opensm/osm_sa_multipath_record.c
===================================================================
--- opensm/osm_sa_multipath_record.c	(revision 9493)
+++ opensm/osm_sa_multipath_record.c	(working copy)
@@ -150,6 +150,75 @@ osm_mpr_rcv_init(
 
 /**********************************************************************
  **********************************************************************/
+static inline boolean_t
+__osm_sa_multipath_rec_is_tavor_port(
+	IN const osm_port_t*     const p_port)
+{
+	osm_node_t const* p_node;
+	ib_net32_t vend_id;
+
+	p_node = osm_port_get_parent_node( p_port );
+	vend_id = ib_node_info_get_vendor_id( &p_node->node_info );
+	
+	return( (p_node->node_info.device_id == CL_HTON16(23108)) &&
+			  ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
+}
+
+/**********************************************************************
+ **********************************************************************/
+boolean_t
+ __osm_sa_multipath_rec_apply_tavor_mtu_limit(
+  IN const ib_multipath_rec_t*  const p_mpr,
+  IN const osm_port_t*          const p_src_port,
+  IN const osm_port_t*          const p_dest_port,
+  IN const ib_net64_t           comp_mask)
+{
+	uint8_t   required_mtu;
+	
+	/* only if one of the ports is a Tavor device */
+	if (! __osm_sa_multipath_rec_is_tavor_port(p_src_port) && 
+		 ! __osm_sa_multipath_rec_is_tavor_port(p_dest_port) )
+		return( FALSE );
+	
+	/*
+	  we can apply the patch if either:
+	  1. No MTU required
+	  2. Required MTU < 
+	  3. Required MTU = 1K or 512 or 256
+	  4. Required MTU > 256 or 512
+	*/
+	required_mtu = ib_multipath_rec_mtu( p_mpr );
+	if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
+		  ( comp_mask & IB_PR_COMPMASK_MTU ) )
+	{
+		switch( ib_multipath_rec_mtu_sel( p_mpr ) )
+		{
+		case 0:    /* must be greater than */
+		case 2:    /* exact match */
+			if( IB_MTU_LEN_1024 < required_mtu )
+				return(FALSE);
+			break;
+
+		case 1:    /* must be less than */
+		case 3:    /* largest available */
+			/* can't be disqualified by this one */
+			break;
+			
+		default:
+			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
+			CL_ASSERT( FALSE );
+			break;
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
 static ib_api_status_t
 __osm_mpr_rcv_get_path_parms(
   IN osm_mpr_rcv_t*		const p_rcv,
@@ -195,6 +264,23 @@ __osm_mpr_rcv_get_path_parms(
   mtu = ib_port_info_get_mtu_cap( p_pi );
   rate = ib_port_info_compute_rate( p_pi );
 
+  /* 
+	  Mellanox Tavor device performance is better using 1K MTU.
+	  If required MTU and MTU selector are such that 1K is OK 
+	  and one of the ends of the path is Tavor we override the
+	  port MTU with 1K.
+  */
+  if ( p_rcv->p_subn->opt.enable_quirks &&
+		 __osm_sa_multipath_rec_apply_tavor_mtu_limit(
+			 p_mpr, p_src_port, p_dest_port, comp_mask) )
+	  if (mtu > IB_MTU_LEN_1024) 
+	  {
+		  mtu = IB_MTU_LEN_1024;
+  		  osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+					  "__osm_mpr_rcv_get_path_parms: "
+					  "Optimized Path MTU to 1K for Mellanox Tavor device\n");
+	  }
+
   if ( comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
        cl_ntoh32( p_mpr->hop_flow_raw ) & ( 1<<31 ) )
     required_pkey = osm_physp_find_common_pkey( p_physp, p_dest_physp );
Index: opensm/osm_subnet.c
===================================================================
--- opensm/osm_subnet.c	(revision 9493)
+++ opensm/osm_subnet.c	(working copy)
@@ -494,6 +494,7 @@ osm_subn_set_default_opt(
   p_opt->ucast_dump_file = NULL;
   p_opt->updn_guid_file = NULL;
   p_opt->exit_on_fatal = TRUE;
+  p_opt->enable_quirks = FALSE;
   subn_set_default_qos_options(&p_opt->qos_options);
   subn_set_default_qos_options(&p_opt->qos_ca_options);
   subn_set_default_qos_options(&p_opt->qos_sw0_options);
@@ -979,6 +980,10 @@ osm_subn_parse_conf_file(
       subn_parse_qos_options("qos_rtr",
         p_key, p_val, &p_opts->qos_rtr_options);
 
+      __osm_subn_opts_unpack_boolean(
+        "enable_quirks",
+        p_key, p_val, &p_opts->enable_quirks);
+
     }
   }
   fclose(opts_file);
@@ -1179,11 +1184,15 @@ osm_subn_write_conf_file(
     "force_log_flush %s\n\n"
     "# Log file to be used\n"
     "log_file %s\n\n"
+	 "# Limit the the size of the log file. If overrun log is restarted\n"
     "log_max_size %lu\n\n"
+	 "# If TRUE will accumulate the log over multiple OpenSM sessions\n"
     "accum_log_file %s\n\n"
     "# The directory to hold the file OpenSM dumps\n"
     "dump_files_dir %s\n\n"
-    "# If TRUE if OpenSM should disable multicast support\n"
+	 "# If TRUE enables new high risk options and hardware specific quirks\n"
+	 "enable_quirks %s\n\n"
+    "# If TRUE OpenSM should disable multicast support\n"  
     "no_multicast_option %s\n\n"
     "# No multicast routing is performed if TRUE\n"
     "disable_multicast %s\n\n"
@@ -1195,6 +1204,7 @@ osm_subn_write_conf_file(
     p_opts->log_max_size,
     p_opts->accum_log_file ? "TRUE" : "FALSE",
     p_opts->dump_files_dir,
+    p_opts->enable_quirks ? "TRUE" : "FALSE",
     p_opts->no_multicast_option ? "TRUE" : "FALSE",
     p_opts->disable_multicast ? "TRUE" : "FALSE",
     p_opts->exit_on_fatal ? "TRUE" : "FALSE"
Index: opensm/osm_helper.c
===================================================================
--- opensm/osm_helper.c	(revision 9493)
+++ opensm/osm_helper.c	(working copy)
@@ -2289,24 +2289,6 @@ osm_get_node_type_str_fixed_width(
   return( __osm_node_type_str_fixed_width[node_type] );
 }
 
-#define OSM_VENDOR_ID_INTEL         0x00D0B7
-#define OSM_VENDOR_ID_MELLANOX      0x0002C9
-#define OSM_VENDOR_ID_REDSWITCH     0x000617
-#define OSM_VENDOR_ID_SILVERSTORM     0x00066A
-#define OSM_VENDOR_ID_TOPSPIN    0x0005AD
-#define OSM_VENDOR_ID_FUJITSU    0x00E000
-#define OSM_VENDOR_ID_FUJITSU2      0x000B5D
-#define OSM_VENDOR_ID_VOLTAIRE   0x0008F1
-#define OSM_VENDOR_ID_YOTTAYOTTA    0x000453
-#define OSM_VENDOR_ID_PATHSCALE     0x001175
-#define OSM_VENDOR_ID_IBM           0x000255
-#define OSM_VENDOR_ID_DIVERGENET    0x00084E
-#define OSM_VENDOR_ID_FLEXTRONICS   0x000B8C
-#define OSM_VENDOR_ID_AGILENT       0x0030D3
-#define OSM_VENDOR_ID_OBSIDIAN      0x001777
-#define OSM_VENDOR_ID_BAYMICRO      0x000BC1
-#define OSM_VENDOR_ID_LSILOGIC      0x00A0B8
-
 /**********************************************************************
  **********************************************************************/
 const char*
Index: opensm/osm_sa_path_record.c
===================================================================
--- opensm/osm_sa_path_record.c	(revision 9493)
+++ opensm/osm_sa_path_record.c	(working copy)
@@ -57,6 +57,7 @@
 #include <complib/cl_passivelock.h>
 #include <complib/cl_debug.h>
 #include <complib/cl_qlist.h>
+#include <opensm/osm_base.h>
 #include <opensm/osm_sa_path_record.h>
 #include <opensm/osm_port.h>
 #include <opensm/osm_node.h>
@@ -150,6 +151,75 @@ osm_pr_rcv_init(
 
 /**********************************************************************
  **********************************************************************/
+static inline boolean_t
+__osm_sa_path_rec_is_tavor_port(
+	IN const osm_port_t*     const p_port)
+{
+	osm_node_t const* p_node;
+	ib_net32_t vend_id;
+
+	p_node = osm_port_get_parent_node( p_port );
+	vend_id = ib_node_info_get_vendor_id( &p_node->node_info );
+	
+	return( (p_node->node_info.device_id == CL_HTON16(23108)) &&
+			  ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || 
+				(vend_id == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE))));
+}
+
+/**********************************************************************
+ **********************************************************************/
+static boolean_t
+ __osm_sa_path_rec_apply_tavor_mtu_limit(
+  IN const ib_path_rec_t*  const p_pr,
+  IN const osm_port_t*     const p_src_port,
+  IN const osm_port_t*     const p_dest_port,
+  IN const ib_net64_t      comp_mask)
+{
+	uint8_t   required_mtu;
+	
+	/* only if one of the ports is a Tavor device */
+	if (! __osm_sa_path_rec_is_tavor_port(p_src_port) && 
+		 ! __osm_sa_path_rec_is_tavor_port(p_dest_port) )
+		return( FALSE );
+	
+	/*
+	  we can apply the patch if either:
+	  1. No MTU required
+	  2. Required MTU < 
+	  3. Required MTU = 1K or 512 or 256
+	  4. Required MTU > 256 or 512
+	*/
+	required_mtu = ib_path_rec_mtu( p_pr );
+	if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
+		  ( comp_mask & IB_PR_COMPMASK_MTU ) )
+	{
+		switch( ib_path_rec_mtu_sel( p_pr ) )
+		{
+		case 0:    /* must be greater than */
+		case 2:    /* exact match */
+			if( IB_MTU_LEN_1024 < required_mtu )
+				return(FALSE);
+			break;
+
+		case 1:    /* must be less than */
+		case 3:    /* largest available */
+			/* can't be disqualified by this one */
+			break;
+			
+		default:
+			/* if we're here, there's a bug in ib_path_rec_mtu_sel() */
+			CL_ASSERT( FALSE );
+			break;
+		}
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
 static ib_api_status_t
 __osm_pr_rcv_get_path_parms(
   IN osm_pr_rcv_t*         const p_rcv,
@@ -191,6 +261,23 @@ __osm_pr_rcv_get_path_parms(
   mtu = ib_port_info_get_mtu_cap( p_pi );
   rate = ib_port_info_compute_rate( p_pi );
 
+  /* 
+	  Mellanox Tavor device performance is better using 1K MTU.
+	  If required MTU and MTU selector are such that 1K is OK 
+	  and one of the ends of the path is Tavor we override the
+	  port MTU with 1K.
+  */
+  if (  p_rcv->p_subn->opt.enable_quirks &&
+		  __osm_sa_path_rec_apply_tavor_mtu_limit(
+			  p_pr, p_src_port, p_dest_port, comp_mask) )
+	  if (mtu > IB_MTU_LEN_1024) 
+	  {
+		  mtu = IB_MTU_LEN_1024;
+		  osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+					  "__osm_pr_rcv_get_path_parms: "
+					  "Optimized Path MTU to 1K for Mellanox Tavor device\n");
+	  }
+
   /*
     Walk the subnet object from source to destination,
     tracking the most restrictive rate and mtu values along the way...
@@ -444,10 +531,10 @@ __osm_pr_rcv_get_path_parms(
   */
 
   /* we silently ignore cases where only the MTU selector is defined */
+  required_mtu = ib_path_rec_mtu( p_pr );
   if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
        ( comp_mask & IB_PR_COMPMASK_MTU ) )
   {
-    required_mtu = ib_path_rec_mtu( p_pr );
     switch( ib_path_rec_mtu_sel( p_pr ) )
     {
     case 0:    /* must be greater than */





More information about the general mailing list