[openib-general] [PATCH] osm: PathRecord prefer 1K MTU for MT23108 devices
Eitan Zahavi
eitan at mellanox.co.il
Fri Sep 15 04:45:35 PDT 2006
Hi Hal
The following patch fixes an issue where OpenSM prefers the largest MTU
in PathRecord/MultiPathRecord responses for paths going to or from MT23108
(Tavor) devices, instead of the 1K MTU that performs best on this device.
Since this is a device-specific quirk, I have added a configuration option
named enable_quirks that turns this functionality on; it is FALSE by default.
To summarize the functionality change:
1. Added enable_quirks option
2. If enable_quirks is FALSE do nothing
3. If a specific MTU is requested (either =2K or >1K) do nothing
4. If either source port or destination port is a Tavor device
MTU is limited to 1K (can be further reduced by path traversal)
Target is both trunk and OFED 1.1
Thanks
Eitan
Signed-off-by: Eitan Zahavi <eitan at mellanox.co.il>
Index: include/opensm/osm_subnet.h
===================================================================
--- include/opensm/osm_subnet.h (revision 9493)
+++ include/opensm/osm_subnet.h (working copy)
@@ -286,6 +286,7 @@ typedef struct _osm_subn_opt
osm_qos_options_t qos_sw0_options;
osm_qos_options_t qos_swe_options;
osm_qos_options_t qos_rtr_options;
+ boolean_t enable_quirks;
} osm_subn_opt_t;
/*
* FIELDS
@@ -469,6 +470,10 @@ typedef struct _osm_subn_opt
* qos_rtr_options
* QoS options for router ports
*
+* enable_quirks
+* Enable high-risk new features and hardware-specific
+* workarounds that are not yet fully qualified
+*
* SEE ALSO
* Subnet object
*********/
Index: include/opensm/osm_base.h
===================================================================
--- include/opensm/osm_base.h (revision 9493)
+++ include/opensm/osm_base.h (working copy)
@@ -778,6 +778,34 @@ typedef enum _osm_mcast_req_type
#define MAX_UPDN_GUID_FILE_LINE_LENGTH 120
/**********/
+/****s* OpenSM: Base/VendorOUIs
+* NAME
+* VendorOUIs
+*
+* DESCRIPTION
+* Known device vendor IDs, which double as the OUIs used in GUIDs.
+* Each value is a 24-bit number written in host byte order; convert
+* it (e.g. with CL_HTON32) before comparing it with a network-order
+* vendor id.
+*
+* SYNOPSIS
+*/
+#define OSM_VENDOR_ID_INTEL 0x00D0B7
+#define OSM_VENDOR_ID_MELLANOX 0x0002C9
+#define OSM_VENDOR_ID_REDSWITCH 0x000617
+#define OSM_VENDOR_ID_SILVERSTORM 0x00066A
+#define OSM_VENDOR_ID_TOPSPIN 0x0005AD
+#define OSM_VENDOR_ID_FUJITSU 0x00E000
+#define OSM_VENDOR_ID_FUJITSU2 0x000B5D
+#define OSM_VENDOR_ID_VOLTAIRE 0x0008F1
+#define OSM_VENDOR_ID_YOTTAYOTTA 0x000453
+#define OSM_VENDOR_ID_PATHSCALE 0x001175
+#define OSM_VENDOR_ID_IBM 0x000255
+#define OSM_VENDOR_ID_DIVERGENET 0x00084E
+#define OSM_VENDOR_ID_FLEXTRONICS 0x000B8C
+#define OSM_VENDOR_ID_AGILENT 0x0030D3
+#define OSM_VENDOR_ID_OBSIDIAN 0x001777
+#define OSM_VENDOR_ID_BAYMICRO 0x000BC1
+#define OSM_VENDOR_ID_LSILOGIC 0x00A0B8
+/**********/
+
END_C_DECLS
#endif /* _OSM_BASE_H_ */
Index: opensm/osm_sa_multipath_record.c
===================================================================
--- opensm/osm_sa_multipath_record.c (revision 9493)
+++ opensm/osm_sa_multipath_record.c (working copy)
@@ -150,6 +150,75 @@ osm_mpr_rcv_init(
/**********************************************************************
**********************************************************************/
+/*
+ Tell whether a port belongs to a Tavor (MT23108) device: the parent
+ node must report device id 23108 together with one of the vendor ids
+ listed below.
+*/
+static inline boolean_t
+__osm_sa_multipath_rec_is_tavor_port(
+  IN const osm_port_t* const p_port)
+{
+  /* NodeInfo of the owning node carries both the device and vendor id */
+  const osm_node_t* const p_owner = osm_port_get_parent_node( p_port );
+  const ib_net32_t vendor = ib_node_info_get_vendor_id( &p_owner->node_info );
+
+  /* a different device id rules the port out regardless of vendor */
+  if( p_owner->node_info.device_id != CL_HTON16(23108) )
+    return( FALSE );
+
+  /* both sides of each comparison are in network byte order */
+  return( vendor == CL_HTON32(OSM_VENDOR_ID_MELLANOX) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_TOPSPIN) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE) );
+}
+
+/**********************************************************************
+ **********************************************************************/
+/*
+ Decide whether the MTU of a MultiPathRecord path may be capped at 1K
+ because one of its end ports is a Tavor (MT23108) device.
+
+ p_mpr       - the MultiPathRecord request being served
+ p_src_port  - source port of the path
+ p_dest_port - destination port of the path
+ comp_mask   - component mask of the request
+
+ Returns TRUE when the 1K cap may be applied. Returns FALSE when neither
+ port is a Tavor, or when the request explicitly asks for an MTU that a
+ 1K path cannot satisfy.
+*/
+static boolean_t
+__osm_sa_multipath_rec_apply_tavor_mtu_limit(
+  IN const ib_multipath_rec_t* const p_mpr,
+  IN const osm_port_t* const p_src_port,
+  IN const osm_port_t* const p_dest_port,
+  IN const ib_net64_t comp_mask)
+{
+  uint8_t required_mtu;
+
+  /* only if one of the ports is a Tavor device */
+  if (! __osm_sa_multipath_rec_is_tavor_port(p_src_port) &&
+      ! __osm_sa_multipath_rec_is_tavor_port(p_dest_port) )
+    return( FALSE );
+
+  /*
+    we can apply the cap if either:
+    1. No MTU is required (selector and MTU not both in the mask)
+    2. Required MTU is "less than" some value
+    3. Required MTU is exactly 1K, 512 or 256
+    4. Required MTU is "greater than" 256 or 512
+    5. The largest available MTU is requested
+  */
+  required_mtu = ib_multipath_rec_mtu( p_mpr );
+  /* a MultiPathRecord request uses the MPR component mask bits */
+  if ( ( comp_mask & IB_MPR_COMPMASK_MTUSELEC ) &&
+       ( comp_mask & IB_MPR_COMPMASK_MTU ) )
+  {
+    switch( ib_multipath_rec_mtu_sel( p_mpr ) )
+    {
+    case 0: /* must be greater than */
+      /* "greater than 1K" (or more) can never be met by a 1K path */
+      if( IB_MTU_LEN_1024 <= required_mtu )
+        return(FALSE);
+      break;
+
+    case 2: /* exact match */
+      if( IB_MTU_LEN_1024 < required_mtu )
+        return(FALSE);
+      break;
+
+    case 1: /* must be less than */
+    case 3: /* largest available */
+      /* can't be disqualified by this one */
+      break;
+
+    default:
+      /* if we're here, there's a bug in ib_multipath_rec_mtu_sel() */
+      CL_ASSERT( FALSE );
+      break;
+    }
+  }
+
+  return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
static ib_api_status_t
__osm_mpr_rcv_get_path_parms(
IN osm_mpr_rcv_t* const p_rcv,
@@ -195,6 +264,23 @@ __osm_mpr_rcv_get_path_parms(
mtu = ib_port_info_get_mtu_cap( p_pi );
rate = ib_port_info_compute_rate( p_pi );
+ /*
+ Mellanox Tavor device performance is better using 1K MTU.
+ If required MTU and MTU selector are such that 1K is OK
+ and one of the ends of the path is Tavor we override the
+ port MTU with 1K.
+ */
+ if ( p_rcv->p_subn->opt.enable_quirks &&
+ __osm_sa_multipath_rec_apply_tavor_mtu_limit(
+ p_mpr, p_src_port, p_dest_port, comp_mask) )
+ if (mtu > IB_MTU_LEN_1024)
+ {
+ mtu = IB_MTU_LEN_1024;
+ osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+ "__osm_mpr_rcv_get_path_parms: "
+ "Optimized Path MTU to 1K for Mellanox Tavor device\n");
+ }
+
if ( comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC &&
cl_ntoh32( p_mpr->hop_flow_raw ) & ( 1<<31 ) )
required_pkey = osm_physp_find_common_pkey( p_physp, p_dest_physp );
Index: opensm/osm_subnet.c
===================================================================
--- opensm/osm_subnet.c (revision 9493)
+++ opensm/osm_subnet.c (working copy)
@@ -494,6 +494,7 @@ osm_subn_set_default_opt(
p_opt->ucast_dump_file = NULL;
p_opt->updn_guid_file = NULL;
p_opt->exit_on_fatal = TRUE;
+ p_opt->enable_quirks = FALSE;
subn_set_default_qos_options(&p_opt->qos_options);
subn_set_default_qos_options(&p_opt->qos_ca_options);
subn_set_default_qos_options(&p_opt->qos_sw0_options);
@@ -979,6 +980,10 @@ osm_subn_parse_conf_file(
subn_parse_qos_options("qos_rtr",
p_key, p_val, &p_opts->qos_rtr_options);
+ __osm_subn_opts_unpack_boolean(
+ "enable_quirks",
+ p_key, p_val, &p_opts->enable_quirks);
+
}
}
fclose(opts_file);
@@ -1179,11 +1184,15 @@ osm_subn_write_conf_file(
"force_log_flush %s\n\n"
"# Log file to be used\n"
"log_file %s\n\n"
+ "# Limit the the size of the log file. If overrun log is restarted\n"
"log_max_size %lu\n\n"
+ "# If TRUE will accumulate the log over multiple OpenSM sessions\n"
"accum_log_file %s\n\n"
"# The directory to hold the file OpenSM dumps\n"
"dump_files_dir %s\n\n"
- "# If TRUE if OpenSM should disable multicast support\n"
+ "# If TRUE enables new high risk options and hardware specific quirks\n"
+ "enable_quirks %s\n\n"
+ "# If TRUE OpenSM should disable multicast support\n"
"no_multicast_option %s\n\n"
"# No multicast routing is performed if TRUE\n"
"disable_multicast %s\n\n"
@@ -1195,6 +1204,7 @@ osm_subn_write_conf_file(
p_opts->log_max_size,
p_opts->accum_log_file ? "TRUE" : "FALSE",
p_opts->dump_files_dir,
+ p_opts->enable_quirks ? "TRUE" : "FALSE",
p_opts->no_multicast_option ? "TRUE" : "FALSE",
p_opts->disable_multicast ? "TRUE" : "FALSE",
p_opts->exit_on_fatal ? "TRUE" : "FALSE"
Index: opensm/osm_helper.c
===================================================================
--- opensm/osm_helper.c (revision 9493)
+++ opensm/osm_helper.c (working copy)
@@ -2289,24 +2289,6 @@ osm_get_node_type_str_fixed_width(
return( __osm_node_type_str_fixed_width[node_type] );
}
-#define OSM_VENDOR_ID_INTEL 0x00D0B7
-#define OSM_VENDOR_ID_MELLANOX 0x0002C9
-#define OSM_VENDOR_ID_REDSWITCH 0x000617
-#define OSM_VENDOR_ID_SILVERSTORM 0x00066A
-#define OSM_VENDOR_ID_TOPSPIN 0x0005AD
-#define OSM_VENDOR_ID_FUJITSU 0x00E000
-#define OSM_VENDOR_ID_FUJITSU2 0x000B5D
-#define OSM_VENDOR_ID_VOLTAIRE 0x0008F1
-#define OSM_VENDOR_ID_YOTTAYOTTA 0x000453
-#define OSM_VENDOR_ID_PATHSCALE 0x001175
-#define OSM_VENDOR_ID_IBM 0x000255
-#define OSM_VENDOR_ID_DIVERGENET 0x00084E
-#define OSM_VENDOR_ID_FLEXTRONICS 0x000B8C
-#define OSM_VENDOR_ID_AGILENT 0x0030D3
-#define OSM_VENDOR_ID_OBSIDIAN 0x001777
-#define OSM_VENDOR_ID_BAYMICRO 0x000BC1
-#define OSM_VENDOR_ID_LSILOGIC 0x00A0B8
-
/**********************************************************************
**********************************************************************/
const char*
Index: opensm/osm_sa_path_record.c
===================================================================
--- opensm/osm_sa_path_record.c (revision 9493)
+++ opensm/osm_sa_path_record.c (working copy)
@@ -57,6 +57,7 @@
#include <complib/cl_passivelock.h>
#include <complib/cl_debug.h>
#include <complib/cl_qlist.h>
+#include <opensm/osm_base.h>
#include <opensm/osm_sa_path_record.h>
#include <opensm/osm_port.h>
#include <opensm/osm_node.h>
@@ -150,6 +151,75 @@ osm_pr_rcv_init(
/**********************************************************************
**********************************************************************/
+/*
+ Tell whether a port belongs to a Tavor (MT23108) device: the parent
+ node must report device id 23108 together with one of the vendor ids
+ listed below.
+*/
+static inline boolean_t
+__osm_sa_path_rec_is_tavor_port(
+  IN const osm_port_t* const p_port)
+{
+  /* NodeInfo of the owning node carries both the device and vendor id */
+  const osm_node_t* const p_owner = osm_port_get_parent_node( p_port );
+  const ib_net32_t vendor = ib_node_info_get_vendor_id( &p_owner->node_info );
+
+  /* a different device id rules the port out regardless of vendor */
+  if( p_owner->node_info.device_id != CL_HTON16(23108) )
+    return( FALSE );
+
+  /* both sides of each comparison are in network byte order */
+  return( vendor == CL_HTON32(OSM_VENDOR_ID_MELLANOX) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_TOPSPIN) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_SILVERSTORM) ||
+          vendor == CL_HTON32(OSM_VENDOR_ID_VOLTAIRE) );
+}
+
+/**********************************************************************
+ **********************************************************************/
+/*
+ Decide whether the MTU of a PathRecord path may be capped at 1K
+ because one of its end ports is a Tavor (MT23108) device.
+
+ p_pr        - the PathRecord request being served
+ p_src_port  - source port of the path
+ p_dest_port - destination port of the path
+ comp_mask   - component mask of the request
+
+ Returns TRUE when the 1K cap may be applied. Returns FALSE when neither
+ port is a Tavor, or when the request explicitly asks for an MTU that a
+ 1K path cannot satisfy.
+*/
+static boolean_t
+__osm_sa_path_rec_apply_tavor_mtu_limit(
+  IN const ib_path_rec_t* const p_pr,
+  IN const osm_port_t* const p_src_port,
+  IN const osm_port_t* const p_dest_port,
+  IN const ib_net64_t comp_mask)
+{
+  uint8_t required_mtu;
+
+  /* only if one of the ports is a Tavor device */
+  if (! __osm_sa_path_rec_is_tavor_port(p_src_port) &&
+      ! __osm_sa_path_rec_is_tavor_port(p_dest_port) )
+    return( FALSE );
+
+  /*
+    we can apply the cap if either:
+    1. No MTU is required (selector and MTU not both in the mask)
+    2. Required MTU is "less than" some value
+    3. Required MTU is exactly 1K, 512 or 256
+    4. Required MTU is "greater than" 256 or 512
+    5. The largest available MTU is requested
+  */
+  required_mtu = ib_path_rec_mtu( p_pr );
+  if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
+       ( comp_mask & IB_PR_COMPMASK_MTU ) )
+  {
+    switch( ib_path_rec_mtu_sel( p_pr ) )
+    {
+    case 0: /* must be greater than */
+      /* "greater than 1K" (or more) can never be met by a 1K path */
+      if( IB_MTU_LEN_1024 <= required_mtu )
+        return(FALSE);
+      break;
+
+    case 2: /* exact match */
+      if( IB_MTU_LEN_1024 < required_mtu )
+        return(FALSE);
+      break;
+
+    case 1: /* must be less than */
+    case 3: /* largest available */
+      /* can't be disqualified by this one */
+      break;
+
+    default:
+      /* if we're here, there's a bug in ib_path_rec_mtu_sel() */
+      CL_ASSERT( FALSE );
+      break;
+    }
+  }
+
+  return(TRUE);
+}
+
+/**********************************************************************
+ **********************************************************************/
static ib_api_status_t
__osm_pr_rcv_get_path_parms(
IN osm_pr_rcv_t* const p_rcv,
@@ -191,6 +261,23 @@ __osm_pr_rcv_get_path_parms(
mtu = ib_port_info_get_mtu_cap( p_pi );
rate = ib_port_info_compute_rate( p_pi );
+ /*
+ Mellanox Tavor device performance is better using 1K MTU.
+ If required MTU and MTU selector are such that 1K is OK
+ and one of the ends of the path is Tavor we override the
+ port MTU with 1K.
+ */
+ if ( p_rcv->p_subn->opt.enable_quirks &&
+ __osm_sa_path_rec_apply_tavor_mtu_limit(
+ p_pr, p_src_port, p_dest_port, comp_mask) )
+ if (mtu > IB_MTU_LEN_1024)
+ {
+ mtu = IB_MTU_LEN_1024;
+ osm_log( p_rcv->p_log, OSM_LOG_DEBUG,
+ "__osm_pr_rcv_get_path_parms: "
+ "Optimized Path MTU to 1K for Mellanox Tavor device\n");
+ }
+
/*
Walk the subnet object from source to destination,
tracking the most restrictive rate and mtu values along the way...
@@ -444,10 +531,10 @@ __osm_pr_rcv_get_path_parms(
*/
/* we silently ignore cases where only the MTU selector is defined */
+ required_mtu = ib_path_rec_mtu( p_pr );
if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) &&
( comp_mask & IB_PR_COMPMASK_MTU ) )
{
- required_mtu = ib_path_rec_mtu( p_pr );
switch( ib_path_rec_mtu_sel( p_pr ) )
{
case 0: /* must be greater than */
More information about the general
mailing list