[openib-general] [PATCH] opensm: configurable VLStallCount values

Sasha Khapyorsky sashak at voltaire.com
Sun May 21 09:34:44 PDT 2006


Hello Hal,

This adds configurable vl_stall_count and leaf_vl_stall_count values for
switch external ports. It also fixes the existing hoq_lifetime processing.
The resulting behavior is: ports that are not connected are skipped,
hoq_lifetime is set only for switch and router ports, vl_stall_count is
set only for switch ports, and the leaf_ values are used for switch ports
connected to a CA.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>


---

 osm/include/opensm/osm_base.h   |   17 ++++++++++++-
 osm/include/opensm/osm_subnet.h |   10 ++++++++
 osm/opensm/osm_link_mgr.c       |   52 ++++++++++++++++++++++++---------------
 osm/opensm/osm_subnet.c         |   20 +++++++++++++++
 4 files changed, 78 insertions(+), 21 deletions(-)

4fe5d21c60e16bc77ee7d739a3c9170f5197ba8a
diff --git a/osm/include/opensm/osm_base.h b/osm/include/opensm/osm_base.h
index 4e29d2b..53e85d4 100644
--- a/osm/include/opensm/osm_base.h
+++ b/osm/include/opensm/osm_base.h
@@ -318,6 +318,20 @@ #define OSM_DEFAULT_HEAD_OF_QUEUE_LIFE 0
 #define OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE 0xC
 /***********/
 
+/****d* OpenSM: Base/OSM_DEFAULT_VL_STALL_COUNT
+* NAME
+*	OSM_DEFAULT_VL_STALL_COUNT
+*
+* DESCRIPTION
+*	Sets the number of contiguous head of queue lifetime drops that
+*  puts the VL into stalled state. In stalled state the port is supposed to
+*  drop everything for 8*(head of queue lifetime).
+*
+* SYNOPSIS
+*/
+#define OSM_DEFAULT_VL_STALL_COUNT 0x7
+/***********/
+
 /****d* OpenSM: Base/OSM_DEFAULT_LEAF_VL_STALL_COUNT
 * NAME
 *	OSM_DEFAULT_LEAF_VL_STALL_COUNT
@@ -325,7 +339,8 @@ #define OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_L
 * DESCRIPTION
 *	Sets the number of contiguous head of queue life time drops that 
 *  puts the VL into stalled state. In stalled state the port supposed to
-*  drop everything for 8*(head of queue lifetime) 
+*  drop everything for 8*(head of queue lifetime). This value is used for
+*  switch ports driving a CA.
 *  We use here the value of 1 - so any drop due to HOQ means stalling the VL
 *
 * SYNOPSIS
diff --git a/osm/include/opensm/osm_subnet.h b/osm/include/opensm/osm_subnet.h
index 319c494..affa13c 100644
--- a/osm/include/opensm/osm_subnet.h
+++ b/osm/include/opensm/osm_subnet.h
@@ -249,6 +249,8 @@ typedef struct _osm_subn_opt
   boolean_t						force_log_flush;
   uint8_t                  subnet_timeout;
   uint8_t                  packet_life_time;
+  uint8_t                  vl_stall_count;
+  uint8_t                  leaf_vl_stall_count;
   uint8_t                  head_of_queue_lifetime;
   uint8_t                  leaf_head_of_queue_lifetime;
   uint8_t                  local_phy_errors_threshold;
@@ -345,6 +347,14 @@ typedef struct _osm_subn_opt
 *     The subnet_timeout that will be set for all the ports in the
 *     design SubnMgt.Set(PortInfo.vl_stall_life))
 *
+*  vl_stall_count
+*     The number of sequential packet drops that causes the port
+*     to enter the VLStalled state.
+*
+*  leaf_vl_stall_count
+*     The number of sequential packet drops that causes the port
+*     to enter the VLStalled state. Used for switch ports driving a CA.
+*
 *  head_of_queue_lifetime
 *     The maximal time a packet can live at the head of a VL queue
 *     on any port not driving an HCA port
diff --git a/osm/opensm/osm_link_mgr.c b/osm/opensm/osm_link_mgr.c
index c495ec3..225da81 100644
--- a/osm/opensm/osm_link_mgr.c
+++ b/osm/opensm/osm_link_mgr.c
@@ -248,27 +248,39 @@ __osm_link_mgr_set_physp_pi(
       Several timeout mechanisms:
     */
     p_remote_physp = osm_physp_get_remote( p_physp );
-    
-    if (p_remote_physp && 
-        osm_physp_is_valid(p_remote_physp) &&
-        (osm_node_get_type( osm_physp_get_node_ptr(p_remote_physp) ) != 
-         IB_NODE_TYPE_SWITCH))
-    {
-      /* we drive an HCA port so we need to set stall-count to 1 and
-         use leaf hoq value */
-      ib_port_info_set_hoq_lifetime(
-        p_pi, p_mgr->p_subn->opt.leaf_head_of_queue_lifetime);
-      ib_port_info_set_vl_stall_count(
-        p_pi, OSM_DEFAULT_LEAF_VL_STALL_COUNT);
-    }
-    else
-    {
-      ib_port_info_set_hoq_lifetime(
-        p_pi, p_mgr->p_subn->opt.head_of_queue_lifetime);
+    if (port_num != 0 && p_remote_physp && 
+        osm_physp_is_valid(p_remote_physp)) {
+      if (osm_node_get_type(osm_physp_get_node_ptr(p_physp)) ==
+          IB_NODE_TYPE_ROUTER)
+      {
+        ib_port_info_set_hoq_lifetime(
+          p_pi, p_mgr->p_subn->opt.head_of_queue_lifetime);
+      }
+      else if (osm_node_get_type(osm_physp_get_node_ptr(p_physp)) ==
+               IB_NODE_TYPE_SWITCH)
+     {
+       if (osm_node_get_type(osm_physp_get_node_ptr(p_remote_physp)) ==
+           IB_NODE_TYPE_CA)
+       {
+         ib_port_info_set_hoq_lifetime(
+           p_pi, p_mgr->p_subn->opt.leaf_head_of_queue_lifetime);
+         ib_port_info_set_vl_stall_count(
+           p_pi, p_mgr->p_subn->opt.leaf_vl_stall_count);
+       }
+       else
+       {
+         ib_port_info_set_hoq_lifetime(
+           p_pi, p_mgr->p_subn->opt.head_of_queue_lifetime);
+         ib_port_info_set_vl_stall_count(
+           p_pi, p_mgr->p_subn->opt.vl_stall_count);
+	}
+     }
+     if ( ib_port_info_get_hoq_lifetime(p_pi) !=
+          ib_port_info_get_hoq_lifetime(p_old_pi) ||
+          ib_port_info_get_vl_stall_count(p_pi) !=
+          ib_port_info_get_vl_stall_count(p_old_pi) )
+       send_set = TRUE;
     }
-    if ( ib_port_info_get_hoq_lifetime(p_pi) !=
-         ib_port_info_get_hoq_lifetime(p_old_pi) )
-      send_set = TRUE;
 
     ib_port_info_set_phy_and_overrun_err_thd(
       p_pi,
diff --git a/osm/opensm/osm_subnet.c b/osm/opensm/osm_subnet.c
index c251411..0cf0869 100644
--- a/osm/opensm/osm_subnet.c
+++ b/osm/opensm/osm_subnet.c
@@ -441,6 +441,8 @@ osm_subn_set_default_opt(
   p_opt->force_log_flush = FALSE;
   p_opt->subnet_timeout = OSM_DEFAULT_SUBNET_TIMEOUT;
   p_opt->packet_life_time = OSM_DEFAULT_SWITCH_PACKET_LIFE;
+  p_opt->vl_stall_count = OSM_DEFAULT_VL_STALL_COUNT;
+  p_opt->leaf_vl_stall_count = OSM_DEFAULT_LEAF_VL_STALL_COUNT;
   p_opt->head_of_queue_lifetime = OSM_DEFAULT_HEAD_OF_QUEUE_LIFE;
   p_opt->leaf_head_of_queue_lifetime = OSM_DEFAULT_LEAF_HEAD_OF_QUEUE_LIFE;
   p_opt->local_phy_errors_threshold = OSM_DEFAULT_ERROR_THRESHOLD;
@@ -844,6 +846,14 @@ osm_subn_parse_conf_file(
         p_key, p_val, &p_opts->packet_life_time);
       
       __osm_subn_opts_unpack_uint8(
+        "vl_stall_count",
+        p_key, p_val, &p_opts->vl_stall_count);
+      
+      __osm_subn_opts_unpack_uint8(
+        "leaf_vl_stall_count",
+        p_key, p_val, &p_opts->leaf_vl_stall_count);
+      
+      __osm_subn_opts_unpack_uint8(
         "head_of_queue_lifetime",
         p_key, p_val, &p_opts->head_of_queue_lifetime);
       
@@ -980,6 +990,14 @@ osm_subn_write_conf_file(
     "# The actual time is 4.096usec * 2^<packet_life_time>\n"
     "# The value 0x14 disables this mechanism\n"
     "packet_life_time 0x%02x\n\n"
+    "# The number of sequential packets dropped that caused the port\n"
+    "# to enter the VLStalled state. The result of setting this value to\n"
+    "# zero is undefined.\n"
+    "vl_stall_count 0x%02x\n\n"
+    "# The number of sequential packets dropped that caused the port\n"
+    "# to enter the VLStalled state. For switch ports driving a CA. The\n"
+    "# result of setting this value to zero is undefined.\n"
+    "leaf_vl_stall_count 0x%02x\n\n"
     "# The code of maximal time a packet can wait at the head of\n"
     "# transmission queue. \n"
     "# The actual time is 4.096usec * 2^<head_of_queue_lifetime>\n"
@@ -1004,6 +1022,8 @@ osm_subn_write_conf_file(
     cl_ntoh64(p_opts->subnet_prefix),
     p_opts->lmc,
     p_opts->packet_life_time,
+    p_opts->vl_stall_count,
+    p_opts->leaf_vl_stall_count,
     p_opts->head_of_queue_lifetime,
     p_opts->leaf_head_of_queue_lifetime,
     p_opts->max_op_vls,
-- 
1.3.2




More information about the general mailing list