[openib-general] [PATCH 2/2] opensm: basic QoS implementation

Sasha Khapyorsky sashak at voltaire.com
Mon May 8 13:03:47 PDT 2006


Basic low-level QoS implementation. The main procedure (osm_qos_setup())
is called from the resweeper after the configuration has been refreshed.
It then sets up the low-level QoS-related port attributes
(PortInfo:VLHighLimit, VL*Arbitration and SL2VLMapping tables).
The different port categories (HCA, switch external ports and switch port 0)
are updated according to the provided configurations.

Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---

 osm/include/opensm/osm_madw.h |    1 
 osm/opensm/Makefile.am        |    2 
 osm/opensm/osm_qos.c          |  439 +++++++++++++++++++++++++++++++++++++++++
 osm/opensm/osm_state_mgr.c    |   11 +
 4 files changed, 452 insertions(+), 1 deletions(-)

diff --git a/osm/include/opensm/osm_madw.h b/osm/include/opensm/osm_madw.h
index 5b4ddab..4d92db4 100644
--- a/osm/include/opensm/osm_madw.h
+++ b/osm/include/opensm/osm_madw.h
@@ -352,6 +352,7 @@ typedef union _osm_madw_context
 	osm_smi_context_t		smi_context;
 	osm_slvl_context_t	slvl_context;
 	osm_pkey_context_t	pkey_context;
+	osm_vla_context_t	vla_context;
 #ifndef OSM_VENDOR_INTF_OPENIB
 	osm_arbitrary_context_t arb_context;
 #endif
diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am
index e396dcf..ebb6295 100644
--- a/osm/opensm/Makefile.am
+++ b/osm/opensm/Makefile.am
@@ -81,7 +81,7 @@ opensm_SOURCES = main.c osm_console.c os
 		 osm_state_mgr_ctrl.c osm_subnet.c \
 		 osm_sweep_fail_ctrl.c osm_sw_info_rcv.c \
 		 osm_sw_info_rcv_ctrl.c osm_switch.c \
-		 osm_prtn.c osm_prtn_config.c \
+		 osm_prtn.c osm_prtn_config.c osm_qos.c \
 		 osm_trap_rcv.c osm_trap_rcv_ctrl.c \
 		 osm_ucast_mgr.c osm_ucast_updn.c \
 		 osm_vl15intf.c osm_vl_arb_rcv.c \
diff --git a/osm/opensm/osm_qos.c b/osm/opensm/osm_qos.c
new file mode 100644
index 0000000..be27b40
--- /dev/null
+++ b/osm/opensm/osm_qos.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2006 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Abstract:
+ *    Implementation of OpenSM QoS infrastructure primitives
+ *
+ * Environment:
+ *    Linux User Mode
+ *
+ * $Revision$
+ */
+
+#if HAVE_CONFIG_H
+#  include <config.h>
+#endif				/* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include <iba/ib_types.h>
+#include <complib/cl_qmap.h>
+#include <complib/cl_debug.h>
+#include <opensm/osm_opensm.h>
+#include <opensm/osm_subnet.h>
+
+struct qos_config {		/* parsed QoS settings for one port category */
+	uint8_t max_vls;	/* max_vls option, or the default set's value */
+	uint8_t vl_high_limit;	/* value written to PortInfo:VLHighLimit */
+	ib_vl_arb_table_t vlarb_high[2];	/* VL arbitration high table, 2 blocks */
+	ib_vl_arb_table_t vlarb_low[2];	/* VL arbitration low table, 2 blocks */
+	ib_slvl_table_t sl2vl;	/* SL-to-VL mapping table sent to ports */
+};
+
+static void qos_build_config(struct qos_config *cfg,
+			     osm_qos_options_t * opt, osm_qos_options_t * dflt);
+
+/*
+ * QoS primitives
+ *
+ */
+
+/* Send one VL arbitration table block to port `p` unless the copy stored
+ * in `p` already matches; entry VLs are first masked to the operational VLs.
+ * Returns IB_ERROR for missing port info or an oversized block, IB_SUCCESS
+ * when the stored block already matches, else the osm_req_set() status. */
+static ib_api_status_t
+vlarb_update_table_block(osm_req_t * p_req, osm_physp_t * p, unsigned port_num,
+			 const ib_vl_arb_table_t *table_block,
+			 unsigned block_length, unsigned block_num)
+{
+	ib_vl_arb_table_t block;
+	osm_madw_context_t context;
+	uint32_t attr_mod = ((block_num + 1) << 16) | port_num;
+	ib_port_info_t *p_pi = osm_physp_get_port_info_ptr(p);
+	unsigned op_vls, vl_mask, i;
+
+	if (!p_pi || block_length > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK)
+		return IB_ERROR;
+
+	cl_memset(&block, 0, sizeof(block));
+	cl_memcpy(&block, table_block,
+		  block_length * sizeof(block.vl_entry[0]));
+	/* OpVLs of 0 would make the shift count negative (UB): mask to VL0 */
+	op_vls = ib_port_info_get_op_vls(p_pi);
+	vl_mask = op_vls ? (1U << (op_vls - 1)) - 1 : 0;
+	for (i = 0; i < block_length; i++)
+		block.vl_entry[i].vl &= vl_mask;
+
+	if (!cl_memcmp(&p->vl_arb[block_num], &block,
+		       block_length * sizeof(block.vl_entry[0])))
+		return IB_SUCCESS;
+
+	context.vla_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
+	context.vla_context.port_guid = osm_physp_get_port_guid(p);
+	context.vla_context.set_method = TRUE;
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   (uint8_t *) & block, sizeof(block),
+			   IB_MAD_ATTR_VL_ARBITRATION,
+			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &context);
+}
+
+/* Bring both VL arbitration tables of port `p` in line with `qcfg`.
+ * Each table goes out in up to two blocks of at most
+ * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK entries, limited by the capacity the port
+ * reports: block numbers 0-1 carry the low table, 2-3 the high table.
+ * Returns IB_ERROR without port info, otherwise the first non-success
+ * status of a block update, or IB_SUCCESS. */
+static ib_api_status_t vlarb_update(osm_req_t * p_req,
+				    osm_physp_t * p, unsigned port_num,
+				    const struct qos_config *qcfg)
+{
+	const unsigned blk = IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
+	const ib_vl_arb_table_t *table[2];
+	ib_api_status_t status = IB_SUCCESS;
+	ib_port_info_t *p_pi;
+	unsigned cap[2], t, b, len;
+
+	if (!(p_pi = osm_physp_get_port_info_ptr(p)))
+		return IB_ERROR;
+
+	/* index 0: low table, index 1: high table */
+	table[0] = qcfg->vlarb_low;
+	cap[0] = p_pi->vl_arb_low_cap;
+	table[1] = qcfg->vlarb_high;
+	cap[1] = p_pi->vl_arb_high_cap;
+
+	for (t = 0; t < 2; t++) {
+		for (b = 0; b < 2 && cap[t] > b * blk; b++) {
+			/* Entries of this block: the capacity left after the
+			 * earlier blocks, at most one block's worth. A plain
+			 * cap % blk is 0 for a table of exactly two blocks
+			 * and would leave its second block unprogrammed. */
+			len = cap[t] - b * blk;
+			if (len > blk)
+				len = blk;
+			status = vlarb_update_table_block(p_req, p, port_num,
+							  &table[t][b], len,
+							  2 * t + b);
+			if (status != IB_SUCCESS)
+				return status;
+		}
+	}
+
+	return status;
+}
+
+/* Send the SL-to-VL table for (in_port, out_port) to port `p` unless the
+ * table stored in `p` for in_port already matches (IB_SUCCESS). VLs other
+ * than 15 are masked to the operational VLs. IB_ERROR without port info. */
+static ib_api_status_t
+sl2vl_update_table(osm_req_t * p_req, osm_physp_t * p, uint8_t in_port,
+		   uint8_t out_port, const ib_slvl_table_t * sl2vl_table)
+{
+	osm_madw_context_t context;
+	ib_slvl_table_t tbl, *p_tbl;
+	uint32_t attr_mod = in_port << 8 | out_port;
+	ib_port_info_t *p_pi = osm_physp_get_port_info_ptr(p);
+	unsigned op_vls, vl_mask, i;
+	uint8_t vl1, vl2;
+
+	if (!p_pi)
+		return IB_ERROR;
+
+	/* OpVLs of 0 would make the shift count negative (UB): mask to VL0 */
+	op_vls = ib_port_info_get_op_vls(p_pi);
+	vl_mask = op_vls ? (1U << (op_vls - 1)) - 1 : 0;
+
+	for (i = 0; i < IB_MAX_NUM_VLS / 2; i++) {
+		vl1 = sl2vl_table->raw_vl_by_sl[i] >> 4;
+		vl2 = sl2vl_table->raw_vl_by_sl[i] & 0xf;
+		vl1 = (vl1 == 15) ? 15 : (vl1 & vl_mask);
+		vl2 = (vl2 == 15) ? 15 : (vl2 & vl_mask);
+		tbl.raw_vl_by_sl[i] = (vl1 << 4) | vl2;
+	}
+
+	p_tbl = osm_physp_get_slvl_tbl(p, in_port);
+	if (p_tbl && !cl_memcmp(p_tbl, &tbl, sizeof(tbl)))
+		return IB_SUCCESS;
+
+	context.slvl_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
+	context.slvl_context.port_guid = osm_physp_get_port_guid(p);
+	context.slvl_context.set_method = TRUE;
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   (uint8_t *) & tbl, sizeof(tbl),
+			   IB_MAD_ATTR_SLVL_TABLE,
+			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &context);
+}
+
+/* Update the SL2VL mapping of port `p` from `qcfg`, with port_num as the
+ * output port. On a switch one table is sent per input port number below
+ * the node's physical port count; on any other node only input port 0.
+ * A port whose capability mask lacks the SL-map bit is skipped with
+ * IB_SUCCESS. Otherwise returns the first failing status, or IB_SUCCESS. */
+static ib_api_status_t sl2vl_update(osm_req_t * p_req,
+				    osm_physp_t * p, unsigned port_num,
+				    const struct qos_config *qcfg)
+{
+	ib_port_info_t *pi = osm_physp_get_port_info_ptr(p);
+	ib_api_status_t rc = IB_SUCCESS;
+	unsigned in, count = 1;
+
+	if (pi && !(pi->capability_mask & IB_PORT_CAP_HAS_SL_MAP))
+		return IB_SUCCESS;
+
+	if (osm_node_get_type(osm_physp_get_node_ptr(p)) == IB_NODE_TYPE_SWITCH)
+		count = osm_node_get_num_physp(osm_physp_get_node_ptr(p));
+
+	/* stop at the first table that fails and hand its status back */
+	for (in = 0; rc == IB_SUCCESS && in < count; in++)
+		rc = sl2vl_update_table(p_req, p, in, port_num, &qcfg->sl2vl);
+
+	return rc;
+}
+
+/* Set PortInfo:VLHighLimit of port `p` to the value in `qcfg`. Returns
+ * IB_SUCCESS without sending when the limit already matches, IB_ERROR when
+ * no PortInfo is available, otherwise the status of the Set request. */
+static ib_api_status_t vl_high_limit_update(osm_req_t * p_req,
+					    osm_physp_t * p,
+					    const struct qos_config *qcfg)
+{
+	uint8_t payload[IB_SMP_DATA_SIZE];
+	ib_port_info_t *new_pi = (ib_port_info_t *) payload;
+	ib_port_info_t *cur_pi = osm_physp_get_port_info_ptr(p);
+	osm_madw_context_t context;
+
+	if (!cur_pi)
+		return IB_ERROR;
+	if (cur_pi->vl_high_limit == qcfg->vl_high_limit)
+		return IB_SUCCESS;
+
+	/* resend the current PortInfo: new limit, link state "no change" */
+	cl_memclr(payload, IB_SMP_DATA_SIZE);
+	cl_memcpy(payload, cur_pi, sizeof(ib_port_info_t));
+	new_pi->state_info2 = 0;
+	ib_port_info_set_port_state(new_pi, IB_LINK_NO_CHANGE);
+	new_pi->vl_high_limit = qcfg->vl_high_limit;
+
+	context.pi_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
+	context.pi_context.port_guid = osm_physp_get_port_guid(p);
+	context.pi_context.set_method = TRUE;
+	context.pi_context.update_master_sm_base_lid = FALSE;
+	context.pi_context.ignore_errors = FALSE;
+	context.pi_context.light_sweep = FALSE;
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+			   cl_hton32(osm_physp_get_port_num(p)),
+			   CL_DISP_MSGID_NONE, &context);
+}
+
+/* Apply `qcfg` to physical port `p` in three stages: VLHighLimit, then the
+ * VLArbitration tables, then the SL2VL mapping tables.
+ *
+ * p_log    - log that receives the error message of a failing stage
+ * p_req    - request object the Set requests are issued through
+ * p        - physical port to configure
+ * port_num - port number handed to the table updates and shown in the log
+ * qcfg     - settings to apply
+ *
+ * A failing stage is logged and the stages after it are skipped.
+ *
+ * Returns the status of the failing stage, or IB_SUCCESS when all three
+ * stages succeeded.
+ */
+static ib_api_status_t qos_physp_setup(osm_log_t * p_log, osm_req_t * p_req,
+				       osm_physp_t * p, unsigned port_num,
+				       const struct qos_config *qcfg)
+{
+	ib_api_status_t rc;
+
+	/* OpVLs is expected to be settled by now and is used as reported */
+
+	if ((rc = vl_high_limit_update(p_req, p, qcfg)) != IB_SUCCESS)
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update VLHighLimit "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+	else if ((rc = vlarb_update(p_req, p, port_num, qcfg)) != IB_SUCCESS)
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update VLArbitration tables "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+	else if ((rc = sl2vl_update(p_req, p, port_num, qcfg)) != IB_SUCCESS)
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update SL2VLMapping tables "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+
+	return rc;
+}
+
+/* Apply the configured QoS settings to every port in the port GUID table,
+ * holding p_osm->lock exclusively. Switch ports 1 and up use the "swe"
+ * options, an enhanced switch port 0 the "sw0" options, all other ports the
+ * "hca" options; each set falls back to the common qos_options. Failures
+ * are left to qos_physp_setup() to log. Always returns OSM_SIGNAL_DONE. */
+osm_signal_t osm_qos_setup(osm_opensm_t * p_osm)
+{
+	struct qos_config hca_config, sw0_config, swe_config;
+	struct qos_config *cfg;
+	osm_switch_t *p_sw;
+	ib_switch_info_t *p_si;
+	cl_qmap_t *p_tbl = &p_osm->subn.port_guid_tbl;
+	cl_map_item_t *p_next;
+	osm_port_t *p_port;
+	osm_physp_t *p_physp;
+	uint8_t node_type;
+	uint32_t num_physp, i;
+
+	OSM_LOG_ENTER(&p_osm->log, osm_qos_setup);
+
+	qos_build_config(&hca_config, &p_osm->subn.opt.qos_hca_options,
+			 &p_osm->subn.opt.qos_options);
+	qos_build_config(&sw0_config, &p_osm->subn.opt.qos_sw0_options,
+			 &p_osm->subn.opt.qos_options);
+	qos_build_config(&swe_config, &p_osm->subn.opt.qos_swe_options,
+			 &p_osm->subn.opt.qos_options);
+
+	cl_plock_excl_acquire(&p_osm->lock);
+
+	p_next = cl_qmap_head(p_tbl);
+	while (p_next != cl_qmap_end(p_tbl)) {
+		p_port = (osm_port_t *) p_next;
+		p_next = cl_qmap_next(p_next);
+
+		node_type = osm_node_get_type(osm_port_get_parent_node(p_port));
+		if (node_type == IB_NODE_TYPE_SWITCH) {
+			num_physp = osm_port_get_num_physp(p_port);
+			for (i = 1; i < num_physp; i++) {
+				p_physp = osm_port_get_phys_ptr(p_port, i);
+				if (!p_physp || !osm_physp_is_valid(p_physp))
+					continue;
+				qos_physp_setup(&p_osm->log, &p_osm->sm.req,
+						p_physp, i, &swe_config);
+			}
+			/* skip base port 0 */
+			p_sw = osm_get_switch_by_guid(&p_osm->subn,
+						      osm_port_get_guid(p_port));
+			if (!p_sw || !(p_si = osm_switch_get_si_ptr(p_sw)) ||
+				!ib_switch_info_is_enhanced_port_0(p_si))
+				continue;
+			cfg = &sw0_config;
+		} else {
+			cfg = &hca_config;
+		}
+
+		/* NULL-check the default port as the loop above does */
+		p_physp = osm_port_get_default_phys_ptr(p_port);
+		if (!p_physp || !osm_physp_is_valid(p_physp))
+			continue;
+
+		qos_physp_setup(&p_osm->log, &p_osm->sm.req, p_physp, 0, cfg);
+	}
+
+	cl_plock_release(&p_osm->lock);
+	OSM_LOG_EXIT(&p_osm->log);
+
+	return OSM_SIGNAL_DONE;
+}
+
+/*
+ *  QoS config stuff
+ *
+ */
+
+/* Parse an unsigned number from `str` into `*val` (strtoul, base 0); returns
+ * chars consumed, incl. one trailing non-NUL char. `delim` is not checked. */
+static int parse_one_unsigned(char *str, char delim, unsigned *val)
+{
+	char *rest;
+	*val = strtoul(str, &rest, 0);
+	return (*rest ? rest + 1 : rest) - str;
+}
+
+/* Parse "<vl>:<weight>" into `e` (vl taken modulo 15); returns chars used */
+static int parse_vlarb_entry(char *str, ib_vl_arb_element_t * e)
+{
+	unsigned vl, weight;
+	int used = parse_one_unsigned(str, ':', &vl);
+	used += parse_one_unsigned(str + used, ',', &weight);
+	e->vl = vl % 15;
+	e->weight = weight;
+	return used;
+}
+
+/* Parse two numbers into one raw byte, the first one in the high nibble */
+static int parse_sl2vl_entry(char *str, uint8_t * raw)
+{
+	unsigned hi, lo;
+	int used = parse_one_unsigned(str, ',', &hi);
+	used += parse_one_unsigned(str + used, ',', &lo);
+	*raw = (hi << 4) | (lo & 0xf);
+	return used;
+}
+
+/* Fill `cfg` from the option set `opt`; max_vls and each table string that
+ * `opt` leaves unset come from `dflt`, the high limit always from `opt`.
+ * Table entries the strings do not cover stay zero, as does a whole table
+ * whose string is unset in both option sets. */
+static void qos_build_config(struct qos_config *cfg,
+			     osm_qos_options_t * opt, osm_qos_options_t * dflt)
+{
+	const unsigned blk = IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
+	unsigned i;
+	char *p;
+
+	cl_memclr(cfg, sizeof(*cfg));
+
+	cfg->max_vls = opt->max_vls > 0 ? opt->max_vls : dflt->max_vls;
+	cfg->vl_high_limit = opt->high_limit;
+
+	p = opt->vlarb_high ? opt->vlarb_high : dflt->vlarb_high;
+	for (i = 0; p && i < 2 * blk; i++)
+		p += parse_vlarb_entry(p, &cfg->vlarb_high[i / blk].
+				       vl_entry[i % blk]);
+
+	p = opt->vlarb_low ? opt->vlarb_low : dflt->vlarb_low;
+	for (i = 0; p && i < 2 * blk; i++)
+		p += parse_vlarb_entry(p, &cfg->vlarb_low[i / blk].
+				       vl_entry[i % blk]);
+
+	p = opt->sl2vl ? opt->sl2vl : dflt->sl2vl;
+	for (i = 0; p && i < IB_MAX_NUM_VLS / 2; i++)
+		p += parse_sl2vl_entry(p, &cfg->sl2vl.raw_vl_by_sl[i]);
+}
diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
index 1aefc0b..ca46937 100644
--- a/osm/opensm/osm_state_mgr.c
+++ b/osm/opensm/osm_state_mgr.c
@@ -71,6 +71,10 @@ #include <opensm/osm_opensm.h>
 
 /**********************************************************************
  **********************************************************************/
+osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm);
+
+/**********************************************************************
+ **********************************************************************/
 void
 osm_state_mgr_construct(
    IN osm_state_mgr_t * const p_mgr )
@@ -1971,6 +1975,9 @@ osm_state_mgr_process(
                 * need to unset it. */
                p_mgr->p_subn->subnet_initialization_error = FALSE;
 
+               /* rescan configuration updates */
+               osm_subn_rescan_conf_file(&p_mgr->p_subn->opt);
+
                status = __osm_state_mgr_sweep_hop_0( p_mgr );
                if( status == IB_SUCCESS )
                {
@@ -2234,6 +2241,10 @@ osm_state_mgr_process(
 
             /* the returned signal might be DONE or DONE_PENDING */
             signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
+
+            /* the returned signal is always DONE */
+            signal = osm_qos_setup(p_mgr->p_subn->p_osm);
+
             break;
 
          default:



More information about the general mailing list