[openib-general] [PATCH 2/2] opensm: basic QoS implementation
Sasha Khapyorsky
sashak at voltaire.com
Mon May 8 13:03:47 PDT 2006
Basic low-level QoS implementation. The main procedure, osm_qos_setup(),
is called on every resweep, after the configuration has been re-read.
It sets up the low-level QoS-related port attributes
(PortInfo:VLHighLimit, the VLArbitration high/low tables and the
SL2VLMapping tables). Each port category (HCA ports, switch external
ports and switch port 0) is updated according to its own configuration.
Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
---
osm/include/opensm/osm_madw.h | 1
osm/opensm/Makefile.am | 2
osm/opensm/osm_qos.c | 439 +++++++++++++++++++++++++++++++++++++++++
osm/opensm/osm_state_mgr.c | 11 +
4 files changed, 452 insertions(+), 1 deletions(-)
diff --git a/osm/include/opensm/osm_madw.h b/osm/include/opensm/osm_madw.h
index 5b4ddab..4d92db4 100644
--- a/osm/include/opensm/osm_madw.h
+++ b/osm/include/opensm/osm_madw.h
@@ -352,6 +352,7 @@ typedef union _osm_madw_context
osm_smi_context_t smi_context;
osm_slvl_context_t slvl_context;
osm_pkey_context_t pkey_context;
+ osm_vla_context_t vla_context;
#ifndef OSM_VENDOR_INTF_OPENIB
osm_arbitrary_context_t arb_context;
#endif
diff --git a/osm/opensm/Makefile.am b/osm/opensm/Makefile.am
index e396dcf..ebb6295 100644
--- a/osm/opensm/Makefile.am
+++ b/osm/opensm/Makefile.am
@@ -81,7 +81,7 @@ opensm_SOURCES = main.c osm_console.c os
osm_state_mgr_ctrl.c osm_subnet.c \
osm_sweep_fail_ctrl.c osm_sw_info_rcv.c \
osm_sw_info_rcv_ctrl.c osm_switch.c \
- osm_prtn.c osm_prtn_config.c \
+ osm_prtn.c osm_prtn_config.c osm_qos.c \
osm_trap_rcv.c osm_trap_rcv_ctrl.c \
osm_ucast_mgr.c osm_ucast_updn.c \
osm_vl15intf.c osm_vl_arb_rcv.c \
diff --git a/osm/opensm/osm_qos.c b/osm/opensm/osm_qos.c
new file mode 100644
index 0000000..be27b40
--- /dev/null
+++ b/osm/opensm/osm_qos.c
@@ -0,0 +1,439 @@
+/*
+ * Copyright (c) 2006 Voltaire, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * $Id$
+ */
+
+/*
+ * Abstract:
+ * Implementation of OpenSM QoS infrastructure primitives
+ *
+ * Environment:
+ * Linux User Mode
+ *
+ * $Revision$
+ */
+
+#if HAVE_CONFIG_H
+# include <config.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <stdlib.h>
+
+#include <iba/ib_types.h>
+#include <complib/cl_qmap.h>
+#include <complib/cl_debug.h>
+#include <opensm/osm_opensm.h>
+#include <opensm/osm_subnet.h>
+
+/*
+ * Parsed QoS settings for one category of ports. Built from the textual
+ * options by qos_build_config() and consumed by the *_update() helpers.
+ */
+struct qos_config {
+ /* taken from the options by qos_build_config(); not read elsewhere in this file */
+ uint8_t max_vls;
+ /* value written to PortInfo vl_high_limit by vl_high_limit_update() */
+ uint8_t vl_high_limit;
+ /* high-priority VL arbitration entries, two blocks (sent as block_num 2 and 3) */
+ ib_vl_arb_table_t vlarb_high[2];
+ /* low-priority VL arbitration entries, two blocks (sent as block_num 0 and 1) */
+ ib_vl_arb_table_t vlarb_low[2];
+ /* SL to VL mapping; each raw byte packs two 4-bit VL values */
+ ib_slvl_table_t sl2vl;
+};
+
+static void qos_build_config(struct qos_config *cfg,
+ osm_qos_options_t * opt, osm_qos_options_t * dflt);
+
+/*
+ * QoS primitives
+ *
+ */
+
+/*
+ * Send one VLArbitration table block to a port, unless the copy cached
+ * in the physp already matches.
+ *
+ * p_req        - request object used to issue the Set.
+ * p            - physical port to program.
+ * port_num     - port number placed in the low bits of the attribute
+ *                modifier.
+ * table_block  - desired entries for this block.
+ * block_length - number of leading entries of the block to use.
+ * block_num    - zero-based block index; sent as block_num + 1.
+ *
+ * Returns IB_ERROR when the port has no PortInfo, IB_SUCCESS when the
+ * cached block already matches, otherwise the result of osm_req_set().
+ */
+static ib_api_status_t vlarb_update_table_block(osm_req_t * p_req,
+						osm_physp_t * p,
+						unsigned port_num,
+						const ib_vl_arb_table_t *table_block,
+						unsigned block_length,
+						unsigned block_num)
+{
+	ib_port_info_t *port_info = osm_physp_get_port_info_ptr(p);
+	ib_vl_arb_table_t wanted;
+	osm_madw_context_t ctx;
+	uint32_t attr_mod;
+	unsigned allowed_vls;
+	unsigned idx;
+
+	if (port_info == NULL)
+		return IB_ERROR;
+
+	/* mask derived from the port's operational VLs field */
+	allowed_vls = (1 << (ib_port_info_get_op_vls(port_info) - 1)) - 1;
+
+	/* start from an all-zero block, then overlay the used entries */
+	cl_memset(&wanted, 0, sizeof(wanted));
+	cl_memcpy(&wanted, table_block,
+		  block_length * sizeof(wanted.vl_entry[0]));
+
+	idx = 0;
+	while (idx < block_length) {
+		wanted.vl_entry[idx].vl &= allowed_vls;
+		idx++;
+	}
+
+	/* only the used entries are compared against the cached block */
+	if (cl_memcmp(&p->vl_arb[block_num], &wanted,
+		      block_length * sizeof(wanted.vl_entry[0])) == 0)
+		return IB_SUCCESS;
+
+	ctx.vla_context.set_method = TRUE;
+	ctx.vla_context.port_guid = osm_physp_get_port_guid(p);
+	ctx.vla_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
+
+	attr_mod = ((block_num + 1) << 16) | port_num;
+
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   (uint8_t *) & wanted, sizeof(wanted),
+			   IB_MAD_ATTR_VL_ARBITRATION,
+			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &ctx);
+}
+
+/*
+ * Program one VLArbitration table (low or high) of a port.
+ *
+ * blocks          - the two configured blocks of the table.
+ * cap             - number of entries the port reports for this table.
+ * first_block_num - block index of the table's lower block; the upper
+ *                   block uses first_block_num + 1.
+ *
+ * Nothing is sent when cap is 0. The upper block is only sent when cap
+ * exceeds one block.
+ */
+static ib_api_status_t vlarb_update_table(osm_req_t * p_req,
+					  osm_physp_t * p, unsigned port_num,
+					  const ib_vl_arb_table_t *blocks,
+					  unsigned cap,
+					  unsigned first_block_num)
+{
+	ib_api_status_t status;
+	unsigned len;
+
+	if (cap == 0)
+		return IB_SUCCESS;
+
+	len = cap < IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK ?
+	    cap : IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
+	status = vlarb_update_table_block(p_req, p, port_num, &blocks[0],
+					  len, first_block_num);
+	if (status != IB_SUCCESS || cap <= IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK)
+		return status;
+
+	/*
+	 * Entries left over for the upper block. This must be a
+	 * subtraction, not a modulo: a cap of exactly two blocks would
+	 * otherwise yield 0 and the upper block would never be written.
+	 * Clamp so a bogus cap cannot run past the block.
+	 */
+	len = cap - IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
+	if (len > IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK)
+		len = IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK;
+
+	return vlarb_update_table_block(p_req, p, port_num, &blocks[1],
+					len, first_block_num + 1);
+}
+
+/*
+ * Program the low- and high-priority VLArbitration tables of a port
+ * from qcfg, limited to the capacities reported in the port's PortInfo.
+ *
+ * The low table occupies block indexes 0 and 1, the high table 2 and 3.
+ *
+ * Returns IB_ERROR when the port has no PortInfo; otherwise the first
+ * failing status from a block update, or IB_SUCCESS.
+ */
+static ib_api_status_t vlarb_update(osm_req_t * p_req,
+				    osm_physp_t * p, unsigned port_num,
+				    const struct qos_config *qcfg)
+{
+	ib_api_status_t status;
+	ib_port_info_t *p_pi;
+
+	if (!(p_pi = osm_physp_get_port_info_ptr(p)))
+		return IB_ERROR;
+
+	status = vlarb_update_table(p_req, p, port_num, qcfg->vlarb_low,
+				    p_pi->vl_arb_low_cap, 0);
+	if (status != IB_SUCCESS)
+		return status;
+
+	return vlarb_update_table(p_req, p, port_num, qcfg->vlarb_high,
+				  p_pi->vl_arb_high_cap, 2);
+}
+
+/*
+ * Send one SL2VL mapping table to a port, unless the table cached in
+ * the physp for in_port already matches.
+ *
+ * Each configured VL is masked to the port's operational VLs, except
+ * the value 15, which is passed through unchanged.
+ *
+ * Returns IB_ERROR when the port has no PortInfo, IB_SUCCESS when the
+ * cached table already matches, otherwise the result of osm_req_set().
+ */
+static ib_api_status_t sl2vl_update_table(osm_req_t * p_req,
+					  osm_physp_t * p, uint8_t in_port,
+					  uint8_t out_port,
+					  const ib_slvl_table_t * sl2vl_table)
+{
+	osm_node_t *node = osm_physp_get_node_ptr(p);
+	ib_port_info_t *port_info = osm_physp_get_port_info_ptr(p);
+	ib_slvl_table_t wanted, *cached;
+	osm_madw_context_t ctx;
+	uint32_t attr_mod;
+	unsigned allowed_vls;
+	int idx;
+
+	if (port_info == NULL)
+		return IB_ERROR;
+
+	allowed_vls = (1 << (ib_port_info_get_op_vls(port_info) - 1)) - 1;
+
+	for (idx = 0; idx < IB_MAX_NUM_VLS / 2; idx++) {
+		/* two 4-bit VLs are packed into every raw byte */
+		uint8_t hi = sl2vl_table->raw_vl_by_sl[idx] >> 4;
+		uint8_t lo = sl2vl_table->raw_vl_by_sl[idx] & 0xf;
+
+		hi = (hi == 15) ? hi : (hi & allowed_vls);
+		lo = (lo == 15) ? lo : (lo & allowed_vls);
+
+		wanted.raw_vl_by_sl[idx] = (hi << 4 ) | lo ;
+	}
+
+	cached = osm_physp_get_slvl_tbl(p, in_port);
+	if (cached != NULL && cl_memcmp(cached, &wanted, sizeof(wanted)) == 0)
+		return IB_SUCCESS;
+
+	ctx.slvl_context.set_method = TRUE;
+	ctx.slvl_context.port_guid = osm_physp_get_port_guid(p);
+	ctx.slvl_context.node_guid = osm_node_get_node_guid(node);
+
+	attr_mod = in_port << 8 | out_port;
+
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   (uint8_t *) & wanted, sizeof(wanted),
+			   IB_MAD_ATTR_SLVL_TABLE,
+			   cl_hton32(attr_mod), CL_DISP_MSGID_NONE, &ctx);
+}
+
+/*
+ * Program the SL2VL mapping tables for one port.
+ *
+ * Nothing is done (IB_SUCCESS) when the port's PortInfo is available
+ * and its capability mask lacks IB_PORT_CAP_HAS_SL_MAP. On a switch
+ * node one table is sent per input port, 0 up to the node's physp
+ * count; on any other node type a single table is sent for input
+ * port 0. port_num is used as the output port.
+ *
+ * Returns the first failing status, or IB_SUCCESS.
+ */
+static ib_api_status_t sl2vl_update(osm_req_t * p_req,
+				    osm_physp_t * p, unsigned port_num,
+				    const struct qos_config *qcfg)
+{
+	ib_port_info_t *port_info = osm_physp_get_port_info_ptr(p);
+	unsigned in_port;
+	unsigned in_port_count = 1;
+
+	if (port_info != NULL &&
+	    (port_info->capability_mask & IB_PORT_CAP_HAS_SL_MAP) == 0)
+		return IB_SUCCESS;
+
+	if (osm_node_get_type(osm_physp_get_node_ptr(p)) == IB_NODE_TYPE_SWITCH)
+		in_port_count =
+		    osm_node_get_num_physp(osm_physp_get_node_ptr(p));
+
+	in_port = 0;
+	while (in_port < in_port_count) {
+		ib_api_status_t rc = sl2vl_update_table(p_req, p, in_port,
+							port_num,
+							&qcfg->sl2vl);
+		if (rc != IB_SUCCESS)
+			return rc;
+		in_port++;
+	}
+
+	return IB_SUCCESS;
+}
+
+/*
+ * Set PortInfo vl_high_limit on a port to the configured value.
+ *
+ * The Set payload is a copy of the cached PortInfo with state_info2
+ * cleared, the port state set to IB_LINK_NO_CHANGE and vl_high_limit
+ * replaced.
+ *
+ * Returns IB_ERROR when the port has no PortInfo, IB_SUCCESS when the
+ * cached value already equals the configured one, otherwise the result
+ * of osm_req_set().
+ */
+static ib_api_status_t vl_high_limit_update(osm_req_t * p_req,
+					    osm_physp_t * p,
+					    const struct qos_config *qcfg)
+{
+	uint8_t payload[IB_SMP_DATA_SIZE];
+	osm_madw_context_t ctx;
+	ib_port_info_t *cached = osm_physp_get_port_info_ptr(p);
+	ib_port_info_t *request;
+
+	if (cached == NULL)
+		return IB_ERROR;
+
+	if (cached->vl_high_limit == qcfg->vl_high_limit)
+		return IB_SUCCESS;
+
+	/* build the request in a zeroed, full-size SMP data buffer */
+	cl_memclr(payload, IB_SMP_DATA_SIZE);
+	cl_memcpy(payload, cached, sizeof(ib_port_info_t));
+
+	request = (ib_port_info_t *) payload;
+	request->state_info2 = 0;
+	ib_port_info_set_port_state(request, IB_LINK_NO_CHANGE);
+	request->vl_high_limit = qcfg->vl_high_limit;
+
+	ctx.pi_context.set_method = TRUE;
+	ctx.pi_context.light_sweep = FALSE;
+	ctx.pi_context.ignore_errors = FALSE;
+	ctx.pi_context.update_master_sm_base_lid = FALSE;
+	ctx.pi_context.port_guid = osm_physp_get_port_guid(p);
+	ctx.pi_context.node_guid =
+	    osm_node_get_node_guid(osm_physp_get_node_ptr(p));
+
+	return osm_req_set(p_req, osm_physp_get_dr_path_ptr(p),
+			   payload, sizeof(payload), IB_MAD_ATTR_PORT_INFO,
+			   cl_hton32(osm_physp_get_port_num(p)),
+			   CL_DISP_MSGID_NONE, &ctx);
+}
+
+/*
+ * Apply a QoS configuration to one physical port, in three steps:
+ * VLHighLimit, the VLArbitration tables, then the SL2VL mapping tables.
+ *
+ * The first failing step is logged at OSM_LOG_ERROR and its status is
+ * returned; later steps are not attempted. Returns IB_SUCCESS when all
+ * three steps succeed.
+ *
+ * OpVLs is expected to be already settled at this point and is used
+ * as is.
+ */
+static ib_api_status_t qos_physp_setup(osm_log_t * p_log, osm_req_t * p_req,
+				       osm_physp_t * p, unsigned port_num,
+				       const struct qos_config *qcfg)
+{
+	ib_api_status_t rc;
+
+	/* step 1: VLHighLimit */
+	if ((rc = vl_high_limit_update(p_req, p, qcfg)) != IB_SUCCESS) {
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update VLHighLimit "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+		return rc;
+	}
+
+	/* step 2: VLArbitration tables */
+	if ((rc = vlarb_update(p_req, p, port_num, qcfg)) != IB_SUCCESS) {
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update VLArbitration tables "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+		return rc;
+	}
+
+	/* step 3: SL2VL mapping tables */
+	if ((rc = sl2vl_update(p_req, p, port_num, qcfg)) != IB_SUCCESS) {
+		osm_log(p_log, OSM_LOG_ERROR, "qos_physp_setup: "
+			"failed to update SL2VLMapping tables "
+			"for port %" PRIx64 " #%d\n",
+			cl_ntoh64(p->port_guid), port_num);
+		return rc;
+	}
+
+	return IB_SUCCESS;
+}
+
+/*
+ * Apply the configured QoS settings to every port known to the subnet.
+ *
+ * Three configurations are built from the subnet options: one for HCA
+ * ports, one for switch port 0 and one for switch external ports, each
+ * falling back to the common qos_options. The port GUID table is then
+ * walked with p_osm->lock held exclusively:
+ *  - for a switch, every valid external port (1..num_physp-1) gets the
+ *    external-port configuration, and port 0 gets the port-0
+ *    configuration only when the switch reports an enhanced port 0;
+ *  - for any other node type, the default physical port gets the HCA
+ *    configuration.
+ *
+ * Per-port failures are logged by qos_physp_setup() and do not stop the
+ * walk. Always returns OSM_SIGNAL_DONE.
+ */
+osm_signal_t osm_qos_setup(osm_opensm_t * p_osm)
+{
+	struct qos_config hca_config, sw0_config, swe_config;
+	struct qos_config *cfg;
+	osm_switch_t *p_sw;
+	ib_switch_info_t *p_si;
+	cl_qmap_t *p_tbl;
+	cl_map_item_t *p_next;
+	osm_port_t *p_port;
+	uint32_t num_physp;
+	osm_physp_t *p_physp;
+	uint8_t node_type;
+	uint32_t i;
+
+	OSM_LOG_ENTER(&p_osm->log, osm_qos_setup);
+
+	qos_build_config(&hca_config, &p_osm->subn.opt.qos_hca_options,
+			 &p_osm->subn.opt.qos_options);
+	qos_build_config(&sw0_config, &p_osm->subn.opt.qos_sw0_options,
+			 &p_osm->subn.opt.qos_options);
+	qos_build_config(&swe_config, &p_osm->subn.opt.qos_swe_options,
+			 &p_osm->subn.opt.qos_options);
+
+	cl_plock_excl_acquire(&p_osm->lock);
+
+	p_tbl = &p_osm->subn.port_guid_tbl;
+	p_next = cl_qmap_head(p_tbl);
+	while (p_next != cl_qmap_end(p_tbl)) {
+		p_port = (osm_port_t *) p_next;
+		p_next = cl_qmap_next(p_next);
+
+		node_type = osm_node_get_type(osm_port_get_parent_node(p_port));
+		if (node_type == IB_NODE_TYPE_SWITCH) {
+			/* switch external ports */
+			num_physp = osm_port_get_num_physp(p_port);
+			for (i = 1; i < num_physp; i++) {
+				p_physp = osm_port_get_phys_ptr(p_port, i);
+				if (!p_physp || !osm_physp_is_valid(p_physp))
+					continue;
+				/* failure is logged inside; keep going */
+				(void)qos_physp_setup(&p_osm->log,
+						      &p_osm->sm.req,
+						      p_physp, i, &swe_config);
+			}
+
+			/*
+			 * Port 0 is configured only when the switch
+			 * reports an enhanced port 0; a base port 0 is
+			 * skipped.
+			 */
+			p_sw = osm_get_switch_by_guid(&p_osm->subn,
+						      osm_port_get_guid(p_port));
+			if (!p_sw || !(p_si = osm_switch_get_si_ptr(p_sw)) ||
+			    !ib_switch_info_is_enhanced_port_0(p_si))
+				continue;
+
+			cfg = &sw0_config;
+		}
+		else
+			cfg = &hca_config;
+
+		/* same NULL guard as used for the external ports above */
+		p_physp = osm_port_get_default_phys_ptr(p_port);
+		if (!p_physp || !osm_physp_is_valid(p_physp))
+			continue;
+
+		/* failure is logged inside; keep going */
+		(void)qos_physp_setup(&p_osm->log, &p_osm->sm.req,
+				      p_physp, 0, cfg);
+	}
+
+	cl_plock_release(&p_osm->lock);
+	OSM_LOG_EXIT(&p_osm->log);
+
+	return OSM_SIGNAL_DONE;
+}
+
+/*
+ * QoS config stuff
+ *
+ */
+
+/*
+ * Parse one unsigned number from the start of str (base auto-detected
+ * by strtoul) and store it in *val.
+ *
+ * Returns the number of characters consumed: the digits, plus one more
+ * character if anything other than the string terminator follows them.
+ * The delim argument is accepted but not consulted, so any single
+ * trailing character is skipped, whatever it is.
+ */
+static int parse_one_unsigned(char *str, char delim, unsigned *val)
+{
+	char *stop = NULL;
+	unsigned long parsed = strtoul(str, &stop, 0);
+
+	*val = parsed;
+
+	/* step over the separator, but never past the terminator */
+	if (*stop != '\0')
+		stop++;
+
+	return stop - str;
+}
+
+/*
+ * Parse one "vl:weight" pair from the start of str into *e.
+ *
+ * The VL is reduced modulo 15 before being stored; the weight is
+ * stored as parsed. Returns the number of characters consumed,
+ * including the separator after each number when one is present.
+ */
+static int parse_vlarb_entry(char *str, ib_vl_arb_element_t * e)
+{
+	unsigned number;
+	int used;
+
+	used = parse_one_unsigned(str, ':', &number);
+	e->vl = number % 15;
+
+	used += parse_one_unsigned(str + used, ',', &number);
+	e->weight = number;
+
+	return used;
+}
+
+/*
+ * Parse two consecutive numbers from the start of str and pack them
+ * into one raw SL2VL byte: the first number in the upper four bits,
+ * the low four bits of the second number in the lower four bits.
+ *
+ * Returns the number of characters consumed.
+ */
+static int parse_sl2vl_entry(char *str, uint8_t * raw)
+{
+	unsigned upper, lower;
+	int used;
+
+	used = parse_one_unsigned(str, ',', &upper);
+	used += parse_one_unsigned(str + used, ',', &lower);
+
+	*raw = (upper << 4) | (lower & 0xf);
+
+	return used;
+}
+
+/*
+ * Fill both blocks of one VLArbitration table from a "vl:weight,..."
+ * string. A NULL string leaves the blocks untouched.
+ */
+static void qos_parse_vlarb_blocks(char *str, ib_vl_arb_table_t *blocks)
+{
+	int i;
+
+	if (!str)
+		return;
+
+	for (i = 0; i < 2 * IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK; i++)
+		str += parse_vlarb_entry(str,
+			&blocks[i / IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK].
+			vl_entry[i % IB_NUM_VL_ARB_ELEMENTS_IN_BLOCK]);
+}
+
+/*
+ * Build a parsed QoS configuration from textual options.
+ *
+ * cfg  - output; zeroed first, then filled in.
+ * opt  - options specific to one port category.
+ * dflt - common options used for any max_vls / vlarb_high / vlarb_low /
+ *        sl2vl value that opt leaves unset (0 or NULL).
+ *
+ * vl_high_limit is always taken from opt. When a string is missing
+ * from both opt and dflt, the corresponding table stays all-zero
+ * rather than handing a NULL pointer to the parser. A string shorter
+ * than the table leaves the remaining entries zero.
+ */
+static void qos_build_config(struct qos_config *cfg,
+			     osm_qos_options_t * opt, osm_qos_options_t * dflt)
+{
+	int i;
+	char *p;
+
+	memset(cfg, 0, sizeof(*cfg));
+
+	cfg->max_vls = opt->max_vls > 0 ? opt->max_vls : dflt->max_vls;
+	cfg->vl_high_limit = opt->high_limit;
+
+	qos_parse_vlarb_blocks(opt->vlarb_high ? opt->vlarb_high :
+			       dflt->vlarb_high, cfg->vlarb_high);
+
+	qos_parse_vlarb_blocks(opt->vlarb_low ? opt->vlarb_low :
+			       dflt->vlarb_low, cfg->vlarb_low);
+
+	p = opt->sl2vl ? opt->sl2vl : dflt->sl2vl;
+	if (p) {
+		for (i = 0; i < IB_MAX_NUM_VLS / 2; i++)
+			p += parse_sl2vl_entry(p, &cfg->sl2vl.raw_vl_by_sl[i]);
+	}
+}
diff --git a/osm/opensm/osm_state_mgr.c b/osm/opensm/osm_state_mgr.c
index 1aefc0b..ca46937 100644
--- a/osm/opensm/osm_state_mgr.c
+++ b/osm/opensm/osm_state_mgr.c
@@ -71,6 +71,10 @@ #include <opensm/osm_opensm.h>
/**********************************************************************
**********************************************************************/
+osm_signal_t osm_qos_setup(IN osm_opensm_t * p_osm);
+
+/**********************************************************************
+ **********************************************************************/
void
osm_state_mgr_construct(
IN osm_state_mgr_t * const p_mgr )
@@ -1971,6 +1975,9 @@ osm_state_mgr_process(
* need to unset it. */
p_mgr->p_subn->subnet_initialization_error = FALSE;
+ /* rescan configuration updates */
+ osm_subn_rescan_conf_file(&p_mgr->p_subn->opt);
+
status = __osm_state_mgr_sweep_hop_0( p_mgr );
if( status == IB_SUCCESS )
{
@@ -2234,6 +2241,10 @@ osm_state_mgr_process(
/* the returned signal might be DONE or DONE_PENDING */
signal = osm_pkey_mgr_process( p_mgr->p_subn->p_osm );
+
+ /* the returned signal is always DONE */
+ signal = osm_qos_setup(p_mgr->p_subn->p_osm);
+
break;
default:
More information about the general
mailing list