[ofa-general] [PATCH] opensm: Implement weighted routing

Dale Purdy purdy at sgi.com
Wed Feb 25 13:09:32 PST 2009


Implement a weighted routing scheme for fine-tuning the lid matrix for
routing engines that use the lid matrix.  An optional file containing
a switch GUID, port, and weighting factor on each line can be
supplied to override the default hop weighting factor of 1 for each
switch output port when computing the lid matrix.  This allows one to
alter the min hop paths for things like routes to I/O.

Signed-off-by: Dale Purdy <purdy at sgi.com>
---
 opensm/include/opensm/osm_port.h   |    4 ++
 opensm/include/opensm/osm_subnet.h |    1 +
 opensm/man/opensm.8.in             |    7 +++
 opensm/opensm/main.c               |   13 +++++-
 opensm/opensm/osm_subnet.c         |    7 +++
 opensm/opensm/osm_ucast_mgr.c      |   82 ++++++++++++++++++++++++++++++++++--
 6 files changed, 109 insertions(+), 5 deletions(-)

diff --git a/opensm/include/opensm/osm_port.h b/opensm/include/opensm/osm_port.h
index 3dda541..ae54c9f 100644
--- a/opensm/include/opensm/osm_port.h
+++ b/opensm/include/opensm/osm_port.h
@@ -115,6 +115,7 @@ typedef struct osm_physp {
 	osm_pkey_tbl_t pkeys;
 	ib_vl_arb_table_t vl_arb[4];
 	cl_ptr_vector_t slvl_by_port;
+	uint8_t hop_wf;
 } osm_physp_t;
 /*
 * FIELDS
@@ -171,6 +172,9 @@ typedef struct osm_physp {
 *		Switches have an entry for every other input port (inc SMA=0).
 *		On CAs only one per port.
 *
+*	hop_wf
+*		Hop weighting factor to be used in the routing.
+*
 * SEE ALSO
 *	Port
 *********/
diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 2dfccda..6353d22 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -181,6 +181,7 @@ typedef struct osm_subn_opt {
 	char *console;
 	uint16_t console_port;
 	char *port_prof_ignore_file;
+	char *hop_weights_file;
 	boolean_t port_profile_switch_nodes;
 	boolean_t sweep_on_trap;
 	char *routing_engine_names;
diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
index 7690980..c77ecab 100644
--- a/opensm/man/opensm.8.in
+++ b/opensm/man/opensm.8.in
@@ -31,6 +31,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
 [\-console [off | local | socket | loopback]]
 [\-console-port <port>]
 [\-i(gnore-guids) <equalize-ignore-guids-file>]
+[\-w | \-\-hop_weights_file <path to file>]
 [\-f <log file path> | \-\-log_file <log file path> ]
 [\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)]
 [\-P(config) <partition config file> ]
@@ -233,6 +234,12 @@ This option provides the means to define a set of ports
 (by node guid and port number) that will be ignored by the link load
 equalization algorithm.
 .TP
+\fB\-w\fR, \fB\-\-hop_weights_file\fR <path to file>
+This option provides weighting factors per port representing a hop
+cost in computing the lid matrix.  The file consists of lines
+containing a switch GUID, output port, and weighting factor.  Any port
+not listed in the file defaults to a weighting factor of 1.
+.TP
 \fB\-x\fR, \fB\-\-honor_guid2lid\fR
 This option forces OpenSM to honor the guid2lid file,
 when it comes out of Standby state, if such file exists
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 47fd658..f145dab 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -255,6 +255,10 @@ static void show_usage(void)
 	       "          This option provides the means to define a set of ports\n"
 	       "          (by guid) that will be ignored by the link load\n"
 	       "          equalization algorithm.\n\n");
+	printf("--hop_weights_file, -w <path to file>\n"
+	       "          This option provides the means to define a weighting\n"
+	       "          factor per port for customizing the least weight\n"
+	       "          hops for the routing.\n\n");
 	printf("--honor_guid2lid, -x\n"
 	       "          This option forces OpenSM to honor the guid2lid file,\n"
 	       "          when it comes out of Standby state, if such file exists\n"
@@ -524,7 +528,7 @@ int main(int argc, char *argv[])
 	char *conf_template = NULL, *config_file = NULL;
 	uint32_t val;
 	const char *const short_option =
-	    "F:c:i:f:ed:D:g:l:L:s:t:a:u:m:X:R:zM:U:S:P:Y:ANBIQvVhoryxp:n:q:k:C:";
+	    "F:c:i:w:f:ed:D:g:l:L:s:t:a:u:m:X:R:zM:U:S:P:Y:ANBIQvVhoryxp:n:q:k:C:";
 
 	/*
 	   In the array below, the 2nd parameter specifies the number
@@ -540,6 +544,7 @@ int main(int argc, char *argv[])
 		{"debug", 1, NULL, 'd'},
 		{"guid", 1, NULL, 'g'},
 		{"ignore_guids", 1, NULL, 'i'},
+		{"hop_weights_file", 1, NULL, 'w'},
 		{"lmc", 1, NULL, 'l'},
 		{"sweep", 1, NULL, 's'},
 		{"timeout", 1, NULL, 't'},
@@ -664,6 +669,12 @@ int main(int argc, char *argv[])
 			       opt.port_prof_ignore_file);
 			break;
 
+		case 'w':
+			opt.hop_weights_file = optarg;
+			printf(" Hop Weights File = %s\n",
+			       opt.hop_weights_file);
+			break;
+
 		case 'g':
 			/*
 			   Specifies port guid with which to bind.
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index b3100a4..26e4481 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -322,6 +322,7 @@ static const opt_rec_t opt_tbl[] = {
 	{ "polling_retry_number", OPT_OFFSET(polling_retry_number), opts_parse_uint32, NULL, 1 },
 	{ "force_heavy_sweep", OPT_OFFSET(force_heavy_sweep), opts_parse_boolean, NULL, 1 },
 	{ "port_prof_ignore_file", OPT_OFFSET(port_prof_ignore_file), opts_parse_charp, NULL, 0 },
+	{ "hop_weights_file", OPT_OFFSET(hop_weights_file), opts_parse_charp, NULL, 0 },
 	{ "port_profile_switch_nodes", OPT_OFFSET(port_profile_switch_nodes), opts_parse_boolean, NULL, 1 },
 	{ "sweep_on_trap", OPT_OFFSET(sweep_on_trap), opts_parse_boolean, NULL, 1 },
 	{ "routing_engine", OPT_OFFSET(routing_engine_names), opts_parse_charp, NULL, 0 },
@@ -727,6 +728,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt)
 	p_opt->qos_policy_file = strdup(OSM_DEFAULT_QOS_POLICY_FILE);
 	p_opt->accum_log_file = TRUE;
 	p_opt->port_prof_ignore_file = NULL;
+	p_opt->hop_weights_file = NULL;
 	p_opt->port_profile_switch_nodes = FALSE;
 	p_opt->sweep_on_trap = TRUE;
 	p_opt->use_ucast_cache = FALSE;
@@ -1359,6 +1361,11 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t *const p_opts)
 		p_opts->port_prof_ignore_file : null_str);
 
 	fprintf(out,
+ 		"# The file holding routing weighting factors per output port\n"
+ 		"hop_weights_file %s\n\n",
+ 		p_opts->hop_weights_file ? p_opts->hop_weights_file : null_str);
+ 
+ 	fprintf(out,
 		"# Routing engine\n"
 		"# Multiple routing engines can be specified separated by\n"
 		"# commas so that specific ordering of routing algorithms will\n"
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index e404c91..81c3604 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -125,11 +125,11 @@ __osm_ucast_mgr_process_hop_0_1(IN cl_map_item_t * const p_map_item,
 
 		if (p_remote_node && p_remote_node->sw &&
 		    (p_remote_node != p_sw->p_node)) {
+			osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, i);
+
 			remote_lid = osm_node_get_base_lid(p_remote_node, 0);
 			remote_lid = cl_ntoh16(remote_lid);
-			osm_switch_set_hops(p_sw, remote_lid, i, 1);
-			osm_switch_set_hops(p_remote_node->sw, lid, remote_port,
-					    1);
+			osm_switch_set_hops(p_sw, remote_lid, i, p->hop_wf);
 		}
 	}
 }
@@ -146,6 +146,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
 	osm_switch_t *p_sw, *p_next_sw;
 	uint16_t lid_ho;
 	uint8_t hops;
+	osm_physp_t *p;
 
 	OSM_LOG_ENTER(p_mgr->p_log);
 
@@ -156,6 +157,8 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
 		cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
 		port_num, remote_port_num);
 
+	p = osm_node_get_physp_ptr(p_this_sw->p_node, port_num);
+
 	p_next_sw = (osm_switch_t *) cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
 	while (p_next_sw !=
 	       (osm_switch_t *) cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl)) {
@@ -166,7 +169,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
 		hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
 		if (hops == OSM_NO_PATH)
 			continue;
-		hops++;
+		hops += p->hop_wf;
 		if (hops <
 		    osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
 			if (osm_switch_set_hops
@@ -573,6 +576,61 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
 
 /**********************************************************************
  **********************************************************************/
+static int set_hop_wf(void *ctx, uint64_t guid, char *p)
+{
+	/* parse_node_map() callback; p is expected to hold "<port> <weight>". */
+	osm_ucast_mgr_t *m = ctx;
+	osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
+	osm_physp_t *physp;
+	unsigned long port, hop_wf;	/* strtoul()'s type: no silent truncation */
+	char *e;
+
+	if (!node || !node->sw) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"switch with guid 0x%016" PRIx64 " is not found\n",
+			guid);
+		return 0;
+	}
+
+	if (!p || !*p || !(port = strtoul(p, &e, 0)) || (p == e) ||
+	    port >= node->sw->num_ports) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"bad port specified for guid 0x%016" PRIx64 "\n", guid);
+		return 0;
+	}
+
+	/* Skip the one-character separator, but never step past the NUL. */
+	p = *e ? e + 1 : e;
+	if (!*p || !(hop_wf = strtoul(p, &e, 0)) || (p == e) ||
+		(hop_wf >= 0x100)) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"bad hop weight factor specified for guid 0x%016" PRIx64 " port %lu\n",
+			guid, port);
+		return 0;
+	}
+
+	physp = osm_node_get_physp_ptr(node, port);
+	if (physp)
+		physp->hop_wf = (uint8_t)hop_wf;
+
+	/* Bad entries are only logged at debug level; 0 is always returned. */
+	return 0;
+}
+
+static void set_default_hop_wf(cl_map_item_t * const p_map_item, void *ctx)
+{
+	osm_switch_t *p_sw = (osm_switch_t *)p_map_item;
+	osm_physp_t *p_physp;
+	int port_num;
+
+	/* Ports 1..num_ports-1 that have a physp get weight 1; ctx is unused. */
+	for (port_num = 1; port_num < p_sw->num_ports; port_num++)
+		if ((p_physp = osm_node_get_physp_ptr(p_sw->p_node, port_num)))
+			p_physp->hop_wf = 1;
+}
+
+/**********************************************************************
+ **********************************************************************/
 int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
 {
 	uint32_t i;
@@ -585,6 +643,22 @@ int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
 		"Starting switches' Min Hop Table Assignment\n");
 
 	/*
+	   Set up the weighting factors for the routing.
+	*/
+	cl_qmap_apply_func(p_sw_guid_tbl, set_default_hop_wf, NULL);
+	if (p_mgr->p_subn->opt.hop_weights_file) {
+		OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+			"Fetching hop weight factor file \'%s\'\n",
+			p_mgr->p_subn->opt.hop_weights_file);
+		if (parse_node_map(p_mgr->p_subn->opt.hop_weights_file,
+				   set_hop_wf, p_mgr)) {
+			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : cannot "
+				"parse hop_weights_file \'%s\'\n",
+				p_mgr->p_subn->opt.hop_weights_file);
+		}
+	}
+
+	/*
 	   Set the switch matrices for each switch's own port 0 LID(s)
 	   then set the lid matrices for the each switch's leaf nodes.
 	 */
-- 
1.5.6.5




More information about the general mailing list