[ofa-general] [PATCH] opensm: Implement weighted routing
Dale Purdy
purdy at sgi.com
Wed Feb 25 13:09:32 PST 2009
Implement a weighted routing scheme for fine tuning the lid matrix for
routing engines that use the lid matrix. An optional file containing
a switch GUID, port number, and weighting factor on each line can be
supplied to override the default hop weight factor of 1 for each switch
output port when computing the lid matrix. This allows one to alter the
min hop paths for things like routes to I/O.
Signed-off-by: Dale Purdy <purdy at sgi.com>
---
opensm/include/opensm/osm_port.h | 4 ++
opensm/include/opensm/osm_subnet.h | 1 +
opensm/man/opensm.8.in | 7 +++
opensm/opensm/main.c | 13 +++++-
opensm/opensm/osm_subnet.c | 7 +++
opensm/opensm/osm_ucast_mgr.c | 82 ++++++++++++++++++++++++++++++++++--
6 files changed, 109 insertions(+), 5 deletions(-)
diff --git a/opensm/include/opensm/osm_port.h b/opensm/include/opensm/osm_port.h
index 3dda541..ae54c9f 100644
--- a/opensm/include/opensm/osm_port.h
+++ b/opensm/include/opensm/osm_port.h
@@ -115,6 +115,7 @@ typedef struct osm_physp {
osm_pkey_tbl_t pkeys;
ib_vl_arb_table_t vl_arb[4];
cl_ptr_vector_t slvl_by_port;
+ uint8_t hop_wf;
} osm_physp_t;
/*
* FIELDS
@@ -171,6 +172,9 @@ typedef struct osm_physp {
* Switches have an entry for every other input port (inc SMA=0).
* On CAs only one per port.
*
+* hop_wf
+* Hop weighting factor to be used in the routing.
+*
* SEE ALSO
* Port
*********/
diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
index 2dfccda..6353d22 100644
--- a/opensm/include/opensm/osm_subnet.h
+++ b/opensm/include/opensm/osm_subnet.h
@@ -181,6 +181,7 @@ typedef struct osm_subn_opt {
char *console;
uint16_t console_port;
char *port_prof_ignore_file;
+ char *hop_weights_file;
boolean_t port_profile_switch_nodes;
boolean_t sweep_on_trap;
char *routing_engine_names;
diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
index 7690980..c77ecab 100644
--- a/opensm/man/opensm.8.in
+++ b/opensm/man/opensm.8.in
@@ -31,6 +31,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
[\-console [off | local | socket | loopback]]
[\-console-port <port>]
[\-i(gnore-guids) <equalize-ignore-guids-file>]
+[\-w | \-\-hop_weights_file <path to file>]
[\-f <log file path> | \-\-log_file <log file path> ]
[\-L | \-\-log_limit <size in MB>] [\-e(rase_log_file)]
[\-P(config) <partition config file> ]
@@ -233,6 +234,12 @@ This option provides the means to define a set of ports
(by node guid and port number) that will be ignored by the link load
equalization algorithm.
.TP
+\fB\-w\fR, \fB\-\-hop_weights_file\fR <path to file>
+This option provides weighting factors per port representing a hop
+cost in computing the lid matrix. The file consists of lines
+containing a switch GUID, output port, and weighting factor. Any port
+not listed in the file defaults to a weighting factor of 1.
+.TP
\fB\-x\fR, \fB\-\-honor_guid2lid\fR
This option forces OpenSM to honor the guid2lid file,
when it comes out of Standby state, if such file exists
diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
index 47fd658..f145dab 100644
--- a/opensm/opensm/main.c
+++ b/opensm/opensm/main.c
@@ -255,6 +255,10 @@ static void show_usage(void)
" This option provides the means to define a set of ports\n"
" (by guid) that will be ignored by the link load\n"
" equalization algorithm.\n\n");
+ printf("--hop_weights_file, -w <path to file>\n"
+ " This option provides the means to define a weighting\n"
+ " factor per port for customizing the least weight\n"
+ " hops for the routing.\n\n");
printf("--honor_guid2lid, -x\n"
" This option forces OpenSM to honor the guid2lid file,\n"
" when it comes out of Standby state, if such file exists\n"
@@ -524,7 +528,7 @@ int main(int argc, char *argv[])
char *conf_template = NULL, *config_file = NULL;
uint32_t val;
const char *const short_option =
- "F:c:i:f:ed:D:g:l:L:s:t:a:u:m:X:R:zM:U:S:P:Y:ANBIQvVhoryxp:n:q:k:C:";
+ "F:c:i:w:f:ed:D:g:l:L:s:t:a:u:m:X:R:zM:U:S:P:Y:ANBIQvVhoryxp:n:q:k:C:";
/*
In the array below, the 2nd parameter specifies the number
@@ -540,6 +544,7 @@ int main(int argc, char *argv[])
{"debug", 1, NULL, 'd'},
{"guid", 1, NULL, 'g'},
{"ignore_guids", 1, NULL, 'i'},
+ {"hop_weights_file", 1, NULL, 'w'},
{"lmc", 1, NULL, 'l'},
{"sweep", 1, NULL, 's'},
{"timeout", 1, NULL, 't'},
@@ -664,6 +669,12 @@ int main(int argc, char *argv[])
opt.port_prof_ignore_file);
break;
+ case 'w':
+ opt.hop_weights_file = optarg;
+ printf(" Hop Weights File = %s\n",
+ opt.hop_weights_file);
+ break;
+
case 'g':
/*
Specifies port guid with which to bind.
diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
index b3100a4..26e4481 100644
--- a/opensm/opensm/osm_subnet.c
+++ b/opensm/opensm/osm_subnet.c
@@ -322,6 +322,7 @@ static const opt_rec_t opt_tbl[] = {
{ "polling_retry_number", OPT_OFFSET(polling_retry_number), opts_parse_uint32, NULL, 1 },
{ "force_heavy_sweep", OPT_OFFSET(force_heavy_sweep), opts_parse_boolean, NULL, 1 },
{ "port_prof_ignore_file", OPT_OFFSET(port_prof_ignore_file), opts_parse_charp, NULL, 0 },
+ { "hop_weights_file", OPT_OFFSET(hop_weights_file), opts_parse_charp, NULL, 0 },
{ "port_profile_switch_nodes", OPT_OFFSET(port_profile_switch_nodes), opts_parse_boolean, NULL, 1 },
{ "sweep_on_trap", OPT_OFFSET(sweep_on_trap), opts_parse_boolean, NULL, 1 },
{ "routing_engine", OPT_OFFSET(routing_engine_names), opts_parse_charp, NULL, 0 },
@@ -727,6 +728,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt)
p_opt->qos_policy_file = strdup(OSM_DEFAULT_QOS_POLICY_FILE);
p_opt->accum_log_file = TRUE;
p_opt->port_prof_ignore_file = NULL;
+ p_opt->hop_weights_file = NULL;
p_opt->port_profile_switch_nodes = FALSE;
p_opt->sweep_on_trap = TRUE;
p_opt->use_ucast_cache = FALSE;
@@ -1359,6 +1361,11 @@ int osm_subn_output_conf(FILE *out, IN osm_subn_opt_t *const p_opts)
p_opts->port_prof_ignore_file : null_str);
fprintf(out,
+ "# The file holding routing weighting factors per output port\n"
+ "hop_weights_file %s\n\n",
+ p_opts->hop_weights_file ? p_opts->hop_weights_file : null_str);
+
+ fprintf(out,
"# Routing engine\n"
"# Multiple routing engines can be specified separated by\n"
"# commas so that specific ordering of routing algorithms will\n"
diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
index e404c91..81c3604 100644
--- a/opensm/opensm/osm_ucast_mgr.c
+++ b/opensm/opensm/osm_ucast_mgr.c
@@ -125,11 +125,11 @@ __osm_ucast_mgr_process_hop_0_1(IN cl_map_item_t * const p_map_item,
if (p_remote_node && p_remote_node->sw &&
(p_remote_node != p_sw->p_node)) {
+ osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, i);
+
remote_lid = osm_node_get_base_lid(p_remote_node, 0);
remote_lid = cl_ntoh16(remote_lid);
- osm_switch_set_hops(p_sw, remote_lid, i, 1);
- osm_switch_set_hops(p_remote_node->sw, lid, remote_port,
- 1);
+ osm_switch_set_hops(p_sw, remote_lid, i, p->hop_wf);
}
}
}
@@ -146,6 +146,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
osm_switch_t *p_sw, *p_next_sw;
uint16_t lid_ho;
uint8_t hops;
+ osm_physp_t *p;
OSM_LOG_ENTER(p_mgr->p_log);
@@ -156,6 +157,8 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
cl_ntoh64(osm_node_get_node_guid(p_remote_sw->p_node)),
port_num, remote_port_num);
+ p = osm_node_get_physp_ptr(p_this_sw->p_node, port_num);
+
p_next_sw = (osm_switch_t *) cl_qmap_head(&p_mgr->p_subn->sw_guid_tbl);
while (p_next_sw !=
(osm_switch_t *) cl_qmap_end(&p_mgr->p_subn->sw_guid_tbl)) {
@@ -166,7 +169,7 @@ __osm_ucast_mgr_process_neighbor(IN osm_ucast_mgr_t * const p_mgr,
hops = osm_switch_get_least_hops(p_remote_sw, lid_ho);
if (hops == OSM_NO_PATH)
continue;
- hops++;
+ hops += p->hop_wf;
if (hops <
osm_switch_get_hop_count(p_this_sw, lid_ho, port_num)) {
if (osm_switch_set_hops
@@ -573,6 +576,61 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
/**********************************************************************
**********************************************************************/
+/*
+ * Per-entry callback passed to parse_node_map() for the hop weights file.
+ *
+ * ctx  - the osm_ucast_mgr_t the weights are being loaded for.
+ * guid - switch node GUID of the entry, in host byte order (it is converted
+ *        with cl_hton64() for the lookup).
+ * p    - text expected to hold "<port><separator><weight>"; both numbers are
+ *        parsed with strtoul() base 0, so decimal, octal and hex are accepted.
+ *        NOTE(review): presumably the remainder of the line after the GUID -
+ *        confirm against parse_node_map().
+ *
+ * On success the weight is stored in the hop_wf field of the matching
+ * physical port.  Invalid entries (unknown switch, port 0 or out of range,
+ * weight 0 or >= 0x100) are logged at debug level and skipped.
+ *
+ * Always returns 0.
+ */
+static int set_hop_wf(void *ctx, uint64_t guid, char *p)
+{
+	osm_ucast_mgr_t *m = ctx;
+	osm_node_t *node = osm_get_node_by_guid(m->p_subn, cl_hton64(guid));
+	osm_physp_t *physp;
+	/* unsigned long matches strtoul(), so oversized values are rejected
+	   by the range checks below instead of being silently truncated */
+	unsigned long port, hop_wf;
+	char *e;
+
+	if (!node || !node->sw) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"switch with guid 0x%016" PRIx64 " is not found\n",
+			guid);
+		return 0;
+	}
+
+	if (!p || !*p || !(port = strtoul(p, &e, 0)) || (p == e) ||
+	    port >= node->sw->num_ports) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"bad port specified for guid 0x%016" PRIx64 "\n", guid);
+		return 0;
+	}
+
+	/* Step over the single separator character only if one is present;
+	   when the text ends right after the port, stay on the terminating
+	   NUL so the check below rejects the entry without reading past the
+	   end of the string. */
+	p = *e ? e + 1 : e;
+
+	if (!*p || !(hop_wf = strtoul(p, &e, 0)) || (p == e) ||
+	    (hop_wf >= 0x100)) {
+		OSM_LOG(m->p_log, OSM_LOG_DEBUG,
+			"bad hop weight factor specified for guid 0x%016" PRIx64
+			" port %lu\n", guid, port);
+		return 0;
+	}
+
+	physp = osm_node_get_physp_ptr(node, (unsigned)port);
+	if (!physp)
+		return 0;
+
+	/* hop_wf was range-checked above, so it fits the uint8_t field */
+	physp->hop_wf = (uint8_t)hop_wf;
+
+	return 0;
+}
+
+/*
+ * cl_qmap_apply_func() callback: reset the hop weighting factor of every
+ * external port (1 .. num_ports - 1) of one switch to 1.  Port 0 is not
+ * touched, and port numbers without a physp object are skipped.
+ *
+ * p_map_item - map item that is treated as the osm_switch_t itself.
+ * ctx        - unused.
+ */
+static void set_default_hop_wf(cl_map_item_t * const p_map_item, void *ctx)
+{
+	osm_switch_t *p_sw = (osm_switch_t *)p_map_item;
+	osm_physp_t *physp;
+	int port_num = 1;
+
+	while (port_num < p_sw->num_ports) {
+		physp = osm_node_get_physp_ptr(p_sw->p_node, port_num);
+		port_num++;
+		if (!physp)
+			continue;
+		physp->hop_wf = 1;
+	}
+}
+
+/**********************************************************************
+ **********************************************************************/
int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
{
uint32_t i;
@@ -585,6 +643,22 @@ int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
"Starting switches' Min Hop Table Assignment\n");
/*
+ Set up the weighting factors for the routing.
+ */
+ cl_qmap_apply_func(p_sw_guid_tbl, set_default_hop_wf, NULL);
+ if (p_mgr->p_subn->opt.hop_weights_file) {
+ OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
+ "Fetching hop weight factor file \'%s\'\n",
+ p_mgr->p_subn->opt.hop_weights_file);
+ if (parse_node_map(p_mgr->p_subn->opt.hop_weights_file,
+ set_hop_wf, p_mgr)) {
+ OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : cannot "
+ "parse hop_weights_file \'%s\'\n",
+ p_mgr->p_subn->opt.hop_weights_file);
+ }
+ }
+
+ /*
Set the switch matrices for each switch's own port 0 LID(s)
then set the lid matrices for the each switch's leaf nodes.
*/
--
1.5.6.5
More information about the general
mailing list