[ofa-general] Re: [PATCH] opensm: routing chaining
Al Chu
chu11 at llnl.gov
Mon Sep 29 15:17:33 PDT 2008
Hey Sasha,
Comments inlined.
On Sun, 2008-09-28 at 23:42 +0300, Sasha Khapyorsky wrote:
> From: Albert Chu <chu11 at llnl.gov>
>
> Routing chaining is the ability to configure the order in which routing
> algorithms are applied in opensm, i.e.
>
> -R ftree,updn,minhop
>
> Try using ftree routing. If ftree fails, try updn. If updn fails, try
> minhop.
>
> In order to get this done, some rearchitecture of the routing code had
> to be done b/c there is no longer an assumption that only one routing
> engine can be specified.
>
> Always setup a routing engine, assume no default "fallthrough" minhop
> routing engine. On configured routing engine failure, do minhop as
> a last resort. Stick a *next pointer into struct osm_routing_engine.
> Rearchitect routing engine usage as a list instead of a single struct.
>
> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> ---
> opensm/include/opensm/osm_opensm.h | 10 ++-
> opensm/include/opensm/osm_subnet.h | 7 +-
> opensm/include/opensm/osm_ucast_mgr.h | 2 +-
> opensm/man/opensm.8.in | 8 ++-
> opensm/opensm/main.c | 10 ++-
> opensm/opensm/osm_opensm.c | 121 +++++++++++++++++++++++----------
> opensm/opensm/osm_subnet.c | 11 ++-
> opensm/opensm/osm_ucast_file.c | 19 ++---
> opensm/opensm/osm_ucast_ftree.c | 35 ++++------
> opensm/opensm/osm_ucast_lash.c | 16 ++--
> opensm/opensm/osm_ucast_mgr.c | 119 +++++++++++++++++++++-----------
> opensm/opensm/osm_ucast_updn.c | 10 ++--
> 12 files changed, 226 insertions(+), 142 deletions(-)
>
> diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h
> index 5d45724..c121be4 100644
> --- a/opensm/include/opensm/osm_opensm.h
> +++ b/opensm/include/opensm/osm_opensm.h
> @@ -126,6 +126,7 @@ struct osm_routing_engine {
> int (*ucast_build_fwd_tables) (void *context);
> void (*ucast_dump_tables) (void *context);
> void (*delete) (void *context);
> + struct osm_routing_engine *next;
> };
> /*
> * FIELDS
> @@ -148,6 +149,9 @@ struct osm_routing_engine {
> * delete
> * The delete method, may be used for routing engine
> * internals cleanup.
> +*
> +* next
> +* Pointer to next routing engine in the list.
> */
>
> /****s* OpenSM: OpenSM/osm_opensm_t
> @@ -178,7 +182,7 @@ typedef struct osm_opensm {
> osm_log_t log;
> cl_dispatcher_t disp;
> cl_plock_t lock;
> - struct osm_routing_engine routing_engine;
> + struct osm_routing_engine *routing_engine_list;
> osm_routing_engine_type_t routing_engine_used;
> osm_stats_t stats;
> osm_console_t console;
> @@ -221,8 +225,8 @@ typedef struct osm_opensm {
> * lock
> * Shared lock guarding most OpenSM structures.
> *
> -* routing_engine
> -* Routing engine; will be initialized then used.
> +* routing_engine_list
> +* List of routing engines that should be tried for use.
> *
> * routing_engine_used
> * Indicates which routing engine was used to route a subnet.
> diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
> index f90f7ea..0c7f3b9 100644
> --- a/opensm/include/opensm/osm_subnet.h
> +++ b/opensm/include/opensm/osm_subnet.h
> @@ -182,7 +182,7 @@ typedef struct osm_subn_opt {
> char *port_prof_ignore_file;
> boolean_t port_profile_switch_nodes;
> boolean_t sweep_on_trap;
> - char *routing_engine_name;
> + char *routing_engine_names;
> boolean_t connect_roots;
> char *lid_matrix_dump_file;
> char *lfts_file;
> @@ -353,9 +353,8 @@ typedef struct osm_subn_opt {
> * sweep_on_trap
> * Received traps will initiate a new sweep.
> *
> -* routing_engine_name
> -* Name of used routing engine
> -* (other than default Min Hop Algorithm)
> +* routing_engine_names
> +* Name of routing engine(s) to use.
> *
> * connect_roots
> * The option which will enforce root to root connectivity with
> diff --git a/opensm/include/opensm/osm_ucast_mgr.h b/opensm/include/opensm/osm_ucast_mgr.h
> index 1dc9a37..59ba9fa 100644
> --- a/opensm/include/opensm/osm_ucast_mgr.h
> +++ b/opensm/include/opensm/osm_ucast_mgr.h
> @@ -264,7 +264,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr,
> *
> * SYNOPSIS
> */
> -void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
> +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
> /*
> * PARAMETERS
> * p_mgr
> diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
> index 565c5f8..6790d11 100644
> --- a/opensm/man/opensm.8.in
> +++ b/opensm/man/opensm.8.in
> @@ -9,7 +9,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
> [\-F | \-\-config <file_name>] [\-c(reate-config) <file_name>]
> [\-g(uid) <GUID in hex>] [\-l(mc) <LMC>]
> [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)]
> -[\-R <engine name> | \-\-routing_engine <engine name>]
> +[\-R <engine name(s)> | \-\-routing_engine <engine name(s)>]
> [\-z | \-\-connect_roots]
> [\-M <file name> | \-\-lid_matrix_file <file name>]
> [\-U <file name> | \-\-lfts_file <file name>]
> @@ -116,8 +116,10 @@ Without -r, OpenSM attempts to preserve existing
> LID assignments resolving multiple use of same LID.
> .TP
> \fB\-R\fR, \fB\-\-routing_engine\fR
> -This option chooses routing engine instead of Min Hop
> -algorithm (default).
> +This option chooses routing engine(s) to use instead of Min Hop
> +algorithm (default). Multiple routing engines can be specified
> +separated by commas so that specific ordering of routing algorithms
> +will be tried if earlier routing engines fail.
> Supported engines: minhop, updn, file, ftree, lash, dor
> .TP
> \fB\-z\fR, \fB\-\-connect_roots\fR
> diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
> index 01bfddf..2f53157 100644
> --- a/opensm/opensm/main.c
> +++ b/opensm/opensm/main.c
> @@ -177,8 +177,10 @@ static void show_usage(void)
> " LID assignments resolving multiple use of same LID.\n\n");
> printf("-R\n"
> "--routing_engine <engine name>\n"
> - " This option chooses routing engine instead of Min Hop\n"
> - " algorithm (default).\n"
> + " This option chooses routing engine(s) to use instead of default\n"
> + " Min Hop algorithm. Multiple routing engines can be specified\n"
> + " separated by commas so that specific ordering of routing\n"
> + " algorithms will be tried if earlier routing engines fail.\n"
> " Supported engines: updn, file, ftree, lash, dor\n\n");
> printf("-z\n"
> "--connect_roots\n"
> @@ -851,8 +853,8 @@ int main(int argc, char *argv[])
> break;
>
> case 'R':
> - opt.routing_engine_name = optarg;
> - printf(" Activate \'%s\' routing engine\n", optarg);
> + opt.routing_engine_names = optarg;
> + printf(" Activate \'%s\' routing engine(s)\n", optarg);
> break;
>
> case 'z':
> diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
> index d17fed3..4970d0c 100644
> --- a/opensm/opensm/osm_opensm.c
> +++ b/opensm/opensm/osm_opensm.c
> @@ -61,24 +61,23 @@
>
> struct routing_engine_module {
> const char *name;
> - int (*setup) (osm_opensm_t * p_osm);
> + int (*setup) (struct osm_routing_engine *, osm_opensm_t *);
> };
>
> -extern int osm_ucast_updn_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_file_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_ftree_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_lash_setup(osm_opensm_t * p_osm);
> -
> -static int osm_ucast_null_setup(osm_opensm_t * p_osm);
> +extern int osm_ucast_minhop_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_updn_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_file_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_ftree_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_lash_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_dor_setup(struct osm_routing_engine *, osm_opensm_t *);
>
> const static struct routing_engine_module routing_modules[] = {
> - {"null", osm_ucast_null_setup},
Not sure how many legacy opensm.opts files are out there, but I kept the
"null" routing engine in there just for safety. Is it ok to remove?
> - {"minhop", osm_ucast_null_setup},
> + {"minhop", osm_ucast_minhop_setup},
> {"updn", osm_ucast_updn_setup},
> {"file", osm_ucast_file_setup},
> {"ftree", osm_ucast_ftree_setup},
> {"lash", osm_ucast_lash_setup},
> - {"dor", osm_ucast_null_setup},
> + {"dor", osm_ucast_dor_setup},
> {NULL, NULL}
> };
>
> @@ -135,33 +134,77 @@ osm_routing_engine_type_t osm_routing_engine_type(IN const char *str)
>
> /**********************************************************************
> **********************************************************************/
> -static int setup_routing_engine(osm_opensm_t * p_osm, const char *name)
> +static void append_routing_engine(osm_opensm_t *osm,
> + struct osm_routing_engine *routing_engine)
> {
> - const struct routing_engine_module *r;
> + struct osm_routing_engine *r;
> +
> + routing_engine->next = NULL;
> +
> + if (!osm->routing_engine_list) {
> + osm->routing_engine_list = routing_engine;
> + return;
> + }
> +
> + r = osm->routing_engine_list;
> + while (r->next)
> + r = r->next;
>
> - for (r = routing_modules; r->name && *r->name; r++) {
> - if (!strcmp(r->name, name)) {
> - p_osm->routing_engine.name = r->name;
> - if (r->setup(p_osm)) {
> - OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> + r->next = routing_engine;
> +}
> +
> +static void setup_routing_engine(osm_opensm_t *osm, const char *name)
> +{
> + struct osm_routing_engine *re;
> + const struct routing_engine_module *m;
> +
> + for (m = routing_modules; m->name && *m->name; m++) {
> + if (!strcmp(m->name, name)) {
> + re = malloc(sizeof(struct osm_routing_engine));
> + if (!re) {
> + OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
> + "memory allocation failed\n");
> + return;
> + }
> + memset(re, 0, sizeof(struct osm_routing_engine));
> +
> + re->name = m->name;
> + if (m->setup(re, osm)) {
> + OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
> "setup of routing"
> " engine \'%s\' failed\n", name);
> - return -2;
> + return;
> }
> - OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
> - "\'%s\' routing engine set up\n",
> - p_osm->routing_engine.name);
> - return 0;
> + OSM_LOG(&osm->log, OSM_LOG_DEBUG,
> + "\'%s\' routing engine set up\n", re->name);
> + append_routing_engine(osm, re);
> + return;
> }
> }
> - return -1;
> +
> + OSM_LOG(&osm->log, OSM_LOG_ERROR,
> + "cannot find or setup routing engine \'%s\'", name);
> }
>
> -static int osm_ucast_null_setup(osm_opensm_t * p_osm)
> +static void setup_routing_engines(osm_opensm_t *osm, const char *engine_names)
> {
> - OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> - "nothing yet - using default (minhop) routing engine\n");
> - return 0;
> + char *name, *str, *p;
> +
> + if (!engine_names || !*engine_names) {
> + setup_routing_engine(osm, "minhop");
> + return;
> + }
> +
> + str = strdup(engine_names);
> + name = strtok_r(str, ", \t\n", &p);
> + while (name && *name) {
> + setup_routing_engine(osm, name);
> + name = strtok_r(NULL, ", \t\n", &p);
> + }
> + free(str);
> +
> + if (!osm->routing_engine_list)
> + setup_routing_engine(osm, "minhop");
> }
>
> /**********************************************************************
> @@ -181,6 +224,20 @@ void osm_opensm_construct(IN osm_opensm_t * const p_osm)
>
> /**********************************************************************
> **********************************************************************/
> +static void destroy_routing_engines(osm_opensm_t *osm)
> +{
> + struct osm_routing_engine *r, *next;
> +
> + next = osm->routing_engine_list;
> + while (next) {
> + r = next;
> + next = r->next;
> + if (r->delete)
> + r->delete(r->context);
> + free(r);
> + }
> +}
> +
> void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
> {
> /* in case of shutdown through exit proc - no ^C */
> @@ -218,8 +275,7 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
> osm_sa_db_file_dump(p_osm);
>
> /* do the destruction in reverse order as init */
> - if (p_osm->routing_engine.delete)
> - p_osm->routing_engine.delete(p_osm->routing_engine.context);
> + destroy_routing_engines(p_osm);
> osm_sa_destroy(&p_osm->sa);
> osm_sm_destroy(&p_osm->sm);
> #ifdef ENABLE_OSM_PERF_MGR
> @@ -371,12 +427,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
> goto Exit;
> #endif /* ENABLE_OSM_PERF_MGR */
>
> - if (p_opt->routing_engine_name &&
> - setup_routing_engine(p_osm, p_opt->routing_engine_name))
> - OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> - "cannot find or setup routing engine"
> - " \'%s\'. Default will be used instead\n",
> - p_opt->routing_engine_name);
> + setup_routing_engines(p_osm, p_opt->routing_engine_names);
>
> p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
>
> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
> index 278aa3d..a39ce75 100644
> --- a/opensm/opensm/osm_subnet.c
> +++ b/opensm/opensm/osm_subnet.c
> @@ -442,7 +442,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const p_opt)
> p_opt->port_prof_ignore_file = NULL;
> p_opt->port_profile_switch_nodes = FALSE;
> p_opt->sweep_on_trap = TRUE;
> - p_opt->routing_engine_name = NULL;
> + p_opt->routing_engine_names = NULL;
> p_opt->connect_roots = FALSE;
> p_opt->lid_matrix_dump_file = NULL;
> p_opt->lfts_file = NULL;
> @@ -1264,7 +1264,7 @@ int osm_subn_parse_conf_file(char *file_name, osm_subn_opt_t * const p_opts)
> p_key, p_val, &p_opts->sweep_on_trap);
>
> opts_unpack_charp("routing_engine",
> - p_key, p_val, &p_opts->routing_engine_name);
> + p_key, p_val, &p_opts->routing_engine_names);
>
> opts_unpack_boolean("connect_roots",
> p_key, p_val, &p_opts->connect_roots);
> @@ -1521,9 +1521,12 @@ int osm_subn_write_conf_file(char *file_name, IN osm_subn_opt_t *const p_opts)
>
> fprintf(opts_file,
> "# Routing engine\n"
> + "# Multiple routing engines can be specified separated by\n"
> + "# commas so that specific ordering of routing algorithms will\n"
> + "# be tried if earlier routing engines fail.\n"
> "# Supported engines: minhop, updn, file, ftree, lash, dor\n"
> - "routing_engine %s\n\n", p_opts->routing_engine_name ?
> - p_opts->routing_engine_name : null_str);
> + "routing_engine %s\n\n", p_opts->routing_engine_names ?
> + p_opts->routing_engine_names : null_str);
>
> fprintf(opts_file,
> "# Connect roots (use FALSE if unsure)\n"
> diff --git a/opensm/opensm/osm_ucast_file.c b/opensm/opensm/osm_ucast_file.c
> index 3d00cb2..cbd65c1 100644
> --- a/opensm/opensm/osm_ucast_file.c
> +++ b/opensm/opensm/osm_ucast_file.c
> @@ -135,14 +135,13 @@ static int do_ucast_file_load(void *context)
> OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> "LFTs file name is not given; "
> "using default routing algorithm\n");
> - return -1;
> + return 1;
> }
>
> file = fopen(file_name, "r");
> if (!file) {
> OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6302: "
> - "cannot open ucast dump file \'%s\'; "
> - "using default routing algorithm\n", file_name);
> + "cannot open ucast dump file \'%s\': %m\n", file_name);
> return -1;
> }
>
> @@ -270,15 +269,13 @@ static int do_lid_matrix_file_load(void *context)
> OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> "lid matrix file name is not given; "
> "using default lid matrix generation algorithm\n");
> - return -1;
> + return 1;
> }
>
> file = fopen(file_name, "r");
> if (!file) {
> OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6305: "
> - "cannot open lid matrix file \'%s\'; "
> - "using default lid matrix generation algorithm\n",
> - file_name);
> + "cannot open lid matrix file \'%s\': %m\n", file_name);
> return -1;
> }
>
> @@ -389,10 +386,10 @@ static int do_lid_matrix_file_load(void *context)
> return 0;
> }
>
> -int osm_ucast_file_setup(osm_opensm_t * p_osm)
> +int osm_ucast_file_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
> {
> - p_osm->routing_engine.context = (void *)p_osm;
> - p_osm->routing_engine.build_lid_matrices = do_lid_matrix_file_load;
> - p_osm->routing_engine.ucast_build_fwd_tables = do_ucast_file_load;
> + r->context = osm;
> + r->build_lid_matrices = do_lid_matrix_file_load;
> + r->ucast_build_fwd_tables = do_ucast_file_load;
> return 0;
> }
> diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
> index 1d3233c..15168b7 100644
> --- a/opensm/opensm/osm_ucast_ftree.c
> +++ b/opensm/opensm/osm_ucast_ftree.c
> @@ -3552,8 +3552,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
> OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n");
> if (__osm_ftree_fabric_rank(p_ftree) != 0) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Failed ranking the tree - "
> - "fat-tree routing falls back to default routing\n");
> + "Failed ranking the tree\n");
> status = -1;
> goto Exit;
> }
> @@ -3567,14 +3566,12 @@ static int __osm_ftree_construct_fabric(IN void *context)
> "Populating CA & switch ports\n");
> if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric topology is not a fat-tree - "
> - "routing falls back to default routing\n");
> + "Fabric topology is not a fat-tree\n");
> status = -1;
> goto Exit;
> } else if (p_ftree->cn_num == 0) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric has no valid compute nodes - "
> - "routing falls back to default routing\n");
> + "Fabric has no valid compute nodes\n");
> status = -1;
> goto Exit;
> }
> @@ -3586,8 +3583,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
> if (__osm_ftree_fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK ||
> __osm_ftree_fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric rank is %u (should be between %u and %u) - "
> - "fat-tree routing falls back to default routing\n",
> + "Fabric rank is %u (should be between %u and %u)\n",
> __osm_ftree_fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK,
> FAT_TREE_MAX_RANK);
> status = -1;
> @@ -3600,8 +3596,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
> validation - it checks that all the CNs are at the same rank. */
> if (__osm_ftree_fabric_mark_leaf_switches(p_ftree)) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric topology is not a fat-tree - "
> - "routing falls back to default routing\n");
> + "Fabric topology is not a fat-tree\n");
> status = -1;
> goto Exit;
> }
> @@ -3619,8 +3614,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
> In any case, the first and the last switches in the array are REAL leafs. */
> if (__osm_ftree_fabric_create_leaf_switch_array(p_ftree)) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric topology is not a fat-tree - "
> - "routing falls back to default routing\n");
> + "Fabric topology is not a fat-tree\n");
> status = -1;
> goto Exit;
> }
> @@ -3640,8 +3634,7 @@ static int __osm_ftree_construct_fabric(IN void *context)
> if (!__osm_ftree_fabric_roots_provided(p_ftree) &&
> !__osm_ftree_fabric_validate_topology(p_ftree)) {
> osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> - "Fabric topology is not a fat-tree - "
> - "routing falls back to default routing\n");
> + "Fabric topology is not a fat-tree\n");
> status = -1;
> goto Exit;
> }
> @@ -3726,7 +3719,7 @@ static void __osm_ftree_delete(IN void *context)
> /***************************************************
> ***************************************************/
>
> -int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
> +int osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
> {
> ftree_fabric_t *p_ftree = __osm_ftree_fabric_create();
> if (!p_ftree)
> @@ -3734,12 +3727,10 @@ int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
>
> p_ftree->p_osm = p_osm;
>
> - p_osm->routing_engine.context = (void *)p_ftree;
> - p_osm->routing_engine.build_lid_matrices = __osm_ftree_construct_fabric;
> - p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_do_routing;
> - p_osm->routing_engine.delete = __osm_ftree_delete;
> + r->context = (void *)p_ftree;
> + r->build_lid_matrices = __osm_ftree_construct_fabric;
> + r->ucast_build_fwd_tables = __osm_ftree_do_routing;
> + r->delete = __osm_ftree_delete;
> +
> return 0;
> }
> -
> -/***************************************************
> - ***************************************************/
> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index b985e9a..ce3982f 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -785,7 +785,7 @@ static int init_lash_structures(lash_t * p_lash)
> unsigned vl_min = p_lash->vl_min;
> unsigned num_switches = p_lash->num_switches;
> osm_log_t *p_log = &p_lash->p_osm->log;
> - int status = IB_SUCCESS;
> + int status = 0;
> unsigned int i, j, k;
>
> OSM_LOG_ENTER(p_log);
> @@ -852,7 +852,7 @@ static int init_lash_structures(lash_t * p_lash)
> goto Exit;
>
> Exit_Mem_Error:
> - status = IB_ERROR;
> + status = -1;
> OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
> "Could not allocate required memory for LASH errno %d, errno %d for lack of memory\n",
> errno, ENOMEM);
> @@ -875,7 +875,7 @@ static int lash_core(lash_t * p_lash)
> int stop = 0, output_link, i_next_switch;
> int output_link2, i_next_switch2;
> int cycle_found2 = 0;
> - int status = IB_SUCCESS;
> + int status = 0;
> int *switch_bitmap = NULL; /* Bitmap to check if we have processed this pair */
>
> OSM_LOG_ENTER(p_log);
> @@ -1028,7 +1028,7 @@ static int lash_core(lash_t * p_lash)
> goto Exit;
>
> Error_Not_Enough_Lanes:
> - status = IB_ERROR;
> + status = -1;
> OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
> "Lane requirements (%d) exceed available lanes (%d)\n",
> p_lash->vl_min, lanes_needed);
> @@ -1360,15 +1360,15 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, osm_port_t * p_src_port,
> return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
> }
>
> -int osm_ucast_lash_setup(osm_opensm_t * p_osm)
> +int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
> {
> lash_t *p_lash = lash_create(p_osm);
> if (!p_lash)
> return -1;
>
> - p_osm->routing_engine.context = p_lash;
> - p_osm->routing_engine.ucast_build_fwd_tables = lash_process;
> - p_osm->routing_engine.delete = lash_delete;
> + r->context = p_lash;
> + r->ucast_build_fwd_tables = lash_process;
> + r->delete = lash_delete;
>
> return 0;
> }
> diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
> index 9d0ad13..935846c 100644
> --- a/opensm/opensm/osm_ucast_mgr.c
> +++ b/opensm/opensm/osm_ucast_mgr.c
> @@ -216,7 +216,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
> uint8_t port;
> boolean_t is_ignored_by_port_prof;
> ib_net64_t node_guid;
> - struct osm_routing_engine *p_routing_eng;
> unsigned start_from = 1;
>
> OSM_LOG_ENTER(p_mgr->p_log);
> @@ -253,8 +252,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
>
> node_guid = osm_node_get_node_guid(p_sw->p_node);
>
> - p_routing_eng = &p_mgr->p_subn->p_osm->routing_engine;
> -
> /*
> The lid matrix contains the number of hops to each
> lid from each port. From this information we determine
> @@ -269,18 +266,9 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
> /* do not try to overwrite the ppro of non existing port ... */
> is_ignored_by_port_prof = TRUE;
>
> - /* Up/Down routing can cause unreachable routes between some
> - switches so we do not report that as an error in that case */
> - if (!p_routing_eng->build_lid_matrices) {
> - OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A08: "
> - "No path to get to LID %u from switch 0x%"
> - PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
> - /* trigger a new sweep - try again ... */
> - p_mgr->p_subn->subnet_initialization_error = TRUE;
> - } else
> - OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
> - "No path to get to LID %u from switch 0x%"
> - PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
> + OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
> + "No path to get to LID %u from switch 0x%" PRIx64 "\n",
> + lid_ho, cl_ntoh64(node_guid));
> } else {
> osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
>
> @@ -583,7 +571,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * const p_map_item,
>
> /**********************************************************************
> **********************************************************************/
> -void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
> +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
> {
> uint32_t i;
> uint32_t iteration_max;
> @@ -646,6 +634,8 @@ void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
> OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
> "Min-hop propagated in %d steps\n", i);
> }
> +
> + return 0;
> }
>
> /**********************************************************************
> @@ -752,7 +742,7 @@ static void clear_prof_ignore_flag(cl_map_item_t * const p_map_item, void *ctx)
> }
> }
>
> -static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
> +static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
> {
> cl_qlist_init(&p_mgr->port_order_list);
>
> @@ -786,27 +776,56 @@ static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
> __osm_ucast_mgr_process_tbl, p_mgr);
>
> cl_qlist_remove_all(&p_mgr->port_order_list);
> +
> + return 0;
> }
>
> /**********************************************************************
> **********************************************************************/
> +static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> + int ret;
> +
> + OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
> + "building routing with \'%s\' routing algorithm...\n", r->name);
> +
> + if (!r->build_lid_matrices ||
> + (ret = r->build_lid_matrices(r->context)) > 0)
> + ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
> +
> + if (ret < 0) {
> + OSM_LOG(&osm->log, OSM_LOG_ERROR,
> + "%s: cannot build lid matrices.\n", r->name);
> + return ret;
> + }
> +
> + if (!r->ucast_build_fwd_tables ||
> + (ret = r->ucast_build_fwd_tables(r->context)) > 0)
> + ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
> +
> + if (ret < 0) {
> + OSM_LOG(&osm->log, OSM_LOG_ERROR,
> + "%s: cannot build fwd tables.\n", r->name);
> + return ret;
> + }
> +
> + osm->routing_engine_used = osm_routing_engine_type(r->name);
> +
> + return 0;
> +}
> +
> osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
> {
> osm_opensm_t *p_osm;
> struct osm_routing_engine *p_routing_eng;
> osm_signal_t signal = OSM_SIGNAL_DONE;
> cl_qmap_t *p_sw_guid_tbl;
> - int blm = 0;
> - int ubft = 0;
>
> OSM_LOG_ENTER(p_mgr->p_log);
>
> p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
> p_osm = p_mgr->p_subn->p_osm;
> - p_routing_eng = &p_osm->routing_engine;
> -
> - p_mgr->is_dor = p_routing_eng->name
> - && (strcmp(p_routing_eng->name, "dor") == 0);
> + p_routing_eng = p_osm->routing_engine_list;
>
> CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
>
> @@ -819,28 +838,19 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
>
> p_mgr->any_change = FALSE;
>
> - if (!p_routing_eng->build_lid_matrices ||
> - (blm = p_routing_eng->build_lid_matrices(p_routing_eng->context)))
> - osm_ucast_mgr_build_lid_matrices(p_mgr);
> + p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
> + while (p_routing_eng) {
> + if (!ucast_mgr_route(p_routing_eng, p_osm))
> + break;
> + p_routing_eng = p_routing_eng->next;
> + }
>
> - /*
> - Now that the lid matrices have been built, we can
> - build and download the switch forwarding tables.
> - */
> - if (!p_routing_eng->ucast_build_fwd_tables ||
> - (ubft =
> - p_routing_eng->ucast_build_fwd_tables(p_routing_eng->context)))
> + if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
> + /* If configured routing algorithm failed, use default MinHop */
> + osm_ucast_mgr_build_lid_matrices(p_mgr);
> ucast_mgr_build_lfts(p_mgr);
> -
> - /* 'file' routing engine has one unique logic corner case */
> - if (p_routing_eng->name && (strcmp(p_routing_eng->name, "file") == 0)
> - && (!blm || !ubft))
> - p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_FILE;
> - else if (!blm && !ubft)
> - p_osm->routing_engine_used =
> - osm_routing_engine_type(p_routing_eng->name);
> - else
> p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
> + }
>
> OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
> "%s tables configured on all switches\n",
> @@ -861,3 +871,28 @@ Exit:
> OSM_LOG_EXIT(p_mgr->p_log);
> return (signal);
> }
> +
> +static int ucast_build_lid_matrices(void *context)
> +{
> + return osm_ucast_mgr_build_lid_matrices(context);
> +}
> +
> +static int ucast_build_lfts(void *context)
> +{
> + return ucast_mgr_build_lfts(context);
> +}
> +
> +int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> + r->context = &osm->sm.ucast_mgr;
> + r->build_lid_matrices = ucast_build_lid_matrices;
> + r->ucast_build_fwd_tables = ucast_build_lfts;
> + return 0;
> +}
> +
> +int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> + osm_ucast_minhop_setup(r, osm);
> + osm->sm.ucast_mgr.is_dor = 1;
If dor is listed in the routing chain, all other algorithms that may
fall through into minhop's build_lfts callback (minhop, updn, file)
will be affected by the is_dor flag. Is this intended?
If we don't want to abstract it for this round, perhaps we could stick
the "is_dor" flag set/unset into ucast_mgr_route() so that is_dor is set
only when dor is being routed.
> + return 0;
> +}
> diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
> index 90e9af8..4fdcc78 100644
> --- a/opensm/opensm/osm_ucast_updn.c
> +++ b/opensm/opensm/osm_ucast_updn.c
> @@ -643,7 +643,7 @@ static int __osm_updn_call(void *ctx)
> } else {
> OSM_LOG(&p_updn->p_osm->log, OSM_LOG_INFO,
> "disabling UPDN algorithm, no root nodes were found\n");
> - ret = 1;
> + ret = -1;
> }
>
> if (osm_log_is_active(&p_updn->p_osm->log, OSM_LOG_ROUTING))
> @@ -669,7 +669,7 @@ static void __osm_updn_delete(void *context)
> free(context);
> }
>
> -int osm_ucast_updn_setup(osm_opensm_t * p_osm)
> +int osm_ucast_updn_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
> {
> updn_t *p_updn;
>
> @@ -680,9 +680,9 @@ int osm_ucast_updn_setup(osm_opensm_t * p_osm)
>
> p_updn->p_osm = p_osm;
>
> - p_osm->routing_engine.context = p_updn;
> - p_osm->routing_engine.delete = __osm_updn_delete;
> - p_osm->routing_engine.build_lid_matrices = __osm_updn_call;
> + r->context = p_updn;
> + r->delete = __osm_updn_delete;
> + r->build_lid_matrices = __osm_updn_call;
>
> return 0;
> }
The patch looks fine as a whole.
Thanks,
Al
--
Albert Chu
chu11 at llnl.gov
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
More information about the general
mailing list