[ofa-general] [PATCH RFC] opensm: sort port order for routing by switch loads

Eli Dorfman (Voltaire) dorfman.eli at gmail.com
Wed Jan 21 07:48:35 PST 2009


Sasha Khapyorsky wrote:
> It follows "port order" routing load balancer improvements
> (implemented using "--guid_routing_order_file" command line option).
> 
> The idea of this patch is about default behavior and it is to balance
> routing paths in such order that most loaded links enter balancer first
> - in most cases it should provide a better performance than just
> random balancing (as it is done now by default).
> 
> The implementation is simple - endport list for load balancer is reverse
> sorted by number of active endport links of leaf switches.
> 
> Signed-off-by: Sasha Khapyorsky <sashak at voltaire.com>
> ---
> 
> Comments are appreciated.
> 
> Sasha
> 
>  opensm/opensm/osm_ucast_mgr.c |   58 ++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 57 insertions(+), 1 deletions(-)
> 
> diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
> index 96921a0..58a6714 100644
> --- a/opensm/opensm/osm_ucast_mgr.c
> +++ b/opensm/opensm/osm_ucast_mgr.c
> @@ -744,6 +744,61 @@ static void clear_prof_ignore_flag(cl_map_item_t * const p_map_item, void *ctx)
>  	}
>  }
>  
> +static void add_sw_endports_to_order_list(osm_switch_t *sw, osm_ucast_mgr_t *m)
> +{
> +	osm_port_t *port;
> +	osm_physp_t *p;
> +	int i;
> +	for (i = 1; i < sw->num_ports; i++) {
> +		p = osm_node_get_physp_ptr(sw->p_node, i);
> +		if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw) {
> +			port = osm_get_port_by_guid(m->p_subn,
> +						    p->p_remote_physp->port_guid);
> +			cl_qlist_insert_tail(&m->port_order_list,
> +					     &port->list_item);
> +			port->flag = 1;
> +		}
> +	}
> +}
> +
> +static int sw_count_endport_links(osm_switch_t * const *s)

Isn't it better to calculate this count only once per switch, before sort_ports_by_switch_load() sorts, instead of recomputing it twice in every compar_sw_load() call?

> +{
> +	const osm_switch_t *sw = *s;
> +	int i, n = 0;
> +	for (i = 1; i < sw->num_ports; i++) {
> +		osm_physp_t *p = osm_node_get_physp_ptr(sw->p_node, i);
> +		if (p && p->p_remote_physp && !p->p_remote_physp->p_node->sw &&
> +		    ib_port_info_get_port_state(&p->port_info) ==
> +		    IB_LINK_ACTIVE)
> +			n++;
> +	}
> +	return n;
> +}
> +
> +static int compar_sw_load(const void *s1, const void *s2)
> +{
> +	return sw_count_endport_links(s2) - sw_count_endport_links(s1);
> +}
> +
> +static void sort_ports_by_switch_load(osm_ucast_mgr_t *m)
> +{
> +	int i, num = cl_qmap_count(&m->p_subn->sw_guid_tbl);
> +	osm_switch_t **s = malloc(num * sizeof(*s));
> +	if (!s) {
> +		OSM_LOG(m->p_log, OSM_LOG_ERROR, "ERR: "
> +			"No memory, skip by switch load sorting.\n");
> +		return;
> +	}
> +	s[0] = (osm_switch_t *)cl_qmap_head(&m->p_subn->sw_guid_tbl);
> +	for (i = 1; i < num; i++)
> +		s[i] = (osm_switch_t *)cl_qmap_next(&s[i-1]->map_item);
> +
> +	qsort(s, num, sizeof(*s), compar_sw_load);
> +
> +	for (i = 0; i < num; i++)
> +		add_sw_endports_to_order_list(s[i], m);
> +}
> +
>  static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
>  {
>  	cl_qlist_init(&p_mgr->port_order_list);
> @@ -758,7 +813,8 @@ static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
>  			OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR : "
>  				"cannot parse guid routing order file \'%s\'\n",
>  				p_mgr->p_subn->opt.guid_routing_order_file);
> -	}
> +	} else
> +		sort_ports_by_switch_load(p_mgr);
>  
>  	if (p_mgr->p_subn->opt.port_prof_ignore_file) {
>  		cl_qmap_apply_func(&p_mgr->p_subn->sw_guid_tbl,




More information about the general mailing list