[ofa-general] Re: [OpenSM] [PATCH 3/3] implement port_offsetting option
Al Chu
chu11 at llnl.gov
Tue Jun 3 09:48:50 PDT 2008
Hey Sasha,
> Maybe to move remote_dests allocation to be here and so minimize needed
> 'goto Exit' changes?
Good idea.
> Should there be sizeof(osm_switch_guid_count_t)?
Good catch. At first glance, I was concerned that would have affected
the routing. But since I'm memcmp-ing pointers to identical locations,
the incorrect 'sizeof' wouldn't have had an affect.
Thanks. I'll submit a follow up patch.
Al
On Tue, 2008-06-03 at 04:20 +0300, Sasha Khapyorsky wrote:
> Hi Al,
>
> Sorry about huge delay. Some questions are below.
>
> On 14:12 Thu 10 Apr , Al Chu wrote:
> > This is the primary patch that fiddles with the path recommendation
> > code. A few notes:
> >
> > 1) b/c I want to keep track of how many remote destinations there can
> > be, the 'remote_guids' array now stores all remote destinations, not
> > just the ones we have already forwarded to.
> >
> > 2) b/c I may need to free memory, I now "goto Exit" instead of just
> > calling 'return' many times.
> >
> > 3) Although the option is called 'port_offsetting', I actually "offset"
> > both the remote destination I send to and the port pointing towards that
> > remote destination.
> >
> > Al
> >
> > --
> > Albert Chu
> > chu11 at llnl.gov
> > 925-422-5311
> > Computer Scientist
> > High Performance Systems Division
> > Lawrence Livermore National Laboratory
>
> > From 57eb4d9bf55fbbbf39dc1c7ddfeeb2cae4776ef0 Mon Sep 17 00:00:00 2001
> > From: Albert L. Chu <chu11 at llnl.gov>
> > Date: Thu, 20 Mar 2008 16:23:13 -0700
> > Subject: [PATCH] implement port_offsetting
> >
> >
> > Signed-off-by: Albert L. Chu <chu11 at llnl.gov>
> > ---
> > opensm/include/opensm/osm_switch.h | 7 +-
> > opensm/opensm/osm_dump.c | 3 +-
> > opensm/opensm/osm_switch.c | 246 ++++++++++++++++++++++++++++++++----
> > opensm/opensm/osm_ucast_mgr.c | 14 ++-
> > 4 files changed, 239 insertions(+), 31 deletions(-)
> >
> > diff --git a/opensm/include/opensm/osm_switch.h b/opensm/include/opensm/osm_switch.h
> > index 2624d5f..45f4718 100644
> > --- a/opensm/include/opensm/osm_switch.h
> > +++ b/opensm/include/opensm/osm_switch.h
> > @@ -997,7 +997,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > IN const boolean_t dor,
> > IN OUT osm_switch_guid_count_t * remote_guids,
> > IN OUT uint16_t * p_num_remote_guids,
> > - IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used);
> > + IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
> > + IN uint16_t port_offsetting_lids_per_port);
> > /*
> > * PARAMETERS
> > * p_sw
> > @@ -1031,6 +1032,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > * [in out] The specific osm_switch_guid_count_t used
> > * in switch recommendations.
> > *
> > +* port_offsetting_lids_per_port
> > +* [in] If > 0, indicates lids_per_port to use with
> > +* port_offsetting option.
> > +*
> > * RETURN VALUE
> > * Returns the recommended port on which to route this LID.
> > *
> > diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c
> > index 2bac75a..37842ac 100644
> > --- a/opensm/opensm/osm_dump.c
> > +++ b/opensm/opensm/osm_dump.c
> > @@ -219,7 +219,8 @@ static void dump_ucast_routes(cl_map_item_t *p_map_item, FILE *file, void *cxt)
> > /* No LMC Optimization */
> > best_port = osm_switch_recommend_path(p_sw, p_port,
> > lid_ho, TRUE, dor,
> > - NULL, NULL, NULL);
> > + NULL, NULL,
> > + NULL, 0);
> > fprintf(file, "No %u hop path possible via port %u!",
> > best_hops, best_port);
> > }
> > diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c
> > index f346b25..8e3342b 100644
> > --- a/opensm/opensm/osm_switch.c
> > +++ b/opensm/opensm/osm_switch.c
> > @@ -55,6 +55,15 @@
> > #include <iba/ib_types.h>
> > #include <opensm/osm_switch.h>
> >
> > +/* Local structs */
> > +struct osm_switch_remote_dest {
> > + uint32_t total_paths;
> > + uint32_t min_paths;
> > + uint8_t ports[256]; /* 256 b/c max_ports is a uint8_t */
> > + unsigned int ports_count;
> > + osm_switch_guid_count_t *p_remote_guid;
> > +};
> > +
> > /**********************************************************************
> > **********************************************************************/
> > cl_status_t
> > @@ -304,9 +313,32 @@ osm_switch_find_guid_count(IN const osm_switch_t * const p_sw,
> > 1);
> > }
> >
> > +/**********************************************************************
> > + **********************************************************************/
> > +
> > +/* greatest common divisor */
> > +static unsigned int
> > +_gcd(unsigned int a, unsigned int b)
> > +{
> > + unsigned int t;
> > + while (b != 0) {
> > + t = b;
> > + b = a % b;
> > + a = t;
> > + }
> > + return a;
> > +}
> > +
> > +/* least common multiple */
> > +static unsigned int
> > +_lcm(unsigned int a, unsigned int b)
> > +{
> > + return ((a*b) / _gcd(a,b));
> > +}
> >
> > /**********************************************************************
> > **********************************************************************/
> > +
> > uint8_t
> > osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > IN osm_port_t * p_port,
> > @@ -315,7 +347,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > IN const boolean_t dor,
> > IN OUT osm_switch_guid_count_t * remote_guids,
> > IN OUT uint16_t * p_num_remote_guids,
> > - IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used)
> > + IN OUT osm_switch_guid_count_t ** p_remote_guid_count_used,
> > + IN uint16_t port_offsetting_lids_per_port)
> > {
> > /*
> > We support an enhanced LMC aware routing mode:
> > @@ -356,6 +389,20 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > osm_node_t *p_rem_node;
> > osm_node_t *p_rem_node_first = NULL;
> > osm_switch_guid_count_t *p_remote_guid = NULL;
> > + /*
> > + These vars track information for port offsetting.
> > + */
> > + boolean_t port_offsetting = remote_guids && p_num_remote_guids
> > + && p_remote_guid_count_used && port_offsetting_lids_per_port;
> > + struct osm_switch_remote_dest * remote_dests = NULL;
> > + struct osm_switch_remote_dest * p_remote_dest = NULL;
> > + uint32_t num_remote_dests = 0;
> > + uint32_t total_paths_count = 0;
> > + uint32_t num_potential_ports = 0;
> > + uint32_t lcm = 0;
> > + uint32_t indx = 0;
> > + boolean_t dest_found = FALSE;
> > + unsigned int i;
> >
> > CL_ASSERT(lid_ho > 0);
> >
> > @@ -378,9 +425,22 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> >
> > num_ports = p_sw->num_ports;
> >
> > + if (port_offsetting) {
> > + remote_dests = malloc(sizeof(struct osm_switch_remote_dest) * num_ports);
> > + if (remote_dests == NULL) {
> > + osm_log(p_sw->p_log, OSM_LOG_ERROR,
> > + "osm_switch_recommend_path: "
> > + "Cannot allocate array. Insufficient memory: "
> > + "Disabling port_offsetting\n");
> > + port_offsetting = 0;
> > + }
> > + }
> > +
> > least_hops = osm_switch_get_least_hops(p_sw, base_lid);
> > - if (least_hops == OSM_NO_PATH)
> > - return (OSM_NO_PATH);
> > + if (least_hops == OSM_NO_PATH) {
> > + best_port = OSM_NO_PATH;
> > + goto Exit;
> > + }
> >
> > /*
> > First, inquire with the forwarding table for an existing
> > @@ -417,8 +477,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > in the forwarding tables that he wants to be overridden by the
> > minimum hop function.
> > */
> > - if (hops == least_hops)
> > - return (port_num);
> > + if (hops == least_hops) {
> > + best_port = port_num;
> > + goto Exit;
> > + }
> > }
> > }
> > }
>
> Maybe to move remote_dests allocation to be here and so minimize needed
> 'goto Exit' changes?
>
> > @@ -475,7 +537,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > port_num);
> >
> > /* If not update the least hops for this case */
> > - if (!p_remote_guid) {
> > + if (!p_remote_guid
> > + || !p_remote_guid->forwarded_to) {
> > if (check_count < least_paths_other_sys) {
> > least_paths_other_sys = check_count;
> > best_port_other_sys = port_num;
> > @@ -489,7 +552,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > port_num);
> >
> > /* If not update the least hops for this case */
> > - if (!p_remote_guid
> > + if ((!p_remote_guid
> > + || !p_remote_guid->forwarded_to)
> > && check_count < least_paths_other_nodes) {
> > least_paths_other_nodes = check_count;
> > best_port_other_node = port_num;
> > @@ -498,6 +562,50 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > /* else prior sys and node guid already used */
> >
> > } /* same sys found */
> > +
> > + /* Store the new sys/node guid that we haven't seen yet */
> > + if (!p_remote_guid) {
> > + p_rem_physp = osm_physp_get_remote(p_physp);
> > + p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
> > + memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
> > + &(p_rem_node->node_info.sys_guid),
> > + sizeof(uint64_t));
> > + memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
> > + &(p_rem_node->node_info.node_guid),
> > + sizeof(uint64_t));
> > + remote_guids[*p_num_remote_guids].forwarded_to = 0;
> > + p_remote_guid = &remote_guids[*p_num_remote_guids];
> > + (*p_num_remote_guids)++;
> > + }
> > + }
> > +
> > + if (port_offsetting) {
> > + /* Keep track of the destinations we've seen so far */
> > + p_remote_dest = NULL;
> > + for (i = 0; i < num_remote_dests; i++) {
> > + if (!memcmp(p_remote_guid,
> > + (&remote_dests[i])->p_remote_guid,
> > + sizeof(struct osm_switch_remote_dest))) {
>
> Should there be sizeof(osm_switch_guid_count_t)?
>
> Sasha
>
> > + p_remote_dest = &remote_dests[i];
> > + break;
> > + }
> > + }
> > + if (!p_remote_dest) {
> > + p_remote_dest = &remote_dests[num_remote_dests];
> > + p_remote_dest->p_remote_guid = p_remote_guid;
> > + p_remote_dest->total_paths = 0;
> > + p_remote_dest->min_paths = 0xFFFFFFFF;
> > + p_remote_dest->ports_count = 0;
> > + num_remote_dests++;
> > + }
> > + p_remote_dest->total_paths += check_count;
> > + if (check_count < p_remote_dest->min_paths)
> > + p_remote_dest->min_paths = check_count;
> > + p_remote_dest->ports[p_remote_dest->ports_count] = port_num;
> > + p_remote_dest->ports_count++;
> > +
> > + total_paths_count += check_count;
> > + num_potential_ports++;
> > }
> >
> > /* routing for LMC mode */
> > @@ -523,21 +631,23 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > best_port = port_num;
> > least_paths = check_count;
> > if (routing_for_lmc
> > - && p_remote_guid
> > + && p_remote_guid->forwarded_to
> > && p_remote_guid->forwarded_to < least_forwarded_to)
> > least_forwarded_to = p_remote_guid->forwarded_to;
> > }
> > else if (routing_for_lmc
> > - && p_remote_guid
> > && check_count == least_paths
> > + && p_remote_guid->forwarded_to
> > && p_remote_guid->forwarded_to < least_forwarded_to) {
> > least_forwarded_to = p_remote_guid->forwarded_to;
> > best_port = port_num;
> > }
> > }
> >
> > - if (port_found == FALSE)
> > - return (OSM_NO_PATH);
> > + if (port_found == FALSE) {
> > + best_port = OSM_NO_PATH;
> > + goto Exit;
> > + }
> >
> > /*
> > if we are in enhanced routing mode and the best port is not
> > @@ -555,24 +665,110 @@ osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
> > remote_guids,
> > p_num_remote_guids,
> > best_port);
> > + /* Must be stored */
> > + CL_ASSERT(p_remote_guid);
> > + *p_remote_guid_count_used = p_remote_guid;
> > + }
> >
> > - if (!p_remote_guid) {
> > - /* track the remote node and system of the port used. */
> > - p_physp = osm_node_get_physp_ptr(p_sw->p_node, best_port);
> > - p_rem_physp = osm_physp_get_remote(p_physp);
> > - p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
> > - memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
> > - &(p_rem_node->node_info.sys_guid),
> > - sizeof(uint64_t));
> > - memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
> > - &(p_rem_node->node_info.node_guid),
> > - sizeof(uint64_t));
> > - remote_guids[*p_num_remote_guids].forwarded_to = 0;
> > - (*p_num_remote_guids)++;
> > + /*
> > + * As an example of what we're trying to do with port
> > + * offsetting, assume LMC = 2 and we are trying to route
> > + * the lids of 4 ports. The lids of these 4 ports are:
> > + *
> > + * (1,2,3,4)
> > + * (5,6,7,8)
> > + * (9,10,11,12)
> > + * (13,14,15,16)
> > + *
> > + * Suppose forwarding to all these lids goes through
> > + * 4 specific switch ports. If we just cycle through
> > + * ports and lids in a normal iterative fashion, we would
> > + * normally forward out ports in this manner.
> > + *
> > + * switch port 1: 1, 5, 9, 13
> > + * switch port 2: 2, 6, 10, 14
> > + * switch port 3: 3, 7, 11, 15
> > + * switch port 4: 4, 8, 12, 1
> > + *
> > + * Note that the base lid of each port (lids 1, 5, 9, 13)
> > + * are all routed out of switch port 1. Thus, if the user
> > + * only uses the base lid of each port, they will get pretty
> > + * bad performance. We will try to get this layout instead.
> > + *
> > + * switch port 1: 1, 8, 11, 14
> > + * switch port 2: 2, 5, 12, 15
> > + * switch port 3: 3, 6, 9, 16
> > + * switch port 4: 4, 7, 10, 13
> > + *
> > + * where switch ports are distributed in a more even manner.
> > + * The base lid of each port is now distributed evenly
> > + * across all 4 switch ports. The remaining lids are still
> > + * distributed evenly across all the remaining switch ports.
> > + *
> > + * In order to accomplish this, we (effectively) will iterate
> > + * through all ports like before, but instead of iterating from
> > + * 0 to N-1 all the time, we will select the starting index
> > + * based on the number of paths we have routed thus far.
> > + */
> > +
> > + /* We will not do port offsetting if num_potential_ports == 1
> > + * b/c there is no offsetting that can be done.
> > + */
> > + if (port_offsetting
> > + && best_port
> > + && num_potential_ports > 1) {
> > + /* Select which destination we want to forward to with our
> > + * offsetting loop.
> > + */
> > + lcm = _lcm(port_offsetting_lids_per_port,
> > + num_remote_dests);
> > + indx = (total_paths_count / lcm) % num_remote_dests;
> > + for (i = 0; i < num_remote_dests; i++) {
> > + p_remote_dest = &remote_dests[indx];
> > + p_remote_guid = p_remote_dest->p_remote_guid;
> > +
> > + if (p_remote_guid->forwarded_to == least_forwarded_to
> > + && p_remote_dest->min_paths == least_paths) {
> > + dest_found = TRUE;
> > + break;
> > + }
> > +
> > + indx++;
> > + if (indx >= num_remote_dests)
> > + indx = 0;
> > + }
> > +
> > + /* Then we "offset" within the potential ports we could
> > + * forward out of for this specific destination.
> > + */
> > + if (dest_found && p_remote_dest) {
> > + lcm = _lcm(port_offsetting_lids_per_port,
> > + p_remote_dest->ports_count);
> > + indx = (p_remote_dest->total_paths / lcm) % p_remote_dest->ports_count;
> > + for (i = 0; i < p_remote_dest->ports_count; i++) {
> > + port_num = p_remote_dest->ports[indx];
> > + check_count =
> > + osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);
> > + if (check_count == least_paths) {
> > + if (best_port != port_num)
> > + osm_log(p_sw->p_log, OSM_LOG_DEBUG,
> > + "osm_switch_recommend_path: "
> > + "best port offsetted: %d to %d\n",
> > + best_port, port_num);
> > + best_port = port_num;
> > + *p_remote_guid_count_used = p_remote_dest->p_remote_guid;
> > + break;
> > + }
> > + indx++;
> > + if (indx >= p_remote_dest->ports_count)
> > + indx = 0;
> > + }
> > }
> > - *p_remote_guid_count_used = p_remote_guid;
> > }
> >
> > +Exit:
> > + if (remote_dests)
> > + free(remote_dests);
> > return (best_port);
> > }
> >
> > diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
> > index 938db84..501c2c7 100644
> > --- a/opensm/opensm/osm_ucast_mgr.c
> > +++ b/opensm/opensm/osm_ucast_mgr.c
> > @@ -212,18 +212,23 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
> > osm_switch_guid_count_t *remote_guids = NULL;
> > uint16_t num_used_guids = 0;
> > osm_switch_guid_count_t *p_remote_guid_used = NULL;
> > + uint16_t port_offsetting_lids_per_port = 0;
> >
> > OSM_LOG_ENTER(p_mgr->p_log);
> >
> > if (lids_per_port > 1) {
> > - remote_guids = malloc(sizeof(osm_switch_guid_count_t) * lids_per_port);
> > + uint8_t num_ports = p_sw->num_ports;
> > + remote_guids = malloc(sizeof(osm_switch_guid_count_t) * num_ports);
> > if (remote_guids == NULL) {
> > osm_log(p_mgr->p_log, OSM_LOG_ERROR,
> > "__osm_ucast_mgr_process_port: ERR 3A09: "
> > "Cannot allocate array. Insufficient memory\n");
> > goto Exit;
> > }
> > - memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * lids_per_port);
> > + memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * num_ports);
> > +
> > + if (p_mgr->p_subn->opt.port_offsetting)
> > + port_offsetting_lids_per_port = lids_per_port;
> > }
> >
> > osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
> > @@ -270,14 +275,15 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const p_mgr,
> > p_mgr->is_dor,
> > remote_guids,
> > &num_used_guids,
> > - &p_remote_guid_used);
> > + &p_remote_guid_used,
> > + port_offsetting_lids_per_port);
> > }
> > else
> > port = osm_switch_recommend_path(p_sw, p_port, lid_ho,
> > p_mgr->p_subn->
> > ignore_existing_lfts,
> > p_mgr->is_dor,
> > - NULL, NULL, NULL);
> > + NULL, NULL, NULL, 0);
> >
> > /*
> > There might be no path to the target
> > --
> > 1.5.1
> >
>
--
Albert Chu
chu11 at llnl.gov
925-422-5311
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory
More information about the general
mailing list