[ofa-general] Re: [PATCH 1/4] opensm/osm_ucast_cache.{c,h}: ucast routing cache implementation
Sasha Khapyorsky
sashak at voltaire.com
Sun Jun 29 14:43:45 PDT 2008
Hi Yevgeny,
Sorry about the huge delay in looking at this.
On 12:59 Sun 04 May, Yevgeny Kliteynik wrote:
> Unicast routing cache implementation.
>
> Unicast routing cache comprises the following:
> - Topology: a data structure with all the switches and CAs of the fabric
> - LFTs: each switch has an LFT cached
> - Lid matrices: each switch has lid matrices cached, which is needed for
> multicast routing (which is not cached).
>
> There is also a topology matching function that compares the current topology
> with the cached one to find out whether the cache is usable (valid) or not.
As I wrote in another email, I believe that avoiding a needless
full-rerouting cycle is a good idea and is very much needed in OpenSM,
but I don't like this implementation.
In order to understand it better I had to go over the code; some
comments that may be useful are below.
Sasha
> Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il>
> ---
> opensm/include/opensm/osm_ucast_cache.h | 319 ++++++++
> opensm/opensm/osm_ucast_cache.c | 1197 +++++++++++++++++++++++++++++++
> 2 files changed, 1516 insertions(+), 0 deletions(-)
> create mode 100644 opensm/include/opensm/osm_ucast_cache.h
> create mode 100644 opensm/opensm/osm_ucast_cache.c
>
> diff --git a/opensm/include/opensm/osm_ucast_cache.h b/opensm/include/opensm/osm_ucast_cache.h
> new file mode 100644
> index 0000000..a3b40f9
> --- /dev/null
> +++ b/opensm/include/opensm/osm_ucast_cache.h
> @@ -0,0 +1,319 @@
> +/*
> + * Copyright (c) 2002-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * - Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer.
> + *
> + * - Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + * Declaration of osm_ucast_cache_t.
> + * This object represents the Unicast Cache object.
> + *
> + * Environment:
> + * Linux User Mode
> + *
> + * $Revision: 1.4 $
> + */
> +
> +#ifndef _OSM_UCAST_CACHE_H_
> +#define _OSM_UCAST_CACHE_H_
> +
> +#ifdef __cplusplus
> +# define BEGIN_C_DECLS extern "C" {
> +# define END_C_DECLS }
> +#else /* !__cplusplus */
> +# define BEGIN_C_DECLS
> +# define END_C_DECLS
> +#endif /* __cplusplus */
> +
> +BEGIN_C_DECLS
> +
> +struct _osm_ucast_mgr;
> +
> +#define UCAST_CACHE_TOPOLOGY_MATCH 0x0000
> +#define UCAST_CACHE_TOPOLOGY_LESS_SWITCHES 0x0001
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING 0x0002
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING 0x0004
> +#define UCAST_CACHE_TOPOLOGY_MORE_SWITCHES 0x0008
> +#define UCAST_CACHE_TOPOLOGY_NEW_LID 0x0010
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING 0x0020
> +#define UCAST_CACHE_TOPOLOGY_LINK_ADDED 0x0040
> +#define UCAST_CACHE_TOPOLOGY_NEW_SWITCH 0x0080
> +#define UCAST_CACHE_TOPOLOGY_NEW_CA 0x0100
> +#define UCAST_CACHE_TOPOLOGY_NO_MATCH 0x0200
> +
> +/****h* OpenSM/Unicast Manager/Unicast Cache
> +* NAME
> +* Unicast Cache
> +*
> +* DESCRIPTION
> +* The Unicast Cache object encapsulates the information
> +* needed to cache and write unicast routing of the subnet.
> +*
> +* The Unicast Cache object is NOT thread safe.
> +*
> +* This object should be treated as opaque and should be
> +* manipulated only through the provided functions.
> +*
> +* AUTHOR
> +* Yevgeny Kliteynik, Mellanox
> +*
> +*********/
> +
> +
> +/****s* OpenSM: Unicast Cache/osm_ucast_cache_t
> +* NAME
> +* osm_ucast_cache_t
> +*
> +* DESCRIPTION
> +* Unicast Cache structure.
> +*
> +* This object should be treated as opaque and should
> +* be manipulated only through the provided functions.
> +*
> +* SYNOPSIS
> +*/
> +typedef struct osm_ucast_cache_t_ {
> + struct _osm_ucast_mgr * p_ucast_mgr;
> + cl_qmap_t sw_tbl;
> + cl_qmap_t ca_tbl;
> + boolean_t topology_valid;
> + boolean_t routing_valid;
> + boolean_t need_update;
> +} osm_ucast_cache_t;
> +/*
> +* FIELDS
> +* p_ucast_mgr
> +* Pointer to the Unicast Manager for this subnet.
> +*
> +* sw_tbl
> +* Cached switches table.
> +*
> +* ca_tbl
> +* Cached CAs table.
> +*
> +* topology_valid
> +* TRUE if the cache is populated with the fabric topology.
> +*
> +* routing_valid
> +* TRUE if the cache is populated with the unicast routing
> +* in addition to the topology.
> +*
> +* need_update
> +* TRUE if the cached routing needs to be updated.
> +*
> +* SEE ALSO
> +* Unicast Manager object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_construct
> +* NAME
> +* osm_ucast_cache_construct
> +*
> +* DESCRIPTION
> +* This function constructs a Unicast Cache object.
> +*
> +* SYNOPSIS
> +*/
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(struct _osm_ucast_mgr * const p_mgr);
> +/*
> +* PARAMETERS
> +* p_mgr
> +* [in] Pointer to a Unicast Manager object.
> +*
> +* RETURN VALUE
> +* This function return the created Ucast Cache object on success,
> +* or NULL on any error.
> +*
> +* NOTES
> +* Allows osm_ucast_cache_destroy
> +*
> +* Calling osm_ucast_mgr_construct is a prerequisite to
> +* calling any other method.
> +*
> +* SEE ALSO
> +* Unicast Cache object, osm_ucast_cache_destroy
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_destroy
> +* NAME
> +* osm_ucast_cache_destroy
> +*
> +* DESCRIPTION
> +* The osm_ucast_cache_destroy function destroys the object,
> +* releasing all resources.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +* p_cache
> +* [in] Pointer to the object to destroy.
> +*
> +* RETURN VALUE
> +* This function does not return any value.
> +*
> +* NOTES
> +* Performs any necessary cleanup of the specified
> +* Unicast Cache object.
> +* Further operations should not be attempted on the
> +* destroyed object.
> +* This function should only be called after a call to
> +* osm_ucast_cache_construct.
> +*
> +* SEE ALSO
> +* Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_topo
> +* NAME
> +* osm_ucast_cache_refresh_topo
> +*
> +* DESCRIPTION
> +* The osm_ucast_cache_refresh_topo function re-reads the
> +* updated topology.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +* p_cache
> +* [in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +* This function does not return any value.
> +*
> +* NOTES
> +* This function invalidates the existing unicast cache
> +* and re-reads the updated topology.
> +*
> +* SEE ALSO
> +* Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_lid_matrices
> +* NAME
> +* osm_ucast_cache_refresh_lid_matrices
> +*
> +* DESCRIPTION
> +* The osm_ucast_cache_refresh_topo function re-reads the
> +* updated lid matrices.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +* p_cache
> +* [in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +* This function does not return any value.
> +*
> +* NOTES
> +* This function re-reads the updated lid matrices.
> +*
> +* SEE ALSO
> +* Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_apply
> +* NAME
> +* osm_ucast_cache_apply
> +*
> +* DESCRIPTION
> +* The osm_ucast_cache_apply function tries to apply
> +* the cached unicast routing on the subnet switches.
> +*
> +* SYNOPSIS
> +*/
> +int osm_ucast_cache_apply(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +* p_cache
> +* [in] Pointer to the cache object to be used.
> +*
> +* RETURN VALUE
> +* 0 if unicast cache was successfully written to switches,
> +* non-zero for any error.
> +*
> +* NOTES
> +* Compares the current topology to the cached topology,
> +* and if the topology matches, or if changes in topology
> +* have no impact on routing tables, writes the cached
> +* unicast routing to the subnet switches.
> +*
> +* SEE ALSO
> +* Unicast Cache object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_set_sw_fwd_table
> +* NAME
> +* osm_ucast_cache_set_sw_fwd_table
> +*
> +* DESCRIPTION
> +* The osm_ucast_cache_set_sw_fwd_table function sets
> +* (caches) linear forwarding table for the specified
> +* switch.
> +*
> +* SYNOPSIS
> +*/
> +void
> +osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> + uint8_t * ucast_mgr_lft_buf,
> + osm_switch_t * p_osm_sw);
> +/*
> +* PARAMETERS
> +* p_cache
> +* [in] Pointer to the cache object to be used.
> +*
> +* ucast_mgr_lft_buf
> +* [in] LFT to set.
> +*
> +* p_osm_sw
> +* [in] pointer to the switch that the LFT refers to.
> +*
> +* RETURN VALUE
> +* This function does not return any value.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +* Unicast Cache object
> +*********/
> +
> +END_C_DECLS
> +#endif /* _OSM_UCAST_MGR_H_ */
> +
> diff --git a/opensm/opensm/osm_ucast_cache.c b/opensm/opensm/osm_ucast_cache.c
> new file mode 100644
> index 0000000..4ad7c30
> --- /dev/null
> +++ b/opensm/opensm/osm_ucast_cache.c
> @@ -0,0 +1,1197 @@
> +/*
> + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses. You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + * Redistribution and use in source and binary forms, with or
> + * without modification, are permitted provided that the following
> + * conditions are met:
> + *
> + * - Redistributions of source code must retain the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer.
> + *
> + * - Redistributions in binary form must reproduce the above
> + * copyright notice, this list of conditions and the following
> + * disclaimer in the documentation and/or other materials
> + * provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + * Implementation of OpenSM Cached routing
> + *
> + * Environment:
> + * Linux User Mode
> + *
> + */
> +
> +#if HAVE_CONFIG_H
> +# include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <iba/ib_types.h>
> +#include <complib/cl_qmap.h>
> +#include <complib/cl_pool.h>
> +#include <complib/cl_debug.h>
> +#include <opensm/osm_opensm.h>
> +#include <opensm/osm_ucast_mgr.h>
> +#include <opensm/osm_ucast_cache.h>
> +#include <opensm/osm_switch.h>
> +#include <opensm/osm_node.h>
> +#include <opensm/osm_port.h>
> +
> +struct cache_sw_t_;
> +struct cache_ca_t_;
> +struct cache_port_t_;
> +
> +typedef union cache_sw_or_ca_ {
> + struct cache_sw_t_ * p_sw;
> + struct cache_ca_t_ * p_ca;
> +} cache_node_t;
> +
> +typedef struct cache_port_t_ {
> + uint8_t remote_node_type;
> + cache_node_t remote_node;
> +} cache_port_t;
> +
> +typedef struct cache_ca_t_ {
> + cl_map_item_t map_item;
> + uint16_t lid_ho;
> +} cache_ca_t;
> +
> +typedef struct cache_sw_t_ {
> + cl_map_item_t map_item;
> + uint16_t lid_ho;
> + uint16_t max_lid_ho;
> + osm_switch_t *p_osm_sw; /* pointer to the updated switch object */
> + uint8_t num_ports;
> + cache_port_t ** ports;
> + uint8_t **lid_matrix;
> + uint8_t * lft_buff;
> + boolean_t is_leaf;
Please use tabs for indentation.
> +} cache_sw_t;
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static osm_switch_t *
> +__ucast_cache_get_starting_osm_sw(osm_ucast_cache_t * p_cache)
> +{
> + osm_port_t * p_osm_port;
> + osm_node_t * p_osm_node;
> + osm_physp_t * p_osm_physp;
> +
> + CL_ASSERT(p_cache->p_ucast_mgr);
> +
> + /* find the OSM node */
> + p_osm_port = osm_get_port_by_guid(
> + p_cache->p_ucast_mgr->p_subn,
> + p_cache->p_ucast_mgr->p_subn->sm_port_guid);
> + CL_ASSERT(p_osm_port);
> +
> + p_osm_node = p_osm_port->p_node;
> + switch (osm_node_get_type(p_osm_node)) {
> + case IB_NODE_TYPE_SWITCH:
> + /* OpenSM runs on switch - we're done */
> + return p_osm_node->sw;
> +
> + case IB_NODE_TYPE_CA:
> + /* SM runs on CA - get the switch
> + that CA is connected to. */
> + p_osm_physp = p_osm_port->p_physp;
> + p_osm_physp = osm_physp_get_remote(p_osm_physp);
> + p_osm_node = osm_physp_get_node_ptr(p_osm_physp);
> + CL_ASSERT(p_osm_node);
> + return p_osm_node->sw;
> +
> + default:
> + /* SM runs on some other node - not supported */
> + return NULL;
> + }
> +} /* __ucast_cache_get_starting_osm_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_get_sw(osm_ucast_cache_t * p_cache,
> + uint16_t lid_ho)
> +{
> + cache_sw_t * p_sw;
> +
> + p_sw = (cache_sw_t *) cl_qmap_get(&p_cache->sw_tbl, lid_ho);
> + if (p_sw == (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl))
> + return NULL;
> +
> + return p_sw;
> +} /* __ucast_cache_get_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_get_ca(osm_ucast_cache_t * p_cache,
> + uint16_t lid_ho)
> +{
> + cache_ca_t * p_ca;
> +
> + p_ca = (cache_ca_t *) cl_qmap_get(&p_cache->ca_tbl, lid_ho);
> + if (p_ca == (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl))
> + return NULL;
> +
> + return p_ca;
> +} /* __ucast_cache_get_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_port_t *
> +__ucast_cache_add_port(osm_ucast_cache_t * p_cache,
> + uint8_t remote_node_type,
> + uint16_t lid_ho)
> +{
> + cache_port_t * p_port = (cache_port_t *) malloc(sizeof(cache_port_t));
> + memset(p_port, 0, sizeof(cache_port_t));
> +
> + p_port->remote_node_type = remote_node_type;
> + if (remote_node_type == IB_NODE_TYPE_SWITCH)
> + {
> + cache_sw_t * p_sw = __ucast_cache_get_sw(
> + p_cache, lid_ho);
> + CL_ASSERT(p_sw);
> + p_port->remote_node.p_sw = p_sw;
> + }
> + else {
> + cache_ca_t * p_ca = __ucast_cache_get_ca(
> + p_cache, lid_ho);
> + CL_ASSERT(p_ca);
> + p_port->remote_node.p_ca = p_ca;
> + }
> +
> + return p_port;
> +} /* __ucast_cache_add_port() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_add_sw(osm_ucast_cache_t * p_cache,
> + osm_switch_t * p_osm_sw)
> +{
> + cache_sw_t *p_sw = (cache_sw_t*)malloc(sizeof(cache_sw_t));
> + memset(p_sw, 0, sizeof(cache_sw_t));
> +
> + p_sw->p_osm_sw = p_osm_sw;
> +
> + p_sw->lid_ho =
> + cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> +
> + p_sw->num_ports = osm_node_get_num_physp(p_osm_sw->p_node);
> + p_sw->ports = (cache_port_t **)
> + malloc(p_sw->num_ports * sizeof(cache_port_t *));
> + memset(p_sw->ports, 0, p_sw->num_ports * sizeof(cache_port_t *));
> +
> + cl_qmap_insert(&p_cache->sw_tbl, p_sw->lid_ho, &p_sw->map_item);
> + return p_sw;
> +} /* __ucast_cache_add_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_add_ca(osm_ucast_cache_t * p_cache,
> + uint16_t lid_ho)
> +{
> + cache_ca_t *p_ca = (cache_ca_t*)malloc(sizeof(cache_ca_t));
> + memset(p_ca, 0, sizeof(cache_ca_t));
> +
> + p_ca->lid_ho = lid_ho;
> +
> + cl_qmap_insert(&p_cache->ca_tbl, p_ca->lid_ho, &p_ca->map_item);
> + return p_ca;
> +} /* __ucast_cache_add_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_port_destroy(cache_port_t * p_port)
> +{
> + if (!p_port)
> + return;
> + free(p_port);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_sw_destroy(cache_sw_t * p_sw)
> +{
> + int i;
> +
> + if (!p_sw)
> + return;
> +
> + if (p_sw->ports) {
> + for (i = 0; i < p_sw->num_ports; i++)
> + if (p_sw->ports[i])
> + __cache_port_destroy(p_sw->ports[i]);
> + free(p_sw->ports);
> + }
> +
> + if (p_sw->lid_matrix) {
> + for (i = 0; i <= p_sw->max_lid_ho; i++)
> + if (p_sw->lid_matrix[i])
> + free(p_sw->lid_matrix[i]);
> + free(p_sw->lid_matrix);
> + }
> +
> + if (p_sw->lft_buff)
> + free(p_sw->lft_buff);
> +
> + free(p_sw);
> +} /* __cache_sw_destroy() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_ca_destroy(cache_ca_t * p_ca)
> +{
> + if (!p_ca)
> + return;
> + free(p_ca);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_populate(osm_ucast_cache_t * p_cache)
> +{
> + cl_list_t sw_bfs_list;
cl_list, cl_map, etc. (without 'q') are slow. It is really better to
use the cl_q* versions.
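With an embedded cl_list_item_t in cache_sw_t (a hypothetical
'list_item' field - not in this patch) the BFS queue would also need no
per-node allocations. Something like (untested):

	cl_qlist_t sw_bfs_list;
	cl_list_item_t *p_item;

	cl_qlist_init(&sw_bfs_list);
	cl_qlist_insert_tail(&sw_bfs_list, &p_sw->list_item);
	while (!cl_is_qlist_empty(&sw_bfs_list)) {
		p_item = cl_qlist_remove_head(&sw_bfs_list);
		p_sw = PARENT_STRUCT(p_item, cache_sw_t, list_item);
		/* ... scan p_sw's ports as below ... */
	}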
> + osm_switch_t * p_osm_sw;
> + osm_switch_t * p_remote_osm_sw;
It seems that those variables (and maybe others) are never used
together. Use just one if so.
> + osm_node_t * p_osm_node;
> + osm_node_t * p_remote_osm_node;
> + osm_physp_t * p_osm_physp;
> + osm_physp_t * p_remote_osm_physp;
> + cache_sw_t * p_sw;
> + cache_sw_t * p_remote_sw;
> + cache_ca_t * p_remote_ca;
> + uint16_t remote_lid_ho;
> + unsigned num_ports;
> + unsigned i;
> + int res = 0;
> + osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> + OSM_LOG_ENTER(p_log);
> +
> + cl_list_init(&sw_bfs_list, 10);
> +
> + /* Use management switch or switch that is connected
> + to management CA as a BFS scan starting point */
> +
> + p_osm_sw = __ucast_cache_get_starting_osm_sw(p_cache);
> + if (!p_osm_sw) {
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A51: "
> + "failed getting cache population starting point\n");
> + res = 1;
> + goto Exit;
> + }
> +
> + /* switch is cached BEFORE entering to the BFS list,
> + so we will know whether this switch was "visited" */
> +
> + p_sw = __ucast_cache_add_sw(p_cache, p_osm_sw);
> + cl_list_insert_tail(&sw_bfs_list, p_sw);
> +
> + /* Create cached switches in the BFS order.
> + This will ensure that the fabric scan is done each
> + time the same way and will allow accurate matching
> + between the current fabric and the cached one. */
Why is BFS needed here? Wouldn't it be simpler to run over
p_subn->sw_guid_tbl?
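The qmap iteration order is fixed (keyed by node GUID), so the scan
would still be reproducible between sweeps. Something like (untested):

	cl_qmap_t *p_tbl = &p_cache->p_ucast_mgr->p_subn->sw_guid_tbl;
	osm_switch_t *p_osm_sw;

	for (p_osm_sw = (osm_switch_t *) cl_qmap_head(p_tbl);
	     p_osm_sw != (osm_switch_t *) cl_qmap_end(p_tbl);
	     p_osm_sw = (osm_switch_t *) cl_qmap_next(&p_osm_sw->map_item))
		__ucast_cache_add_sw(p_cache, p_osm_sw);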
> + while (!cl_is_list_empty(&sw_bfs_list)) {
> + p_sw = (cache_sw_t *) cl_list_remove_head(&sw_bfs_list);
> + p_osm_sw = p_sw->p_osm_sw;
> + p_osm_node = p_osm_sw->p_node;
> + num_ports = osm_node_get_num_physp(p_osm_node);
> +
> + /* skipping port 0 on switches */
> + for (i = 1; i < num_ports; i++) {
> + p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> + if (!p_osm_physp ||
> + !osm_physp_is_valid(p_osm_physp) ||
> + !osm_link_is_healthy(p_osm_physp))
> + continue;
> +
> + p_remote_osm_physp = osm_physp_get_remote(p_osm_physp);
> + if (!p_remote_osm_physp ||
> + !osm_physp_is_valid(p_remote_osm_physp) ||
> + !osm_link_is_healthy(p_remote_osm_physp))
> + continue;
> +
> + p_remote_osm_node =
> + osm_physp_get_node_ptr(p_remote_osm_physp);
> + if (!p_remote_osm_node) {
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A52: "
> + "no node for remote port\n");
> + res = 1;
> + goto Exit;
> + }
> +
> + if (osm_node_get_type(p_remote_osm_node) ==
> + IB_NODE_TYPE_SWITCH) {
> +
> + remote_lid_ho = cl_ntoh16(
> + osm_node_get_base_lid(
> + p_remote_osm_node, 0));
> +
> + p_remote_osm_sw = p_remote_osm_node->sw;
> + CL_ASSERT(p_remote_osm_sw);
> +
> + p_remote_sw = __ucast_cache_get_sw(
> + p_cache,
> + remote_lid_ho);
> +
> + /* If the remote switch hasn't been
> + cached yet, add it to the cache
> + and insert it into the BFS list */
> +
> + if (!p_remote_sw) {
> + p_remote_sw = __ucast_cache_add_sw(
> + p_cache,
> + p_remote_osm_sw);
> + cl_list_insert_tail(&sw_bfs_list,
> + p_remote_sw);
> + }
> + }
> + else {
opensm/osm_indent will suggest the '} else {' style.
> + remote_lid_ho = cl_ntoh16(
> + osm_physp_get_base_lid(
> + p_remote_osm_physp));
> +
> + p_sw->is_leaf = TRUE;
> + p_remote_ca = __ucast_cache_add_ca(
> + p_cache, remote_lid_ho);
> +
> + /* no need to add this node to BFS list */
> + }
> +
> + /* cache this port */
> + p_sw->ports[i] = __ucast_cache_add_port(
> + p_cache,
> + osm_node_get_type(p_remote_osm_node),
> + remote_lid_ho);
> + }
> + }
> +
> + cl_list_destroy(&sw_bfs_list);
Tabs...
> + p_cache->topology_valid = TRUE;
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "cache populated (%u SWs, %u CAs)\n",
> + cl_qmap_count(&p_cache->sw_tbl),
> + cl_qmap_count(&p_cache->ca_tbl));
> +
> + Exit:
> + OSM_LOG_EXIT(p_log);
> + return res;
> +} /* __ucast_cache_populate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_sw_lid_matrix(cl_map_item_t * const p_map_item,
> + void *context)
> +{
> + cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> + uint16_t target_lid_ho;
> + uint8_t port_num;
> +
> + if (!p_sw->p_osm_sw)
> + return;
> +
> + /* allocate lid matrices buffer:
> + lid_matrix[target_lids][port_nums] */
> + CL_ASSERT(!p_sw->lid_matrix);
> + p_sw->lid_matrix = (uint8_t **)
> + malloc((p_sw->max_lid_ho + 1) * sizeof(uint8_t*));
> +
> + for (target_lid_ho = 0;
> + target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> +
> + /* set hops for this target through every switch port */
> +
> + p_sw->lid_matrix[target_lid_ho] =
> + (uint8_t *)malloc(p_sw->num_ports);
> + memset(p_sw->lid_matrix[target_lid_ho],
> + OSM_NO_PATH, p_sw->num_ports);
> +
> + for (port_num = 1; port_num < p_sw->num_ports; port_num++)
> + p_sw->lid_matrix[target_lid_ho][port_num] =
> + osm_switch_get_hop_count(p_sw->p_osm_sw,
> + target_lid_ho,
> + port_num);
The original switch objects keep lid matrices for switches only, not
for CAs; this was done to speed up LID matrix generation and to save a
lot of memory.
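E.g. something like this, walking only the cached switches (untested;
assumes the context argument is used to pass p_cache, which the current
callback ignores):

	cache_sw_t *p_dest_sw;

	for (p_dest_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
	     p_dest_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl);
	     p_dest_sw = (cache_sw_t *) cl_qmap_next(&p_dest_sw->map_item))
		for (port_num = 1; port_num < p_sw->num_ports; port_num++)
			p_sw->lid_matrix[p_dest_sw->lid_ho][port_num] =
			    osm_switch_get_hop_count(p_sw->p_osm_sw,
						     p_dest_sw->lid_ho,
						     port_num);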
> + }
> +} /* __ucast_cache_read_sw_lid_matrix() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_sw_routing(cl_map_item_t * const p_map_item,
> + void * context)
> +{
> + cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> + osm_ucast_cache_t * p_cache = (osm_ucast_cache_t *) context;
> + uint8_t *ucast_mgr_lft_buf = p_cache->p_ucast_mgr->lft_buf;
> + uint16_t target_lid_ho;
> + uint8_t port_num;
> + uint8_t hops;
> + osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> + OSM_LOG_ENTER(p_log);
> +
> + if (!p_sw->p_osm_sw) {
> + /* some switches (leaf switches) may exist in the
> + cache, but not exist in the current topology */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "cached switch 0x%04x doesn't exist in the fabric\n",
> + p_sw->lid_ho);
Nowadays we use decimal format for unicast LID representation.
Also, what about using OSM_LOG_DEBUG for debug purposes? This file has
30 OSM_LOG_VERBOSE messages; osm_ucast_mgr.c has only 3.
> + goto Exit;
> + }
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "writing routing for cached switch 0x%04x, "
> + "max_lid_ho = 0x%04x\n",
> + p_sw->lid_ho, p_sw->max_lid_ho);
> +
> + /* write cached LFT to this switch: clear existing
> + ucast mgr lft buffer, write the cached lft to the
> + ucast mgr buffer, and set this lft on switch */
> + CL_ASSERT(p_sw->lft_buff);
> + memset(ucast_mgr_lft_buf, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);
Why memset()? Isn't the memcpy() plus the max_lid_ho setup, which you
do below anyway, enough?
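I.e. just (untested):

	memcpy(ucast_mgr_lft_buf, p_sw->lft_buff, p_sw->max_lid_ho + 1);
	p_sw->p_osm_sw->max_lid_ho = p_sw->max_lid_ho;
	osm_ucast_mgr_set_fwd_table(p_cache->p_ucast_mgr, p_sw->p_osm_sw);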
> + if (p_sw->max_lid_ho > 0)
> + memcpy(ucast_mgr_lft_buf, p_sw->lft_buff,
> + p_sw->max_lid_ho + 1);
> +
> + p_sw->p_osm_sw->max_lid_ho = p_sw->max_lid_ho;
> + osm_ucast_mgr_set_fwd_table(p_cache->p_ucast_mgr,p_sw->p_osm_sw);
> +
> + /* write cached lid matrix to this switch */
> +
> + osm_switch_prepare_path_rebuild(p_sw->p_osm_sw, p_sw->max_lid_ho);
> +
> + /* set hops to itself */
> + osm_switch_set_hops(p_sw->p_osm_sw,p_sw->lid_ho,0,0);
> +
> + for (target_lid_ho = 0;
> + target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> + /* port 0 on switches lid matrices is used
> + for storing minimal hops to the target
> + lid, so we iterate from port 1 */
> + for (port_num = 1; port_num < p_sw->num_ports; port_num++) {
> + hops = p_sw->lid_matrix[target_lid_ho][port_num];
> + if (hops != OSM_NO_PATH)
> + osm_switch_set_hops(p_sw->p_osm_sw,
> + target_lid_ho, port_num, hops);
> + }
As above - switches need lid matrices only for switch nodes.
> + }
> + Exit:
> + OSM_LOG_EXIT(p_log);
> +} /* __ucast_cache_write_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_sw_routing(cl_map_item_t * const p_map_item,
> + void *context)
> +{
> + cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> + unsigned lid;
> +
> + if(p_sw->lft_buff) {
> + free(p_sw->lft_buff);
> + p_sw->lft_buff = NULL;
> + }
> +
> + if(p_sw->lid_matrix) {
> + for (lid = 0; lid < p_sw->max_lid_ho; lid++)
> + if (p_sw->lid_matrix[lid])
> + free(p_sw->lid_matrix[lid]);
> + free(p_sw->lid_matrix);
> + p_sw->lid_matrix = NULL;
> + }
> +
> + p_sw->max_lid_ho = 0;
> +} /* __ucast_cache_clear_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_routing(osm_ucast_cache_t * p_cache)
> +{
> + cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_clear_sw_routing,
> + (void *)p_cache);
> + p_cache->routing_valid = FALSE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_invalidate(osm_ucast_cache_t * p_cache)
> +{
> + cache_sw_t * p_sw;
> + cache_sw_t * p_next_sw;
> + cache_ca_t * p_ca;
> + cache_ca_t * p_next_ca;
> +
> + p_next_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> + while (p_next_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl)) {
> + p_sw = p_next_sw;
> + p_next_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item);
> + __cache_sw_destroy(p_sw);
> + }
> + cl_qmap_remove_all(&p_cache->sw_tbl);
> +
> + p_next_ca = (cache_ca_t *) cl_qmap_head(&p_cache->ca_tbl);
> + while (p_next_ca != (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl)) {
> + p_ca = p_next_ca;
> + p_next_ca = (cache_ca_t *) cl_qmap_next(&p_ca->map_item);
> + __cache_ca_destroy(p_ca);
> + }
> + cl_qmap_remove_all(&p_cache->ca_tbl);
> +
> + p_cache->routing_valid = FALSE;
> + p_cache->topology_valid = FALSE;
> + p_cache->need_update = FALSE;
> +} /* __ucast_cache_invalidate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_read_topology(osm_ucast_cache_t * p_cache)
> +{
> + CL_ASSERT(p_cache && p_cache->p_ucast_mgr);
> +
> + return __ucast_cache_populate(p_cache);
> +}
What is the reason for this wrapper function?
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> + CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> + p_cache->topology_valid);
> +
> + if (p_cache->routing_valid)
> + __ucast_cache_clear_routing(p_cache);
I see that these two lines are already present in
osm_ucast_cache_refresh_lid_matrices(), and that is the only place
where __ucast_cache_read_lid_matrices() is called.
It looks to me like the whole logic could be simplified if you had
separate reread_lfts() and reread_lid_matrices() primitives.
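Something like (untested, names are hypothetical):

	static void reread_lid_matrices(osm_ucast_cache_t * p_cache)
	{
		cl_qmap_apply_func(&p_cache->sw_tbl,
				   __ucast_cache_read_sw_lid_matrix,
				   (void *)p_cache);
		p_cache->routing_valid = TRUE;
	}

and then osm_ucast_cache_refresh_lid_matrices() would do the clearing
itself and just call this primitive.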
> +
> + cl_qmap_apply_func(&p_cache->sw_tbl,
> + __ucast_cache_read_sw_lid_matrix,
> + (void *)p_cache);
> + p_cache->routing_valid = TRUE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_routing(osm_ucast_cache_t * p_cache)
> +{
> + CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> + p_cache->topology_valid && p_cache->routing_valid);
> +
> + cl_qmap_apply_func(&p_cache->sw_tbl,
> + __ucast_cache_write_sw_routing,
> + (void *)p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_sw_clear_osm_ptr(cl_map_item_t * const p_map_item,
> + void *context)
> +{
> + ((cache_sw_t * const)p_map_item)->p_osm_sw = NULL;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_validate(osm_ucast_cache_t * p_cache)
It seems to me that this whole validation could be done better (and
faster) during subnet discovery; many of the pieces are already there
(details below).
Actually, I have already been thinking about having something like an
osm_sm.sweep_stat bitmask, instead of just osm_sm.master_sm_found, to
indicate the various events found during discovery.
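Rough sketch of the idea (all names are hypothetical):

	#define OSM_SWEEP_MASTER_SM_FOUND	0x01
	#define OSM_SWEEP_NEW_PORT		0x02
	#define OSM_SWEEP_LINK_CHANGED		0x04
	#define OSM_SWEEP_NODE_DROPPED		0x08

	/* set during discovery, e.g. in osm_port_info.c: */
	sm->sweep_stat |= OSM_SWEEP_LINK_CHANGED;

and the cache code would only need to test this mask instead of
rescanning the whole fabric.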
> +{
> + osm_switch_t * p_osm_sw;
> + osm_node_t * p_osm_node;
> + osm_node_t * p_remote_osm_node;
> + osm_physp_t * p_osm_physp;
> + osm_physp_t * p_remote_osm_physp;
> + cache_sw_t * p_sw;
> + cache_sw_t * p_remote_sw;
> + cache_ca_t * p_remote_ca;
> + uint16_t lid_ho;
> + uint16_t remote_lid_ho;
> + uint8_t remote_node_type;
> + unsigned num_ports;
> + unsigned i;
> + int res = UCAST_CACHE_TOPOLOGY_MATCH;
> + boolean_t fabric_link_exists;
> + osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> + cl_qmap_t * p_osm_sw_guid_tbl;
> +
> + OSM_LOG_ENTER(p_log);
> +
> + p_osm_sw_guid_tbl = &p_cache->p_ucast_mgr->p_subn->sw_guid_tbl;
> +
> + if (cl_qmap_count(p_osm_sw_guid_tbl) >
> + cl_qmap_count(&p_cache->sw_tbl)) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "current subnet has more switches than the cache - "
> + "cache is invalid\n");
> + res |= UCAST_CACHE_TOPOLOGY_MORE_SWITCHES;
> + goto Exit;
> + }
> +
> + if (cl_qmap_count(p_osm_sw_guid_tbl) <
> + cl_qmap_count(&p_cache->sw_tbl)) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "current subnet has less switches than the cache - "
> + "continuing validation\n");
> + res |= UCAST_CACHE_TOPOLOGY_LESS_SWITCHES;
> + }
This is already handled in the drop manager.
> +
> + /* Clear the pointers to osm switch on all the cached switches.
> + These pointers might be invalid right now: some cached switch
> + might be missing in the real subnet, and some missing switch
> + might reappear, such as in case of switch reboot. */
> + cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_sw_clear_osm_ptr,
> + NULL);
> +
> +
> + for (p_osm_sw = (osm_switch_t *) cl_qmap_head(p_osm_sw_guid_tbl);
> + p_osm_sw != (osm_switch_t *) cl_qmap_end(p_osm_sw_guid_tbl);
> + p_osm_sw = (osm_switch_t *) cl_qmap_next(&p_osm_sw->map_item)) {
> +
> + lid_ho = cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node,0));
> + p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> + if (!p_sw) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "new lid (0x%04x)is in the fabric - "
> + "cache is invalid\n", lid_ho);
> + res |= UCAST_CACHE_TOPOLOGY_NEW_LID;
> + goto Exit;
> + }
New ports are already tracked with the 'is_new' field of the osm_port
structure (it is needed anyway for sending port in/out traps).
> +
> + p_sw->p_osm_sw = p_osm_sw;
> +
> + /* scan all the ports and check if the cache is valid */
> +
> + p_osm_node = p_osm_sw->p_node;
> + num_ports = osm_node_get_num_physp(p_osm_node);
> +
> + /* skipping port 0 on switches */
> + for (i = 1; i < num_ports; i++) {
> + p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> +
> + fabric_link_exists = FALSE;
> + if (p_osm_physp &&
> + osm_physp_is_valid(p_osm_physp) &&
osm_node_get_physp_ptr() returns NULL if the port is not "valid".
> + osm_link_is_healthy(p_osm_physp)) {
> + p_remote_osm_physp =
> + osm_physp_get_remote(p_osm_physp);
> + if (p_remote_osm_physp &&
> + osm_physp_is_valid(p_remote_osm_physp) &&
> + osm_link_is_healthy(p_remote_osm_physp))
> + fabric_link_exists = TRUE;
> + }
> +
> + if (!fabric_link_exists && !p_sw->ports[i])
> + continue;
> +
> + if (fabric_link_exists && !p_sw->ports[i]) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, link exists "
> + "in the fabric, but not cached - "
> + "cache is invalid\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_LINK_ADDED;
> + goto Exit;
> + }
> +
> + if (!fabric_link_exists && p_sw->ports[i]){
> + /*
> + * link exists in cache, but missing
> + * in current fabric
> + */
> + if (p_sw->ports[i]->remote_node_type ==
> + IB_NODE_TYPE_SWITCH) {
> + p_remote_sw =
> + p_sw->ports[i]->remote_node.p_sw;
> + /* cache is allowed to have a
> + leaf switch that is missing
> + in the current subnet */
> + if (!p_remote_sw->is_leaf) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "fabric is missing a link "
> + "to non-leaf switch - "
> + "cache is invalid\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING;
> + goto Exit;
> + }
> + else {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "fabric is missing a link "
> + "to leaf switch - "
> + "continuing validation\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING;
> + continue;
> + }
> + }
> + else {
> + /* this means that link to
> + non-switch node is missing */
> + CL_ASSERT(p_sw->is_leaf);
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "fabric is missing a link "
> + "to CA - "
> + "continuing validation\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING;
> + continue;
> + }
> + }
I think all this can be tracked in port_info.
> +
> + /*
> + * Link exists both in fabric and in cache.
> + * Compare remote nodes.
> + */
> +
> + p_remote_osm_node =
> + osm_physp_get_node_ptr(p_remote_osm_physp);
> + if (!p_remote_osm_node) {
> + /* No node for remote port!
> + Something wrong is going on here,
> + so we better not use cache... */
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A53: "
> + "lid 0x%04x, port %d, "
> + "no node for remote port - "
> + "cache mismatch\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> +
> + remote_node_type =
> + osm_node_get_type(p_remote_osm_node);
> +
> + if (remote_node_type !=
> + p_sw->ports[i]->remote_node_type) {
> + /* remote node type in the current fabric
> + differs from the cached one - looks like
> + node was replaced by something else */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "remote node type mismatch - "
> + "cache is invalid\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
Why are nodes compared and not ports? Will this handle the case where
CA port 1 was disconnected and port 2 connected with the same cable
(and gets another LID value)?
> +
> + if (remote_node_type == IB_NODE_TYPE_SWITCH) {
> + remote_lid_ho =
> + cl_ntoh16(osm_node_get_base_lid(
> + p_remote_osm_node, 0));
> +
> + p_remote_sw = __ucast_cache_get_sw(
> + p_cache,
> + remote_lid_ho);
And what if the switch was replaced, but the same LID value was
reassigned for some reason?
Wouldn't it be easier to compare port GUIDs?
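E.g. (untested; assumes a cached node GUID field, which this patch
doesn't have):

	if (osm_node_get_node_guid(p_remote_osm_node) !=
	    p_sw->ports[i]->remote_node.p_sw->node_guid) {
		/* remote switch was replaced - cache is invalid */
		res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
		goto Exit;
	}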
> +
> + if (!p_remote_sw) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, "
> + "new switch in the fabric - "
> + "cache is invalid\n",
> + remote_lid_ho);
> + res |= UCAST_CACHE_TOPOLOGY_NEW_SWITCH;
> + goto Exit;
> + }
> +
> + if (p_sw->ports[i]->remote_node.p_sw !=
> + p_remote_sw) {
> + /* remote cached switch that pointed
> + by the port is not equal to the
> + switch that was obtained for the
> + remote lid - link was changed */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "link location changed "
> + "(remote node mismatch) - "
> + "cache is invalid\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
Could you elaborate on when this would be possible? (I'm starting to
miss things :( )
> + }
> + else {
> + if (!p_sw->is_leaf) {
> + /* remote node type is CA, but the
> + cached switch is not marked as
> + leaf - something has changed */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "link changed - "
> + "cache is invalid\n",
> + lid_ho, i);
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> +
> + remote_lid_ho =
> + cl_ntoh16(osm_physp_get_base_lid(
> + p_remote_osm_physp));
> +
> + p_remote_ca = __ucast_cache_get_ca(
> + p_cache, remote_lid_ho);
> +
> + if (!p_remote_ca) {
> + /* new lid is in the fabric -
> + cache is invalid */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "new CA in the fabric "
> + "(lid 0x%04x) - "
> + "cache is invalid\n",
> + lid_ho, i, remote_lid_ho);
> + res |= UCAST_CACHE_TOPOLOGY_NEW_CA;
> + goto Exit;
> + }
> +
> + if (p_sw->ports[i]->remote_node.p_ca !=
> + p_remote_ca) {
> + /* remote cached CA that pointed
> + by the port is not equal to the
> + CA that was obtained for the
> + remote lid - link was changed */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "lid 0x%04x, port %d, "
> + "link to CA (lid 0x%04x) "
> + "has changed - "
> + "cache is invalid\n",
> + lid_ho, i, remote_lid_ho);
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> + }
> + } /* done comparing the ports of the switch */
I think all this would be *much* easier to track in osm_port_info.c -
look at how osm_physp's and osm_switch's need_update flags are set up,
and also at where osm_node_unlink() is called.
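If discovery already marks the changed objects, validation could shrink
to something like (untested, assuming the need_update semantics fit):

	for (p_osm_sw = (osm_switch_t *) cl_qmap_head(p_osm_sw_guid_tbl);
	     p_osm_sw != (osm_switch_t *) cl_qmap_end(p_osm_sw_guid_tbl);
	     p_osm_sw = (osm_switch_t *) cl_qmap_next(&p_osm_sw->map_item))
		if (p_osm_sw->need_update) {
			res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
			goto Exit;
		}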
> + } /* done comparing all the switches */
> +
> + /* At this point we have four possible flags on:
> + 1. UCAST_CACHE_TOPOLOGY_MATCH
> + We have a perfect topology match to the cache
> + 2. UCAST_CACHE_TOPOLOGY_LESS_SWITCHES
> + Cached topology has one or more switches that do not exist
> + in the current topology. There are two types of such switches:
> + leaf switches and the regular switches. But if some regular
> + switch was missing, we would exit the comparison with the
> + UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING flag, so if some switch
> + in the topology is missing, it has to be leaf switch.
> + 3. UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING
> + One or more link to leaf switches are missing in the current
> + topology.
> + 4. UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING
> + One or more CAs are missing in the current topology.
> + In all these cases the cache is perfectly usable - it just might
> + have routing to unexisting lids. */
> +
> + if (res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES) {
> + /* if there are switches in the cache that don't exist
> + in the current topology, make sure that they are
> + all leaf switches, otherwise cache is useless */
> + for (p_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> + p_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl);
> + p_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item)) {
> + if (!p_sw->p_osm_sw && !p_sw->is_leaf) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "non-leaf switch in the fabric is "
> + "missing - cache is invalid\n");
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> + }
> + }
> +
> + if ((res & UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING) &&
> + !(res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES)) {
> + /* some link to leaf switch is missing, but there are
> + no missing switches - link failure or topology
> + changes, which means that we probably shouldn't
> + use the cache here */
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "topology change - cache is invalid\n");
> + res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> +
> + Exit:
> + OSM_LOG_EXIT(p_log);
> + return res;
> +
> +} /* __ucast_cache_validate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +int
> +osm_ucast_cache_apply(osm_ucast_cache_t * p_cache)
> +{
> + int res = 0;
> + osm_log_t * p_log;
> +
> + if (!p_cache)
> + return 1;
> +
> + p_log = p_cache->p_ucast_mgr->p_log;
> +
> + OSM_LOG_ENTER(p_log);
> + if (!p_cache->topology_valid) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "unicast cache is empty - can't "
> + "use it on this sweep\n");
> + res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> +
> + if (!p_cache->routing_valid) {
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A55: "
> + "cached routing invalid\n");
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "invalidating cache\n");
> + __ucast_cache_invalidate(p_cache);
> + res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> + goto Exit;
> + }
> +
> + res = __ucast_cache_validate(p_cache);
> +
> + if ((res & UCAST_CACHE_TOPOLOGY_NO_MATCH ) ||
> + (res & UCAST_CACHE_TOPOLOGY_MORE_SWITCHES ) ||
> + (res & UCAST_CACHE_TOPOLOGY_LINK_ADDED ) ||
> + (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING) ||
> + (res & UCAST_CACHE_TOPOLOGY_NEW_SWITCH ) ||
> + (res & UCAST_CACHE_TOPOLOGY_NEW_CA ) ||
> + (res & UCAST_CACHE_TOPOLOGY_NEW_LID ) ||
> + (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING)) {
Why not make this a single return status?
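(BTW, UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING is tested twice above.)
E.g. one combined mask (untested) would reduce the caller to a single
test:

	#define UCAST_CACHE_TOPOLOGY_INVALID \
		(UCAST_CACHE_TOPOLOGY_NO_MATCH | \
		 UCAST_CACHE_TOPOLOGY_MORE_SWITCHES | \
		 UCAST_CACHE_TOPOLOGY_LINK_ADDED | \
		 UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING | \
		 UCAST_CACHE_TOPOLOGY_NEW_SWITCH | \
		 UCAST_CACHE_TOPOLOGY_NEW_CA | \
		 UCAST_CACHE_TOPOLOGY_NEW_LID)

	if (res & UCAST_CACHE_TOPOLOGY_INVALID) {
		...
	}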
> + /* The change in topology doesn't allow us to use the.
> + existing cache. Cache should be invalidated, and new
> + cache should be built after the routing recalculation. */
> + OSM_LOG(p_log, OSM_LOG_INFO,
> + "changes in topology (0x%x) - "
> + "invalidating cache\n", res);
> + __ucast_cache_invalidate(p_cache);
> + goto Exit;
> + }
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "cache is valid (status 0x%04x) - using the cached routing\n",res);
> +
> + /* existing cache can be used - write back the cached routing */
> + __ucast_cache_write_routing(p_cache);
> +
> + /*
> + * ToDo: Detailed result of the topology comparison will
> + * ToDo: be needed later for the Incremental Routing,
> + * ToDo: where based on this result, the routing algorithm
> + * ToDo: will try to route "around" the missing components.
> + * ToDo: For now - reset the result whenever the cache
> + * ToDo: is valid.
> + */
> + res = 0;
> +
> + Exit:
> + OSM_LOG_EXIT(p_log);
> + return res;
> +} /* osm_ucast_cache_apply() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> + uint8_t * ucast_mgr_lft_buf,
> + osm_switch_t * p_osm_sw)
> +{
> + uint16_t lid_ho =
> + cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> + cache_sw_t * p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> +
> + OSM_LOG_ENTER(p_cache->p_ucast_mgr->p_log);
> +
> + OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_VERBOSE,
> + "caching lft for switch 0x%04x\n",
> + lid_ho);
> +
> + if (!p_sw || !p_sw->p_osm_sw) {
> + OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_ERROR,
> + "ERR 3A57: "
> + "fabric switch 0x%04x %s in the unicast cache\n",
> + lid_ho,
> + (p_sw) ? "is not initialized" : "doesn't exist");
> + goto Exit;
> + }
> +
> + CL_ASSERT(p_sw->p_osm_sw == p_osm_sw);
> + CL_ASSERT(!p_sw->lft_buff);
> +
> + p_sw->max_lid_ho = p_osm_sw->max_lid_ho;
> +
> + /* allocate linear forwarding table buffer and fill it */
> + p_sw->lft_buff = (uint8_t *)malloc(IB_LID_UCAST_END_HO + 1);
> + memcpy(p_sw->lft_buff, p_cache->p_ucast_mgr->lft_buf,
> + IB_LID_UCAST_END_HO + 1);
> +
> + Exit:
> + OSM_LOG_EXIT(p_cache->p_ucast_mgr->p_log);
> +} /* osm_ucast_cache_set_sw_fwd_table() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache)
> +{
> + osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> + OSM_LOG_ENTER(p_log);
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "starting ucast cache topology refresh\n");
> +
> + if (p_cache->topology_valid) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "invalidating existing ucast cache\n");
> + __ucast_cache_invalidate(p_cache);
> + }
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE, "caching topology\n");
> +
> + if (__ucast_cache_read_topology(p_cache) != 0) {
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A56: "
> + "cache population failed\n");
> + __ucast_cache_invalidate(p_cache);
> + goto Exit;
> + }
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "ucast cache topology refresh done\n");
> + Exit:
> + OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_topo() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> + osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> + OSM_LOG_ENTER(p_log);
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "starting ucast cache lid matrices refresh\n");
> +
> + if (!p_cache->topology_valid) {
> + OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A54: "
> + "cached topology is invalid\n");
> + goto Exit;
> + }
> +
> + if (p_cache->routing_valid) {
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "invalidating existing ucast routing cache\n");
> + __ucast_cache_clear_routing(p_cache);
> + }
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "caching lid matrices\n");
> +
> + __ucast_cache_read_lid_matrices(p_cache);
> +
> + OSM_LOG(p_log, OSM_LOG_VERBOSE,
> + "ucast cache lid matrices refresh done\n");
> + Exit:
> + OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_lid_matrices() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(osm_ucast_mgr_t * const p_mgr)
> +{
> + if (p_mgr->p_subn->opt.lmc > 0) {
> + OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A50: "
> + "Unicast cache is not supported for LMC>0\n");
> + return NULL;
> + }
> +
> + osm_ucast_cache_t * p_cache =
> + (osm_ucast_cache_t*)malloc(sizeof(osm_ucast_cache_t));
> + if (!p_cache)
> + return NULL;
> +
> + memset(p_cache, 0, sizeof(osm_ucast_cache_t));
> +
> + cl_qmap_init(&p_cache->sw_tbl);
> + cl_qmap_init(&p_cache->ca_tbl);
> + p_cache->p_ucast_mgr = p_mgr;
> +
> + return p_cache;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void
> +osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache)
> +{
> + if (!p_cache)
> + return;
> + __ucast_cache_invalidate(p_cache);
> + free(p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> --
> 1.5.1.4
>