[ofa-general] Re: [PATCH 1/4] opensm/osm_ucast_cache.{c, h}: ucast routing cache implementation

Sasha Khapyorsky sashak at voltaire.com
Sun Jun 29 14:43:45 PDT 2008


Hi Yevgeny,

Sorry about huge delay with looking at this.

On 12:59 Sun 04 May     , Yevgeny Kliteynik wrote:
> Unicast routing cache implementation.
> 
> Unicast routing cache comprises the following:
>  - Topology: a data structure with all the switches and CAs of the fabric
>  - LFTs: each switch has an LFT cached
>  - Lid matrices: each switch has lid matrices cached, which is needed for
>    multicast routing (which is not cached).
> 
> There is also a topology matching function that compares the current topology
> with the cached one to find out whether the cache is usable (valid) or not.

As I wrote in another email, I believe that saving a needless full-rerouting
cycle is a good idea and very much needed for OpenSM, but I don't like this
implementation.

In order to understand it better I needed to go over the code; some
comments may be useful. They are below.

Sasha

> Signed-off-by: Yevgeny Kliteynik <kliteyn at dev.mellanox.co.il>
> ---
>  opensm/include/opensm/osm_ucast_cache.h |  319 ++++++++
>  opensm/opensm/osm_ucast_cache.c         | 1197 +++++++++++++++++++++++++++++++
>  2 files changed, 1516 insertions(+), 0 deletions(-)
>  create mode 100644 opensm/include/opensm/osm_ucast_cache.h
>  create mode 100644 opensm/opensm/osm_ucast_cache.c
> 
> diff --git a/opensm/include/opensm/osm_ucast_cache.h b/opensm/include/opensm/osm_ucast_cache.h
> new file mode 100644
> index 0000000..a3b40f9
> --- /dev/null
> +++ b/opensm/include/opensm/osm_ucast_cache.h
> @@ -0,0 +1,319 @@
> +/*
> + * Copyright (c) 2002-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + * 	Declaration of osm_ucast_cache_t.
> + *	This object represents the Unicast Cache object.
> + *
> + * Environment:
> + * 	Linux User Mode
> + *
> + * $Revision: 1.4 $
> + */
> +
> +#ifndef _OSM_UCAST_CACHE_H_
> +#define _OSM_UCAST_CACHE_H_
> +
> +#ifdef __cplusplus
> +#  define BEGIN_C_DECLS extern "C" {
> +#  define END_C_DECLS   }
> +#else				/* !__cplusplus */
> +#  define BEGIN_C_DECLS
> +#  define END_C_DECLS
> +#endif				/* __cplusplus */
> +
> +BEGIN_C_DECLS
> +
> +struct _osm_ucast_mgr;
> +
> +#define UCAST_CACHE_TOPOLOGY_MATCH                   0x0000
> +#define UCAST_CACHE_TOPOLOGY_LESS_SWITCHES           0x0001
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING 0x0002
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING      0x0004
> +#define UCAST_CACHE_TOPOLOGY_MORE_SWITCHES           0x0008
> +#define UCAST_CACHE_TOPOLOGY_NEW_LID                 0x0010
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING      0x0020
> +#define UCAST_CACHE_TOPOLOGY_LINK_ADDED              0x0040
> +#define UCAST_CACHE_TOPOLOGY_NEW_SWITCH              0x0080
> +#define UCAST_CACHE_TOPOLOGY_NEW_CA                  0x0100
> +#define UCAST_CACHE_TOPOLOGY_NO_MATCH                0x0200
> +
> +/****h* OpenSM/Unicast Manager/Unicast Cache
> +* NAME
> +*	Unicast Cache
> +*
> +* DESCRIPTION
> +*	The Unicast Cache object encapsulates the information
> +*	needed to cache and write unicast routing of the subnet.
> +*
> +*	The Unicast Cache object is NOT thread safe.
> +*
> +*	This object should be treated as opaque and should be
> +*	manipulated only through the provided functions.
> +*
> +* AUTHOR
> +*	Yevgeny Kliteynik, Mellanox
> +*
> +*********/
> +
> +
> +/****s* OpenSM: Unicast Cache/osm_ucast_cache_t
> +* NAME
> +*	osm_ucast_cache_t
> +*
> +* DESCRIPTION
> +*	Unicast Cache structure.
> +*
> +*	This object should be treated as opaque and should
> +*	be manipulated only through the provided functions.
> +*
> +* SYNOPSIS
> +*/
> +typedef struct osm_ucast_cache_t_ {
> +	struct _osm_ucast_mgr * p_ucast_mgr;
> +	cl_qmap_t sw_tbl;
> +	cl_qmap_t ca_tbl;
> +	boolean_t topology_valid;
> +	boolean_t routing_valid;
> +	boolean_t need_update;
> +} osm_ucast_cache_t;
> +/*
> +* FIELDS
> +*	p_ucast_mgr
> +*		Pointer to the Unicast Manager for this subnet.
> +*
> +*	sw_tbl
> +*		Cached switches table.
> +*
> +*	ca_tbl
> +*		Cached CAs table.
> +*
> +*	topology_valid
> +*		TRUE if the cache is populated with the fabric topology.
> +*
> +*	routing_valid
> +*		TRUE if the cache is populated with the unicast routing
> +*		in addition to the topology.
> +*
> +*	need_update
> +*		TRUE if the cached routing needs to be updated.
> +*
> +* SEE ALSO
> +*	Unicast Manager object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_construct
> +* NAME
> +*	osm_ucast_cache_construct
> +*
> +* DESCRIPTION
> +*	This function constructs a Unicast Cache object.
> +*
> +* SYNOPSIS
> +*/
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(struct _osm_ucast_mgr * const p_mgr);
> +/*
> +* PARAMETERS
> +*	p_mgr
> +*		[in] Pointer to a Unicast Manager object.
> +*
> +* RETURN VALUE
> +*	This function return the created Ucast Cache object on success,
> +*	or NULL on any error.
> +*
> +* NOTES
> +*	Allows osm_ucast_cache_destroy
> +*
> +*	Calling osm_ucast_mgr_construct is a prerequisite to
> +*	calling any other method.
> +*
> +* SEE ALSO
> +*	Unicast Cache object, osm_ucast_cache_destroy
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_destroy
> +* NAME
> +*	osm_ucast_cache_destroy
> +*
> +* DESCRIPTION
> +*	The osm_ucast_cache_destroy function destroys the object,
> +*	releasing all resources.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*	p_cache
> +*		[in] Pointer to the object to destroy.
> +*
> +* RETURN VALUE
> +*	This function does not return any value.
> +*
> +* NOTES
> +*	Performs any necessary cleanup of the specified
> +*	Unicast Cache object.
> +*	Further operations should not be attempted on the
> +*	destroyed object.
> +*	This function should only be called after a call to
> +*	osm_ucast_cache_construct.
> +*
> +* SEE ALSO
> +*	Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_topo
> +* NAME
> +*	osm_ucast_cache_refresh_topo
> +*
> +* DESCRIPTION
> +*	The osm_ucast_cache_refresh_topo function re-reads the
> +*	updated topology.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*	p_cache
> +*		[in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +*	This function does not return any value.
> +*
> +* NOTES
> +*	This function invalidates the existing unicast cache
> +*	and re-reads the updated topology.
> +*
> +* SEE ALSO
> +*	Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_lid_matrices
> +* NAME
> +*	osm_ucast_cache_refresh_lid_matrices
> +*
> +* DESCRIPTION
> +*	The osm_ucast_cache_refresh_topo function re-reads the
> +*	updated lid matrices.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*	p_cache
> +*		[in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +*	This function does not return any value.
> +*
> +* NOTES
> +*	This function re-reads the updated lid matrices.
> +*
> +* SEE ALSO
> +*	Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_apply
> +* NAME
> +*	osm_ucast_cache_apply
> +*
> +* DESCRIPTION
> +*	The osm_ucast_cache_apply function tries to apply
> +*	the cached unicast routing on the subnet switches.
> +*
> +* SYNOPSIS
> +*/
> +int osm_ucast_cache_apply(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*	p_cache
> +*		[in] Pointer to the cache object to be used.
> +*
> +* RETURN VALUE
> +*	0 if unicast cache was successfully written to switches,
> +*	non-zero for any error.
> +*
> +* NOTES
> +*	Compares the current topology to the cached topology,
> +*	and if the topology matches, or if changes in topology
> +*	have no impact on routing tables, writes the cached
> +*	unicast routing to the subnet switches.
> +*
> +* SEE ALSO
> +*	Unicast Cache object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_set_sw_fwd_table
> +* NAME
> +*	osm_ucast_cache_set_sw_fwd_table
> +*
> +* DESCRIPTION
> +*	The osm_ucast_cache_set_sw_fwd_table function sets
> +*	(caches) linear forwarding table for the specified
> +*	switch.
> +*
> +* SYNOPSIS
> +*/
> +void
> +osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> +				 uint8_t * ucast_mgr_lft_buf,
> +				 osm_switch_t * p_osm_sw);
> +/*
> +* PARAMETERS
> +*	p_cache
> +*		[in] Pointer to the cache object to be used.
> +*
> +*	ucast_mgr_lft_buf
> +*		[in] LFT to set.
> +*
> +*	p_osm_sw
> +*		[in] pointer to the switch that the LFT refers to.
> +*
> +* RETURN VALUE
> +*	This function does not return any value.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*	Unicast Cache object
> +*********/
> +
> +END_C_DECLS
> +#endif				/* _OSM_UCAST_MGR_H_ */
> +
> diff --git a/opensm/opensm/osm_ucast_cache.c b/opensm/opensm/osm_ucast_cache.c
> new file mode 100644
> index 0000000..4ad7c30
> --- /dev/null
> +++ b/opensm/opensm/osm_ucast_cache.c
> @@ -0,0 +1,1197 @@
> +/*
> + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + *    Implementation of OpenSM Cached routing
> + *
> + * Environment:
> + *    Linux User Mode
> + *
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <iba/ib_types.h>
> +#include <complib/cl_qmap.h>
> +#include <complib/cl_pool.h>
> +#include <complib/cl_debug.h>
> +#include <opensm/osm_opensm.h>
> +#include <opensm/osm_ucast_mgr.h>
> +#include <opensm/osm_ucast_cache.h>
> +#include <opensm/osm_switch.h>
> +#include <opensm/osm_node.h>
> +#include <opensm/osm_port.h>
> +
> +struct cache_sw_t_;
> +struct cache_ca_t_;
> +struct cache_port_t_;
> +
> +typedef union cache_sw_or_ca_ {
> +	struct cache_sw_t_ * p_sw;
> +	struct cache_ca_t_ * p_ca;
> +} cache_node_t;
> +
> +typedef struct cache_port_t_ {
> +	uint8_t remote_node_type;
> +	cache_node_t remote_node;
> +} cache_port_t;
> +
> +typedef struct cache_ca_t_ {
> +	cl_map_item_t map_item;
> +	uint16_t lid_ho;
> +} cache_ca_t;
> +
> +typedef struct cache_sw_t_ {
> +	cl_map_item_t map_item;
> +	uint16_t lid_ho;
> +	uint16_t max_lid_ho;
> +	osm_switch_t *p_osm_sw; /* pointer to the updated switch object */
> +	uint8_t num_ports;
> +	cache_port_t ** ports;
> +	uint8_t **lid_matrix;
> +        uint8_t * lft_buff;
> +        boolean_t is_leaf;

Please use tabs for indentation.

> +} cache_sw_t;
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static osm_switch_t *
> +__ucast_cache_get_starting_osm_sw(osm_ucast_cache_t * p_cache)
> +{
> +	osm_port_t * p_osm_port;
> +	osm_node_t * p_osm_node;
> +	osm_physp_t * p_osm_physp;
> +
> +	CL_ASSERT(p_cache->p_ucast_mgr);
> +
> +	/* find the OSM node */
> +	p_osm_port = osm_get_port_by_guid(
> +			p_cache->p_ucast_mgr->p_subn,
> +			p_cache->p_ucast_mgr->p_subn->sm_port_guid);
> +	CL_ASSERT(p_osm_port);
> +
> +	p_osm_node = p_osm_port->p_node;
> +	switch (osm_node_get_type(p_osm_node)) {
> +		case IB_NODE_TYPE_SWITCH:
> +			/* OpenSM runs on switch - we're done */
> +			return p_osm_node->sw;
> +
> +		case IB_NODE_TYPE_CA:
> +			/* SM runs on CA - get the switch
> +			   that CA is connected to. */
> +			p_osm_physp = p_osm_port->p_physp;
> +			p_osm_physp = osm_physp_get_remote(p_osm_physp);
> +			p_osm_node = osm_physp_get_node_ptr(p_osm_physp);
> +			CL_ASSERT(p_osm_node);
> +			return p_osm_node->sw;
> +
> +		default:
> +			/* SM runs on some other node - not supported */
> +			return NULL;
> +	}
> +} /* __ucast_cache_get_starting_osm_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_get_sw(osm_ucast_cache_t * p_cache,
> +		     uint16_t lid_ho)
> +{
> +	cache_sw_t * p_sw;
> +
> +	p_sw = (cache_sw_t *) cl_qmap_get(&p_cache->sw_tbl, lid_ho);
> +	if (p_sw == (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl))
> +		return NULL;
> +
> +	return p_sw;
> +} /* __ucast_cache_get_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_get_ca(osm_ucast_cache_t * p_cache,
> +		     uint16_t lid_ho)
> +{
> +	cache_ca_t * p_ca;
> +
> +	p_ca = (cache_ca_t *) cl_qmap_get(&p_cache->ca_tbl, lid_ho);
> +	if (p_ca == (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl))
> +		return NULL;
> +
> +	return p_ca;
> +} /* __ucast_cache_get_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_port_t *
> +__ucast_cache_add_port(osm_ucast_cache_t * p_cache,
> +		       uint8_t remote_node_type,
> +		       uint16_t lid_ho)
> +{
> +	cache_port_t * p_port = (cache_port_t *) malloc(sizeof(cache_port_t));
> +	memset(p_port, 0, sizeof(cache_port_t));
> +
> +	p_port->remote_node_type = remote_node_type;
> +	if (remote_node_type == IB_NODE_TYPE_SWITCH)
> +	{
> +		cache_sw_t * p_sw = __ucast_cache_get_sw(
> +					p_cache, lid_ho);
> +		CL_ASSERT(p_sw);
> +		p_port->remote_node.p_sw = p_sw;
> +	}
> +	else {
> +		cache_ca_t * p_ca = __ucast_cache_get_ca(
> +					p_cache, lid_ho);
> +		CL_ASSERT(p_ca);
> +		p_port->remote_node.p_ca = p_ca;
> +	}
> +
> +	return p_port;
> +} /* __ucast_cache_add_port() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_add_sw(osm_ucast_cache_t * p_cache,
> +		     osm_switch_t * p_osm_sw)
> +{
> +	cache_sw_t *p_sw = (cache_sw_t*)malloc(sizeof(cache_sw_t));
> +	memset(p_sw, 0, sizeof(cache_sw_t));
> +
> +	p_sw->p_osm_sw = p_osm_sw;
> +
> +	p_sw->lid_ho =
> +		cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> +
> +	p_sw->num_ports = osm_node_get_num_physp(p_osm_sw->p_node);
> +	p_sw->ports = (cache_port_t **)
> +		malloc(p_sw->num_ports * sizeof(cache_port_t *));
> +	memset(p_sw->ports, 0, p_sw->num_ports * sizeof(cache_port_t *));
> +
> +	cl_qmap_insert(&p_cache->sw_tbl, p_sw->lid_ho, &p_sw->map_item);
> +	return p_sw;
> +} /* __ucast_cache_add_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_add_ca(osm_ucast_cache_t * p_cache,
> +		     uint16_t lid_ho)
> +{
> +	cache_ca_t *p_ca = (cache_ca_t*)malloc(sizeof(cache_ca_t));
> +	memset(p_ca, 0, sizeof(cache_ca_t));
> +
> +	p_ca->lid_ho = lid_ho;
> +
> +	cl_qmap_insert(&p_cache->ca_tbl, p_ca->lid_ho, &p_ca->map_item);
> +	return p_ca;
> +} /* __ucast_cache_add_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_port_destroy(cache_port_t * p_port)
> +{
> +	if (!p_port)
> +		return;
> +	free(p_port);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_sw_destroy(cache_sw_t * p_sw)
> +{
> +	int i;
> +
> +	if (!p_sw)
> +		return;
> +
> +	if (p_sw->ports) {
> +		for (i = 0; i < p_sw->num_ports; i++)
> +			if (p_sw->ports[i])
> +				__cache_port_destroy(p_sw->ports[i]);
> +		free(p_sw->ports);
> +	}
> +
> +	if (p_sw->lid_matrix) {
> +		for (i = 0; i <= p_sw->max_lid_ho; i++)
> +			if (p_sw->lid_matrix[i])
> +				free(p_sw->lid_matrix[i]);
> +		free(p_sw->lid_matrix);
> +	}
> +
> +	if (p_sw->lft_buff)
> +		free(p_sw->lft_buff);
> +
> +	free(p_sw);
> +} /* __cache_sw_destroy() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_ca_destroy(cache_ca_t * p_ca)
> +{
> +	if (!p_ca)
> +		return;
> +	free(p_ca);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_populate(osm_ucast_cache_t * p_cache)
> +{
> +	cl_list_t sw_bfs_list;

cl_list, cl_map, etc. (w/out 'q') are slow. It is really better to use
cl_q* version.

> +	osm_switch_t * p_osm_sw;
> +	osm_switch_t * p_remote_osm_sw;

It seems that those variables (and maybe others) are never used together.
Use just one if so.

> +	osm_node_t   * p_osm_node;
> +	osm_node_t   * p_remote_osm_node;
> +	osm_physp_t  * p_osm_physp;
> +	osm_physp_t  * p_remote_osm_physp;
> +	cache_sw_t   * p_sw;
> +	cache_sw_t   * p_remote_sw;
> +	cache_ca_t   * p_remote_ca;
> +	uint16_t remote_lid_ho;
> +	unsigned num_ports;
> +	unsigned i;
> +	int res = 0;
> +	osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> +	OSM_LOG_ENTER(p_log);
> +
> +	cl_list_init(&sw_bfs_list, 10);
> +
> +	/* Use management switch or switch that is connected
> +	   to management CA as a BFS scan starting point */
> +
> +	p_osm_sw = __ucast_cache_get_starting_osm_sw(p_cache);
> +	if (!p_osm_sw) {
> +		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A51: "
> +			"failed getting cache population starting point\n");
> +		res = 1;
> +		goto Exit;
> +	}
> +
> +	/* switch is cached BEFORE entering to the BFS list,
> +	   so we will know whether this switch was "visited" */
> +
> +	p_sw = __ucast_cache_add_sw(p_cache, p_osm_sw);
> +	cl_list_insert_tail(&sw_bfs_list, p_sw);
> +
> +	/* Create cached switches in the BFS order.
> +	   This will ensure that the fabric scan is done each
> +	   time the same way and will allow accurate matching
> +	   between the current fabric and the cached one. */

Why is BFSing needed there? Wouldn't it be simpler to run over
p_subn->sw_guid_tbl?

> +	while (!cl_is_list_empty(&sw_bfs_list)) {
> +		p_sw = (cache_sw_t *) cl_list_remove_head(&sw_bfs_list);
> +		p_osm_sw = p_sw->p_osm_sw;
> +		p_osm_node = p_osm_sw->p_node;
> +		num_ports = osm_node_get_num_physp(p_osm_node);
> +
> +		/* skipping port 0 on switches */
> +		for (i = 1; i < num_ports; i++) {
> +			p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> +			if (!p_osm_physp ||
> +			    !osm_physp_is_valid(p_osm_physp) ||
> +			    !osm_link_is_healthy(p_osm_physp))
> +				continue;
> +
> +			p_remote_osm_physp = osm_physp_get_remote(p_osm_physp);
> +			if (!p_remote_osm_physp ||
> +			    !osm_physp_is_valid(p_remote_osm_physp) ||
> +			    !osm_link_is_healthy(p_remote_osm_physp))
> +				continue;
> +
> +			p_remote_osm_node =
> +				osm_physp_get_node_ptr(p_remote_osm_physp);
> +			if (!p_remote_osm_node) {
> +				OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A52: "
> +					"no node for remote port\n");
> +				res = 1;
> +				goto Exit;
> +			}
> +
> +			if (osm_node_get_type(p_remote_osm_node) ==
> +			    IB_NODE_TYPE_SWITCH) {
> +
> +				remote_lid_ho = cl_ntoh16(
> +					osm_node_get_base_lid(
> +						p_remote_osm_node, 0));
> +
> +				p_remote_osm_sw = p_remote_osm_node->sw;
> +				CL_ASSERT(p_remote_osm_sw);
> +
> +				p_remote_sw = __ucast_cache_get_sw(
> +					p_cache,
> +					remote_lid_ho);
> +
> +				/* If the remote switch hasn't been
> +				   cached yet, add it to the cache
> +				   and insert it into the BFS list */
> +
> +				if (!p_remote_sw) {
> +					p_remote_sw = __ucast_cache_add_sw(
> +						p_cache,
> +						p_remote_osm_sw);
> +					cl_list_insert_tail(&sw_bfs_list,
> +						    p_remote_sw);
> +				}
> +			}
> +			else {

opensm/osm_indent will suggest the '} else {' style.

> +				remote_lid_ho = cl_ntoh16(
> +					osm_physp_get_base_lid(
> +						p_remote_osm_physp));
> +
> +				p_sw->is_leaf = TRUE;
> +				p_remote_ca = __ucast_cache_add_ca(
> +					p_cache, remote_lid_ho);
> +
> +				/* no need to add this node to BFS list */
> +			}
> +
> +			/* cache this port */
> +			p_sw->ports[i] = __ucast_cache_add_port(
> +				p_cache,
> +				osm_node_get_type(p_remote_osm_node),
> +				remote_lid_ho);
> +		}
> +	}
> +
> +        cl_list_destroy(&sw_bfs_list);

Tabs...

> +	p_cache->topology_valid = TRUE;
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"cache populated (%u SWs, %u CAs)\n",
> +		cl_qmap_count(&p_cache->sw_tbl),
> +		cl_qmap_count(&p_cache->ca_tbl));
> +
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +	return res;
> +} /* __ucast_cache_populate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_sw_lid_matrix(cl_map_item_t * const p_map_item,
> +				 void *context)
> +{
> +	cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +	uint16_t target_lid_ho;
> +	uint8_t port_num;
> +
> +	if (!p_sw->p_osm_sw)
> +		return;
> +
> +	/* allocate lid matrices buffer:
> +	   lid_matrix[target_lids][port_nums] */
> +        CL_ASSERT(!p_sw->lid_matrix);
> +	p_sw->lid_matrix = (uint8_t **)
> +		malloc((p_sw->max_lid_ho + 1) * sizeof(uint8_t*));
> +
> +	for (target_lid_ho = 0;
> +	     target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> +
> +		/* set hops for this target through every switch port */
> +
> +		p_sw->lid_matrix[target_lid_ho] =
> +			(uint8_t *)malloc(p_sw->num_ports);
> +		memset(p_sw->lid_matrix[target_lid_ho],
> +		       OSM_NO_PATH, p_sw->num_ports);
> +
> +		for (port_num = 1; port_num < p_sw->num_ports; port_num++)
> +			p_sw->lid_matrix[target_lid_ho][port_num] =
> +				osm_switch_get_hop_count(p_sw->p_osm_sw,
> +							 target_lid_ho,
> +							 port_num);

The original switches keep lid matrices for switches only, and not for CAs;
this was done to speed up LID matrix generation and to save a lot of memory.

> +	}
> +} /* __ucast_cache_read_sw_lid_matrix() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_sw_routing(cl_map_item_t * const p_map_item,
> +			       void * context)
> +{
> +	cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +	osm_ucast_cache_t * p_cache = (osm_ucast_cache_t *) context;
> +	uint8_t *ucast_mgr_lft_buf = p_cache->p_ucast_mgr->lft_buf;
> +	uint16_t target_lid_ho;
> +	uint8_t port_num;
> +	uint8_t hops;
> +	osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> +	OSM_LOG_ENTER(p_log);
> +
> +	if (!p_sw->p_osm_sw) {
> +		/* some switches (leaf switches) may exist in the
> +		   cache, but not exist in the current topology */
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"cached switch 0x%04x doesn't exist in the fabric\n",
> +			p_sw->lid_ho);

Now we are using decimal format for unicast LID representation.

Also, what about using OSM_LOG_DEBUG for debug purposes? This file has 30
OSM_LOG_VERBOSE messages, osm_ucast_mgr.c - only 3.

> +		goto Exit;
> +	}
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"writing routing for cached switch 0x%04x, "
> +		"max_lid_ho = 0x%04x\n",
> +		p_sw->lid_ho, p_sw->max_lid_ho);
> +
> +	/* write cached LFT to this switch: clear existing
> +	   ucast mgr lft buffer, write the cached lft to the
> +	   ucast mgr buffer, and set this lft on switch */
> +	CL_ASSERT(p_sw->lft_buff);
> +	memset(ucast_mgr_lft_buf, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);

Why memset()? Isn't the memcpy() plus the max_lid_ho setup, which you
do below anyway, enough?

> +	if (p_sw->max_lid_ho > 0)
> +		memcpy(ucast_mgr_lft_buf, p_sw->lft_buff,
> +		       p_sw->max_lid_ho + 1);
> +
> +	p_sw->p_osm_sw->max_lid_ho = p_sw->max_lid_ho;
> +	osm_ucast_mgr_set_fwd_table(p_cache->p_ucast_mgr,p_sw->p_osm_sw);
> +
> +	/* write cached lid matrix to this switch */
> +
> +	osm_switch_prepare_path_rebuild(p_sw->p_osm_sw, p_sw->max_lid_ho);
> +
> +	/* set hops to itself */
> +	osm_switch_set_hops(p_sw->p_osm_sw,p_sw->lid_ho,0,0);
> +
> +	for (target_lid_ho = 0;
> +	     target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> +		/* port 0 on switches lid matrices is used
> +		   for storing minimal hops to the target
> +		   lid, so we iterate from port 1 */
> +		for (port_num = 1; port_num < p_sw->num_ports; port_num++) {
> +			hops = p_sw->lid_matrix[target_lid_ho][port_num];
> +			if (hops != OSM_NO_PATH)
> +				osm_switch_set_hops(p_sw->p_osm_sw,
> +				    target_lid_ho, port_num, hops);
> +		}

As above - switches need lid matrices only for switch nodes.

> +	}
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +} /* __ucast_cache_write_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_sw_routing(cl_map_item_t * const p_map_item,
> +			       void *context)
> +{
> +	cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +	unsigned lid;
> +
> +	if(p_sw->lft_buff) {
> +		free(p_sw->lft_buff);
> +		p_sw->lft_buff = NULL;
> +	}
> +
> +	if(p_sw->lid_matrix) {
> +		for (lid = 0; lid < p_sw->max_lid_ho; lid++)
> +			if (p_sw->lid_matrix[lid])
> +				free(p_sw->lid_matrix[lid]);
> +		free(p_sw->lid_matrix);
> +		p_sw->lid_matrix = NULL;
> +	}
> +
> +	p_sw->max_lid_ho = 0;
> +} /* __ucast_cache_clear_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_routing(osm_ucast_cache_t * p_cache)
> +{
> +	cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_clear_sw_routing,
> +			   (void *)p_cache);
> +	p_cache->routing_valid = FALSE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_invalidate(osm_ucast_cache_t * p_cache)
> +{
> +	cache_sw_t * p_sw;
> +	cache_sw_t * p_next_sw;
> +	cache_ca_t * p_ca;
> +	cache_ca_t * p_next_ca;
> +
> +	p_next_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> +	while (p_next_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl)) {
> +		p_sw = p_next_sw;
> +		p_next_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item);
> +		__cache_sw_destroy(p_sw);
> +	}
> +	cl_qmap_remove_all(&p_cache->sw_tbl);
> +
> +	p_next_ca = (cache_ca_t *) cl_qmap_head(&p_cache->ca_tbl);
> +	while (p_next_ca != (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl)) {
> +		p_ca = p_next_ca;
> +		p_next_ca = (cache_ca_t *) cl_qmap_next(&p_ca->map_item);
> +		__cache_ca_destroy(p_ca);
> +	}
> +	cl_qmap_remove_all(&p_cache->ca_tbl);
> +
> +	p_cache->routing_valid = FALSE;
> +	p_cache->topology_valid = FALSE;
> +	p_cache->need_update = FALSE;
> +} /* __ucast_cache_invalidate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_read_topology(osm_ucast_cache_t * p_cache)
> +{
> +	CL_ASSERT(p_cache && p_cache->p_ucast_mgr);
> +
> +	return __ucast_cache_populate(p_cache);
> +}

What is the reason for making this wrapper function?

> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> +	CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> +		  p_cache->topology_valid);
> +
> +	if (p_cache->routing_valid)
> +		__ucast_cache_clear_routing(p_cache);

I see that these two lines are already present in
osm_ucast_cache_refresh_lid_matrices(), and that it is the only place where
__ucast_cache_read_lid_matrices() is called.

It looks to me like the whole logic could be simplified if you have
separate reread_lfts() and reread_lid_matrices() primitives.

> +
> +	cl_qmap_apply_func(&p_cache->sw_tbl,
> +			   __ucast_cache_read_sw_lid_matrix,
> +			   (void *)p_cache);
> +	p_cache->routing_valid = TRUE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_routing(osm_ucast_cache_t * p_cache)
> +{
> +	CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> +		  p_cache->topology_valid && p_cache->routing_valid);
> +
> +	cl_qmap_apply_func(&p_cache->sw_tbl,
> +			   __ucast_cache_write_sw_routing,
> +			   (void *)p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_sw_clear_osm_ptr(cl_map_item_t * const p_map_item,
> +			       void *context)
> +{
> +	((cache_sw_t * const)p_map_item)->p_osm_sw = NULL;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_validate(osm_ucast_cache_t * p_cache)

It seems to me that the whole validation could be performed better (and
faster) during subnet discovery; many things are already there
(details are below).

Actually I thought already about having something like osm_sm.sweep_stat
bitmask instead of just osm_sm.master_sm_found to indicate various events
which were found during discovery.

> +{
> +	osm_switch_t * p_osm_sw;
> +	osm_node_t   * p_osm_node;
> +	osm_node_t   * p_remote_osm_node;
> +	osm_physp_t  * p_osm_physp;
> +	osm_physp_t  * p_remote_osm_physp;
> +	cache_sw_t   * p_sw;
> +	cache_sw_t   * p_remote_sw;
> +	cache_ca_t   * p_remote_ca;
> +	uint16_t lid_ho;
> +	uint16_t remote_lid_ho;
> +	uint8_t remote_node_type;
> +	unsigned num_ports;
> +	unsigned i;
> +	int res = UCAST_CACHE_TOPOLOGY_MATCH;
> +	boolean_t fabric_link_exists;
> +	osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +	cl_qmap_t * p_osm_sw_guid_tbl;
> +
> +	OSM_LOG_ENTER(p_log);
> +
> +	p_osm_sw_guid_tbl = &p_cache->p_ucast_mgr->p_subn->sw_guid_tbl;
> +
> +	if (cl_qmap_count(p_osm_sw_guid_tbl) >
> +	    cl_qmap_count(&p_cache->sw_tbl)) {
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"current subnet has more switches than the cache - "
> +			"cache is invalid\n");
> +		res |= UCAST_CACHE_TOPOLOGY_MORE_SWITCHES;
> +		goto Exit;
> +	}
> +
> +	if (cl_qmap_count(p_osm_sw_guid_tbl) <
> +	    cl_qmap_count(&p_cache->sw_tbl)) {
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"current subnet has less switches than the cache - "
> +			"continuing validation\n");
> +		res |= UCAST_CACHE_TOPOLOGY_LESS_SWITCHES;
> +	}

This is already handled in the drop manager.

> +
> +	/* Clear the pointers to osm switch on all the cached switches.
> +	   These pointers might be invalid right now: some cached switch
> +	   might be missing in the real subnet, and some missing switch
> +	   might reappear, such as in case of switch reboot. */
> +	cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_sw_clear_osm_ptr,
> +			   NULL);
> +
> +
> +	for (p_osm_sw = (osm_switch_t *) cl_qmap_head(p_osm_sw_guid_tbl);
> +	     p_osm_sw != (osm_switch_t *) cl_qmap_end(p_osm_sw_guid_tbl);
> +	     p_osm_sw = (osm_switch_t *) cl_qmap_next(&p_osm_sw->map_item)) {
> +
> +		lid_ho = cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node,0));
> +		p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> +		if (!p_sw) {
> +			OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +				"new lid (0x%04x)is in the fabric - "
> +				"cache is invalid\n", lid_ho);
> +			res |= UCAST_CACHE_TOPOLOGY_NEW_LID;
> +			goto Exit;
> +		}

New ports are already tracked with the 'is_new' field of the osm_port
structure (it is necessary anyway for sending port in/out traps).

> +
> +		p_sw->p_osm_sw = p_osm_sw;
> +
> +		/* scan all the ports and check if the cache is valid */
> +
> +		p_osm_node = p_osm_sw->p_node;
> +		num_ports = osm_node_get_num_physp(p_osm_node);
> +
> +		/* skipping port 0 on switches */
> +		for (i = 1; i < num_ports; i++) {
> +			p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> +
> +			fabric_link_exists = FALSE;
> +			if (p_osm_physp &&
> +			    osm_physp_is_valid(p_osm_physp) &&

osm_node_get_physp_ptr() returns NULL if port is not "valid".

> +			    osm_link_is_healthy(p_osm_physp)) {
> +				p_remote_osm_physp =
> +					osm_physp_get_remote(p_osm_physp);
> +				if (p_remote_osm_physp &&
> +				    osm_physp_is_valid(p_remote_osm_physp) &&
> +				    osm_link_is_healthy(p_remote_osm_physp))
> +					fabric_link_exists = TRUE;
> +			}
> +
> +			if (!fabric_link_exists && !p_sw->ports[i])
> +				continue;
> +
> +			if (fabric_link_exists && !p_sw->ports[i]) {
> +				OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +					"lid 0x%04x, port %d, link exists "
> +					"in the fabric, but not cached - "
> +					"cache is invalid\n",
> +					lid_ho, i);
> +				res |= UCAST_CACHE_TOPOLOGY_LINK_ADDED;
> +				goto Exit;
> +			}
> +
> +			if (!fabric_link_exists && p_sw->ports[i]){
> +				/*
> +				 * link exists in cache, but missing
> +				 * in current fabric
> +				 */
> +				if (p_sw->ports[i]->remote_node_type ==
> +				    IB_NODE_TYPE_SWITCH) {
> +					p_remote_sw =
> +					    p_sw->ports[i]->remote_node.p_sw;
> +					/* cache is allowed to have a
> +					   leaf switch that is missing
> +					   in the current subnet */
> +					if (!p_remote_sw->is_leaf) {
> +						OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +							"lid 0x%04x, port %d, "
> +							"fabric is missing a link "
> +							"to non-leaf switch - "
> +							"cache is invalid\n",
> +							lid_ho, i);
> +						res |= UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING;
> +						goto Exit;
> +					}
> +					else {
> +						OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +							"lid 0x%04x, port %d, "
> +							"fabric is missing a link "
> +							"to leaf switch - "
> +							"continuing validation\n",
> +							lid_ho, i);
> +						res |= UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING;
> +						continue;
> +					}
> +				}
> +				else {
> +					/* this means that link to
> +					   non-switch node is missing */
> +					CL_ASSERT(p_sw->is_leaf);
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, port %d, "
> +						"fabric is missing a link "
> +						"to CA - "
> +						"continuing validation\n",
> +						lid_ho, i);
> +					res |= UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING;
> +					continue;
> +				}
> +			}

I think all this can be tracked in port_info.

> +
> +			/*
> +			 * Link exists both in fabric and in cache.
> +			 * Compare remote nodes.
> +			 */
> +
> +			p_remote_osm_node =
> +				osm_physp_get_node_ptr(p_remote_osm_physp);
> +			if (!p_remote_osm_node) {
> +				/* No node for remote port!
> +				   Something wrong is going on here,
> +				    so we better not use cache... */
> +				OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A53: "
> +					"lid 0x%04x, port %d, "
> +					"no node for remote port - "
> +					"cache mismatch\n",
> +					lid_ho, i);
> +				res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +				goto Exit;
> +			}
> +
> +			remote_node_type =
> +				osm_node_get_type(p_remote_osm_node);
> +
> +			if (remote_node_type !=
> +			    p_sw->ports[i]->remote_node_type) {
> +				/* remote node type in the current fabric
> +				   differs from the cached one - looks like
> +				   node was replaced by something else */
> +				OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +					"lid 0x%04x, port %d, "
> +					"remote node type mismatch - "
> +					"cache is invalid\n",
> +					lid_ho, i);
> +				res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +				goto Exit;
> +			}

Why are nodes and not ports compared? Will this handle the case where CA
port 1 was disconnected and port 2 was connected with the same cable (and
will get another LID value)?

> +
> +			if (remote_node_type == IB_NODE_TYPE_SWITCH) {
> +				remote_lid_ho =
> +					cl_ntoh16(osm_node_get_base_lid(
> +						p_remote_osm_node, 0));
> +
> +				p_remote_sw = __ucast_cache_get_sw(
> +					p_cache,
> +					remote_lid_ho);

And what if the switch was changed, but the same LID value was reassigned
for some reason?

Wouldn't it be easier to compare port GUIDs?

> +
> +				if (!p_remote_sw) {
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, "
> +						"new switch in the fabric - "
> +						"cache is invalid\n",
> +						remote_lid_ho);
> +					res |= UCAST_CACHE_TOPOLOGY_NEW_SWITCH;
> +					goto Exit;
> +				}
> +
> +				if (p_sw->ports[i]->remote_node.p_sw !=
> +				    p_remote_sw) {
> +					/* remote cached switch that pointed
> +					   by the port is not equal to the
> +					   switch that was obtained for the
> +					   remote lid - link was changed */
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, port %d, "
> +						"link location changed "
> +						"(remote node mismatch) - "
> +						"cache is invalid\n",
> +						lid_ho, i);
> +					res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +					goto Exit;
> +				}

Could you elaborate on when this would be possible? (I'm starting to miss
things :( )

> +			}
> +			else {
> +				if (!p_sw->is_leaf) {
> +					/* remote node type is CA, but the
> +					   cached switch is not marked as
> +					   leaf - something has changed */
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, port %d, "
> +						"link changed - "
> +						"cache is invalid\n",
> +						lid_ho, i);
> +					res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +					goto Exit;
> +				}
> +
> +				remote_lid_ho =
> +					cl_ntoh16(osm_physp_get_base_lid(
> +						p_remote_osm_physp));
> +
> +				p_remote_ca = __ucast_cache_get_ca(
> +					p_cache, remote_lid_ho);
> +
> +				if (!p_remote_ca) {
> +					/* new lid is in the fabric -
> +					   cache is invalid */
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, port %d, "
> +						"new CA in the fabric "
> +						"(lid 0x%04x) - "
> +						"cache is invalid\n",
> +						lid_ho, i, remote_lid_ho);
> +					res |= UCAST_CACHE_TOPOLOGY_NEW_CA;
> +					goto Exit;
> +				}
> +
> +				if (p_sw->ports[i]->remote_node.p_ca !=
> +				    p_remote_ca) {
> +					/* remote cached CA that pointed
> +					   by the port is not equal to the
> +					   CA that was obtained for the
> +					   remote lid - link was changed */
> +					OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +						"lid 0x%04x, port %d, "
> +						"link to CA (lid 0x%04x) "
> +						"has changed - "
> +						"cache is invalid\n",
> +						lid_ho, i, remote_lid_ho);
> +					res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +					goto Exit;
> +				}
> +			}
> +		} /* done comparing the ports of the switch */

I think it will be *much* easier to track this in osm_port_info.c - look
at the need_update flag setup for osm_physp and osm_switch, and also at
where osm_node_unlink() is called.

> +	} /* done comparing all the switches */
> +
> +	/* At this point we have four possible flags on:
> +	   1. UCAST_CACHE_TOPOLOGY_MATCH
> +	      We have a perfect topology match to the cache
> +	   2. UCAST_CACHE_TOPOLOGY_LESS_SWITCHES
> +	      Cached topology has one or more switches that do not exist
> +	      in the current topology. There are two types of such switches:
> +	      leaf switches and the regular switches. But if some regular
> +	      switch was missing, we would exit the comparison with the
> +	      UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING flag, so if some switch
> +	      in the topology is missing, it has to be leaf switch.
> +	   3. UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING
> +	      One or more link to leaf switches are missing in the current
> +	      topology.
> +	   4. UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING
> +	      One or more CAs are missing in the current topology.
> +	   In all these cases the cache is perfectly usable - it just might
> +	   have routing to unexisting lids. */
> +
> +	if (res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES) {
> +		/* if there are switches in the cache that don't exist
> +		   in the current topology, make sure that they are
> +		   all leaf switches, otherwise cache is useless */
> +		for (p_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> +		     p_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl);
> +		     p_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item)) {
> +			if (!p_sw->p_osm_sw && !p_sw->is_leaf) {
> +				OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +					"non-leaf switch in the fabric is "
> +					"missing - cache is invalid\n");
> +				res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +				goto Exit;
> +			}
> +		}
> +	}
> +
> +	if ((res & UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING) &&
> +	    !(res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES)) {
> +		/* some link to leaf switch is missing, but there are
> +		   no missing switches - link failure or topology
> +		   changes, which means that we probably shouldn't
> +		   use the cache here */
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"topology change - cache is invalid\n");
> +		res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +		goto Exit;
> +	}
> +
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +	return res;
> +
> +} /* __ucast_cache_validate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +int
> +osm_ucast_cache_apply(osm_ucast_cache_t * p_cache)
> +{
> +	int res = 0;
> +	osm_log_t * p_log;
> +
> +	if (!p_cache)
> +		return 1;
> +
> +	p_log = p_cache->p_ucast_mgr->p_log;
> +
> +	OSM_LOG_ENTER(p_log);
> +	if (!p_cache->topology_valid) {
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"unicast cache is empty - can't "
> +			"use it on this sweep\n");
> +		res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +		goto Exit;
> +	}
> +
> +	if (!p_cache->routing_valid) {
> +		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A55: "
> +			"cached routing invalid\n");
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"invalidating cache\n");
> +		__ucast_cache_invalidate(p_cache);
> +		res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +		goto Exit;
> +	}
> +
> +	res = __ucast_cache_validate(p_cache);
> +
> +	if ((res & UCAST_CACHE_TOPOLOGY_NO_MATCH          ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_MORE_SWITCHES     ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_LINK_ADDED        ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_NEW_SWITCH        ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_NEW_CA            ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_NEW_LID           ) ||
> +	    (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING)) {

Why not make a single return status?

> +		/* The change in topology doesn't allow us to use the.
> +		   existing cache. Cache should be invalidated, and new
> +		   cache should be built after the routing recalculation. */
> +		OSM_LOG(p_log, OSM_LOG_INFO,
> +			"changes in topology (0x%x) - "
> +			"invalidating cache\n", res);
> +		__ucast_cache_invalidate(p_cache);
> +		goto Exit;
> +	}
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"cache is valid (status 0x%04x) - using the cached routing\n",res);
> +
> +	/* existing cache can be used - write back the cached routing */
> +	__ucast_cache_write_routing(p_cache);
> +
> +	/*
> +	 * ToDo: Detailed result of the topology comparison will
> +	 * ToDo: be needed later for the Incremental Routing,
> +	 * ToDo: where based on this result, the routing algorithm
> +	 * ToDo: will try to route "around" the missing components.
> +	 * ToDo: For now - reset the result whenever the cache
> +	 * ToDo: is valid.
> +	 */
> +	res = 0;
> +
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +	return res;
> +} /* osm_ucast_cache_apply() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> +				      uint8_t * ucast_mgr_lft_buf,
> +				      osm_switch_t * p_osm_sw)
> +{
> +	uint16_t lid_ho =
> +		cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> +	cache_sw_t * p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> +
> +	OSM_LOG_ENTER(p_cache->p_ucast_mgr->p_log);
> +
> +	OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_VERBOSE,
> +		"caching lft for switch 0x%04x\n",
> +		lid_ho);
> +
> +	if (!p_sw || !p_sw->p_osm_sw) {
> +		OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_ERROR,
> +			"ERR 3A57: "
> +			"fabric switch 0x%04x %s in the unicast cache\n",
> +			lid_ho,
> +			(p_sw) ? "is not initialized" : "doesn't exist");
> +		goto Exit;
> +	}
> +
> +	CL_ASSERT(p_sw->p_osm_sw == p_osm_sw);
> +	CL_ASSERT(!p_sw->lft_buff);
> +
> +	p_sw->max_lid_ho = p_osm_sw->max_lid_ho;
> +
> +	/* allocate linear forwarding table buffer and fill it */
> +	p_sw->lft_buff = (uint8_t *)malloc(IB_LID_UCAST_END_HO + 1);
> +	memcpy(p_sw->lft_buff, p_cache->p_ucast_mgr->lft_buf,
> +	       IB_LID_UCAST_END_HO + 1);
> +
> +    Exit:
> +	OSM_LOG_EXIT(p_cache->p_ucast_mgr->p_log);
> +} /* osm_ucast_cache_set_sw_fwd_table() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache)
> +{
> +	osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +	OSM_LOG_ENTER(p_log);
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"starting ucast cache topology refresh\n");
> +
> +	if (p_cache->topology_valid) {
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"invalidating existing ucast cache\n");
> +		__ucast_cache_invalidate(p_cache);
> +	}
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE, "caching topology\n");
> +
> +	if (__ucast_cache_read_topology(p_cache) != 0) {
> +		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A56: "
> +			"cache population failed\n");
> +		__ucast_cache_invalidate(p_cache);
> +		goto Exit;
> +	}
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"ucast cache topology refresh done\n");
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_topo() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> +	osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +	OSM_LOG_ENTER(p_log);
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"starting ucast cache lid matrices refresh\n");
> +
> +	if (!p_cache->topology_valid) {
> +		OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A54: "
> +			"cached topology is invalid\n");
> +		goto Exit;
> +	}
> +
> +	if (p_cache->routing_valid) {
> +		OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +			"invalidating existing ucast routing cache\n");
> +		__ucast_cache_clear_routing(p_cache);
> +	}
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"caching lid matrices\n");
> +
> +	__ucast_cache_read_lid_matrices(p_cache);
> +
> +	OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +		"ucast cache lid matrices refresh done\n");
> +    Exit:
> +	OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_lid_matrices() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(osm_ucast_mgr_t * const p_mgr)
> +{
> +	if (p_mgr->p_subn->opt.lmc > 0) {
> +		OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A50: "
> +			"Unicast cache is not supported for LMC>0\n");
> +		return NULL;
> +	}
> +
> +	osm_ucast_cache_t * p_cache =
> +		(osm_ucast_cache_t*)malloc(sizeof(osm_ucast_cache_t));
> +	if (!p_cache)
> +		return NULL;
> +
> +	memset(p_cache, 0, sizeof(osm_ucast_cache_t));
> +
> +	cl_qmap_init(&p_cache->sw_tbl);
> +	cl_qmap_init(&p_cache->ca_tbl);
> +	p_cache->p_ucast_mgr = p_mgr;
> +
> +	return p_cache;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void
> +osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache)
> +{
> +	if (!p_cache)
> +		return;
> +	__ucast_cache_invalidate(p_cache);
> +	free(p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> -- 
> 1.5.1.4
> 



More information about the general mailing list