[ofa-general] Re: [PATCH 3/3] OpenSM: Fix incorrect identification of routing engine used

Sasha Khapyorsky sashak at voltaire.com
Sat Dec 22 04:42:28 PST 2007


Hi Al,

Some notes below...

On 16:41 Tue 18 Dec     , Al Chu wrote:
> Hey Sasha,
> 
> And like my previous serious of patches, this patch 3/3 fixes several
> locations in the code that incorrectly determined what routing algorithm
> was used to route the subnet.
> 
> Thanks,
> Al
> -- 
> Albert Chu
> chu11 at llnl.gov
> 925-422-5311
> Computer Scientist
> High Performance Systems Division
> Lawrence Livermore National Laboratory

> From 4691381e56879de190c4968bd1e90c92995e3ac0 Mon Sep 17 00:00:00 2001
> From: Albert L. Chu <chu11 at llnl.gov>
> Date: Tue, 18 Dec 2007 10:48:02 -0800
> Subject: [PATCH] fix incorrect identification of routing engine
> 
> 
> Signed-off-by: Albert L. Chu <chu11 at llnl.gov>
> ---
>  opensm/opensm/osm_dump.c           |    6 ++++--
>  opensm/opensm/osm_sa_path_record.c |   21 ++++++++++++---------
>  opensm/opensm/osm_ucast_lash.c     |    6 +++++-
>  3 files changed, 21 insertions(+), 12 deletions(-)
> 
> diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c
> index fa07f83..ed948d3 100644
> --- a/opensm/opensm/osm_dump.c
> +++ b/opensm/opensm/osm_dump.c
> @@ -49,6 +49,7 @@
>  #include <iba/ib_types.h>
>  #include <complib/cl_qmap.h>
>  #include <complib/cl_debug.h>
> +#include <complib/cl_passivelock.h>
>  #include <opensm/osm_opensm.h>
>  #include <opensm/osm_log.h>
>  #include <opensm/osm_node.h>
> @@ -150,8 +151,9 @@ static void dump_ucast_routes(cl_map_item_t * p_map_item, void *cxt)
>  		"LID    : Port : Hops : Optimal\n",
>  		cl_ntoh64(osm_node_get_node_guid(p_node)));
>  
> -	dor = (p_osm->routing_engine.name &&
> -	       (strcmp(p_osm->routing_engine.name, "dor") == 0));
> +        cl_plock_acquire(&p_osm->lock);
> +	dor = (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_DOR);
> +        cl_plock_release(&p_osm->lock);
>  
>  	for (lid_ho = 1; lid_ho <= max_lid_ho; lid_ho++) {
>  		fprintf(file, "0x%04X : ", lid_ho);
> diff --git a/opensm/opensm/osm_sa_path_record.c b/opensm/opensm/osm_sa_path_record.c
> index 4f20d8e..efd7c41 100644
> --- a/opensm/opensm/osm_sa_path_record.c
> +++ b/opensm/opensm/osm_sa_path_record.c
> @@ -240,6 +240,7 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  	const osm_physp_t *p_src_physp;
>  	const osm_physp_t *p_dest_physp;
>  	const osm_prtn_t *p_prtn = NULL;
> +	osm_opensm_t *p_osm;
>  	const ib_port_info_t *p_pi;
>  	ib_api_status_t status = IB_SUCCESS;
>  	ib_net16_t pkey;
> @@ -256,6 +257,7 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  	ib_slvl_table_t *p_slvl_tbl = NULL;
>  	osm_qos_level_t *p_qos_level = NULL;
>  	uint16_t valid_sl_mask = 0xffff;
> +	int is_lash;
>  
>  	OSM_LOG_ENTER(p_rcv->p_log, __osm_pr_rcv_get_path_parms);
>  
> @@ -266,6 +268,8 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  	p_src_physp = p_physp;
>  	p_pi = &p_physp->port_info;
>  
> +	p_osm = p_rcv->p_subn->p_osm;
> +
>  	mtu = ib_port_info_get_mtu_cap(p_pi);
>  	rate = ib_port_info_compute_rate(p_pi);
>  
> @@ -733,6 +737,10 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  	 * Set PathRecord SL.
>  	 */
>  
> +	cl_plock_acquire(&p_osm->lock);
> +	is_lash = (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_LASH);	
> +	cl_plock_release(&p_osm->lock);
> +

Here p_osm->lock is grabbed already (look at the caller 
osm_pr_rcv_process() - there p_rcv->p_lock refers &p_sm->lock (again,
I must admit that in OpenSM a lock and another pointers are copied over
various structures multiple times, this mess makes it very hard to track
where and which lock is actually used and we need to improve this)).

On Linux OpenSM doesn't deadlock here because cl_plock_acquire() is
implemented via pthread_rwlock_rdlock() (read-only lock which can be
acquired multiple times), on other systems or with another
cl_plock_acquire() implementation it could be not a true.

So I'm removing this locking here and in another "already locked"
sections.

>  	if (comp_mask & IB_PR_COMPMASK_SL) {
>  		/*
>  		 * Specific SL was requested
> @@ -750,10 +758,8 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  			goto Exit;
>  		}
>  
> -		if (p_rcv->p_subn->opt.routing_engine_name &&
> -		    strcmp(p_rcv->p_subn->opt.routing_engine_name, "lash") == 0
> -		    && osm_get_lash_sl(p_rcv->p_subn->p_osm, p_src_port,
> -				       p_dest_port) != sl) {
> +		if (is_lash
> +		    && osm_get_lash_sl(p_osm, p_src_port, p_dest_port) != sl) {
>  			osm_log(p_rcv->p_log, OSM_LOG_ERROR,
>  				"__osm_pr_rcv_get_path_parms: ERR 1F23: "
>  				"Required PathRecord SL (%u) doesn't "
> @@ -762,16 +768,13 @@ __osm_pr_rcv_get_path_parms(IN osm_pr_rcv_t * const p_rcv,
>  			goto Exit;
>  		}
>  
> -	} else if (p_rcv->p_subn->opt.routing_engine_name &&
> -		   strcmp(p_rcv->p_subn->opt.routing_engine_name,
> -			  "lash") == 0) {
> +	} else if (is_lash) {
>  		/*
>  		 * No specific SL in PathRecord request.
>  		 * If it's LASH routing - use its SL.
>  		 * slid and dest_lid are stored in network in lash.
>  		 */
> -		sl = osm_get_lash_sl(p_rcv->p_subn->p_osm,
> -				     p_src_port, p_dest_port);
> +		sl = osm_get_lash_sl(p_osm, p_src_port, p_dest_port);
>  
>  	} else if (p_qos_level && p_qos_level->sl_set) {
>  		/*
> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index 10dda3a..71e8b56 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -1425,8 +1425,12 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, osm_port_t * p_src_port,
>  	unsigned src_id;
>  	osm_switch_t *p_sw;
>  
> -	if (p_osm->routing_engine.ucast_build_fwd_tables != lash_process)
> +	cl_plock_acquire(&p_osm->lock);
> +	if (p_osm->routing_engine_used != OSM_ROUTING_ENGINE_TYPE_LASH) {
> +		cl_plock_release(&p_osm->lock);
>  		return OSM_DEFAULT_SL;
> +	}
> +	cl_plock_release(&p_osm->lock);

The same story is here too.

Sasha

>  
>  	p_sw = get_osm_switch_from_port(p_dst_port);
>  	if (!p_sw)
> -- 
> 1.5.1
> 




More information about the general mailing list