[openib-general] [PATCH] osm: partition manager force policy

Sasha Khapyorsky sashak at voltaire.com
Thu Jun 15 04:06:17 PDT 2006


Hi Eitan,

Some comments about the patch.

Personally I'm glad to see that you are using tab instead of spaces as
identaion character. But it would be nice if next time you will not mix
the functional changes and identaion fixes in the same patch, but instead
will provide two different patches. Also it would be nice if your
identation fixes will cover whole file(s) and not just selected lines.

The same is about massive code moving, the patch separation may simplify
review.

The rest is below.

On 15:54 Tue 13 Jun     , Eitan Zahavi wrote:
> --text follows this line--
> Hi Hal
> 
> This is a second take after debug and cleanup of the partition manager
> patch I have previously provided. The functionality is the same but
> this one is after 2 days of testing on the simulator.
> I also did some code restructuring for clarity. 
> 
> Tests passed were both dedicated pkey enforcements (pkey.*) and
> stress test (osmStress.*)
> 
> As I started to test the partition manager code (using ibmgtsim pkey test),
> I realized the implementation does not really enforces the partition policy
> on the given fabric. This patch fixes that. It was verified using the 
> simulation test. Several other corner cases were fixed too.
> 
> Eitan
> 
> Signed-off-by:  Eitan Zahavi <eitan at mellanox.co.il>
> 
> Index: include/opensm/osm_port.h
> ===================================================================
> --- include/opensm/osm_port.h	(revision 7867)
> +++ include/opensm/osm_port.h	(working copy)
> @@ -586,6 +586,39 @@ osm_physp_get_pkey_tbl( IN const osm_phy
>  *  Port, Physical Port
>  *********/
>  
> +/****f* OpenSM: Physical Port/osm_physp_get_mod_pkey_tbl
> +* NAME
> +*  osm_physp_get_mod_pkey_tbl
> +*
> +* DESCRIPTION
> +*  Returns a NON CONST pointer to the P_Key table object of the Physical Port object.
> +*
> +* SYNOPSIS
> +*/
> +static inline osm_pkey_tbl_t *
> +osm_physp_get_mod_pkey_tbl( IN osm_physp_t* const p_physp )
> +{
> +  CL_ASSERT( osm_physp_is_valid( p_physp ) );
> +  /*
> +    (14.2.5.7) - the block number valid values are 0-2047, and are further
> +    limited by the size of the P_Key table specified by the PartitionCap on the node. 
> +  */
> +  return( &p_physp->pkeys );
> +};
> +/*
> +* PARAMETERS
> +*  p_physp
> +*     [in] Pointer to an osm_physp_t object.
> +*
> +* RETURN VALUES
> +*  The pointer to the P_Key table object.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*  Port, Physical Port
> +*********/
> +

Is not this simpler to remove 'const' from existing
osm_physp_get_pkey_tbl() function instead of using new one?

>  /****f* OpenSM: Physical Port/osm_physp_set_slvl_tbl
>  * NAME
>  *	osm_physp_set_slvl_tbl
> Index: include/opensm/osm_pkey.h
> ===================================================================
> --- include/opensm/osm_pkey.h	(revision 7867)
> +++ include/opensm/osm_pkey.h	(working copy)
> @@ -92,6 +92,9 @@ typedef struct _osm_pkey_tbl
>    cl_ptr_vector_t blocks;
>    cl_ptr_vector_t new_blocks;
>    cl_map_t        keys;
> +  cl_qlist_t      pending;
> +  uint16_t        used_blocks;
> +  uint16_t        max_blocks;
>  } osm_pkey_tbl_t;
>  /*
>  * FIELDS
> @@ -104,6 +107,18 @@ typedef struct _osm_pkey_tbl
>  *	keys
>  *		A set holding all keys
>  *
> +*  pending
> +*     A list osm_pending_pkey structs that is temporarily set by the 
> +*     pkey mgr and used during pkey mgr algorithm only
> +*
> +*  used_blocks
> +*     Tracks the number of blocks having non-zero pkeys
> +*
> +*  max_blocks
> +*     The maximal number of blocks this partition table might hold
> +*     this value is based on node_info (for port 0 or CA) or switch_info
> +*     updated on receiving the node_info or switch_info GetResp
> +*
>  * NOTES
>  * 'blocks' vector should be used to store pkey values obtained from
>  * the port and SM pkey manager should not change it directly, for this
> @@ -114,6 +129,39 @@ typedef struct _osm_pkey_tbl
>  *
>  *********/
>  
> +/****s* OpenSM: osm_pending_pkey_t
> +* NAME
> +*	osm_pending_pkey_t
> +*
> +* DESCRIPTION
> +*	This objects stores temporary information on pkeys their target block and index
> +*  during the pkey manager operation
> +*
> +* SYNOPSIS
> +*/
> +typedef struct _osm_pending_pkey {
> +  cl_list_item_t list_item;
> +  uint16_t		  pkey;
> +  uint32_t		  block;
> +  uint8_t		  index;
> +  boolean_t		  is_new;
> +} osm_pending_pkey_t;
> +/*
> +* FIELDS
> +*	pkey
> +*		The actual P_Key
> +*
> +*	block
> +*		The block index based on the previous table extracted from the device
> +*
> +*	index
> +*		The index of the pky within the block
> +*
> +*  is_new
> +*     TRUE for new P_Keys such that the block and index are invalid in that case
> +*
> +*********/
> +
>  /****f* OpenSM: osm_pkey_tbl_construct
>  * NAME
>  *  osm_pkey_tbl_construct
> @@ -209,8 +257,8 @@ osm_pkey_tbl_get_num_blocks( 
>  static inline ib_pkey_table_t *osm_pkey_tbl_block_get( 
>    const osm_pkey_tbl_t *p_pkey_tbl, uint16_t block)
>  {
> -  CL_ASSERT(block < cl_ptr_vector_get_size(&p_pkey_tbl->blocks));
> -  return(cl_ptr_vector_get(&p_pkey_tbl->blocks, block));
> +	return( (block < cl_ptr_vector_get_size(&p_pkey_tbl->blocks)) ?
> +			  cl_ptr_vector_get(&p_pkey_tbl->blocks, block) : NULL);
>  };
>  /*
>  *  p_pkey_tbl
> @@ -244,6 +292,106 @@ static inline ib_pkey_table_t *osm_pkey_
>  /*
>   *********/
>  
> +
> +/****f* OpenSM: osm_pkey_tbl_make_block_pair
> +* NAME
> +*  osm_pkey_tbl_make_block_pair
> +*
> +* DESCRIPTION
> +*  Find or create a pair of "old" and "new" blocks for the
> +*  given block index
> +*
> +* SYNOPSIS
> +*/
> +int osm_pkey_tbl_make_block_pair( 
> +	osm_pkey_tbl_t   *p_pkey_tbl, 
> +	uint16_t          block_idx,
> +	ib_pkey_table_t **pp_old_block,
> +	ib_pkey_table_t **pp_new_block);
> +/*
> +* p_pkey_tbl
> +*   [in] Pointer to the PKey table 
> +*
> +* block_idx
> +*   [in] The block index to use
> +*
> +* pp_old_block
> +*   [out] Pointer to the old block pointer arg
> +*
> +* pp_new_block
> +*   [out] Pointer to the new block pointer arg
> +*
> +* RETURN VALUES
> +*   0 if OK 1 if failed

It is better (conventional) to use -1 as failure return status.

> +* 
> +*********/
> +
> +/****f* OpenSM: osm_pkey_tbl_set_new_entry
> +* NAME 
> +*  osm_pkey_tbl_set_new_entry
> +*
> +* DESCRIPTION
> +*   stores the given pkey in the "new" blocks array and update
> +*   the "map" to show that on the "old" blocks
> +*
> +* SYNOPSIS
> +*/
> +int
> +osm_pkey_tbl_set_new_entry( 
> +	IN osm_pkey_tbl_t *p_pkey_tbl,
> +	IN uint16_t        block_idx,
> +	IN uint8_t         pkey_idx,
> +	IN uint16_t        pkey);
> +/*
> +* p_pkey_tbl
> +*   [in] Pointer to the PKey table 
> +*
> +* block_idx
> +*   [in] The block index to use
> +*
> +* pkey_idx
> +*   [in] The index within the block
> +*
> +* pkey
> +*   [in] PKey to store
> +*
> +* RETURN VALUES
> +*   0 if OK 1 if failed

Ditto

> +* 
> +*********/
> +
> +/****f* OpenSM: osm_pkey_find_next_free_entry
> +* NAME
> +*  osm_pkey_find_next_free_entry
> +*
> +* DESCRIPTION
> +*  Find the next free entry in the PKey table. Starting at the given
> +*  index and block number. The user should increment pkey_idx before 
> +*  next call
> +*  Inspect the "new" blocks array for empty space.
> +*
> +* SYNOPSIS
> +*/
> +boolean_t
> +osm_pkey_find_next_free_entry(
> +	IN osm_pkey_tbl_t *p_pkey_tbl, 
> +	OUT uint16_t      *p_block_idx,
> +	OUT uint8_t       *p_pkey_idx);
> +/*
> +* p_pkey_tbl
> +*   [in] Pointer to the PKey table 
> +*
> +* p_block_idx
> +*   [out] The block index to use
> +*
> +* p_pkey_idx
> +*   [out] The index within the block to use
> +*
> +* RETURN VALUES
> +*   TRUE if found FALSE if did not find
> +* 
> +*********/
> +
>  /****f* OpenSM: osm_pkey_tbl_sync_new_blocks
>  * NAME
>  *  osm_pkey_tbl_sync_new_blocks
> @@ -263,9 +411,44 @@ void osm_pkey_tbl_sync_new_blocks( 
>  *
>  *********/
>  
> +/****f* OpenSM: osm_pkey_tbl_get_block_and_idx
> +* NAME
> +*  osm_pkey_tbl_get_block_and_idx
> +*
> +* DESCRIPTION
> +*  set the block index and pkey index the given
> +*  pkey is found in. return 1 if cound not find 
> +*  it, 0 if OK
> +*
> +* SYNOPSIS
> +*/
> +int
> +osm_pkey_tbl_get_block_and_idx(
> +  IN  osm_pkey_tbl_t *p_pkey_tbl, 
> +  IN  uint16_t       *p_pkey,
> +  OUT uint32_t       *block_idx,
> +  OUT uint8_t        *pkey_index);
> +/*
> +*  p_pkey_tbl
> +*     [in] Pointer to osm_pkey_tbl_t object.
> +*  
> +*  p_pkey
> +*     [in] Pointer to the P_Key entry searched
> +*
> +*  p_block_idx
> +*     [out] Pointer to the block index to be updated
> +*
> +*  p_pkey_idx 
> +*     [out] Pointer to the pkey index (in the block) to be updated
> +*
> +*
> +* NOTES
> +*
> +*********/
> +
>  /****f* OpenSM: osm_pkey_tbl_set
>  * NAME
>  *  osm_pkey_tbl_set
> Index: opensm/osm_pkey.c
> ===================================================================
> --- opensm/osm_pkey.c	(revision 7904)
> +++ opensm/osm_pkey.c	(working copy)
> @@ -100,6 +100,9 @@ int osm_pkey_tbl_init( 
>    cl_ptr_vector_init( &p_pkey_tbl->blocks, 0, 1);
>    cl_ptr_vector_init( &p_pkey_tbl->new_blocks, 0, 1);
>    cl_map_init( &p_pkey_tbl->keys, 1 );
> +	cl_qlist_init( &p_pkey_tbl->pending );
> +	p_pkey_tbl->used_blocks = 0;
> +	p_pkey_tbl->max_blocks = 0;
>    return(IB_SUCCESS);
>  }
>  
> @@ -118,14 +121,29 @@ void osm_pkey_tbl_sync_new_blocks(
>      p_block = cl_ptr_vector_get(&p_pkey_tbl->blocks, b);
>      if ( b < new_blocks )
>        p_new_block = cl_ptr_vector_get(&p_pkey_tbl->new_blocks, b);
> -    else {
> +		else 
> +      {
>        p_new_block = (ib_pkey_table_t *)malloc(sizeof(*p_new_block));
>        if (!p_new_block)
>          break;
> +			cl_ptr_vector_set(&((osm_pkey_tbl_t *)p_pkey_tbl)->new_blocks, 
> +									b, p_new_block);
> +		}
> +
>        memset(p_new_block, 0, sizeof(*p_new_block));
> -      cl_ptr_vector_set(&((osm_pkey_tbl_t *)p_pkey_tbl)->new_blocks, b, p_new_block);
>      }
> -    memcpy(p_new_block, p_block, sizeof(*p_new_block));
> +}

You changed this function so it does not do any sync anymore. Should
function name be changed too?

> +
> +/**********************************************************************
> + **********************************************************************/
> +void osm_pkey_tbl_cleanup_pending(
> +	IN osm_pkey_tbl_t *p_pkey_tbl)
> +{
> +	cl_list_item_t	*p_item;
> +	p_item = cl_qlist_remove_head( &p_pkey_tbl->pending );
> +	while (p_item != cl_qlist_end( &p_pkey_tbl->pending ) )
> +	{
> +		free( (osm_pending_pkey_t *)p_item );
>    }
>  }
>  
> @@ -202,6 +220,138 @@ int osm_pkey_tbl_set( 
>  
>  /**********************************************************************
>   **********************************************************************/
> +int osm_pkey_tbl_make_block_pair( 
> +	osm_pkey_tbl_t   *p_pkey_tbl, 
> +	uint16_t          block_idx,
> +	ib_pkey_table_t **pp_old_block,
> +	ib_pkey_table_t **pp_new_block)
> +{
> +	if (block_idx >= p_pkey_tbl->max_blocks) return 1;
> +
> +	if (pp_old_block)
> +	{
> +		*pp_old_block = osm_pkey_tbl_block_get( p_pkey_tbl, block_idx );
> +		if (! *pp_old_block)
> +		{
> +			*pp_old_block = (ib_pkey_table_t *)malloc(sizeof(ib_pkey_table_t));
> +			if (!*pp_old_block) return 1;
> +			memset(*pp_old_block, 0, sizeof(ib_pkey_table_t));
> +			cl_ptr_vector_set(&p_pkey_tbl->blocks, block_idx, *pp_old_block);
> +		}
> +	}
> +	
> +	if (pp_new_block)
> +	{
> +		*pp_new_block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_idx );
> +		if (! *pp_new_block)
> +		{
> +			*pp_new_block = (ib_pkey_table_t *)malloc(sizeof(ib_pkey_table_t));
> +			if (!*pp_new_block) return 1;
> +			memset(*pp_new_block, 0, sizeof(ib_pkey_table_t));
> +			cl_ptr_vector_set(&p_pkey_tbl->new_blocks, block_idx, *pp_new_block);
> +		}
> +	}
> +	return 0;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +/*
> +  store the given pkey in the "new" blocks array and update the "map"
> +  to show that on the "old" blocks
> +*/
> +int
> +osm_pkey_tbl_set_new_entry( 
> +	IN osm_pkey_tbl_t *p_pkey_tbl,
> +	IN uint16_t        block_idx,
> +	IN uint8_t         pkey_idx,
> +	IN uint16_t        pkey)
> +{  
> +	ib_pkey_table_t *p_old_block;
> +	ib_pkey_table_t *p_new_block;
> +	
> +	if (osm_pkey_tbl_make_block_pair(
> +			 p_pkey_tbl,  block_idx, &p_old_block, &p_new_block))
> +		return 1;
> +		
> +	cl_map_insert( &p_pkey_tbl->keys,
> +						ib_pkey_get_base(pkey),
> +						&(p_old_block->pkey_entry[pkey_idx]));

Here you map potentially empty pkey entry. Why? "old block" will be
remapped anyway on pkey receiving.

Actually I don't see why you want this pretty tricky and pkey_mgr
specific procedure as generic function.

> +	p_new_block->pkey_entry[pkey_idx] = pkey;
> +	if (p_pkey_tbl->used_blocks < block_idx)
> +		p_pkey_tbl->used_blocks = block_idx;
> +
> +	return 0;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +boolean_t
> +osm_pkey_find_next_free_entry(
> +	IN osm_pkey_tbl_t *p_pkey_tbl, 
> +	OUT uint16_t      *p_block_idx,
> +	OUT uint8_t       *p_pkey_idx)
> +{
> +	ib_pkey_table_t *p_new_block;
> +	
> +	CL_ASSERT(p_block_idx);
> +	CL_ASSERT(p_pkey_idx);
> +
> +	while ( *p_block_idx < p_pkey_tbl->max_blocks)
> +	{
> +		if (*p_pkey_idx > IB_NUM_PKEY_ELEMENTS_IN_BLOCK - 1)
> +		{
> +			*p_pkey_idx = 0;
> +			(*p_block_idx)++;
> +			if (*p_block_idx >= p_pkey_tbl->max_blocks) 
> +				return FALSE;
> +		}
> +
> +		p_new_block = osm_pkey_tbl_new_block_get( p_pkey_tbl, *p_block_idx);
> +
> +		if ( !p_new_block || 
> +			  ib_pkey_is_invalid(p_new_block->pkey_entry[*p_pkey_idx]))
> +			return TRUE;
> +		else
> +			(*p_pkey_idx)++;
> +	}
> +	return FALSE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +int
> +osm_pkey_tbl_get_block_and_idx(
> +	IN	 osm_pkey_tbl_t *p_pkey_tbl,
> +	IN	 uint16_t		 *p_pkey,
> +	OUT uint32_t		 *p_block_idx,
> +	OUT uint8_t			 *p_pkey_index)
> +{
> +	uint32_t			  num_of_blocks;
> +	uint32_t			  block_index;
> +	ib_pkey_table_t *block;
> +
> +	CL_ASSERT( p_pkey_tbl );
> +	CL_ASSERT( p_block_idx != NULL );
> +	CL_ASSERT( p_pkey_idx != NULL );

Why last two CL_ASSERTs? What should be problem with uninitialized
pointers here?

> + 
> +	num_of_blocks = cl_ptr_vector_get_size( &p_pkey_tbl->blocks);
> +	for ( block_index = 0; block_index < num_of_blocks; block_index++ )
> +	{
> +		block = osm_pkey_tbl_block_get( p_pkey_tbl, block_index );
> +		if ( ( block->pkey_entry <= p_pkey ) &&
> +			  ( p_pkey < block->pkey_entry + IB_NUM_PKEY_ELEMENTS_IN_BLOCK))
> +		{
> +			*p_block_idx = block_index;
> +			*p_pkey_index = p_pkey - block->pkey_entry;
> +			return 0;
> +		}
> +	}
> +	return 1;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
>  static boolean_t __osm_match_pkey (
>    IN const ib_net16_t *pkey1,
>    IN const ib_net16_t *pkey2 ) {
> @@ -305,7 +455,8 @@ osm_physp_share_pkey(
>    if (cl_is_map_empty(&pkey_tbl1->keys) || cl_is_map_empty(&pkey_tbl2->keys))
>      return TRUE;
>  
> -  return !ib_pkey_is_invalid(osm_physp_find_common_pkey(p_physp_1, p_physp_2));
> +	return 
> +		!ib_pkey_is_invalid(osm_physp_find_common_pkey(p_physp_1, p_physp_2));
>  }
>  
>  /**********************************************************************
> @@ -321,7 +472,8 @@ osm_port_share_pkey(
>  
>    OSM_LOG_ENTER( p_log, osm_port_share_pkey );
>  
> -  if (!p_port_1 || !p_port_2) {
> +	if (!p_port_1 || !p_port_2)
> +	{
>  	ret = FALSE;
>  	goto Exit;
>    }
> @@ -329,7 +481,8 @@ osm_port_share_pkey(
>    p_physp1 = osm_port_get_default_phys_ptr(p_port_1);
>    p_physp2 = osm_port_get_default_phys_ptr(p_port_2);
>  
> -  if (!p_physp1 || !p_physp2) {
> +	if (!p_physp1 || !p_physp2)
> +	{
>  	ret = FALSE;
>  	goto Exit;
>    }
> Index: opensm/osm_pkey_mgr.c
> ===================================================================
> --- opensm/osm_pkey_mgr.c	(revision 7904)
> +++ opensm/osm_pkey_mgr.c	(working copy)
> @@ -62,6 +62,139 @@
>  
>  /**********************************************************************
>   **********************************************************************/
> +/*
> +  the max number of pkey blocks for a physical port is located in
> +  different place for switch external ports (SwitchInfo) and the
> +  rest of the ports (NodeInfo)
> +*/
> +static int pkey_mgr_get_physp_max_blocks(

I would suggest to add _cap_ to function name. Not too much critical
since it is static function.

> +	IN const osm_subn_t *p_subn,
> +	IN const osm_physp_t *p_physp)
> +{
> +	osm_node_t *p_node = osm_physp_get_node_ptr(p_physp);
> +	osm_switch_t *p_sw;
> +	uint16_t num_pkeys = 0;
> +
> +	if ( (osm_node_get_type(p_node) != IB_NODE_TYPE_SWITCH) ||
> +		  (osm_physp_get_port_num( p_physp ) == 0))
> +		num_pkeys = cl_ntoh16( p_node->node_info.partition_cap );
> +	else
> +	{
> +		p_sw = osm_get_switch_by_guid(p_subn, p_node->node_info.node_guid);
> +		if (p_sw)
> +			num_pkeys = cl_ntoh16( p_sw->switch_info.enforce_cap );
> +	}
> +	return( (num_pkeys + 31) / 32 );
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +/*
> + * Insert the new pending pkey entry to the specific port pkey table
> + * pending pkeys. new entries are inserted at the back.
> + */
> +static void pkey_mgr_process_physical_port(
> +	IN osm_log_t *p_log,
> +	IN const osm_req_t *p_req,
> +	IN const ib_net16_t pkey,
> +	IN osm_physp_t *p_physp )
> +{
> +	osm_node_t *p_node = osm_physp_get_node_ptr( p_physp );
> +	osm_pkey_tbl_t *p_pkey_tbl;
> +	ib_net16_t *p_orig_pkey;
> +	char *stat = NULL;
> +	osm_pending_pkey_t *p_pending;
> +
> +	p_pkey_tbl = osm_physp_get_mod_pkey_tbl( p_physp );
> +	if (! p_pkey_tbl)
           ^^^^^^^^^^^^^
Is it possible?

> +	{
> +		osm_log( p_log, OSM_LOG_ERROR,
> +					"pkey_mgr_process_physical_port: ERR 0501: "
> +					"No pkey table found for node "
> +					"0x%016" PRIx64 " port %u\n",
> +					cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +					osm_physp_get_port_num( p_physp ) );
> +		return;
> +	}
> +
> +	p_pending = (osm_pending_pkey_t *)malloc(sizeof(osm_pending_pkey_t));
> +	if (! p_pending)
> +	{
> +		osm_log( p_log, OSM_LOG_ERROR,
> +					"pkey_mgr_process_physical_port: ERR 0502: "
> +					"Fail to allocate new pending pkey entry for node "
> +					"0x%016" PRIx64 " port %u\n",
> +					cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +					osm_physp_get_port_num( p_physp ) );
> +		return;
> +	}
> +	p_pending->pkey = pkey;
> +	p_orig_pkey = cl_map_get( &p_pkey_tbl->keys, ib_pkey_get_base( pkey ) );
> +	if ( !p_orig_pkey  || 
> +		  (ib_pkey_get_base(*p_orig_pkey) != ib_pkey_get_base(pkey) ))

There the cases of new pkey and updated pkey membership is mixed. Why?

> +	{
> +		p_pending->is_new = TRUE;
> +		cl_qlist_insert_tail(&p_pkey_tbl->pending, (cl_list_item_t*)p_pending);
> +		stat = "inserted";
> +	}
> +	else
> +	{
> +		p_pending->is_new = FALSE;
> +		if (osm_pkey_tbl_get_block_and_idx(p_pkey_tbl, p_orig_pkey,
> +													  &p_pending->block, &p_pending->index))
                              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AFAIK in this function there were CL_ASSERTs which check for uinitialized
pointers.

> +		{
> +			osm_log( p_log, OSM_LOG_ERROR,
> +						"pkey_mgr_process_physical_port: ERR 0503: "
> +						"Fail to obtain P_Key 0x%04x block and index for node "
> +						"0x%016" PRIx64 " port %u\n",
> +						cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +						osm_physp_get_port_num( p_physp ) );
> +			return;
> +		}
> +		cl_qlist_insert_head(&p_pkey_tbl->pending, (cl_list_item_t*)p_pending);
> +		stat = "updated";

Is it will be updated? It is likely "already there" case. No?

Also in this case you can already put the pkey in new_block instead of
holding it in pending list. Then later you will only need to add new
pkeys. This may simplify the flow and even save some mem.

> +	}
> +
> +	osm_log( p_log, OSM_LOG_DEBUG,
> +				"pkey_mgr_process_physical_port:	"
> +				"pkey 0x%04x was %s for node 0x%016" PRIx64
> +				" port %u\n",
> +				cl_ntoh16( pkey ), stat,
> +				cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +				osm_physp_get_port_num( p_physp ) );
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +static void
> +pkey_mgr_process_partition_table(
> +	osm_log_t *p_log,
> +	const osm_req_t *p_req,
> +	const osm_prtn_t *p_prtn,
> +	const boolean_t full )
> +{
> +	const cl_map_t *p_tbl = full ?
> +		&p_prtn->full_guid_tbl : &p_prtn->part_guid_tbl;
> +	cl_map_iterator_t i, i_next;
> +	ib_net16_t pkey = p_prtn->pkey;
> +	osm_physp_t *p_physp;
> +
> +	if ( full )
> +		pkey = cl_hton16( cl_ntoh16( pkey ) | 0x8000 );
> +
> +	i_next = cl_map_head( p_tbl );
> +	while ( i_next != cl_map_end( p_tbl ) )
> +	{
> +		i = i_next;
> +		i_next = cl_map_next( i );
> +		p_physp = cl_map_obj( i );
> +		if ( p_physp && osm_physp_is_valid( p_physp ) )
> +			pkey_mgr_process_physical_port( p_log, p_req, pkey, p_physp );
> +	}
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
>  static ib_api_status_t
>  pkey_mgr_update_pkey_entry(
>     IN const osm_req_t *p_req,
> @@ -114,7 +247,8 @@ pkey_mgr_enforce_partition(
>     p_pi->state_info2 = 0;
>     ib_port_info_set_port_state( p_pi, IB_LINK_NO_CHANGE );
>  
> -   context.pi_context.node_guid = osm_node_get_node_guid( osm_physp_get_node_ptr( p_physp ) );
> +	context.pi_context.node_guid = 
> +		osm_node_get_node_guid( osm_physp_get_node_ptr( p_physp ) );
>     context.pi_context.port_guid = osm_physp_get_port_guid( p_physp );
>     context.pi_context.set_method = TRUE;
>     context.pi_context.update_master_sm_base_lid = FALSE;
> @@ -131,80 +265,132 @@ pkey_mgr_enforce_partition(
>  
>  /**********************************************************************
>   **********************************************************************/
> -/*
> - * Prepare a new entry for the pkey table for this port when this pkey
> - * does not exist. Update existed entry when membership was changed.
> - */
> -static void pkey_mgr_process_physical_port(
> -   IN osm_log_t *p_log,
> -   IN const osm_req_t *p_req,
> -   IN const ib_net16_t pkey,
> -   IN osm_physp_t *p_physp )
> +static boolean_t pkey_mgr_update_port(
> +	osm_log_t *p_log,
> +	osm_req_t *p_req,
> +	const osm_port_t * const p_port )
>  {
> -   osm_node_t *p_node = osm_physp_get_node_ptr( p_physp );
> -   ib_pkey_table_t *block;
> +	osm_physp_t *p_physp;
> +	osm_node_t *p_node;
> +	ib_pkey_table_t *block, *new_block;
> +	osm_pkey_tbl_t *p_pkey_tbl;
>     uint16_t block_index;
> +	uint8_t  pkey_index;
> +	uint16_t last_free_block_index = 0;
> +	uint16_t last_free_pkey_index = 0;
>     uint16_t num_of_blocks;
> -   const osm_pkey_tbl_t *p_pkey_tbl;
> -   ib_net16_t *p_orig_pkey;
> -   char *stat = NULL;
> -   uint32_t i;
> +	uint16_t max_num_of_blocks;
>  
> -   p_pkey_tbl = osm_physp_get_pkey_tbl( p_physp );
> -   num_of_blocks = osm_pkey_tbl_get_num_blocks( p_pkey_tbl );
> +	ib_api_status_t status;
> +	boolean_t ret_val = FALSE;
> +	osm_pending_pkey_t *p_pending;
> +	boolean_t found;
>  
> -   p_orig_pkey = cl_map_get( &p_pkey_tbl->keys, ib_pkey_get_base( pkey ) );
> +	p_physp = osm_port_get_default_phys_ptr( p_port );
> +	if ( !osm_physp_is_valid( p_physp ) )
> +		return FALSE;
>  
> -   if ( !p_orig_pkey )
> -   {
> -      for ( block_index = 0; block_index < num_of_blocks; block_index++ )
> +	p_pkey_tbl = osm_physp_get_mod_pkey_tbl( p_physp );
> +	num_of_blocks = osm_pkey_tbl_get_num_blocks( p_pkey_tbl );
> +	max_num_of_blocks = pkey_mgr_get_physp_max_blocks( p_req->p_subn, p_physp );
> +	if (	p_pkey_tbl->max_blocks > max_num_of_blocks )
>        {
> -         block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_index );
> -         for ( i = 0; i < IB_NUM_PKEY_ELEMENTS_IN_BLOCK; i++ )
> +		osm_log( p_log, OSM_LOG_INFO,
> +					"pkey_mgr_update_port: "
> +					"Max number of blocks reduced from %u to %u " 
> +					"for node 0x%016" PRIx64 " port %u\n",
> +					p_pkey_tbl->max_blocks, max_num_of_blocks,
> +					cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +					osm_physp_get_port_num( p_physp ) );				
> +	}
> +	p_pkey_tbl->max_blocks = max_num_of_blocks;
> +
> +	osm_pkey_tbl_sync_new_blocks( p_pkey_tbl );
> +	cl_map_remove_all( &p_pkey_tbl->keys );

What is the reason to drop map here? AFAIK it will be reinitialized later
anyway when pkey blocks will be received.

> +	p_pkey_tbl->used_blocks = 0;
> +
> +	/* 
> +		process every pending pkey in order - 
> +		first must be "updated" last are "new" 
> +	*/
> +	p_pending = 
> +		(osm_pending_pkey_t *)cl_qlist_remove_head( &p_pkey_tbl->pending );
> +	while (p_pending != 
> +			 (osm_pending_pkey_t *)cl_qlist_end( &p_pkey_tbl->pending ) )
> +	{
> +		if (p_pending->is_new == FALSE)
> +		{
> +			block_index = p_pending->block;
> +			pkey_index = p_pending->index;
> +			found = TRUE;
> +		} 
> +		else
>           {
> -            if ( ib_pkey_is_invalid( block->pkey_entry[i] ) )
> +			found = osm_pkey_find_next_free_entry(p_pkey_tbl, 
> +															  &last_free_block_index,
> +															  &last_free_pkey_index);

There should be warning: expected third arg is uint8_t*

> +			if ( !found )
>              {
> -               block->pkey_entry[i] = pkey;
> -	       stat = "inserted";
> -	       goto _done;
> +				osm_log( p_log, OSM_LOG_ERROR,
> +							"pkey_mgr_update_port: ERR 0504: "
> +							"failed to find empty space for new pkey 0x%04x "
> +							"of node 0x%016" PRIx64 " port %u\n",
> +							cl_ntoh16(p_pending->pkey),
> +							cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +							osm_physp_get_port_num( p_physp ) );
>              }
> +			else
> +			{
> +				block_index = last_free_block_index;
> +				pkey_index = last_free_pkey_index++;
>           }
>        }
> +		
> +		if (found) 
> +		{
> +			if (osm_pkey_tbl_set_new_entry( 
> +					 p_pkey_tbl, block_index, pkey_index, p_pending->pkey) )
> +			{
>        osm_log( p_log, OSM_LOG_ERROR,
> -               "pkey_mgr_process_physical_port: ERR 0501: "
> -               "No empty pkey entry was found to insert 0x%04x for node "
> -               "0x%016" PRIx64 " port %u\n",
> -               cl_ntoh16( pkey ),
> +							"pkey_mgr_update_port: ERR 0505: "
> +							"failed to set PKey 0x%04x in block %u idx %u "
> +							"of node 0x%016" PRIx64 " port %u\n",
> +							p_pending->pkey, block_index, pkey_index,
>                 cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>                 osm_physp_get_port_num( p_physp ) );
>     }
> -   else if ( *p_orig_pkey != pkey )
> -   {
> +		}
> +
> +		free( p_pending );
> +		p_pending = 
> +			(osm_pending_pkey_t *)cl_qlist_remove_head( &p_pkey_tbl->pending );
> +	}
> +
> +	/* now look for changes and store */
>        for ( block_index = 0; block_index < num_of_blocks; block_index++ )
>        {
> -         /* we need real block (not just new_block) in order
> -          * to resolve block/pkey indices */
>           block = osm_pkey_tbl_block_get( p_pkey_tbl, block_index );
> -	 i = p_orig_pkey - block->pkey_entry;
> -	 if (i < IB_NUM_PKEY_ELEMENTS_IN_BLOCK) {
> -            block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_index );
> -	    block->pkey_entry[i] = pkey;
> -	    stat = "updated";
> -	    goto _done;
> -	 }
> -      }
> -   }
> +		new_block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_index );
>  
> - _done:
> -   if (stat) {
> -      osm_log( p_log, OSM_LOG_VERBOSE,
> -               "pkey_mgr_process_physical_port:  "
> -               "pkey 0x%04x was %s for node 0x%016" PRIx64
> -               " port %u\n",
> -               cl_ntoh16( pkey ), stat,
> +		if (block && 
> +			 (!new_block || !memcmp( new_block, block, sizeof( *block ) )) )
> +			continue;
> +
> +		status = pkey_mgr_update_pkey_entry(
> +			p_req, p_physp , new_block, block_index );
> +		if (status == IB_SUCCESS)
> +			ret_val = TRUE;
> +		else
> +			osm_log( p_log, OSM_LOG_ERROR,
> +						"pkey_mgr_update_port: ERR 0506: "
> +						"pkey_mgr_update_pkey_entry() failed to update "
> +						"pkey table block %d for node 0x%016" PRIx64 " port %u\n",
> +						block_index,
>                 cl_ntoh64( osm_node_get_node_guid( p_node ) ),
>                 osm_physp_get_port_num( p_physp ) );
>     }
> +
> +	return ret_val;
>  }
>  
>  /**********************************************************************
> @@ -217,21 +403,23 @@ pkey_mgr_update_peer_port(
>     const osm_port_t * const p_port,
>     boolean_t enforce )
>  {
> -   osm_physp_t *p, *peer;
> +	osm_physp_t *p_physp, *peer;
>     osm_node_t *p_node;
>     ib_pkey_table_t *block, *peer_block;
> -   const osm_pkey_tbl_t *p_pkey_tbl, *p_peer_pkey_tbl;
> +	const osm_pkey_tbl_t *p_pkey_tbl;
> +	osm_pkey_tbl_t *p_peer_pkey_tbl;
>     osm_switch_t *p_sw;
>     ib_switch_info_t *p_si;
>     uint16_t block_index;
>     uint16_t num_of_blocks;
> +	uint16_t peer_max_blocks;
>     ib_api_status_t status = IB_SUCCESS;
>     boolean_t ret_val = FALSE;
>  
> -   p = osm_port_get_default_phys_ptr( p_port );
> -   if ( !osm_physp_is_valid( p ) )
> +	p_physp = osm_port_get_default_phys_ptr( p_port );
> +	if ( !osm_physp_is_valid( p_physp ) )
>        return FALSE;
> -   peer = osm_physp_get_remote( p );
> +	peer = osm_physp_get_remote( p_physp );
>     if ( !peer || !osm_physp_is_valid( peer ) )
>        return FALSE;
>     p_node = osm_physp_get_node_ptr( peer );
> @@ -245,7 +433,7 @@ pkey_mgr_update_peer_port(
>     if (pkey_mgr_enforce_partition( p_req, peer, enforce ) != IB_SUCCESS)
>     {
>        osm_log( p_log, OSM_LOG_ERROR,
> -               "pkey_mgr_update_peer_port: ERR 0502: "
> +					"pkey_mgr_update_peer_port: ERR 0507: "
>                 "pkey_mgr_enforce_partition() failed to update "
>                 "node 0x%016" PRIx64 " port %u\n",
>                 cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> @@ -255,24 +443,36 @@ pkey_mgr_update_peer_port(
>     if (enforce == FALSE)
>        return FALSE;
>  
> -   p_pkey_tbl = osm_physp_get_pkey_tbl( p );
> -   p_peer_pkey_tbl = osm_physp_get_pkey_tbl( peer );
> +	p_pkey_tbl = osm_physp_get_pkey_tbl( p_physp );
> +	p_peer_pkey_tbl = osm_physp_get_mod_pkey_tbl( peer );
>     num_of_blocks = osm_pkey_tbl_get_num_blocks( p_pkey_tbl );
> -   if ( num_of_blocks > osm_pkey_tbl_get_num_blocks( p_peer_pkey_tbl ) )
> -      num_of_blocks = osm_pkey_tbl_get_num_blocks( p_peer_pkey_tbl );
> +	peer_max_blocks = pkey_mgr_get_physp_max_blocks( p_subn, peer );
> +	if (peer_max_blocks < p_pkey_tbl->used_blocks)
> +	{
> +		osm_log( p_log, OSM_LOG_ERROR,
> +					"pkey_mgr_update_peer_port: ERR 0508: "
> +					"not enough entries (%u < %u) on switch 0x%016" PRIx64
> +					" port %u\n",
> +					peer_max_blocks, num_of_blocks,
> +					cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> +					osm_physp_get_port_num( peer ) );
> +		return FALSE;

Do you think it is the best way, just to skip update - partitions are
enforced already on the switch. May be better to truncate pkey tables
in order to meet peer's capabilities?

> +	}
>  
> -   for ( block_index = 0; block_index < num_of_blocks; block_index++ )
> +	p_peer_pkey_tbl->used_blocks = p_pkey_tbl->used_blocks;
> +	for ( block_index = 0; block_index < p_pkey_tbl->used_blocks; block_index++)
>     {
>        block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_index );
>        peer_block = osm_pkey_tbl_block_get( p_peer_pkey_tbl, block_index );
>        if ( memcmp( peer_block, block, sizeof( *peer_block ) ) )
>        {
> +			osm_pkey_tbl_set(p_peer_pkey_tbl, block_index, block);

Why this (osm_pkey_tbl_set())? This will be called by receiver.

>           status = pkey_mgr_update_pkey_entry( p_req, peer, block, block_index );
>           if ( status == IB_SUCCESS )
>              ret_val = TRUE;
>           else
>              osm_log( p_log, OSM_LOG_ERROR,
> -                     "pkey_mgr_update_peer_port: ERR 0503: "
> +							"pkey_mgr_update_peer_port: ERR 0509: "
>                       "pkey_mgr_update_pkey_entry() failed to update "
>                       "pkey table block %d for node 0x%016" PRIx64
>                       " port %u\n",
> @@ -282,10 +482,10 @@ pkey_mgr_update_peer_port(
>        }
>     }
>  
> -   if ( ret_val == TRUE &&
> -        osm_log_is_active( p_log, OSM_LOG_VERBOSE ) )
> +	if ( (ret_val == TRUE) &&
> +		  osm_log_is_active( p_log, OSM_LOG_DEBUG ) )
>     {
> -      osm_log( p_log, OSM_LOG_VERBOSE,
> +		osm_log( p_log, OSM_LOG_DEBUG,
>                 "pkey_mgr_update_peer_port: "
>                 "pkey table was updated for node 0x%016" PRIx64
>                 " port %u\n",
> @@ -298,82 +498,6 @@ pkey_mgr_update_peer_port(
>  
>  /**********************************************************************
>   **********************************************************************/
> -static boolean_t pkey_mgr_update_port(
> -   osm_log_t *p_log,
> -   osm_req_t *p_req,
> -   const osm_port_t * const p_port )
> -{
> -   osm_physp_t *p;
> -   osm_node_t *p_node;
> -   ib_pkey_table_t *block, *new_block;
> -   const osm_pkey_tbl_t *p_pkey_tbl;
> -   uint16_t block_index;
> -   uint16_t num_of_blocks;
> -   ib_api_status_t status;
> -   boolean_t ret_val = FALSE;
> -
> -   p = osm_port_get_default_phys_ptr( p_port );
> -   if ( !osm_physp_is_valid( p ) )
> -      return FALSE;
> -
> -   p_pkey_tbl = osm_physp_get_pkey_tbl(p);
> -   num_of_blocks = osm_pkey_tbl_get_num_blocks( p_pkey_tbl );
> -
> -   for ( block_index = 0; block_index < num_of_blocks; block_index++ )
> -   {
> -      block = osm_pkey_tbl_block_get( p_pkey_tbl, block_index );
> -      new_block = osm_pkey_tbl_new_block_get( p_pkey_tbl, block_index );
> -
> -      if (!new_block || !memcmp( new_block, block, sizeof( *block ) ) )
> -         continue;
> -
> -      status = pkey_mgr_update_pkey_entry( p_req, p, new_block, block_index );
> -      if (status == IB_SUCCESS)
> -         ret_val = TRUE;
> -      else
> -         osm_log( p_log, OSM_LOG_ERROR,
> -                  "pkey_mgr_update_port: ERR 0504: "
> -                  "pkey_mgr_update_pkey_entry() failed to update "
> -                  "pkey table block %d for node 0x%016" PRIx64 " port %u\n",
> -                  block_index,
> -                  cl_ntoh64( osm_node_get_node_guid( p_node ) ),
> -                  osm_physp_get_port_num( p ) );
> -   }
> -
> -   return ret_val;
> -}
> -
> -/**********************************************************************
> - **********************************************************************/
> -static void
> -pkey_mgr_process_partition_table(
> -   osm_log_t *p_log,
> -   const osm_req_t *p_req,
> -   const osm_prtn_t *p_prtn,
> -   const boolean_t full )
> -{
> -   const cl_map_t *p_tbl = full ?
> -      &p_prtn->full_guid_tbl : &p_prtn->part_guid_tbl;
> -   cl_map_iterator_t i, i_next;
> -   ib_net16_t pkey = p_prtn->pkey;
> -   osm_physp_t *p_physp;
> -
> -   if ( full )
> -      pkey = cl_hton16( cl_ntoh16( pkey ) | 0x8000 );
> -
> -   i_next = cl_map_head( p_tbl );
> -   while ( i_next != cl_map_end( p_tbl ) )
> -   {
> -      i = i_next;
> -      i_next = cl_map_next( i );
> -      p_physp = cl_map_obj( i );
> -      if ( p_physp && osm_physp_is_valid( p_physp ) )
> -          pkey_mgr_process_physical_port( p_log, p_req, pkey, p_physp );
> -   }
> -}
> -
> -/**********************************************************************
> - **********************************************************************/
>  osm_signal_t
>  osm_pkey_mgr_process(
>     IN osm_opensm_t *p_osm )
> @@ -383,8 +507,7 @@ osm_pkey_mgr_process(
>     osm_prtn_t *p_prtn;
>     osm_port_t *p_port;
>     osm_signal_t signal = OSM_SIGNAL_DONE;
> -   osm_physp_t *p_physp;
> -
> +	osm_node_t *p_node;
>     CL_ASSERT( p_osm );
>  
>     OSM_LOG_ENTER( &p_osm->log, osm_pkey_mgr_process );
> @@ -394,32 +517,25 @@ osm_pkey_mgr_process(
>     if ( osm_prtn_make_partitions( &p_osm->log, &p_osm->subn ) != IB_SUCCESS )
>     {
>        osm_log( &p_osm->log, OSM_LOG_ERROR,
> -               "osm_pkey_mgr_process: ERR 0505: "
> +					"osm_pkey_mgr_process: ERR 0510: "
>                 "osm_prtn_make_partitions() failed\n" );
>        goto _err;
>     }
>  
> -   p_tbl = &p_osm->subn.port_guid_tbl;
> -   p_next = cl_qmap_head( p_tbl );
> -   while ( p_next != cl_qmap_end( p_tbl ) )
> -   {
> -      p_port = ( osm_port_t * ) p_next;
> -      p_next = cl_qmap_next( p_next );
> -      p_physp = osm_port_get_default_phys_ptr( p_port );
> -      if ( osm_physp_is_valid( p_physp ) )
> -        osm_pkey_tbl_sync_new_blocks( osm_physp_get_pkey_tbl( p_physp ) );
> -   }
> -
> +	/* populate the pending pkey entries by scanning all partitions */
>     p_tbl = &p_osm->subn.prtn_pkey_tbl;
>     p_next = cl_qmap_head( p_tbl );
>     while ( p_next != cl_qmap_end( p_tbl ) )
>     {
>        p_prtn = ( osm_prtn_t * ) p_next;
>        p_next = cl_qmap_next( p_next );
> -      pkey_mgr_process_partition_table( &p_osm->log, &p_osm->sm.req, p_prtn, FALSE );
> -      pkey_mgr_process_partition_table( &p_osm->log, &p_osm->sm.req, p_prtn, TRUE );
> +		pkey_mgr_process_partition_table( 
> +			&p_osm->log, &p_osm->sm.req, p_prtn, FALSE );
> +		pkey_mgr_process_partition_table( 
> +			&p_osm->log, &p_osm->sm.req, p_prtn, TRUE );
>     }
>  
> +	/* calculate new pkey tables and set */
>     p_tbl = &p_osm->subn.port_guid_tbl;
>     p_next = cl_qmap_head( p_tbl );
>     while ( p_next != cl_qmap_end( p_tbl ) )
> @@ -428,8 +544,10 @@ osm_pkey_mgr_process(
>        p_next = cl_qmap_next( p_next );
>        if ( pkey_mgr_update_port( &p_osm->log, &p_osm->sm.req, p_port ) )
>          signal = OSM_SIGNAL_DONE_PENDING;
> -      if ( osm_node_get_type( osm_port_get_parent_node( p_port ) ) != IB_NODE_TYPE_SWITCH &&
> -           pkey_mgr_update_peer_port( &p_osm->log, &p_osm->sm.req,
> +		p_node = osm_port_get_parent_node( p_port );
> +		if ( ( osm_node_get_type( p_node ) != IB_NODE_TYPE_SWITCH ) &&
> +			  pkey_mgr_update_peer_port( 
> +				  &p_osm->log, &p_osm->sm.req,
>                                        &p_osm->subn, p_port,
>                                        !p_osm->subn.opt.no_partition_enforcement ) )
>          signal = OSM_SIGNAL_DONE_PENDING;        
> 
> 

Thanks,
Sasha




More information about the general mailing list