[Openib-windows] [IBAL][Patch] reference counter for IOC obj in IOC PnP manager

Yossi Leybovich sleybo at mellanox.co.il
Fri Nov 17 00:48:47 PST 2006


Hi
Sorry for the delay in the response.
I was working on the TRAP and mad issues in order to pass the compliance
tests.
And on the SRP I bombed into few issues that cause me blue screen, I
spent some time to fix the problem .
I am now on the process of stabling the removal/insertion of IOCs.

I will be glad if you review this patch :
This patch address 2 issues 

1. cases that AL start new sweep before the former sweep finished 
IBAL will not start new sweep if there are queries outstanding
2. When connecting SRP target to 2 HCA on the same host.
The PnP events use the IOC GUID as key to insert/remove from the
context_map.
In case we use the same SRP target with the 2 HCAs they both have same
key.

I change the context_map to use 128 bit key ( and not 64bit key)
The key is construct from IOC GUID and CA GUID
This change effect also the port events and the CA events.

I tested it and it works 

Thanks
Yossi 

Index: core/al/al_pnp.h
===================================================================
--- core/al/al_pnp.h	(revision 1797)
+++ core/al/al_pnp.h	(working copy)
@@ -38,7 +38,7 @@
 #include "al_ca.h"
 #include <iba/ib_al_ioctl.h>
 #include <complib/cl_ptr_vector.h>
-#include <complib/cl_qmap.h>
+#include <complib/cl_fleximap.h>
 
 
 extern char*	ib_pnp_event_str[];
@@ -54,7 +54,8 @@
 	cl_list_item_t			list_item;
 	cl_async_proc_item_t	dereg_item;
 	ib_pnp_class_t			pnp_class;
-	cl_qmap_t				context_map;
+	//cl_qmap_t				context_map;
+	cl_fmap_t				context_map;
 	IRP						*p_rearm_irp;
 	IRP						*p_dereg_irp;
 #else	/* defined( CL_KERNEL ) */
@@ -99,8 +100,9 @@
 typedef struct _al_pnp_context
 {
 	/* List item must be first. */
-	cl_map_item_t			map_item;
+	cl_fmap_item_t			map_item;
 	ib_net64_t				guid;
+	ib_net64_t				ca_guid;
 	const void				*context;
 
 }	al_pnp_context_t;
@@ -195,13 +197,13 @@
 al_pnp_context_t*
 pnp_create_context(
 	IN				al_pnp_t* const
p_reg,
-	IN		const	net64_t
guid );
+	IN		const	void* const
p_key );
 /******/
 
 al_pnp_context_t*
 pnp_get_context(
 	IN		const	al_pnp_t* const
p_reg,
-	IN		const	ib_net64_t
guid );
+	IN		const	void* const
p_key );
 
 void
 pnp_reg_complete(
Index: core/al/kernel/al_ioc_pnp.c
===================================================================
--- core/al/kernel/al_ioc_pnp.c	(revision 1797)
+++ core/al/kernel/al_ioc_pnp.c	(working copy)
@@ -1223,7 +1223,7 @@
 	case IB_PNP_SM_CHANGE:
 	case IB_PNP_PORT_ACTIVE:
 		/* Initiate a sweep - delay a bit to allow the ports to
come up. */
-		if( g_ioc_poll_interval )
+		if( g_ioc_poll_interval && !gp_ioc_pnp->query_cnt)
 		{
 			cl_status = cl_timer_start(
&gp_ioc_pnp->sweep_timer, 250 );
 			CL_ASSERT( cl_status == CL_SUCCESS );
@@ -2926,24 +2926,29 @@
 	{
 	case IB_PNP_IOU_ADD:
 		CL_ASSERT( pnp_get_class( p_reg->pnp_class ) ==
IB_PNP_IOU );
-		p_context = pnp_create_context( p_reg,
p_event->p_rec->guid );
+		p_context = pnp_create_context( p_reg,
&p_event->p_rec->guid);
 		break;
 
 	case IB_PNP_IOU_REMOVE:
 		CL_ASSERT( pnp_get_class( p_reg->pnp_class ) ==
IB_PNP_IOU );
 		/* Lookup the context for this IOU. */
-		p_context = pnp_get_context( p_reg, p_event->p_rec->guid
);
+		p_context = pnp_get_context( p_reg,
&p_event->p_rec->guid );
 		break;
 
 	case IB_PNP_IOC_ADD:
 		CL_ASSERT( pnp_get_class( p_reg->pnp_class ) ==
IB_PNP_IOC );
-		p_context = pnp_create_context( p_reg,
p_event->p_rec->guid );
+		p_context = pnp_create_context( p_reg,
&p_event->p_rec->guid);
 		break;
-
-	default:
+	case IB_PNP_IOC_PATH_ADD:
+	case IB_PNP_IOC_PATH_REMOVE:
 		CL_ASSERT( pnp_get_class( p_reg->pnp_class ) ==
IB_PNP_IOC );
-		p_context = pnp_get_context( p_reg, p_event->p_rec->guid
);
+		p_context = pnp_get_context( p_reg,
&p_event->p_rec->guid );
 		break;
+	default:
+		AL_PRINT_EXIT(TRACE_LEVEL_WARNING, AL_DBG_PNP,("Invalid
PnP event %#x\n",
+			p_event->p_rec->pnp_event));
+		return CL_NOT_DONE;
+		break;
 	}
 	if( !p_context )
 		return CL_NOT_FOUND;
@@ -2958,7 +2963,7 @@
 		p_event->p_rec->pnp_event == IB_PNP_IOU_REMOVE ||
 		p_event->p_rec->pnp_event == IB_PNP_IOC_REMOVE )
 	{
-		cl_qmap_remove_item( &p_reg->context_map,
&p_context->map_item );
+		cl_fmap_remove_item( &p_reg->context_map,
&p_context->map_item );
 		cl_free( p_context );
 	}
 	else
@@ -2993,7 +2998,10 @@
 	}
 	p_rec->pnp_rec.pnp_event = IB_PNP_IOU_ADD;
 	p_rec->pnp_rec.guid = p_iou->guid;
+	p_rec->pnp_rec.ca_guid = p_iou->ca_guid;
+	
 	p_rec->ca_guid = p_iou->ca_guid;
+	p_rec->guid = p_iou->guid;
 	p_rec->vend_id = p_iou->vend_id;
 	p_rec->dev_id = p_iou->dev_id;
 	p_rec->revision = p_iou->revision;
@@ -3086,6 +3094,8 @@
 
 	p_rec->pnp_rec.pnp_event = IB_PNP_IOC_ADD;
 	p_rec->pnp_rec.guid = p_ioc->profile.ioc_guid;
+	p_rec->pnp_rec.ca_guid = p_ioc->p_iou->ca_guid;
+	
 	p_rec->ca_guid = p_ioc->p_iou->ca_guid;
 	cl_memcpy( p_rec->svc_entry_array, p_ioc->p_svc_entries,
 		p_ioc->profile.num_svc_entries * sizeof(ib_svc_entry_t)
);
@@ -3178,14 +3188,14 @@
 	p_rec = (ib_pnp_ioc_path_rec_t*)cl_zalloc( event.rec_size * 2 );
 	if( !p_rec )
 		return;
-
+	p_rec->pnp_rec.pnp_event = pnp_event;
+	p_rec->pnp_rec.guid = p_ioc->profile.ioc_guid;
+	p_rec->pnp_rec.ca_guid = p_path->ca_guid;
+	
 	p_rec->ca_guid = p_path->ca_guid;
 	p_rec->port_guid = p_path->port_guid;
 	p_rec->path = p_path->rec;
 
-	p_rec->pnp_rec.pnp_event = pnp_event;
-	p_rec->pnp_rec.guid = p_ioc->profile.ioc_guid;
-
 	event.p_rec = (ib_pnp_rec_t*)p_rec;
 	event.p_user_rec = (ib_pnp_rec_t*)(((uint8_t*)p_rec) +
event.rec_size);
 
Index: core/al/kernel/al_pnp.c
===================================================================
--- core/al/kernel/al_pnp.c	(revision 1797)
+++ core/al/kernel/al_pnp.c	(working copy)
@@ -84,6 +84,20 @@
 
 
 /*
+ * Compares two context for inserts/lookups in a flexi map.  Keys are
the
+ * address of the reg guid1, which is adjacent to the context guid2 (if
exist).
+ * This allows for a single call to cl_memcmp.
+ */
+static intn_t
+__context_cmp(
+	IN		const	void* const
p_key1,
+	IN		const	void* const
p_key2 )
+{
+	return cl_memcmp( p_key1, p_key2, sizeof(uint64_t) * 2 );
+}
+
+
+/*
  * Event structures for queuing to the async proc manager.
  */
 typedef struct _al_pnp_ca_change
@@ -291,7 +305,7 @@
 	IN				al_obj_t
*p_obj )
 {
 	al_pnp_t		*p_reg;
-	cl_map_item_t	*p_item;
+	cl_fmap_item_t	*p_item;
 	IRP				*p_irp;
 
 	AL_ENTER( AL_DBG_PNP );
@@ -299,10 +313,10 @@
 	p_reg = PARENT_STRUCT( p_obj, al_pnp_t, obj );
 
 	/* Cleanup the context list. */
-	while( cl_qmap_count( &p_reg->context_map ) )
+	while( cl_fmap_count( &p_reg->context_map ) )
 	{
-		p_item = cl_qmap_tail( &p_reg->context_map );
-		cl_qmap_remove_item( &p_reg->context_map, p_item );
+		p_item = cl_fmap_tail( &p_reg->context_map );
+		cl_fmap_remove_item( &p_reg->context_map, p_item );
 		cl_free( p_item );
 	}
 
@@ -338,17 +352,17 @@
 	IN				al_obj_t
*p_obj )
 {
 	al_pnp_t		*p_reg;
-	cl_map_item_t	*p_item;
+	cl_fmap_item_t	*p_item;
 
 	AL_ENTER( AL_DBG_PNP );
 
 	p_reg = PARENT_STRUCT( p_obj, al_pnp_t, obj );
 
 	/* Cleanup the context list. */
-	while( cl_qmap_count( &p_reg->context_map ) )
+	while( cl_fmap_count( &p_reg->context_map ) )
 	{
-		p_item = cl_qmap_tail( &p_reg->context_map );
-		cl_qmap_remove_item( &p_reg->context_map, p_item );
+		p_item = cl_fmap_tail( &p_reg->context_map );
+		cl_fmap_remove_item( &p_reg->context_map, p_item );
 		cl_free( p_item );
 	}
 
@@ -373,15 +387,15 @@
 al_pnp_context_t*
 pnp_get_context(
 	IN		const	al_pnp_t* const
p_reg,
-	IN		const	ib_net64_t
guid )
+	IN				const void* const
p_key )
 {
-	cl_map_item_t		*p_context_item;
+	cl_fmap_item_t		*p_context_item;
 
 	AL_ENTER( AL_DBG_PNP );
 
 	/* Search the context list for this CA. */
-	p_context_item = cl_qmap_get( &p_reg->context_map, guid );
-	if( p_context_item != cl_qmap_end( &p_reg->context_map ) )
+	p_context_item = cl_fmap_get( &p_reg->context_map, p_key );
+	if( p_context_item != cl_fmap_end( &p_reg->context_map ) )
 	{
 		AL_EXIT( AL_DBG_PNP );
 		return PARENT_STRUCT( p_context_item, al_pnp_context_t,
map_item );
@@ -529,7 +543,7 @@
 	}
 	else
 	{
-		cl_qmap_remove_item( &p_reg->context_map,
&p_context->map_item );
+		cl_fmap_remove_item( &p_reg->context_map,
&p_context->map_item );
 		cl_free( p_context );
 	}
 
@@ -545,10 +559,10 @@
 al_pnp_context_t*
 pnp_create_context(
 	IN				al_pnp_t* const
p_reg,
-	IN		const	net64_t
guid )
+	IN				const void* const
p_key )
 {
 	al_pnp_context_t	*p_context;
-	cl_map_item_t		*p_item;
+	cl_fmap_item_t		*p_item;
 
 	AL_ENTER( AL_DBG_PNP );
 
@@ -563,14 +577,20 @@
 			sizeof(al_pnp_context_t)) );
 		return NULL;
 	}
-
 	/* Store the GUID in the context record. */
-	p_context->guid = guid;
+	cl_memcpy(&p_context->guid, p_key, sizeof(ib_net64_t) * 2);
 
 	/* Add the context to the context list. */
-	p_item = cl_qmap_insert( &p_reg->context_map, p_context->guid,
+	p_item = cl_fmap_insert( &p_reg->context_map, &p_context->guid,
 		&p_context->map_item );
-
+	if( p_item != &p_context->map_item )
+	{
+		AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_PNP,
+			("p_context is already in context map %I64x
\n",p_context->guid));
+		p_context = NULL;
+	}
+	
+	
 	AL_EXIT( AL_DBG_PNP );
 	return p_context;
 }
@@ -603,7 +623,7 @@
 		p_port_attr += event_rec.port_index;
 
 		/* Create a new context for user port information. */
-		p_context = pnp_create_context( p_reg,
p_port_attr->port_guid );
+		p_context = pnp_create_context( p_reg,
&p_port_attr->port_guid);
 		if( !p_context )
 			continue;
 
@@ -674,7 +694,7 @@
 		case IB_PNP_CA:
 			event_rec.pnp_event = IB_PNP_CA_ADD;
 			p_context = pnp_create_context( p_reg,
-				event_rec.p_ci_ca->p_pnp_attr->ca_guid
);
+
&event_rec.p_ci_ca->p_pnp_attr->ca_guid);
 			if( !p_context )
 				break;
 
@@ -803,7 +823,7 @@
 
 	/* Initialize the registration info. */
 	construct_al_obj( &p_reg->obj, AL_OBJ_TYPE_H_PNP );
-	cl_qmap_init( &p_reg->context_map );
+	cl_fmap_init( &p_reg->context_map, __context_cmp );
 	status = init_al_obj( &p_reg->obj, p_pnp_req->pnp_context, TRUE,
 		__pnp_reg_destroying, __pnp_reg_cleanup, __pnp_reg_free
);
 	if( status != IB_SUCCESS )
@@ -924,7 +944,7 @@
 		 * exercise to the open source community.
 		 */
 		p_context = pnp_create_context( p_reg,
-			p_event_rec->p_ci_ca->p_pnp_attr->ca_guid );
+			&p_event_rec->p_ci_ca->p_pnp_attr->ca_guid);
 		if( !p_context )
 			continue;
 
@@ -990,7 +1010,7 @@
 		{
 			p_port_attr =
p_event_rec->p_ci_ca->p_pnp_attr->p_port_attr;
 			p_port_attr += port_index;
-			p_context = pnp_get_context( p_reg,
p_port_attr->port_guid );
+			p_context = pnp_get_context( p_reg,
&p_port_attr->port_guid );
 			if( !p_context )
 				continue;
 
@@ -1014,7 +1034,7 @@
 				 * Remove the port context from the
registrant's
 				 * context list.
 				 */
-				cl_qmap_remove_item(
&p_reg->context_map,
+				cl_fmap_remove_item(
&p_reg->context_map,
 					&p_context->map_item );
 				/* Free the context. */
 				cl_free( p_context );
@@ -1054,7 +1074,7 @@
 
 		/* Search the context list for this CA. */
 		p_context =
-			pnp_get_context( p_reg,
p_event_rec->p_ci_ca->verbs.guid );
+			pnp_get_context( p_reg,
&p_event_rec->p_ci_ca->p_pnp_attr->ca_guid);
 
 		/* Make sure we found a context. */
 		if( !p_context )
@@ -1064,7 +1084,7 @@
 		if( __pnp_notify_user( p_reg, p_context, p_event_rec )
== IB_SUCCESS )
 		{
 			/* Remove the context from the context list. */
-			cl_qmap_remove_item( &p_reg->context_map,
&p_context->map_item );
+			cl_fmap_remove_item( &p_reg->context_map,
&p_context->map_item );
 
 			/* Deallocate the context block. */
 			cl_free( p_context );
@@ -1221,7 +1241,7 @@
 		p_port_attr =
p_event_rec->p_ci_ca->p_pnp_attr->p_port_attr;
 		p_port_attr += p_event_rec->port_index;
 
-		p_context = pnp_get_context( p_reg,
p_port_attr->port_guid );
+		p_context = pnp_get_context( p_reg,
&p_port_attr->port_guid );
 		if( !p_context )
 			continue;
 
@@ -1260,7 +1280,7 @@
 		p_port_attr =
p_event_rec->p_ci_ca->p_pnp_attr->p_port_attr;
 		p_port_attr += p_event_rec->port_index;
 
-		p_context = pnp_get_context( p_reg,
p_port_attr->port_guid );
+		p_context = pnp_get_context( p_reg,
&p_port_attr->port_guid );
 		if( !p_context )
 			continue;
 
Index: core/iou/kernel/iou_driver.h
===================================================================
--- core/iou/kernel/iou_driver.h	(revision 1797)
+++ core/iou/kernel/iou_driver.h	(working copy)
@@ -199,6 +199,14 @@
 
 	cl_list_item_t			list_item;
 
+	/* All reported PDOs are children of an HCA. */
+	ib_ca_handle_t			h_ca;
+
+	/*
+	 * CA GUID copy - in case we get IRPs after the CA
+	 * handle has been released.
+	 */
+	net64_t					ca_guid;
 	POWER_STATE				dev_po_state;
 
 	/*
Index: core/iou/kernel/iou_ioc_mgr.c
===================================================================
--- core/iou/kernel/iou_ioc_mgr.c	(revision 1797)
+++ core/iou/kernel/iou_ioc_mgr.c	(working copy)
@@ -538,7 +538,8 @@
 	p_ioc_ext->pdo.p_parent_ext = p_ext;
 	p_ioc_ext->pdo.b_present = TRUE;
 	p_ioc_ext->pdo.b_reported_missing = FALSE;
-
+	p_ioc_ext->pdo.ca_guid = p_pnp_rec->ca_guid;
+		
 	/* Copy the IOC profile and service entries. */
 	p_ioc_ext->info = p_pnp_rec->info;
 	cl_memcpy( p_ioc_ext->svc_entries, p_pnp_rec->svc_entry_array,
@@ -1031,7 +1032,7 @@
 	}
 
 	/* The instance ID is the port GUID. */
-	p_string = ExAllocatePool( PagedPool, sizeof(WCHAR) * 17 );
+	p_string = ExAllocatePool( PagedPool, sizeof(WCHAR) * 33 );
 	if( !p_string )
 	{
 		IOU_PRINT_EXIT( TRACE_LEVEL_ERROR, IOU_DBG_ERROR,
@@ -1040,8 +1041,8 @@
 		return STATUS_NO_MEMORY;
 	}
 
-	status = RtlStringCchPrintfW(
-		p_string, 17, L"%016I64x", p_ext->info.profile.ioc_guid
);
+	status = RtlStringCchPrintfW(p_string, 33, L"%016I64x%016I64x",
+		 p_ext->info.profile.ioc_guid,p_ext->pdo.ca_guid);
 	if( !NT_SUCCESS( status ) )
 	{
 		CL_ASSERT( NT_SUCCESS( status ) );
Index: inc/iba/ib_al.h
===================================================================
--- inc/iba/ib_al.h	(revision 1797)
+++ inc/iba/ib_al.h	(working copy)
@@ -43,7 +43,7 @@
 {
 #endif	/* __cplusplus */
 
-/****h* IB Access Layer API/Overview
+/****h* IB Access Layer API/Access Layer
 * NAME
 *	InfiniBand Access Layer
 * COPYRIGHT
@@ -8776,6 +8776,7 @@
 	void* __ptr64				context;
 
 	ib_net64_t					guid;
+	ib_net64_t					ca_guid;
 
 }	ib_pnp_rec_t;
 /*
@@ -8809,6 +8810,9 @@
 *		The GUID of the adapter, port, IOU, or IOC for which
 *		the PnP event occurred.
 *
+*	ca_guid
+*		The  GUID of the HCA 
+*
 * NOTES
 *	This structure is returned to the user to notify them of: the
addition
 *	of a channel adapter, the removal of a channel adapter, a port
up or down
@@ -8945,6 +8949,7 @@
 typedef struct _ib_pnp_iou_rec
 {
 	ib_pnp_rec_t				pnp_rec;
+	net64_t						guid;
 	net64_t						ca_guid;
 	net64_t						chassis_guid;
 	uint8_t						slot;

> -----Original Message-----
> From: Alex Estrin [mailto:alex.estrin at qlogic.com] 
> Sent: Thursday, November 16, 2006 8:37 PM
> To: Yossi Leybovich
> Subject: RE: [Openib-windows] [IBAL][Patch] reference counter 
> for IOC obj in IOC PnP manager
> 
> Hi Yossi,
> 
> I didn't hear from you for a some time. 
> Probably this was just a sideffect
>  of our transitioning to the new company environment/e-mail 
> network and some of e-mail was bounced.
> Have you come up with ioc service entries solution yet?
> I would appreciate if let me know when you do.
> 
> Thank you,
> Alex
> 
> > -----Original Message-----
> > From: Yossi Leybovich [mailto:sleybo at mellanox.co.il]
> > Sent: Thursday, November 02, 2006 2:49 AM
> > To: Yossi Leybovich; Estrin, Alex
> > Cc: openib-windows at openib.org
> > Subject: RE: [Openib-windows] [IBAL][Patch] reference 
> counter for IOC 
> > obj in IOC PnP manager
> > 
> > 
> > I am also encountering case when ioc_async_cb was called when 
> > query_cnt != 0 , I am trying to solve this problem I hope it will 
> > solve also the ref_cnt problem .
> > Yossi
> > 
> > > -----Original Message-----
> > > From: openib-windows-bounces at openib.org 
> > > [mailto:openib-windows-bounces at openib.org] On Behalf Of Yossi 
> > > Leybovich
> > > Sent: Sunday, October 29, 2006 3:07 PM
> > > To: Estrin, Alex; Yossi Leybovich
> > > Cc: openib-windows at openib.org
> > > Subject: Re: [Openib-windows] [IBAL][Patch] reference counter for 
> > > IOC obj in IOC PnP manager
> > > 
> > > Hi
> > > 
> > > Thanks for the patch I am reviewing it to check if the query_cnt 
> > > field can be use to track the outstanding queries.
> > > Any way see below my comment
> > > 
> > > > -----Original Message-----
> > > > From: openib-windows-bounces at openib.org 
> > > > [mailto:openib-windows-bounces at openib.org] On Behalf Of
> > Estrin, Alex
> > > > Sent: Friday, October 27, 2006 1:43 AM
> > > > To: Yossi Leybovich
> > > > Cc: openib-windows at openib.org
> > > > Subject: [Openib-windows] [IBAL][Patch] reference counter
> > > for IOC obj
> > > > in IOC PnP manager
> > > > 
> > > > Hi Yossi,
> > > > 
> > > > This patch introduces a reference counter to protect 
> ioc obj from 
> > > > double memory free.
> > > > 
> > > > Possible scenario - IOC Manager handles PORT_DOWN event,
> > > frees ioc obj
> > > > and put it back to the pool, while there are outstanding
> > > send MAD svc
> > > > that has reference to that ioc obj.
> > > > On timeout send callback will try to free ioc obj again.
> > > > 
> > > > By using this counter we also protect ioc obj from return
> > > to the pool
> > > > and possible reusage while outstanding send mad keeps it's
> > > reference.
> > > > Please review.
> > > > 
> > > > Thanks,
> > > > Alex
> > > > 
> > > ...
> > > >  
> > > >  				cl_atomic_inc(
> > > > &p_results->p_svc->query_cnt );
> > > >  			}
> > > > +			if( status == IB_SUCCESS )
> > > > +				cl_atomic_inc( 
> &p_ioc->ref_cnt );
> > > 
> > > The AL create mad for each chunk of 4 service entries it 
> create list 
> > > from the mad and post it.
> > > I think that we need to take reference for each mad we 
> send and not 
> > > for each mad list we post ( same as the query_cnt) What 
> do you think 
> > > ?
> > > 
> > > >  		}
> > > >  	}
> > > >  
> > > > 
> > > 
> > > _______________________________________________
> > > openib-windows mailing list
> > > openib-windows at openib.org
> > > http://openib.org/mailman/listinfo/openib-windows
> > > 
> > 
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ioc_context_map.patch
Type: application/octet-stream
Size: 14293 bytes
Desc: ioc_context_map.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20061117/3731f587/attachment.obj>


More information about the ofw mailing list