[Openib-windows] srp blue screen when CM fail to connect

Fabian Tillier ftillier at silverstorm.com
Fri Sep 1 16:03:46 PDT 2006


Hi Yossi,

On 8/31/06, Yossi Leybovich <sleybo at mellanox.co.il> wrote:
>
> Fab
> I got blue screen while trying to bring our SRP target up.

I have a fix, but I don't quite understand why it makes the problem go
away.  I didn't find any double free issues in the code, but did take
the opportunity to clean up the code a little.

Since I don't quite understand why it works now, I didn't check the
changes in.  Instead, the patch is attached - please give it a shot and
let me know if you still see the previous bug.

Thanks,

- Fab

Index: core/al/al_common.c
===================================================================
--- core/al/al_common.c	(revision 477)
+++ core/al/al_common.c	(working copy)
@@ -127,7 +127,7 @@
  * Used to force synchronous destruction of AL objects.
  */
 void
-ib_sync_destroy(
+__sync_destroy_cb(
 	IN				void						*context )
 {
 	UNUSED_PARAM( context );
@@ -405,7 +405,7 @@
 	if( ref_cnt == 0 )
 	{
 		if( p_obj->pfn_destroy == async_destroy_obj &&
-			p_obj->user_destroy_cb != ib_sync_destroy )
+			p_obj->user_destroy_cb != __sync_destroy_cb )
 		{
 			/* Queue the object for asynchronous destruction. */
 #if AL_OBJ_PRIVATE_ASYNC_PROC
@@ -467,7 +467,7 @@
 	AL_ENTER( AL_DBG_AL_OBJ );

 	if( pfn_destroy_cb == ib_sync_destroy )
-		sync_destroy_obj( p_obj, pfn_destroy_cb );
+		sync_destroy_obj( p_obj, __sync_destroy_cb );
 	else if( destroy_obj( p_obj, pfn_destroy_cb ) )
 		deref_al_obj( p_obj );	/* Only destroy the object once. */

Index: inc/iba/ib_al.h
===================================================================
--- inc/iba/ib_al.h	(revision 469)
+++ inc/iba/ib_al.h	(working copy)
@@ -433,9 +433,7 @@
 *
 * SYNOPSIS
 */
-AL_EXPORT void AL_API
-ib_sync_destroy(
-	IN				void						*context );
+static const ib_pfn_destroy_cb_t ib_sync_destroy = (ib_pfn_destroy_cb_t)(LONG_PTR)-1;
 /*
 * PARAMETERS
 *	Not Applicable.
Index: ulp/srp/kernel/srp_session.c
===================================================================
--- ulp/srp/kernel/srp_session.c	(revision 469)
+++ ulp/srp/kernel/srp_session.c	(working copy)
@@ -81,9 +81,25 @@

 	srp_close_ca( &p_srp_session->hca );

+	if ( p_srp_session->p_shutdown_srb != NULL )
+	{
+		p_srp_session->p_shutdown_srb->SrbStatus = SRB_STATUS_SUCCESS;
+		SRP_TRACE( SRP_DBG_DEBUG,
+			("Returning SrbStatus %s(0x%x) for Function = %s(0x%x), "
+			"Path = 0x%x, Target = 0x%x, Lun = 0x%x\n",
+			g_srb_status_name[p_srp_session->p_shutdown_srb->SrbStatus],
+			p_srp_session->p_shutdown_srb->SrbStatus,
+			p_srp_session->p_shutdown_srb[p_srp_session->p_shutdown_srb->Function],
+			p_srp_session->p_shutdown_srb->Function,
+			p_srp_session->p_shutdown_srb->PathId,
+			p_srp_session->p_shutdown_srb->TargetId,
+			p_srp_session->p_shutdown_srb->Lun) );
+		StorPortNotification( RequestComplete, p_srp_session->p_hba->p_ext,
+			p_srp_session->p_shutdown_srb );
+	}
+
 	srp_free_connection( &p_srp_session->connection );
 	srp_destroy_descriptors( &p_srp_session->descriptors );
-	srp_free_hca( &p_srp_session->hca );

 	SRP_TRACE( SRP_DBG_VERBOSE, ("Session Object ref_cnt = %d\n", p_srp_session->obj.ref_cnt) );

@@ -302,24 +318,15 @@
 									  &p_srp_session->p_hba->p_srp_path_record->path_rec.sgid,
 									  p_srp_session );
 	if ( status != IB_SUCCESS )
-	{
-		srp_free_hca( &p_srp_session->hca );
 		goto exit;
-	}

 	status = srp_connect( &p_srp_session->connection,
 						  &p_srp_session->hca,
 						  1, /* initiator_extension */
 						  (uint8_t)p_srp_session->p_hba->ioc_info.profile.send_msg_depth,
 						  p_srp_session );
-	if ( status != IB_SUCCESS )
-	{
-		srp_free_hca( &p_srp_session->hca );
-	}

 exit:
-
 	SRP_EXIT( SRP_DBG_PNP );
-
 	return ( status );
 }
Index: ulp/srp/kernel/srp_hca.c
===================================================================
--- ulp/srp/kernel/srp_hca.c	(revision 469)
+++ ulp/srp/kernel/srp_hca.c	(working copy)
@@ -73,11 +73,8 @@
 		goto exit;
 	}

-	status = p_hca->p_hba->ifc.open_ca( h_al,
-						 p_hca->ca_guid,
-						 srp_async_event_handler_cb,
-						 p_context,
-						 &p_hca->h_ca );
+	status = p_hca->p_hba->ifc.open_ca( h_al, p_hca->ca_guid,
+		srp_async_event_handler_cb, p_context, &p_hca->h_ca );
 	if ( status != IB_SUCCESS )
 	{
 		SRP_TRACE( SRP_DBG_ERROR,
@@ -85,8 +82,6 @@
 		goto exit;
 	}

-	p_hca->initialized = TRUE;
-
 	status = p_hca->p_hba->ifc.alloc_pd( p_hca->h_ca,
 						  IB_PDT_NORMAL,
 						  p_context,
@@ -121,15 +116,8 @@
 	{
 		SRP_TRACE( SRP_DBG_ERROR,
 			("Physical Memory Registration Failure. Status = %d\n", status) );
-	}
-
 exit:
-	if ( status != IB_SUCCESS )
-	{
-		if ( p_hca->initialized == TRUE )
-		{
-			srp_close_ca( p_hca );
-		}
+		srp_close_ca( p_hca );
 	}

 	SRP_EXIT( SRP_DBG_PNP );
@@ -137,44 +125,6 @@
 	return ( status );
 }

-
-/* srp_close_ca_cb */
-/*!
-Notifies StorPort when CA has closed
-
-@param p_context - session being closed
-
-@return - none
-*/
-void
-srp_close_ca_cb(
-	IN void *p_context)
-{
-	srp_session_t       *p_srp_session = (srp_session_t *)p_context;
-	SCSI_REQUEST_BLOCK  *p_srb         = p_srp_session->p_shutdown_srb;
-
-	SRP_ENTER( SRP_DBG_PNP );
-
-	if ( p_srb != NULL )
-	{
-		p_srb->SrbStatus = SRB_STATUS_SUCCESS;
-		SRP_TRACE( SRP_DBG_DEBUG,
-				   ("Returning SrbStatus %s(0x%x) for Function = %s(0x%x), Path = 0x%x, Target = 0x%x, Lun = 0x%x\n",
-				   g_srb_status_name[p_srb->SrbStatus],
-				   p_srb->SrbStatus,
-				   g_srb_function_name[p_srb->Function],
-				   p_srb->Function,
-				   p_srb->PathId,
-				   p_srb->TargetId,
-				   p_srb->Lun) );
-		StorPortNotification( RequestComplete, p_srp_session->p_hba->p_ext, p_srb );
-	}
-
-	cl_event_signal( &p_srp_session->hca.hca_destroyed_event );
-
-	SRP_EXIT( SRP_DBG_PNP );
-}
-
 /* srp_close_ca */
 /*!
 Closes the channel adapter
@@ -185,41 +135,18 @@
 */
 void
 srp_close_ca(
-	IN OUT  srp_hca_t   *p_hca )
+	IN OUT  srp_hca_t	*p_hca )
 {
 	SRP_ENTER( SRP_DBG_PNP );

-	if ( p_hca->initialized == TRUE )
+	if( p_hca->h_ca )
 	{
-		cl_status_t cl_status;
-
-		SRP_TRACE( SRP_DBG_DEBUG, ("Closing Channel Adapter.\n") );
-
-		cl_status = cl_event_init( &p_hca->hca_destroyed_event, TRUE );
-		if ( cl_status != CL_SUCCESS )
-		{
-			SRP_TRACE( SRP_DBG_ERROR, ("Cannot Initialize HCA Destroyed Event. Status = %d\n", cl_status) );
-			p_hca->p_hba->ifc.close_ca( p_hca->h_ca, NULL );
-		}
-		else
-		{
-			p_hca->p_hba->ifc.close_ca( p_hca->h_ca, srp_close_ca_cb );
-
-			cl_status = cl_event_wait_on( &p_hca->hca_destroyed_event, EVENT_NO_TIMEOUT, FALSE );
-			if ( cl_status != CL_SUCCESS )
-			{
-				SRP_TRACE( SRP_DBG_ERROR, ("Wait On HCA Destroyed Event Failed. Status = %d\n", cl_status) );
-			}
-
-			cl_event_destroy( &p_hca->hca_destroyed_event );
-		}
-
-		SRP_TRACE( SRP_DBG_DEBUG,
-			("Closed Channel Adapter.\n") );
-
-		cl_memclr( p_hca, sizeof( *p_hca ) );
+		p_hca->p_hba->ifc.close_ca( p_hca->h_ca, ib_sync_destroy );
+		SRP_TRACE( SRP_DBG_DEBUG, ("Closed Channel Adapter.\n") );
 	}

+	cl_memclr( p_hca, sizeof( *p_hca ) );
+
 	SRP_EXIT( SRP_DBG_PNP );
 }

@@ -296,7 +223,6 @@

 	cl_memclr( p_hca, sizeof( *p_hca ) );

-	p_hca->initialized = FALSE;
 	p_hca->p_hba = p_hba;

 	SRP_EXIT( SRP_DBG_PNP );
@@ -304,18 +230,3 @@
 	return ( IB_SUCCESS );
 }

-/* srp_free_hca */
-/*!
-Frees hca resources
-
-@param p_hca  - pointer to the hca structure
-
-@return -  none
-*/
-void
-srp_free_hca(
-	IN OUT  srp_hca_t   *p_hca  )
-{
-	cl_memclr( p_hca, sizeof( *p_hca ) );
-}
-
Index: ulp/srp/kernel/srp_hca.h
===================================================================
--- ulp/srp/kernel/srp_hca.h	(revision 469)
+++ ulp/srp/kernel/srp_hca.h	(working copy)
@@ -40,7 +40,6 @@

 typedef struct _srp_hca
 {
-	BOOLEAN					initialized;
 	srp_hba_t				*p_hba;

 	ib_net64_t				ca_guid;
@@ -50,7 +49,6 @@
 	uint64_t				vaddr;
 	net32_t					lkey;
 	net32_t					rkey;
-	cl_event_t				hca_destroyed_event;

 }	srp_hca_t;

@@ -75,8 +73,4 @@
 	IN	OUT			srp_hca_t					*p_hca,
 	IN				srp_hba_t					*p_hba );

-void
-srp_free_hca(
-	IN	OUT			srp_hca_t					*p_hca  );
-
 #endif  /* _SRP_HCA_H_ */
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: srp_cm_timeout.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20060901/9538e8ad/attachment.ksh>


More information about the ofw mailing list