[Openib-windows] srp blue screen when CM fail to connect

Yossi Leybovich sleybo at mellanox.co.il
Mon Sep 4 06:16:20 PDT 2006


 

> -----Original Message-----
> From: ftillier.sst at gmail.com [mailto:ftillier.sst at gmail.com] 
> On Behalf Of Fabian Tillier
> Sent: Saturday, September 02, 2006 2:04 AM
> To: Yossi Leybovich
> Cc: openib-windows at openib.org
> Subject: Re: [Openib-windows] srp blue screen when CM fail to connect
> 
> Hi Yossi,
> 
> On 8/31/06, Yossi Leybovich <sleybo at mellanox.co.il> wrote:
> >
> > Fab
> > I got blue screen while trying to bring our SRP target up.
> 
> I have a fix, but I don't quite understand why it makes the 
> problem go away.  I didn't find any double free issues in the 
> code, but did take the opportunity to clean up the code a little.
> 
> Since I don't quite understand why it works now, I didn't 
> check the changes in.  Instead the patch is attached - please 
> give it a shot and let me know if you see the previous bug.
> 
> Thanks,
> 
> - Fab
> 
> Index: core/al/al_common.c
> ===================================================================
> --- core/al/al_common.c	(revision 477)
> +++ core/al/al_common.c	(working copy)
> @@ -127,7 +127,7 @@
>   * Used to force synchronous destruction of AL objects.
>   */
>  void
> -ib_sync_destroy(
> +__sync_destroy_cb(
>  	IN				void			
> 			*context )
>  {
>  	UNUSED_PARAM( context );
> @@ -405,7 +405,7 @@
>  	if( ref_cnt == 0 )
>  	{
>  		if( p_obj->pfn_destroy == async_destroy_obj &&
> -			p_obj->user_destroy_cb != ib_sync_destroy )
> +			p_obj->user_destroy_cb != __sync_destroy_cb )
>  		{
>  			/* Queue the object for asynchronous destruction. */
>  #if AL_OBJ_PRIVATE_ASYNC_PROC
> @@ -467,7 +467,7 @@
>  	AL_ENTER( AL_DBG_AL_OBJ );
> 
>  	if( pfn_destroy_cb == ib_sync_destroy )
> -		sync_destroy_obj( p_obj, pfn_destroy_cb );
> +		sync_destroy_obj( p_obj, __sync_destroy_cb );
>  	else if( destroy_obj( p_obj, pfn_destroy_cb ) )
>  		deref_al_obj( p_obj );	/* Only destroy the 
> object once. */
> 
> Index: inc/iba/ib_al.h
> ===================================================================
> --- inc/iba/ib_al.h	(revision 469)
> +++ inc/iba/ib_al.h	(working copy)
> @@ -433,9 +433,7 @@
>  *
>  * SYNOPSIS
>  */
> -AL_EXPORT void AL_API
> -ib_sync_destroy(
> -	IN				void			
> 			*context );
> +static const ib_pfn_destroy_cb_t ib_sync_destroy =
> (ib_pfn_destroy_cb_t)(LONG_PTR)-1;
>  /*
>  * PARAMETERS
>  *	Not Applicable.


Why do we need this change?
I think it causes a blue screen with user-level apps (like vstat).
Here is a trace:
I checked, and user_destroy_cb = 0xffffffff.

STACK_TEXT:  
b9879638 8085f753 00000003 ffffffff 00000000
nt!RtlpBreakWithStatusInstruction
b9879684 8086021e 00000003 80a2ca4c c03ffffc
nt!KiBugCheckDebugBreak+0x19
b9879a64 80860832 00000050 ffffffff 00000000 nt!KeBugCheck2+0x574
b9879a84 808526be 00000050 ffffffff 00000000 nt!KeBugCheckEx+0x1b
b9879ad4 80809f07 00000000 ffffffff 00000000 nt!MmAccessFault+0x77e
b9879ad4 ffffffff 00000000 ffffffff 00000000 nt!KiTrap0E+0xd0
WARNING: Frame IP not in any known module. Following frames may be
wrong.
b9879b5c b8e24f19 00088360 b9879b94 b8e228aa 0xffffffff
b9879b94 b8e23707 8a14a978 00000000 00000001
ibbus!async_destroy_cb+0x5b9 [w:\work\latest\core\al\al_common.c @ 675]
b9879bb8 b8e3e95b 8a14a958 ffffffff ffffffff
ibbus!sync_destroy_obj+0x517 [w:\work\latest\core\al\al_common.c @ 548]
b9879bdc b8e3ca16 89b1e278 8a602678 b9879c18 ibbus!proxy_close_ca+0x25b
[w:\work\latest\core\al\kernel\al_proxy_verbs.c @ 638]
b9879c00 b8dac6ea 8a602678 b9879c18 8a572c08 ibbus!verbs_ioctl+0x456
[w:\work\latest\core\al\kernel\al_proxy_verbs.c @ 3450]
b9879c28 b8e7e5ef 8a602678 8a5d6ea0 00000000 ibbus!al_dev_ioctl+0x29a
[w:\work\latest\core\al\kernel\al_dev.c @ 452]
b9879c40 8080a9ee 8a5d6de8 8a602678 80a2c410 ibbus!bus_drv_ioctl+0x22f
[w:\work\latest\core\bus\kernel\bus_driver.c @ 401]
b9879c50 8089a84c 8a60270c 8a1b0590 8a602678 nt!IopfCallDriver+0x31
b9879c64 808abcef 8a5d6de8 8a602678 8a1b0590
nt!IopSynchronousServiceTail+0x60
b9879d00 808b7cc3 000007b8 00000000 00000000 nt!IopXxxControlFile+0x5ef
b9879d34 80806f0f 000007b8 00000000 00000000
nt!NtDeviceIoControlFile+0x2a
b9879d34 7c90eb94 000007b8 00000000 00000000 nt!KiFastCallEntry+0xfc
0006fb34 7c90d8ef 7c801671 000007b8 00000000 ntdll!KiFastSystemCallRet
0006fb38 7c801671 000007b8 00000000 00000000
ntdll!ZwDeviceIoControlFile+0xc
0006fb98 0040f25c 000007b8 003b0040 0006fc44
kernel32!DeviceIoControl+0xdd
0006fbc4 002add5e 000007b8 003b0040 0006fc44
complibd!cl_ioctl_request+0x2c
[w:\work\latest\inc\user\complib\cl_ioctl_osd.h @ 76]
0006fc04 002b633c 003b0040 0006fc44 00000008 ibald!do_al_dev_ioctl+0xee
[w:\work\latest\core\al\user\al_dll.c @ 192]
0006fd3c 002b978d 00088360 0006fd38 00088360 ibald!ual_close_ca+0x1fc
[w:\work\latest\core\al\user\ual_ca.c @ 234]
0006fd5c 002810a2 00088360 7c802600 0040f43a ibald!cleanup_ci_ca+0xed
[w:\work\latest\core\al\user\ual_ci_ca.c @ 324]
0006fd94 00280469 00088380 0040a35f 00000001
ibald!async_destroy_cb+0x422 [w:\work\latest\core\al\al_common.c @ 661]
0006fdb8 00280b9e 00088360 00000000 00000000
ibald!sync_destroy_obj+0x539 [w:\work\latest\core\al\al_common.c @ 548]
0006fdec 0027ffd1 000833f0 00000000 00000000 ibald!destroy_obj+0x6ae
[w:\work\latest\core\al\al_common.c @ 615]
0006fe18 00287a44 000833f0 00000000 00000000 ibald!sync_destroy_obj+0xa1
[w:\work\latest\core\al\al_common.c @ 488]
0006fe44 002cde9b 00000000 0006ff20 01003813 ibald!al_cleanup+0x3b4
[w:\work\latest\core\al\al_init.c @ 146]
0006fe50 01003813 000899c8 00000000 00000001 ibald!ib_close_al+0x3b
[w:\work\latest\core\al\user\ual_mgr.c @ 1106]
0006ff20 0100397e 00000000 00000000 00000000 vstat!vstat_ca_attr+0x3f3
[w:\work\latest\tools\vstat\user\vstat_main.c @ 480]
0006ff44 01003abb 00000002 00303bf8 00302bc8 vstat!main+0xfe
[w:\work\latest\tools\vstat\user\vstat_main.c @ 532]
0006ffc0 7c816d4f 00000012 00000000 7ffdc000 vstat!mainCRTStartup+0x12f
[d:\dnsrv\base\crts\crtw32\dllstuff\crtexe.c @ 501]
0006fff0 00000000 0100398c 00000000 78746341
kernel32!BaseProcessStart+0x23


STACK_COMMAND:  kb

FOLLOWUP_IP: 
ibbus!async_destroy_cb+5b9 [w:\work\latest\core\al\al_common.c @ 675]
b8e24f19 833d28d0e7b804   cmp dword ptr [ibbus!g_al_dbg_level
(b8e7d028)],0x4

FAULTING_SOURCE_CODE:  
   671: 		p_obj->user_destroy_cb( (void*)p_obj->context );
   672: 	}
   673: 
   674: 	/* Free the resources associated with the object. */
>  675: 	AL_PRINT( TRACE_LEVEL_INFORMATION, AL_DBG_AL_OBJ,
("freeing object\n" ) );
   676: 	p_obj->pfn_free( p_obj );
   677: 
   678: 	/* Dereference the parent after freeing the child. */
   679: 	if( p_parent_obj )
   680: 		deref_al_obj( p_parent_obj );




More information about the ofw mailing list