[ofw] BSOD in ipoib

Ishai Rabinovitz ishai at mellanox.co.il
Tue Nov 4 05:15:31 PST 2008


Anatoly,
 
Can you please also open a bug in Bugzilla?
 
Ishai


________________________________

	From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Tzachi Dar
	Sent: Tuesday, November 04, 2008 3:13 PM
	To: Anatoly Greenblatt; ofw at lists.openfabrics.org
	Subject: RE: [ofw] BSOD in ipoib
	
	
	How do you repro this issue? How long does it take to reach the
crash?
	 
	The next step is to understand which endpoint was
causing this issue:
	 
	What was the dlid of it? Is it the same dlid for all of them?
	 
	Are we trying to take out an endpoint that is not in the list,
or was the list corrupted before we started?
	 
	Can you please add a print every time we add an endpoint
to the list and every time we remove it, and send me the log?
	 
	Thanks
	Tzachi
	 
	 


________________________________

		From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Anatoly
Greenblatt
		Sent: Tuesday, November 04, 2008 2:55 PM
		To: ofw at lists.openfabrics.org
		Subject: [ofw] BSOD in ipoib
		
		

		Hi,

		 

		We had an almost simultaneous crash on 7 systems running
rev 1722.

		It is 100% reproducible. How should we proceed?

		Crash analysis follows.

		 

		Regards,

		Anatoly.

		 

		 

		DRIVER_IRQL_NOT_LESS_OR_EQUAL (d1)

		An attempt was made to access a pageable (or completely
invalid) address at an

		interrupt request level (IRQL) that is too high.  This
is usually

		caused by drivers using improper addresses.

		If kernel debugger is available get stack backtrace.

		Arguments:

		Arg1: 0000000000000008, memory referenced

		Arg2: 0000000000000002, IRQL

		Arg3: 0000000000000001, value 0 = read operation, 1 =
write operation

		Arg4: fffffa60051df334, address which referenced memory

		 

		Debugging Details:

		------------------

		 

		 

		WRITE_ADDRESS:  0000000000000008 

		 

		CURRENT_IRQL:  2

		 

		FAULTING_IP: 

		ipoib!cl_qmap_remove_item+34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

		fffffa60`051df334 48894108        mov     qword ptr
[rcx+8],rax

		 

		DEFAULT_BUCKET_ID:  VISTA_DRIVER_FAULT

		 

		BUGCHECK_STR:  0xD1

		 

		PROCESS_NAME:  System

		 

		TRAP_FRAME:  fffffa600578a7c0 -- (.trap
0xfffffa600578a7c0)

		NOTE: The trap frame does not contain all registers.

		Some register values may be zeroed or incorrect.

		rax=0000000000000000 rbx=fffffa600578a8a8
rcx=0000000000000000

		rdx=fffffa8034943528 rsi=0000000000000000
rdi=0000000000000028

		rip=fffffa60051df334 rsp=fffffa600578a950
rbp=fffffa8032c88570

		 r8=fffffa80349434f0  r9=fffffa8034943468
r10=fffffa8032ba0170

		r11=fffffa8032c886d0 r12=0000000000000000
r13=0000000000000000

		r14=0000000000000000 r15=0000000000000000

		iopl=0         nv up ei pl zr na po nc

		ipoib!cl_qmap_remove_item+0x34:

		fffffa60`051df334 48894108        mov     qword ptr
[rcx+8],rax ds:00000000`00000008=????????????????

		Resetting default scope

		 

		LAST_CONTROL_TRANSFER:  from fffff8000166e12e to
fffff8000166e390

		 

		STACK_TEXT:  

		fffffa60`0578a678 fffff800`0166e12e : 00000000`0000000a
00000000`00000008 00000000`00000002 00000000`00000001 : nt!KeBugCheckEx

		fffffa60`0578a680 fffff800`0166d00b : 00000000`00000001
fffffa60`009751e1 00000000`40010000 fffffa80`32c886d0 :
nt!KiBugCheckDispatch+0x6e

		fffffa60`0578a7c0 fffffa60`051df334 : fffffa80`32cb5520
00000000`00000000 00000000`00000000 fffffa60`051e3110 :
nt!KiPageFault+0x20b

		fffffa60`0578a950 fffffa60`051d9296 : ffff0000`00638072
fffff800`01666444 fffffa60`0578aa10 00000000`00000000 :
ipoib!cl_qmap_remove_item+0x34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

		fffffa60`0578a980 fffffa60`051db48e : fffffa60`051bd780
fffffa60`051e3110 00000000`00000000 fffffa80`34943010 :
ipoib!__endpt_mgr_reset_all+0x256
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_port.c @ 4600]

		fffffa60`0578a9e0 fffffa60`051d03cd : ffffffff`00000000
fffffa60`051bd780 00000000`00000000 fffffa80`34940af0 :
ipoib!ipoib_port_down+0x222
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_port.c @ 5625]

		fffffa60`0578aae0 fffffa60`0518b1cb : fffffa60`051bd780
fffffa60`051bd780 fffffa80`32ca6120 fffff880`055c59b0 :
ipoib!__ipoib_pnp_cb+0x6f5
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_adapter.c @ 797]

		fffffa60`0578ab40 fffffa60`0518c676 : fffffa80`32ca6230
fffffa60`051bd110 fffffa80`354a10f8 fffffa60`0578ac20 :
ibbus!__pnp_notify_user+0x17b
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 557]

		fffffa60`0578abd0 fffffa60`0518ca81 : 00000000`00000000
fffffa80`34582bb0 fffffa80`354a10f8 fffffa60`05196adc :
ibbus!__pnp_process_port_forward+0xa6
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1279]

		fffffa60`0578ac00 fffffa60`0518cc4d : fffffa80`34582bb0
00000000`00000002 fffffa80`3457df00 00000000`00000000 :
ibbus!__pnp_check_ports+0x2a9
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1478]

		fffffa60`0578ac60 fffffa60`0516be9d : fffffa80`3457cb98
fffffa80`00001490 fffffa80`35022000 00000000`00000018 :
ibbus!__pnp_check_events+0x171
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1566]

		fffffa60`0578acc0 fffffa60`0516d0d9 : fffffa80`3457ca60
fffffa80`32883360 00000000`00000000 fffffa60`0516d43c :
ibbus!__cl_async_proc_worker+0x61
[c:\work\winof\sources\core\complib\cl_async_proc.c @ 153]

		fffffa60`0578acf0 fffffa60`0516d464 : fffffa80`32883360
00000000`00000080 fffffa80`32883360 ffb533e8`00000408 :
ibbus!__cl_thread_pool_routine+0x41
[c:\work\winof\sources\core\complib\cl_threadpool.c @ 66]

		fffffa60`0578ad20 fffff800`01891de3 : 72052979`801a7402
058d4c18`498b4814 0062ba66`00048024 9c8d4cff`ffbc2b01 :
ibbus!__thread_callback+0x28
[c:\work\winof\sources\core\complib\kernel\cl_thread.c @ 49]

		fffffa60`0578ad50 fffff800`016a8536 : fffff800`0178d680
fffffa80`34578720 fffff800`01792b80 00000000`00000001 :
nt!PspSystemThreadStartup+0x57

		fffffa60`0578ad80 00000000`00000000 : 00000000`00000000
00000000`00000000 00000000`00000000 00000000`00000000 :
nt!KiStartSystemThread+0x16

		 

		 

		STACK_COMMAND:  kb

		 

		FOLLOWUP_IP: 

		ipoib!cl_qmap_remove_item+34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

		fffffa60`051df334 48894108        mov     qword ptr
[rcx+8],rax

		 

		SYMBOL_STACK_INDEX:  3

		 

		SYMBOL_NAME:  ipoib!cl_qmap_remove_item+34

		 

		FOLLOWUP_NAME:  MachineOwner

		 

		MODULE_NAME: ipoib

		 

		IMAGE_NAME:  ipoib.sys

		 

		DEBUG_FLR_IMAGE_TIMESTAMP:  490dcba5

		 

		FAILURE_BUCKET_ID:
X64_0xD1_W_ipoib!cl_qmap_remove_item+34

		 

		BUCKET_ID:  X64_0xD1_W_ipoib!cl_qmap_remove_item+34

		 

		Followup: MachineOwner

		---------

		 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081104/6db73087/attachment.html>


More information about the ofw mailing list