[ofw] BSOD in ipoib

Tzachi Dar tzachid at mellanox.co.il
Tue Nov 4 05:12:48 PST 2008


How do you repro this issue? How long does it take to reach the crash?
 
The next step is to understand which endpoint was causing this
issue:
 
What was its dlid? Is it the same dlid for all of them?
 
Are we trying to take out an endpoint that is not in the list, or was
the list corrupted before we started?
 
Can you please add a print every time we add an endpoint to the
list and every time we remove one, and send me the log?
 
Thanks
Tzachi
 
 


________________________________

	From: ofw-bounces at lists.openfabrics.org
[mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Anatoly
Greenblatt
	Sent: Tuesday, November 04, 2008 2:55 PM
	To: ofw at lists.openfabrics.org
	Subject: [ofw] BSOD in ipoib
	
	

	Hi,

	 

	We had an almost simultaneous crash on 7 systems running rev
1722.

	It is 100% reproducible. How should we proceed?

	Crash analysis follows.

	 

	Regards,

	Anatoly.

	 

	 

	DRIVER_IRQL_NOT_LESS_OR_EQUAL (d1)

	An attempt was made to access a pageable (or completely invalid)
address at an

	interrupt request level (IRQL) that is too high.  This is
usually

	caused by drivers using improper addresses.

	If kernel debugger is available get stack backtrace.

	Arguments:

	Arg1: 0000000000000008, memory referenced

	Arg2: 0000000000000002, IRQL

	Arg3: 0000000000000001, value 0 = read operation, 1 = write
operation

	Arg4: fffffa60051df334, address which referenced memory

	 

	Debugging Details:

	------------------

	 

	 

	WRITE_ADDRESS:  0000000000000008 

	 

	CURRENT_IRQL:  2

	 

	FAULTING_IP: 

	ipoib!cl_qmap_remove_item+34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

	fffffa60`051df334 48894108        mov     qword ptr [rcx+8],rax

	 

	DEFAULT_BUCKET_ID:  VISTA_DRIVER_FAULT

	 

	BUGCHECK_STR:  0xD1

	 

	PROCESS_NAME:  System

	 

	TRAP_FRAME:  fffffa600578a7c0 -- (.trap 0xfffffa600578a7c0)

	NOTE: The trap frame does not contain all registers.

	Some register values may be zeroed or incorrect.

	rax=0000000000000000 rbx=fffffa600578a8a8 rcx=0000000000000000

	rdx=fffffa8034943528 rsi=0000000000000000 rdi=0000000000000028

	rip=fffffa60051df334 rsp=fffffa600578a950 rbp=fffffa8032c88570

	 r8=fffffa80349434f0  r9=fffffa8034943468 r10=fffffa8032ba0170

	r11=fffffa8032c886d0 r12=0000000000000000 r13=0000000000000000

	r14=0000000000000000 r15=0000000000000000

	iopl=0         nv up ei pl zr na po nc

	ipoib!cl_qmap_remove_item+0x34:

	fffffa60`051df334 48894108        mov     qword ptr [rcx+8],rax
ds:00000000`00000008=????????????????

	Resetting default scope

	 

	LAST_CONTROL_TRANSFER:  from fffff8000166e12e to
fffff8000166e390

	 

	STACK_TEXT:  

	fffffa60`0578a678 fffff800`0166e12e : 00000000`0000000a
00000000`00000008 00000000`00000002 00000000`00000001 : nt!KeBugCheckEx

	fffffa60`0578a680 fffff800`0166d00b : 00000000`00000001
fffffa60`009751e1 00000000`40010000 fffffa80`32c886d0 :
nt!KiBugCheckDispatch+0x6e

	fffffa60`0578a7c0 fffffa60`051df334 : fffffa80`32cb5520
00000000`00000000 00000000`00000000 fffffa60`051e3110 :
nt!KiPageFault+0x20b

	fffffa60`0578a950 fffffa60`051d9296 : ffff0000`00638072
fffff800`01666444 fffffa60`0578aa10 00000000`00000000 :
ipoib!cl_qmap_remove_item+0x34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

	fffffa60`0578a980 fffffa60`051db48e : fffffa60`051bd780
fffffa60`051e3110 00000000`00000000 fffffa80`34943010 :
ipoib!__endpt_mgr_reset_all+0x256
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_port.c @ 4600]

	fffffa60`0578a9e0 fffffa60`051d03cd : ffffffff`00000000
fffffa60`051bd780 00000000`00000000 fffffa80`34940af0 :
ipoib!ipoib_port_down+0x222
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_port.c @ 5625]

	fffffa60`0578aae0 fffffa60`0518b1cb : fffffa60`051bd780
fffffa60`051bd780 fffffa80`32ca6120 fffff880`055c59b0 :
ipoib!__ipoib_pnp_cb+0x6f5
[c:\work\winof\sources\ulp\ipoib\kernel\ipoib_adapter.c @ 797]

	fffffa60`0578ab40 fffffa60`0518c676 : fffffa80`32ca6230
fffffa60`051bd110 fffffa80`354a10f8 fffffa60`0578ac20 :
ibbus!__pnp_notify_user+0x17b
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 557]

	fffffa60`0578abd0 fffffa60`0518ca81 : 00000000`00000000
fffffa80`34582bb0 fffffa80`354a10f8 fffffa60`05196adc :
ibbus!__pnp_process_port_forward+0xa6
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1279]

	fffffa60`0578ac00 fffffa60`0518cc4d : fffffa80`34582bb0
00000000`00000002 fffffa80`3457df00 00000000`00000000 :
ibbus!__pnp_check_ports+0x2a9
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1478]

	fffffa60`0578ac60 fffffa60`0516be9d : fffffa80`3457cb98
fffffa80`00001490 fffffa80`35022000 00000000`00000018 :
ibbus!__pnp_check_events+0x171
[c:\work\winof\sources\core\al\kernel\al_pnp.c @ 1566]

	fffffa60`0578acc0 fffffa60`0516d0d9 : fffffa80`3457ca60
fffffa80`32883360 00000000`00000000 fffffa60`0516d43c :
ibbus!__cl_async_proc_worker+0x61
[c:\work\winof\sources\core\complib\cl_async_proc.c @ 153]

	fffffa60`0578acf0 fffffa60`0516d464 : fffffa80`32883360
00000000`00000080 fffffa80`32883360 ffb533e8`00000408 :
ibbus!__cl_thread_pool_routine+0x41
[c:\work\winof\sources\core\complib\cl_threadpool.c @ 66]

	fffffa60`0578ad20 fffff800`01891de3 : 72052979`801a7402
058d4c18`498b4814 0062ba66`00048024 9c8d4cff`ffbc2b01 :
ibbus!__thread_callback+0x28
[c:\work\winof\sources\core\complib\kernel\cl_thread.c @ 49]

	fffffa60`0578ad50 fffff800`016a8536 : fffff800`0178d680
fffffa80`34578720 fffff800`01792b80 00000000`00000001 :
nt!PspSystemThreadStartup+0x57

	fffffa60`0578ad80 00000000`00000000 : 00000000`00000000
00000000`00000000 00000000`00000000 00000000`00000000 :
nt!KiStartSystemThread+0x16

	 

	 

	STACK_COMMAND:  kb

	 

	FOLLOWUP_IP: 

	ipoib!cl_qmap_remove_item+34
[c:\work\winof\sources\core\complib\cl_map.c @ 2044]

	fffffa60`051df334 48894108        mov     qword ptr [rcx+8],rax

	 

	SYMBOL_STACK_INDEX:  3

	 

	SYMBOL_NAME:  ipoib!cl_qmap_remove_item+34

	 

	FOLLOWUP_NAME:  MachineOwner

	 

	MODULE_NAME: ipoib

	 

	IMAGE_NAME:  ipoib.sys

	 

	DEBUG_FLR_IMAGE_TIMESTAMP:  490dcba5

	 

	FAILURE_BUCKET_ID:  X64_0xD1_W_ipoib!cl_qmap_remove_item+34

	 

	BUCKET_ID:  X64_0xD1_W_ipoib!cl_qmap_remove_item+34

	 

	Followup: MachineOwner

	---------

	 

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081104/eb9103ba/attachment.html>


More information about the ofw mailing list