<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv=Content-Type content="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 12 (filtered medium)">
<style>
<!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0in;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman","serif";}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-reply;
font-family:"Calibri","sans-serif";
color:#1F497D;}
.MsoChpDefault
{mso-style-type:export-only;
font-size:10.0pt;}
@page Section1
{size:8.5in 11.0in;
margin:1.0in 1.0in 1.0in 1.0in;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=EN-US link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Hi Alex,<o:p></o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Why not move the call to IoSetCancelRoutine up in the function,
and check the return value? If the return is NULL, then someone else
already cancelled the IRP. If non-NULL, then the routine cancels
it. This also will close any potential race with completing the IRP
(where the cancel routine should also be cleared.)<o:p></o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>IoSetCancelRoutine is atomic, so should provide the correct
level of synchronization.<o:p></o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>Thoughts?<o:p></o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'>-Fab<o:p></o:p></span></p>
<p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";
color:#1F497D'><o:p> </o:p></span></p>
<div style='border:none;border-left:solid blue 1.5pt;padding:0in 0in 0in 4.0pt'>
<div>
<div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'>
<p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span
style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>
ofw-bounces@lists.openfabrics.org [mailto:ofw-bounces@lists.openfabrics.org] <b>On
Behalf Of </b>Alex Naslednikov<br>
<b>Sent:</b> Tuesday, June 08, 2010 12:56 AM<br>
<b>To:</b> ofw@lists.openfabrics.org; Leonid Keller; Tzachi Dar<br>
<b>Cc:</b> Uri Habusha<br>
<b>Subject:</b> [ofw] [Patch][Core] Fix at IRP cancellation mechanism<o:p></o:p></span></p>
</div>
</div>
<p class=MsoNormal><o:p> </o:p></p>
<div>
<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Arial","sans-serif"'>This
patch fixes the race (and BSOD) within IRP cancellation mechanism.<br>
The original problem:<br>
1.Function al_dev_cancel_ioctl() called from the 2 places:<br>
directly from IBAL and by OS (as a callback)<br>
2. This is because I/O request can be cancelled both by user-level mechanism
and <br>
by OS.<br>
3. Function al_dev_cancel_ioctl() calls to IoGetCurrentIrpStackLocation() in
order <br>
to receive context and take a spinlock (p_context->cb_lock)<br>
4. But IoGetCurrentIrpStackLocation() as well further access to stack pointer<br>
should be protected !!! <br>
5. BSOD happened when context or IRP itself were already cancelled by concurrent
<br>
calls to al_dev_cancel_ioctl()</span><o:p></o:p></p>
</div>
<div>
<p class=MsoNormal> <o:p></o:p></p>
</div>
<div>
<p class=MsoNormal><span style='font-size:10.0pt;font-family:"Arial","sans-serif"'>Signed-off
by: Alexander Naslednikov (xalex at mellanox.co.il)<br>
Index: B:/users/xalex/2_1_0/core/al/kernel/al_dev.c<br>
===================================================================<br>
--- B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5929)<br>
+++ B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5930)<br>
@@ -65,9 +65,16 @@<br>
__proxy_cancel_cblists(<br>
IN al_dev_open_context_t *p_context );<br>
<br>
+CL_INLINE void<br>
+al_dev_cancel_ioctl_unlocked(<br>
+ IN al_dev_open_context_t *p_context,<br>
+ IN OUT cl_ioctl_handle_t *p_ioctl );<br>
static void<br>
__construct_open_context(<br>
IN al_dev_open_context_t *p_context )<br>
@@ -343,12 +350,19 @@<br>
p_context->closing = TRUE;<br>
<br>
/* Flush any pending IOCTLs in case user-mode threads died on us.
*/<br>
+ /* Protect IOCTL cancellation by spinlock to avoid race <br>
+ */<br>
+ <br>
+ cl_spinlock_acquire( &p_context->cb_lock );<br>
+ <br>
if( p_context->h_cm_ioctl )<br>
- al_dev_cancel_ioctl( p_context->h_cm_ioctl );<br>
- if( p_context->h_comp_ioctl )<br>
- al_dev_cancel_ioctl( p_context->h_comp_ioctl );<br>
+ al_dev_cancel_ioctl_unlocked( p_context,
&p_context->h_cm_ioctl );<br>
+ if( p_context->h_comp_ioctl ) <br>
+ al_dev_cancel_ioctl_unlocked( p_context,
&p_context->h_comp_ioctl );<br>
if( p_context->h_misc_ioctl )<br>
- al_dev_cancel_ioctl( p_context->h_misc_ioctl );<br>
+ al_dev_cancel_ioctl_unlocked( p_context,
&p_context->h_misc_ioctl );<br>
+ <br>
+ cl_spinlock_release ( &p_context->cb_lock );<br>
<br>
while( p_context->ref_cnt )<br>
{<br>
@@ -514,20 +528,31 @@<br>
<br>
/* Get the stack location. */<br>
p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );<br>
+ if (!p_io_stack) {<br>
+ AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was
previosly destroyed \n") );<br>
+ return;<br>
+ }<br>
<br>
p_context = (al_dev_open_context_t
*)p_io_stack->FileObject->FsContext;<br>
- ASSERT( p_context );<br>
-<br>
+ if (!p_context) {<br>
+ AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("This IOCTL was
previosly destroyed \n") );<br>
+ return;<br>
+ }<br>
/* Clear the IOCTL. */<br>
cl_spinlock_acquire( &p_context->cb_lock );<br>
switch( cl_ioctl_ctl_code( h_ioctl ) )<br>
{<br>
case UAL_GET_COMP_CB_INFO:<br>
ph_ioctl = &p_context->h_comp_ioctl;<br>
+ ASSERT(ph_ioctl);<br>
break;<br>
case UAL_GET_MISC_CB_INFO:<br>
ph_ioctl = &p_context->h_misc_ioctl;<br>
break;<br>
+ case NULL:<br>
+ AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was
previosly destroyed \n") );<br>
+ cl_spinlock_release( &p_context->cb_lock );<br>
+ return;<br>
default:<br>
AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Invalid CB
type\n") );<br>
ph_ioctl = NULL;<br>
@@ -550,7 +575,33 @@<br>
AL_EXIT( AL_DBG_DEV );<br>
}<br>
<br>
+/*<br>
+ * Cancel any pending IOCTL calls for the specified type.<br>
+ * This routine is also called when closing the device.<br>
+ * The lock should already be taken by the caller<br>
+ */<br>
+CL_INLINE void<br>
+al_dev_cancel_ioctl_unlocked(<br>
+ IN al_dev_open_context_t *p_context,<br>
+ IN OUT cl_ioctl_handle_t *p_ioctl )<br>
+{<br>
+ <br>
+ AL_ENTER( AL_DBG_DEV );<br>
<br>
+#pragma warning(push, 3)<br>
+ IoSetCancelRoutine( *p_ioctl, NULL );<br>
+#pragma warning(pop)<br>
+<br>
+ /* Complete the IOCTL. */<br>
+ cl_ioctl_complete( *p_ioctl, CL_CANCELED, 0 );<br>
+ proxy_context_deref( p_context );<br>
+ *p_ioctl = NULL;<br>
+<br>
+ AL_EXIT( AL_DBG_DEV );<br>
+}<br>
+<br>
+<br>
+<br>
void<br>
al_dev_cancel_io(<br>
IN DEVICE_OBJECT *p_dev_obj,<br>
Index: B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h<br>
===================================================================<br>
--- B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5929)<br>
+++ B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5930)<br>
@@ -49,7 +49,9 @@<br>
#define IOCTL_CODE( type, cmd ) \<br>
CTL_CODE( type, (cmd & 0x0FFF), METHOD_BUFFERED,
FILE_ANY_ACCESS)<br>
<br>
+#define BAD_IRP_STACK 0<br>
<br>
+<br>
typedef PIRP cl_ioctl_handle_t;<br>
<br>
<br>
@@ -101,6 +103,10 @@<br>
IO_STACK_LOCATION *p_io_stack;<br>
<br>
p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );<br>
+ if (!p_io_stack) {<br>
+ /* "This IOCTL was previosly destroyed ! */<br>
+ return BAD_IRP_STACK ;<br>
+ }<br>
return p_io_stack->Parameters.DeviceIoControl.IoControlCode;<br>
}<br>
</span><o:p></o:p></p>
</div>
</div>
</div>
</body>
</html>