[ofw] [Patch][Core] Fix at IRP cancellation mechanism

Fab Tillier ftillier at microsoft.com
Tue Jun 8 08:33:31 PDT 2010


Hi Alex,

Why not move the call to IoSetCancelRoutine up in the function, and check the return value?  If the return is NULL, then someone else already cancelled the IRP.  If non-NULL, then the routine cancels it.  This also will close any potential race with completing the IRP (where the cancel routine should also be cleared.)

IoSetCancelRoutine is atomic, so should provide the correct level of synchronization.

Thoughts?
-Fab

From: ofw-bounces at lists.openfabrics.org [mailto:ofw-bounces at lists.openfabrics.org] On Behalf Of Alex Naslednikov
Sent: Tuesday, June 08, 2010 12:56 AM
To: ofw at lists.openfabrics.org; Leonid Keller; Tzachi Dar
Cc: Uri Habusha
Subject: [ofw] [Patch][Core] Fix at IRP cancellation mechanism

This patch fixes the race (and BSOD) within IRP cancellation mechanism.
The original problem:
1.Function al_dev_cancel_ioctl() called from the 2 places:
directly from IBAL and by OS (as a callback)
2. This is because I/O request can be cancelled both by user-level mechanism and
by OS.
3. Function al_dev_cancel_ioctl() calls to IoGetCurrentIrpStackLocation() in order
to receive context and take a spinlock (p_context->cb_lock)
4. But IoGetCurrentIrpStackLocation() as well further access to stack pointer
should be protected !!!
5. BSOD happened when context or IRP itself were already cancelled by concurrent
calls to al_dev_cancel_ioctl()

Signed-off by: Alexander Naslednikov (xalex at mellanox.co.il)
Index: B:/users/xalex/2_1_0/core/al/kernel/al_dev.c
===================================================================
--- B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5929)
+++ B:/users/xalex/2_1_0/core/al/kernel/al_dev.c (revision 5930)
@@ -65,9 +65,16 @@
 __proxy_cancel_cblists(
  IN  al_dev_open_context_t   *p_context );

+CL_INLINE void
+al_dev_cancel_ioctl_unlocked(
+ IN     al_dev_open_context_t *p_context,
+ IN OUT    cl_ioctl_handle_t  *p_ioctl );
 static void
 __construct_open_context(
  IN  al_dev_open_context_t   *p_context )
@@ -343,12 +350,19 @@
  p_context->closing = TRUE;

  /* Flush any pending IOCTLs in case user-mode threads died on us. */
+ /* Protect IOCTL cancellation by spinlock to avoid race
+ */
+
+ cl_spinlock_acquire( &p_context->cb_lock );
+
  if( p_context->h_cm_ioctl )
-  al_dev_cancel_ioctl( p_context->h_cm_ioctl );
- if( p_context->h_comp_ioctl )
-  al_dev_cancel_ioctl( p_context->h_comp_ioctl );
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_cm_ioctl );
+ if( p_context->h_comp_ioctl )
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_comp_ioctl );
  if( p_context->h_misc_ioctl )
-  al_dev_cancel_ioctl( p_context->h_misc_ioctl );
+  al_dev_cancel_ioctl_unlocked( p_context, &p_context->h_misc_ioctl );
+
+ cl_spinlock_release ( &p_context->cb_lock );

  while( p_context->ref_cnt )
  {
@@ -514,20 +528,31 @@

  /* Get the stack location. */
  p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );
+ if (!p_io_stack) {
+  AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was previosly destroyed \n") );
+  return;
+ }

  p_context = (al_dev_open_context_t *)p_io_stack->FileObject->FsContext;
- ASSERT( p_context );
-
+ if (!p_context) {
+  AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("This IOCTL was previosly destroyed \n") );
+  return;
+ }
  /* Clear the IOCTL. */
  cl_spinlock_acquire( &p_context->cb_lock );
  switch( cl_ioctl_ctl_code( h_ioctl ) )
  {
  case UAL_GET_COMP_CB_INFO:
   ph_ioctl = &p_context->h_comp_ioctl;
+  ASSERT(ph_ioctl);
   break;
  case UAL_GET_MISC_CB_INFO:
   ph_ioctl = &p_context->h_misc_ioctl;
   break;
+ case NULL:
+  AL_PRINT( TRACE_LEVEL_WARNING, AL_DBG_DEV, ("This IOCTL was previosly destroyed \n") );
+  cl_spinlock_release( &p_context->cb_lock );
+  return;
  default:
   AL_PRINT( TRACE_LEVEL_ERROR, AL_DBG_ERROR, ("Invalid CB type\n") );
   ph_ioctl = NULL;
@@ -550,7 +575,33 @@
  AL_EXIT( AL_DBG_DEV );
 }

+/*
+ * Cancel any pending IOCTL calls for the specified type.
+ * This routine is also called when closing the device.
+ * The lock should already be taken by the caller
+ */
+CL_INLINE void
+al_dev_cancel_ioctl_unlocked(
+ IN     al_dev_open_context_t *p_context,
+ IN OUT    cl_ioctl_handle_t  *p_ioctl )
+{
+
+ AL_ENTER( AL_DBG_DEV );

+#pragma warning(push, 3)
+ IoSetCancelRoutine( *p_ioctl, NULL );
+#pragma warning(pop)
+
+ /* Complete the IOCTL. */
+ cl_ioctl_complete( *p_ioctl, CL_CANCELED, 0 );
+ proxy_context_deref( p_context );
+ *p_ioctl = NULL;
+
+ AL_EXIT( AL_DBG_DEV );
+}
+
+
+
 void
 al_dev_cancel_io(
  IN    DEVICE_OBJECT    *p_dev_obj,
Index: B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h
===================================================================
--- B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5929)
+++ B:/users/xalex/2_1_0/inc/kernel/complib/cl_ioctl_osd.h (revision 5930)
@@ -49,7 +49,9 @@
 #define IOCTL_CODE( type, cmd ) \
  CTL_CODE( type, (cmd & 0x0FFF), METHOD_BUFFERED, FILE_ANY_ACCESS)

+#define BAD_IRP_STACK 0

+
 typedef PIRP cl_ioctl_handle_t;


@@ -101,6 +103,10 @@
  IO_STACK_LOCATION *p_io_stack;

  p_io_stack = IoGetCurrentIrpStackLocation( h_ioctl );
+ if (!p_io_stack) {
+  /* "This IOCTL was previosly destroyed ! */
+  return BAD_IRP_STACK ;
+ }
  return p_io_stack->Parameters.DeviceIoControl.IoControlCode;
 }

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20100608/edf9b192/attachment.html>


More information about the ofw mailing list