[ofw][patch][IBBUS] WHQL PnP test fixes

Leonid Keller leonid at mellanox.co.il
Sun Nov 23 10:48:28 PST 2008


[IBBUS] workaround for some problems with WHQL PnP test over IPoIB.
[mlnx: 3535]

This fix succeeds SurpriseRemoval and 3 of 4 Rebalance tests for
ConnectX.

Rebalance(FailRestart) still fails.

 

Index: bus/kernel/bus_pnp.c
===================================================================
--- bus/kernel/bus_pnp.c (revision 1765)
+++ bus/kernel/bus_pnp.c (working copy)
@@ -1174,7 +1174,20 @@
   p_pdo_ext = PARENT_STRUCT( p_list_item, bus_pdo_ext_t, list_item );
 
   if( !p_pdo_ext->b_present )
+  {
+   // mark it missing to be removed in port_remove
+   p_pdo_ext->b_reported_missing = TRUE;
+   /*
+    * We don't report a PDO that is no longer present.  This is how
+    * the PDO will get cleaned up.
+    */
+   BUS_TRACE( BUS_DBG_PNP, ("Don't report PDO! %s: PDO %p, ext %p, "
+    "present %d, missing %d .\n",
+    p_pdo_ext->cl_ext.vfptr_pnp_po->identity,
+    p_pdo_ext->cl_ext.p_self_do, p_pdo_ext, p_pdo_ext->b_present,
+    p_pdo_ext->b_reported_missing ) );
    continue;
+  }
   
   if( ca_guid && p_pdo_ext->ca_guid != ca_guid )
    continue;
@@ -1208,19 +1221,7 @@
   p_pdo_ext = PARENT_STRUCT( p_list_item, bus_pdo_ext_t, list_item );
 
   if( !p_pdo_ext->b_present )
-  {
-   /*
-    * We don't report a PDO that is no longer present.  This is how
-    * the PDO will get cleaned up.
-    */
-   p_pdo_ext->b_reported_missing = TRUE;
-   BUS_TRACE( BUS_DBG_PNP, ("Don't report PDO! %s: PDO %p, ext %p, "
-    "present %d, missing %d .\n",
-    p_pdo_ext->cl_ext.vfptr_pnp_po->identity,
-    p_pdo_ext->cl_ext.p_self_do, p_pdo_ext, p_pdo_ext->b_present,
-    p_pdo_ext->b_reported_missing ) );
    continue;
-  }
 
   if( ca_guid && p_pdo_ext->ca_guid != ca_guid )
    continue;
Index: bus/kernel/bus_port_mgr.c
===================================================================
--- bus/kernel/bus_port_mgr.c (revision 1765)
+++ bus/kernel/bus_port_mgr.c (working copy)
@@ -864,6 +864,10 @@
    return IB_ERROR;
   }
 
+  /* clean the extension (must be before initializing) */
+  p_port_ext = p_pdo[num_pdo]->DeviceExtension;
+  memset( p_port_ext, 0, sizeof(bus_port_ext_t) );
+
   /* Initialize the device extension. */
   cl_init_pnp_po_ext( p_pdo[num_pdo], NULL, p_pdo[num_pdo],
        bus_globals.dbg_lvl, &vfptr_port_pnp,
@@ -872,7 +876,6 @@
   /* Set the DO_BUS_ENUMERATED_DEVICE flag to mark it as a PDO. */
   p_pdo[num_pdo]->Flags |= DO_BUS_ENUMERATED_DEVICE;
 
-  p_port_ext = p_pdo[num_pdo]->DeviceExtension;
   p_port_ext->pdo.dev_po_state.DeviceState = PowerDeviceD0;
   p_port_ext->pdo.p_parent_ext = p_bfi->p_bus_ext;
   p_port_ext->pdo.b_present = TRUE;
@@ -923,7 +926,7 @@
    * Set the context of the PNP event. The context is passed in for
future
    * events on the same port.
    */
-  if(num_pdo == 0)
+  if(num_pdo == 0) 
    p_ctx->p_pdo_ext = p_port_ext;
  }
 
@@ -1237,8 +1240,6 @@
  }
 
  p_ext->b_present = FALSE;
- p_ext->b_reported_missing = TRUE;
-
  BUS_TRACE( BUS_DBG_PNP,
   ("Mark removing %s: PDO %p, ext %p, present %d, missing %d .\n",
   p_ext->cl_ext.vfptr_pnp_po->identity, p_ext->cl_ext.p_self_do, p_ext,
@@ -1346,6 +1347,13 @@
  p_ext = p_dev_obj->DeviceExtension;
  p_port_mgr = p_ext->pdo.p_parent_ext->bus_filter->p_port_mgr;
 
+ /* skip releasing resources if PDO has not been yet reported missing
*/
+ if (!p_ext->pdo.b_reported_missing) {
+  BUS_TRACE_EXIT( BUS_DBG_PNP, ("PDO is not yet reported missing - skip
the removing port from vector: PDO %p, ext %p\n",
+   p_dev_obj, p_ext) );
+  return;
+ }
+
  /* Remove this PDO from its list. */
  cl_mutex_acquire( &p_port_mgr->pdo_mutex );
  BUS_TRACE( BUS_DBG_PNP, ("Removing port from vector: PDO %p, ext
%p\n",
@@ -1432,8 +1440,20 @@
  UNUSED_PARAM( p_irp );
 
  p_ext = p_dev_obj->DeviceExtension;
- p_ext->pdo.b_present = FALSE;
- p_ext->pdo.b_reported_missing = TRUE;
+ //
+ // Setting 2 folloeing flags seems like the right behaviour
+ // according to DDK, but it causes 
+ // WHQL PnP SurpriseRemoval test to fail
+ // So, as a work around, they are disabled for now.
+ // The best solution is to rewrite all the drivers
+ // to WDF model, hoping it will handle right all PnP/Power issues
+ //
+// p_ext->pdo.b_present = FALSE;
+// p_ext->pdo.b_reported_missing = TRUE;
+ if (!p_ext->pdo.b_reported_missing) {
+  // we have not yet reported the device absence 
+  cl_rollback_pnp_state( &p_ext->pdo.cl_ext );
+ }
  BUS_TRACE( BUS_DBG_PNP, ("%s: PDO %p, ext %p, present %d, missing %d
.\n",
   p_ext->pdo.cl_ext.vfptr_pnp_po->identity,
p_ext->pdo.cl_ext.p_self_do, 
   p_ext, p_ext->pdo.b_present, p_ext->pdo.b_reported_missing ) );
Index: complib/kernel/cl_pnp_po.c
===================================================================
--- complib/kernel/cl_pnp_po.c (revision 1765)
+++ complib/kernel/cl_pnp_po.c (working copy)
@@ -213,6 +213,9 @@
  {
   CL_TRACE_EXIT( CL_DBG_ERROR, p_ext->dbg_lvl, 
    ("IoAcquireRemoveLock returned %08x.\n", status) );
+  p_io_stack = IoGetCurrentIrpStackLocation( p_irp );
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, 
+   ("Minor function %x for %s\n", p_io_stack->MinorFunction,
p_ext->vfptr_pnp_po->identity) );
   p_irp->IoStatus.Status = status;
   IoCompleteRequest( p_irp, IO_NO_INCREMENT );
   return status;
@@ -243,7 +246,7 @@
 
  case IRP_MN_CANCEL_STOP_DEVICE:
   CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, 
-   ("IRP_MN_START_DEVICE for %s\n", p_ext->vfptr_pnp_po->identity) );
+   ("IRP_MN_CANCEL_STOP_DEVICE for %s\n",
p_ext->vfptr_pnp_po->identity) );
   status = __cancel_stop( p_dev_obj, p_irp, &action );
   break;
 
@@ -394,6 +397,8 @@
  {
  case IrpPassDown:
   p_irp->IoStatus.Status = status;
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IrpPassDown: passing down to
PDO %p, ext %p, status %#x\n",
+   p_ext->p_next_do, p_ext, p_irp->IoStatus.Status) );
   IoCopyCurrentIrpStackLocationToNext( p_irp );
   status = IoCallDriver( p_ext->p_next_do, p_irp );
   break;
@@ -402,23 +407,28 @@
   p_irp->IoStatus.Status = status;
 
  case IrpIgnore:
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IrpSkip/IrpIgnore: skipping
down to PDO %p, ext %p, status %#x\n",
+   p_ext->p_next_do, p_ext, p_irp->IoStatus.Status) );
   IoSkipCurrentIrpStackLocation( p_irp );
   status = IoCallDriver( p_ext->p_next_do, p_irp );
   break;
 
  case IrpComplete:
   p_irp->IoStatus.Status = status;
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IrpComplete: complete IRP
with status %#x\n",
+   p_irp->IoStatus.Status) );
   IoCompleteRequest( p_irp, IO_NO_INCREMENT );
   break;
 
  case IrpDoNothing:
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IrpDoNothing: do nothing\n")
);
   break;
  }
 
  if( action != IrpDoNothing )
   IoReleaseRemoveLock( &p_ext->remove_lock, p_irp );
 
- CL_EXIT( CL_DBG_PNP, p_ext->dbg_lvl );
+ CL_TRACE_EXIT( CL_DBG_PNP, p_ext->dbg_lvl, ("returned with status
%#x\n", status) );
  return status;
 }
 
@@ -444,8 +454,10 @@
   * If we get the start request when we're already started, don't 
   * re-initialize the stop lock.
   */
- if( p_ext->last_pnp_state != Started )
+ if( p_ext->last_pnp_state != Started ) {
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IoInitializeRemoveLock:
stop_lock %p[\n", &p_ext->stop_lock));
   IoInitializeRemoveLock( &p_ext->stop_lock, 'dtci', 0, 1000 );
+ }
 
  CL_EXIT( CL_DBG_PNP, p_ext->dbg_lvl );
  return status;
@@ -493,7 +505,12 @@
  if( p_ext->last_pnp_state == Started )
  {
   /* Acquire the lock so we can release and wait. */
-  IoAcquireRemoveLock( &p_ext->stop_lock, p_irp );
+  status = IoAcquireRemoveLock( &p_ext->stop_lock, p_irp );
+  if( !NT_SUCCESS( status ) )
+  {
+   CL_TRACE( CL_DBG_ERROR, p_ext->dbg_lvl, 
+    ("IoAcquireRemoveLock returned %08x. Continue anyway ...\n",
status) );
+  }
   /* Wait for all IO operations to complete. */
   IoReleaseRemoveLockAndWait( &p_ext->stop_lock, p_irp );
  }
@@ -563,14 +580,28 @@
  if( p_ext->last_pnp_state == Started )
  {
   /*
-   * Re-initialize the remove lock before rolling back the PnP
+   * Re-initialize the stop lock before rolling back the PnP
    * state so that there's no contention while it's uninitialized.
    */
+  CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IoInitializeRemoveLock:
stop_lock %p[\n", &p_ext->stop_lock));
   IoInitializeRemoveLock( &p_ext->stop_lock, 'dtci', 0, 1000 );
+#if 0  
+  // leo: it seems like a bug, because it can never get released
+  {
   /* 
    * Acquire the stop lock to allow releasing and waiting when
stopping.
    */
-  IoAcquireRemoveLock( &p_ext->stop_lock, NULL );
+   NTSTATUS   status1;
+   CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IoAcquireRemoveLock:
stop_lock %p[\n", &p_ext->stop_lock));
+   status1 = IoAcquireRemoveLock( &p_ext->stop_lock, NULL );
+   if( !NT_SUCCESS( status1 ) )
+   {
+    CL_TRACE( CL_DBG_ERROR, p_ext->dbg_lvl, 
+     ("IoAcquireRemoveLock returned %08x. Continue anyway ...\n",
status) );
+   }
+   CL_TRACE( CL_DBG_PNP, p_ext->dbg_lvl, ("IoAcquireRemoveLock:
stop_lock ]\n"));
+  }
+#endif  
  }
 
  /* Return to the previous PnP state. */
@@ -653,7 +684,10 @@
  ASSERT( p_ext->pnp_state == NotStarted ||
   p_ext->pnp_state == Started ||
   p_ext->pnp_state == RemovePending ||
-  p_ext->pnp_state == SurpriseRemoved );
+  p_ext->pnp_state == SurpriseRemoved ||
+  // it can be on this state if IRP_MN_START_DEVICE failed
+  // pnpdtest /rebalance FailRestart creates this situation
+  p_ext->pnp_state == Stopped);
 
  /* Set the device state. */
  cl_set_pnp_state( p_ext, Deleted );


-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081123/d2dfeab5/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: whql_pnp.patch
Type: application/octet-stream
Size: 9417 bytes
Desc: whql_pnp.patch
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20081123/d2dfeab5/attachment.obj>


More information about the ofw mailing list