[ofw][patch][HW] fix bugs in low resources flow

Leonid Keller leonid at mellanox.co.il
Wed Mar 25 07:17:16 PDT 2009


This patch fixes several bugs that show up upon low resources.
(found with the help of Verifier with error injection)
 
Index: hw/mlx4/kernel/bus/core/cache.c
===================================================================
--- hw/mlx4/kernel/bus/core/cache.c (revision 2055)
+++ hw/mlx4/kernel/bus/core/cache.c (working copy)
@@ -366,48 +366,38 @@
  int p;
  int port_num;
  
+ shutter_init( &device->cache.x.work_thread );
  rwlock_init(&device->cache.lock);
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+         device, ib_cache_event, NULL, NULL, 0);
+ ib_register_event_handler(&device->cache.event_handler);
+
  port_num = end_port(device) - start_port(device) + 1;
- 
  if (port_num > 0 ) { 
   // if port_num ==0   ==> there are no IB ports
   device->cache.pkey_cache =
    kmalloc(sizeof *device->cache.pkey_cache * port_num, GFP_KERNEL);
   device->cache.gid_cache =
    kmalloc(sizeof *device->cache.gid_cache * port_num, GFP_KERNEL);
-
   device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
-        port_num, GFP_KERNEL);
+   port_num, GFP_KERNEL);
 
   if (!device->cache.pkey_cache || !device->cache.gid_cache ||
-      !device->cache.lmc_cache) {
+   !device->cache.lmc_cache) {
    printk(KERN_WARNING "Couldn't allocate cache "
-          "for %s\n", device->name);
+    "for %s\n", device->name);
    goto err;
   }
  }
 
- shutter_init( &device->cache.x.work_thread );
-
  for (p = 0; p < port_num; ++p) {
   device->cache.pkey_cache[p] = NULL;
   device->cache.gid_cache [p] = NULL;
   ib_cache_update(device, (u8)(p + start_port(device)));
  }
 
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-         device, ib_cache_event, NULL, NULL, 0);
- if (ib_register_event_handler(&device->cache.event_handler))
-  goto err_cache;
-
  return;
 
-err_cache:
- for (p = 0; p <= end_port(device) - start_port(device); ++p) {
-  kfree(device->cache.pkey_cache[p]);
-  kfree(device->cache.gid_cache[p]);
- }
-
 err:
  kfree(device->cache.pkey_cache);
  kfree(device->cache.gid_cache);
@@ -422,6 +412,7 @@
 {
  int p;
 
+ ASSERT(device->cache.event_handler.device);
  ib_unregister_event_handler(&device->cache.event_handler);
  // instead of Linux flush_scheduled_work(): wait for them to quit
  shutter_shut( &device->cache.x.work_thread );
Index: hw/mlx4/kernel/bus/core/device.c
===================================================================
--- hw/mlx4/kernel/bus/core/device.c (revision 2055)
+++ hw/mlx4/kernel/bus/core/device.c (working copy)
@@ -302,17 +302,23 @@
   goto out;
  }
 
- list_add_tail(&device->core_list, &device_list);
-
- device->reg_state = IB_DEV_REGISTERED;
-
  {
   struct ib_client *client;
 
-  list_for_each_entry(client, &client_list, list, struct ib_client)
-   if (client->add && !add_client_context(device, client))
+  list_for_each_entry(client, &client_list, list, struct ib_client) {
+   if ( add_client_context(device, client) ) {
+    printk(KERN_WARNING "add_client_context failed for device %s\n",
+        device->name);
+    ret = -EFAULT;
+    goto out;
+   }
+   if (client->add)
     client->add(device);
+  }
  }
+    
+ list_add_tail(&device->core_list, &device_list);
+ device->reg_state = IB_DEV_REGISTERED;
 
  out:
  mutex_unlock(&device_mutex);
@@ -381,17 +387,25 @@
 int ib_register_client(struct ib_client *client)
 {
  struct ib_device *device;
+ int ret = 0;
 
  mutex_lock(&device_mutex);
 
- list_add_tail(&client->list, &client_list);
- list_for_each_entry(device, &device_list, core_list, struct ib_device)
-  if (client->add && !add_client_context(device, client))
+ list_for_each_entry(device, &device_list, core_list, struct ib_device)
{
+  if ( add_client_context(device, client) ) {
+   printk(KERN_WARNING "add_client_context failed for device %s\n",
+       device->name);
+   ret = -EFAULT;
+   goto out;
+  }
+  if (client->add)
    client->add(device);
-
+ }
+    
+    list_add_tail(&client->list, &client_list);
+out:
  mutex_unlock(&device_mutex);
-
- return 0;
+ return ret;
 }
 EXPORT_SYMBOL(ib_register_client);
 
Index: hw/mlx4/kernel/bus/drv/drv.c
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.c (revision 2055)
+++ hw/mlx4/kernel/bus/drv/drv.c (working copy)
@@ -323,10 +323,12 @@
 
  p_fdo->bus_ib_ifc.pdev = &p_fdo->pci_dev;
  p_fdo->bus_ib_ifc.p_ibdev = p_fdo->pci_dev.ib_dev;
- p_fdo->bus_ib_ifc.pmlx4_dev = to_mdev(p_fdo->pci_dev.ib_dev)->dev;
- p_fdo->bus_ib_ifc.is_livefish = mlx4_is_livefish(p_fdo->pci_dev.dev);
- if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )
-  p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors - 2;
+    p_fdo->bus_ib_ifc.is_livefish =
mlx4_is_livefish(p_fdo->pci_dev.dev);    
+    if ( p_fdo->bus_ib_ifc.is_livefish == 0 ) {
+        p_fdo->bus_ib_ifc.pmlx4_dev =
to_mdev(p_fdo->pci_dev.ib_dev)->dev;    
+     if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )
+      p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors -
2;
+    }
 
  p_fdo->card_started = TRUE;
 
@@ -572,13 +574,13 @@
      pdev->int_info = *desc;
     if (desc->Flags & CM_RESOURCE_INTERRUPT_MESSAGE) {
      pdev->n_msi_vectors_alloc =
(u8)(pdev->n_msi_vectors_alloc+desc_raw->u.MessageInterrupt.Raw.MessageC
ount);
-     MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+     MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
       ("EvtPrepareHardware: Desc %d: MsiInterrupt: Share %d, Flags %#x,
Level %d, Vector %#x, Affinity %#x\n", 
       i, desc->ShareDisposition, desc->Flags,
       desc->u.MessageInterrupt.Translated.Level, 
       desc->u.MessageInterrupt.Translated.Vector, 
       (u32)desc->u.MessageInterrupt.Translated.Affinity ));
-     MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+     MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
       ("EvtPrepareHardware: Desc %d: RawMsiInterrupt: Share %d, Flags
%#x, MessageCount %#hx, Vector %#x, Affinity %#x\n", 
       i, desc_raw->ShareDisposition, desc_raw->Flags,
       desc_raw->u.MessageInterrupt.Raw.MessageCount, 
@@ -586,7 +588,7 @@
       (u32)desc_raw->u.MessageInterrupt.Raw.Affinity ));
     }
     else { // line-based interrupt
-     MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+     MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
       ("EvtPrepareHardware: Desc %d: LineInterrupt: Share %d, Flags
%#x, Level %d, Vector %#x, Affinity %#x\n", 
       i, desc->ShareDisposition, desc->Flags,
       desc->u.Interrupt.Level, desc->u.Interrupt.Vector, 
@@ -774,14 +776,14 @@
 }
 
 NTSTATUS
-EvtDeviceAdd(
+EvtDriverDeviceAdd(
  IN WDFDRIVER        Driver,
  IN PWDFDEVICE_INIT  DeviceInit
  )
 /*++
 Routine Description:
 
- EvtDeviceAdd is called by the framework in response to AddDevice
+ EvtDriverDeviceAdd is called by the framework in response to AddDevice
  call from the PnP manager. We create and initialize a device object to
  represent a new instance of mxe bus.
 
@@ -1191,7 +1193,7 @@
  //
 
  WDF_DRIVER_CONFIG_INIT(
-  &config, EvtDeviceAdd );
+  &config, EvtDriverDeviceAdd );
  config.EvtDriverUnload = EvtDriverUnload;
 
  //
Index: hw/mlx4/kernel/bus/drv/drv.h
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.h (revision 2055)
+++ hw/mlx4/kernel/bus/drv/drv.h (working copy)
@@ -155,7 +155,7 @@
  );
  
 NTSTATUS
-EvtDeviceAdd(
+EvtDriverDeviceAdd(
  IN WDFDRIVER        Driver,
  IN PWDFDEVICE_INIT  DeviceInit
  );
Index: hw/mlx4/kernel/bus/drv/pci.c
===================================================================
--- hw/mlx4/kernel/bus/drv/pci.c (revision 2055)
+++ hw/mlx4/kernel/bus/drv/pci.c (working copy)
@@ -541,7 +541,7 @@
    p_vector = ka;
    /* print (allocated+2) vectors */
    for (i=0; i<pdev->n_msi_vectors_alloc+2; i++) {
-    MLX4_PRINT( TRACE_LEVEL_WARNING  ,MLX4_DBG_PNP  ,
+    MLX4_PRINT( TRACE_LEVEL_VERBOSE  ,MLX4_DBG_PNP  ,
      ("MSI-X Vectors: Id %d, Masked %d, Addr %#I64x, Data %#x\n",
      i, MSIX_VECTOR_MASKED(p_vector[i].Flags),
      p_vector[i].Addr, p_vector[i].Data ));
@@ -587,7 +587,7 @@
 )
 {
  u32       sem;
- NTSTATUS     status = STATUS_SUCCESS;
+ NTSTATUS     status = STATUS_SUCCESS, status1;
  PBUS_INTERFACE_STANDARD  p_ifc = &pdev->bus_pci_ifc;
  PCI_COMMON_CONFIG*   p_cfg = &pdev->pci_cfg_space;
  struct msix_saved_info   msix_info;
@@ -703,19 +703,19 @@
   }
  }
 
+ status = STATUS_SUCCESS;
+
+err:
  /* restore MSI-X info after reset */
- status = __pci_restore_msix_info( pdev, &msix_info );
- if (!NT_SUCCESS(status))
-  goto err;
+ status1 = __pci_restore_msix_info( pdev, &msix_info );
+ status = (!status) ? status1 : status; /* return the only or the first
error */
+ if( NT_SUCCESS( status ) ) {
+  MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset
! \n"));
+ }
 
- /* check, whether MSI-X capabilities were restore */
+ /* check, whether MSI-X capabilities have been restored */
  pci_get_msi_info( pdev, p_cfg, &pdev->uplink_info );
 
- MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset !
\n"));
-
- status = STATUS_SUCCESS;
-
-err:
  if (pdev->msix_info.valid) 
   pci_free_msix_info_resources(&pdev->msix_info);
  MLX4_EXIT( MLX4_DBG_PNP );
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c (revision 2055)
+++ hw/mlx4/kernel/bus/ib/main.c (working copy)
@@ -611,6 +611,7 @@
  mlx4_pd_free(dev, ibdev->priv_pdn);
 
 err_dealloc:
+ ibdev->ib_dev.reg_state = IB_DEV_UNINITIALIZED;
  ib_dealloc_device(&ibdev->ib_dev);
 
  return NULL;
Index: hw/mlx4/kernel/bus/net/catas.c
===================================================================
--- hw/mlx4/kernel/bus/net/catas.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/catas.c (working copy)
@@ -370,6 +370,7 @@
   // to allow for end of operations that are in progress
   reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
   if (!reset_work) {
+            spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
    mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset
will not be propagated\n");
    err = -EFAULT;
    goto err_workitem;
Index: hw/mlx4/kernel/bus/net/cmd.c
===================================================================
--- hw/mlx4/kernel/bus/net/cmd.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/cmd.c (working copy)
@@ -337,9 +337,15 @@
     mlx4_dispatch_reset_event(dev->pdev->ib_dev,
IB_EVENT_RESET_DRIVER);
    }
   }
+  else {
+   err = -EFAULT;
+   mlx4_err(dev, "mlx4_cmd_wait: Unexpected end of waiting for a comand
\n");
+   ASSERT(0);
+  }
  }
-
- err = context->result;
+ else
+  err = context->result;
+ 
  if (err)
   goto out;
 
Index: hw/mlx4/kernel/bus/net/intf.c
===================================================================
--- hw/mlx4/kernel/bus/net/intf.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/intf.c (working copy)
@@ -43,13 +43,13 @@
 static LIST_HEAD(dev_list);
 static DEFINE_MUTEX(intf_mutex);
 
-static void mlx4_add_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
+static int mlx4_add_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
 {
  struct mlx4_device_context *dev_ctx;
 
  dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
  if (!dev_ctx)
-  return;
+  return -EFAULT;
 
  dev_ctx->intf    = intf;
  dev_ctx->context = intf->add(&priv->dev);
@@ -59,8 +59,11 @@
   spin_lock_irq(&priv->ctx_lock);
   list_add_tail(&dev_ctx->list, &priv->ctx_list);
   spin_unlock_irq(&priv->ctx_lock);
- } else
+ } else {
   kfree(dev_ctx);
+  return -EFAULT;
+ }
+ return 0;
 }
 
 static void mlx4_remove_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
@@ -82,19 +85,25 @@
 int mlx4_register_interface(struct mlx4_interface *intf)
 {
  struct mlx4_priv *priv;
+ int err = 0;
 
  if (!intf->add || !intf->remove)
   return -EINVAL;
 
  mutex_lock(&intf_mutex);
 
- list_add_tail(&intf->list, &intf_list);
- list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv)
-  mlx4_add_device(intf, priv);
+ list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv) {
+  if (mlx4_add_device(intf, priv)) {
+   err = -EFAULT;
+   goto end;
+  }
+ }
 
+    list_add_tail(&intf->list, &intf_list);
+
+end:
  mutex_unlock(&intf_mutex);
-
- return 0;
+ return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_register_interface);
 
@@ -137,12 +146,18 @@
 
  mutex_lock(&intf_mutex);
 
+ list_for_each_entry(intf, &intf_list, list, struct mlx4_interface) {
+  if (mlx4_add_device(intf, priv)) {
+   err = -EFAULT;
+   goto end;
+  }
+ }
+    
  list_add_tail(&priv->dev_list, &dev_list);
- list_for_each_entry(intf, &intf_list, list, struct mlx4_interface)
-  mlx4_add_device(intf, priv);
-
+    
+end:
  mutex_unlock(&intf_mutex);
- if (!mlx4_is_livefish(dev))
+ if (!err && !mlx4_is_livefish(dev))
   err = mlx4_start_catas_poll(dev);
 
  return err;
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/main.c (working copy)
@@ -956,9 +956,11 @@
     ("mlx4_register_device for livefish failed, return with
error.\n"));
    pdev->dev = NULL;
    kfree(priv);
+  } 
+  else {
+      MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
+       ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
   }
-  MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
-   ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
   goto end;
  }
 
@@ -1064,8 +1066,8 @@
   mlx4_close_hca(dev);
   mlx4_cmd_cleanup(dev);
 
-  if (reset)
-   mlx4_reset(dev);
+  if (reset && mlx4_reset(dev))
+   mlx4_err(dev, "Failed to reset HCA\n");
   mlx4_dbg(dev, "MLX4_BUS: NET device (dev_id=%d) is REMOVED ! \n",
(int)pdev->dev_id);
   pdev->dev = NULL;
 done:
Index: hw/mthca/kernel/mt_cache.c
===================================================================
--- hw/mthca/kernel/mt_cache.c (revision 2055)
+++ hw/mthca/kernel/mt_cache.c (working copy)
@@ -341,6 +341,9 @@
  u8 p;
 
  rwlock_init(&device->cache.lock);
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+         device, ib_cache_event);
+ ib_register_event_handler(&device->cache.event_handler);
 
  device->cache.pkey_cache =
   kmalloc(sizeof *device->cache.pkey_cache *
@@ -361,19 +364,8 @@
   ib_cache_update(device, p + start_port(device));
  }
 
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
-         device, ib_cache_event);
- if (ib_register_event_handler(&device->cache.event_handler))
-  goto err_cache;
-
  return;
 
-err_cache:
- for (p = 0; p <= end_port(device) - start_port(device); ++p) {
-  kfree(device->cache.pkey_cache[p]);
-  kfree(device->cache.gid_cache[p]);
- }
-
 err:
  kfree(device->cache.pkey_cache);
  kfree(device->cache.gid_cache);

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090325/c18412ff/attachment.html>


More information about the ofw mailing list