[ofw][patch][HW] fix bugs in low resources flow
Leonid Keller
leonid at mellanox.co.il
Wed Mar 25 07:17:16 PDT 2009
This patch fixes several bugs that show up upon low resources.
(found with the help of Verifier with error injection)
Index: hw/mlx4/kernel/bus/core/cache.c
===================================================================
--- hw/mlx4/kernel/bus/core/cache.c (revision 2055)
+++ hw/mlx4/kernel/bus/core/cache.c (working copy)
@@ -366,48 +366,38 @@
int p;
int port_num;
+ shutter_init( &device->cache.x.work_thread );
rwlock_init(&device->cache.lock);
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+ device, ib_cache_event, NULL, NULL, 0);
+ ib_register_event_handler(&device->cache.event_handler);
+
port_num = end_port(device) - start_port(device) + 1;
-
if (port_num > 0 ) {
// if port_num ==0 ==> there are no IB ports
device->cache.pkey_cache =
kmalloc(sizeof *device->cache.pkey_cache * port_num, GFP_KERNEL);
device->cache.gid_cache =
kmalloc(sizeof *device->cache.gid_cache * port_num, GFP_KERNEL);
-
device->cache.lmc_cache = kmalloc(sizeof *device->cache.lmc_cache *
- port_num, GFP_KERNEL);
+ port_num, GFP_KERNEL);
if (!device->cache.pkey_cache || !device->cache.gid_cache ||
- !device->cache.lmc_cache) {
+ !device->cache.lmc_cache) {
printk(KERN_WARNING "Couldn't allocate cache "
- "for %s\n", device->name);
+ "for %s\n", device->name);
goto err;
}
}
- shutter_init( &device->cache.x.work_thread );
-
for (p = 0; p < port_num; ++p) {
device->cache.pkey_cache[p] = NULL;
device->cache.gid_cache [p] = NULL;
ib_cache_update(device, (u8)(p + start_port(device)));
}
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event, NULL, NULL, 0);
- if (ib_register_event_handler(&device->cache.event_handler))
- goto err_cache;
-
return;
-err_cache:
- for (p = 0; p <= end_port(device) - start_port(device); ++p) {
- kfree(device->cache.pkey_cache[p]);
- kfree(device->cache.gid_cache[p]);
- }
-
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
@@ -422,6 +412,7 @@
{
int p;
+ ASSERT(device->cache.event_handler.device);
ib_unregister_event_handler(&device->cache.event_handler);
// instead of Linux flush_scheduled_work(): wait for them to quit
shutter_shut( &device->cache.x.work_thread );
Index: hw/mlx4/kernel/bus/core/device.c
===================================================================
--- hw/mlx4/kernel/bus/core/device.c (revision 2055)
+++ hw/mlx4/kernel/bus/core/device.c (working copy)
@@ -302,17 +302,23 @@
goto out;
}
- list_add_tail(&device->core_list, &device_list);
-
- device->reg_state = IB_DEV_REGISTERED;
-
{
struct ib_client *client;
- list_for_each_entry(client, &client_list, list, struct ib_client)
- if (client->add && !add_client_context(device, client))
+ list_for_each_entry(client, &client_list, list, struct ib_client) {
+ if ( add_client_context(device, client) ) {
+ printk(KERN_WARNING "add_client_context failed for device %s\n",
+ device->name);
+ ret = -EFAULT;
+ goto out;
+ }
+ if (client->add)
client->add(device);
+ }
}
+
+ list_add_tail(&device->core_list, &device_list);
+ device->reg_state = IB_DEV_REGISTERED;
out:
mutex_unlock(&device_mutex);
@@ -381,17 +387,25 @@
int ib_register_client(struct ib_client *client)
{
struct ib_device *device;
+ int ret = 0;
mutex_lock(&device_mutex);
- list_add_tail(&client->list, &client_list);
- list_for_each_entry(device, &device_list, core_list, struct ib_device)
- if (client->add && !add_client_context(device, client))
+ list_for_each_entry(device, &device_list, core_list, struct ib_device)
{
+ if ( add_client_context(device, client) ) {
+ printk(KERN_WARNING "add_client_context failed for device %s\n",
+ device->name);
+ ret = -EFAULT;
+ goto out;
+ }
+ if (client->add)
client->add(device);
-
+ }
+
+ list_add_tail(&client->list, &client_list);
+out:
mutex_unlock(&device_mutex);
-
- return 0;
+ return ret;
}
EXPORT_SYMBOL(ib_register_client);
Index: hw/mlx4/kernel/bus/drv/drv.c
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.c (revision 2055)
+++ hw/mlx4/kernel/bus/drv/drv.c (working copy)
@@ -323,10 +323,12 @@
p_fdo->bus_ib_ifc.pdev = &p_fdo->pci_dev;
p_fdo->bus_ib_ifc.p_ibdev = p_fdo->pci_dev.ib_dev;
- p_fdo->bus_ib_ifc.pmlx4_dev = to_mdev(p_fdo->pci_dev.ib_dev)->dev;
- p_fdo->bus_ib_ifc.is_livefish = mlx4_is_livefish(p_fdo->pci_dev.dev);
- if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )
- p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors - 2;
+ p_fdo->bus_ib_ifc.is_livefish =
mlx4_is_livefish(p_fdo->pci_dev.dev);
+ if ( p_fdo->bus_ib_ifc.is_livefish == 0 ) {
+ p_fdo->bus_ib_ifc.pmlx4_dev =
to_mdev(p_fdo->pci_dev.ib_dev)->dev;
+ if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X )
+ p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors -
2;
+ }
p_fdo->card_started = TRUE;
@@ -572,13 +574,13 @@
pdev->int_info = *desc;
if (desc->Flags & CM_RESOURCE_INTERRUPT_MESSAGE) {
pdev->n_msi_vectors_alloc =
(u8)(pdev->n_msi_vectors_alloc+desc_raw->u.MessageInterrupt.Raw.MessageC
ount);
- MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+ MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
("EvtPrepareHardware: Desc %d: MsiInterrupt: Share %d, Flags %#x,
Level %d, Vector %#x, Affinity %#x\n",
i, desc->ShareDisposition, desc->Flags,
desc->u.MessageInterrupt.Translated.Level,
desc->u.MessageInterrupt.Translated.Vector,
(u32)desc->u.MessageInterrupt.Translated.Affinity ));
- MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+ MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
("EvtPrepareHardware: Desc %d: RawMsiInterrupt: Share %d, Flags
%#x, MessageCount %#hx, Vector %#x, Affinity %#x\n",
i, desc_raw->ShareDisposition, desc_raw->Flags,
desc_raw->u.MessageInterrupt.Raw.MessageCount,
@@ -586,7 +588,7 @@
(u32)desc_raw->u.MessageInterrupt.Raw.Affinity ));
}
else { // line-based interrupt
- MLX4_PRINT(TRACE_LEVEL_WARNING, MLX4_DBG_DRV,
+ MLX4_PRINT(TRACE_LEVEL_VERBOSE, MLX4_DBG_DRV,
("EvtPrepareHardware: Desc %d: LineInterrupt: Share %d, Flags
%#x, Level %d, Vector %#x, Affinity %#x\n",
i, desc->ShareDisposition, desc->Flags,
desc->u.Interrupt.Level, desc->u.Interrupt.Vector,
@@ -774,14 +776,14 @@
}
NTSTATUS
-EvtDeviceAdd(
+EvtDriverDeviceAdd(
IN WDFDRIVER Driver,
IN PWDFDEVICE_INIT DeviceInit
)
/*++
Routine Description:
- EvtDeviceAdd is called by the framework in response to AddDevice
+ EvtDriverDeviceAdd is called by the framework in response to AddDevice
call from the PnP manager. We create and initialize a device object to
represent a new instance of mxe bus.
@@ -1191,7 +1193,7 @@
//
WDF_DRIVER_CONFIG_INIT(
- &config, EvtDeviceAdd );
+ &config, EvtDriverDeviceAdd );
config.EvtDriverUnload = EvtDriverUnload;
//
Index: hw/mlx4/kernel/bus/drv/drv.h
===================================================================
--- hw/mlx4/kernel/bus/drv/drv.h (revision 2055)
+++ hw/mlx4/kernel/bus/drv/drv.h (working copy)
@@ -155,7 +155,7 @@
);
NTSTATUS
-EvtDeviceAdd(
+EvtDriverDeviceAdd(
IN WDFDRIVER Driver,
IN PWDFDEVICE_INIT DeviceInit
);
Index: hw/mlx4/kernel/bus/drv/pci.c
===================================================================
--- hw/mlx4/kernel/bus/drv/pci.c (revision 2055)
+++ hw/mlx4/kernel/bus/drv/pci.c (working copy)
@@ -541,7 +541,7 @@
p_vector = ka;
/* print (allocated+2) vectors */
for (i=0; i<pdev->n_msi_vectors_alloc+2; i++) {
- MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP ,
+ MLX4_PRINT( TRACE_LEVEL_VERBOSE ,MLX4_DBG_PNP ,
("MSI-X Vectors: Id %d, Masked %d, Addr %#I64x, Data %#x\n",
i, MSIX_VECTOR_MASKED(p_vector[i].Flags),
p_vector[i].Addr, p_vector[i].Data ));
@@ -587,7 +587,7 @@
)
{
u32 sem;
- NTSTATUS status = STATUS_SUCCESS;
+ NTSTATUS status = STATUS_SUCCESS, status1;
PBUS_INTERFACE_STANDARD p_ifc = &pdev->bus_pci_ifc;
PCI_COMMON_CONFIG* p_cfg = &pdev->pci_cfg_space;
struct msix_saved_info msix_info;
@@ -703,19 +703,19 @@
}
}
+ status = STATUS_SUCCESS;
+
+err:
/* restore MSI-X info after reset */
- status = __pci_restore_msix_info( pdev, &msix_info );
- if (!NT_SUCCESS(status))
- goto err;
+ status1 = __pci_restore_msix_info( pdev, &msix_info );
+ status = (!status) ? status1 : status; /* return the only or the first
error */
+ if( NT_SUCCESS( status ) ) {
+ MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset
! \n"));
+ }
- /* check, whether MSI-X capabilities were restore */
+ /* check, whether MSI-X capabilities have been restored */
pci_get_msi_info( pdev, p_cfg, &pdev->uplink_info );
- MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset !
\n"));
-
- status = STATUS_SUCCESS;
-
-err:
if (pdev->msix_info.valid)
pci_free_msix_info_resources(&pdev->msix_info);
MLX4_EXIT( MLX4_DBG_PNP );
Index: hw/mlx4/kernel/bus/ib/main.c
===================================================================
--- hw/mlx4/kernel/bus/ib/main.c (revision 2055)
+++ hw/mlx4/kernel/bus/ib/main.c (working copy)
@@ -611,6 +611,7 @@
mlx4_pd_free(dev, ibdev->priv_pdn);
err_dealloc:
+ ibdev->ib_dev.reg_state = IB_DEV_UNINITIALIZED;
ib_dealloc_device(&ibdev->ib_dev);
return NULL;
Index: hw/mlx4/kernel/bus/net/catas.c
===================================================================
--- hw/mlx4/kernel/bus/net/catas.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/catas.c (working copy)
@@ -370,6 +370,7 @@
// to allow for end of operations that are in progress
reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
if (!reset_work) {
+ spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset
will not be propagated\n");
err = -EFAULT;
goto err_workitem;
Index: hw/mlx4/kernel/bus/net/cmd.c
===================================================================
--- hw/mlx4/kernel/bus/net/cmd.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/cmd.c (working copy)
@@ -337,9 +337,15 @@
mlx4_dispatch_reset_event(dev->pdev->ib_dev,
IB_EVENT_RESET_DRIVER);
}
}
+ else {
+ err = -EFAULT;
+ mlx4_err(dev, "mlx4_cmd_wait: Unexpected end of waiting for a comand
\n");
+ ASSERT(0);
+ }
}
-
- err = context->result;
+ else
+ err = context->result;
+
if (err)
goto out;
Index: hw/mlx4/kernel/bus/net/intf.c
===================================================================
--- hw/mlx4/kernel/bus/net/intf.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/intf.c (working copy)
@@ -43,13 +43,13 @@
static LIST_HEAD(dev_list);
static DEFINE_MUTEX(intf_mutex);
-static void mlx4_add_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
+static int mlx4_add_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
{
struct mlx4_device_context *dev_ctx;
dev_ctx = kmalloc(sizeof *dev_ctx, GFP_KERNEL);
if (!dev_ctx)
- return;
+ return -EFAULT;
dev_ctx->intf = intf;
dev_ctx->context = intf->add(&priv->dev);
@@ -59,8 +59,11 @@
spin_lock_irq(&priv->ctx_lock);
list_add_tail(&dev_ctx->list, &priv->ctx_list);
spin_unlock_irq(&priv->ctx_lock);
- } else
+ } else {
kfree(dev_ctx);
+ return -EFAULT;
+ }
+ return 0;
}
static void mlx4_remove_device(struct mlx4_interface *intf, struct
mlx4_priv *priv)
@@ -82,19 +85,25 @@
int mlx4_register_interface(struct mlx4_interface *intf)
{
struct mlx4_priv *priv;
+ int err = 0;
if (!intf->add || !intf->remove)
return -EINVAL;
mutex_lock(&intf_mutex);
- list_add_tail(&intf->list, &intf_list);
- list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv)
- mlx4_add_device(intf, priv);
+ list_for_each_entry(priv, &dev_list, dev_list, struct mlx4_priv) {
+ if (mlx4_add_device(intf, priv)) {
+ err = -EFAULT;
+ goto end;
+ }
+ }
+ list_add_tail(&intf->list, &intf_list);
+
+end:
mutex_unlock(&intf_mutex);
-
- return 0;
+ return err;
}
EXPORT_SYMBOL_GPL(mlx4_register_interface);
@@ -137,12 +146,18 @@
mutex_lock(&intf_mutex);
+ list_for_each_entry(intf, &intf_list, list, struct mlx4_interface) {
+ if (mlx4_add_device(intf, priv)) {
+ err = -EFAULT;
+ goto end;
+ }
+ }
+
list_add_tail(&priv->dev_list, &dev_list);
- list_for_each_entry(intf, &intf_list, list, struct mlx4_interface)
- mlx4_add_device(intf, priv);
-
+
+end:
mutex_unlock(&intf_mutex);
- if (!mlx4_is_livefish(dev))
+ if (!err && !mlx4_is_livefish(dev))
err = mlx4_start_catas_poll(dev);
return err;
Index: hw/mlx4/kernel/bus/net/main.c
===================================================================
--- hw/mlx4/kernel/bus/net/main.c (revision 2055)
+++ hw/mlx4/kernel/bus/net/main.c (working copy)
@@ -956,9 +956,11 @@
("mlx4_register_device for livefish failed, return with
error.\n"));
pdev->dev = NULL;
kfree(priv);
+ }
+ else {
+ MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
+ ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
}
- MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW ,
- ("MLX4_BUS started in \"livefish\" mode !!!.\n"));
goto end;
}
@@ -1064,8 +1066,8 @@
mlx4_close_hca(dev);
mlx4_cmd_cleanup(dev);
- if (reset)
- mlx4_reset(dev);
+ if (reset && mlx4_reset(dev))
+ mlx4_err(dev, "Failed to reset HCA\n");
mlx4_dbg(dev, "MLX4_BUS: NET device (dev_id=%d) is REMOVED ! \n",
(int)pdev->dev_id);
pdev->dev = NULL;
done:
Index: hw/mthca/kernel/mt_cache.c
===================================================================
--- hw/mthca/kernel/mt_cache.c (revision 2055)
+++ hw/mthca/kernel/mt_cache.c (working copy)
@@ -341,6 +341,9 @@
u8 p;
rwlock_init(&device->cache.lock);
+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
+ device, ib_cache_event);
+ ib_register_event_handler(&device->cache.event_handler);
device->cache.pkey_cache =
kmalloc(sizeof *device->cache.pkey_cache *
@@ -361,19 +364,8 @@
ib_cache_update(device, p + start_port(device));
}
- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,
- device, ib_cache_event);
- if (ib_register_event_handler(&device->cache.event_handler))
- goto err_cache;
-
return;
-err_cache:
- for (p = 0; p <= end_port(device) - start_port(device); ++p) {
- kfree(device->cache.pkey_cache[p]);
- kfree(device->cache.gid_cache[p]);
- }
-
err:
kfree(device->cache.pkey_cache);
kfree(device->cache.gid_cache);
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090325/c18412ff/attachment.html>
More information about the ofw
mailing list