<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML><HEAD>
<META http-equiv=Content-Type content="text/html; charset=us-ascii">
<META content="MSHTML 6.00.2900.3243" name=GENERATOR></HEAD>
<BODY>
<DIV><SPAN class=526253515-29032009><FONT face=Arial color=#0000ff size=2>Applied
in 2064.</FONT></SPAN></DIV><BR>
<BLOCKQUOTE dir=ltr
style="PADDING-LEFT: 5px; MARGIN-LEFT: 5px; BORDER-LEFT: #0000ff 2px solid; MARGIN-RIGHT: 0px">
<DIV class=OutlookMessageHeader lang=en-us dir=ltr align=left>
<HR tabIndex=-1>
<FONT face=Tahoma size=2><B>From:</B> ofw-bounces@lists.openfabrics.org
[mailto:ofw-bounces@lists.openfabrics.org] <B>On Behalf Of </B>Leonid
Keller<BR><B>Sent:</B> Wednesday, March 25, 2009 4:17 PM<BR><B>To:</B>
ofw@lists.openfabrics.org<BR><B>Subject:</B> [ofw][patch][HW] fix bugs in low
resources flow<BR></FONT><BR></DIV>
<DIV></DIV>
<DIV><FONT face=Arial size=2><SPAN class=795061514-25032009>This patch fixes
several bugs that show up upon low resources.</SPAN></FONT></DIV>
<DIV><FONT face=Arial size=2><SPAN class=795061514-25032009>(found with the
help of Verifier with error injection)</SPAN></FONT></DIV>
<DIV><FONT face=Arial size=2></FONT> </DIV>
<DIV><FONT face=Arial size=2>Index:
hw/mlx4/kernel/bus/core/cache.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/core/cache.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/core/cache.c (working copy)<BR>@@ -366,48 +366,38
@@<BR> int p;<BR> int
port_num;<BR> <BR>+ shutter_init(
&device->cache.x.work_thread
);<BR> rwlock_init(&device->cache.lock);<BR>+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,<BR>+
device, ib_cache_event, NULL, NULL,
0);<BR>+ ib_register_event_handler(&device->cache.event_handler);<BR>+<BR> port_num
= end_port(device) - start_port(device) + 1;<BR>- <BR> if
(port_num > 0 ) { <BR> // if port_num ==0
==> there are no IB ports<BR> device->cache.pkey_cache
=<BR> kmalloc(sizeof *device->cache.pkey_cache *
port_num, GFP_KERNEL);<BR> device->cache.gid_cache
=<BR> kmalloc(sizeof *device->cache.gid_cache *
port_num, GFP_KERNEL);<BR>-<BR> device->cache.lmc_cache =
kmalloc(sizeof *device->cache.lmc_cache
*<BR>- port_num,
GFP_KERNEL);<BR>+ port_num,
GFP_KERNEL);<BR> <BR> if (!device->cache.pkey_cache
|| !device->cache.gid_cache ||<BR>-
!device->cache.lmc_cache)
{<BR>+ !device->cache.lmc_cache)
{<BR> printk(KERN_WARNING "Couldn't allocate cache
"<BR>- "for %s\n",
device->name);<BR>+ "for %s\n",
device->name);<BR> goto
err;<BR> }<BR> }<BR> <BR>- shutter_init(
&device->cache.x.work_thread );<BR>-<BR> for (p = 0; p <
port_num; ++p) {<BR> device->cache.pkey_cache[p] =
NULL;<BR> device->cache.gid_cache [p] =
NULL;<BR> ib_cache_update(device, (u8)(p +
start_port(device)));<BR> }<BR> <BR>- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,<BR>-
device, ib_cache_event, NULL, NULL, 0);<BR>- if
(ib_register_event_handler(&device->cache.event_handler))<BR>- goto
err_cache;<BR>-<BR> return;<BR> <BR>-err_cache:<BR>- for
(p = 0; p <= end_port(device) - start_port(device); ++p)
{<BR>- kfree(device->cache.pkey_cache[p]);<BR>- kfree(device->cache.gid_cache[p]);<BR>- }<BR>-<BR> err:<BR> kfree(device->cache.pkey_cache);<BR> kfree(device->cache.gid_cache);<BR>@@
-422,6 +412,7 @@<BR> {<BR> int
p;<BR> <BR>+ ASSERT(device->cache.event_handler.device);<BR> ib_unregister_event_handler(&device->cache.event_handler);<BR> //
instead of Linux flush_scheduled_work(): wait for them to
quit<BR> shutter_shut( &device->cache.x.work_thread
);<BR>Index:
hw/mlx4/kernel/bus/core/device.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/core/device.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/core/device.c (working copy)<BR>@@ -302,17 +302,23
@@<BR> goto
out;<BR> }<BR> <BR>- list_add_tail(&device->core_list,
&device_list);<BR>-<BR>- device->reg_state =
IB_DEV_REGISTERED;<BR>-<BR> {<BR> struct ib_client
*client;<BR> <BR>- list_for_each_entry(client,
&client_list, list, struct ib_client)<BR>- if
(client->add && !add_client_context(device,
client))<BR>+ list_for_each_entry(client, &client_list, list,
struct ib_client) {<BR>+ if ( add_client_context(device,
client) ) {<BR>+ printk(KERN_WARNING
"add_client_context failed for device
%s\n",<BR>+
device->name);<BR>+ ret =
-EFAULT;<BR>+ goto
out;<BR>+ }<BR>+ if
(client->add)<BR> client->add(device);<BR>+ }<BR> }<BR>+
<BR>+ list_add_tail(&device->core_list,
&device_list);<BR>+ device->reg_state =
IB_DEV_REGISTERED;<BR> <BR>
out:<BR> mutex_unlock(&device_mutex);<BR>@@ -381,17 +387,25
@@<BR> int ib_register_client(struct ib_client
*client)<BR> {<BR> struct ib_device *device;<BR>+ int ret
=
0;<BR> <BR> mutex_lock(&device_mutex);<BR> <BR>- list_add_tail(&client->list,
&client_list);<BR>- list_for_each_entry(device, &device_list,
core_list, struct ib_device)<BR>- if (client->add &&
!add_client_context(device, client))<BR>+ list_for_each_entry(device,
&device_list, core_list, struct ib_device) {<BR>+ if (
add_client_context(device, client) )
{<BR>+ printk(KERN_WARNING "add_client_context failed for
device %s\n",<BR>+
device->name);<BR>+ ret =
-EFAULT;<BR>+ goto out;<BR>+ }<BR>+ if
(client->add)<BR> client->add(device);<BR>-<BR>+ }<BR>+
<BR>+ list_add_tail(&client->list,
&client_list);<BR>+out:<BR> mutex_unlock(&device_mutex);<BR>-<BR>- return
0;<BR>+ return
ret;<BR> }<BR> EXPORT_SYMBOL(ib_register_client);<BR> <BR>Index:
hw/mlx4/kernel/bus/drv/drv.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/drv/drv.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/drv/drv.c (working copy)<BR>@@ -323,10 +323,12
@@<BR> <BR> p_fdo->bus_ib_ifc.pdev =
&p_fdo->pci_dev;<BR> p_fdo->bus_ib_ifc.p_ibdev =
p_fdo->pci_dev.ib_dev;<BR>- p_fdo->bus_ib_ifc.pmlx4_dev =
to_mdev(p_fdo->pci_dev.ib_dev)->dev;<BR>- p_fdo->bus_ib_ifc.is_livefish
= mlx4_is_livefish(p_fdo->pci_dev.dev);<BR>- if (
p_fdo->bus_ib_ifc.pmlx4_dev->flags & MLX4_FLAG_MSI_X
)<BR>- p_fdo->bus_ib_ifc.n_msi_vectors =
p_fdo->pci_dev.n_msi_vectors - 2;<BR>+
p_fdo->bus_ib_ifc.is_livefish =
mlx4_is_livefish(p_fdo->pci_dev.dev);
<BR>+ if ( p_fdo->bus_ib_ifc.is_livefish == 0 )
{<BR>+
p_fdo->bus_ib_ifc.pmlx4_dev =
to_mdev(p_fdo->pci_dev.ib_dev)->dev;
<BR>+ if ( p_fdo->bus_ib_ifc.pmlx4_dev->flags
& MLX4_FLAG_MSI_X )<BR>+
p_fdo->bus_ib_ifc.n_msi_vectors = p_fdo->pci_dev.n_msi_vectors -
2;<BR>+ }<BR> <BR> p_fdo->card_started =
TRUE;<BR> <BR>@@ -572,13 +574,13
@@<BR> pdev->int_info =
*desc;<BR> if (desc->Flags &
CM_RESOURCE_INTERRUPT_MESSAGE)
{<BR> pdev->n_msi_vectors_alloc =
(u8)(pdev->n_msi_vectors_alloc+desc_raw->u.MessageInterrupt.Raw.MessageCount);<BR>- MLX4_PRINT(TRACE_LEVEL_WARNING,
MLX4_DBG_DRV,<BR>+ MLX4_PRINT(TRACE_LEVEL_VERBOSE,
MLX4_DBG_DRV,<BR> ("EvtPrepareHardware:
Desc %d: MsiInterrupt: Share %d, Flags %#x, Level %d, Vector %#x, Affinity
%#x\n", <BR> i,
desc->ShareDisposition,
desc->Flags,<BR> desc->u.MessageInterrupt.Translated.Level,
<BR> desc->u.MessageInterrupt.Translated.Vector,
<BR> (u32)desc->u.MessageInterrupt.Translated.Affinity
));<BR>- MLX4_PRINT(TRACE_LEVEL_WARNING,
MLX4_DBG_DRV,<BR>+ MLX4_PRINT(TRACE_LEVEL_VERBOSE,
MLX4_DBG_DRV,<BR> ("EvtPrepareHardware:
Desc %d: RawMsiInterrupt: Share %d, Flags %#x, MessageCount %#hx, Vector %#x,
Affinity %#x\n", <BR> i,
desc_raw->ShareDisposition,
desc_raw->Flags,<BR> desc_raw->u.MessageInterrupt.Raw.MessageCount,
<BR>@@ -586,7 +588,7
@@<BR> (u32)desc_raw->u.MessageInterrupt.Raw.Affinity
));<BR> }<BR> else {
// line-based
interrupt<BR>- MLX4_PRINT(TRACE_LEVEL_WARNING,
MLX4_DBG_DRV,<BR>+ MLX4_PRINT(TRACE_LEVEL_VERBOSE,
MLX4_DBG_DRV,<BR> ("EvtPrepareHardware:
Desc %d: LineInterrupt: Share %d, Flags %#x, Level %d, Vector %#x, Affinity
%#x\n", <BR> i,
desc->ShareDisposition,
desc->Flags,<BR> desc->u.Interrupt.Level,
desc->u.Interrupt.Vector, <BR>@@ -774,14 +776,14
@@<BR> }<BR> <BR> NTSTATUS<BR>-EvtDeviceAdd(<BR>+EvtDriverDeviceAdd(<BR> IN
WDFDRIVER Driver,<BR> IN
PWDFDEVICE_INIT
DeviceInit<BR> )<BR> /*++<BR> Routine
Description:<BR> <BR>- EvtDeviceAdd is called by the framework in
response to AddDevice<BR>+ EvtDriverDeviceAdd is called by the framework
in response to AddDevice<BR> call from the PnP manager. We create
and initialize a device object to<BR> represent a new instance of
mxe bus.<BR> <BR>@@ -1191,7 +1193,7
@@<BR> //<BR> <BR> WDF_DRIVER_CONFIG_INIT(<BR>- &config,
EvtDeviceAdd );<BR>+ &config, EvtDriverDeviceAdd
);<BR> config.EvtDriverUnload =
EvtDriverUnload;<BR> <BR> //<BR>Index:
hw/mlx4/kernel/bus/drv/drv.h<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/drv/drv.h (revision 2055)<BR>+++
hw/mlx4/kernel/bus/drv/drv.h (working copy)<BR>@@ -155,7 +155,7
@@<BR> );<BR> <BR> NTSTATUS<BR>-EvtDeviceAdd(<BR>+EvtDriverDeviceAdd(<BR> IN
WDFDRIVER Driver,<BR> IN
PWDFDEVICE_INIT DeviceInit<BR> );<BR>Index:
hw/mlx4/kernel/bus/drv/pci.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/drv/pci.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/drv/pci.c (working copy)<BR>@@ -541,7 +541,7
@@<BR> p_vector = ka;<BR> /*
print (allocated+2) vectors */<BR> for (i=0;
i&lt;pdev->n_msi_vectors_alloc+2; i++) 
{<BR>- MLX4_PRINT( TRACE_LEVEL_WARNING
,MLX4_DBG_PNP ,<BR>+ MLX4_PRINT(
TRACE_LEVEL_VERBOSE ,MLX4_DBG_PNP
,<BR> ("MSI-X Vectors: Id %d, Masked %d,
Addr %#I64x, Data %#x\n",<BR> i,
MSIX_VECTOR_MASKED(p_vector[i].Flags),<BR> p_vector[i].Addr,
p_vector[i].Data ));<BR>@@ -587,7 +587,7
@@<BR> )<BR> {<BR> u32 sem;<BR>- NTSTATUS status
= STATUS_SUCCESS;<BR>+ NTSTATUS status =
STATUS_SUCCESS,
status1;<BR> PBUS_INTERFACE_STANDARD p_ifc =
&pdev->bus_pci_ifc;<BR> PCI_COMMON_CONFIG* p_cfg
= &pdev->pci_cfg_space;<BR> struct msix_saved_info
msix_info;<BR>@@ -703,19 +703,19
@@<BR> }<BR> }<BR> <BR>+ status =
STATUS_SUCCESS;<BR>+<BR>+err:<BR> /* restore MSI-X info after reset
*/<BR>- status = __pci_restore_msix_info( pdev, &msix_info
);<BR>- if (!NT_SUCCESS(status))<BR>- goto
err;<BR>+ status1 = __pci_restore_msix_info( pdev, &msix_info
);<BR>+ status = (!status) ? status1 : status; /* return the only or
the first error */<BR>+ if( NT_SUCCESS( status ) )
{<BR>+ MLX4_PRINT( TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has
been reset ! \n"));<BR>+ }<BR> <BR>- /* check, whether MSI-X
capabilities were restore */<BR>+ /* check, whether MSI-X capabilities
have been restored */<BR> pci_get_msi_info( pdev, p_cfg,
&pdev->uplink_info );<BR> <BR>- MLX4_PRINT(
TRACE_LEVEL_WARNING ,MLX4_DBG_PNP , ("HCA has been reset !
\n"));<BR>-<BR>- status = STATUS_SUCCESS;<BR>-<BR>-err:<BR> if
(pdev->msix_info.valid)
<BR> pci_free_msix_info_resources(&pdev->msix_info);<BR> MLX4_EXIT(
MLX4_DBG_PNP );<BR>Index:
hw/mlx4/kernel/bus/ib/main.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/ib/main.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/ib/main.c (working copy)<BR>@@ -611,6 +611,7
@@<BR> mlx4_pd_free(dev,
ibdev->priv_pdn);<BR> <BR> err_dealloc:<BR>+ ibdev->ib_dev.reg_state
=
IB_DEV_UNINITIALIZED;<BR> ib_dealloc_device(&ibdev->ib_dev);<BR> <BR> return
NULL;<BR>Index:
hw/mlx4/kernel/bus/net/catas.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/net/catas.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/net/catas.c (working copy)<BR>@@ -370,6 +370,7
@@<BR> // to allow for end of operations that are in
progress<BR> reset_work = IoAllocateWorkItem(
dev->pdev->p_self_do );<BR> if (!reset_work)
{<BR>+
spin_unlock_irqrestore(&ibdev->event_handler_lock,
flags);<BR> mlx4_err(dev, "mlx4_reset_request
IoAllocateWorkItem failed, reset will not be
propagated\n");<BR> err =
-EFAULT;<BR> goto err_workitem;<BR>Index:
hw/mlx4/kernel/bus/net/cmd.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/net/cmd.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/net/cmd.c (working copy)<BR>@@ -337,9 +337,15
@@<BR> mlx4_dispatch_reset_event(dev->pdev->ib_dev,
IB_EVENT_RESET_DRIVER);<BR> }<BR> }<BR>+ else
{<BR>+ err = -EFAULT;<BR>+ mlx4_err(dev,
"mlx4_cmd_wait: Unexpected end of waiting for a comand
\n");<BR>+ ASSERT(0);<BR>+ }<BR> }<BR>-<BR>- err
= context->result;<BR>+ else<BR>+ err =
context->result;<BR>+ <BR> if
(err)<BR> goto out;<BR> <BR>Index:
hw/mlx4/kernel/bus/net/intf.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/net/intf.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/net/intf.c (working copy)<BR>@@ -43,13 +43,13
@@<BR> static LIST_HEAD(dev_list);<BR> static
DEFINE_MUTEX(intf_mutex);<BR> <BR>-static void mlx4_add_device(struct
mlx4_interface *intf, struct mlx4_priv *priv)<BR>+static int
mlx4_add_device(struct mlx4_interface *intf, struct mlx4_priv
*priv)<BR> {<BR> struct mlx4_device_context
*dev_ctx;<BR> <BR> dev_ctx = kmalloc(sizeof *dev_ctx,
GFP_KERNEL);<BR> if
(!dev_ctx)<BR>- return;<BR>+ return
-EFAULT;<BR> <BR> dev_ctx->intf =
intf;<BR> dev_ctx->context =
intf->add(&priv->dev);<BR>@@ -59,8 +59,11
@@<BR> spin_lock_irq(&priv->ctx_lock);<BR> list_add_tail(&dev_ctx->list,
&priv->ctx_list);<BR> spin_unlock_irq(&priv->ctx_lock);<BR>- }
else<BR>+ } else
{<BR> kfree(dev_ctx);<BR>+ return
-EFAULT;<BR>+ }<BR>+ return 0;<BR> }<BR> <BR> static
void mlx4_remove_device(struct mlx4_interface *intf, struct mlx4_priv
*priv)<BR>@@ -82,19 +85,25 @@<BR> int mlx4_register_interface(struct
mlx4_interface *intf)<BR> {<BR> struct mlx4_priv
*priv;<BR>+ int err = 0;<BR> <BR> if (!intf->add ||
!intf->remove)<BR> return
-EINVAL;<BR> <BR> mutex_lock(&intf_mutex);<BR> <BR>- list_add_tail(&intf->list,
&intf_list);<BR>- list_for_each_entry(priv, &dev_list, dev_list,
struct mlx4_priv)<BR>- mlx4_add_device(intf,
priv);<BR>+ list_for_each_entry(priv, &dev_list, dev_list, struct
mlx4_priv) {<BR>+ if (mlx4_add_device(intf, priv))
{<BR>+ err = -EFAULT;<BR>+ goto
end;<BR>+ }<BR>+ }<BR> <BR>+
list_add_tail(&intf->list,
&intf_list);<BR>+<BR>+end:<BR> mutex_unlock(&intf_mutex);<BR>-<BR>- return
0;<BR>+ return
err;<BR> }<BR> EXPORT_SYMBOL_GPL(mlx4_register_interface);<BR> <BR>@@
-137,12 +146,18
@@<BR> <BR> mutex_lock(&intf_mutex);<BR> <BR>+ list_for_each_entry(intf,
&intf_list, list, struct mlx4_interface) {<BR>+ if
(mlx4_add_device(intf, priv)) {<BR>+ err =
-EFAULT;<BR>+ goto
end;<BR>+ }<BR>+ }<BR>+
<BR> list_add_tail(&priv->dev_list,
&dev_list);<BR>- list_for_each_entry(intf, &intf_list, list,
struct mlx4_interface)<BR>- mlx4_add_device(intf,
priv);<BR>-<BR>+
<BR>+end:<BR> mutex_unlock(&intf_mutex);<BR>- if
(!mlx4_is_livefish(dev))<BR>+ if (!err &&
!mlx4_is_livefish(dev))<BR> err =
mlx4_start_catas_poll(dev);<BR> <BR> return err;<BR>Index:
hw/mlx4/kernel/bus/net/main.c<BR>===================================================================<BR>---
hw/mlx4/kernel/bus/net/main.c (revision 2055)<BR>+++
hw/mlx4/kernel/bus/net/main.c (working copy)<BR>@@ -956,9 +956,11
@@<BR> ("mlx4_register_device for livefish
failed, return with error.\n"));<BR> pdev->dev =
NULL;<BR> kfree(priv);<BR>+ }
<BR>+ else {<BR>+
MLX4_PRINT(TRACE_LEVEL_ERROR ,MLX4_DBG_LOW
,<BR>+ ("MLX4_BUS started in \"livefish\"
mode
!!!.\n"));<BR> }<BR>- MLX4_PRINT(TRACE_LEVEL_ERROR
,MLX4_DBG_LOW ,<BR>- ("MLX4_BUS started in \"livefish\" mode
!!!.\n"));<BR> goto end;<BR> }<BR> <BR>@@
-1064,8 +1066,8
@@<BR> mlx4_close_hca(dev);<BR> mlx4_cmd_cleanup(dev);<BR> <BR>- if
(reset)<BR>- mlx4_reset(dev);<BR>+ if (reset
&& mlx4_reset(dev))<BR>+ mlx4_err(dev, "Failed to
reset HCA\n");<BR> mlx4_dbg(dev, "MLX4_BUS: NET device
(dev_id=%d) is REMOVED ! \n",
(int)pdev->dev_id);<BR> pdev->dev =
NULL;<BR> done:<BR>Index:
hw/mthca/kernel/mt_cache.c<BR>===================================================================<BR>---
hw/mthca/kernel/mt_cache.c (revision 2055)<BR>+++
hw/mthca/kernel/mt_cache.c (working copy)<BR>@@ -341,6 +341,9
@@<BR> u8
p;<BR> <BR> rwlock_init(&device->cache.lock);<BR>+ INIT_IB_EVENT_HANDLER(&device->cache.event_handler,<BR>+
device,
ib_cache_event);<BR>+ ib_register_event_handler(&device->cache.event_handler);<BR> <BR> device->cache.pkey_cache
=<BR> kmalloc(sizeof *device->cache.pkey_cache *<BR>@@
-361,19 +364,8 @@<BR> ib_cache_update(device, p +
start_port(device));<BR> }<BR> <BR>- INIT_IB_EVENT_HANDLER(&device->cache.event_handler,<BR>-
device, ib_cache_event);<BR>- if
(ib_register_event_handler(&device->cache.event_handler))<BR>- goto
err_cache;<BR>-<BR> return;<BR> <BR>-err_cache:<BR>- for
(p = 0; p <= end_port(device) - start_port(device); ++p)
{<BR>- kfree(device->cache.pkey_cache[p]);<BR>- kfree(device->cache.gid_cache[p]);<BR>- }<BR>-<BR> err:<BR> kfree(device->cache.pkey_cache);<BR> kfree(device->cache.gid_cache);<BR></FONT></DIV></BLOCKQUOTE></BODY></HTML>