[ofw] patch: Fix our error handling in the case of hardware errors
Tzachi Dar
tzachid at mellanox.co.il
Mon Mar 2 05:49:43 PST 2009
Index: mlx4/kernel/bus/drv/pdo.c
===================================================================
--- mlx4/kernel/bus/drv/pdo.c (revision 1993)
+++ mlx4/kernel/bus/drv/pdo.c (working copy)
@@ -268,6 +268,7 @@
p_fdo->bus_ib_ifc.mlx4_interface.mlx4_unregister_ev_cb =
mlx4_reset_cb_unregister;
p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_request =
mlx4_reset_request;
p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_execute =
mlx4_reset_execute;
+ p_fdo->bus_ib_ifc.mlx4_interface.mlx4_reset_ready = mlx4_reset_ready;
//
// Create a custom interface so that other drivers can
Index: mlx4/kernel/bus/inc/bus_intf.h
===================================================================
--- mlx4/kernel/bus/inc/bus_intf.h (revision 1993)
+++ mlx4/kernel/bus/inc/bus_intf.h (working copy)
@@ -117,6 +117,7 @@
typedef int (*MLX4_RESET_REQUEST) (struct ib_event_handler
*event_handler);
typedef int (*MLX4_RESET_EXECUTE) (struct ib_event_handler
*event_handler);
+typedef int (*MLX4_RESET_READY) (struct ib_event_handler
*event_handler);
struct mlx4_interface_ex {
MLX4_PD_ALLOC mlx4_pd_alloc;
@@ -169,6 +170,7 @@
MLX4_UNREGISTER_EVENT_HANDLER mlx4_unregister_ev_cb;
MLX4_RESET_REQUEST mlx4_reset_request;
MLX4_RESET_EXECUTE mlx4_reset_execute;
+ MLX4_RESET_READY mlx4_reset_ready;
};
Index: mlx4/kernel/bus/inc/device.h
===================================================================
--- mlx4/kernel/bus/inc/device.h (revision 1993)
+++ mlx4/kernel/bus/inc/device.h (working copy)
@@ -40,8 +40,7 @@
MLX4_FLAG_RESET_CLIENT = 1 << 11,
MLX4_FLAG_RESET_DRIVER = 1 << 12,
MLX4_FLAG_RESET_STARTED = 1 << 13,
- MLX4_FLAG_CARD_IS_DEAD = 1 << 14,
- MLX4_FLAG_BUSY_WAIT = 1 << 15
+ MLX4_FLAG_BUSY_WAIT = 1 << 14
};
enum {
@@ -342,6 +341,7 @@
u32 signature;
struct pci_dev *pdev;
unsigned long flags;
+ LONG reset_pending;
struct mlx4_caps caps;
struct radix_tree_root qp_table_tree;
u32 rev_id;
Index: mlx4/kernel/bus/inc/ib_verbs.h
===================================================================
--- mlx4/kernel/bus/inc/ib_verbs.h (revision 1993)
+++ mlx4/kernel/bus/inc/ib_verbs.h (working copy)
@@ -293,15 +293,19 @@
};
enum ib_event_handler_flags {
- IB_IVH_RESET_CB = (1 << 0),
- IB_IVH_NOTIFIED = (1 << 1),
- IB_IVH_RESET_READY = (1 << 2)
+ IB_IVH_RESET_CB = (1 << 0), // it is handler for soft reset
+ IB_IVH_NOTIFIED = (1 << 1), // client has been notified about requested reset
+ IB_IVH_RESET_READY = (1 << 2), // client is ready for HW reset
+ IB_IVH_RESET_D_PENDING = (1 << 3), // device reset notification is pending
+ IB_IVH_RESET_C_PENDING = (1 << 4), // client reset notification is pending
+ IB_IVH_NOTIF_READY = (1 << 5) // client is ready to get reset request notification
};
+typedef void (*ib_event_handler_t)(struct ib_event_handler *, struct
ib_event *);
struct ib_event_handler {
struct ib_device *device;
- void (*handler)(struct ib_event_handler *, struct ib_event
*);
+ ib_event_handler_t handler;
struct list_head list;
void * ctx;
void * rsrv_ptr;
Index: mlx4/kernel/bus/net/catas.c
===================================================================
--- mlx4/kernel/bus/net/catas.c (revision 1993)
+++ mlx4/kernel/bus/net/catas.c (working copy)
@@ -39,10 +39,6 @@
static DEFINE_SPINLOCK(catas_lock);
static LIST_HEAD(catas_list);
-// TODO: put into Globals
-// "Reset device on internal errors if non-zero (default 1)")
-int g_internal_err_reset = 0;
-
void mlx4_dispatch_reset_event(struct ib_device *ibdev, enum
ib_event_type type)
{
unsigned long flags;
@@ -56,11 +52,25 @@
list_for_each_entry(handler, &ibdev->event_handler_list, list, struct
ib_event_handler)
{
- // notify only those, that are not notified
+ // notify only soft reset handlers
if ( handler->flags & IB_IVH_RESET_CB )
+ // notify only those, that are not yet notified
if ( !(handler->flags & IB_IVH_NOTIFIED) ) {
- handler->flags |= IB_IVH_NOTIFIED;
- handler->handler(handler, &event);
+ // notify only those that are ready to get the notification
+ if ( handler->flags & IB_IVH_NOTIF_READY ) {
+ // ensure it is not notified once more
+ handler->flags |= IB_IVH_NOTIFIED;
+ handler->flags &= ~(IB_IVH_NOTIF_READY |
+ IB_IVH_RESET_D_PENDING | IB_IVH_RESET_C_PENDING);
+ handler->handler(handler, &event);
+ }
+ else {
+ // pend the notification
+ if (type == IB_EVENT_RESET_DRIVER)
+ handler->flags |= IB_IVH_RESET_D_PENDING;
+ else
+ handler->flags |= IB_IVH_RESET_C_PENDING;
+ }
}
}
@@ -107,36 +117,28 @@
i, swab32(readl(priv->catas_err.map + i)));
}
-static void catas_reset()
-{
- struct mlx4_priv *priv, *tmppriv;
- struct mlx4_dev *dev;
- struct list_head tlist;
- int ret;
-
- INIT_LIST_HEAD(&tlist);
- spin_lock_irq(&catas_lock);
- list_splice_init(&catas_list, &tlist);
- spin_unlock_irq(&catas_lock);
-
- list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list, struct
mlx4_priv, struct mlx4_priv) {
- ret = mlx4_restart_one(priv->dev.pdev);
- dev = &priv->dev;
- if (ret)
- mlx4_err(dev, "Reset failed (%d)\n", ret);
- else
- mlx4_dbg(dev, "Reset succeeded\n");
- }
-}
-
static void
catas_reset_wi(
IN DEVICE_OBJECT* p_dev_obj,
- IN void* context )
+ IN struct mlx4_dev * dev )
{
+ NTSTATUS status;
+ long do_reset;
UNUSED_PARAM(p_dev_obj);
- IoFreeWorkItem( context );
- catas_reset();
+
+ do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+ if (do_reset == 0) {
+ status = mlx4_reset(dev);
+ if ( !NT_SUCCESS( status ) ) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n",
status);
+ }
+
+ dev->flags |= MLX4_FLAG_RESET_DRIVER; // bar the device
+ }
+
+ dump_err_buf(dev);
+ mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
+ mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
}
/* polling on DISPATCH_LEVEL */
@@ -145,27 +147,9 @@
struct mlx4_priv *priv = mlx4_priv(dev);
if (readl(priv->catas_err.map)) {
- dump_err_buf(dev);
-
- // bar the device
- dev->flags |= MLX4_FLAG_RESET_DRIVER;
-
- // relay the event
- mlx4_dispatch_event(dev, MLX4_EVENT_TYPE_LOCAL_CATAS_ERROR, 0, 0);
-
- // notify the clients
- mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
-
- if (g_internal_err_reset) {
- PIO_WORKITEM catas_work = IoAllocateWorkItem( dev->pdev->p_self_do
);
-
- spin_lock_dpc(&catas_lock);
- list_add(&priv->catas_err.list, &catas_list);
- spin_unlock_dpc(&catas_lock);
-
- if (!catas_work)
- IoQueueWorkItem( catas_work, catas_reset_wi, DelayedWorkQueue,
catas_work );
- }
+
+ mlx4_warn(dev, "Detected catastrophic error on mdev %p\n", dev);
+ IoQueueWorkItem( priv->catas_err.catas_work, catas_reset_wi,
DelayedWorkQueue, dev );
} else {
spin_lock_dpc(&catas_lock);
if (!priv->catas_err.stop) {
@@ -190,12 +174,12 @@
poll_catas( dev );
}
-void mlx4_start_catas_poll(struct mlx4_dev *dev)
+int mlx4_start_catas_poll(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
u64 addr;
+ int err;
- INIT_LIST_HEAD(&priv->catas_err.list);
priv->catas_err.map = NULL;
addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) +
@@ -205,8 +189,16 @@
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%lx\n",
addr);
- return;
+ err = -ENOMEM;
+ goto err_map;
}
+
+ priv->catas_err.catas_work = IoAllocateWorkItem( dev->pdev->p_self_do
);
+ if (!priv->catas_err.catas_work) {
+ mlx4_warn(dev, "Failed to allocate work item from polling thread\n");
+ err = -EFAULT;
+ goto err_alloc;
+ }
priv->catas_err.stop = 0;
spin_lock_init( &catas_lock );
@@ -215,6 +207,13 @@
priv->catas_err.interval.QuadPart = (-10)*
(__int64)MLX4_CATAS_POLL_INTERVAL;
KeSetTimerEx( &priv->catas_err.timer, priv->catas_err.interval,
0, &priv->catas_err.timer_dpc );
+ return 0;
+
+
+err_alloc:
+ iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
+err_map:
+ return err;
}
void mlx4_stop_catas_poll(struct mlx4_dev *dev)
@@ -231,13 +230,11 @@
KeCancelTimer(&priv->catas_err.timer);
KeFlushQueuedDpcs();
-
if (priv->catas_err.map)
iounmap(priv->catas_err.map, priv->fw.catas_size * 4);
- spin_lock_irq(&catas_lock);
- list_del(&priv->catas_err.list);
- spin_unlock_irq(&catas_lock);
+ if (priv->catas_err.catas_work)
+ IoFreeWorkItem( priv->catas_err.catas_work );
}
static int wait4reset(struct ib_event_handler *event_handler)
@@ -264,6 +261,24 @@
return n_not_ready;
}
+int mlx4_reset_ready( struct ib_event_handler *event_handler )
+{
+ unsigned long flags;
+ struct ib_device *ibdev = event_handler->device;
+
+ ASSERT(KeGetCurrentIrql() == PASSIVE_LEVEL);
+
+ spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
+ event_handler->flags |= IB_IVH_NOTIF_READY;
+ spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
+ if (event_handler->flags & IB_IVH_RESET_D_PENDING)
+ mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_DRIVER);
+ else
+ if (event_handler->flags & IB_IVH_RESET_C_PENDING)
+ mlx4_dispatch_reset_event(ibdev, IB_EVENT_RESET_CLIENT);
+ return 0;
+}
+
int mlx4_reset_execute( struct ib_event_handler *event_handler )
{
int err;
@@ -274,8 +289,9 @@
struct pci_dev *pdev = ibdev->dma_device->pdev;
// mark client as "ready for reset" and check whether we can do reset
- if (wait4reset(event_handler))
+ if (wait4reset(event_handler)) {
return 0;
+ }
// fully bar the device
ibdev->dma_device->flags |= MLX4_FLAG_RESET_STARTED;
@@ -330,16 +346,21 @@
struct ib_device *ibdev;
struct mlx4_dev *dev;
- ibdev = event_handler->device;
- if (ibdev == NULL)
- return -EFAULT;
+ unsigned long flags;
- dev = ibdev->dma_device;
- if (ibdev == NULL)
- return -EFAULT;
+ ibdev = event_handler->device;
+ if (ibdev == NULL)
+ return -EFAULT;
+
+ dev = ibdev->dma_device;
+ if (ibdev == NULL)
+ return -EFAULT;
+
+ spin_lock_irqsave(&ibdev->event_handler_lock, &flags);
+
// set device to RESET_PENDING mode
- if (!mlx4_is_barred(dev)) {
+ if (!(dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER)))
{
PIO_WORKITEM reset_work;
// bar the device
@@ -348,12 +369,17 @@
// delay reset to a system thread
// to allow for end of operations that are in progress
reset_work = IoAllocateWorkItem( dev->pdev->p_self_do );
- if (!reset_work)
+ if (!reset_work) {
+ mlx4_err(dev, "mlx4_reset_request IoAllocateWorkItem failed, reset
will not be propagated\n");
return -EFAULT;
+ }
event_handler->rsrv_ptr = reset_work;
IoQueueWorkItem( reset_work, card_reset_wi, DelayedWorkQueue,
event_handler );
}
+ spin_unlock_irqrestore(&ibdev->event_handler_lock, flags);
+
+
return 0;
}
Index: mlx4/kernel/bus/net/cmd.c
===================================================================
--- mlx4/kernel/bus/net/cmd.c (revision 1993)
+++ mlx4/kernel/bus/net/cmd.c (working copy)
@@ -210,6 +210,7 @@
int err = 0;
u64 end;
u8 status;
+ long do_reset;
down(&priv->cmd.poll_sem);
@@ -224,6 +225,19 @@
if (cmd_pending(dev)) {
err = -ETIMEDOUT;
+
+ do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+ if (!do_reset) {
+ NTSTATUS status1;
+ status1 = mlx4_reset(dev);
+ if ( !NT_SUCCESS( status1 ) ) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n",
status1);
+ }
+
+ dev->flags |= MLX4_FLAG_RESET_DRIVER; // bar the device
+ }
+ mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+
goto out;
}
@@ -273,15 +287,14 @@
struct mlx4_cmd_context *context;
int err = 0;
u64 out_prm = out_param ? *out_param : 0;
+ long do_reset;
down(&cmd->event_sem);
-
- if ( dev->flags & MLX4_FLAG_CARD_IS_DEAD ) {
+ if ( dev->flags & MLX4_FLAG_RESET_DRIVER ) {
err = -EBUSY;
mlx4_warn(dev, "mlx4_cmd_wait: Command %02x is skipped because the
card is stuck \n", op);
goto exit;
}
-
spin_lock(&cmd->context_lock);
BUG_ON(cmd->free_head < 0);
context = &cmd->context[cmd->free_head];
@@ -302,8 +315,6 @@
mlx4_err(dev, "mlx4_cmd_wait: Command %02x completed with timeout
after %d msecs \n",
op, timeout);
- /* for debug purposes */
- ASSERT(FALSE);
/* for enabling busy-wait loop, add MLX4_FLAG_BUSY_WAIT (0x8000) to
dev->flags */
while (dev) {
u32 wait_ms =2000; /* wait interval in msecs */
@@ -311,10 +322,20 @@
break;
cl_thread_suspend( wait_ms );
}
+ do_reset = InterlockedCompareExchange(&dev->reset_pending, 1, 0);
+ if (!do_reset) {
+ NTSTATUS status = mlx4_reset(dev);
+ if ( !NT_SUCCESS( status ) ) {
+ mlx4_err(dev, "Failed to reset HCA, aborting.(status %#x)\n",
status);
+ }
+
+ dev->flags |= MLX4_FLAG_RESET_DRIVER; // bar the device
+ }
/* try to solve the problem */
- dev->flags |= MLX4_FLAG_RESET_DRIVER | MLX4_FLAG_CARD_IS_DEAD;
- mlx4_dispatch_reset_event(dev->pdev->ib_dev, IB_EVENT_RESET_DRIVER);
+ if (dev->pdev->ib_dev) {
+ mlx4_dispatch_reset_event(dev->pdev->ib_dev,
IB_EVENT_RESET_DRIVER);
+ }
}
}
@@ -331,7 +352,7 @@
cmd->free_head = (int)(context - cmd->context);
spin_unlock(&cmd->context_lock);
-exit:
+exit:
up(&cmd->event_sem);
return err;
}
@@ -420,8 +441,8 @@
out_is_imm, in_modifier, (int)op_modifier);
#endif
- if ( mlx4_is_barred(dev) )
- return -EFAULT;
+ if ( mlx4_is_barred(dev) )
+ return -EFAULT;
if (mlx4_priv(dev)->cmd.use_events)
return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
Index: mlx4/kernel/bus/net/cq.c
===================================================================
--- mlx4/kernel/bus/net/cq.c (revision 1993)
+++ mlx4/kernel/bus/net/cq.c (working copy)
@@ -258,8 +258,7 @@
if (atomic_dec_and_test(&cq->refcount))
complete(&cq->free);
- if (!mlx4_is_barred(dev))
- wait_for_completion(&cq->free);
+ wait_for_completion(&cq->free);
mlx4_table_put(dev, &cq_table->table, cq->cqn);
mlx4_bitmap_free(&cq_table->bitmap, cq->cqn);
Index: mlx4/kernel/bus/net/intf.c
===================================================================
--- mlx4/kernel/bus/net/intf.c (revision 1993)
+++ mlx4/kernel/bus/net/intf.c (working copy)
@@ -119,20 +119,21 @@
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_device_context *dev_ctx;
- spin_lock_dpc(&priv->ctx_lock);
+ spin_lock(&priv->ctx_lock);
list_for_each_entry(dev_ctx, &priv->ctx_list, list, struct
mlx4_device_context)
if (dev_ctx->intf->event)
dev_ctx->intf->event(dev, dev_ctx->context, type,
subtype, port);
- spin_unlock_dpc(&priv->ctx_lock);
+ spin_unlock(&priv->ctx_lock);
}
int mlx4_register_device(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;
+ int err = 0;
mutex_lock(&intf_mutex);
@@ -142,9 +143,9 @@
mutex_unlock(&intf_mutex);
if (!mlx4_is_livefish(dev))
- mlx4_start_catas_poll(dev);
+ err = mlx4_start_catas_poll(dev);
- return 0;
+ return err;
}
void mlx4_unregister_device(struct mlx4_dev *dev)
Index: mlx4/kernel/bus/net/mlx4.h
===================================================================
--- mlx4/kernel/bus/net/mlx4.h (revision 1993)
+++ mlx4/kernel/bus/net/mlx4.h (working copy)
@@ -271,12 +271,12 @@
struct mlx4_catas_err {
u32 __iomem *map;
- struct list_head list;
/* Windows */
int stop;
KTIMER timer;
KDPC timer_dpc;
LARGE_INTEGER interval;
+ PIO_WORKITEM catas_work;
};
struct mlx4_mac_table {
@@ -375,7 +375,7 @@
void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
-void mlx4_start_catas_poll(struct mlx4_dev *dev);
+int mlx4_start_catas_poll(struct mlx4_dev *dev);
void mlx4_stop_catas_poll(struct mlx4_dev *dev);
int mlx4_restart_one(struct pci_dev *pdev);
int mlx4_register_device(struct mlx4_dev *dev);
@@ -434,6 +434,7 @@
void mlx4_remove_eq(struct mlx4_dev *dev, u8 eq_num);
+int mlx4_reset_ready( struct ib_event_handler *event_handler );
int mlx4_reset_execute( struct ib_event_handler *event_handler );
int mlx4_reset_request( struct ib_event_handler *event_handler );
Index: mlx4/kernel/bus/net/qp.c
===================================================================
--- mlx4/kernel/bus/net/qp.c (revision 1993)
+++ mlx4/kernel/bus/net/qp.c (working copy)
@@ -265,8 +265,7 @@
if (atomic_dec_and_test(&qp->refcount))
complete(&qp->free);
- if (!mlx4_is_barred(dev))
- wait_for_completion(&qp->free);
+ wait_for_completion(&qp->free);
mlx4_table_put(dev, &qp_table->cmpt_table, qp->qpn);
mlx4_table_put(dev, &qp_table->rdmarc_table, qp->qpn);
Index: mlx4/kernel/bus/net/srq.c
===================================================================
--- mlx4/kernel/bus/net/srq.c (revision 1993)
+++ mlx4/kernel/bus/net/srq.c (working copy)
@@ -218,8 +218,7 @@
if (atomic_dec_and_test(&srq->refcount))
complete(&srq->free);
- if (!mlx4_is_barred(dev))
- wait_for_completion(&srq->free);
+ wait_for_completion(&srq->free);
mlx4_table_put(dev, &srq_table->table, srq->srqn);
mlx4_bitmap_free(&srq_table->bitmap, srq->srqn);
Index: mlx4/kernel/inc/l2w.h
===================================================================
--- mlx4/kernel/inc/l2w.h (revision 1993)
+++ mlx4/kernel/inc/l2w.h (working copy)
@@ -334,7 +334,7 @@
static inline int mlx4_is_barred(struct mlx4_dev *dev)
{
- return dev->flags & (MLX4_FLAG_RESET_CLIENT | MLX4_FLAG_RESET_DRIVER);
+ return dev->flags & MLX4_FLAG_RESET_DRIVER;
}
static inline int mlx4_is_in_reset(struct mlx4_dev *dev)
Index: mlx4/kernel/inc/l2w_sync.h
===================================================================
--- mlx4/kernel/inc/l2w_sync.h (revision 1993)
+++ mlx4/kernel/inc/l2w_sync.h (working copy)
@@ -103,6 +103,7 @@
static inline int wait_for_completion_timeout( struct completion *
compl, unsigned long timeout )
{
LARGE_INTEGER interval;
+ ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
interval.QuadPart = (-10)* (__int64)timeout;
return (int)KeWaitForSingleObject( &compl->event, Executive,
KernelMode, FALSE, &interval );
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090302/beb0cd61/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: hw_error.diff
Type: application/octet-stream
Size: 18283 bytes
Desc: hw_error.diff
URL: <http://lists.openfabrics.org/pipermail/ofw/attachments/20090302/beb0cd61/attachment.obj>
More information about the ofw mailing list