[openib-general] [PATCH] IB/mthca: recover from device errors

Michael S. Tsirkin mst at mellanox.co.il
Tue Aug 15 11:11:18 PDT 2006


Hello, Roland!
The following makes it possible to recover from catastrophic
errors through device reset. Could you please queue it for 2.6.19?

Implementation detail:

Catastrophic event device reset. Implemented via a fatal list, in
which device objects are queued for resetting.  A spinlock guarantees
list insertion/deletion protection, while a mutex guarantees
that we don't perform device resets while a device
add/remove operation is in progress (and vice versa).
Added a workqueue to the mthca driver to perform the reset in a
thread context.

--

Trigger device remove and then add once a catastrophic error is
detected in hardware.  This, in turn, will cause a device
reset, typically recovering from the catastrophic condition.

Since this might interfere with debugging the error root
cause, add a module option to suppress this behaviour.

Signed-off-by: Jack Morgenstein <jackm at mellanox.co.il>
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: ofed_1_1/drivers/infiniband/hw/mthca/mthca_catas.c
===================================================================
--- ofed_1_1.orig/drivers/infiniband/hw/mthca/mthca_catas.c	2006-08-03 14:30:21.645701000 +0300
+++ ofed_1_1/drivers/infiniband/hw/mthca/mthca_catas.c	2006-08-10 16:46:57.418864000 +0300
@@ -34,6 +34,7 @@
 
 #include <linux/jiffies.h>
 #include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #include "mthca_dev.h"
 
@@ -48,9 +49,42 @@ enum {
 
 static DEFINE_SPINLOCK(catas_lock);
 
+static struct workqueue_struct *catas_wq;
+static struct list_head catas_list;
+static struct work_struct catas_work;
+
+static int catas_reset_disable = 0;
+module_param_named(catas_reset_disable, catas_reset_disable, int, 0644);
+MODULE_PARM_DESC(catas_reset_disable, "disable reset on catastrophic event if > 0");
+
+static void catas_reset(void *work_ptr)
+{
+	struct mthca_dev *dev, *tmpdev;
+	LIST_HEAD(local_catas);
+	unsigned long flags;
+	int rc;
+
+	mutex_lock(&mthca_device_mutex);
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_for_each_entry_safe(dev, tmpdev, &catas_list, catas_err.list)
+		list_move_tail(&dev->catas_err.list, &local_catas);
+	spin_unlock_irqrestore(&catas_lock, flags);
+
+	list_for_each_entry_safe(dev, tmpdev, &local_catas, catas_err.list) {
+		rc = mthca_restart_one(dev->pdev);
+		if (rc)
+			mthca_err(dev, "Reset failed (%d)\n", rc);
+		else
+			mthca_dbg(dev, "Reset succeeded\n");
+	}
+	mutex_unlock(&mthca_device_mutex);
+}
+
 static void handle_catas(struct mthca_dev *dev)
 {
 	struct ib_event event;
+	unsigned long flags;
 	const char *type;
 	int i;
 
@@ -82,6 +116,14 @@ static void handle_catas(struct mthca_de
 	for (i = 0; i < dev->catas_err.size; ++i)
 		mthca_err(dev, "  buf[%02x]: %08x\n",
 			  i, swab32(readl(dev->catas_err.map + i)));
+
+	if (catas_reset_disable)
+		return;
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_add(&dev->catas_err.list, &catas_list);
+	queue_work(catas_wq, &catas_work);
+	spin_unlock_irqrestore(&catas_lock, flags);
 }
 
 static void poll_catas(unsigned long dev_ptr)
@@ -135,11 +177,14 @@ void mthca_start_catas_poll(struct mthca
 	dev->catas_err.timer.data     = (unsigned long) dev;
 	dev->catas_err.timer.function = poll_catas;
 	dev->catas_err.timer.expires  = jiffies + MTHCA_CATAS_POLL_INTERVAL;
+	INIT_LIST_HEAD(&dev->catas_err.list);
 	add_timer(&dev->catas_err.timer);
 }
 
 void mthca_stop_catas_poll(struct mthca_dev *dev)
 {
+	unsigned long flags;
+
 	spin_lock_irq(&catas_lock);
 	dev->catas_err.stop = 1;
 	spin_unlock_irq(&catas_lock);
@@ -153,4 +198,23 @@ void mthca_stop_catas_poll(struct mthca_
 				    dev->catas_err.addr),
 				   dev->catas_err.size * 4);
 	}
+
+	spin_lock_irqsave(&catas_lock, flags);
+	list_del(&dev->catas_err.list);
+	spin_unlock_irqrestore(&catas_lock, flags);
+}
+
+int __init mthca_catas_init(void)
+{
+	INIT_LIST_HEAD(&catas_list);
+	INIT_WORK(&catas_work, catas_reset, NULL);
+	catas_wq = create_singlethread_workqueue("mthcacatas");
+	if (!catas_wq)
+		return -ENOMEM;
+	return 0;
+}
+
+void mthca_catas_cleanup(void)
+{
+	destroy_workqueue(catas_wq);
 }
Index: ofed_1_1/drivers/infiniband/hw/mthca/mthca_main.c
===================================================================
--- ofed_1_1.orig/drivers/infiniband/hw/mthca/mthca_main.c	2006-08-03 14:30:21.747701000 +0300
+++ ofed_1_1/drivers/infiniband/hw/mthca/mthca_main.c	2006-08-10 16:46:16.770946000 +0300
@@ -80,6 +80,8 @@ static int tune_pci = 0;
 module_param(tune_pci, int, 0444);
 MODULE_PARM_DESC(tune_pci, "increase PCI burst from the default set by BIOS if nonzero");
 
+struct mutex mthca_device_mutex;
+
 static const char mthca_version[] __devinitdata =
 	DRV_NAME ": Mellanox InfiniBand HCA driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
@@ -978,28 +980,15 @@ static struct {
 					MTHCA_FLAG_SINAI_OPT }
 };
 
-static int __devinit mthca_init_one(struct pci_dev *pdev,
-				    const struct pci_device_id *id)
+static int __mthca_init_one(struct pci_dev *pdev, int hca_type)
 {
-	static int mthca_version_printed = 0;
 	int ddr_hidden = 0;
 	int err;
 	struct mthca_dev *mdev;
 
-	if (!mthca_version_printed) {
-		printk(KERN_INFO "%s", mthca_version);
-		++mthca_version_printed;
-	}
-
 	printk(KERN_INFO PFX "Initializing %s\n",
 	       pci_name(pdev));
 
-	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
-		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
-		       pci_name(pdev), id->driver_data);
-		return -ENODEV;
-	}
-
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot enable PCI device, "
@@ -1065,7 +1054,7 @@ static int __devinit mthca_init_one(stru
 
 	mdev->pdev = pdev;
 
-	mdev->mthca_flags = mthca_hca_table[id->driver_data].flags;
+	mdev->mthca_flags = mthca_hca_table[hca_type].flags;
 	if (ddr_hidden)
 		mdev->mthca_flags |= MTHCA_FLAG_DDR_HIDDEN;
 
@@ -1099,13 +1088,13 @@ static int __devinit mthca_init_one(stru
 	if (err)
 		goto err_cmd;
 
-	if (mdev->fw_ver < mthca_hca_table[id->driver_data].latest_fw) {
+	if (mdev->fw_ver < mthca_hca_table[hca_type].latest_fw) {
 		mthca_warn(mdev, "HCA FW version %d.%d.%d is old (%d.%d.%d is current).\n",
 			   (int) (mdev->fw_ver >> 32), (int) (mdev->fw_ver >> 16) & 0xffff,
 			   (int) (mdev->fw_ver & 0xffff),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 32),
-			   (int) (mthca_hca_table[id->driver_data].latest_fw >> 16) & 0xffff,
-			   (int) (mthca_hca_table[id->driver_data].latest_fw & 0xffff));
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 32),
+			   (int) (mthca_hca_table[hca_type].latest_fw >> 16) & 0xffff,
+			   (int) (mthca_hca_table[hca_type].latest_fw & 0xffff));
 		mthca_warn(mdev, "If you have problems, try updating your HCA FW.\n");
 	}
 
@@ -1122,6 +1111,7 @@ static int __devinit mthca_init_one(stru
 		goto err_unregister;
 
 	pci_set_drvdata(pdev, mdev);
+	mdev->hca_type = hca_type;
 
 	return 0;
 
@@ -1166,7 +1156,7 @@ err_disable_pdev:
 	return err;
 }
 
-static void __devexit mthca_remove_one(struct pci_dev *pdev)
+static void __mthca_remove_one(struct pci_dev *pdev)
 {
 	struct mthca_dev *mdev = pci_get_drvdata(pdev);
 	u8 status;
@@ -1211,6 +1201,49 @@ static void __devexit mthca_remove_one(s
 	}
 }
 
+static int __devinit mthca_init_one(struct pci_dev *pdev,
+			     const struct pci_device_id *id)
+{
+	static int mthca_version_printed = 0;
+	int rc;
+
+	mutex_lock(&mthca_device_mutex);
+	if (!mthca_version_printed) {
+		printk(KERN_INFO "%s", mthca_version);
+		++mthca_version_printed;
+	}
+
+	if (id->driver_data >= ARRAY_SIZE(mthca_hca_table)) {
+		printk(KERN_ERR PFX "%s has invalid driver data %lx\n",
+		       pci_name(pdev), id->driver_data);
+		mutex_unlock(&mthca_device_mutex);
+		return -ENODEV;
+	}
+
+	rc = __mthca_init_one(pdev, id->driver_data);
+	mutex_unlock(&mthca_device_mutex);
+	return rc;
+}
+
+static void __devexit mthca_remove_one(struct pci_dev *pdev)
+{
+	mutex_lock(&mthca_device_mutex);
+	__mthca_remove_one(pdev);
+	mutex_unlock(&mthca_device_mutex);
+	return;
+}
+
+int mthca_restart_one(struct pci_dev *pdev)
+{
+	struct mthca_dev *mdev;
+
+	mdev = pci_get_drvdata(pdev);
+	if (!mdev)
+		return -ENODEV;
+	__mthca_remove_one(pdev);
+	return __mthca_init_one(pdev, mdev->hca_type);
+}
+
 static struct pci_device_id mthca_pci_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR),
 	  .driver_data = TAVOR },
@@ -1248,13 +1281,22 @@ static int __init mthca_init(void)
 {
 	int ret;
 
+	mutex_init(&mthca_device_mutex);
+	if (mthca_catas_init())
+		return -ENOMEM;
+
 	ret = pci_register_driver(&mthca_driver);
-	return ret < 0 ? ret : 0;
+	if (ret < 0) {
+		mthca_catas_cleanup();
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit mthca_cleanup(void)
 {
 	pci_unregister_driver(&mthca_driver);
+	mthca_catas_cleanup();
 }
 
 module_init(mthca_init);
Index: ofed_1_1/drivers/infiniband/hw/mthca/mthca_dev.h
===================================================================
--- ofed_1_1.orig/drivers/infiniband/hw/mthca/mthca_dev.h	2006-08-03 14:30:21.706704000 +0300
+++ ofed_1_1/drivers/infiniband/hw/mthca/mthca_dev.h	2006-08-10 16:47:05.666648000 +0300
@@ -45,6 +45,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/timer.h>
 #include <linux/mutex.h>
+#include <linux/list.h>
 
 #include <asm/semaphore.h>
 
@@ -283,8 +284,11 @@ struct mthca_catas_err {
 	unsigned long		stop;
 	u32			size;
 	struct timer_list	timer;
+	struct list_head	list;
 };
 
+extern struct mutex mthca_device_mutex;
+
 struct mthca_dev {
 	struct ib_device  ib_dev;
 	struct pci_dev   *pdev;
@@ -450,6 +454,9 @@ void mthca_unregister_device(struct mthc
 
 void mthca_start_catas_poll(struct mthca_dev *dev);
 void mthca_stop_catas_poll(struct mthca_dev *dev);
+int mthca_restart_one(struct pci_dev *pdev);
+int mthca_catas_init(void);
+void mthca_catas_cleanup(void);
 
 int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar);
 void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar);

-- 
MST




More information about the general mailing list