[ewg] [PATCH] IB/iser: release host lock before error handling
Erez Zilber
erezz at voltaire.com
Sun Jun 10 07:03:01 PDT 2007
Tziporet,
Please accept this patch. It fixes a bug in an error scenario in open-iscsi. This bug can lead to a deadlock.
In RH4 kernels, the scsi host template error-handling (eh) callbacks are
called with the host lock held. The reason scsi-ml took this lock is
mostly historical, and this locking was removed in later kernels. The
open-iscsi eh callbacks must therefore release this lock on entry and
re-acquire it before returning.
Signed-off-by: Erez Zilber <erezz at voltaire.com>
---
.../2.6.9_U3/release_host_lock_before_eh.patch | 63 +++++++++++++++++++++++
.../2.6.9_U4/release_host_lock_before_eh.patch | 63 +++++++++++++++++++++++
.../2.6.9_U5/release_host_lock_before_eh.patch | 63 +++++++++++++++++++++++
3 files changed, 189 insertions(+), 0 deletions(-)
diff --git a/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ struct iscsi_conn *conn = session->leadconn;
+ int fail_session = 0;
+
++ spin_unlock_irq(host->host_lock);
+ spin_lock_bh(&session->lock);
+ if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ debug_scsi("failing host reset: session terminated "
+ "[CID %d age %d]\n", conn->id, session->age);
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+ return FAILED;
+ }
+
+@@ -1009,6 +1011,7 @@ failed:
+ else
+ goto failed;
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+
+ return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++ struct Scsi_Host *shost = sc->device->host;
+ struct iscsi_cmd_task *ctask;
+ struct iscsi_conn *conn;
+ struct iscsi_session *session;
+ int rc;
+
++ spin_unlock_irq(shost->host_lock);
+ /*
+ * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ * got the command.
+ */
+ if (!sc->SCp.ptr) {
+ debug_scsi("sc never reached iscsi layer or it completed.\n");
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+ }
+
+@@ -1257,6 +1263,7 @@ success_cleanup:
+
+ success_rel_mutex:
+ mutex_unlock(&conn->xmitmutex);
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ mutex_unlock(&conn->xmitmutex);
+
+ debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++ spin_lock_irq(shost->host_lock);
+ return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
diff --git a/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ struct iscsi_conn *conn = session->leadconn;
+ int fail_session = 0;
+
++ spin_unlock_irq(host->host_lock);
+ spin_lock_bh(&session->lock);
+ if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ debug_scsi("failing host reset: session terminated "
+ "[CID %d age %d]\n", conn->id, session->age);
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+ return FAILED;
+ }
+
+@@ -1009,6 +1011,7 @@ failed:
+ else
+ goto failed;
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+
+ return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++ struct Scsi_Host *shost = sc->device->host;
+ struct iscsi_cmd_task *ctask;
+ struct iscsi_conn *conn;
+ struct iscsi_session *session;
+ int rc;
+
++ spin_unlock_irq(shost->host_lock);
+ /*
+ * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ * got the command.
+ */
+ if (!sc->SCp.ptr) {
+ debug_scsi("sc never reached iscsi layer or it completed.\n");
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+ }
+
+@@ -1257,6 +1263,7 @@ success_cleanup:
+
+ success_rel_mutex:
+ mutex_unlock(&conn->xmitmutex);
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ mutex_unlock(&conn->xmitmutex);
+
+ debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++ spin_lock_irq(shost->host_lock);
+ return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
diff --git a/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ struct iscsi_conn *conn = session->leadconn;
+ int fail_session = 0;
+
++ spin_unlock_irq(host->host_lock);
+ spin_lock_bh(&session->lock);
+ if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ debug_scsi("failing host reset: session terminated "
+ "[CID %d age %d]\n", conn->id, session->age);
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+ return FAILED;
+ }
+
+@@ -1009,6 +1011,7 @@ failed:
+ else
+ goto failed;
+ spin_unlock_bh(&session->lock);
++ spin_lock_irq(host->host_lock);
+
+ return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++ struct Scsi_Host *shost = sc->device->host;
+ struct iscsi_cmd_task *ctask;
+ struct iscsi_conn *conn;
+ struct iscsi_session *session;
+ int rc;
+
++ spin_unlock_irq(shost->host_lock);
+ /*
+ * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ * got the command.
+ */
+ if (!sc->SCp.ptr) {
+ debug_scsi("sc never reached iscsi layer or it completed.\n");
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+ }
+
+@@ -1257,6 +1263,7 @@ success_cleanup:
+
+ success_rel_mutex:
+ mutex_unlock(&conn->xmitmutex);
++ spin_lock_irq(shost->host_lock);
+ return SUCCESS;
+
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ mutex_unlock(&conn->xmitmutex);
+
+ debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++ spin_lock_irq(shost->host_lock);
+ return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
--
1.4.2
--
____________________________________________________________
Erez Zilber | 972-9-971-7689
Software Engineer, Storage Team
Voltaire – _The Grid Backbone_
__
www.voltaire.com <http://www.voltaire.com/>
<mailto:erezz at voltaire.com>
More information about the ewg
mailing list