[ewg] [PATCH] IB/iser: release host lock before error handling

Erez Zilber erezz at voltaire.com
Sun Jun 10 07:03:01 PDT 2007


Tziporet,

Please accept this patch. It fixes a bug in an error scenario in open-iscsi. This bug can lead to a deadlock.

In RH4 kernels, the scsi host template eh callbacks are called with
the host lock held. The reason that scsi-ml took this lock is mostly
historical, and this locking was removed later. The open-iscsi eh
callbacks must release this lock when entering the function and
re-acquire it before returning.

Signed-off-by: Erez Zilber <erezz at voltaire.com>
---
 .../2.6.9_U3/release_host_lock_before_eh.patch     |   63 +++++++++++++++++++++++
 .../2.6.9_U4/release_host_lock_before_eh.patch     |   63 +++++++++++++++++++++++
 .../2.6.9_U5/release_host_lock_before_eh.patch     |   63 +++++++++++++++++++++++
 3 files changed, 189 insertions(+), 0 deletions(-)

diff --git a/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ 	struct iscsi_conn *conn = session->leadconn;
+ 	int fail_session = 0;
+ 
++	spin_unlock_irq(host->host_lock);
+ 	spin_lock_bh(&session->lock);
+ 	if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ 		debug_scsi("failing host reset: session terminated "
+ 			   "[CID %d age %d]\n", conn->id, session->age);
+ 		spin_unlock_bh(&session->lock);
++		spin_lock_irq(host->host_lock);
+ 		return FAILED;
+ 	}
+ 
+@@ -1009,6 +1011,7 @@ failed:
+ 	else
+ 		goto failed;
+ 	spin_unlock_bh(&session->lock);
++	spin_lock_irq(host->host_lock);
+ 
+ 	return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+ 
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++	struct Scsi_Host *shost = sc->device->host;
+ 	struct iscsi_cmd_task *ctask;
+ 	struct iscsi_conn *conn;
+ 	struct iscsi_session *session;
+ 	int rc;
+ 
++	spin_unlock_irq(shost->host_lock);
+ 	/*
+ 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ 	 * got the command.
+ 	 */
+ 	if (!sc->SCp.ptr) {
+ 		debug_scsi("sc never reached iscsi layer or it completed.\n");
++		spin_lock_irq(shost->host_lock);
+ 		return SUCCESS;
+ 	}
+ 
+@@ -1257,6 +1263,7 @@ success_cleanup:
+ 
+ success_rel_mutex:
+ 	mutex_unlock(&conn->xmitmutex);
++	spin_lock_irq(shost->host_lock);
+ 	return SUCCESS;
+ 
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ 	mutex_unlock(&conn->xmitmutex);
+ 
+ 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++	spin_lock_irq(shost->host_lock);
+ 	return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
diff --git a/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ 	struct iscsi_conn *conn = session->leadconn;
+ 	int fail_session = 0;
+ 
++	spin_unlock_irq(host->host_lock);
+ 	spin_lock_bh(&session->lock);
+ 	if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ 		debug_scsi("failing host reset: session terminated "
+ 			   "[CID %d age %d]\n", conn->id, session->age);
+ 		spin_unlock_bh(&session->lock);
++		spin_lock_irq(host->host_lock);
+ 		return FAILED;
+ 	}
+ 
+@@ -1009,6 +1011,7 @@ failed:
+ 	else
+ 		goto failed;
+ 	spin_unlock_bh(&session->lock);
++	spin_lock_irq(host->host_lock);
+ 
+ 	return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+ 
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++	struct Scsi_Host *shost = sc->device->host;
+ 	struct iscsi_cmd_task *ctask;
+ 	struct iscsi_conn *conn;
+ 	struct iscsi_session *session;
+ 	int rc;
+ 
++	spin_unlock_irq(shost->host_lock);
+ 	/*
+ 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ 	 * got the command.
+ 	 */
+ 	if (!sc->SCp.ptr) {
+ 		debug_scsi("sc never reached iscsi layer or it completed.\n");
++		spin_lock_irq(shost->host_lock);
+ 		return SUCCESS;
+ 	}
+ 
+@@ -1257,6 +1263,7 @@ success_cleanup:
+ 
+ success_rel_mutex:
+ 	mutex_unlock(&conn->xmitmutex);
++	spin_lock_irq(shost->host_lock);
+ 	return SUCCESS;
+ 
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ 	mutex_unlock(&conn->xmitmutex);
+ 
+ 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++	spin_lock_irq(shost->host_lock);
+ 	return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
diff --git a/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
new file mode 100644
index 0000000..475b3f5
--- /dev/null
+++ b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
@@ -0,0 +1,63 @@
+diff -rup linux-2.6.20/drivers/scsi/libiscsi.c linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
+--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04 20:44:54.000000000 +0200
++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10 16:32:51.000000000 +0300
+@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
+ 	struct iscsi_conn *conn = session->leadconn;
+ 	int fail_session = 0;
+ 
++	spin_unlock_irq(host->host_lock);
+ 	spin_lock_bh(&session->lock);
+ 	if (session->state == ISCSI_STATE_TERMINATE) {
+ failed:
+ 		debug_scsi("failing host reset: session terminated "
+ 			   "[CID %d age %d]\n", conn->id, session->age);
+ 		spin_unlock_bh(&session->lock);
++		spin_lock_irq(host->host_lock);
+ 		return FAILED;
+ 	}
+ 
+@@ -1009,6 +1011,7 @@ failed:
+ 	else
+ 		goto failed;
+ 	spin_unlock_bh(&session->lock);
++	spin_lock_irq(host->host_lock);
+ 
+ 	return SUCCESS;
+ }
+@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
+ 
+ int iscsi_eh_abort(struct scsi_cmnd *sc)
+ {
++	struct Scsi_Host *shost = sc->device->host;
+ 	struct iscsi_cmd_task *ctask;
+ 	struct iscsi_conn *conn;
+ 	struct iscsi_session *session;
+ 	int rc;
+ 
++	spin_unlock_irq(shost->host_lock);
+ 	/*
+ 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
+ 	 * got the command.
+ 	 */
+ 	if (!sc->SCp.ptr) {
+ 		debug_scsi("sc never reached iscsi layer or it completed.\n");
++		spin_lock_irq(shost->host_lock);
+ 		return SUCCESS;
+ 	}
+ 
+@@ -1257,6 +1263,7 @@ success_cleanup:
+ 
+ success_rel_mutex:
+ 	mutex_unlock(&conn->xmitmutex);
++	spin_lock_irq(shost->host_lock);
+ 	return SUCCESS;
+ 
+ failed:
+@@ -1264,6 +1271,7 @@ failed:
+ 	mutex_unlock(&conn->xmitmutex);
+ 
+ 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
++	spin_lock_irq(shost->host_lock);
+ 	return FAILED;
+ }
+ EXPORT_SYMBOL_GPL(iscsi_eh_abort);
-- 
1.4.2

-- 
____________________________________________________________

Erez Zilber   |  972-9-971-7689

Software Engineer, Storage Team

Voltaire – _The Grid Backbone_

 __

 www.voltaire.com <http://www.voltaire.com/>

<mailto:erezz at voltaire.com>

  





More information about the ewg mailing list