[ewg] RE: [PATCH] IB/iser: release host lock before error handling

Vladimir Sokolovsky vlad at dev.mellanox.co.il
Sun Jun 10 07:27:08 PDT 2007


Applied,

Regards,
Vladimir

Tziporet Koren wrote:
> Approved - Vlad please apply
> Tziporet 
> 
> -----Original Message-----
> From: Erez Zilber [mailto:erezz at voltaire.com] 
> Sent: Sunday, June 10, 2007 5:03 PM
> To: Tziporet Koren; Vladimir Sokolovsky
> Cc: ewg at lists.openfabrics.org; open-iscsi at googlegroups.com; Mike
> Christie
> Subject: [PATCH] IB/iser: release host lock before error handling
> 
> Tziporet,
> 
> Please accept this patch. It fixes a bug in an error scenario in
> open-iscsi. This bug can lead to a deadlock.
> 
> In RH4 kernels, the scsi host template eh callbacks are called with
> the host lock held. The reason that scsi-ml took this lock is more
> historical and this locking was removed later. the open-iscsi eh
> callbacks must release this lock when entering the function and lock
> it again when exiting the function.
> 
> Signed-off-by: Erez Zilber <erezz at voltaire.com>
> ---
>  .../2.6.9_U3/release_host_lock_before_eh.patch     |   63
> +++++++++++++++++++++++
>  .../2.6.9_U4/release_host_lock_before_eh.patch     |   63
> +++++++++++++++++++++++
>  .../2.6.9_U5/release_host_lock_before_eh.patch     |   63
> +++++++++++++++++++++++
>  3 files changed, 189 insertions(+), 0 deletions(-)
> 
> diff --git
> a/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + 	struct iscsi_conn *conn = session->leadconn;
> + 	int fail_session = 0;
> + 
> ++	spin_unlock_irq(host->host_lock);
> + 	spin_lock_bh(&session->lock);
> + 	if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + 		debug_scsi("failing host reset: session terminated "
> + 			   "[CID %d age %d]\n", conn->id, session->age);
> + 		spin_unlock_bh(&session->lock);
> ++		spin_lock_irq(host->host_lock);
> + 		return FAILED;
> + 	}
> + 
> +@@ -1009,6 +1011,7 @@ failed:
> + 	else
> + 		goto failed;
> + 	spin_unlock_bh(&session->lock);
> ++	spin_lock_irq(host->host_lock);
> + 
> + 	return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> + 
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++	struct Scsi_Host *shost = sc->device->host;
> + 	struct iscsi_cmd_task *ctask;
> + 	struct iscsi_conn *conn;
> + 	struct iscsi_session *session;
> + 	int rc;
> + 
> ++	spin_unlock_irq(shost->host_lock);
> + 	/*
> + 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + 	 * got the command.
> + 	 */
> + 	if (!sc->SCp.ptr) {
> + 		debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++		spin_lock_irq(shost->host_lock);
> + 		return SUCCESS;
> + 	}
> + 
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> + 
> + success_rel_mutex:
> + 	mutex_unlock(&conn->xmitmutex);
> ++	spin_lock_irq(shost->host_lock);
> + 	return SUCCESS;
> + 
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + 	mutex_unlock(&conn->xmitmutex);
> + 
> + 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++	spin_lock_irq(shost->host_lock);
> + 	return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);
> diff --git
> a/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + 	struct iscsi_conn *conn = session->leadconn;
> + 	int fail_session = 0;
> + 
> ++	spin_unlock_irq(host->host_lock);
> + 	spin_lock_bh(&session->lock);
> + 	if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + 		debug_scsi("failing host reset: session terminated "
> + 			   "[CID %d age %d]\n", conn->id, session->age);
> + 		spin_unlock_bh(&session->lock);
> ++		spin_lock_irq(host->host_lock);
> + 		return FAILED;
> + 	}
> + 
> +@@ -1009,6 +1011,7 @@ failed:
> + 	else
> + 		goto failed;
> + 	spin_unlock_bh(&session->lock);
> ++	spin_lock_irq(host->host_lock);
> + 
> + 	return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> + 
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++	struct Scsi_Host *shost = sc->device->host;
> + 	struct iscsi_cmd_task *ctask;
> + 	struct iscsi_conn *conn;
> + 	struct iscsi_session *session;
> + 	int rc;
> + 
> ++	spin_unlock_irq(shost->host_lock);
> + 	/*
> + 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + 	 * got the command.
> + 	 */
> + 	if (!sc->SCp.ptr) {
> + 		debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++		spin_lock_irq(shost->host_lock);
> + 		return SUCCESS;
> + 	}
> + 
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> + 
> + success_rel_mutex:
> + 	mutex_unlock(&conn->xmitmutex);
> ++	spin_lock_irq(shost->host_lock);
> + 	return SUCCESS;
> + 
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + 	mutex_unlock(&conn->xmitmutex);
> + 
> + 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++	spin_lock_irq(shost->host_lock);
> + 	return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);
> diff --git
> a/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c	2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c	2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + 	struct iscsi_conn *conn = session->leadconn;
> + 	int fail_session = 0;
> + 
> ++	spin_unlock_irq(host->host_lock);
> + 	spin_lock_bh(&session->lock);
> + 	if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + 		debug_scsi("failing host reset: session terminated "
> + 			   "[CID %d age %d]\n", conn->id, session->age);
> + 		spin_unlock_bh(&session->lock);
> ++		spin_lock_irq(host->host_lock);
> + 		return FAILED;
> + 	}
> + 
> +@@ -1009,6 +1011,7 @@ failed:
> + 	else
> + 		goto failed;
> + 	spin_unlock_bh(&session->lock);
> ++	spin_lock_irq(host->host_lock);
> + 
> + 	return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> + 
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++	struct Scsi_Host *shost = sc->device->host;
> + 	struct iscsi_cmd_task *ctask;
> + 	struct iscsi_conn *conn;
> + 	struct iscsi_session *session;
> + 	int rc;
> + 
> ++	spin_unlock_irq(shost->host_lock);
> + 	/*
> + 	 * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + 	 * got the command.
> + 	 */
> + 	if (!sc->SCp.ptr) {
> + 		debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++		spin_lock_irq(shost->host_lock);
> + 		return SUCCESS;
> + 	}
> + 
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> + 
> + success_rel_mutex:
> + 	mutex_unlock(&conn->xmitmutex);
> ++	spin_lock_irq(shost->host_lock);
> + 	return SUCCESS;
> + 
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + 	mutex_unlock(&conn->xmitmutex);
> + 
> + 	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++	spin_lock_irq(shost->host_lock);
> + 	return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);




More information about the ewg mailing list