[ewg] RE: [PATCH] IB/iser: release host lock before error handling
Vladimir Sokolovsky
vlad at dev.mellanox.co.il
Sun Jun 10 07:27:08 PDT 2007
Applied,
Regards,
Vladimir
Tziporet Koren wrote:
> Approved - Vlad please apply
> Tziporet
>
> -----Original Message-----
> From: Erez Zilber [mailto:erezz at voltaire.com]
> Sent: Sunday, June 10, 2007 5:03 PM
> To: Tziporet Koren; Vladimir Sokolovsky
> Cc: ewg at lists.openfabrics.org; open-iscsi at googlegroups.com; Mike
> Christie
> Subject: [PATCH] IB/iser: release host lock before error handling
>
> Tziporet,
>
> Please accept this patch. It fixes a bug in an error scenario in
> open-iscsi. This bug can lead to a deadlock.
>
> In RH4 kernels, the scsi host template eh callbacks are called with
> the host lock held. The reason that scsi-ml took this lock is more
> historical and this locking was removed later. the open-iscsi eh
> callbacks must release this lock when entering the function and lock
> it again when exiting the function.
>
> Signed-off-by: Erez Zilber <erezz at voltaire.com>
> ---
> .../2.6.9_U3/release_host_lock_before_eh.patch | 63
> +++++++++++++++++++++++
> .../2.6.9_U4/release_host_lock_before_eh.patch | 63
> +++++++++++++++++++++++
> .../2.6.9_U5/release_host_lock_before_eh.patch | 63
> +++++++++++++++++++++++
> 3 files changed, 189 insertions(+), 0 deletions(-)
>
> diff --git
> a/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U3/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + struct iscsi_conn *conn = session->leadconn;
> + int fail_session = 0;
> +
> ++ spin_unlock_irq(host->host_lock);
> + spin_lock_bh(&session->lock);
> + if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + debug_scsi("failing host reset: session terminated "
> + "[CID %d age %d]\n", conn->id, session->age);
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> + return FAILED;
> + }
> +
> +@@ -1009,6 +1011,7 @@ failed:
> + else
> + goto failed;
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> +
> + return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> +
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++ struct Scsi_Host *shost = sc->device->host;
> + struct iscsi_cmd_task *ctask;
> + struct iscsi_conn *conn;
> + struct iscsi_session *session;
> + int rc;
> +
> ++ spin_unlock_irq(shost->host_lock);
> + /*
> + * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + * got the command.
> + */
> + if (!sc->SCp.ptr) {
> + debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> + }
> +
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> +
> + success_rel_mutex:
> + mutex_unlock(&conn->xmitmutex);
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> +
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + mutex_unlock(&conn->xmitmutex);
> +
> + debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++ spin_lock_irq(shost->host_lock);
> + return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);
> diff --git
> a/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U4/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + struct iscsi_conn *conn = session->leadconn;
> + int fail_session = 0;
> +
> ++ spin_unlock_irq(host->host_lock);
> + spin_lock_bh(&session->lock);
> + if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + debug_scsi("failing host reset: session terminated "
> + "[CID %d age %d]\n", conn->id, session->age);
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> + return FAILED;
> + }
> +
> +@@ -1009,6 +1011,7 @@ failed:
> + else
> + goto failed;
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> +
> + return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> +
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++ struct Scsi_Host *shost = sc->device->host;
> + struct iscsi_cmd_task *ctask;
> + struct iscsi_conn *conn;
> + struct iscsi_session *session;
> + int rc;
> +
> ++ spin_unlock_irq(shost->host_lock);
> + /*
> + * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + * got the command.
> + */
> + if (!sc->SCp.ptr) {
> + debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> + }
> +
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> +
> + success_rel_mutex:
> + mutex_unlock(&conn->xmitmutex);
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> +
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + mutex_unlock(&conn->xmitmutex);
> +
> + debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++ spin_lock_irq(shost->host_lock);
> + return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);
> diff --git
> a/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> new file mode 100644
> index 0000000..475b3f5
> --- /dev/null
> +++ b/kernel_patches/backport/2.6.9_U5/release_host_lock_before_eh.patch
> @@ -0,0 +1,63 @@
> +diff -rup linux-2.6.20/drivers/scsi/libiscsi.c
> linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c
> +--- linux-2.6.20/drivers/scsi/libiscsi.c 2007-02-04
> 20:44:54.000000000 +0200
> ++++ linux-2.6.20-host-lock-fix/drivers/scsi/libiscsi.c 2007-06-10
> 16:32:51.000000000 +0300
> +@@ -972,12 +972,14 @@ int iscsi_eh_host_reset(struct scsi_cmnd
> + struct iscsi_conn *conn = session->leadconn;
> + int fail_session = 0;
> +
> ++ spin_unlock_irq(host->host_lock);
> + spin_lock_bh(&session->lock);
> + if (session->state == ISCSI_STATE_TERMINATE) {
> + failed:
> + debug_scsi("failing host reset: session terminated "
> + "[CID %d age %d]\n", conn->id, session->age);
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> + return FAILED;
> + }
> +
> +@@ -1009,6 +1011,7 @@ failed:
> + else
> + goto failed;
> + spin_unlock_bh(&session->lock);
> ++ spin_lock_irq(host->host_lock);
> +
> + return SUCCESS;
> + }
> +@@ -1162,17 +1165,20 @@ static void fail_command(struct iscsi_co
> +
> + int iscsi_eh_abort(struct scsi_cmnd *sc)
> + {
> ++ struct Scsi_Host *shost = sc->device->host;
> + struct iscsi_cmd_task *ctask;
> + struct iscsi_conn *conn;
> + struct iscsi_session *session;
> + int rc;
> +
> ++ spin_unlock_irq(shost->host_lock);
> + /*
> + * if session was ISCSI_STATE_IN_RECOVERY then we may not have
> + * got the command.
> + */
> + if (!sc->SCp.ptr) {
> + debug_scsi("sc never reached iscsi layer or it
> completed.\n");
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> + }
> +
> +@@ -1257,6 +1263,7 @@ success_cleanup:
> +
> + success_rel_mutex:
> + mutex_unlock(&conn->xmitmutex);
> ++ spin_lock_irq(shost->host_lock);
> + return SUCCESS;
> +
> + failed:
> +@@ -1264,6 +1271,7 @@ failed:
> + mutex_unlock(&conn->xmitmutex);
> +
> + debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc,
> ctask->itt);
> ++ spin_lock_irq(shost->host_lock);
> + return FAILED;
> + }
> + EXPORT_SYMBOL_GPL(iscsi_eh_abort);
More information about the ewg
mailing list