[openib-general] [PATCH 2/6] [RFC] open iscsi iser transport provider code

Or Gerlitz ogerlitz at voltaire.com
Wed Feb 22 06:27:29 PST 2006


+ this is the equivalent of drivers/scsi/iscsi_tcp.c

+ In keeping with the trunk convention of staying workable with the latest
  stable kernel (2.6.15), iscsi_iser.c is not up to date with iscsi_tcp.c, which
  has gone through many changes on the way to 2.6.16. The sync that will take
  place once 2.6.16 is out will remove much of the duplication.

--- /ulp/iser-x/iscsi_iser.c	2006-02-22 15:06:49.000000000 +0200
+++ /ulp/iser/iscsi_iser.c	2006-02-22 15:14:42.000000000 +0200
@@ -1 +1,1850 @@
+/*
+ * iSCSI Initiator over iSER Data-Path
+ *
+ * Copyright (C) 2004 Dmitry Yusupov
+ * Copyright (C) 2004 Alex Aizman
+ * Copyright (C) 2005 Mike Christie
+ * Copyright (c) 2005, 2006 Voltaire, Inc. All rights reserved.
+ * maintained by openib-general at openib.org
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *	- Redistributions of source code must retain the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer.
+ *
+ *	- Redistributions in binary form must reproduce the above
+ *	  copyright notice, this list of conditions and the following
+ *	  disclaimer in the documentation and/or other materials
+ *	  provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Credits:
+ *	Christoph Hellwig
+ *	FUJITA Tomonori
+ *	Arne Redlich
+ *	Zhenyu Wang
+ * Modified by:
+ *      Erez Zilber
+ *
+ *
+ * $Id: iscsi_iser.c 5460 2006-02-22 11:25:08Z ogerlitz $
+ */
 
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/hardirq.h>
+#include <linux/kfifo.h>
+#include <linux/blkdev.h>
+#include <linux/init.h>
+#include <linux/ioctl.h>
+#include <linux/devfs_fs_kernel.h>
+#include <linux/cdev.h>
+#include <linux/in.h>
+#include <linux/net.h>
+#include <linux/scatterlist.h>
+#include <linux/delay.h>
+
+#include <net/sock.h>
+
+#include <asm/uaccess.h>
+
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_request.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_transport_iscsi.h>
+
+#include "iscsi_iser.h"
+#include "iser_socket.h"
+
+/*
+ * Width of the window used by iscsi_iser_check_assign_cmdsn() when it
+ * compares command sequence numbers.
+ */
+#define INVALID_SN_DELTA	0xffff
+
+/* iSER debug trace: expands to nothing unless DEBUG_ISER is defined. */
+#ifdef DEBUG_ISER
+#define debug_iser(fmt...) printk(KERN_DEBUG "iser: " fmt)
+#else
+#define debug_iser(fmt...)
+#endif
+
+/* SCSI-path debug trace: expands to nothing unless DEBUG_SCSI is defined. */
+#ifdef DEBUG_SCSI
+#define debug_scsi(fmt...) printk(KERN_DEBUG "scsi: " fmt)
+#else
+#define debug_scsi(fmt...)
+#endif
+
+/* Exposed as the read-only module parameter "max_lun". */
+static unsigned int iscsi_max_lun = 512;
+module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO);
+
+/* Version strings; the $...$ keywords are presumably expanded by the VCS. */
+#define DRV_VER	     "$Rev$"
+#define DRV_DATE     "$LastChangedDate$"
+
+/* Exposed as the module parameter "debug_level" (see below). */
+int iser_debug_level = 0;
+
+MODULE_DESCRIPTION("iSER (iSCSI Extensions for RDMA) Datamover "
+		   "v" DRV_VER "(" DRV_DATE ")");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Alex Nezhinsky, Dan Bar Dov");
+
+module_param_named(debug_level, iser_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level,"Enable debug tracing if > 0 (default:disabled)");
+
+/* Global iSER state; struct iser_global is declared outside this file. */
+struct iser_global ig;
+
+/*
+ * Report a connection-level error.
+ *
+ * The session is marked ISCSI_STATE_FAILED when @conn is its leading
+ * connection or its only one. The connection's SUSPEND_BIT is then set and
+ * the error is passed on through iscsi_conn_error().
+ */
+void
+iscsi_iser_conn_failure(struct iscsi_iser_conn *conn, enum iscsi_err err)
+{
+	unsigned long irq_flags;
+	struct iscsi_iser_session *sess = conn->session;
+
+	spin_lock_irqsave(&sess->lock, irq_flags);
+	if (sess->leadconn == conn || sess->conn_cnt == 1)
+		sess->state = ISCSI_STATE_FAILED;
+	spin_unlock_irqrestore(&sess->lock, irq_flags);
+
+	set_bit(SUSPEND_BIT, &conn->suspend_tx);
+	iscsi_conn_error(iscsi_handle(conn), err);
+}
+
+/*
+ * Check the ExpCmdSN/MaxCmdSN pair carried in @hdr and update the values
+ * cached in @session.
+ *
+ * Returns ISCSI_ERR_MAX_CMDSN when MaxCmdSN lies strictly between
+ * ExpCmdSN - INVALID_SN_DELTA and ExpCmdSN - 1; the session is left untouched
+ * in that case. Otherwise returns 0 after replacing each cached value that
+ * the received one exceeds, or that the received one trails by more than
+ * INVALID_SN_DELTA (presumably a sequence-number wrap-around).
+ */
+static inline int
+iscsi_iser_check_assign_cmdsn(struct iscsi_iser_session *session,
+			      struct iscsi_nopin *hdr)
+{
+	const uint32_t pdu_exp = be32_to_cpu(hdr->exp_cmdsn);
+	const uint32_t pdu_max = be32_to_cpu(hdr->max_cmdsn);
+
+	if (pdu_max > pdu_exp - INVALID_SN_DELTA && pdu_max < pdu_exp - 1)
+		return ISCSI_ERR_MAX_CMDSN;
+
+	if (pdu_max < session->max_cmdsn - INVALID_SN_DELTA ||
+	    pdu_max > session->max_cmdsn)
+		session->max_cmdsn = pdu_max;
+
+	if (pdu_exp < session->exp_cmdsn - INVALID_SN_DELTA ||
+	    pdu_exp > session->exp_cmdsn)
+		session->exp_cmdsn = pdu_exp;
+
+	return 0;
+}
+
+/*
+ * Detach @ctask from its SCSI command and return it to the session's command
+ * pool.
+ *
+ * A task whose ->sc is already NULL is left alone. The ->sc test is made
+ * while holding session->lock so that the check and the hand-back to the pool
+ * form one critical section; sampling ->sc before taking the lock would let
+ * two racing callers both put the same task back.
+ */
+static inline void
+iscsi_iser_ctask_cleanup(struct iscsi_iser_conn *conn,
+			 struct iscsi_iser_cmd_task *ctask)
+{
+	struct iscsi_iser_session *session = conn->session;
+
+	spin_lock(&session->lock);
+	if (likely(ctask->sc)) {
+		ctask->sc = NULL;
+		__kfifo_put(session->cmdpool.queue, (void*)&ctask,
+			    sizeof(void*));
+	}
+	spin_unlock(&session->lock);
+}
+
+/**
+ * iscsi_iser_cmd_rsp - SCSI Command Response processing
+ * @conn: iscsi connection
+ * @ctask: scsi command task
+ * @hdr: received PDU header, interpreted as a SCSI Response header
+ * @rx_data: data segment received with the PDU; when present, its first two
+ *	     bytes hold the sense length and the sense bytes follow
+ *
+ * Updates the session CmdSN window and the connection's ExpStatSN, fills in
+ * sc->result, the sense buffer and the residual count, returns @ctask to the
+ * command pool and completes the command through sc->scsi_done().
+ *
+ * Returns the result of the CmdSN check (0 on success).
+ **/
+static int
+iscsi_iser_cmd_rsp(struct iscsi_iser_conn *conn,
+		   struct iscsi_iser_cmd_task *ctask,
+		   struct iscsi_hdr *hdr, char *rx_data)
+{
+	int rc;
+	struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr;
+	struct iscsi_iser_session *session = conn->session;
+	struct scsi_cmnd *sc = ctask->sc;
+	int datalen = ntoh24(rhdr->dlength);
+	int senselen = 0;
+	unsigned char *data = NULL;
+
+	rc = iscsi_iser_check_assign_cmdsn(session, (struct iscsi_nopin*)rhdr);
+	if (rc) {
+		sc->result = (DID_ERROR << 16);
+		goto out;
+	}
+
+	conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+
+	sc->result = (DID_OK << 16) | rhdr->cmd_status;
+
+	if (rhdr->response != ISCSI_STATUS_CMD_COMPLETED) {
+		sc->result = (DID_ERROR << 16);
+		goto out;
+	}
+
+	/*
+	 * Read the length bytes as unsigned: with plain (possibly signed)
+	 * char a byte >= 0x80 sign-extends and yields a negative length.
+	 * Also never trust the target to advertise more sense bytes than
+	 * the data segment actually carries.
+	 */
+	if (datalen >= 2 && rx_data) {
+		data = (unsigned char *)rx_data;
+		senselen = (data[0] << 8) | data[1];
+		if (senselen > datalen - 2)
+			senselen = datalen - 2;
+	}
+
+	if (rhdr->cmd_status == SAM_STAT_CHECK_CONDITION && senselen) {
+		int sensecopy = min(senselen, SCSI_SENSE_BUFFERSIZE);
+
+		memcpy(sc->sense_buffer, data + 2, sensecopy);
+		debug_scsi("copied %d bytes of sense\n", sensecopy);
+	}
+
+	/* residual handling below applies to non-write commands only */
+	if (sc->sc_data_direction == DMA_TO_DEVICE)
+		goto out;
+
+	if (rhdr->flags & ISCSI_FLAG_CMD_UNDERFLOW) {
+		int res_count = be32_to_cpu(rhdr->residual_count);
+
+		if (res_count > 0 && res_count <= sc->request_bufflen)
+			sc->resid = res_count;
+		else
+			sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+	} else if (rhdr->flags & ISCSI_FLAG_CMD_BIDI_UNDERFLOW)
+		sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+	else if (rhdr->flags & ISCSI_FLAG_CMD_OVERFLOW)
+		sc->resid = be32_to_cpu(rhdr->residual_count);
+
+out:
+	debug_scsi("done [sc %lx res %d itt 0x%x]\n",
+		   (long)sc, sc->result, ctask->itt);
+
+	/* give the task back first; sc is still valid via the local copy */
+	iscsi_iser_ctask_cleanup(conn, ctask);
+	sc->scsi_done(sc);
+	return rc;
+}
+
+/*
+ * Process a received iSCSI PDU.
+ *
+ * @conn:    connection the PDU arrived on
+ * @hdr:     PDU header
+ * @rx_data: data segment received with the PDU, passed to
+ *           iscsi_iser_cmd_rsp() for SCSI responses
+ *
+ * The data segment length and the ITT (session age, connection id, task
+ * index) are validated first. The PDU is then dispatched by ITT range:
+ * command tasks (itt < cmds_max), management tasks (ISCSI_MGMT_ITT_OFFSET
+ * based) or the reserved tag.
+ *
+ * Returns 0 on success or an ISCSI_ERR_* code.
+ */
+int
+iscsi_iser_hdr_recv(struct iscsi_iser_conn *conn, struct iscsi_hdr *hdr, char *rx_data)
+{
+	int rc = 0;
+	struct iscsi_iser_cmd_task *ctask;
+	struct iscsi_iser_session *session = conn->session;
+	uint32_t itt;
+	int datalen;
+	int ahslen;
+
+	/* verify PDU length */
+	datalen = ntoh24(hdr->dlength);
+	if (datalen > DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH) {
+		printk(KERN_ERR "iscsi_iser: datalen %d > %d\n",
+		       datalen, DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH);
+		return ISCSI_ERR_DATALEN;
+	}
+
+	/* read AHS (hlength counts 4-byte words) */
+	ahslen = hdr->hlength * 4;
+
+	/* verify itt (itt encoding: age+cid+itt) */
+	itt = hdr->itt;
+	if (itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
+		if ((itt & AGE_MASK) !=
+				(session->age << AGE_SHIFT)) {
+			printk(KERN_ERR "iscsi_iser: received itt %x expected "
+				"session age (%x)\n", itt,
+				session->age & AGE_MASK);
+			return ISCSI_ERR_BAD_ITT;
+		}
+
+		if ((itt & CID_MASK) != (conn->id << CID_SHIFT)) {
+			printk(KERN_ERR "iscsi_iser: received itt %x, expected "
+				"CID (%x)\n", itt, conn->id);
+			return ISCSI_ERR_BAD_ITT;
+		}
+		/* strip age and cid, keep the task index */
+		itt = itt & ITT_MASK;
+	}
+
+	if (itt < session->cmds_max) {
+		ctask = (struct iscsi_iser_cmd_task *)session->cmds[itt];
+
+		if (!ctask->sc) {
+			printk(KERN_INFO "iscsi_iser: dropping ctask with "
+			       "itt 0x%x\n", ctask->itt);
+			return 0;
+		}
+
+		if (ctask->sc->SCp.phase != session->age) {
+			printk(KERN_ERR "iscsi_iser: ctask's session age %d, "
+				"expected %d\n", ctask->sc->SCp.phase,
+				session->age);
+			return ISCSI_ERR_SESSION_FAILED;
+		}
+
+		debug_scsi("rsp [op 0x%x cid %d sc %lx itt 0x%x len %d]\n",
+			   hdr->opcode, conn->id, (long)ctask->sc,
+			   ctask->itt, datalen);
+
+		switch(hdr->opcode) {
+		case ISCSI_OP_SCSI_CMD_RSP:
+			BUG_ON((void*)ctask != ctask->sc->SCp.ptr);
+			/*
+			 * NOTE(review): a response that carries a data
+			 * segment (e.g. sense data) is not passed to
+			 * iscsi_iser_cmd_rsp() from here -- confirm that
+			 * such responses are completed elsewhere.
+			 */
+			if (!datalen)
+				rc = iscsi_iser_cmd_rsp(conn, ctask, hdr, rx_data);
+			break;
+
+		default:
+			rc = ISCSI_ERR_BAD_OPCODE;
+			break;
+		}
+	} else if (itt >= ISCSI_MGMT_ITT_OFFSET &&
+		   itt < ISCSI_MGMT_ITT_OFFSET + session->mgmtpool_max) {
+		struct iscsi_iser_mgmt_task *mtask = (struct iscsi_iser_mgmt_task *)
+					session->mgmt_cmds[itt -
+						ISCSI_MGMT_ITT_OFFSET];
+
+		debug_scsi("immrsp [op 0x%x cid %d itt 0x%x len %d]\n",
+			   hdr->opcode, conn->id, mtask->itt,
+			   datalen);
+
+		switch(hdr->opcode) {
+		case ISCSI_OP_LOGIN_RSP:
+		case ISCSI_OP_TEXT_RSP:
+		case ISCSI_OP_LOGOUT_RSP:
+			rc = iscsi_iser_check_assign_cmdsn(session,
+						 (struct iscsi_nopin*)hdr);
+			if (rc)
+				break;
+
+			rc = iscsi_recv_pdu(iscsi_handle(conn), hdr,
+					    NULL, 0);
+			/* the login mtask stays owned by the connection */
+			if (conn->login_mtask != mtask) {
+				spin_lock(&session->lock);
+				__kfifo_put(session->mgmtpool.queue,
+				    (void*)&mtask, sizeof(void*));
+				spin_unlock(&session->lock);
+			}
+			break;
+		case ISCSI_OP_SCSI_TMFUNC_RSP:
+			rc = iscsi_iser_check_assign_cmdsn(session,
+						 (struct iscsi_nopin*)hdr);
+			if (rc)
+				break;
+
+			if (datalen || ahslen) {
+				rc = ISCSI_ERR_PROTO;
+				break;
+			}
+
+			spin_lock(&session->lock);
+			if (conn->tmabort_state == TMABORT_INITIAL) {
+				__kfifo_put(session->mgmtpool.queue,
+						(void*)&mtask, sizeof(void*));
+				conn->tmabort_state =
+					((struct iscsi_tm_rsp *)hdr)->
+					response == ISCSI_TMF_RSP_COMPLETE ?
+						TMABORT_SUCCESS:TMABORT_FAILED;
+				/* unblock eh_abort() */
+				wake_up(&conn->ehwait);
+			}
+			spin_unlock(&session->lock);
+			break;
+		case ISCSI_OP_NOOP_IN:
+			if (hdr->ttt != ISCSI_RESERVED_TAG) {
+				rc = ISCSI_ERR_PROTO;
+				break;
+			}
+			rc = iscsi_iser_check_assign_cmdsn(session,
+						(struct iscsi_nopin*)hdr);
+			if (rc)
+				break;
+			conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
+
+			rc = iscsi_recv_pdu(iscsi_handle(conn), hdr,
+					    NULL, 0);
+			mtask = (struct iscsi_iser_mgmt_task *)
+				session->mgmt_cmds[itt -
+						ISCSI_MGMT_ITT_OFFSET];
+			if (conn->login_mtask != mtask) {
+				spin_lock(&session->lock);
+				__kfifo_put(session->mgmtpool.queue,
+					  (void*)&mtask, sizeof(void*));
+				spin_unlock(&session->lock);
+			}
+			break;
+		default:
+			rc = ISCSI_ERR_BAD_OPCODE;
+			break;
+		}
+	} else if (itt == ISCSI_RESERVED_TAG) {
+		switch(hdr->opcode) {
+		case ISCSI_OP_NOOP_IN:
+			rc = iscsi_iser_check_assign_cmdsn(session,
+					 (struct iscsi_nopin*)hdr);
+			if (!rc && hdr->ttt != ISCSI_RESERVED_TAG)
+				rc = iscsi_recv_pdu(iscsi_handle(conn),
+						    hdr, NULL, 0);
+			break;
+		case ISCSI_OP_REJECT:
+			/* we need sth like iscsi_reject_rsp()*/
+		case ISCSI_OP_ASYNC_EVENT:
+			/* we need sth like iscsi_async_event_rsp() */
+			rc = ISCSI_ERR_BAD_OPCODE;
+			break;
+		default:
+			rc = ISCSI_ERR_BAD_OPCODE;
+			break;
+		}
+	} else
+		rc = ISCSI_ERR_BAD_ITT;
+
+	return rc;
+}
+
+/*
+ * Build the header of the next unsolicited Data-Out PDU for @ctask in @hdr.
+ *
+ * The PDU carries at most conn->max_xmit_dlength bytes; the chosen size is
+ * stored in ctask->data_count. The final flag is set only when the PDU covers
+ * all remaining unsolicited bytes. ctask->unsol_datasn is consumed and
+ * incremented.
+ */
+static void
+iscsi_iser_unsolicit_data_init(struct iscsi_iser_conn *conn,
+			       struct iscsi_iser_cmd_task *ctask,
+			       struct iscsi_data  *hdr)
+{
+	memset(hdr, 0, sizeof(struct iscsi_data));
+
+	hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
+	hdr->itt = ctask->hdr->itt;
+	hdr->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
+	memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
+	hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
+
+	hdr->datasn = cpu_to_be32(ctask->unsol_datasn);
+	ctask->unsol_datasn++;
+
+	/* offset = bytes preceding the still-unsent unsolicited data */
+	hdr->offset = cpu_to_be32(ctask->total_length -
+				  ctask->rdma_data_count -
+				  ctask->unsol_count);
+
+	if (ctask->unsol_count <= conn->max_xmit_dlength) {
+		/* everything that is left fits into this PDU */
+		hton24(hdr->dlength, ctask->unsol_count);
+		ctask->data_count = ctask->unsol_count;
+		hdr->flags = ISCSI_FLAG_CMD_FINAL;
+	} else {
+		hton24(hdr->dlength, conn->max_xmit_dlength);
+		ctask->data_count = conn->max_xmit_dlength;
+		hdr->flags = 0;
+	}
+}
+
+
+/**
+ * iscsi_iser_cmd_init - Initialize iSCSI SCSI_READ or SCSI_WRITE commands
+ * @conn: connection the command will be sent on
+ * @ctask: command task to set up
+ * @sc: SCSI command being issued
+ *
+ * Fills in the SCSI Command PDU header (consuming one session CmdSN), splits
+ * a write into immediate, unsolicited and RDMA byte counts, and finishes with
+ * iser_ctask_rdma_init().
+ **/
+static void
+iscsi_iser_cmd_init(struct iscsi_iser_conn *conn,
+		    struct iscsi_iser_cmd_task *ctask,
+		    struct scsi_cmnd *sc)
+{
+	struct iscsi_iser_session *session = conn->session;
+
+	/* task bookkeeping */
+	ctask->conn = conn;
+	ctask->sc = sc;
+	ctask->mtask = NULL;
+	ctask->command_sent = 0;
+	ctask->total_length = sc->request_bufflen;
+
+	/* direction-independent part of the header */
+	ctask->hdr->opcode = ISCSI_OP_SCSI_CMD;
+	ctask->hdr->flags = ISCSI_ATTR_SIMPLE;
+	ctask->hdr->lun[1] = sc->device->lun;
+	ctask->hdr->itt = ctask->itt | (conn->id << CID_SHIFT) |
+			 (session->age << AGE_SHIFT);
+	ctask->hdr->data_length = cpu_to_be32(sc->request_bufflen);
+	ctask->hdr->cmdsn = cpu_to_be32(session->cmdsn);
+	session->cmdsn++;
+	ctask->hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
+	memcpy(ctask->hdr->cdb, sc->cmnd, sc->cmd_len);
+	memset(&ctask->hdr->cdb[sc->cmd_len], 0,
+	       MAX_COMMAND_SIZE - sc->cmd_len);
+
+	if (sc->sc_data_direction != DMA_TO_DEVICE) {
+		/* not a write: no data segment, whole buffer goes via RDMA */
+		ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+		if (sc->sc_data_direction == DMA_FROM_DEVICE)
+			ctask->hdr->flags |= ISCSI_FLAG_CMD_READ;
+		ctask->datasn = 0;
+		zero_data(ctask->hdr->dlength);
+		ctask->rdma_data_count = ctask->total_length;
+
+		iser_ctask_rdma_init(ctask);
+		return;
+	}
+
+	ctask->hdr->flags |= ISCSI_FLAG_CMD_WRITE;
+	BUG_ON(ctask->total_length == 0);
+
+	/* unsolicited bytes to be sent as imm. data - with cmd pdu */
+	ctask->imm_count = 0;
+	/* unsolicited bytes to be sent as data-out */
+	ctask->unsol_count = 0;
+	ctask->unsol_datasn = 0;
+
+	if (!session->imm_data_en) {
+		zero_data(ctask->hdr->dlength);
+	} else {
+		/* immediate data: bounded by burst, length and PDU size */
+		if (ctask->total_length < session->first_burst)
+			ctask->imm_count = min(ctask->total_length,
+					       conn->max_xmit_dlength);
+		else
+			ctask->imm_count = min(session->first_burst,
+					       conn->max_xmit_dlength);
+		hton24(ctask->hdr->dlength, ctask->imm_count);
+	}
+
+	if (!session->initial_r2t_en)
+		ctask->unsol_count = min(session->first_burst,
+			ctask->total_length) - ctask->imm_count;
+	if (!ctask->unsol_count)
+		/* No unsolicit Data-Out's */
+		ctask->hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+
+	/* bytes to be sent via RDMA operations */
+	ctask->rdma_data_count = ctask->total_length -
+				 ctask->imm_count -
+				 ctask->unsol_count;
+
+	debug_scsi("cmd [itt %x total %d imm %d imm_data %d "
+		   "rdma_data %d]\n",
+		   ctask->itt, ctask->total_length, ctask->imm_count,
+		   ctask->unsol_count, ctask->rdma_data_count);
+
+	iser_ctask_rdma_init(ctask);
+}
+
+/**
+ * iscsi_iser_mtask_xmit - xmit management(immediate) task
+ * @conn: iscsi connection
+ * @mtask: task management task
+ *
+ * Notes:
+ *	Returns whatever iser_send_control() returned. '0' means successful
+ *	xmit; on -EAGAIN the caller must call again later, or recover. Any
+ *	other error is also reported as a connection failure.
+ *
+ **/
+static int
+iscsi_iser_mtask_xmit(struct iscsi_iser_conn *conn,
+				 struct iscsi_iser_mgmt_task *mtask)
+{
+	int rc;
+
+	debug_scsi("mtask deq [cid %d itt 0x%x]\n", conn->id, mtask->itt);
+
+	rc = iser_send_control(conn, mtask);
+	if (rc == 0 || rc == -EAGAIN)
+		return rc;
+
+	iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+	return rc;
+}
+
+/*
+ * Send Data-Out PDUs until @ctask has no unsolicited bytes left.
+ *
+ * Returns 0 when everything was sent, otherwise the error from
+ * iser_send_data_out(); in that case the DataSN consumed for the failed PDU
+ * is given back.
+ */
+static int
+iscsi_iser_ctask_xmit_unsol_data(struct iscsi_iser_conn *conn,
+				 struct iscsi_iser_cmd_task *ctask)
+{
+	struct iscsi_data dout_hdr;
+
+	while (ctask->unsol_count > 0) {
+		int rc;
+
+		iscsi_iser_unsolicit_data_init(conn, ctask, &dout_hdr);
+
+		debug_scsi("Sending data-out: itt 0x%x, data count %d\n",
+			   dout_hdr.itt, ctask->data_count);
+
+		/* the buffer description has been passed with the command */
+		rc = iser_send_data_out(conn, ctask, &dout_hdr);
+		if (rc) {
+			/* undo the increment done by the header init */
+			ctask->unsol_datasn--;
+			return rc;
+		}
+
+		ctask->unsol_count -= ctask->data_count;
+		debug_scsi("Need to send %d more as data-out PDUs\n",
+			   ctask->unsol_count);
+	}
+
+	return 0;
+}
+
+/*
+ * Transmit a command task: the command PDU (once), then any unsolicited
+ * Data-Out PDUs.
+ *
+ * Returns 0 on success or when the task has an mtask attached. Errors other
+ * than -EAGAIN are also reported as a connection failure.
+ */
+static int
+iscsi_iser_ctask_xmit(struct iscsi_iser_conn *conn,
+		      struct iscsi_iser_cmd_task *ctask)
+{
+	int rc = 0;
+
+	debug_scsi("ctask deq [cid %d itt 0x%x]\n",
+		   conn->id, ctask->itt);
+
+	/* serialize with TMF AbortTask */
+	if (ctask->mtask)
+		return 0;
+
+	/* the cmd PDU goes out only once */
+	if (!ctask->command_sent) {
+		rc = iser_send_command(conn, ctask);
+		if (!rc)
+			ctask->command_sent = 1;
+	}
+
+	/* unsolicited data-out PDU(s), if the command made it out */
+	if (!rc && ctask->unsol_count)
+		rc = iscsi_iser_ctask_xmit_unsol_data(conn, ctask);
+
+	if (rc && rc != -EAGAIN)
+		iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+	return rc;
+}
+
+/*
+ * Drain one queue of management tasks (@queue is conn->immqueue or
+ * conn->mgmtqueue), using conn->mtask as the in-progress slot.
+ *
+ * A transmitted task whose ITT is the reserved tag goes straight back to the
+ * session's mgmt pool. Returns 0 when the queue was empty or fully drained
+ * (conn->mtask is cleared in the latter case), non-zero when a transmit
+ * failed (conn->mtask keeps pointing at the failed task).
+ */
+static int
+iscsi_iser_xmit_mtask_queue(struct iscsi_iser_conn *conn, struct kfifo *queue)
+{
+	struct iscsi_iser_session *session = conn->session;
+
+	if (likely(!__kfifo_len(queue)))
+		return 0;
+
+	while (__kfifo_get(queue, (void*)&conn->mtask, sizeof(void*))) {
+		if (iscsi_iser_mtask_xmit(conn, conn->mtask))
+			return -EAGAIN;
+
+		if (conn->mtask->hdr->itt ==
+		    cpu_to_be32(ISCSI_RESERVED_TAG)) {
+			spin_lock_bh(&session->lock);
+			__kfifo_put(session->mgmtpool.queue,
+				    (void*)&conn->mtask,
+				    sizeof(void*));
+			spin_unlock_bh(&session->lock);
+		}
+	}
+	/* done with this mtask */
+	conn->mtask = NULL;
+
+	return 0;
+}
+
+/**
+ * iscsi_iser_data_xmit - xmit any command into the scheduled connection
+ * @conn: iscsi connection
+ *
+ * Notes:
+ *	Returns -EAGAIN when a transmit failed and Tx is not suspended, in
+ *	which case the caller must re-schedule it again later or recover.
+ *	Returns '0' on successful xmit and whenever Tx is suspended.
+ **/
+static int
+iscsi_iser_data_xmit(struct iscsi_iser_conn *conn)
+{
+	if (unlikely(conn->suspend_tx)) {
+		debug_iser("conn %d Tx suspended!\n", conn->id);
+		return 0;
+	}
+
+	/*
+	 * Transmit order:
+	 *
+	 * 1) un-finished xmit (ctask or mtask)
+	 * 2) immediate control PDUs
+	 * 3) SCSI commands
+	 * 4) non-immediate control PDUs
+	 *
+	 * No need to lock around __kfifo_get as long as
+	 * there's one producer and one consumer.
+	 */
+
+	BUG_ON(conn->ctask && conn->mtask);
+
+	/* 1) whatever was left in progress by a previous call */
+	if (conn->ctask) {
+		if (iscsi_iser_ctask_xmit(conn, conn->ctask))
+			goto xmit_fail;
+		conn->ctask = NULL;
+	}
+	if (conn->mtask) {
+		if (iscsi_iser_mtask_xmit(conn, conn->mtask))
+			goto xmit_fail;
+		conn->mtask = NULL;
+	}
+
+	/* 2) immediate control PDUs */
+	if (iscsi_iser_xmit_mtask_queue(conn, conn->immqueue))
+		goto xmit_fail;
+
+	/* 3) command queue */
+	while (__kfifo_get(conn->xmitqueue, (void*)&conn->ctask,
+			   sizeof(void*))) {
+		if (iscsi_iser_ctask_xmit(conn, conn->ctask))
+			goto xmit_fail;
+	}
+	/* done with this ctask */
+	conn->ctask = NULL;
+
+	/* 4) the rest of the control plane PDUs, if any */
+	if (iscsi_iser_xmit_mtask_queue(conn, conn->mgmtqueue))
+		goto xmit_fail;
+
+	return 0;
+
+xmit_fail:
+	return unlikely(conn->suspend_tx) ? 0 : -EAGAIN;
+}
+
+/*
+ * Work-queue handler: run the transmit path for the connection passed in
+ * @data, and queue the work again when the transmit path asks for a retry.
+ */
+static void
+iscsi_iser_xmitworker(void *data)
+{
+	struct iscsi_iser_conn *conn = data;
+	int retry;
+
+	/* xmitmutex serializes the Xmit worker per connection */
+	mutex_lock(&conn->xmitmutex);
+	retry = iscsi_iser_data_xmit(conn);
+	if (retry)
+		schedule_work(&conn->xmitwork);
+	mutex_unlock(&conn->xmitmutex);
+}
+
+
+
+/* reasons reported when a command is rejected or failed at queue time */
+#define FAILURE_BAD_HOST		1
+#define FAILURE_SESSION_FAILED		2
+#define FAILURE_SESSION_FREED		3
+#define FAILURE_WINDOW_CLOSED		4
+#define FAILURE_SESSION_TERMINATE	5
+#define FAILURE_OOM			6
+
+/*
+ * queuecommand entry point: bind @sc to a command task and queue it for
+ * transmission on the session's leading connection.
+ *
+ * Returns 0 when the command was queued, or when it was completed on the
+ * spot with DID_NO_CONNECT because the session is neither logged in nor
+ * failed. Returns SCSI_MLQUEUE_HOST_BUSY when the session is in the failed
+ * state, the CmdSN window is closed, or the command pool is empty.
+ */
+static int
+iscsi_iser_queuecommand(struct scsi_cmnd *sc,
+			void (*done)(struct scsi_cmnd *))
+{
+	struct Scsi_Host *host;
+	int reason = 0;
+	struct iscsi_iser_session *session;
+	struct iscsi_iser_conn *conn = NULL;
+	struct iscsi_iser_cmd_task *ctask = NULL;
+
+	sc->scsi_done = done;
+	sc->result = 0;
+
+	host = sc->device->host;
+	session = iscsi_hostdata(host->hostdata);
+	BUG_ON(host != session->host);
+
+	spin_lock(&session->lock);
+
+	if (session->state != ISCSI_STATE_LOGGED_IN) {
+		if (session->state == ISCSI_STATE_FAILED) {
+			reason = FAILURE_SESSION_FAILED;
+			debug_scsi("rejecting becuase session->state = %d\n",
+				   session->state);
+			goto reject;
+		} else if (session->state == ISCSI_STATE_TERMINATE) {
+			reason = FAILURE_SESSION_TERMINATE;
+			goto fault;
+		}
+		reason = FAILURE_SESSION_FREED;
+		goto fault;
+	}
+
+	/*
+	 * Check for iSCSI window and take care of CmdSN wrap-around
+	 */
+	if ((int)(session->max_cmdsn - session->cmdsn) < 0) {
+		reason = FAILURE_WINDOW_CLOSED;
+		debug_scsi("rejecting becuase session->max_cmdsn = %d "
+			   "& session->cmdsn = %d\n",
+			   session->max_cmdsn,
+			   session->cmdsn);
+		goto reject;
+	}
+
+	conn = session->leadconn;
+
+	/*
+	 * An empty pool leaves ctask NULL; ask the caller to retry later
+	 * rather than dereferencing it.
+	 */
+	if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask,
+			 sizeof(void*))) {
+		reason = FAILURE_OOM;
+		goto reject;
+	}
+	BUG_ON(ctask->sc);
+
+	/* remember session age and owning task for the response path */
+	sc->SCp.phase = session->age;
+	sc->SCp.ptr = (char*)ctask;
+	iscsi_iser_cmd_init(conn, ctask, sc);
+
+	__kfifo_put(conn->xmitqueue, (void*)&ctask, sizeof(void*));
+	debug_scsi(
+		  "ctask enq [%s cid %d sc %lx itt 0x%x "
+		  "len %d cmdsn %d win %d]\n",
+		  sc->sc_data_direction == DMA_TO_DEVICE ? "write" : "read",
+		  conn->id, (long)sc, ctask->itt, sc->request_bufflen,
+		  session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+	spin_unlock(&session->lock);
+
+	/*
+	 * Transmit inline when possible (host_lock is dropped around the
+	 * xmit); otherwise leave it to the xmit worker.
+	 */
+	if (!in_interrupt() && mutex_trylock(&conn->xmitmutex)) {
+		spin_unlock_irq(host->host_lock);
+		if (iscsi_iser_data_xmit(conn))
+			schedule_work(&conn->xmitwork);
+		mutex_unlock(&conn->xmitmutex);
+		spin_lock_irq(host->host_lock);
+	} else
+		schedule_work(&conn->xmitwork);
+
+	return 0;
+
+reject:
+	spin_unlock(&session->lock);
+	debug_scsi("cmd 0x%x rejected (%d)\n", sc->cmnd[0], reason);
+	return SCSI_MLQUEUE_HOST_BUSY;
+
+fault:
+	spin_unlock(&session->lock);
+	printk(KERN_ERR "iscsi_iser: cmd 0x%x is not queued (%d)\n",
+	       sc->cmnd[0], reason);
+	sc->sense_buffer[0] = 0x70;
+	sc->sense_buffer[2] = NOT_READY;
+	sc->sense_buffer[7] = 0x6;
+	sc->sense_buffer[12] = 0x08;
+	sc->sense_buffer[13] = 0x00;
+	sc->result = (DID_NO_CONNECT << 16);
+	sc->resid = sc->request_bufflen;
+	sc->scsi_done(sc);
+	return 0;
+}
+
+/*
+ * Create a pool of @max zeroed elements of @item_size bytes each.
+ *
+ * @q:         pool descriptor; q->pool is the buffer that backs q->queue
+ * @max:       number of elements
+ * @items:     out: array of the @max element pointers, in allocation order
+ * @item_size: size of one element
+ *
+ * Every element is placed on q->queue. Returns 0 on success or -ENOMEM, in
+ * which case everything allocated here has been released.
+ */
+static int
+iscsi_iser_pool_init(struct iscsi_iser_queue *q, int max,
+		     void ***items, int item_size)
+{
+	int i;
+
+	*items = kmalloc(max * sizeof(void*), GFP_KERNEL);
+	if (*items == NULL)
+		return -ENOMEM;
+
+	q->max = max;
+	q->pool = kmalloc(max * sizeof(void*), GFP_KERNEL);
+	if (q->pool == NULL)
+		goto free_items;
+
+	q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
+			      GFP_KERNEL, NULL);
+	if (q->queue == ERR_PTR(-ENOMEM))
+		goto free_pool;
+
+	for (i = 0; i < max; i++) {
+		q->pool[i] = kzalloc(item_size, GFP_KERNEL);
+		if (q->pool[i] == NULL)
+			goto free_elements;
+		(*items)[i] = q->pool[i];
+		__kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
+	}
+	return 0;
+
+free_elements:
+	while (--i >= 0)
+		kfree((*items)[i]);
+	/*
+	 * kfifo_free() releases the buffer that was handed to kfifo_init(),
+	 * i.e. q->pool, together with the kfifo itself; freeing q->pool
+	 * again here would be a double free.
+	 */
+	kfifo_free(q->queue);
+	goto free_items;
+free_pool:
+	/* kfifo_init() failed, so the buffer is still ours to free */
+	kfree(q->pool);
+free_items:
+	kfree(*items);
+	return -ENOMEM;
+}
+
+/*
+ * Release a pool set up by iscsi_iser_pool_init(): every element listed in
+ * @items, the kfifo with its backing buffer (q->pool), and @items itself.
+ */
+static void
+iscsi_iser_pool_free(struct iscsi_iser_queue *q, void **items)
+{
+	int i;
+
+	for (i = 0; i < q->max; i++)
+		kfree(items[i]);
+	/*
+	 * q->pool is the buffer given to kfifo_init(), so kfifo_free()
+	 * releases it along with the struct kfifo, which a bare
+	 * kfree(q->pool) would leak.
+	 */
+	kfifo_free(q->queue);
+	kfree(items);
+}
+
+/*
+ * Allocate and initialize a connection object for the session behind
+ * @sessionh.
+ *
+ * Sets up the three transmit queues, the xmit work item, and takes one
+ * management task from the session pool to serve as the login mtask.
+ * Returns the handle of the new connection, or iscsi_handle(NULL) when any
+ * allocation fails.
+ */
+static iscsi_connh_t
+iscsi_iser_conn_create(iscsi_sessionh_t sessionh,
+		       uint32_t conn_idx)
+{
+	struct iscsi_iser_session *session = iscsi_ptr(sessionh);
+	struct iscsi_iser_conn *conn;
+	int have_login_mtask;
+
+	conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+	if (!conn)
+		return iscsi_handle(NULL);
+
+	/* basic connection state */
+	conn->session = session;
+	conn->id = conn_idx;
+	conn->c_stage = ISCSI_CONN_INITIAL_STAGE;
+	conn->tmabort_state = TMABORT_INITIAL;
+	conn->exp_statsn = 0;
+
+	/* queue of SCSI command PDUs waiting for xmit */
+	conn->xmitqueue = kfifo_alloc(session->cmds_max * sizeof(void*),
+				      GFP_KERNEL, NULL);
+	if (conn->xmitqueue == ERR_PTR(-ENOMEM))
+		goto free_conn;
+
+	/* queues of immediate and non-immediate control PDUs */
+	conn->immqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+				     GFP_KERNEL, NULL);
+	if (conn->immqueue == ERR_PTR(-ENOMEM))
+		goto free_xmitqueue;
+
+	conn->mgmtqueue = kfifo_alloc(session->mgmtpool_max * sizeof(void*),
+				      GFP_KERNEL, NULL);
+	if (conn->mgmtqueue == ERR_PTR(-ENOMEM))
+		goto free_immqueue;
+
+	INIT_WORK(&conn->xmitwork, iscsi_iser_xmitworker, conn);
+
+	/* reserve the mtask used for the login/text sequences */
+	spin_lock_bh(&session->lock);
+	have_login_mtask = __kfifo_get(session->mgmtpool.queue,
+				       (void*)&conn->login_mtask,
+				       sizeof(void*));
+	spin_unlock_bh(&session->lock);
+	if (!have_login_mtask)
+		goto free_mgmtqueue;
+
+	init_timer(&conn->tmabort_timer);
+	mutex_init(&conn->xmitmutex);
+	init_waitqueue_head(&conn->ehwait);
+	spin_lock_init(&conn->lock);
+
+	return iscsi_handle(conn);
+
+free_mgmtqueue:
+	kfifo_free(conn->mgmtqueue);
+free_immqueue:
+	kfifo_free(conn->immqueue);
+free_xmitqueue:
+	kfifo_free(conn->xmitqueue);
+free_conn:
+	kfree(conn);
+	return iscsi_handle(NULL);
+}
+
+/*
+ * Destroy the connection behind @connh.
+ *
+ * Suspends transmission, releases the socket if the connection is still in
+ * its initial stage, moves the session to ISCSI_STATE_TERMINATE when this is
+ * the leading connection, waits until the SCSI host reports no busy
+ * commands, returns the login mtask to the session pool, unlinks the
+ * connection from the session and frees it.
+ */
+static void
+iscsi_iser_conn_destroy(iscsi_connh_t connh)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+	unsigned long flags;
+
+	/* xmitmutex keeps the xmit path out while Tx is being suspended */
+	mutex_lock(&conn->xmitmutex);
+	set_bit(SUSPEND_BIT, &conn->suspend_tx);
+
+	if (conn->c_stage == ISCSI_CONN_INITIAL_STAGE && conn->sock) {
+		sock_release(conn->sock);
+		conn->sock = NULL;
+	}
+
+	spin_lock_bh(&session->lock);
+	conn->c_stage = ISCSI_CONN_CLEANUP_WAIT;
+	if (session->leadconn == conn) {
+		/*
+		 * leading connection? then give up on recovery.
+		 */
+		session->state = ISCSI_STATE_TERMINATE;
+		wake_up(&conn->ehwait);
+	}
+	spin_unlock_bh(&session->lock);
+
+	mutex_unlock(&conn->xmitmutex);
+
+	/*
+	 * Block until all in-progress commands for this connection
+	 * time out or fail.
+	 */
+	for (;;) {
+		spin_lock_irqsave(session->host->host_lock, flags);
+		if (!session->host->host_busy) { /* OK for ERL == 0 */
+			spin_unlock_irqrestore(session->host->host_lock, flags);
+			debug_iser("%s: released host_lock (host's not busy)\n", __FUNCTION__);
+			break;
+		}
+		spin_unlock_irqrestore(session->host->host_lock, flags);
+		/* poll host_busy twice a second */
+		msleep_interruptible(500);
+		debug_iser("conn_destroy(): host = 0x%p, host_busy %d host_failed %d\n", session->host,
+			   session->host->host_busy, session->host->host_failed);
+		/*
+		 * force eh_abort() to unblock
+		 */
+		wake_up(&conn->ehwait);
+	}
+
+	spin_lock_bh(&session->lock);
+	/* give back the mtask reserved for login/text sequences */
+	__kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask,
+		    sizeof(void*));
+	/*
+	 * NOTE(review): conn->item is linked into session->connections by
+	 * iscsi_iser_conn_bind(); confirm destroy is never reached for a
+	 * connection that was not bound.
+	 */
+	list_del(&conn->item);
+	if (list_empty(&session->connections))
+		session->leadconn = NULL;
+	/* the leading connection is going away: promote the first one left */
+	if (session->leadconn && session->leadconn == conn)
+		session->leadconn = container_of(session->connections.next,
+			struct iscsi_iser_conn, item);
+
+	if (session->leadconn == NULL)
+		/* no connections exist.. reset sequencing */
+		session->cmdsn = session->max_cmdsn = session->exp_cmdsn = 1;
+	spin_unlock_bh(&session->lock);
+
+	kfifo_free(conn->xmitqueue);
+	kfifo_free(conn->immqueue);
+	kfifo_free(conn->mgmtqueue);
+	kfree(conn);
+}
+
+/*
+ * Bind a connection to a session and to the transport behind @transport_fd.
+ *
+ * @sessionh:     session to bind to
+ * @connh:        connection being bound
+ * @transport_fd: file descriptor of the previously connected socket
+ * @is_leading:   non-zero to make this the session's leading connection
+ *
+ * Returns 0 on success, -EEXIST when the descriptor cannot be resolved to a
+ * socket, or -EIO when the connection is already on the session's list but
+ * is not in a stopped, re-bindable state.
+ */
+static int
+iscsi_iser_conn_bind(iscsi_sessionh_t sessionh,
+		     iscsi_connh_t connh, uint32_t transport_fd,
+		     int is_leading)
+{
+	struct iscsi_iser_session *session = iscsi_ptr(sessionh);
+	struct iscsi_iser_conn *tmp = ERR_PTR(-EEXIST), *conn = iscsi_ptr(connh);
+	struct socket *sock;
+	struct iser_conn *p_iser_conn;
+	int error = 0;
+
+	/* lookup for existing socket */
+	sock = sockfd_lookup(transport_fd, &error);
+	if (!sock) {
+		printk(KERN_ERR "iscsi_iser: sockfd_lookup failed %d\n",
+		       error);
+		return -EEXIST;
+	}
+
+	/* lookup for existing connection */
+	spin_lock_bh(&session->lock);
+	list_for_each_entry(tmp, &session->connections, item) {
+		if (tmp == conn) {
+			if (conn->c_stage != ISCSI_CONN_STOPPED ||
+			    conn->stop_stage == STOP_CONN_TERM) {
+				printk(KERN_ERR "iscsi_iser: can't bind "
+				       "non-stopped connection (%d:%d)\n",
+				       conn->c_stage, conn->stop_stage);
+				spin_unlock_bh(&session->lock);
+				/* drop the reference sockfd_lookup() took */
+				sockfd_put(sock);
+				return -EIO;
+			}
+			break;
+		}
+	}
+	if (tmp != conn) {
+		/* bind new iSCSI connection to session */
+		conn->session = session;
+
+		list_add(&conn->item, &session->connections);
+	}
+	spin_unlock_bh(&session->lock);
+
+	/*
+	 * NOTE(review): when stop_stage is STOP_CONN_SUSPEND the looked-up
+	 * socket is not stored in conn->sock -- confirm who drops the
+	 * reference taken by sockfd_lookup() in that case.
+	 */
+	if (conn->stop_stage != STOP_CONN_SUSPEND) {
+		/* bind iSCSI connection and socket */
+		conn->sock = sock;
+	}
+
+	/* binds the iSER connection retrieved from the previously connected   *
+	 * socket to the iSCSI layer connection. exchanges connection pointers */
+	p_iser_conn = iser_conn_from_sock(sock);
+	p_iser_conn->p_iscsi_conn = conn;
+	conn->ib_conn             = p_iser_conn;
+
+	if (is_leading)
+		session->leadconn = conn;
+
+	clear_bit(SUSPEND_BIT, &conn->suspend_tx);
+
+	return 0;
+}
+
+/*
+ * Start a bound connection.
+ *
+ * Marks the connection started and the session logged in, undoes whatever
+ * stop stage the connection was left in, then switches the iSER connection
+ * to full-featured mode.
+ *
+ * Returns -EPERM when the connection has no session, otherwise the result of
+ * iser_conn_set_full_featured_mode().
+ */
+static int
+iscsi_iser_conn_start(iscsi_connh_t connh)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+
+	if (!session) {
+		printk(KERN_ERR "iscsi_iser: can't start unbound connection\n");
+		return -EPERM;
+	}
+
+	spin_lock_bh(&session->lock);
+	conn->c_stage = ISCSI_CONN_STARTED;
+	session->state = ISCSI_STATE_LOGGED_IN;
+
+	if (conn->stop_stage == STOP_CONN_RECOVER) {
+		/*
+		 * recovery succeeded: start a new session age, re-arm the
+		 * abort state and unblock eh_abort() if it is blocked
+		 */
+		session->conn_cnt++;
+		conn->stop_stage = 0;
+		conn->tmabort_state = TMABORT_INITIAL;
+		session->age++;
+		wake_up(&conn->ehwait);
+	} else if (conn->stop_stage == STOP_CONN_TERM) {
+		session->conn_cnt++;
+		conn->stop_stage = 0;
+	} else if (conn->stop_stage == STOP_CONN_SUSPEND) {
+		conn->stop_stage = 0;
+		clear_bit(SUSPEND_BIT, &conn->suspend_tx);
+	}
+	spin_unlock_bh(&session->lock);
+
+	return iser_conn_set_full_featured_mode(conn);
+}
+
+/*
+ * Stop the connection behind @connh.
+ *
+ * @flag is recorded as the connection's stop stage (STOP_CONN_SUSPEND,
+ * STOP_CONN_TERM or STOP_CONN_RECOVER are the values tested here).
+ * Transmission is suspended in every case. For TERM and RECOVER the transmit
+ * queues are also flushed back to their pools, the iSER connection is
+ * terminated and the socket is released.
+ */
+static void
+iscsi_iser_conn_stop(iscsi_connh_t connh, int flag)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+	struct iscsi_iser_cmd_task *ctask;
+	struct iscsi_iser_mgmt_task *mtask;
+	unsigned long flags;
+
+	BUG_ON(!conn->sock);
+
+	/* held for the whole function to keep the xmit path out */
+	mutex_lock(&conn->xmitmutex);
+
+	/* host_lock is taken first, session->lock nests inside it */
+       spin_lock_irqsave(session->host->host_lock, flags);
+	spin_lock(&session->lock);
+	conn->stop_stage = flag;
+	conn->c_stage = ISCSI_CONN_STOPPED;
+	set_bit(SUSPEND_BIT, &conn->suspend_tx);
+
+	/* a suspended connection still counts as part of the session */
+	if (flag != STOP_CONN_SUSPEND)
+		session->conn_cnt--;
+
+	if (session->conn_cnt == 0 || session->leadconn == conn)
+		session->state = ISCSI_STATE_FAILED;
+
+	spin_unlock(&session->lock);
+	spin_unlock_irqrestore(session->host->host_lock, flags);
+
+	if (flag == STOP_CONN_TERM || flag == STOP_CONN_RECOVER) {
+		/*
+		 * flush xmit queues.
+		 */
+		spin_lock_bh(&session->lock);
+		while (__kfifo_get(conn->xmitqueue, (void*)&ctask,
+				    sizeof(void*))) {
+			/*
+			 * iscsi_iser_ctask_cleanup() takes session->lock
+			 * itself, so drop it around the call
+			 */
+			spin_unlock_bh(&session->lock);
+			local_bh_disable();
+			iscsi_iser_ctask_cleanup(conn, ctask);
+			local_bh_enable();
+			spin_lock_bh(&session->lock);
+		}
+		conn->ctask = NULL;
+
+		/* queued management tasks go straight back to the pool */
+		while (__kfifo_get(conn->immqueue, (void*)&mtask,
+				   sizeof(void*)) ||
+		       __kfifo_get(conn->mgmtqueue, (void*)&mtask,
+				   sizeof(void*))) {
+			__kfifo_put(session->mgmtpool.queue,(void*)&mtask,
+				    sizeof(void*));
+		}
+		conn->mtask = NULL;
+		spin_unlock_bh(&session->lock);
+
+		/*
+		 * release conn only after we stopped data_xmit()
+		 * activity and flushed all outstandings
+		 */
+
+		/* starts conn teardown process, waits until all previously   *
+		 * posted buffers get flushed, deallocates all conn resources */
+		iser_conn_terminate(conn->ib_conn);
+
+		sock_release(conn->sock);
+		conn->sock = NULL;
+	}
+	mutex_unlock(&conn->xmitmutex);
+}
+
+
+/*
+ * iscsi_iser_conn_send_generic - queue a management PDU for transmission.
+ * @connh:     handle of the connection to send on
+ * @hdr:       PDU header; its itt, cmdsn and exp_statsn fields are
+ *             rewritten in place before the header is copied to the task
+ * @data:      optional data segment, copied into the task's buffer
+ * @data_size: number of bytes at @data (0 for none)
+ *
+ * Login and Text requests reuse the connection's preallocated login task,
+ * every other PDU takes a task from the session management pool. The task
+ * is put on the immediate or the management queue depending on
+ * ISCSI_OP_IMMEDIATE and the xmit work is scheduled.
+ *
+ * Returns 0 on success, -EPERM if the session is terminated, -ENOSPC if
+ * the management pool is empty.
+ */
+static int
+iscsi_iser_conn_send_generic(iscsi_connh_t connh, struct iscsi_hdr *hdr,
+			     char *data, uint32_t data_size)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+	struct iscsi_iser_mgmt_task *mtask = NULL;
+	struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
+	int err;
+
+	spin_lock_bh(&session->lock);
+	if (session->state == ISCSI_STATE_TERMINATE) {
+		err = -EPERM;
+		goto out_unlock;
+	}
+
+	switch (hdr->opcode) {
+	case ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE:
+	case ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE:
+		/*
+		 * Login and Text are strictly request/response, so a single
+		 * task (and hence a single ITT) serves them all. The
+		 * login_mtask is preallocated at cnx_create().
+		 */
+		mtask = conn->login_mtask;
+		break;
+	default:
+		BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
+		BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
+		if (!__kfifo_get(session->mgmtpool.queue,
+				 (void*)&mtask, sizeof(void*))) {
+			err = -ENOSPC;
+			goto out_unlock;
+		}
+		break;
+	}
+
+	/*
+	 * pre-format CmdSN and ExpStatSN for outgoing PDU. CmdSN is always
+	 * stamped with the current value; it only advances for a
+	 * non-immediate PDU with a real ITT on a started connection.
+	 */
+	nop->cmdsn = cpu_to_be32(session->cmdsn);
+	if (hdr->itt != cpu_to_be32(ISCSI_RESERVED_TAG)) {
+		hdr->itt = mtask->itt | (conn->id << CID_SHIFT) |
+			   (session->age << AGE_SHIFT);
+		if (conn->c_stage == ISCSI_CONN_STARTED &&
+		    !(hdr->opcode & ISCSI_OP_IMMEDIATE))
+			session->cmdsn++;
+	}
+	nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
+
+	memcpy(mtask->hdr, hdr, sizeof(struct iscsi_hdr));
+
+	spin_unlock_bh(&session->lock);
+
+	if (data_size)
+		memcpy(mtask->data, data, data_size);
+	mtask->data_count = data_size;
+
+	debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
+		   hdr->opcode, hdr->itt, data_size);
+
+	/*
+	 * since send_pdu() could be called at least from two contexts,
+	 * we need to serialize __kfifo_put, so we don't have to take
+	 * additional lock on fast data-path
+	 */
+	__kfifo_put((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
+			conn->immqueue : conn->mgmtqueue,
+		    (void*)&mtask, sizeof(void*));
+
+	schedule_work(&conn->xmitwork);
+
+	return 0;
+
+out_unlock:
+	spin_unlock_bh(&session->lock);
+	return err;
+}
+
+/*
+ * iscsi_iser_eh_host_reset - SCSI error handler: host reset.
+ * @sc: the command the reset was requested for; sc->SCp.ptr holds its
+ *      iSER command task
+ *
+ * A reset is carried out by failing the connection that owns the command.
+ * Returns FAILED if the session is already terminated, SUCCESS otherwise.
+ */
+static int
+iscsi_iser_eh_host_reset(struct scsi_cmnd *sc)
+{
+	struct iscsi_iser_cmd_task *ctask = (struct iscsi_iser_cmd_task *)sc->SCp.ptr;
+	struct iscsi_iser_conn *conn = ctask->conn;
+	struct iscsi_iser_session *session = conn->session;
+	int terminated;
+
+	debug_iser("%s: host_busy %d host_failed %d\n",
+		   __FUNCTION__,
+		   session->host->host_busy, session->host->host_failed);
+
+	/* sample the session state (and log) under the session lock */
+	spin_lock_bh(&session->lock);
+	terminated = (session->state == ISCSI_STATE_TERMINATE);
+	if (terminated) {
+		debug_scsi("failing host reset: session terminated "
+			   "[CID %d age %d]", conn->id, session->age);
+	}
+	spin_unlock_bh(&session->lock);
+
+	if (terminated)
+		return FAILED;
+
+	debug_scsi("failing connection CID %d due to SCSI host reset "
+		   "[itt 0x%x age %d]", conn->id, ctask->itt,
+		   session->age);
+	iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+
+	return SUCCESS;
+}
+
+/*
+ * iscsi_iser_tmabort_timedout - timer callback for an unanswered task
+ * management abort.
+ * @data: the struct iscsi_iser_cmd_task being aborted, cast to unsigned long
+ *
+ * If the abort is still pending (TMABORT_INITIAL), hands the abort's
+ * management task back to the session pool, marks the abort as timed out
+ * and wakes the waiter on conn->ehwait. Does nothing otherwise.
+ */
+static void
+iscsi_iser_tmabort_timedout(unsigned long data)
+{
+	struct iscsi_iser_cmd_task *ctask = (struct iscsi_iser_cmd_task *)data;
+	struct iscsi_iser_conn *conn = ctask->conn;
+	struct iscsi_iser_session *session = conn->session;
+
+	spin_lock(&session->lock);
+	if (conn->tmabort_state != TMABORT_INITIAL)
+		goto done;
+
+	__kfifo_put(session->mgmtpool.queue,
+		    (void*)&ctask->mtask, sizeof(void*));
+	conn->tmabort_state = TMABORT_TIMEDOUT;
+	debug_scsi("tmabort timedout [sc %lx itt 0x%x]\n",
+		(long)ctask->sc, ctask->itt);
+	/* unblock eh_abort() */
+	wake_up(&conn->ehwait);
+
+done:
+	spin_unlock(&session->lock);
+}
+
+/*
+ * iscsi_iser_eh_abort - SCSI error handler: abort a single command.
+ * @sc: the command to abort; sc->SCp.ptr holds its iSER command task and
+ *      sc->SCp.phase is compared against session->age below
+ *
+ * Overview of the flow implemented here:
+ *  - session terminated: fail immediately;
+ *  - session not logged in (and not terminated): skip sending, go wait;
+ *  - session logged in: if the task already completed or belongs to an
+ *    older session age, succeed; otherwise send an ABORT TASK management
+ *    request and arm a 3 second timer for the response;
+ *  - then sleep on conn->ehwait until the abort is answered or times out,
+ *    or the session logs in again or terminates.
+ *
+ * Every exit goes through the "exit" label, which stops the abort timer
+ * and cleans up the command task if a socket is still bound.
+ *
+ * Returns SUCCESS or FAILED.
+ */
+static int
+iscsi_iser_eh_abort(struct scsi_cmnd *sc)
+{
+	int rc;
+	struct iscsi_iser_cmd_task *ctask;
+	struct iscsi_iser_conn *conn;
+	struct iscsi_iser_session *session;
+
+	ctask = (struct iscsi_iser_cmd_task *)sc->SCp.ptr;
+	conn = ctask->conn;
+	session = conn->session;
+
+	debug_iser("%s: host_busy %d host_failed %d\n",
+		   __FUNCTION__,
+		   session->host->host_busy, session->host->host_failed);
+
+	debug_scsi("aborting [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+
+	/*
+	 * two cases for ERL=0 here:
+	 *
+	 * 1) connection-level failure;
+	 * 2) recovery due protocol error;
+	 */
+	mutex_lock(&conn->xmitmutex);
+	spin_lock_bh(&session->lock);
+	debug_iser("%s: session->state = %d\n", __FUNCTION__, session->state);
+	if (session->state != ISCSI_STATE_LOGGED_IN) {
+		if (session->state == ISCSI_STATE_TERMINATE) {
+			spin_unlock_bh(&session->lock);
+			mutex_unlock(&conn->xmitmutex);
+			debug_scsi("abort failed becuase session->state == ISCSI_STATE_TERMINATE\n");
+			goto failed;
+		}
+		/* not logged in, not terminated: nothing to send, just wait */
+		spin_unlock_bh(&session->lock);
+	} else {
+		/* the connection's single TMF header is reused per abort */
+		struct iscsi_tm *hdr = &conn->tmhdr;
+
+		/*
+		 * Still LOGGED_IN...
+		 */
+
+		if (!ctask->sc || sc->SCp.phase != session->age) {
+			/*
+			 * 1) ctask completed before time out. But session
+			 *    is still ok => Happy Retry.
+			 * 2) session was re-open during time out of ctask.
+			 */
+			spin_unlock_bh(&session->lock);
+			mutex_unlock(&conn->xmitmutex);
+			goto success;
+		}
+		conn->tmabort_state = TMABORT_INITIAL;
+		spin_unlock_bh(&session->lock);
+
+		/*
+		 * ctask timed out but session is OK
+		 * ERL=0 requires task mgmt abort to be issued on each
+		 * failed command. requests must be serialized.
+		 */
+		memset(hdr, 0, sizeof(struct iscsi_tm));
+		hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
+		hdr->flags = ISCSI_TM_FUNC_ABORT_TASK;
+		hdr->flags |= ISCSI_FLAG_CMD_FINAL;
+		memcpy(hdr->lun, ctask->hdr->lun, sizeof(hdr->lun));
+		/* reference the command being aborted by its ITT and CmdSN */
+		hdr->rtt = ctask->hdr->itt;
+		hdr->refcmdsn = ctask->hdr->cmdsn;
+
+		/*
+		 * NOTE(review): hdr->itt is still 0 from the memset at this
+		 * point; it is only assigned inside
+		 * iscsi_iser_conn_send_generic() below.
+		 */
+		iser_err("op 0x%x aborting rtt 0x%x itt 0x%x dlength %d]\n",
+		         hdr->opcode, hdr->rtt, hdr->itt, ntoh24(hdr->dlength));
+
+		rc = iscsi_iser_conn_send_generic(iscsi_handle(conn), (struct iscsi_hdr *)hdr,
+						  NULL, 0);
+
+		if (rc) {
+			iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			debug_scsi("abort sent failure [itt 0x%x]", ctask->itt);
+		} else {
+			/*
+			 * TMF abort vs. TMF response race logic
+			 */
+			spin_lock_bh(&session->lock);
+			/*
+			 * remember which management task carries the abort,
+			 * looked up from the ITT that send_generic wrote into
+			 * hdr; the timeout handler returns it to the pool
+			 */
+			ctask->mtask = (struct iscsi_iser_mgmt_task *)
+				session->mgmt_cmds[(hdr->itt & ITT_MASK) -
+							ISCSI_MGMT_ITT_OFFSET];
+			if (conn->tmabort_state == TMABORT_INITIAL) {
+				/* no response yet: arm the 3 second timer */
+				conn->tmabort_timer.expires = 3*HZ + jiffies;
+				conn->tmabort_timer.function =
+						iscsi_iser_tmabort_timedout;
+				conn->tmabort_timer.data = (unsigned long)ctask;
+				add_timer(&conn->tmabort_timer);
+				debug_scsi("abort sent [itt 0x%x]\n", ctask->itt);
+			} else {
+				/* the abort state already changed */
+				if (!ctask->sc ||
+				    conn->tmabort_state == TMABORT_SUCCESS) {
+					conn->tmabort_state = TMABORT_INITIAL;
+					spin_unlock_bh(&session->lock);
+					mutex_unlock(&conn->xmitmutex);
+					goto success;
+				}
+				conn->tmabort_state = TMABORT_INITIAL;
+				/*
+				 * NOTE(review): this call is made with
+				 * session->lock held, unlike the other
+				 * iscsi_iser_conn_failure() calls in this
+				 * function - confirm that is safe.
+				 */
+				iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			}
+			spin_unlock_bh(&session->lock);
+		}
+	}
+	mutex_unlock(&conn->xmitmutex);
+
+	/*
+	 * block eh thread until:
+	 *
+	 * 1) abort response;
+	 * 2) abort timeout;
+	 * 3) session re-opened;
+	 * 4) session terminated;
+	 */
+	for (;;) {
+		/* state sampled (without the lock) before going to sleep;
+		 * it selects which wake-up condition applies */
+		int p_state = session->state;
+
+		rc = wait_event_interruptible(conn->ehwait,
+			(p_state == ISCSI_STATE_LOGGED_IN ?
+			 (session->state == ISCSI_STATE_TERMINATE ||
+			  conn->tmabort_state != TMABORT_INITIAL) :
+			 (session->state == ISCSI_STATE_TERMINATE ||
+			  session->state == ISCSI_STATE_LOGGED_IN)));
+		if (rc) {
+			/* shutdown.. */
+			/* NOTE(review): session->state is written here
+			 * without session->lock held */
+			session->state = ISCSI_STATE_TERMINATE;
+			debug_scsi("abort failed: session->state = %d\n",
+				   session->state);
+			goto failed;
+		}
+
+		if (signal_pending(current))
+			flush_signals(current);
+
+
+		if (session->state == ISCSI_STATE_TERMINATE){
+			debug_scsi("abort failed because session->state == ISCSI_STATE_TERMINATE (2)\n");
+			goto failed;
+		}
+
+		spin_lock_bh(&session->lock);
+		/*
+		 * same session age and the abort timed out or failed:
+		 * either the task completed meanwhile (done), or the
+		 * connection is failed and we wait again
+		 */
+		if (sc->SCp.phase == session->age &&
+		   (conn->tmabort_state == TMABORT_TIMEDOUT ||
+		    conn->tmabort_state == TMABORT_FAILED)) {
+			conn->tmabort_state = TMABORT_INITIAL;
+			if (!ctask->sc) {
+				/*
+				 * ctask completed before tmf abort response or
+				 * time out.
+				 * But session is still ok => Happy Retry.
+				 */
+				spin_unlock_bh(&session->lock);
+				break;
+			}
+			spin_unlock_bh(&session->lock);
+			iscsi_iser_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+			continue;
+		}
+		spin_unlock_bh(&session->lock);
+		break;
+	}
+
+success:
+	debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+	rc = SUCCESS;
+	goto exit;
+
+failed:
+	debug_scsi("abort failed [sc %lx itt 0x%x]\n", (long)sc, ctask->itt);
+	rc = FAILED;
+
+exit:
+	/* common exit for both outcomes: stop the abort timer first */
+	del_timer_sync(&conn->tmabort_timer);
+
+	mutex_lock(&conn->xmitmutex);
+	/* conn->sock is NULL once the connection has been torn down in
+	 * conn_stop; in that case the task cleanup is skipped */
+	if (conn->sock) {
+		struct sock *sk = conn->sock->sk;
+
+		write_lock_bh(&sk->sk_callback_lock);
+		iscsi_iser_ctask_cleanup(conn, ctask);
+		write_unlock_bh(&sk->sk_callback_lock);
+	}
+	mutex_unlock(&conn->xmitmutex);
+	return rc;
+}
+
+
+/* SCSI host template registered for every iSER session host */
+static struct scsi_host_template iscsi_iser_sht = {
+	/* identification */
+	.proc_name              = "iscsi_iser",
+	.name                   = "iSCSI Initiator over iSER, v."
+				  ISCSI_VERSION_STR,
+	.this_id                = -1,
+
+	/* queueing limits */
+	.can_queue              = ISCSI_ISER_XMIT_CMDS_MAX - 1,
+	.cmd_per_lun            = ISCSI_ISER_CMD_PER_LUN,
+	.sg_tablesize           = ISCSI_ISER_SG_TABLESIZE,
+	.use_clustering         = DISABLE_CLUSTERING,
+
+	/* command submission and error handling entry points */
+	.queuecommand           = iscsi_iser_queuecommand,
+	.eh_abort_handler       = iscsi_iser_eh_abort,
+	.eh_host_reset_handler	= iscsi_iser_eh_host_reset,
+};
+
+/*
+ * iscsi_iser_session_create - initialize the session embedded in a SCSI
+ * host's private data.
+ * @initial_cmdsn: first CmdSN of the session; ExpCmdSN and MaxCmdSN start
+ *                 at @initial_cmdsn + 1
+ * @host:          SCSI host whose hostdata holds the session
+ *
+ * Sets up the command task pool and the management task pool, assigns
+ * every task its ITT and header pointer, and gives each management task a
+ * data buffer of DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH bytes.
+ *
+ * Returns the session handle, or a NULL handle if any allocation fails
+ * (everything allocated up to that point is released).
+ */
+static iscsi_sessionh_t
+iscsi_iser_session_create(uint32_t initial_cmdsn,
+			  struct Scsi_Host *host)
+{
+	struct iscsi_iser_session *session = iscsi_hostdata(host->hostdata);
+	int cmd_i, mgmt_i;
+
+	memset(session, 0, sizeof(*session));
+
+	session->host = host;
+	session->id = host->host_no;
+	session->cmds_max = ISCSI_ISER_XMIT_CMDS_MAX;
+	session->mgmtpool_max = ISCSI_ISER_MGMT_CMDS_MAX;
+	session->cmdsn = initial_cmdsn;
+	session->exp_cmdsn = initial_cmdsn + 1;
+	session->max_cmdsn = initial_cmdsn + 1;
+
+	if (iscsi_iser_pool_init(&session->cmdpool, session->cmds_max,
+				 (void***)&session->cmds,
+				 sizeof(struct iscsi_iser_cmd_task)))
+		goto no_cmdpool;
+
+	/* pre-format cmds pool with ITT */
+	for (cmd_i = 0; cmd_i < session->cmds_max; cmd_i++) {
+		struct iscsi_iser_cmd_task *ctask = session->cmds[cmd_i];
+
+		ctask->itt = cmd_i;
+		ctask->hdr = (struct iscsi_cmd *)&ctask->desc.iscsi_header;
+	}
+
+	spin_lock_init(&session->lock);
+	INIT_LIST_HEAD(&session->connections);
+
+	/* initialize immediate command pool */
+	if (iscsi_iser_pool_init(&session->mgmtpool, session->mgmtpool_max,
+				 (void***)&session->mgmt_cmds,
+				 sizeof(struct iscsi_iser_mgmt_task)))
+		goto no_mgmtpool;
+
+	/* pre-format immediate cmds pool with ITT and a data buffer each */
+	for (mgmt_i = 0; mgmt_i < session->mgmtpool_max; mgmt_i++) {
+		struct iscsi_iser_mgmt_task *mtask = session->mgmt_cmds[mgmt_i];
+
+		mtask->itt = ISCSI_MGMT_ITT_OFFSET + mgmt_i;
+		mtask->hdr = &mtask->desc.iscsi_header;
+
+		mtask->desc.data = kmalloc(DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH,
+					   GFP_KERNEL);
+		if (!mtask->desc.data)
+			goto no_immdata;
+
+		mtask->data = mtask->desc.data;
+	}
+
+	return iscsi_handle(session);
+
+no_immdata:
+	/* mgmt_i indexes the task whose buffer failed; free the ones before */
+	while (mgmt_i-- > 0)
+		kfree(session->mgmt_cmds[mgmt_i]->desc.data);
+	iscsi_iser_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+no_mgmtpool:
+	iscsi_iser_pool_free(&session->cmdpool, (void**)session->cmds);
+no_cmdpool:
+	return iscsi_handle(NULL);
+}
+
+/*
+ * iscsi_iser_session_destroy - release what session_create allocated.
+ * @sessionh: handle of the session to destroy
+ *
+ * Frees the data buffer of every management task, then both task pools.
+ */
+static void
+iscsi_iser_session_destroy(iscsi_sessionh_t sessionh)
+{
+	struct iscsi_iser_session *session = iscsi_ptr(sessionh);
+	int idx = 0;
+
+	while (idx < session->mgmtpool_max) {
+		kfree(session->mgmt_cmds[idx]->desc.data);
+		idx++;
+	}
+
+	iscsi_iser_pool_free(&session->mgmtpool, (void**)session->mgmt_cmds);
+	iscsi_iser_pool_free(&session->cmdpool, (void**)session->cmds);
+}
+
+/*
+ * iscsi_iser_conn_set_param - apply a negotiated iSCSI parameter.
+ * @connh: handle of the connection the parameter was negotiated on
+ * @param: which parameter to set
+ * @value: negotiated value
+ *
+ * Parameters may only be changed while the connection is in its initial
+ * stage or being recovered; otherwise the request is logged and ignored
+ * (the function still returns 0 in that case).
+ *
+ * iSER cannot work with header/data digests or IF/OF markers, and requires
+ * RDMAExtensions, so any other outcome of the negotiation is rejected.
+ * ISCSI_PARAM_MAX_RECV_DLENGTH and unknown parameters are accepted and
+ * ignored.
+ *
+ * Returns 0 on success (or when the change was ignored), -EPROTO when the
+ * negotiated value is not usable with iSER.
+ */
+static int
+iscsi_iser_conn_set_param(iscsi_connh_t connh,
+			  enum iscsi_param param,
+			  uint32_t value)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+
+	spin_lock_bh(&session->lock);
+	if (conn->c_stage != ISCSI_CONN_INITIAL_STAGE &&
+	    conn->stop_stage != STOP_CONN_RECOVER) {
+		printk(KERN_ERR "iscsi_iser: can not change parameter [%d]\n",
+		       param);
+		spin_unlock_bh(&session->lock);
+		return 0;
+	}
+	spin_unlock_bh(&session->lock);
+
+	switch (param) {
+	case ISCSI_PARAM_MAX_RECV_DLENGTH:
+		/* TBD */
+		break;
+	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+		conn->max_xmit_dlength =  value;
+		break;
+	case ISCSI_PARAM_HDRDGST_EN:
+		if (value) {
+			/* was mislabelled as DataDigest in the log */
+			printk(KERN_ERR "HeaderDigest wasn't negotiated to None\n");
+			return -EPROTO;
+		}
+		break;
+	case ISCSI_PARAM_DATADGST_EN:
+		if (value) {
+			printk(KERN_ERR "DataDigest wasn't negotiated to None\n");
+			return -EPROTO;
+		}
+		break;
+	case ISCSI_PARAM_INITIAL_R2T_EN:
+		session->initial_r2t_en = value;
+		break;
+	case ISCSI_PARAM_IMM_DATA_EN:
+		session->imm_data_en = value;
+		break;
+	case ISCSI_PARAM_FIRST_BURST:
+		session->first_burst = value;
+		break;
+	case ISCSI_PARAM_MAX_BURST:
+		session->max_burst = value;
+		break;
+	case ISCSI_PARAM_PDU_INORDER_EN:
+		session->pdu_inorder_en = value;
+		break;
+	case ISCSI_PARAM_DATASEQ_INORDER_EN:
+		session->dataseq_inorder_en = value;
+		break;
+	case ISCSI_PARAM_ERL:
+		session->erl = value;
+		break;
+	case ISCSI_PARAM_IFMARKER_EN:
+		if (value) {
+			printk(KERN_ERR "IFMarker wasn't negotiated to No\n");
+			return -EPROTO;
+		}
+		break;
+	case ISCSI_PARAM_OFMARKER_EN:
+		if (value) {
+			printk(KERN_ERR "OFMarker wasn't negotiated to No\n");
+			return -EPROTO;
+		}
+		break;
+	case ISCSI_PARAM_RDMAEXTENSIONS:
+		if (!value) {
+			printk(KERN_ERR "RDMAExtensions wasn't negotiated to Yes\n");
+			return -EPROTO;
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * iscsi_iser_conn_get_param - report the current value of an iSCSI
+ * parameter.
+ * @connh: handle of the connection being queried
+ * @param: which parameter to read
+ * @value: where the result is stored
+ *
+ * Digests and markers are always reported as 0 and RDMAExtensions as 1.
+ * ISCSI_PARAM_TARGET_RECV_DLENGTH and ISCSI_PARAM_INITIATOR_RECV_DLENGTH
+ * are not reported (their handling is disabled in this version).
+ *
+ * Returns 0 on success, ISCSI_ERR_PARAM_NOT_FOUND for any parameter not
+ * handled here (in which case *value is left untouched).
+ */
+static int
+iscsi_iser_conn_get_param(iscsi_connh_t connh,
+			  enum iscsi_param param,
+			  uint32_t *value)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	struct iscsi_iser_session *session = conn->session;
+
+	switch (param) {
+	/* connection level */
+	case ISCSI_PARAM_MAX_XMIT_DLENGTH:
+		*value = conn->max_xmit_dlength;
+		return 0;
+
+	/* fixed by the transport: never enabled */
+	case ISCSI_PARAM_HDRDGST_EN:
+	case ISCSI_PARAM_DATADGST_EN:
+	case ISCSI_PARAM_IFMARKER_EN:
+	case ISCSI_PARAM_OFMARKER_EN:
+		*value = 0;
+		return 0;
+
+	/* fixed by the transport: always enabled */
+	case ISCSI_PARAM_RDMAEXTENSIONS:
+		*value = 1;
+		return 0;
+
+	/* session level */
+	case ISCSI_PARAM_INITIAL_R2T_EN:
+		*value = session->initial_r2t_en;
+		return 0;
+	case ISCSI_PARAM_MAX_R2T:
+		*value = session->max_r2t;
+		return 0;
+	case ISCSI_PARAM_IMM_DATA_EN:
+		*value = session->imm_data_en;
+		return 0;
+	case ISCSI_PARAM_FIRST_BURST:
+		*value = session->first_burst;
+		return 0;
+	case ISCSI_PARAM_MAX_BURST:
+		*value = session->max_burst;
+		return 0;
+	case ISCSI_PARAM_PDU_INORDER_EN:
+		*value = session->pdu_inorder_en;
+		return 0;
+	case ISCSI_PARAM_DATASEQ_INORDER_EN:
+		*value = session->dataseq_inorder_en;
+		return 0;
+	case ISCSI_PARAM_ERL:
+		*value = session->erl;
+		return 0;
+
+	default:
+		return ISCSI_ERR_PARAM_NOT_FOUND;
+	}
+}
+
+/*
+ * iscsi_iser_conn_send_pdu - transport entry point for sending a PDU.
+ * @connh:     handle of the connection to send on
+ * @hdr:       PDU header
+ * @data:      optional data segment
+ * @data_size: number of bytes at @data
+ *
+ * Wraps iscsi_iser_conn_send_generic() with conn->xmitmutex held and
+ * returns its result.
+ */
+static int
+iscsi_iser_conn_send_pdu(iscsi_connh_t connh, struct iscsi_hdr *hdr, char *data,
+			  uint32_t data_size)
+{
+	struct iscsi_iser_conn *conn = iscsi_ptr(connh);
+	int status;
+
+	mutex_lock(&conn->xmitmutex);
+	status = iscsi_iser_conn_send_generic(connh, hdr, data, data_size);
+	mutex_unlock(&conn->xmitmutex);
+
+	return status;
+}
+
+/*
+ * iSER transport descriptor handed to iscsi_register_transport().
+ * max_lun is overwritten with iscsi_max_lun in iser_init().
+ */
+static struct iscsi_transport iscsi_iser_transport = {
+	/* identity and capabilities */
+	.owner                  = THIS_MODULE,
+	.name                   = "iser",
+	.af                     = AF_ISER,
+	.rdma                   = 1,
+	.caps                   = CAP_RECOVERY_L0 | CAP_MULTI_R2T,
+
+	/* SCSI host setup */
+	.host_template          = &iscsi_iser_sht,
+	.hostdata_size          = sizeof(struct iscsi_iser_session),
+	.max_lun                = ISCSI_ISER_MAX_LUN,
+	.max_cmd_len            = ISCSI_ISER_MAX_CMD_LEN,
+
+	/* session life cycle */
+	.create_session         = iscsi_iser_session_create,
+	.destroy_session        = iscsi_iser_session_destroy,
+
+	/* connection life cycle */
+	.create_conn            = iscsi_iser_conn_create,
+	.bind_conn              = iscsi_iser_conn_bind,
+	.start_conn             = iscsi_iser_conn_start,
+	.stop_conn              = iscsi_iser_conn_stop,
+	.destroy_conn           = iscsi_iser_conn_destroy,
+
+	/* parameters and PDU transmission */
+	.set_param              = iscsi_iser_conn_set_param,
+	.get_param              = iscsi_iser_conn_get_param,
+	.send_pdu               = iscsi_iser_conn_send_pdu,
+};
+
+/*
+ * iser_init - module load: validate parameters and register with the
+ * socket and iSCSI transport layers.
+ *
+ * Creates the descriptor slab cache, initializes the global adaptor list,
+ * registers the iSER sockets and finally the iSCSI transport. Each step
+ * that fails undoes the ones before it.
+ *
+ * Returns 0 on success, -EINVAL for a bad iscsi_max_lun, -ENOMEM if the
+ * cache cannot be created, or the error of the failing registration.
+ */
+static int __init iser_init(void)
+{
+	int rc;
+
+	iser_dbg("Starting iSER datamover...\n");
+
+	if (iscsi_max_lun < 1) {
+		printk(KERN_ERR "Invalid max_lun value of %u\n", iscsi_max_lun);
+		return -EINVAL;
+	}
+	iscsi_iser_transport.max_lun = iscsi_max_lun;
+
+	memset(&ig, 0, sizeof(struct iser_global));
+
+	ig.desc_cache = kmem_cache_create("iser_descriptors",
+					  sizeof (struct iser_desc),
+					  0, SLAB_HWCACHE_ALIGN,
+					  NULL, NULL);
+	if (!ig.desc_cache)
+		return -ENOMEM;
+
+	/* adaptor init is called only after the first addr resolution */
+	mutex_init(&ig.adaptor_list_mutex);
+	INIT_LIST_HEAD(&ig.adaptor_list);
+
+	rc = iser_register_sockets();
+	if (rc) {
+		iser_err("iser socket init failed!\n");
+		goto free_cache;
+	}
+
+	rc = iscsi_register_transport(&iscsi_iser_transport);
+	if (!rc)
+		return 0;
+
+	/* transport registration failed: unwind in reverse order */
+	iser_err("iscsi_register_transport failed\n");
+	iser_unreg_sockets();
+free_cache:
+	kmem_cache_destroy(ig.desc_cache);
+
+	return rc;
+}
+
+/*
+ * iser_exit - module unload.
+ *
+ * Undoes iser_init() in strict reverse order: the iSCSI transport is
+ * unregistered first, then the iSER sockets, and the descriptor cache is
+ * destroyed last, once neither layer is registered any more. This is the
+ * same order iser_init() uses on its own error path.
+ */
+static void __exit iser_exit(void)
+{
+	iser_dbg("Removing iSER datamover...\n");
+	iscsi_unregister_transport(&iscsi_iser_transport);
+	iser_unreg_sockets();
+	kmem_cache_destroy(ig.desc_cache);
+}
+
+/* module entry points: iser_init() on load, iser_exit() on unload */
+module_init(iser_init);
+module_exit(iser_exit);




More information about the general mailing list