[ofa-general] [PATCH] iser: avoid recv buf exhaustion

David Disseldorp ddiss at sgi.com
Thu Nov 20 22:10:45 PST 2008


iSCSI/iSER targets may send PDUs without a prior request from the initiator;
RFC 5046 refers to these PDUs as "unexpected". NOP-In PDUs with itt=RESERVED
and Asynchronous Message PDUs fall into this category.

The number of active "unexpected" PDUs an iSER target may have at any time is
governed by the MaxOutstandingUnexpectedPDUs key, which is not yet supported.

Currently, when an iSER target sends an "unexpected" PDU, the initiator's recv
buffer consumed by the PDU is not replaced. If more than
initial_post_recv_bufs_num "unexpected" PDUs are received, the receive queue
will run out of receive work requests.

This patch ensures recv buffers consumed by "unexpected" PDUs are replaced
prior to sending the next command or control-type PDU.

Signed-off-by: David Disseldorp <ddiss at sgi.com>
Signed-off-by: Ken Sandars <ksandars at sgi.com>
---
 drivers/infiniband/ulp/iser/iscsi_iser.h     |    3 +
 drivers/infiniband/ulp/iser/iser_initiator.c |   76 ++++++++++++++++++++++++--
 drivers/infiniband/ulp/iser/iser_verbs.c     |    1 +
 3 files changed, 74 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 81a8262..8611195 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -252,6 +252,9 @@ struct iser_conn {
 	wait_queue_head_t	     wait;          /* waitq for conn/disconn  */
 	atomic_t                     post_recv_buf_count; /* posted rx count   */
 	atomic_t                     post_send_buf_count; /* posted tx count   */
+	atomic_t                     unexpected_pdu_count;/* count of received *
+							   * unexpected pdus   *
+							   * not yet retired   */
 	char 			     name[ISER_OBJECT_NAME_SIZE];
 	struct iser_page_vec         *page_vec;     /* represents SG to fmr maps*
 						     * maps serialized as tx is*/
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index cdd2831..9f8cffb 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -274,8 +274,10 @@ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn)
 	struct iscsi_iser_conn *iser_conn = conn->dd_data;
 
 	int i;
-	/* no need to keep it in a var, we are after login so if this should
-	 * be negotiated, by now the result should be available here */
+	/*
+	 * FIXME this value should be declared to the target during login with
+	 * the MaxOutstandingUnexpectedPDUs key when supported
+	 */
 	int initial_post_recv_bufs_num = ISER_MAX_RX_MISC_PDUS;
 
 	iser_dbg("Initially post: %d\n", initial_post_recv_bufs_num);
@@ -310,6 +312,33 @@ iser_check_xmit(struct iscsi_conn *conn, void *task)
 	return 0;
 }
 
+static inline int
+iser_post_unexpected_recvs(struct iscsi_conn *conn)
+{
+	struct iscsi_iser_conn *iser_conn = conn->dd_data;
+	int outstanding_unexp_pdus;
+	int err = 0;
+
+	if (atomic_read(&iser_conn->ib_conn->unexpected_pdu_count) == 0)
+		goto out;
+	/* claim the pending unexpected pdu count and reset the counter to 0 */
+	outstanding_unexp_pdus =
+		atomic_xchg(&iser_conn->ib_conn->unexpected_pdu_count, 0);
+	/* post one recv buffer per claimed pdu; returns 0 or -ENOMEM */
+	while (outstanding_unexp_pdus > 0) {
+		if (iser_post_receive_control(conn) != 0) {
+			iser_err("post_rcv failed\n");
+			err = -ENOMEM;
+			atomic_add(outstanding_unexp_pdus,
+				   &iser_conn->ib_conn->unexpected_pdu_count);
+			goto out;	/* unposted remainder re-added above */
+		}
+		outstanding_unexp_pdus--;
+	}
+
+out:
+	return err;
+}
 
 /**
  * iser_send_command - send command PDU
@@ -372,6 +401,7 @@ int iser_send_command(struct iscsi_conn *conn,
 	iser_reg_single(iser_conn->ib_conn->device,
 			send_dto->regd[0], DMA_TO_DEVICE);
 
+	/* post recv buffer for SCSI response */
 	if (iser_post_receive_control(conn) != 0) {
 		iser_err("post_recv failed!\n");
 		err = -ENOMEM;
@@ -380,6 +410,12 @@ int iser_send_command(struct iscsi_conn *conn,
 
 	iser_task->status = ISER_TASK_STATUS_STARTED;
 
+	/*
+	 * post recv bufs for those consumed by unexpected pdus from target
+	 * errors are ignored, as retry occurs on next send
+	 */
+	iser_post_unexpected_recvs(conn);
+
 	err = iser_post_send(&iser_task->desc);
 	if (!err)
 		return 0;
@@ -478,6 +514,7 @@ int iser_send_control(struct iscsi_conn *conn,
 	int err = 0;
 	struct iser_regd_buf *regd_buf;
 	struct iser_device *device;
+	unsigned char opcode;
 
 	if (!iser_conn_state_comp(iser_conn->ib_conn, ISER_CONN_UP)) {
 		iser_err("Failed to send, conn: 0x%p is not up\n", iser_conn->ib_conn);
@@ -512,12 +549,24 @@ int iser_send_control(struct iscsi_conn *conn,
 				       data_seg_len);
 	}
 
-	if (iser_post_receive_control(conn) != 0) {
-		iser_err("post_rcv_buff failed!\n");
-		err = -ENOMEM;
-		goto send_control_error;
+	opcode = task->hdr->opcode & ISCSI_OPCODE_MASK;
+
+	/* post recv buffer for response if one is expected */
+	if (!((opcode == ISCSI_OP_NOOP_OUT)
+	 && (task->hdr->itt == RESERVED_ITT))) {
+		if (iser_post_receive_control(conn) != 0) {
+			iser_err("post_rcv_buff failed!\n");
+			err = -ENOMEM;
+			goto send_control_error;
+		}
 	}
 
+	/*
+	 * post recv bufs for those consumed by unexpected pdus from target
+	 * errors are ignored, as retry occurs on next send
+	 */
+	iser_post_unexpected_recvs(conn);
+
 	err = iser_post_send(mdesc);
 	if (!err)
 		return 0;
@@ -586,6 +635,21 @@ void iser_rcv_completion(struct iser_desc *rx_desc,
 	 * parallel to the execution of iser_conn_term. So the code that waits *
 	 * for the posted rx bufs refcount to become zero handles everything   */
 	atomic_dec(&conn->ib_conn->post_recv_buf_count);
+
+	/*
+	 * if an unexpected PDU was received then the recv wr consumed must
+	 * be replaced, this is done in the next send of a control-type PDU
+	 */
+	if ((opcode == ISCSI_OP_NOOP_IN)
+	 && (hdr->itt == RESERVED_ITT)) {
+		/* nop-in with itt = 0xffffffff */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	else if (opcode == ISCSI_OP_ASYNC_EVENT) {
+		/* asynchronous message */
+		atomic_inc(&conn->ib_conn->unexpected_pdu_count);
+	}
+	/* a reject PDU consumes the recv buf posted for the response */
 }
 
 void iser_snd_completion(struct iser_desc *tx_desc)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 26ff621..6dc6b17 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -498,6 +498,7 @@ void iser_conn_init(struct iser_conn *ib_conn)
 	init_waitqueue_head(&ib_conn->wait);
 	atomic_set(&ib_conn->post_recv_buf_count, 0);
 	atomic_set(&ib_conn->post_send_buf_count, 0);
+	atomic_set(&ib_conn->unexpected_pdu_count, 0);
 	atomic_set(&ib_conn->refcount, 1);
 	INIT_LIST_HEAD(&ib_conn->conn_list);
 	spin_lock_init(&ib_conn->lock);
-- 
1.5.4.5




More information about the general mailing list