[openib-general] Re: [PATCH] [1/2] SQE handling on MAD QPs

Sean Hefty mshefty at ichips.intel.com
Fri Nov 12 09:54:51 PST 2004


On Fri, 12 Nov 2004 12:18:32 -0500
Hal Rosenstock <halr at voltaire.com> wrote:

> On Fri, 2004-11-12 at 12:13, Sean Hefty wrote:
> > Not sure what the issue is.  Let me make sure that I've pulled the latest code and 
> > resubmit the patch.
> 
> It looks right to me. Does it work for you? Can you send a normal
> rather than a unified diff?

Can you try this version?  I'll also revert to the original code and see if
I can apply the patch.
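
To recap the approach: a completion error on the send queue moves the QP
into the SQE state.  The completion handler now moves the QP back to RTS,
fails the offending send, and marks the remaining (flushed) sends so that
they are reposted as their flush completions are polled.  One subtle point
is the ib_modify_qp() attribute mask used for recovery: the initial
RTR->RTS transition must supply the send queue PSN, but a QP recovering
from SQE keeps its send queue state, so only the QP state itself is set.
Roughly, condensed from the full function in the diff below (the real code
kmallocs the attribute structure rather than putting it on the stack):

	struct ib_qp_attr attr = { .qp_state = IB_QPS_RTS };
	int attr_mask = IB_QP_STATE;

	if (cur_state == IB_QPS_RTR) {
		/* Initial transition to RTS - supply the send queue PSN. */
		attr.sq_psn = IB_MAD_SEND_Q_PSN;
		attr_mask |= IB_QP_SQ_PSN;
	}
	ret = ib_modify_qp(qp, &attr, attr_mask);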

- Sean


Index: include/ib_mad.h
===================================================================
--- include/ib_mad.h	(revision 1221)
+++ include/ib_mad.h	(working copy)
@@ -250,6 +250,8 @@
  * @mad_agent - Specifies the associated registration to post the send to.
  * @send_wr - Specifies the information needed to send the MAD(s).
  * @bad_send_wr - Specifies the MAD on which an error was encountered.
+ *
+ * Sent MADs are not guaranteed to complete in the order that they were posted.
  */
 int ib_post_send_mad(struct ib_mad_agent *mad_agent,
 		     struct ib_send_wr *send_wr,
Index: core/mad.c
===================================================================
--- core/mad.c	(revision 1221)
+++ core/mad.c	(working copy)
@@ -90,6 +90,8 @@
 				    struct ib_mad_send_wc *mad_send_wc);
 static void timeout_sends(void *data);
 static int solicited_mad(struct ib_mad *mad);
+static int ib_mad_change_qp_state_to_rts(struct ib_qp *qp,
+					 enum ib_qp_state cur_state);
 
 /*
 * Returns an ib_mad_port_private structure or NULL for a device/port.
@@ -591,6 +593,7 @@
 		/* Timeout will be updated after send completes */
 		mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
 							ud.timeout_ms);
+		mad_send_wr->retry = 0;
 		/* One reference for each work request to QP + response */
 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
 		mad_send_wr->status = IB_WC_SUCCESS;
@@ -1339,6 +1342,70 @@
 	}
 }
 
+static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
+{
+	struct ib_mad_send_wr_private *mad_send_wr;
+	struct ib_mad_list_head *mad_list;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
+		mad_send_wr = container_of(mad_list,
+					   struct ib_mad_send_wr_private,
+					   mad_list);
+		mad_send_wr->retry = 1;
+	}
+	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
+}
+
+static void mad_error_handler(struct ib_mad_port_private *port_priv,
+			      struct ib_wc *wc)
+{
+	struct ib_mad_list_head *mad_list;
+	struct ib_mad_qp_info *qp_info;
+	struct ib_mad_send_wr_private *mad_send_wr;
+	int ret;
+
+	/* Determine if failure was a send or receive */
+	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+	qp_info = mad_list->mad_queue->qp_info;
+	if (mad_list->mad_queue == &qp_info->recv_queue) {
+		/*
+		 * Receive errors indicate that the QP has entered the error
+		 * state - error handling/shutdown code will clean up.
+		 */
+		return;
+	}
+
+	/*
+	 * Send errors will transition the QP to SQE - move
+	 * QP to RTS and repost flushed work requests.
+	 */
+	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+				   mad_list);
+	if (wc->status == IB_WC_WR_FLUSH_ERR) {
+		if (mad_send_wr->retry) {
+			/* Repost send. */
+			struct ib_send_wr *bad_send_wr;
+
+			mad_send_wr->retry = 0;
+			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
+					&bad_send_wr);
+			if (ret)
+				ib_mad_send_done_handler(port_priv, wc);
+		} else
+			ib_mad_send_done_handler(port_priv, wc);
+	} else {
+		/* Transition QP to RTS and fail offending send. */
+		ret = ib_mad_change_qp_state_to_rts(qp_info->qp, IB_QPS_SQE);
+		if (ret)
+			printk(KERN_ERR PFX "mad_error_handler - unable to "
+			       "transition QP to RTS: %d\n", ret);
+		ib_mad_send_done_handler(port_priv, wc);
+		mark_sends_for_retry(qp_info);
+	}
+}
+
 /*
  * IB MAD completion callback
  */
@@ -1346,34 +1413,25 @@
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_wc wc;
-	struct ib_mad_list_head *mad_list;
-	struct ib_mad_qp_info *qp_info;
 
 	port_priv = (struct ib_mad_port_private*)data;
 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 	
 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
-		if (wc.status != IB_WC_SUCCESS) {
-			/* Determine if failure was a send or receive */
-			mad_list = (struct ib_mad_list_head *)
-				   (unsigned long)wc.wr_id;
-			qp_info = mad_list->mad_queue->qp_info;
-			if (mad_list->mad_queue == &qp_info->send_queue)
-				wc.opcode = IB_WC_SEND;
-			else
-				wc.opcode = IB_WC_RECV;
-		}
-		switch (wc.opcode) {
-		case IB_WC_SEND:
-			ib_mad_send_done_handler(port_priv, &wc);
-			break;
-		case IB_WC_RECV:
-			ib_mad_recv_done_handler(port_priv, &wc);
-			break;
-		default:
-			BUG_ON(1);
-			break;
-		}
+		if (wc.status == IB_WC_SUCCESS) {
+			switch (wc.opcode) {
+			case IB_WC_SEND:
+				ib_mad_send_done_handler(port_priv, &wc);
+				break;
+			case IB_WC_RECV:
+				ib_mad_recv_done_handler(port_priv, &wc);
+				break;
+			default:
+				BUG_ON(1);
+				break;
+			}
+		} else
+			mad_error_handler(port_priv, &wc);
 	}
 }
 
@@ -1717,7 +1775,8 @@
 /*
  * Modify QP into Ready-To-Send state
  */
-static inline int ib_mad_change_qp_state_to_rts(struct ib_qp *qp)
+static int ib_mad_change_qp_state_to_rts(struct ib_qp *qp,
+					 enum ib_qp_state cur_state)
 {
 	int ret;
 	struct ib_qp_attr *attr;
@@ -1729,11 +1788,12 @@
 		       "ib_qp_attr\n");
 		return -ENOMEM;
 	}
-
 	attr->qp_state = IB_QPS_RTS;
-	attr->sq_psn = IB_MAD_SEND_Q_PSN;
-	attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
-
+	attr_mask = IB_QP_STATE;
+	if (cur_state == IB_QPS_RTR) {
+		attr->sq_psn = IB_MAD_SEND_Q_PSN;
+		attr_mask |= IB_QP_SQ_PSN;
+	}
 	ret = ib_modify_qp(qp, attr, attr_mask);
 	kfree(attr);
 
@@ -1793,7 +1853,8 @@
 			goto error;
 		}
 
-		ret = ib_mad_change_qp_state_to_rts(port_priv->qp_info[i].qp);
+		ret = ib_mad_change_qp_state_to_rts(port_priv->qp_info[i].qp,
+						    IB_QPS_RTR);
 		if (ret) {
 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
 			       "RTS\n", i);
@@ -1852,6 +1913,15 @@
 	}
 }
 
+static void qp_event_handler(struct ib_event *event, void *qp_context)
+{
+	struct ib_mad_qp_info	*qp_info = qp_context;
+
+	/* It's worse than that! He's dead, Jim! */
+	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
+		event->event, qp_info->qp->qp_num);
+}
+
 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
 			   struct ib_mad_queue *mad_queue)
 {
@@ -1884,6 +1954,8 @@
 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
 	qp_init_attr.qp_type = qp_type;
 	qp_init_attr.port_num = port_priv->port_num;
+	qp_init_attr.qp_context = qp_info;
+	qp_init_attr.event_handler = qp_event_handler;
 	qp_info->qp = ib_create_qp(port_priv->pd, &qp_init_attr);
 	if (IS_ERR(qp_info->qp)) {
 		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
Index: core/mad_priv.h
===================================================================
--- core/mad_priv.h	(revision 1221)
+++ core/mad_priv.h	(working copy)
@@ -127,6 +127,7 @@
 	u64 wr_id;			/* client WR ID */
 	u64 tid;
 	unsigned long timeout;
+	int retry;
 	int refcount;
 	enum ib_wc_status status;
 };


