[openib-general] [PATCH] for review -- fix MAD completion handling

Sean Hefty mshefty at ichips.intel.com
Tue Nov 2 11:19:06 PST 2004


On Thu, 28 Oct 2004 23:30:00 -0700
Sean Hefty <mshefty at ichips.intel.com> wrote:

> Here's what I have for MAD completion handling.  This patch
> tries to fix the issue of matching a completion (successful or error)
> with the corresponding work request.  Some notes:

Please use this patch instead.  I merged it with the latest changes (as
of this morning) and tested it with opensm running on a remote node and
ipoib running locally.
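
For anyone reviewing the approach without digging straight into the
diff: every outstanding send and receive now embeds a small list entry
(struct ib_mad_list_head), the wr_id posted to the QP is the address of
that entry, and the completion handler gets back to the request with
container_of() instead of walking the posted lists.  Since failed
completions don't carry a valid opcode, the handler also uses the
entry's queue pointer to tell sends from receives.  Below is a rough,
standalone sketch of the idea in plain C; the names are simplified
stand-ins rather than the exact structures from the patch (those are in
the diff that follows):

/*
 * Standalone sketch, not kernel code.  The send side is shown; the
 * receive side recovers its buffer with the same container_of() trick.
 */
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct mad_queue;

struct mad_list_head {
	struct mad_list_head *next, *prev;
	struct mad_queue *mad_queue;	/* queue this entry sits on */
};

struct mad_queue {
	struct mad_list_head list;	/* list sentinel */
	int count;			/* posted-depth bookkeeping */
	struct qp_info *qp_info;	/* owning QP */
};

struct qp_info {
	struct mad_queue send_queue;
	struct mad_queue recv_queue;
};

struct mad_send_wr {
	struct mad_list_head mad_list;	/* embedded tracking entry */
	unsigned long user_wr_id;	/* client's original WR ID */
};

/* Posting: stash the address of the embedded entry in the WR ID. */
static unsigned long post_wr_id(struct mad_send_wr *wr)
{
	return (unsigned long)&wr->mad_list;
}

/* Completion: classify send vs. receive, then recover the request. */
static struct mad_send_wr *complete_wr_id(unsigned long wr_id,
					  int *was_send)
{
	struct mad_list_head *entry = (struct mad_list_head *)wr_id;
	struct qp_info *qp_info = entry->mad_queue->qp_info;

	*was_send = (entry->mad_queue == &qp_info->send_queue);
	return container_of(entry, struct mad_send_wr, mad_list);
}

The per-queue count in the sketch stands in for the bookkeeping that
the old send_posted_mad_count/recv_posted_mad_count variables provided.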

This change is for the openib-candidate branch; going forward, I intend
to create patches for the roland-merge branch.

- Sean


Index: access/mad.c
===================================================================
--- access/mad.c	(revision 1116)
+++ access/mad.c	(working copy)
@@ -81,9 +81,8 @@
 static int add_mad_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			   struct ib_mad_agent_private *priv);
 static void remove_mad_reg_req(struct ib_mad_agent_private *priv); 
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-				   struct ib_qp *qp);
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *priv);
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info);
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info);
 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
 static void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
 				    struct ib_mad_send_wc *mad_send_wc);
@@ -130,6 +129,19 @@
 		0 : mgmt_class;
 }
 
+static int get_spl_qp_index(enum ib_qp_type qp_type)
+{
+	switch (qp_type)
+	{
+	case IB_QPT_SMI:
+		return 0;
+	case IB_QPT_GSI:
+		return 1;
+	default:
+		return -1;
+	}
+}
+
 /*
  * ib_register_mad_agent - Register to send/receive MADs
  */
@@ -148,12 +160,13 @@
 	struct ib_mad_reg_req *reg_req = NULL;
 	struct ib_mad_mgmt_class_table *class;
 	struct ib_mad_mgmt_method_table *method;
-	int ret2;
+	int ret2, qpn;
 	unsigned long flags;
 	u8 mgmt_class;
 
 	/* Validate parameters */
-	if (qp_type != IB_QPT_GSI && qp_type != IB_QPT_SMI) {
+	qpn = get_spl_qp_index(qp_type);
+	if (qpn == -1) {
 		ret = ERR_PTR(-EINVAL);
 		goto error1;
 	}
@@ -225,14 +238,14 @@
  
 	/* Now, fill in the various structures */
 	memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
-	mad_agent_priv->port_priv = port_priv;
+	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
 	mad_agent_priv->reg_req = reg_req;
 	mad_agent_priv->rmpp_version = rmpp_version;
 	mad_agent_priv->agent.device = device;
 	mad_agent_priv->agent.recv_handler = recv_handler;
 	mad_agent_priv->agent.send_handler = send_handler;
 	mad_agent_priv->agent.context = context;
-	mad_agent_priv->agent.qp = port_priv->qp[qp_type];
+	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
 	mad_agent_priv->agent.port_num = port_num;
 
 	spin_lock_irqsave(&port_priv->reg_lock, flags);
@@ -256,6 +269,7 @@
 			}
 		}
 	}
+
 	ret2 = add_mad_reg_req(mad_reg_req, mad_agent_priv);
 	if (ret2) {
 		ret = ERR_PTR(ret2);	
@@ -272,7 +286,6 @@
 	INIT_WORK(&mad_agent_priv->work, timeout_sends, mad_agent_priv);
 	atomic_set(&mad_agent_priv->refcount, 1);
 	init_waitqueue_head(&mad_agent_priv->wait);
-	mad_agent_priv->port_priv = port_priv;
 
 	return &mad_agent_priv->agent;
 
@@ -292,6 +305,7 @@
 int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
 {
 	struct ib_mad_agent_private *mad_agent_priv;
+	struct ib_mad_port_private *port_priv;
 	unsigned long flags;
 
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
@@ -305,13 +319,14 @@
 	 */
 	cancel_mads(mad_agent_priv);
 
+	port_priv = mad_agent_priv->qp_info->port_priv;
 	cancel_delayed_work(&mad_agent_priv->work);
-	flush_workqueue(mad_agent_priv->port_priv->wq);
+	flush_workqueue(port_priv->wq);
 
-	spin_lock_irqsave(&mad_agent_priv->port_priv->reg_lock, flags);
+	spin_lock_irqsave(&port_priv->reg_lock, flags);
 	remove_mad_reg_req(mad_agent_priv);
 	list_del(&mad_agent_priv->agent_list);
-	spin_unlock_irqrestore(&mad_agent_priv->port_priv->reg_lock, flags);
+	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
 
 	/* XXX: Cleanup pending RMPP receives for this agent */
 
@@ -326,30 +341,51 @@
 }
 EXPORT_SYMBOL(ib_unregister_mad_agent);
 
+static void queue_mad(struct ib_mad_queue *mad_queue,
+		      struct ib_mad_list_head *mad_list)
+{
+	unsigned long flags;
+
+	mad_list->mad_queue = mad_queue;
+	spin_lock_irqsave(&mad_queue->lock, flags);
+	list_add_tail(&mad_list->list, &mad_queue->list);
+	mad_queue->count++;
+	spin_unlock_irqrestore(&mad_queue->lock, flags);
+}
+
+static void dequeue_mad(struct ib_mad_list_head *mad_list)
+{
+	struct ib_mad_queue *mad_queue;
+	unsigned long flags;
+
+	BUG_ON(!mad_list->mad_queue);
+	mad_queue = mad_list->mad_queue;
+	spin_lock_irqsave(&mad_queue->lock, flags);
+	list_del(&mad_list->list);
+	mad_queue->count--;
+	spin_unlock_irqrestore(&mad_queue->lock, flags);
+}
+
 static int ib_send_mad(struct ib_mad_agent_private *mad_agent_priv,
 		       struct ib_mad_send_wr_private *mad_send_wr,
 		       struct ib_send_wr *send_wr,
 		       struct ib_send_wr **bad_send_wr)
 {
-	struct ib_mad_port_private *port_priv;
-	unsigned long flags;
+	struct ib_mad_qp_info *qp_info;
 	int ret;
 
-	port_priv = mad_agent_priv->port_priv;
-
 	/* Replace user's WR ID with our own to find WR upon completion */
+	qp_info = mad_agent_priv->qp_info;
 	mad_send_wr->wr_id = send_wr->wr_id;
-	send_wr->wr_id = (unsigned long)mad_send_wr;
+	send_wr->wr_id = (unsigned long)&mad_send_wr->mad_list;
+	queue_mad(&qp_info->send_queue, &mad_send_wr->mad_list);
 
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
 	ret = ib_post_send(mad_agent_priv->agent.qp, send_wr, bad_send_wr);
-	if (!ret) {
-		list_add_tail(&mad_send_wr->send_list,
-			      &port_priv->send_posted_mad_list);
-		port_priv->send_posted_mad_count++;
-	} else 
+	if (ret) {
 		printk(KERN_NOTICE PFX "ib_post_send failed ret = %d\n", ret);
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+		dequeue_mad(&mad_send_wr->mad_list);
+		*bad_send_wr = send_wr;
+	}
 	return ret;
 }
 
@@ -364,7 +400,6 @@
 	int ret;
 	struct ib_send_wr	*cur_send_wr, *next_send_wr;
 	struct ib_mad_agent_private	*mad_agent_priv;
-	struct ib_mad_port_private	*port_priv;
 
 	/* Validate supplied parameters */
 	if (!bad_send_wr)
@@ -379,7 +414,6 @@
 
 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
 				      agent);
-	port_priv = mad_agent_priv->port_priv;
 
 	/* Walk list of send WRs and post each on send list */
 	cur_send_wr = send_wr;
@@ -421,6 +455,7 @@
 				  cur_send_wr, bad_send_wr);
 		if (ret) {
 			/* Handle QP overrun separately... -ENOMEM */
+			/* Handle posting when QP is in error state... */
 
 			/* Fail send request */
 			spin_lock_irqsave(&mad_agent_priv->lock, flags);
@@ -587,7 +622,7 @@
 	if (!mad_reg_req)
 		return 0;
 
-	private = priv->port_priv;
+	private = priv->qp_info->port_priv;
 	mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
 	class = &private->version[mad_reg_req->mgmt_class_version];
 	if (!*class) {
@@ -663,7 +698,7 @@
 		goto out;
 	}
 
-	port_priv = agent_priv->port_priv;
+	port_priv = agent_priv->qp_info->port_priv;
 	class = port_priv->version[agent_priv->reg_req->mgmt_class_version];
 	if (!class) {
 		printk(KERN_ERR PFX "No class table yet MAD registration "
@@ -695,20 +730,6 @@
 	return;
 }
 
-static int convert_qpnum(u32 qp_num)
-{
-	/* 
-	 * XXX: No redirection currently
-	 * QP0 and QP1 only
-	 * Ultimately, will need table of QP numbers and table index
-	 * as QP numbers will not be packed once redirection supported
-	 */
-	if (qp_num > 1) {
-		return -1;
-	}
-	return qp_num;
-}
-
 static int response_mad(struct ib_mad *mad)
 {
 	/* Trap represses are responses although response bit is reset */
@@ -913,55 +934,21 @@
 static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 				     struct ib_wc *wc)
 {
+	struct ib_mad_qp_info *qp_info;
 	struct ib_mad_private_header *mad_priv_hdr;
-	struct ib_mad_recv_buf *rbuf;
 	struct ib_mad_private *recv;
-	union ib_mad_recv_wrid wrid;
-	unsigned long flags;
-	u32 qp_num;
+	struct ib_mad_list_head *mad_list;
 	struct ib_mad_agent_private *mad_agent = NULL;
-	int solicited, qpn;
-
-	/* For receive, QP number is field in the WC WRID */
-	wrid.wrid = wc->wr_id;
-	qp_num = wrid.wrid_field.qpn;
-	qpn = convert_qpnum(qp_num);
-	if (qpn == -1) {
-		ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
-		printk(KERN_ERR PFX "Packet received on unknown QPN %d\n",
-		       qp_num);
-		return;
-	}
-	
-	/* 
-	 * Completion corresponds to first entry on 
-	 * posted MAD receive list based on WRID in completion
-	 */
-	spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-	if (!list_empty(&port_priv->recv_posted_mad_list[qpn])) {
-		rbuf = list_entry(port_priv->recv_posted_mad_list[qpn].next,
-				 struct ib_mad_recv_buf,
-				 list);
-		mad_priv_hdr = container_of(rbuf, struct ib_mad_private_header,
-					    recv_buf);
-		recv = container_of(mad_priv_hdr, struct ib_mad_private,
-				    header);
-	
-		/* Remove from posted receive MAD list */
-		list_del(&recv->header.recv_buf.list);
-		port_priv->recv_posted_mad_count[qpn]--;
-
-	} else {
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
-		ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
-		printk(KERN_ERR PFX "Receive completion WR ID 0x%Lx on QP %d "
-		       "with no posted receive\n",
-		       (unsigned long long) wc->wr_id,
-		       qp_num);
-		return;
-	}
-	spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+	int solicited;
+	unsigned long flags;
 
+	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+	qp_info = mad_list->mad_queue->qp_info;
+	dequeue_mad(mad_list);
+
+	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
+				    mad_list);
+	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
 	pci_unmap_single(port_priv->device->dma_device,
 			 pci_unmap_addr(&recv->header, mapping),
 			 sizeof(struct ib_mad_private) -
@@ -976,7 +963,7 @@
 	recv->header.recv_buf.grh = &recv->grh;
 
 	/* Validate MAD */
-	if (!validate_mad(recv->header.recv_buf.mad, qp_num))
+	if (!validate_mad(recv->header.recv_buf.mad, qp_info->qp->qp_num))
 		goto out;
 
 	/* Snoop MAD ? */
@@ -1009,7 +996,7 @@
 	}
 
 	/* Post another receive request for this QP */
-	ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
+	ib_mad_post_receive_mad(qp_info);
 }
 
 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
@@ -1030,7 +1017,8 @@
 			delay = mad_send_wr->timeout - jiffies;
 			if ((long)delay <= 0)
 				delay = 1;
-			queue_delayed_work(mad_agent_priv->port_priv->wq,
+			queue_delayed_work(mad_agent_priv->qp_info->
+					   port_priv->wq,
 					   &mad_agent_priv->work, delay);
 		}
 	}
@@ -1060,7 +1048,7 @@
 	/* Reschedule a work item if we have a shorter timeout */
 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
 		cancel_delayed_work(&mad_agent_priv->work);
-		queue_delayed_work(mad_agent_priv->port_priv->wq,
+		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
 				   &mad_agent_priv->work, delay);
 	}
 }
@@ -1114,39 +1102,15 @@
 				     struct ib_wc *wc)
 {
 	struct ib_mad_send_wr_private	*mad_send_wr;
-	unsigned long			flags;
-
-	/* Completion corresponds to first entry on posted MAD send list */
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	if (list_empty(&port_priv->send_posted_mad_list)) {
-		printk(KERN_ERR PFX "Send completion WR ID 0x%Lx but send "
-		       "list is empty\n", (unsigned long long) wc->wr_id);
-		goto error;
-	}
-
-	mad_send_wr = list_entry(port_priv->send_posted_mad_list.next,
-				 struct ib_mad_send_wr_private,
-				 send_list);
-	if (wc->wr_id != (unsigned long)mad_send_wr) {
-		printk(KERN_ERR PFX "Send completion WR ID 0x%Lx doesn't match "
-		       "posted send WR ID 0x%lx\n",
-		       (unsigned long long) wc->wr_id,
-		       (unsigned long)mad_send_wr);
-		goto error;
-	}
-
-	/* Remove from posted send MAD list */
-	list_del(&mad_send_wr->send_list);
-	port_priv->send_posted_mad_count--;
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+	struct ib_mad_list_head		*mad_list;
 
+	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
+	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
+				   mad_list);
+	dequeue_mad(mad_list);
 	/* Restore client wr_id in WC */
 	wc->wr_id = mad_send_wr->wr_id;
 	ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc*)wc);
-	return;
-
-error:
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
 }
 
 /*
@@ -1156,28 +1120,33 @@
 {
 	struct ib_mad_port_private *port_priv;
 	struct ib_wc wc;
+	struct ib_mad_list_head *mad_list;
+	struct ib_mad_qp_info *qp_info;
 
 	port_priv = (struct ib_mad_port_private*)data;
 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
 	
 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
 		if (wc.status != IB_WC_SUCCESS) {
-			printk(KERN_ERR PFX "Completion error %d WRID 0x%Lx\n",
-                                       wc.status, (unsigned long long) wc.wr_id);
+			/* Determine if failure was a send or receive. */
+			mad_list = (struct ib_mad_list_head *)
+				   (unsigned long)wc.wr_id;
+			qp_info = mad_list->mad_queue->qp_info;
+			if (mad_list->mad_queue == &qp_info->send_queue)
+				wc.opcode = IB_WC_SEND;
+			else
+				wc.opcode = IB_WC_RECV;
+		}
+		switch (wc.opcode) {
+		case IB_WC_SEND:
 			ib_mad_send_done_handler(port_priv, &wc);
-		} else {
-			switch (wc.opcode) {
-			case IB_WC_SEND:
-				ib_mad_send_done_handler(port_priv, &wc);
-				break;
-			case IB_WC_RECV:
-				ib_mad_recv_done_handler(port_priv, &wc);
-				break;
-			default:
-				printk(KERN_ERR PFX "Wrong Opcode 0x%x on completion\n",
-				       wc.opcode);
-				break;
-			}
+			break;
+		case IB_WC_RECV:
+			ib_mad_recv_done_handler(port_priv, &wc);
+			break;
+		default:
+			BUG_ON(1);
+			break;
 		}
 	}
 }
@@ -1307,7 +1276,8 @@
 			delay = mad_send_wr->timeout - jiffies;
 			if ((long)delay <= 0)
 				delay = 1;
-			queue_delayed_work(mad_agent_priv->port_priv->wq,
+			queue_delayed_work(mad_agent_priv->qp_info->
+					   port_priv->wq,
 					   &mad_agent_priv->work, delay);
 			break;
 		}
@@ -1332,24 +1302,13 @@
 	queue_work(port_priv->wq, &port_priv->work);
 }
 
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-				   struct ib_qp *qp)
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info)
 {
 	struct ib_mad_private *mad_priv;
 	struct ib_sge sg_list;
 	struct ib_recv_wr recv_wr;
 	struct ib_recv_wr *bad_recv_wr;
-	unsigned long flags;
 	int ret;
-	union ib_mad_recv_wrid wrid;
-	int qpn;
-
-
-	qpn = convert_qpnum(qp->qp_num);
-	if (qpn == -1) {
-		printk(KERN_ERR PFX "Post receive to invalid QPN %d\n", qp->qp_num);
-		return -EINVAL;
-	}
 
 	/* 
 	 * Allocate memory for receive buffer.
@@ -1367,47 +1326,32 @@
 	}
 
 	/* Setup scatter list */
-	sg_list.addr = pci_map_single(port_priv->device->dma_device,
+	sg_list.addr = pci_map_single(qp_info->port_priv->device->dma_device,
 				      &mad_priv->grh,
 				      sizeof *mad_priv -
 					sizeof mad_priv->header,
 				      PCI_DMA_FROMDEVICE);
 	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
-	sg_list.lkey = (*port_priv->mr).lkey;
+	sg_list.lkey = (*qp_info->port_priv->mr).lkey;
 
 	/* Setup receive WR */
 	recv_wr.next = NULL;
 	recv_wr.sg_list = &sg_list;
 	recv_wr.num_sge = 1;
 	recv_wr.recv_flags = IB_RECV_SIGNALED;
-	wrid.wrid_field.index = port_priv->recv_wr_index[qpn]++;
-	wrid.wrid_field.qpn = qp->qp_num;
-	recv_wr.wr_id = wrid.wrid;
-
-	/* Link receive WR into posted receive MAD list */
-	spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-	list_add_tail(&mad_priv->header.recv_buf.list,
-		      &port_priv->recv_posted_mad_list[qpn]);
-	port_priv->recv_posted_mad_count[qpn]++;
-	spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
-
+	recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
 	pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 
-	/* Now, post receive WR */
-	ret = ib_post_recv(qp, &recv_wr, &bad_recv_wr);
+	/* Post receive WR. */
+	queue_mad(&qp_info->recv_queue, &mad_priv->header.mad_list);
+	ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
 	if (ret) {
-
-		pci_unmap_single(port_priv->device->dma_device,
+		dequeue_mad(&mad_priv->header.mad_list);
+		pci_unmap_single(qp_info->port_priv->device->dma_device,
 				 pci_unmap_addr(&mad_priv->header, mapping),
 				 sizeof *mad_priv - sizeof mad_priv->header,
 				 PCI_DMA_FROMDEVICE);
 
-		/* Unlink from posted receive MAD list */
-		spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-		list_del(&mad_priv->header.recv_buf.list);
-		port_priv->recv_posted_mad_count[qpn]--;
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
-
 		kmem_cache_free(ib_mad_cache, mad_priv);
 		printk(KERN_NOTICE PFX "ib_post_recv WRID 0x%Lx failed ret = %d\n",
 		       (unsigned long long) recv_wr.wr_id, ret);
@@ -1420,79 +1364,72 @@
 /*
  * Allocate receive MADs and post receive WRs for them 
  */
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *port_priv)
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info)
 {
-	int i, j;
+	int i, ret;
 
 	for (i = 0; i < IB_MAD_QP_RECV_SIZE; i++) {
-		for (j = 0; j < IB_MAD_QPS_CORE; j++) {
-			if (ib_mad_post_receive_mad(port_priv,
-						    port_priv->qp[j])) {
-				printk(KERN_ERR PFX "receive post %d failed "
-				       "on %s port %d\n", i + 1,
-				       port_priv->device->name,
-				       port_priv->port_num);
-			}
+		ret = ib_mad_post_receive_mad(qp_info);
+		if (ret) {
+			printk(KERN_ERR PFX "receive post %d failed "
+				"on %s port %d\n", i + 1,
+				qp_info->port_priv->device->name,
+				qp_info->port_priv->port_num);
+			break;
 		}
 	}
-
-	return 0;
+	return ret;
 }
 
 /*
  * Return all the posted receive MADs
  */
-static void ib_mad_return_posted_recv_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_recv_mads(struct ib_mad_qp_info *qp_info)
 {
-	int i;
 	unsigned long flags;
 	struct ib_mad_private_header *mad_priv_hdr;
-	struct ib_mad_recv_buf *rbuf;
 	struct ib_mad_private *recv;
+	struct ib_mad_list_head *mad_list;
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-		while (!list_empty(&port_priv->recv_posted_mad_list[i])) {
+	spin_lock_irqsave(&qp_info->recv_queue.lock, flags);
+	while (!list_empty(&qp_info->recv_queue.list)) {
 
-			rbuf = list_entry(port_priv->recv_posted_mad_list[i].next,
-					  struct ib_mad_recv_buf, list);
-			mad_priv_hdr = container_of(rbuf,
-						    struct ib_mad_private_header,
-						    recv_buf);
-			recv = container_of(mad_priv_hdr,
-					    struct ib_mad_private, header);
+		mad_list = list_entry(qp_info->recv_queue.list.next,
+				      struct ib_mad_list_head, list);
+		mad_priv_hdr = container_of(mad_list,
+					    struct ib_mad_private_header,
+					    mad_list);
+		recv = container_of(mad_priv_hdr, struct ib_mad_private,
+				    header);
 
-			/* Remove for posted receive MAD list */
-			list_del(&recv->header.recv_buf.list);
- 
-			/* Undo PCI mapping */
-			pci_unmap_single(port_priv->device->dma_device,
-					 pci_unmap_addr(&recv->header, mapping),
-					 sizeof(struct ib_mad_private) -
-					 sizeof(struct ib_mad_private_header),
-					 PCI_DMA_FROMDEVICE);
-
-			kmem_cache_free(ib_mad_cache, recv);
-		}
+		/* Remove from posted receive MAD list */
+		list_del(&mad_list->list);
 
-		INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
-		port_priv->recv_posted_mad_count[i] = 0;
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+		/* Undo PCI mapping */
+		pci_unmap_single(qp_info->port_priv->device->dma_device,
+				 pci_unmap_addr(&recv->header, mapping),
+				 sizeof(struct ib_mad_private) -
+				 sizeof(struct ib_mad_private_header),
+				 PCI_DMA_FROMDEVICE);
+		kmem_cache_free(ib_mad_cache, recv);
 	}
+
+	qp_info->recv_queue.count = 0;
+	spin_unlock_irqrestore(&qp_info->recv_queue.lock, flags);
 }
 
 /*
  * Return all the posted send MADs
  */
-static void ib_mad_return_posted_send_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_send_mads(struct ib_mad_qp_info *qp_info)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	/* Just clear port send posted MAD list */
-	INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-	port_priv->send_posted_mad_count = 0;
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+	/* Just clear port send posted MAD list... revisit!!! */
+	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
+	INIT_LIST_HEAD(&qp_info->send_queue.list);
+	qp_info->send_queue.count = 0;
+	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
 }
 
 /*
@@ -1618,35 +1555,21 @@
 	int ret, i, ret2;
 
 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_init(port_priv->qp[i]);
+		ret = ib_mad_change_qp_state_to_init(port_priv->qp_info[i].qp);
 		if (ret) {
 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
 			       "INIT\n", i);
-			return ret;
+			goto error;
 		}
-	}
-
-	ret = ib_mad_post_receive_mads(port_priv);
-	if (ret) {
-		printk(KERN_ERR PFX "Couldn't post receive requests\n");
-		goto error;
-	}
-
-	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
-	if (ret) {
-		printk(KERN_ERR PFX "Failed to request completion notification\n");
-		goto error;
-	}
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_rtr(port_priv->qp[i]);
+		ret = ib_mad_change_qp_state_to_rtr(port_priv->qp_info[i].qp);
 		if (ret) {
 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
 			       "RTR\n", i);
 			goto error;
 		}
 
-		ret = ib_mad_change_qp_state_to_rts(port_priv->qp[i]);
+		ret = ib_mad_change_qp_state_to_rts(port_priv->qp_info[i].qp);
 		if (ret) {
 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
 			       "RTS\n", i);
@@ -1654,17 +1577,31 @@
 		}
 	}
 
+	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		printk(KERN_ERR PFX "Failed to request completion notification\n");
+		goto error;
+	}
+
+	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i]);
+		if (ret) {
+			printk(KERN_ERR PFX "Couldn't post receive requests\n");
+			goto error;
+		}
+	}
 	return 0;
+
 error:
-	ib_mad_return_posted_recv_mads(port_priv);
 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret2 = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
+		ib_mad_return_posted_recv_mads(&port_priv->qp_info[i]);
+		ret2 = ib_mad_change_qp_state_to_reset(port_priv->
+						       qp_info[i].qp);
 		if (ret2) {
 			printk(KERN_ERR PFX "ib_mad_port_start: Couldn't "
 			       "change QP%d state to RESET\n", i);
 		}
 	}
-
 	return ret;
 }
 
@@ -1676,16 +1613,64 @@
 	int i, ret;
 
 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
+		ret = ib_mad_change_qp_state_to_reset(port_priv->qp_info[i].qp);
 		if (ret) {
 			printk(KERN_ERR PFX "ib_mad_port_stop: Couldn't change "
 			       "%s port %d QP%d state to RESET\n",
 			       port_priv->device->name, port_priv->port_num, i);
 		}
+		ib_mad_return_posted_recv_mads(&port_priv->qp_info[i]);
+		ib_mad_return_posted_send_mads(&port_priv->qp_info[i]);
 	}
+}
 
-	ib_mad_return_posted_recv_mads(port_priv);
-	ib_mad_return_posted_send_mads(port_priv);
+static void init_mad_queue(struct ib_mad_qp_info *qp_info,
+			   struct ib_mad_queue *mad_queue)
+{
+	mad_queue->qp_info = qp_info;
+	mad_queue->count = 0;
+	spin_lock_init(&mad_queue->lock);
+	INIT_LIST_HEAD(&mad_queue->list);
+}
+
+static int create_mad_qp(struct ib_mad_port_private *port_priv,
+			 struct ib_mad_qp_info *qp_info,
+			 enum ib_qp_type qp_type)
+{
+	struct ib_qp_init_attr	qp_init_attr;
+	int ret;
+
+	qp_info->port_priv = port_priv;
+	init_mad_queue(qp_info, &qp_info->send_queue);
+	init_mad_queue(qp_info, &qp_info->recv_queue);
+
+	memset(&qp_init_attr, 0, sizeof qp_init_attr);
+	qp_init_attr.send_cq = port_priv->cq;
+	qp_init_attr.recv_cq = port_priv->cq;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
+	qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
+	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
+	qp_init_attr.qp_type = qp_type;
+	qp_init_attr.port_num = port_priv->port_num;
+	qp_info->qp = ib_create_qp(port_priv->pd, &qp_init_attr);
+	if (IS_ERR(qp_info->qp)) {
+		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
+		       get_spl_qp_index(qp_type));
+		ret = PTR_ERR(qp_info->qp);
+		goto error;		
+	}
+	return 0;
+
+error:
+	return ret;
+}
+
+static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
+{
+	ib_destroy_qp(qp_info->qp);
 }
 
 /*
@@ -1694,7 +1679,7 @@
  */
 static int ib_mad_port_open(struct ib_device *device, int port_num)
 {
-	int ret, cq_size, i;
+	int ret, cq_size;
 	u64 iova = 0;
 	struct ib_phys_buf buf_list = {
 		.addr = 0,
@@ -1749,38 +1734,15 @@
 		goto error5;
 	}
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		struct ib_qp_init_attr	qp_init_attr;
-
-		memset(&qp_init_attr, 0, sizeof qp_init_attr);
-		qp_init_attr.send_cq = port_priv->cq;
-		qp_init_attr.recv_cq = port_priv->cq;
-		qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-		qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
-		qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-		qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
-		qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
-		qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
-		qp_init_attr.qp_type = i;	/* Relies on ib_qp_type enum ordering of IB_QPT_SMI and IB_QPT_GSI */
-		qp_init_attr.port_num = port_priv->port_num;
-		port_priv->qp[i] = ib_create_qp(port_priv->pd, &qp_init_attr);
-		if (IS_ERR(port_priv->qp[i])) {
-			printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n", i);
-			ret = PTR_ERR(port_priv->qp[i]);
-			if (i == 0)
-				goto error6;		
-			else
-				goto error7;
-		}
-	}
+	ret = create_mad_qp(port_priv, &port_priv->qp_info[0], IB_QPT_SMI);
+	if (ret)
+		goto error6;
+	ret = create_mad_qp(port_priv, &port_priv->qp_info[1], IB_QPT_GSI);
+	if (ret)
+		goto error7;
 
 	spin_lock_init(&port_priv->reg_lock);
-	spin_lock_init(&port_priv->recv_list_lock);
-	spin_lock_init(&port_priv->send_list_lock);
 	INIT_LIST_HEAD(&port_priv->agent_list);
-	INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-	for (i = 0; i < IB_MAD_QPS_CORE; i++)
-		INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
 
 	port_priv->wq = create_workqueue("ib_mad");
 	if (!port_priv->wq) {
@@ -1798,15 +1760,14 @@
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
-
 	return 0;
 
 error9:
 	destroy_workqueue(port_priv->wq);
 error8:
-	ib_destroy_qp(port_priv->qp[1]);
+	destroy_mad_qp(&port_priv->qp_info[1]);
 error7:
-	ib_destroy_qp(port_priv->qp[0]);
+	destroy_mad_qp(&port_priv->qp_info[0]);
 error6:
 	ib_dereg_mr(port_priv->mr);
 error5:
@@ -1842,8 +1803,8 @@
 	ib_mad_port_stop(port_priv);
 	flush_workqueue(port_priv->wq);
 	destroy_workqueue(port_priv->wq);
-	ib_destroy_qp(port_priv->qp[1]);
-	ib_destroy_qp(port_priv->qp[0]);
+	destroy_mad_qp(&port_priv->qp_info[1]);
+	destroy_mad_qp(&port_priv->qp_info[0]);
 	ib_dereg_mr(port_priv->mr);
 	ib_dealloc_pd(port_priv->pd);
 	ib_destroy_cq(port_priv->cq);
Index: access/mad_priv.h
===================================================================
--- access/mad_priv.h	(revision 1116)
+++ access/mad_priv.h	(working copy)
@@ -79,16 +79,13 @@
 #define MAX_MGMT_CLASS		80	
 #define MAX_MGMT_VERSION	8
 
-
-union ib_mad_recv_wrid {
-	u64 wrid;
-	struct {
-		u32 index;
-		u32 qpn;
-	} wrid_field;
+struct ib_mad_list_head {
+	struct list_head list;
+	struct ib_mad_queue *mad_queue;
 };
 
 struct ib_mad_private_header {
+	struct ib_mad_list_head mad_list;
 	struct ib_mad_recv_wc recv_wc;
 	struct ib_mad_recv_buf recv_buf;
 	DECLARE_PCI_UNMAP_ADDR(mapping)
@@ -108,7 +105,7 @@
 	struct list_head agent_list;
 	struct ib_mad_agent agent;
 	struct ib_mad_reg_req *reg_req;
-	struct ib_mad_port_private *port_priv;
+	struct ib_mad_qp_info *qp_info;
 
 	spinlock_t lock;
 	struct list_head send_list;
@@ -122,7 +119,7 @@
 };
 
 struct ib_mad_send_wr_private {
-	struct list_head send_list;
+	struct ib_mad_list_head mad_list;
 	struct list_head agent_list;
 	struct ib_mad_agent *agent;
 	u64 wr_id;			/* client WR ID */
@@ -140,11 +137,25 @@
 	struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
 };
 
+struct ib_mad_queue {
+	spinlock_t lock;
+	struct list_head list;
+	int count;
+	struct ib_mad_qp_info *qp_info;
+};
+
+struct ib_mad_qp_info {
+	struct ib_mad_port_private *port_priv;
+	struct ib_qp *qp;
+	struct ib_mad_queue send_queue;
+	struct ib_mad_queue recv_queue;
+	/* struct ib_mad_queue overflow_queue; */
+};
+
 struct ib_mad_port_private {
 	struct list_head port_list;
 	struct ib_device *device;
 	int port_num;
-	struct ib_qp *qp[IB_MAD_QPS_CORE];
 	struct ib_cq *cq;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
@@ -154,15 +165,7 @@
 	struct list_head agent_list;
 	struct workqueue_struct *wq;
 	struct work_struct work;
-
-	spinlock_t send_list_lock;
-	struct list_head send_posted_mad_list;
-	int send_posted_mad_count;
-
-	spinlock_t recv_list_lock;
-	struct list_head recv_posted_mad_list[IB_MAD_QPS_CORE];
-	int recv_posted_mad_count[IB_MAD_QPS_CORE];
-	u32 recv_wr_index[IB_MAD_QPS_CORE];
+	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
 #endif	/* __IB_MAD_PRIV_H__ */
