[openib-general] [PATCH] separates QP0/1 interactions

Sean Hefty mshefty at ichips.intel.com
Thu Sep 23 17:17:45 PDT 2004


The following patch separates the interactions between QP 0 and 1 in the MAD code.  Each QP now has its own queuing, locking, completion handling, error handling, etc.

I have a list of several changes for the MAD code that I will try to get to tomorrow.  Please let me know if you have any questions.

- Sean

--
Index: access/ib_mad_priv.h
===================================================================
--- access/ib_mad_priv.h	(revision 880)
+++ access/ib_mad_priv.h	(working copy)
@@ -95,6 +95,7 @@
 
 struct ib_mad_agent_private {
 	struct list_head agent_list;
+	struct ib_mad_qp_info *qp_info;
 	struct ib_mad_agent agent;
 	struct ib_mad_reg_req *reg_req;
 	u8 rmpp_version;
@@ -115,17 +116,27 @@
 	struct ib_mad_mgmt_method_table *method_table[MAX_MGMT_CLASS];
 };
 
-struct ib_mad_thread_private {
+struct ib_mad_qp_info {
+	struct ib_mad_port_private *port_priv;
+	struct ib_qp *qp;
+	struct ib_cq *cq;
+
+	spinlock_t	 send_list_lock;
+	struct list_head send_posted_mad_list;
+	int		 send_posted_mad_count;
+
+	spinlock_t	 recv_list_lock;
+	struct list_head recv_posted_mad_list;
+	int		 recv_posted_mad_count;
+
+	struct task_struct	*mad_thread;
 	wait_queue_head_t	wait;
 };
 
 struct ib_mad_port_private {
 	struct list_head port_list;
-	struct task_struct *mad_thread;
 	struct ib_device *device;
 	int port_num;
-	struct ib_qp *qp[IB_MAD_QPS_SUPPORTED];
-	struct ib_cq *cq;
 	struct ib_pd *pd;
 	struct ib_mr *mr;
 
@@ -133,15 +144,7 @@
 	struct ib_mad_mgmt_class_table *version[MAX_MGMT_VERSION];
 	struct list_head agent_list;
 
-	spinlock_t send_list_lock;
-	struct list_head send_posted_mad_list;
-	int send_posted_mad_count;
-
-	spinlock_t recv_list_lock;
-	struct list_head recv_posted_mad_list[IB_MAD_QPS_SUPPORTED];
-	int recv_posted_mad_count[IB_MAD_QPS_SUPPORTED];
-
-	struct ib_mad_thread_private mad_thread_private;
+	struct ib_mad_qp_info qp_info[IB_MAD_QPS_CORE];
 };
 
 #endif	/* __IB_MAD_PRIV_H__ */
Index: access/ib_mad.c
===================================================================
--- access/ib_mad.c	(revision 880)
+++ access/ib_mad.c	(working copy)
@@ -81,12 +81,10 @@
 static int add_mad_reg_req(struct ib_mad_reg_req *mad_reg_req,
 			   struct ib_mad_agent_private *priv);
 static void remove_mad_reg_req(struct ib_mad_agent_private *priv); 
-static int ib_mad_port_restart(struct ib_mad_port_private *priv);
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-				   struct ib_qp *qp);
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *priv);
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info);
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info);
 static inline u8 convert_mgmt_class(u8 mgmt_class);
-
+static int ib_mad_restart_qp(struct ib_mad_qp_info *qp_info);
 
 /*
  * ib_register_mad_agent - Register to send/receive MADs
@@ -205,11 +203,12 @@
 	memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
 	mad_agent_priv->reg_req = reg_req;
 	mad_agent_priv->rmpp_version = rmpp_version;
+	mad_agent_priv->qp_info = &port_priv->qp_info[qp_type];
 	mad_agent_priv->agent.device = device;
 	mad_agent_priv->agent.recv_handler = recv_handler;
 	mad_agent_priv->agent.send_handler = send_handler;
 	mad_agent_priv->agent.context = context;
-	mad_agent_priv->agent.qp = port_priv->qp[qp_type];
+	mad_agent_priv->agent.qp = port_priv->qp_info[qp_type].qp;
 	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
 
 	ret2 = add_mad_reg_req(mad_reg_req, mad_agent_priv);
@@ -287,6 +286,7 @@
 	struct ib_send_wr	*cur_send_wr, *next_send_wr;
 	struct ib_send_wr	wr;
 	struct ib_send_wr	*bad_wr;
+	struct ib_mad_agent_private *mad_agent_priv;
 	struct ib_mad_send_wr_private *mad_send_wr;
 	unsigned long flags;
 
@@ -297,6 +297,9 @@
 		return -EINVAL;
 	}
 
+	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private, 
+				      agent);
+
 	/* Walk list of send WRs and post each on send list */
 	cur_send_wr = send_wr;
 	while (cur_send_wr) {
@@ -330,19 +333,22 @@
 		wr.send_flags = IB_SEND_SIGNALED; /* cur_send_wr->send_flags ? */
 
 		/* Link send WR into posted send MAD list */
-		spin_lock_irqsave(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+		spin_lock_irqsave(&mad_agent_priv->qp_info->send_list_lock, flags);
 		list_add_tail(&mad_send_wr->send_list,
-			      &((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_list);
-		((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count++;
-		spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+			      &mad_agent_priv->qp_info->send_posted_mad_list);
+		mad_agent_priv->qp_info->send_posted_mad_count++;
+		spin_unlock_irqrestore(&mad_agent_priv->qp_info->send_list_lock,
+				       flags);
 
 		ret = ib_post_send(mad_agent->qp, &wr, &bad_wr);
 		if (ret) {
 			/* Unlink from posted send MAD list */
-			spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+			spin_unlock_irqrestore(
+				&mad_agent_priv->qp_info->send_list_lock, flags);
 			list_del(&mad_send_wr->send_list);
-			((struct ib_mad_port_private *)mad_agent->device->mad)->send_posted_mad_count--;
-			spin_unlock_irqrestore(&((struct ib_mad_port_private *)mad_agent->device->mad)->send_list_lock, flags);
+			mad_agent_priv->qp_info->send_posted_mad_count--;
+			spin_unlock_irqrestore(
+				&mad_agent_priv->qp_info->send_list_lock, flags);
 			*bad_send_wr = cur_send_wr;
 			printk(KERN_NOTICE "ib_post_mad_send failed\n");
 			return ret;		
@@ -361,19 +367,32 @@
 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
 {
 	struct ib_mad_recv_buf *entry;
-	struct ib_mad_private *buffer = (struct ib_mad_private *)mad_recv_wc;
+	struct ib_mad_private_header *mad_private_header;
+	struct ib_mad_private *mad_private;
 
 	/*
 	 * Walk receive buffer list associated with this WC
 	 * No need to remove them from list of receive buffers
 	 */
+	mad_private_header = container_of(mad_recv_wc,
+					  struct ib_mad_private_header,
+					  recv_wc);
+	mad_private = container_of(mad_private_header,
+				   struct ib_mad_private,
+				   header);
+	
 	list_for_each_entry(entry, &mad_recv_wc->recv_buf->list, list) {
 		/* Free previous receive buffer */
-		kmem_cache_free(ib_mad_cache, buffer);
-		buffer = (void *)entry - sizeof(struct ib_mad_private_header);
+		kmem_cache_free(ib_mad_cache, mad_private);
+		mad_private_header = container_of(mad_recv_wc,
+					  	  struct ib_mad_private_header,
+					  	  recv_wc);
+		mad_private = container_of(mad_private_header,
+				   	   struct ib_mad_private,
+				   	   header);
 	}
 	/* Free last buffer */
-	kmem_cache_free(ib_mad_cache, buffer);
+	kmem_cache_free(ib_mad_cache, mad_private);
 }
 EXPORT_SYMBOL(ib_free_recv_mad);
 
@@ -567,20 +586,6 @@
 	}
 }
 
-static int convert_qpnum(u32 qp_num)
-{
-	/* 
-	 * No redirection currently!!!
-	 * QP0 and QP1 only
-	 * Ultimately, will need table of QP numbers and table index
-	 * as QP numbers will not be packed once redirection supported
-	 */
-	if (qp_num > 1) {
-		printk(KERN_ERR "QP number %d invalid\n", qp_num);
-	}
-	return qp_num;
-}
-
 static int response_mad(struct ib_mad *mad)
 {
 	/* Trap represses are responses although response bit is reset */
@@ -622,7 +627,7 @@
 	/* Whether MAD was solicited determines type of routing to MAD client */
 	if (solicited) {
 		/* Routing is based on high 32 bits of transaction ID of MAD  */
-		hi_tid = mad->mad_hdr.tid >> 32;
+		hi_tid = (u32)(mad->mad_hdr.tid >> 32);
 		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
 			if (entry->agent.hi_tid == hi_tid) {
 				mad_agent = entry;
@@ -631,7 +636,7 @@
 		}
 		if (!mad_agent) {
 			printk(KERN_ERR "No client 0x%x for received MAD\n",
-			       (u32)(mad->mad_hdr.tid >> 32));
+			       hi_tid);
 			goto ret;
 		}
 	} else {
@@ -643,12 +648,14 @@
 		}
 		version = port_priv->version[mad->mad_hdr.class_version];
 		if (!version) {
-			printk(KERN_ERR "MAD received for class version %d with no client\n", mad->mad_hdr.class_version);
+			printk(KERN_ERR "MAD received for class version %d with no client\n",
+			       mad->mad_hdr.class_version);
 			goto ret;
 		}
 		class = version->method_table[convert_mgmt_class(mad->mad_hdr.mgmt_class)];	
 		if (!class) {
-			printk(KERN_ERR "MAD receive for class %d with no client\n", mad->mad_hdr.mgmt_class);
+			printk(KERN_ERR "MAD receive for class %d with no client\n",
+			       mad->mad_hdr.mgmt_class);
 			goto ret;
 		}
 		mad_agent = class->agent[mad->mad_hdr.method & ~IB_MGMT_METHOD_RESP];		
@@ -684,48 +691,43 @@
 	return valid;
 }
 
-static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
+static void ib_mad_recv_done_handler(struct ib_mad_qp_info *qp_info,
 				     struct ib_wc *wc)
 {
 	struct ib_mad_private *recv;
 	unsigned long flags;
-	u32 qp_num;
 	struct ib_mad_agent_private *mad_agent;
 	int solicited;
 
-	/* For receive, WC WRID is the QP number */
-	qp_num = wc->wr_id;
-
 	/* 
 	 * Completion corresponds to first entry on 
 	 * posted MAD receive list based on WRID in completion
 	 */
-	spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-	if (!list_empty(&port_priv->recv_posted_mad_list[convert_qpnum(qp_num)])) {
-		recv = list_entry(&port_priv->recv_posted_mad_list[convert_qpnum(qp_num)],
+	spin_lock_irqsave(&qp_info->recv_list_lock, flags);
+	if (!list_empty(&qp_info->recv_posted_mad_list)) {
+		recv = list_entry(&qp_info->recv_posted_mad_list,
 				  struct ib_mad_private,
 				  header.recv_buf.list);
 
 		/* Remove from posted receive MAD list */
 		list_del(&recv->header.recv_buf.list);
-
-		port_priv->recv_posted_mad_count[convert_qpnum(qp_num)]--;
+		qp_info->recv_posted_mad_count--;
 
 	} else {
-		printk(KERN_ERR "Receive completion WR ID 0x%Lx on QP %d with no posted receive\n", wc->wr_id, qp_num); 
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+		printk(KERN_ERR "Receive completion with no posted receive\n");
+		spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 		return;
 	}
-	spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+	spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
-	pci_unmap_single(port_priv->device->dma_device,
+	pci_unmap_single(qp_info->port_priv->device->dma_device,
 			 pci_unmap_addr(&recv->header, mapping),
 			 sizeof(struct ib_mad_private) - sizeof(struct ib_mad_private_header),
 			 PCI_DMA_FROMDEVICE);
 
 	/* Setup MAD receive work completion from "normal" work completion */
 	recv->header.recv_wc.wc = wc;
-	recv->header.recv_wc.mad_len = sizeof(struct ib_mad); /* Should this be based on wc->byte_len ? Also, RMPP !!! */
+	recv->header.recv_wc.mad_len = sizeof(struct ib_mad); /* ignore GRH size */
 	recv->header.recv_wc.recv_buf = &recv->header.recv_buf;
 
 	/* Setup MAD receive buffer */
@@ -738,15 +740,15 @@
 	}
 
 	/* Validate MAD */
-	if (!validate_mad(recv->header.recv_buf.mad, qp_num))
-		goto ret;
+	if (!validate_mad(recv->header.recv_buf.mad, qp_info->qp->qp_num))
+		return;
 
 	/* Determine corresponding MAD agent for incoming receive MAD */
-	spin_lock_irqsave(&port_priv->reg_lock, flags);
+	spin_lock_irqsave(&qp_info->port_priv->reg_lock, flags);
 	/* First, determine whether MAD was solicited */
 	solicited = solicited_mad(recv->header.recv_buf.mad);
 	/* Now, find the mad agent */
-	mad_agent = find_mad_agent(port_priv,
+	mad_agent = find_mad_agent(qp_info->port_priv,
 				   recv->header.recv_buf.mad,
 				   solicited);
 	if (!mad_agent) {
@@ -757,49 +759,40 @@
 			printk(KERN_DEBUG "Currently unsupported solicited MAD received\n");
 		}
 
+		/* Release locking before callback... */
 		/* Invoke receive callback */	
 		mad_agent->agent.recv_handler(&mad_agent->agent,
 					      &recv->header.recv_wc);
 	}
-	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
-
-	/* Post another receive request for this QP */
-	ib_mad_post_receive_mad(port_priv, port_priv->qp[qp_num]);
-
-ret:
-	return;
+	spin_unlock_irqrestore(&qp_info->port_priv->reg_lock, flags);
 }
 
-static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
+static void ib_mad_send_done_handler(struct ib_mad_qp_info *qp_info,
 				     struct ib_wc *wc)
 {
 	struct ib_mad_send_wr_private *send_wr;
 	unsigned long flags;
 
 	/* Completion corresponds to first entry on posted MAD send list */
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	if (!list_empty(&port_priv->send_posted_mad_list)) {
-		send_wr = list_entry(&port_priv->send_posted_mad_list,
+	spin_lock_irqsave(&qp_info->send_list_lock, flags);
+	if (!list_empty(&qp_info->send_posted_mad_list)) {
+		send_wr = list_entry(&qp_info->send_posted_mad_list,
 				     struct ib_mad_send_wr_private,
 				     send_list);
 
-		if (send_wr->wr_id != wc->wr_id) {
-			printk(KERN_ERR "Send completion WR ID 0x%Lx doesn't match posted send WR ID 0x%Lx\n", wc->wr_id, send_wr->wr_id);
-			
-			goto error;
-		}
-
 		/* Check whether timeout was requested !!! */
 
 		/* Remove from posted send MAD list */
 		list_del(&send_wr->send_list);
-		port_priv->send_posted_mad_count--;
+		qp_info->send_posted_mad_count--;
 
 	} else {
-		printk(KERN_ERR "Send completion  WR ID 0x%Lx but send list is empty\n", wc->wr_id);
+		printk(KERN_ERR "Send completion WR ID 0x%Lx but send list is empty\n", wc->wr_id);
 		goto error;
 	}
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+	spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
+
+	/* Synchronize with deregistration... */
 
 	/* Restore client wr_id in WC */
 	wc->wr_id = send_wr->wr_id;
@@ -811,20 +804,19 @@
 	return;
 
 error:
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+	spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
 	return;
 }
 
 /*
  * IB MAD completion callback
  */
-static void ib_mad_completion_handler(struct ib_mad_port_private *port_priv)
+static void ib_mad_completion_handler(struct ib_mad_qp_info *qp_info)
 {
 	struct ib_wc wc;
 	int err_status = 0;
 
-	while (!ib_poll_cq(port_priv->cq, 1, &wc)) {
-		printk(KERN_DEBUG "Completion - WR ID = 0x%Lx\n", wc.wr_id);
+	while (!ib_poll_cq(qp_info->cq, 1, &wc)) {
 
 		if (wc.status != IB_WC_SUCCESS) {
 			switch (wc.opcode) {
@@ -846,10 +838,11 @@
 
 		switch (wc.opcode) {
 		case IB_WC_SEND:
-			ib_mad_send_done_handler(port_priv, &wc);
+			ib_mad_send_done_handler(qp_info, &wc);
 			break;
 		case IB_WC_RECV:
-			ib_mad_recv_done_handler(port_priv, &wc);
+			ib_mad_recv_done_handler(qp_info, &wc);
+			ib_mad_post_receive_mad(qp_info);
 			break;
 		default:
 			printk(KERN_ERR "Wrong Opcode: %d\n", wc.opcode);
@@ -861,76 +854,43 @@
 	}
 
 	if (err_status) {
-		ib_mad_port_restart(port_priv);
+		ib_mad_restart_qp(qp_info);
 	} else {
-		ib_mad_post_receive_mads(port_priv);
-		ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+		ib_req_notify_cq(qp_info->cq, IB_CQ_NEXT_COMP);
 	}
 }
 
 /*
  * IB MAD thread
  */
-static int ib_mad_thread(void *param)
+static int ib_mad_thread_handler(void *param)
 {
-	struct ib_mad_port_private *port_priv = param;
-	struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
+	struct ib_mad_qp_info *qp_info = param;
 	int ret;
 
 	while (1) {
 		while (!signal_pending(current)) {
-			ret = wait_event_interruptible(mad_thread_priv->wait, 0);
+			ret = wait_event_interruptible(qp_info->wait, 0);
 			if (ret) {
 				printk(KERN_ERR "ib_mad thread exiting\n");
 				return 0;
 			}
 
-			ib_mad_completion_handler(port_priv);
-
+			ib_mad_completion_handler(qp_info);
 		}
 	}
 }
 
-/*
- * Initialize the IB MAD thread
- */
-static int ib_mad_thread_init(struct ib_mad_port_private *port_priv)
-{
-	struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
-
-	init_waitqueue_head(&mad_thread_priv->wait);
-
-	port_priv->mad_thread = kthread_create(ib_mad_thread,
-					       port_priv,
-					       "ib_mad-%-6s-%-2d",
-					       port_priv->device->name,
-					       port_priv->port_num);
-	if (IS_ERR(port_priv->mad_thread)) {
-		printk(KERN_ERR "Couldn't start mad thread for %s port %d\n",
-		       port_priv->device->name, port_priv->port_num);
-		return 1;
-	}	
-	return 0;
-}
-
-/*
- * Stop the IB MAD thread
- */
-static void ib_mad_thread_stop(struct ib_mad_port_private *port_priv)
-{
-	kthread_stop(port_priv->mad_thread);	/* !!! */
-}
 
 static void ib_mad_thread_completion_handler(struct ib_cq *cq)
 {
-	struct ib_mad_port_private *port_priv = cq->cq_context;
-	struct ib_mad_thread_private *mad_thread_priv = &port_priv->mad_thread_private;
+	struct ib_mad_qp_info *qp_info;
 
-	wake_up_interruptible(&mad_thread_priv->wait);
+	qp_info = (struct ib_mad_qp_info*)cq->cq_context;
+	wake_up_interruptible(&qp_info->wait);
 }
 
-static int ib_mad_post_receive_mad(struct ib_mad_port_private *port_priv,
-				   struct ib_qp *qp)
+static int ib_mad_post_receive_mad(struct ib_mad_qp_info *qp_info)
 {
 	struct ib_mad_private *mad_priv;
 	struct ib_sge sg_list;
@@ -955,43 +915,42 @@
 	}
 
 	/* Setup scatter list */
-	sg_list.addr = pci_map_single(port_priv->device->dma_device,
+	sg_list.addr = pci_map_single(qp_info->port_priv->device->dma_device,
 				      &mad_priv->grh,
 				      sizeof *mad_priv - sizeof mad_priv->header, 
 				      PCI_DMA_FROMDEVICE);
 	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
-	sg_list.lkey = (*port_priv->mr).lkey;
+	sg_list.lkey = qp_info->port_priv->mr->lkey;
 
 	/* Setup receive WR */
 	recv_wr.next = NULL;
 	recv_wr.sg_list = &sg_list;
 	recv_wr.num_sge = 1;
 	recv_wr.recv_flags = IB_RECV_SIGNALED;
-	recv_wr.wr_id = qp->qp_num; /* 32 bits left */
 
 	/* Link receive WR into posted receive MAD list */
-	spin_lock_irqsave(&port_priv->recv_list_lock, flags);
+	spin_lock_irqsave(&qp_info->recv_list_lock, flags);
 	list_add_tail(&mad_priv->header.recv_buf.list,
-		      &port_priv->recv_posted_mad_list[convert_qpnum(qp->qp_num)]);
-	port_priv->recv_posted_mad_count[convert_qpnum(qp->qp_num)]++;
-	spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+		      &qp_info->recv_posted_mad_list);
+	qp_info->recv_posted_mad_count++;
+	spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
 	pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
 
 	/* Now, post receive WR */
-	ret = ib_post_recv(qp, &recv_wr, &bad_recv_wr);
+	ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
 	if (ret) {
 
-		pci_unmap_single(port_priv->device->dma_device,
+		pci_unmap_single(qp_info->port_priv->device->dma_device,
 				 pci_unmap_addr(&mad_priv->header, mapping),
 				 sizeof *mad_priv - sizeof mad_priv->header,
 				 PCI_DMA_FROMDEVICE);
 
 		/* Unlink from posted receive MAD list */
-		spin_lock_irqsave(&port_priv->recv_list_lock, flags);
+		spin_lock_irqsave(&qp_info->recv_list_lock, flags);
 		list_del(&mad_priv->header.recv_buf.list);
-		port_priv->recv_posted_mad_count[convert_qpnum(qp->qp_num)]--;
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+		qp_info->recv_posted_mad_count--;
+		spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 
 		kmem_cache_free(ib_mad_cache, mad_priv);
 		printk(KERN_NOTICE "ib_post_recv failed ret = %d\n", ret);
@@ -1004,65 +963,61 @@
 /*
  * Allocate receive MADs and post receive WRs for them 
  */
-static int ib_mad_post_receive_mads(struct ib_mad_port_private *port_priv)
+static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info)
 {
-	int i, j;
+	int i, ret = 0;
 
-	for (i = 0; i < IB_MAD_QP_RECV_SIZE; i++) {
-		for (j = 0; j < IB_MAD_QPS_CORE; j++) {
-			if (ib_mad_post_receive_mad(port_priv,
-						    port_priv->qp[j])) {
-				printk(KERN_ERR "receive post %d failed on %s port %d\n",
-				       i + 1, port_priv->device->name,
-				       port_priv->port_num);
-			}
+	for (i = qp_info->recv_posted_mad_count; i < IB_MAD_QP_RECV_SIZE; i++) {
+		ret = ib_mad_post_receive_mad(qp_info);
+		if (ret) {
+			printk(KERN_ERR "receive post %d failed on %s port %d\n",
+				i + 1, qp_info->port_priv->device->name,
+				qp_info->port_priv->port_num);
+			break;
 		}
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
  * Return all the posted receive MADs
  */
-static void ib_mad_return_posted_recv_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_recv_mads(struct ib_mad_qp_info *qp_info)
 {
-	int i;
 	unsigned long flags;
 
-	for (i = 0; i < IB_MAD_QPS_SUPPORTED; i++) {
-		spin_lock_irqsave(&port_priv->recv_list_lock, flags);
-		while (!list_empty(&port_priv->recv_posted_mad_list[i])) {
-
-			/* PCI mapping !!! */
+	spin_lock_irqsave(&qp_info->recv_list_lock, flags);
+	while (!list_empty(&qp_info->recv_posted_mad_list)) {
 
-		}
-		INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
-		port_priv->recv_posted_mad_count[i] = 0;
-		spin_unlock_irqrestore(&port_priv->recv_list_lock, flags);
+		/* PCI mapping !!! */
+		list_del(&qp_info->recv_posted_mad_list);
 	}
+	INIT_LIST_HEAD(&qp_info->recv_posted_mad_list);
+	qp_info->recv_posted_mad_count = 0;
+	spin_unlock_irqrestore(&qp_info->recv_list_lock, flags);
 }
 
 /*
  * Return all the posted send MADs
  */
-static void ib_mad_return_posted_send_mads(struct ib_mad_port_private *port_priv)
+static void ib_mad_return_posted_send_mads(struct ib_mad_qp_info *qp_info)
 {
 	unsigned long flags;
 
-	spin_lock_irqsave(&port_priv->send_list_lock, flags);
-	while (!list_empty(&port_priv->send_posted_mad_list)) {
+	spin_lock_irqsave(&qp_info->send_list_lock, flags);
+	while (!list_empty(&qp_info->send_posted_mad_list)) {
 
 		/* PCI mapping ? */
 
-		list_del(&port_priv->send_posted_mad_list);
+		list_del(&qp_info->send_posted_mad_list);
 
 		/* Call completion handler with some status ? */
 
 	}
-	INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-	port_priv->send_posted_mad_count = 0;
-	spin_unlock_irqrestore(&port_priv->send_list_lock, flags);
+	INIT_LIST_HEAD(&qp_info->send_posted_mad_list);
+	qp_info->send_posted_mad_count = 0;
+	spin_unlock_irqrestore(&qp_info->send_list_lock, flags);
 }
 
 /*
@@ -1087,13 +1042,12 @@
 	 * one is needed for the Reset to Init transition.
 	 */
 	attr->pkey_index = 0;
-	attr->port_num = port_num;
 	/* QKey is 0 for QP0 */
 	if (qp->qp_num == 0)
 		attr->qkey = 0;
 	else
 		attr->qkey = IB_QP1_QKEY;
-	attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY;
+	attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
 
 	ret = ib_modify_qp(qp, attr, attr_mask, &qp_cap);
 	kfree(attr);
@@ -1182,93 +1136,180 @@
 }
 
 /*
- * Start the port
+ * Halt operations on the specified QP.
  */
-static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
+static void ib_mad_stop_qp(struct ib_mad_qp_info *qp_info)
 {
-	int ret, i, ret2;
+	int ret;
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_init(port_priv->qp[i],
-						     port_priv->port_num);
-		if (ret) {
-			printk(KERN_ERR "Could not change QP%d state to INIT\n", i);
-			return ret;
-		}
+	ret = ib_mad_change_qp_state_to_reset(qp_info->qp);
+	if (ret) {
+		printk(KERN_ERR "ib_mad_qp_stop: Could not change %s port %d QP%d state to RESET\n",
+		       qp_info->port_priv->device->name,
+		       qp_info->port_priv->port_num, qp_info->qp->qp_num);
+	}
+
+	ib_mad_return_posted_recv_mads(qp_info);
+	ib_mad_return_posted_send_mads(qp_info);
+}
+
+/*
+ * Start operations on the specified QP.
+ */
+static int ib_mad_start_qp(struct ib_mad_qp_info *qp_info)
+{
+	int ret;
+
+	ret = ib_mad_change_qp_state_to_init(qp_info->qp,
+					     qp_info->port_priv->port_num);
+	if (ret) {
+		printk(KERN_ERR "Could not change QP%d state to INIT\n",
+		       qp_info->qp->qp_num);
+		return ret;
 	}
 
-	ret = ib_mad_post_receive_mads(port_priv);
+	ret = ib_mad_post_receive_mads(qp_info);
 	if (ret) {
 		printk(KERN_ERR "Could not post receive requests\n");
 		goto error;
 	}
 
-	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
+	ret = ib_mad_change_qp_state_to_rtr(qp_info->qp);
 	if (ret) {
-		printk(KERN_ERR "Failed to request completion notification\n");
+		printk(KERN_ERR "Could not change QP%d state to RTR\n",
+		       qp_info->qp->qp_num);
 		goto error;
 	}
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_rtr(port_priv->qp[i]);
-		if (ret) {
-			printk(KERN_ERR "Could not change QP%d state to RTR\n", i);
-			goto error;
-		}
+	ret = ib_mad_change_qp_state_to_rts(qp_info->qp);
+	if (ret) {
+		printk(KERN_ERR "Could not change QP%d state to RTS\n",
+		       qp_info->qp->qp_num);
+		goto error;
+	}
 
-		ret = ib_mad_change_qp_state_to_rts(port_priv->qp[i]);
-		if (ret) {
-			printk(KERN_ERR "Could not change QP%d state to RTS\n", i);
-			goto error;
-		}
+	/* Don't report receive completions until we're ready to send. */
+	ret = ib_req_notify_cq(qp_info->cq, IB_CQ_NEXT_COMP);
+	if (ret) {
+		printk(KERN_ERR "Failed to request completion notification\n");
+		goto error;
 	}
 
 	return 0;
-error:
-	ib_mad_return_posted_recv_mads(port_priv);
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret2 = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
-		if (ret2) {
-			printk(KERN_ERR "ib_mad_port_start: Could not change QP%d state to RESET\n", i);
-		}
-	}
 
+error:
+	ib_mad_stop_qp(qp_info);
 	return ret;
 }
 
 /*
- * Stop the port
+ * Restart operations on the specified QP.
  */
-static void ib_mad_port_stop(struct ib_mad_port_private *port_priv)
+static int ib_mad_restart_qp(struct ib_mad_qp_info *qp_info)
 {
-	int i, ret;
+	int ret;
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		ret = ib_mad_change_qp_state_to_reset(port_priv->qp[i]);
-		if (ret) {
-			printk(KERN_ERR "ib_mad_port_stop: Could not change %s port %d QP%d state to RESET\n",
-			       port_priv->device->name, port_priv->port_num, i);
-		}
-	}
+	/* Need to synchronize this against user's posting MADs... */
+	ib_mad_stop_qp(qp_info);
+	ret = ib_mad_start_qp(qp_info);
+	if (ret) {
+		printk(KERN_ERR "Could not restart %s port %d QP %d\n",
+		       qp_info->port_priv->device->name,
+		       qp_info->port_priv->port_num, qp_info->qp->qp_num);
+	}	
+
+	return ret;
+}
+
+
+static void ib_mad_destroy_qp(struct ib_mad_qp_info *qp_info)
+{
+	/* Stop processing completions. */
+	kthread_stop(qp_info->mad_thread);
+	ib_mad_stop_qp(qp_info);
 
-	ib_mad_return_posted_recv_mads(port_priv);
-	ib_mad_return_posted_send_mads(port_priv);
+	ib_destroy_qp(qp_info->qp);
+	ib_destroy_cq(qp_info->cq);
 }
 
-/*
- * Restart the port
- */
-static int ib_mad_port_restart(struct ib_mad_port_private *port_priv)
+static int ib_mad_init_qp(struct ib_mad_port_private *port_priv,
+			  struct ib_mad_qp_info *qp_info,
+			  enum ib_qp_type qp_type)
 {
-	int ret;
+	int ret, cq_size;
+	struct ib_qp_init_attr qp_init_attr;
+	struct ib_qp_cap qp_cap;
 
-	ib_mad_port_stop(port_priv);
-	ret = ib_mad_port_start(port_priv);
-	if (ret) {
-		printk(KERN_ERR "Could not restart %s port %d\n",
-			port_priv->device->name, port_priv->port_num);
+	qp_info->port_priv = port_priv;
+
+	/* Allocate CQ */
+	cq_size = IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE;
+	qp_info->cq = ib_create_cq(port_priv->device,
+				   (ib_comp_handler)ib_mad_thread_completion_handler,
+				   NULL, qp_info, cq_size);
+	if (IS_ERR(qp_info->cq)) {
+		printk(KERN_ERR "Could not create ib_mad CQ\n");
+		return PTR_ERR(qp_info->cq);
+	}
+
+	/* Allocate QP */
+	memset(&qp_init_attr, 0, sizeof qp_init_attr);
+	qp_init_attr.send_cq = qp_info->cq;
+	qp_init_attr.recv_cq = qp_info->cq;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
+	qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
+	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
+	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
+	qp_init_attr.qp_type = qp_type;
+	qp_init_attr.port_num = port_priv->port_num;
+
+	qp_info->qp = ib_create_qp(port_priv->pd, &qp_init_attr, &qp_cap);
+	if (IS_ERR(qp_info->qp)) {
+		printk(KERN_ERR "Could not create ib_mad QP%d\n",
+		       qp_info->qp->qp_num);
+		ret = PTR_ERR(qp_info->qp);
+		goto error1;
+	}
+
+	spin_lock_init(&qp_info->send_list_lock);
+	INIT_LIST_HEAD(&qp_info->send_posted_mad_list);
+	qp_info->send_posted_mad_count = 0;
+
+	spin_lock_init(&qp_info->recv_list_lock);
+	INIT_LIST_HEAD(&qp_info->recv_posted_mad_list);
+	qp_info->recv_posted_mad_count = 0;
+
+	/* Startup the completion thread. */
+	init_waitqueue_head(&qp_info->wait);
+	qp_info->mad_thread = kthread_create(ib_mad_thread_handler,
+					     qp_info,
+					     "ib_mad-%-6s-%-2d-%-4d",
+					     qp_info->port_priv->device->name,
+					     qp_info->port_priv->port_num,
+					     qp_info->qp->qp_num);
+	if (IS_ERR(qp_info->mad_thread)) {
+		printk(KERN_ERR "Couldn't start mad thread for %s port %d\n",
+		       qp_info->port_priv->device->name,
+		       qp_info->port_priv->port_num);
+		ret = PTR_ERR(qp_info->mad_thread);
+		goto error2;
 	}	
 
+	/* Start the QP. */
+	ret = ib_mad_start_qp(qp_info);
+	if (ret)
+		goto error3;
+
+	return 0;
+
+error3:
+	kthread_stop(qp_info->mad_thread);
+error2:
+	ib_destroy_qp(qp_info->qp);
+error1:
+	ib_destroy_cq(qp_info->cq);
 	return ret;
 }
 
@@ -1278,14 +1319,12 @@
  */
 static int ib_mad_port_open(struct ib_device *device, int port_num)
 {
-	int ret, cq_size, i;
+	int ret, i, qp;
 	u64 iova = 0;
 	struct ib_phys_buf buf_list = {
 		.addr = 0,
 		.size = (unsigned long) high_memory - PAGE_OFFSET
 	};
-	struct ib_qp_init_attr qp_init_attr;
-	struct ib_qp_cap qp_cap;
 	struct ib_mad_port_private *entry, *port_priv = NULL;
 	unsigned long flags;
 
@@ -1320,21 +1359,11 @@
 		port_priv->version[i] = NULL;
 	}
 
-	cq_size = IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE;
-	port_priv->cq = ib_create_cq(port_priv->device,
-				     (ib_comp_handler) ib_mad_thread_completion_handler,
-				     NULL, port_priv, cq_size);
-	if (IS_ERR(port_priv->cq)) {
-		printk(KERN_ERR "Could not create ib_mad CQ\n");
-		ret = PTR_ERR(port_priv->cq);
-		goto error3;
-	}
-
 	port_priv->pd = ib_alloc_pd(device);
 	if (IS_ERR(port_priv->pd)) {
 		printk(KERN_ERR "Could not create ib_mad PD\n");
 		ret = PTR_ERR(port_priv->pd);
-		goto error4;
+		goto error1;
 	}
 
 	port_priv->mr = ib_reg_phys_mr(port_priv->pd, &buf_list, 1,
@@ -1342,58 +1371,19 @@
 	if (IS_ERR(port_priv->mr)) {
 		printk(KERN_ERR "Could not register ib_mad MR\n");
 		ret = PTR_ERR(port_priv->mr);
-		goto error5;
+		goto error2;
 	}
 
-	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
-		memset(&qp_init_attr, 0, sizeof qp_init_attr);
-		qp_init_attr.send_cq = port_priv->cq;
-		qp_init_attr.recv_cq = port_priv->cq;
-		qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-		qp_init_attr.rq_sig_type = IB_SIGNAL_ALL_WR;
-		qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
-		qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
-		qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
-		qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
-		if (i == 0)
-			qp_init_attr.qp_type = IB_QPT_SMI;
-		else
-			qp_init_attr.qp_type = IB_QPT_GSI;
-		qp_init_attr.port_num = port_priv->port_num;
-		port_priv->qp[i] = ib_create_qp(port_priv->pd, &qp_init_attr,
-						&qp_cap);
-		if (IS_ERR(port_priv->qp[i])) {
-			printk(KERN_ERR "Could not create ib_mad QP%d\n", i);
-			ret = PTR_ERR(port_priv->qp[i]);
-			if (i == 0)
-				goto error6;		
-			else
-				goto error7;
-			}
-		printk(KERN_DEBUG "Created ib_mad QP %d\n",
-		       port_priv->qp[i]->qp_num);
+	for (qp = 0; qp < IB_MAD_QPS_CORE; qp++) {
+		ret = ib_mad_init_qp(port_priv,
+				     &port_priv->qp_info[qp],
+				     qp ? IB_QPT_GSI : IB_QPT_SMI);
+		if (ret)
+			goto error3;
 	}
 
 	spin_lock_init(&port_priv->reg_lock);
-	spin_lock_init(&port_priv->recv_list_lock);
-	spin_lock_init(&port_priv->send_list_lock);
 	INIT_LIST_HEAD(&port_priv->agent_list);
-	INIT_LIST_HEAD(&port_priv->send_posted_mad_list);
-	port_priv->send_posted_mad_count = 0;
-	for (i = 0; i < IB_MAD_QPS_SUPPORTED; i++) {
-		INIT_LIST_HEAD(&port_priv->recv_posted_mad_list[i]);
-		port_priv->recv_posted_mad_count[i] = 0;
-	}
-
-	ret = ib_mad_thread_init(port_priv);
-	if (ret)
-		goto error8;
-
-	ret = ib_mad_port_start(port_priv);
-	if (ret) {
-		printk(KERN_ERR "Couldn't start port\n");
-		goto error8;
-	}
 
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
@@ -1401,17 +1391,14 @@
 
 	return 0;
 
-error8:
-	ib_destroy_qp(port_priv->qp[1]);
-error7:
-	ib_destroy_qp(port_priv->qp[0]);
-error6:
+error3:
+	while (qp > 0) {
+		ib_mad_destroy_qp(&port_priv->qp_info[--qp]);
+	}
 	ib_dereg_mr(port_priv->mr);
-error5:
+error2:
 	ib_dealloc_pd(port_priv->pd);
-error4:
-	ib_destroy_cq(port_priv->cq);
-error3:
+error1:
 	kfree(port_priv);
 
 	return ret;
@@ -1426,6 +1413,7 @@
 {
 	struct ib_mad_port_private *entry, *port_priv = NULL;
 	unsigned long flags;
+	int i;
 
 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
@@ -1444,13 +1432,12 @@
 	list_del(&port_priv->port_list);
 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
 
-	ib_mad_port_stop(port_priv);
-	ib_mad_thread_stop(port_priv);
-	ib_destroy_qp(port_priv->qp[1]);
-	ib_destroy_qp(port_priv->qp[0]);
+	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
+		ib_mad_destroy_qp(&port_priv->qp_info[i]);
+	}
+
 	ib_dereg_mr(port_priv->mr);
 	ib_dealloc_pd(port_priv->pd);
-	ib_destroy_cq(port_priv->cq);
 	/* Handle deallocation of MAD registration tables!!! */
 
 	kfree(port_priv);
@@ -1461,7 +1448,7 @@
 
 static void ib_mad_init_device(struct ib_device *device)
 {
-	int ret, num_ports, cur_port, i, ret2;
+	int ret, num_ports, i, ret2;
 	struct ib_device_attr device_attr;
 
 	ret = ib_query_device(device, &device_attr);
@@ -1472,16 +1459,14 @@
 
 	if (device->node_type == IB_NODE_SWITCH) {
 		num_ports = 1;
-		cur_port = 0;
 	} else {
 		num_ports = device_attr.phys_port_cnt;
-		cur_port = 1;
 	}
-	for (i = 0; i < num_ports; i++, cur_port++) {
-		ret = ib_mad_port_open(device, cur_port);
+	for (i = 0; i < num_ports; i++) {
+		ret = ib_mad_port_open(device, i+1);
 		if (ret) {
 			printk(KERN_ERR "Could not open %s port %d\n",
-			       device->name, cur_port);
+			       device->name, i+1);
 			goto error_device_open;
 		}
 	}
@@ -1490,11 +1475,10 @@
 
 error_device_open:
 	while (i > 0) {
-		cur_port--;
-		ret2 = ib_mad_port_close(device, cur_port);
+		ret2 = ib_mad_port_close(device, i);
 		if (ret2) {
 			printk(KERN_ERR "Could not close %s port %d\n",
-			       device->name, cur_port);
+			       device->name, i);
 		}
 		i--;
 	}
@@ -1505,7 +1489,7 @@
 
 static void ib_mad_remove_device(struct ib_device *device)
 {
-	int ret, i, num_ports, cur_port, ret2;
+	int ret, i, num_ports, ret2;
 	struct ib_device_attr device_attr;
 
 	ret = ib_query_device(device, &device_attr);
@@ -1516,16 +1500,14 @@
 
 	if (device->node_type == IB_NODE_SWITCH) {
 		num_ports = 1;
-		cur_port = 0;
 	} else {
 		num_ports = device_attr.phys_port_cnt;
-		cur_port = 1;
 	}
-	for (i = 0; i < num_ports; i++, cur_port++) {
-		ret2 = ib_mad_port_close(device, cur_port);
+	for (i = 0; i < num_ports; i++) {
+		ret2 = ib_mad_port_close(device, i+1);
 		if (ret2) {
 			printk(KERN_ERR "Could not close %s port %d\n",
-			       device->name, cur_port);
+			       device->name, i+1);
 			if (!ret)
 				ret = ret2;
 		}




More information about the general mailing list