[openib-general] [PATCH] (repost) no qp lock on poll, separate sq/rq locks

Roland Dreier roland at topspin.com
Thu Feb 24 20:39:43 PST 2005


Ugh, I think I missed something when I thought about this the first
time around.  It seems the test for WQ overflow assumes that all WQs
have a power-of-2 size, which we currently don't enforce for Tavor
mode.  It seems there are two possible solutions:

  1) Round up WQ sizes to a power of 2 for Tavor as well.  I don't
  like this because it could potentially use a lot of extra memory.

  2) Add one more counter back into the WQ struct so we can keep
  track of both the next index to use as well as the total number of
  WQEs posted in Tavor mode (we still only need one counter in
  mem-free mode).

I implemented the second option.  Does this patch look reasonable?

 - R.

Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h	(revision 1912)
+++ hw/mthca/mthca_provider.h	(working copy)
@@ -167,15 +167,16 @@ struct mthca_cq {
 
 struct mthca_wq {
 	spinlock_t lock;
-	int   max;
-	int   next;
-	int   last_comp;
-	void *last;
-	int   max_gs;
-	int   wqe_shift;
+	int        max;
+	unsigned   next_ind;
+	unsigned   head;
+	unsigned   tail;
+	void      *last;
+	int        max_gs;
+	int        wqe_shift;
 
-	int   db_index;		/* Arbel only */
-	u32  *db;
+	int        db_index;	/* Arbel only */
+	u32       *db;
 };
 
 struct mthca_qp {
Index: hw/mthca/mthca_cq.c
===================================================================
--- hw/mthca/mthca_cq.c	(revision 1912)
+++ hw/mthca/mthca_cq.c	(working copy)
@@ -453,7 +453,7 @@ static inline int mthca_poll_one(struct 
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
 	}
 
-	wq->last_comp = wqe_index;
+	++wq->tail;
 
 	if (0)
 		mthca_dbg(dev, "%s completion for QP %06x, index %d (nr %d)\n",
Index: hw/mthca/mthca_qp.c
===================================================================
--- hw/mthca/mthca_qp.c	(revision 1912)
+++ hw/mthca/mthca_qp.c	(working copy)
@@ -1081,8 +1081,9 @@ static void mthca_free_memfree(struct mt
 static void mthca_wq_init(struct mthca_wq* wq)
 {
 	spin_lock_init(&wq->lock);
-	wq->next      = 0;
-	wq->last_comp = wq->max - 1;
+	wq->next_ind  = 0;
+	wq->head      = 0;
+	wq->tail      = 0;
 	wq->last      = NULL;
 }
 
@@ -1397,19 +1398,19 @@ static int build_mlx_header(struct mthca
 	return 0;
 }
 
-static inline int mthca_wq_overflow(struct mthca_wq* wq, int nreq,
-	 struct ib_cq* ib_cq)
+static inline int mthca_wq_overflow(struct mthca_wq *wq, int nreq,
+				    struct ib_cq *ib_cq)
 {
-	int cur;
-	struct mthca_cq* cq;
+	unsigned cur;
+	struct mthca_cq *cq;
 
-	cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+	cur = wq->head - wq->tail;
 	if (likely(cur + nreq < wq->max))
 		return 0;
 
 	cq = to_mcq(ib_cq);
 	spin_lock(&cq->lock);
-	cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+	cur = wq->head - wq->tail;
 	spin_unlock(&cq->lock);
 
 	return cur + nreq >= wq->max;
@@ -1436,13 +1437,13 @@ int mthca_tavor_post_send(struct ib_qp *
 
 	/* XXX check that state is OK to post send */
 
-	ind = qp->sq.next;
+	ind = qp->sq.next_ind;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
-			mthca_err(dev, "SQ %06x full (%d next, %d last_polled,"
+			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
 					" %d max, %d nreq)\n", qp->qpn,
-					qp->sq.next, qp->sq.last_comp,
+					qp->sq.head, qp->sq.tail,
 					qp->sq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
@@ -1603,7 +1604,7 @@ out:
 	if (likely(nreq)) {
 		u32 doorbell[2];
 
-		doorbell[0] = cpu_to_be32(((qp->sq.next << qp->sq.wqe_shift) +
+		doorbell[0] = cpu_to_be32(((qp->sq.next_ind << qp->sq.wqe_shift) +
 					   qp->send_wqe_offset) | f0 | op0);
 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 
@@ -1614,7 +1615,8 @@ out:
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
-	qp->sq.next = ind;
+	qp->sq.next_ind = ind;
+	qp->sq.head    += nreq;
 
 	spin_unlock_irqrestore(&qp->sq.lock, flags);
 	return err;
@@ -1639,13 +1641,13 @@ int mthca_tavor_post_receive(struct ib_q
 
 	/* XXX check that state is OK to post receive */
 
-	ind = qp->rq.next;
+	ind = qp->rq.next_ind;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
-			mthca_err(dev, "RQ %06x full (%d next, %d last_polled,"
+			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
 					" %d max, %d nreq)\n", qp->qpn,
-					qp->rq.next, qp->rq.last_comp,
+					qp->rq.head, qp->rq.tail,
 					qp->rq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
@@ -1703,7 +1705,7 @@ out:
 	if (likely(nreq)) {
 		u32 doorbell[2];
 
-		doorbell[0] = cpu_to_be32((qp->rq.next << qp->rq.wqe_shift) | size0);
+		doorbell[0] = cpu_to_be32((qp->rq.next_ind << qp->rq.wqe_shift) | size0);
 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | nreq);
 
 		wmb();
@@ -1713,7 +1715,8 @@ out:
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
-	qp->rq.next = ind;
+	qp->rq.next_ind = ind;
+	qp->rq.head    += nreq;
 
 	spin_unlock_irqrestore(&qp->rq.lock, flags);
 	return err;
@@ -1740,13 +1743,13 @@ int mthca_arbel_post_send(struct ib_qp *
 
 	/* XXX check that state is OK to post send */
 
-	ind = qp->sq.next & (qp->sq.max - 1);
+	ind = qp->sq.head & (qp->sq.max - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
-			mthca_err(dev, "SQ %06x full (%d next, %d last_polled,"
+			mthca_err(dev, "SQ %06x full (%u head, %u tail,"
 					" %d max, %d nreq)\n", qp->qpn,
-					qp->sq.next, qp->sq.last_comp,
+					qp->sq.head, qp->sq.tail,
 					qp->sq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
@@ -1857,18 +1860,18 @@ out:
 		u32 doorbell[2];
 
 		doorbell[0] = cpu_to_be32((nreq << 24)                  |
-					  ((qp->sq.next & 0xffff) << 8) |
+					  ((qp->sq.head & 0xffff) << 8) |
 					  f0 | op0);
 		doorbell[1] = cpu_to_be32((qp->qpn << 8) | size0);
 
-		qp->sq.next += nreq;
+		qp->sq.head += nreq;
 
 		/*
 		 * Make sure that descriptors are written before
 		 * doorbell record.
 		 */
 		wmb();
-		*qp->sq.db = cpu_to_be32(qp->sq.next & 0xffff);
+		*qp->sq.db = cpu_to_be32(qp->sq.head & 0xffff);
 
 		/*
 		 * Make sure doorbell record is written before we
@@ -1900,13 +1903,13 @@ int mthca_arbel_post_receive(struct ib_q
 
 	/* XXX check that state is OK to post receive */
 
-	ind = qp->rq.next & (qp->rq.max - 1);
+	ind = qp->rq.head & (qp->rq.max - 1);
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
 		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
-			mthca_err(dev, "RQ %06x full (%d next, %d last_polled,"
+			mthca_err(dev, "RQ %06x full (%u head, %u tail,"
 					" %d max, %d nreq)\n", qp->qpn,
-					qp->rq.next, qp->rq.last_comp,
+					qp->rq.head, qp->rq.tail,
 					qp->rq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
@@ -1949,14 +1952,14 @@ int mthca_arbel_post_receive(struct ib_q
 	}
 out:
 	if (likely(nreq)) {
-		qp->rq.next += nreq;
+		qp->rq.head += nreq;
 
 		/*
 		 * Make sure that descriptors are written before
 		 * doorbell record.
 		 */
 		wmb();
-		*qp->rq.db = cpu_to_be32(qp->rq.next & 0xffff);
+		*qp->rq.db = cpu_to_be32(qp->rq.head & 0xffff);
 	}
 
 	spin_unlock_irqrestore(&qp->rq.lock, flags);




More information about the general mailing list