[openib-general] [PATCH] no qp lock in poll, separate locks for send and receive

Michael S. Tsirkin mst at mellanox.co.il
Wed Feb 23 05:02:05 PST 2005



Quoting r. Roland Dreier <roland at topspin.com>:
> Subject: Re: [PATCH] 1/2 separate locking for send and receive q in mthca
> 
>     Michael> Instead of doing this, how about something else I would
>     Michael> prefer: we could avoid locking the QP on CQ poll
>     Michael> altogether, if there is a separate last polled index that
>     Michael> is written only by cq poll and read by qp post.  This
>     Michael> index update would be protected by a cq lock.
> 
> Yes, that's a great solution.  Splitting things up so that some
> values are only written inside the WQ lock and some are only written
> inside the CQ lock is a very clean solution.
> 
>  - R.
> 

I seem to get some good speedups (5-10%) in IP over IB from the following
patch, but I think it's an obvious win for any ULP.

This patch does the following:
1. Split the QP spinlock to send and receive lock.
The only place where we have to lock both is upon modify_qp,
and that is not on data path.

2. Avoid taking any QP locks when polling CQ.

This last part is achieved by getting rid of the cur field in
mthca_wq, and calculating the number of outstanding WQEs by comparing
the last_comp and next fields. next is only updated by post,
last_comp is only updated by poll.

In a rare case where an overrun is detected, a CQ is locked
and the overrun condition is re-tested, to avoid any potential
for stale last_comp value.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

Index: hw/mthca/mthca_provider.h
===================================================================
--- hw/mthca/mthca_provider.h	(revision 1862)
+++ hw/mthca/mthca_provider.h	(working copy)
@@ -147,8 +147,8 @@ struct mthca_cq {
 };
 
 struct mthca_wq {
+	spinlock_t lock;
 	int   max;
-	int   cur;
 	int   next;
 	int   last_comp;
 	void *last;
@@ -158,7 +158,6 @@ struct mthca_wq {
 
 struct mthca_qp {
 	struct ib_qp           ibqp;
-	spinlock_t             lock;
 	atomic_t               refcount;
 	u32                    qpn;
 	int                    is_direct;
Index: hw/mthca/mthca_cq.c
===================================================================
--- hw/mthca/mthca_cq.c	(revision 1862)
+++ hw/mthca/mthca_cq.c	(working copy)
@@ -405,15 +405,6 @@ static inline int mthca_poll_one(struct 
 	is_send  = is_error ? cqe->opcode & 0x01 : cqe->is_send & 0x80;
 
 	if (!*cur_qp || be32_to_cpu(cqe->my_qpn) != (*cur_qp)->qpn) {
-		if (*cur_qp) {
-			if (*freed) {
-				wmb();
-				inc_cons_index(dev, cq, *freed);
-				*freed = 0;
-			}
-			spin_unlock(&(*cur_qp)->lock);
-		}
-
 		/*
 		 * We do not have to take the QP table lock here,
 		 * because CQs will be locked while QPs are removed
@@ -428,8 +419,6 @@ static inline int mthca_poll_one(struct 
 			err = -EINVAL;
 			goto out;
 		}
-
-		spin_lock(&(*cur_qp)->lock);
 	}
 
 	entry->qp_num = (*cur_qp)->qpn;
@@ -446,11 +435,6 @@ static inline int mthca_poll_one(struct 
 		entry->wr_id = (*cur_qp)->wrid[wqe_index];
 	}
 
-	if (wq->last_comp < wqe_index)
-		wq->cur -= wqe_index - wq->last_comp;
-	else
-		wq->cur -= wq->max - wq->last_comp + wqe_index;
-
 	wq->last_comp = wqe_index;
 
 	if (0)
@@ -533,9 +517,6 @@ int mthca_poll_cq(struct ib_cq *ibcq, in
 		inc_cons_index(dev, cq, freed);
 	}
 
-	if (qp)
-		spin_unlock(&qp->lock);
-
 	spin_unlock_irqrestore(&cq->lock, flags);
 
 	return err == 0 || err == -EAGAIN ? npolled : err;
Index: hw/mthca/mthca_qp.c
===================================================================
--- hw/mthca/mthca_qp.c	(revision 1862)
+++ hw/mthca/mthca_qp.c	(working copy)
@@ -552,9 +552,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, 
 		else
 			cur_state = attr->cur_qp_state;
 	} else {
-		spin_lock_irq(&qp->lock);
+		spin_lock_irq(&qp->sq.lock);
+		spin_lock(&qp->rq.lock);
 		cur_state = qp->state;
-		spin_unlock_irq(&qp->lock);
+		spin_unlock(&qp->rq.lock);
+		spin_unlock_irq(&qp->sq.lock);
 	}
 
 	if (attr_mask & IB_QP_STATE) {
@@ -972,6 +974,14 @@ static int mthca_alloc_wqe_buf(struct mt
 	return err;
 }
 
+static void mthca_wq_init(struct mthca_wq* wq)
+{
+	spin_lock_init(&wq->lock);
+	wq->next      = 0;
+	wq->last_comp = wq->max - 1;
+	wq->last      = NULL;
+}
+
 static int mthca_alloc_qp_common(struct mthca_dev *dev,
 				 struct mthca_pd *pd,
 				 struct mthca_cq *send_cq,
@@ -981,20 +991,13 @@ static int mthca_alloc_qp_common(struct 
 {
 	int err;
 
-	spin_lock_init(&qp->lock);
 	atomic_set(&qp->refcount, 1);
 	qp->state    	 = IB_QPS_RESET;
 	qp->atomic_rd_en = 0;
 	qp->resp_depth   = 0;
 	qp->sq_policy    = send_policy;
-	qp->rq.cur       = 0;
-	qp->sq.cur       = 0;
-	qp->rq.next      = 0;
-	qp->sq.next      = 0;
-	qp->rq.last_comp = qp->rq.max - 1;
-	qp->sq.last_comp = qp->sq.max - 1;
-	qp->rq.last      = NULL;
-	qp->sq.last      = NULL;
+	mthca_wq_init(&qp->sq);
+	mthca_wq_init(&qp->rq);
 
 	err = mthca_alloc_wqe_buf(dev, pd, qp);
 	return err;
@@ -1240,6 +1243,24 @@ static int build_mlx_header(struct mthca
 	return 0;
 }
 
+static inline int mthca_wq_overflow(struct mthca_wq* wq, int nreq,
+	 struct ib_cq* ib_cq)
+{
+	int cur;
+	struct mthca_cq* cq;
+
+	cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+	if (likely(cur + nreq < wq->max))
+		return 0;
+
+	cq = to_mcq(ib_cq);
+	spin_lock(&cq->lock);
+	cur = (wq->next - wq->last_comp - 1) & (wq->max - 1);
+	spin_unlock(&cq->lock);
+
+	return cur + nreq >= wq->max;
+}
+
 int mthca_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		    struct ib_send_wr **bad_wr)
 {
@@ -1267,16 +1288,18 @@ int mthca_post_send(struct ib_qp *ibqp, 
 		[IB_WR_ATOMIC_FETCH_AND_ADD] = MTHCA_OPCODE_ATOMIC_FA,
 	};
 
-	spin_lock_irqsave(&qp->lock, flags);
+	spin_lock_irqsave(&qp->sq.lock, flags);
 
 	/* XXX check that state is OK to post send */
 
 	ind = qp->sq.next;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (qp->sq.cur + nreq >= qp->sq.max) {
-			mthca_err(dev, "SQ full (%d posted, %d max, %d nreq)\n",
-				  qp->sq.cur, qp->sq.max, nreq);
+		if (mthca_wq_overflow(&qp->sq, nreq, qp->ibqp.send_cq)) {
+			mthca_err(dev, "SQ %06x full (%d next, %d last_polled,"
+					" %d max, %d nreq)\n", qp->qpn,
+					qp->sq.next, qp->sq.last_comp,
+					qp->sq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -1447,10 +1470,9 @@ out:
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
-	qp->sq.cur += nreq;
 	qp->sq.next = ind;
 
-	spin_unlock_irqrestore(&qp->lock, flags);
+	spin_unlock_irqrestore(&qp->sq.lock, flags);
 	return err;
 }
 
@@ -1469,15 +1491,18 @@ int mthca_post_receive(struct ib_qp *ibq
 	void *wqe;
 	void *prev_wqe;
 
-	spin_lock_irqsave(&qp->lock, flags);
+	spin_lock_irqsave(&qp->rq.lock, flags);
 
 	/* XXX check that state is OK to post receive */
 
 	ind = qp->rq.next;
 
 	for (nreq = 0; wr; ++nreq, wr = wr->next) {
-		if (qp->rq.cur + nreq >= qp->rq.max) {
-			mthca_err(dev, "RQ %06x full\n", qp->qpn);
+		if (mthca_wq_overflow(&qp->rq, nreq, qp->ibqp.recv_cq)) {
+			mthca_err(dev, "RQ %06x full (%d next, %d last_polled,"
+					" %d max, %d nreq)\n", qp->qpn,
+					qp->rq.next, qp->rq.last_comp,
+					qp->rq.max, nreq);
 			err = -ENOMEM;
 			*bad_wr = wr;
 			goto out;
@@ -1544,10 +1569,9 @@ out:
 			      MTHCA_GET_DOORBELL_LOCK(&dev->doorbell_lock));
 	}
 
-	qp->rq.cur += nreq;
 	qp->rq.next = ind;
 
-	spin_unlock_irqrestore(&qp->lock, flags);
+	spin_unlock_irqrestore(&qp->rq.lock, flags);
 	return err;
 }
 

-- 
MST - Michael S. Tsirkin



More information about the general mailing list