[ofa-general] [Bug 508] IPoIB CM multicast is hogging interrupts

Michael S. Tsirkin mst at dev.mellanox.co.il
Sun Apr 29 02:02:44 PDT 2007


IB spec requires that, after request for notification, we drain the CQ of
completions that might have arrived there before or during request for
notification.  But the number of these is limited by CQ size, so we can, and
should, avoid polling indefinitely (and starving other CQs).

Signed-off-by: Michael S. Tsirkin <mst at dev.mellanox.co.il>

---

I think this trick I just came up with is a simple way to prevent
IPoIB TX from hogging interrupts, even without NAPI. And it might be a better
way to solve the problem for IPoIB CM TX than using a common cq
as my previous patch did.

This seems to hurt top bandwidth a bit in my testing, so this needs some more
work. Meanwhile, Scott, could you please check whether the following
patch helps in your test-case?

Roland, I think something similar is a good idea for SRP, too.
What do you think?

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4..3ed1536 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -573,14 +573,15 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
 {
 	struct ipoib_cm_tx *tx = tx_ptr;
-	int n, i;
+	int n, i, cnt = 0;
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	do {
 		n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
+		cnt += n;
 		for (i = 0; i < n; ++i)
 			ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
-	} while (n == IPOIB_NUM_WC);
+	} while (n == IPOIB_NUM_WC && cnt < ipoib_sendq_size);
 }
 
 int ipoib_cm_dev_open(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5c..3701cd7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -294,14 +294,15 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 {
 	struct net_device *dev = (struct net_device *) dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int n, i;
+	int n, i, cnt = 0;
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	do {
 		n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
+		cnt += n;
 		for (i = 0; i < n; ++i)
 			ipoib_ib_handle_wc(dev, priv->ibwc + i);
-	} while (n == IPOIB_NUM_WC);
+	} while (n == IPOIB_NUM_WC && cnt < 2 * ipoib_recvq_size + ipoib_sendq_size);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,

-- 
MST
-------------- next part --------------
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2b242a4..3ed1536 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -573,14 +573,15 @@ static void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ipoib_cm_tx *tx
 static void ipoib_cm_tx_completion(struct ib_cq *cq, void *tx_ptr)
 {
 	struct ipoib_cm_tx *tx = tx_ptr;
-	int n, i;
+	int n, i, cnt = 0;
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	do {
 		n = ib_poll_cq(cq, IPOIB_NUM_WC, tx->ibwc);
+		cnt += n;
 		for (i = 0; i < n; ++i)
 			ipoib_cm_handle_tx_wc(tx->dev, tx, tx->ibwc + i);
-	} while (n == IPOIB_NUM_WC);
+	} while (n == IPOIB_NUM_WC && cnt < ipoib_sendq_size);
 }
 
 int ipoib_cm_dev_open(struct net_device *dev)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ba0ee5c..3701cd7 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -294,14 +294,15 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 {
 	struct net_device *dev = (struct net_device *) dev_ptr;
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int n, i;
+	int n, i, cnt = 0;
 
 	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 	do {
 		n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
+		cnt += n;
 		for (i = 0; i < n; ++i)
 			ipoib_ib_handle_wc(dev, priv->ibwc + i);
-	} while (n == IPOIB_NUM_WC);
+	} while (n == IPOIB_NUM_WC && cnt < 2 * ipoib_recvq_size + ipoib_sendq_size);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,


More information about the general mailing list