[ofa-general] [PATCH 1/5] IB/ipoib: Split CQs for IPOIB UD
Eli Cohen
eli at dev.mellanox.co.il
Fri Feb 1 02:24:33 PST 2008
IB/ipoib: Split CQs for IPOIB UD
This is a preparation for using an unsignaled QP in UD mode. It adds a
dedicated CQ for UD sends; that CQ is never armed and is instead polled
for completions right after a packet is posted.

This patch and the following patches fix bugs 760 and 761.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
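For reviewers skimming the diff, the heart of the new send path is collected
below as a condensed sketch (assembled from the hunks that follow, with error
handling and the connected-mode case omitted); it is a reading aid, not a
drop-in replacement for the patch itself:

	/*
	 * The send CQ (priv->scq) is not armed in this patch; completions
	 * are reaped synchronously.  need_lock tells the handler whether
	 * it must take priv->tx_lock itself or whether the caller (the
	 * send path) already holds it.
	 */
	static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
	{
		int n, i;

		n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
		for (i = 0; i < n; ++i)
			ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i,
					      need_lock);
	}

	/*
	 * Completion handler registered on the send CQ; if it runs it is
	 * in interrupt context, so poll_tx must take tx_lock itself.
	 */
	void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
	{
		poll_tx(netdev_priv(dev_ptr), 1);
	}

	/*
	 * At the tail of ipoib_send(): the caller already holds tx_lock,
	 * so reap without re-taking it once enough sends are outstanding.
	 */
	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
		poll_tx(priv, 0);
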
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 11:42:31.776503000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-01-31 14:11:01.107304000 +0200
@@ -254,7 +254,7 @@ repost:
"for buf %d\n", wr_id);
}
-static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
+static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc, int need_lock)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
unsigned int wr_id = wc->wr_id;
@@ -279,13 +279,17 @@ static void ipoib_ib_handle_tx_wc(struct
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ if (need_lock)
+ spin_lock_irqsave(&priv->tx_lock, flags);
+
++priv->tx_tail;
if (unlikely(--priv->tx_outstanding == ipoib_sendq_size >> 1) &&
netif_queue_stopped(dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ if (need_lock)
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
@@ -294,6 +298,15 @@ static void ipoib_ib_handle_tx_wc(struct
wc->status, wr_id, wc->vendor_err);
}
+static void poll_tx(struct ipoib_dev_priv *priv, int need_lock)
+{
+ int n, i;
+
+ n = ib_poll_cq(priv->scq, MAX_SEND_CQE, priv->send_wc);
+ for (i = 0; i < n; ++i)
+ ipoib_ib_handle_tx_wc(priv->dev, priv->send_wc + i, need_lock);
+}
+
int ipoib_poll(struct napi_struct *napi, int budget)
{
struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
@@ -309,7 +322,7 @@ poll_more:
int max = (budget - done);
t = min(IPOIB_NUM_WC, max);
- n = ib_poll_cq(priv->cq, t, priv->ibwc);
+ n = ib_poll_cq(priv->rcq, t, priv->ibwc);
for (i = 0; i < n; i++) {
struct ib_wc *wc = priv->ibwc + i;
@@ -320,12 +333,8 @@ poll_more:
ipoib_cm_handle_rx_wc(dev, wc);
else
ipoib_ib_handle_rx_wc(dev, wc);
- } else {
- if (wc->wr_id & IPOIB_OP_CM)
- ipoib_cm_handle_tx_wc(dev, wc);
- else
- ipoib_ib_handle_tx_wc(dev, wc);
- }
+ } else
+ ipoib_cm_handle_tx_wc(priv->dev, wc);
}
if (n != t)
@@ -334,7 +343,7 @@ poll_more:
if (done < budget) {
netif_rx_complete(dev, napi);
- if (unlikely(ib_req_notify_cq(priv->cq,
+ if (unlikely(ib_req_notify_cq(priv->rcq,
IB_CQ_NEXT_COMP |
IB_CQ_REPORT_MISSED_EVENTS)) &&
netif_rx_reschedule(dev, napi))
@@ -344,7 +353,7 @@ poll_more:
return done;
}
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr)
{
struct net_device *dev = dev_ptr;
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -352,6 +361,13 @@ void ipoib_ib_completion(struct ib_cq *c
netif_rx_schedule(dev, &priv->napi);
}
+void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev_ptr);
+
+ poll_tx(priv, 1);
+}
+
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
@@ -471,6 +487,10 @@ void ipoib_send(struct net_device *dev,
netif_stop_queue(dev);
}
}
+
+ if (unlikely(priv->tx_outstanding > MAX_SEND_CQE + 1))
+ poll_tx(priv, 0);
+
return;
drop:
@@ -623,7 +643,7 @@ void ipoib_drain_cq(struct net_device *d
struct ipoib_dev_priv *priv = netdev_priv(dev);
int i, n;
do {
- n = ib_poll_cq(priv->cq, IPOIB_NUM_WC, priv->ibwc);
+ n = ib_poll_cq(priv->rcq, IPOIB_NUM_WC, priv->ibwc);
for (i = 0; i < n; ++i) {
/*
* Convert any successful completions to flush
@@ -642,7 +662,7 @@ void ipoib_drain_cq(struct net_device *d
if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
else
- ipoib_ib_handle_tx_wc(dev, priv->ibwc + i);
+ ipoib_ib_handle_tx_wc(dev, priv->ibwc + i, 1);
}
}
} while (n == IPOIB_NUM_WC);
@@ -737,7 +757,7 @@ timeout:
msleep(1);
}
- ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
+ ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP);
return 0;
}
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 11:42:32.043502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-31 14:16:34.690314000 +0200
@@ -94,6 +94,8 @@ enum {
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
IPOIB_MCAST_FLAG_ATTACHED = 3,
+
+ MAX_SEND_CQE = 16,
};
#define IPOIB_OP_RECV (1ul << 31)
@@ -348,7 +350,8 @@ struct ipoib_dev_priv {
u16 pkey_index;
struct ib_pd *pd;
struct ib_mr *mr;
- struct ib_cq *cq;
+ struct ib_cq *rcq;
+ struct ib_cq *scq;
struct ib_qp *qp;
u32 qkey;
@@ -368,7 +371,8 @@ struct ipoib_dev_priv {
struct ib_send_wr tx_wr;
unsigned tx_outstanding;
- struct ib_wc ibwc[IPOIB_NUM_WC];
+ struct ib_wc ibwc[IPOIB_NUM_WC];
+ struct ib_wc send_wc[MAX_SEND_CQE];
struct list_head dead_ahs;
@@ -449,7 +453,8 @@ extern struct workqueue_struct *ipoib_wo
/* functions */
int ipoib_poll(struct napi_struct *napi, int budget);
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
+void ipoib_ib_rx_completion(struct ib_cq *cq, void *dev_ptr);
+void ipoib_ib_tx_completion(struct ib_cq *cq, void *dev_ptr);
struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
struct ib_pd *pd, struct ib_ah_attr *attr);
@@ -697,7 +702,6 @@ static inline int ipoib_register_debugfs
static inline void ipoib_unregister_debugfs(void) { }
#endif
-
#define ipoib_printk(level, priv, format, arg...) \
printk(level "%s: " format, ((struct ipoib_dev_priv *) priv)->dev->name , ## arg)
#define ipoib_warn(priv, format, arg...) \
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 11:42:32.175502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-01-31 12:39:19.616627000 +0200
@@ -173,37 +173,42 @@ int ipoib_transport_dev_init(struct net_
goto out_free_pd;
}
- size = ipoib_sendq_size + ipoib_recvq_size + 1;
+ size = ipoib_recvq_size;
ret = ipoib_cm_dev_init(dev);
if (!ret)
size += ipoib_recvq_size + 1 /* 1 extra for rx_drain_qp */;
- priv->cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
- if (IS_ERR(priv->cq)) {
- printk(KERN_WARNING "%s: failed to create CQ\n", ca->name);
+ priv->rcq = ib_create_cq(priv->ca, ipoib_ib_rx_completion, NULL, dev, size, 0);
+ if (IS_ERR(priv->rcq)) {
+ printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr;
}
+ priv->scq = ib_create_cq(priv->ca, ipoib_ib_tx_completion, NULL, dev, ipoib_sendq_size, 0);
+ if (IS_ERR(priv->scq)) {
+ printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+ goto out_free_rcq;
+ }
+
coal = kzalloc(sizeof *coal, GFP_KERNEL);
if (coal) {
coal->rx_coalesce_usecs = 10;
- coal->tx_coalesce_usecs = 10;
coal->rx_max_coalesced_frames = 16;
- coal->tx_max_coalesced_frames = 16;
dev->ethtool_ops->set_coalesce(dev, coal);
kfree(coal);
}
- if (ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP))
- goto out_free_cq;
+ if (ib_req_notify_cq(priv->rcq, IB_CQ_NEXT_COMP))
+ goto out_free_scq;
- init_attr.send_cq = priv->cq;
- init_attr.recv_cq = priv->cq;
+ init_attr.send_cq = priv->scq;
+ init_attr.recv_cq = priv->rcq;
priv->qp = ib_create_qp(priv->pd, &init_attr);
if (IS_ERR(priv->qp)) {
printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
- goto out_free_cq;
+ goto out_free_scq;
}
priv->dev->dev_addr[1] = (priv->qp->qp_num >> 16) & 0xff;
@@ -219,8 +224,11 @@ int ipoib_transport_dev_init(struct net_
return 0;
-out_free_cq:
- ib_destroy_cq(priv->cq);
+out_free_scq:
+ ib_destroy_cq(priv->scq);
+
+out_free_rcq:
+ ib_destroy_cq(priv->rcq);
out_free_mr:
ib_dereg_mr(priv->mr);
@@ -243,7 +251,10 @@ void ipoib_transport_dev_cleanup(struct
clear_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
}
- if (ib_destroy_cq(priv->cq))
+ if (ib_destroy_cq(priv->scq))
+ ipoib_warn(priv, "ib_cq_destroy failed\n");
+
+ if (ib_destroy_cq(priv->rcq))
ipoib_warn(priv, "ib_cq_destroy failed\n");
ipoib_cm_dev_cleanup(dev);
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 11:42:31.770505000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-31 12:18:43.243122000 +0200
@@ -199,8 +199,8 @@ static struct ib_qp *ipoib_cm_create_rx_
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
.event_handler = ipoib_cm_rx_event_handler,
- .send_cq = priv->cq, /* For drain WR */
- .recv_cq = priv->cq,
+ .send_cq = priv->rcq, /* For drain WR */
+ .recv_cq = priv->rcq,
.srq = priv->cm.srq,
.cap.max_send_wr = 1, /* For drain WR */
.cap.max_send_sge = 1, /* FIXME: 0 Seems not to work */
@@ -791,8 +791,8 @@ static struct ib_qp *ipoib_cm_create_tx_
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_init_attr attr = {
- .send_cq = priv->cq,
- .recv_cq = priv->cq,
+ .send_cq = priv->rcq,
+ .recv_cq = priv->rcq,
.srq = priv->cm.srq,
.cap.max_send_wr = ipoib_sendq_size,
.cap.max_send_sge = 1,
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.052502000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_etool.c 2008-01-31 11:42:32.421466000 +0200
@@ -69,7 +69,7 @@ static int ipoib_set_coalesce(struct net
coal->tx_max_coalesced_frames > 0xffff)
return -EINVAL;
- ret = ib_modify_cq(priv->cq, coal->rx_max_coalesced_frames,
+ ret = ib_modify_cq(priv->rcq, coal->rx_max_coalesced_frames,
coal->rx_coalesce_usecs);
if (ret) {
ipoib_dbg(priv, "failed modifying CQ\n");