[openib-general] [PATCH] IB/ipoib: NAPI
Eli Cohen
eli at dev.mellanox.co.il
Thu Sep 21 07:57:37 PDT 2006
This patch implements NAPI for ipoib. It is a draft implementation.
I would like your opinion on whether we need a module parameter
to control whether NAPI is activated.
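For concreteness, such a knob might look like the sketch below; the
parameter name and default are my own assumptions, not part of the patch:

	static int napi_enabled = 1; /* hypothetical: 0 = plain interrupt mode */
	module_param(napi_enabled, int, 0444);
	MODULE_PARM_DESC(napi_enabled, "Use NAPI polling if > 0 (default 1)");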
There is also a need to implement peek_cq and call it after
ib_req_notify_cq() so as to know whether netif_rx_schedule_prep()
needs to be called again.
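Something along these lines in ipoib_poll(), assuming a peek_cq verb
that reports whether completions are already queued (the exact call
and its return convention are assumptions here, since device support
for peeking varies):

	netif_rx_complete(dev);
	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
	/* assumed: returns > 0 if completions are already pending */
	if (ib_peek_cq(priv->cq, 1) > 0 && netif_rx_schedule_prep(dev))
		__netif_rx_schedule(dev);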
Signed-off-by: Eli Cohen <eli at dev.mellanox.co.il>
---
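For reviewers less familiar with the NAPI interface this patch targets:
dev->poll() is expected to consume at most dev->quota packets, decrement
both dev->quota and *budget by the number of packets received, and either
return 1 while work remains or call netif_rx_complete() and return 0 once
the queue is drained. An illustrative skeleton (not part of the patch):

	int example_poll(struct net_device *dev, int *budget)
	{
		int rx = 0;
		int quota = min(dev->quota, *budget);

		/* ... receive up to 'quota' packets, counting them in 'rx' ... */

		dev->quota -= rx;
		*budget -= rx;

		if (0 /* more completions pending */)
			return 1;	/* stay on the poll list */

		netif_rx_complete(dev);	/* done: interrupts re-enabled after this */
		return 0;
	}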
Index: openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- openib-1.1-rc6.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c 2006-09-21 16:30:35.000000000 +0300
+++ openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_main.c 2006-09-21 16:30:42.000000000 +0300
@@ -69,6 +69,8 @@
MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0");
#endif
+static const int poll_def_weight = 64;
+
struct ipoib_path_iter {
struct net_device *dev;
struct ipoib_path path;
@@ -91,6 +93,9 @@
.remove = ipoib_remove_one
};
+
+int ipoib_poll(struct net_device *dev, int *budget);
+
int ipoib_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -689,6 +694,7 @@
goto out;
}
+
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
spin_lock(&priv->lock);
__skb_queue_tail(&neigh->queue, skb);
@@ -892,6 +898,7 @@
/* Delete any child interfaces first */
list_for_each_entry_safe(cpriv, tcpriv, &priv->child_intfs, list) {
+ netif_poll_disable(cpriv->dev);
unregister_netdev(cpriv->dev);
ipoib_dev_cleanup(cpriv->dev);
free_netdev(cpriv->dev);
@@ -919,6 +926,8 @@
dev->hard_header = ipoib_hard_header;
dev->set_multicast_list = ipoib_set_mcast_list;
dev->neigh_setup = ipoib_neigh_setup_dev;
+ dev->poll = ipoib_poll;
+ dev->weight = poll_def_weight;
dev->watchdog_timeo = HZ;
@@ -1097,6 +1106,8 @@
goto register_failed;
}
+ netif_poll_enable(priv->dev);
+
ipoib_create_debug_files(priv->dev);
if (ipoib_add_pkey_attr(priv->dev))
@@ -1111,6 +1122,7 @@
return priv->dev;
sysfs_failed:
+ netif_poll_disable(priv->dev);
ipoib_delete_debug_files(priv->dev);
unregister_netdev(priv->dev);
@@ -1168,6 +1180,7 @@
dev_list = ib_get_client_data(device, &ipoib_client);
list_for_each_entry_safe(priv, tmp, dev_list, list) {
+ netif_poll_disable(priv->dev);
ib_unregister_event_handler(&priv->event_handler);
flush_scheduled_work();
Index: openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- openib-1.1-rc6.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2006-09-21 16:30:38.000000000 +0300
+++ openib-1.1-rc6/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2006-09-21 17:24:59.000000000 +0300
@@ -169,7 +169,7 @@
return 0;
}
-static void ipoib_ib_handle_wc(struct net_device *dev,
+static void ipoib_ib_handle_rwc(struct net_device *dev,
struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -178,122 +178,186 @@
ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
wr_id, wc->opcode, wc->status);
- if (wr_id & IPOIB_OP_RECV) {
- wr_id &= ~IPOIB_OP_RECV;
-
- if (wr_id < ipoib_recvq_size) {
- struct sk_buff *skb = priv->rx_ring[wr_id].skb;
- dma_addr_t addr = priv->rx_ring[wr_id].mapping;
-
- if (unlikely(wc->status != IB_WC_SUCCESS)) {
- if (wc->status != IB_WC_WR_FLUSH_ERR)
- ipoib_warn(priv, "failed recv event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
- dma_unmap_single(priv->ca->dma_device, addr,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
- dev_kfree_skb_any(skb);
- priv->rx_ring[wr_id].skb = NULL;
- return;
- }
-
- /*
- * If we can't allocate a new RX buffer, dump
- * this packet and reuse the old buffer.
- */
- if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
- ++priv->stats.rx_dropped;
- goto repost;
- }
-
- ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
- wc->byte_len, wc->slid);
+ wr_id &= ~IPOIB_OP_RECV;
+ if (wr_id < ipoib_recvq_size) {
+ struct sk_buff *skb = priv->rx_ring[wr_id].skb;
+ dma_addr_t addr = priv->rx_ring[wr_id].mapping;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ if (wc->status != IB_WC_WR_FLUSH_ERR)
+ ipoib_warn(priv, "failed recv event "
+ "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
dma_unmap_single(priv->ca->dma_device, addr,
IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ dev_kfree_skb_any(skb);
+ priv->rx_ring[wr_id].skb = NULL;
+ return;
+ }
- skb_put(skb, wc->byte_len);
- skb_pull(skb, IB_GRH_BYTES);
+ /*
+ * If we can't allocate a new RX buffer, dump
+ * this packet and reuse the old buffer.
+ */
+ if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+ ++priv->stats.rx_dropped;
+ goto repost;
+ }
- if (wc->slid != priv->local_lid ||
- wc->src_qp != priv->qp->qp_num) {
- skb->protocol = ((struct ipoib_header *) skb->data)->proto;
- skb->mac.raw = skb->data;
- skb_pull(skb, IPOIB_ENCAP_LEN);
-
- dev->last_rx = jiffies;
- ++priv->stats.rx_packets;
- priv->stats.rx_bytes += skb->len;
-
- skb->dev = dev;
- /* XXX get correct PACKET_ type here */
- skb->pkt_type = PACKET_HOST;
- netif_rx_ni(skb);
- } else {
- ipoib_dbg_data(priv, "dropping loopback packet\n");
- dev_kfree_skb_any(skb);
- }
+ ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+ wc->byte_len, wc->slid);
- repost:
- if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
- ipoib_warn(priv, "ipoib_ib_post_receive failed "
- "for buf %d\n", wr_id);
- } else
- ipoib_warn(priv, "completion event with wrid %d\n",
- wr_id);
+ dma_unmap_single(priv->ca->dma_device, addr,
+ IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
- } else {
- struct ipoib_tx_buf *tx_req;
- unsigned long flags;
+ skb_put(skb, wc->byte_len);
+ skb_pull(skb, IB_GRH_BYTES);
- if (wr_id >= ipoib_sendq_size) {
- ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
- wr_id, ipoib_sendq_size);
- return;
+ if (wc->slid != priv->local_lid ||
+ wc->src_qp != priv->qp->qp_num) {
+ skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+ skb->mac.raw = skb->data;
+ skb_pull(skb, IPOIB_ENCAP_LEN);
+
+ dev->last_rx = jiffies;
+ ++priv->stats.rx_packets;
+ priv->stats.rx_bytes += skb->len;
+
+ skb->dev = dev;
+ /* XXX get correct PACKET_ type here */
+ skb->pkt_type = PACKET_HOST;
+ netif_receive_skb(skb);
+ } else {
+ ipoib_dbg_data(priv, "dropping loopback packet\n");
+ dev_kfree_skb_any(skb);
}
- ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+ repost:
+ if (unlikely(ipoib_ib_post_receive(dev, wr_id)))
+ ipoib_warn(priv, "ipoib_ib_post_receive failed "
+ "for buf %d\n", wr_id);
+ } else
+ ipoib_warn(priv, "completion event with wrid %d\n",
+ wr_id);
- tx_req = &priv->tx_ring[wr_id];
+}
- dma_unmap_single(priv->ca->dma_device,
- pci_unmap_addr(tx_req, mapping),
- tx_req->skb->len,
- DMA_TO_DEVICE);
- ++priv->stats.tx_packets;
- priv->stats.tx_bytes += tx_req->skb->len;
+static void ipoib_ib_handle_swc(struct net_device *dev,
+ struct ib_wc *wc)
+{
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+ unsigned int wr_id = wc->wr_id;
+ struct ipoib_tx_buf *tx_req;
+ unsigned long flags;
- dev_kfree_skb_any(tx_req->skb);
+ ipoib_dbg_data(priv, "called: id %d, op %d, status: %d\n",
+ wr_id, wc->opcode, wc->status);
- spin_lock_irqsave(&priv->tx_lock, flags);
- ++priv->tx_tail;
- if (netif_queue_stopped(dev) &&
- test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
- priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
- netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
-
- if (wc->status != IB_WC_SUCCESS &&
- wc->status != IB_WC_WR_FLUSH_ERR)
- ipoib_warn(priv, "failed send event "
- "(status=%d, wrid=%d vend_err %x)\n",
- wc->status, wr_id, wc->vendor_err);
+ if (wr_id >= ipoib_sendq_size) {
+ ipoib_warn(priv, "completion event with wrid %d (> %d)\n",
+ wr_id, ipoib_sendq_size);
+ return;
}
+
+ ipoib_dbg_data(priv, "send complete, wrid %d\n", wr_id);
+
+ tx_req = &priv->tx_ring[wr_id];
+
+ dma_unmap_single(priv->ca->dma_device,
+ pci_unmap_addr(tx_req, mapping),
+ tx_req->skb->len,
+ DMA_TO_DEVICE);
+
+ ++priv->stats.tx_packets;
+ priv->stats.tx_bytes += tx_req->skb->len;
+
+ dev_kfree_skb_any(tx_req->skb);
+
+ spin_lock_irqsave(&priv->tx_lock, flags);
+ ++priv->tx_tail;
+ if (netif_queue_stopped(dev) &&
+ test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags) &&
+ priv->tx_head - priv->tx_tail <= ipoib_sendq_size >> 1)
+ netif_wake_queue(dev);
+ spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+ if (wc->status != IB_WC_SUCCESS &&
+ wc->status != IB_WC_WR_FLUSH_ERR)
+ ipoib_warn(priv, "failed send event "
+ "(status=%d, wrid=%d vend_err %x)\n",
+ wc->status, wr_id, wc->vendor_err);
}
-void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+static inline int is_rx_comp(struct ib_wc *wc)
+{
+ unsigned int wr_id = wc->wr_id;
+
+ if (wr_id & IPOIB_OP_RECV)
+ return 1;
+
+ return 0;
+}
+
+int ipoib_poll(struct net_device *dev, int *budget)
{
- struct net_device *dev = (struct net_device *) dev_ptr;
struct ipoib_dev_priv *priv = netdev_priv(dev);
int n, i;
+ struct ib_cq *cq = priv->cq;
+ int quota = dev->quota;
+ int wc;
+ int rx = 0;
+ int tx = 0;
- ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
do {
- n = ib_poll_cq(cq, IPOIB_NUM_WC, priv->ibwc);
- for (i = 0; i < n; ++i)
- ipoib_ib_handle_wc(dev, priv->ibwc + i);
- } while (n == IPOIB_NUM_WC);
+ wc = min_t(int, quota, IPOIB_NUM_WC);
+ n = ib_poll_cq(cq, wc, priv->ibwc);
+ for (i = 0; i < n; ++i) {
+ if (is_rx_comp(priv->ibwc + i)) {
+ ++rx;
+ --quota;
+ ipoib_ib_handle_rwc(dev, priv->ibwc + i);
+ }
+ else {
+ ++tx;
+ ipoib_ib_handle_swc(dev, priv->ibwc + i);
+ }
+
+ if (unlikely(quota <= 0))
+ goto not_done;
+ }
+ } while (n == wc);
+
+ if (rx || tx)
+ goto not_done;
+
+
+ netif_rx_complete(dev);
+ ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+ /* TODO: we need peek_cq here for hw devices that
+ would not generate an interrupt for completions
+ arriving between the end of polling and the notify request */
+
+ return 0;
+
+not_done:
+ *budget -= rx;
+ dev->quota = quota;
+ return 1;
+}
+
+void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+{
+ struct net_device *dev = (struct net_device *) dev_ptr;
+ struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+ /* tell the network layer we have packets */
+ if (netif_rx_schedule_prep(dev))
+ __netif_rx_schedule(dev);
+ else {
+ ipoib_warn(priv, "received interrupt while polling\n");
+ }
}
static inline int post_send(struct ipoib_dev_priv *priv,
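(A note on the completion handler above: with NAPI the interrupt path
only schedules the poll routine; CQ notification is re-armed in
ipoib_poll() via ib_req_notify_cq() once the CQ has been drained. The
canonical shape of the interrupt side is just:

	if (netif_rx_schedule_prep(dev))
		__netif_rx_schedule(dev);

which is what ipoib_ib_completion() does, with a warning for the case
where an event arrives while polling is already scheduled.)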