[ofa-general] [PATCH 3/5] IB/ipoib: post to SRQ every n buffers
Eli Cohen
eli at dev.mellanox.co.il
Fri Feb 1 02:25:06 PST 2008
IB/ipoib: post to SRQ every n buffers
To reduce the overhead of posting receive buffers to the SRQ, chain the
receive work requests and post them in batches of 16 (CM_POST_SRQ_COUNT)
instead of one at a time.
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
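A minimal userspace sketch (not part of the patch) of the batching scheme
described above. The names rx_wr, repost(), post_srq_recv(), RECVQ_SIZE and
POST_SRQ_COUNT are simplified stand-ins for the kernel structures, and the
queue size is assumed to be a power of two (as ipoib_recvq_size is); the
sketch only shows how completed buffers are linked onto a chain anchored at
head and handed off once 16 of them are pending, or immediately when the
caller forces a flush (as ipoib_cm_dev_init() does via the new argument
while pre-filling the ring).

#include <stdio.h>
#include <stddef.h>

#define RECVQ_SIZE      256     /* assumed power of two, like ipoib_recvq_size */
#define POST_SRQ_COUNT  16      /* mirrors CM_POST_SRQ_COUNT */

struct rx_wr {
	struct rx_wr *next;
	int id;
};

static struct rx_wr ring[RECVQ_SIZE];
static struct rx_wr *head = &ring[0];

/* Stand-in for ib_post_srq_recv(): just walks and counts the chain. */
static int post_srq_recv(struct rx_wr *chain)
{
	int n = 0;

	for (; chain; chain = chain->next)
		++n;
	printf("posted a chain of %d receive WRs\n", n);
	return 0;
}

/* Model of the reworked ipoib_cm_post_receive(dev, id, pi). */
static int repost(int id, int flush)
{
	struct rx_wr *cur  = &ring[id];
	struct rx_wr *prev = &ring[(id - 1) & (RECVQ_SIZE - 1)];
	int ret = 0;

	prev->next = cur;       /* append this buffer to the pending chain */
	cur->next  = NULL;
	cur->id    = id;

	/* Ring distance from head to cur; post once it reaches the batch size. */
	if (flush || (((unsigned long)(cur - head)) & (RECVQ_SIZE - 1)) >= POST_SRQ_COUNT) {
		ret = post_srq_recv(head);
		if (!ret)
			head = &ring[(id + 1) & (RECVQ_SIZE - 1)];
	}
	return ret;
}

int main(void)
{
	int i;

	/* 64 completions arriving in ring order; chains flush at i = 16, 33, 50. */
	for (i = 0; i < 64; ++i)
		repost(i, 0);
	return 0;
}

In this model a flush hands post_srq_recv() the whole chain from head through
the current entry, so a single SRQ doorbell covers roughly 16 buffers instead
of one per completion.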
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:50:46.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-01-28 21:52:55.745918000 +0200
@@ -96,6 +96,7 @@ enum {
IPOIB_MCAST_FLAG_ATTACHED = 3,
MAX_SEND_CQE = 16,
+ CM_POST_SRQ_COUNT = 16,
};
#define IPOIB_OP_RECV (1ul << 31)
@@ -283,6 +284,11 @@ struct ipoib_cm_rx_buf {
u64 mapping[IPOIB_CM_RX_SG];
};
+struct ipoib_cm_rx_wr {
+ struct ib_recv_wr wr;
+ struct ib_sge rx_sge[IPOIB_CM_RX_SG];
+};
+
struct ipoib_cm_dev_priv {
struct ib_srq *srq;
struct ipoib_cm_rx_buf *srq_ring;
@@ -301,10 +307,10 @@ struct ipoib_cm_dev_priv {
struct list_head start_list;
struct list_head reap_list;
struct ib_wc ibwc[IPOIB_NUM_WC];
- struct ib_sge rx_sge[IPOIB_CM_RX_SG];
- struct ib_recv_wr rx_wr;
int max_cm_mtu;
int num_frags;
+ struct ipoib_cm_rx_wr *head;
+ struct ipoib_cm_rx_wr *rx_wr_arr;
};
struct ipoib_ethtool_st {
Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:50:45.000000000 +0200
+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2008-01-28 21:51:45.846437000 +0200
@@ -81,24 +81,46 @@ static void ipoib_cm_dma_unmap_rx(struct
ib_dma_unmap_single(priv->ca, mapping[i + 1], PAGE_SIZE, DMA_FROM_DEVICE);
}
-static int ipoib_cm_post_receive(struct net_device *dev, int id)
+static int ipoib_cm_post_receive(struct net_device *dev, int id, int pi)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_recv_wr *bad_wr;
- int i, ret;
+ int i, ret = 0;
+ struct ipoib_cm_rx_wr *cur;
+ struct ipoib_cm_rx_wr *prev;
+ int post;
+
+ ipoib_dbg_data(priv, "posting to id=%d, pi=%d\n", id, pi);
+ cur = &priv->cm.rx_wr_arr[id];
+ prev = &priv->cm.rx_wr_arr[(id - 1) & (ipoib_recvq_size - 1)];
+
+ prev->wr.next = &cur->wr;
+ cur->wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
+ cur->wr.next = NULL;
+
- priv->cm.rx_wr.wr_id = id | IPOIB_OP_CM | IPOIB_OP_RECV;
for (i = 0; i < priv->cm.num_frags; ++i)
- priv->cm.rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+ cur->rx_sge[i].addr = priv->cm.srq_ring[id].mapping[i];
+
+ post = pi || (((unsigned long)(cur - priv->cm.head) & (ipoib_recvq_size - 1))
+ >= CM_POST_SRQ_COUNT);
+ if (post) {
+ ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.head->wr, &bad_wr);
+ if (unlikely(ret)) {
+ ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
+ while (bad_wr) {
+ id = bad_wr->wr_id & ~(IPOIB_OP_CM | IPOIB_OP_RECV);
+ ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
+ priv->cm.srq_ring[id].mapping);
+ dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
+ priv->cm.srq_ring[id].skb = NULL;
+ bad_wr = bad_wr->next;
+ }
+ } else
+ priv->cm.head = &priv->cm.rx_wr_arr[(id + 1) & (ipoib_recvq_size - 1)];
+
- ret = ib_post_srq_recv(priv->cm.srq, &priv->cm.rx_wr, &bad_wr);
- if (unlikely(ret)) {
- ipoib_warn(priv, "post srq failed for buf %d (%d)\n", id, ret);
- ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
- priv->cm.srq_ring[id].mapping);
- dev_kfree_skb_any(priv->cm.srq_ring[id].skb);
- priv->cm.srq_ring[id].skb = NULL;
}
return ret;
@@ -483,7 +505,7 @@ void ipoib_cm_handle_rx_wc(struct net_de
netif_receive_skb(skb);
repost:
- if (unlikely(ipoib_cm_post_receive(dev, wr_id)))
+ if (unlikely(ipoib_cm_post_receive(dev, wr_id, 0)))
ipoib_warn(priv, "ipoib_cm_post_receive failed "
"for buf %d\n", wr_id);
}
@@ -1277,7 +1299,7 @@ int ipoib_cm_dev_init(struct net_device
.max_wr = ipoib_recvq_size,
}
};
- int ret, i;
+ int ret, i, j;
struct ib_device_attr attr;
INIT_LIST_HEAD(&priv->cm.passive_ids);
@@ -1307,8 +1329,14 @@ int ipoib_cm_dev_init(struct net_device
srq_init_attr.attr.max_sge = attr.max_srq_sge;
+ priv->cm.rx_wr_arr = kzalloc(ipoib_recvq_size * sizeof priv->cm.rx_wr_arr[0],
+ GFP_KERNEL);
+ if (!priv->cm.rx_wr_arr)
+ return -ENOMEM;
+
priv->cm.srq = ib_create_srq(priv->pd, &srq_init_attr);
if (IS_ERR(priv->cm.srq)) {
+ kfree(priv->cm.rx_wr_arr);
ret = PTR_ERR(priv->cm.srq);
priv->cm.srq = NULL;
return ret;
@@ -1328,15 +1356,19 @@ int ipoib_cm_dev_init(struct net_device
return -ENOMEM;
}
- for (i = 0; i < priv->cm.num_frags; ++i)
- priv->cm.rx_sge[i].lkey = priv->mr->lkey;
+ for (j = 0; j < ipoib_recvq_size; ++j) {
+ for (i = 0; i < priv->cm.num_frags; ++i)
+ priv->cm.rx_wr_arr[j].rx_sge[i].lkey = priv->mr->lkey;
+
+ priv->cm.rx_wr_arr[j].rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
+ for (i = 1; i < priv->cm.num_frags; ++i)
+ priv->cm.rx_wr_arr[j].rx_sge[i].length = PAGE_SIZE;
+
+ priv->cm.rx_wr_arr[j].wr.sg_list = priv->cm.rx_wr_arr[j].rx_sge;
+ priv->cm.rx_wr_arr[j].wr.num_sge = priv->cm.num_frags;
+ }
- priv->cm.rx_sge[0].length = IPOIB_CM_HEAD_SIZE;
- for (i = 1; i < priv->cm.num_frags; ++i)
- priv->cm.rx_sge[i].length = PAGE_SIZE;
- priv->cm.rx_wr.next = NULL;
- priv->cm.rx_wr.sg_list = priv->cm.rx_sge;
- priv->cm.rx_wr.num_sge = priv->cm.num_frags;
+ priv->cm.head = &priv->cm.rx_wr_arr[0];
for (i = 0; i < ipoib_recvq_size; ++i) {
if (!ipoib_cm_alloc_rx_skb(dev, i, priv->cm.num_frags - 1,
@@ -1345,7 +1377,7 @@ int ipoib_cm_dev_init(struct net_device
ipoib_cm_dev_cleanup(dev);
return -ENOMEM;
}
- if (ipoib_cm_post_receive(dev, i)) {
+ if (ipoib_cm_post_receive(dev, i, 1)) {
ipoib_warn(priv, "ipoib_ib_post_receive failed for buf %d\n", i);
ipoib_cm_dev_cleanup(dev);
return -EIO;
@@ -1375,11 +1407,12 @@ void ipoib_cm_dev_cleanup(struct net_dev
return;
for (i = 0; i < ipoib_recvq_size; ++i)
if (priv->cm.srq_ring[i].skb) {
- ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
+ ipoib_cm_dma_unmap_rx(priv, priv->cm.num_frags - 1,
priv->cm.srq_ring[i].mapping);
dev_kfree_skb_any(priv->cm.srq_ring[i].skb);
priv->cm.srq_ring[i].skb = NULL;
}
kfree(priv->cm.srq_ring);
+ kfree(priv->cm.rx_wr_arr);
priv->cm.srq_ring = NULL;
}