[ofa-general] [PATCH 2/11] IB/ipoib: support for sending gather skbs
Eli Cohen
eli at mellanox.co.il
Mon Sep 24 05:36:51 PDT 2007
From: Michael S. Tsirkin <mst at mellanox.co.il>
Subject: IB/ipoib: support for sending gather skbs
This patch, by itself, does nothing: it prepares the ground for the
hardware checksum support patches.  NETIF_F_SG cannot actually be set
without enabling hardware checksum support, so that is left to the
follow-up patches.
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
---
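A minimal sketch (not part of this patch) of what a follow-up patch could
then do: once checksum offload is usable, NETIF_F_SG can be advertised
together with NETIF_F_IP_CSUM, since the network core will not honour
scatter/gather without checksum offload.  The function name and the
hw_csum_ok parameter below are purely illustrative, not existing IPoIB
code.

#include <linux/netdevice.h>

/*
 * Illustrative helper only: turn on gather sends when the caller has
 * already established that the HCA can generate checksums.  The real
 * follow-up patches decide this from the device capabilities.
 */
static void ipoib_maybe_enable_sg(struct net_device *dev, int hw_csum_ok)
{
	if (hw_csum_ok)
		dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
}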
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2007-09-24 12:09:21.000000000 +0200
@@ -122,9 +122,61 @@ struct ipoib_rx_buf {
struct ipoib_tx_buf {
struct sk_buff *skb;
- u64 mapping;
+ u64 mapping[MAX_SKB_FRAGS + 1];
};
+static inline int ipoib_dma_map_tx(struct ib_device *ca,
+ struct ipoib_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int frags;
+ int i;
+
+ mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+ return -EIO;
+
+ frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+ frag->page_offset, frag->size,
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+ goto partial_error;
+ }
+ return 0;
+
+partial_error:
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+ for (; i > 0; --i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+ ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+ }
+ return -EIO;
+}
+
+static inline void ipoib_dma_unmap_tx(struct ib_device *ca,
+ struct ipoib_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int frags;
+ int i;
+
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+ frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+ DMA_TO_DEVICE);
+ }
+}
+
struct ib_cm_id;
struct ipoib_cm_data {
@@ -269,7 +321,7 @@ struct ipoib_dev_priv {
struct ipoib_tx_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
- struct ib_sge tx_sge;
+ struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr;
struct ib_wc ibwc[IPOIB_NUM_WC];
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c 2007-09-24 12:23:26.000000000 +0200
@@ -491,15 +491,22 @@ repost:
static inline int post_send(struct ipoib_dev_priv *priv,
struct ipoib_cm_tx *tx,
unsigned int wr_id,
- u64 addr, int len)
+ u64 *mapping, int headlen,
+ skb_frag_t *frags,
+ int nr_frags)
+
{
struct ib_send_wr *bad_wr;
+ int i;
- priv->tx_sge.addr = addr;
- priv->tx_sge.length = len;
-
- priv->tx_wr.wr_id = wr_id;
-
+ priv->tx_sge[0].addr = mapping[0];
+ priv->tx_sge[0].length = headlen;
+ for (i = 0; i < nr_frags; ++i) {
+ priv->tx_sge[i + 1].addr = mapping[i + 1];
+ priv->tx_sge[i + 1].length = frags[i].size;
+ }
+ priv->tx_wr.num_sge = nr_frags + 1;
+ priv->tx_wr.wr_id = wr_id;
return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
}
@@ -507,7 +514,6 @@ void ipoib_cm_send(struct net_device *de
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
- u64 addr;
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -530,20 +536,19 @@ void ipoib_cm_send(struct net_device *de
*/
tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
- addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+ if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++priv->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
- tx_req->mapping = addr;
-
if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
- addr, skb->len))) {
+ tx_req->mapping, skb_headlen(skb),
+ skb_shinfo(skb)->frags,
+ skb_shinfo(skb)->nr_frags))) {
ipoib_warn(priv, "post_send failed\n");
++priv->stats.tx_errors;
- ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
@@ -577,7 +582,7 @@ static void ipoib_cm_handle_tx_wc(struct
tx_req = &tx->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
/* FIXME: is this right? Shouldn't we only increment on success? */
++priv->stats.tx_packets;
@@ -814,7 +819,7 @@ static struct ib_qp *ipoib_cm_create_tx_
attr.recv_cq = priv->cq;
attr.srq = priv->cm.srq;
attr.cap.max_send_wr = ipoib_sendq_size;
- attr.cap.max_send_sge = 1;
+ attr.cap.max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1;
attr.sq_sig_type = IB_SIGNAL_ALL_WR;
attr.qp_type = IB_QPT_RC;
attr.send_cq = cq;
@@ -981,8 +986,7 @@ static void ipoib_cm_tx_destroy(struct i
if (p->tx_ring) {
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
- DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
}
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2007-09-24 11:57:02.000000000 +0200
@@ -257,8 +257,7 @@ static void ipoib_ib_handle_tx_wc(struct
tx_req = &priv->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping,
- tx_req->skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
++priv->stats.tx_packets;
priv->stats.tx_bytes += tx_req->skb->len;
@@ -343,16 +342,23 @@ void ipoib_ib_completion(struct ib_cq *c
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
- u64 addr, int len)
+ u64 *mapping, int headlen,
+ skb_frag_t *frags,
+ int nr_frags)
{
struct ib_send_wr *bad_wr;
+ int i;
- priv->tx_sge.addr = addr;
- priv->tx_sge.length = len;
-
- priv->tx_wr.wr_id = wr_id;
- priv->tx_wr.wr.ud.remote_qpn = qpn;
- priv->tx_wr.wr.ud.ah = address;
+ priv->tx_sge[0].addr = mapping[0];
+ priv->tx_sge[0].length = headlen;
+ for (i = 0; i < nr_frags; ++i) {
+ priv->tx_sge[i + 1].addr = mapping[i + 1];
+ priv->tx_sge[i + 1].length = frags[i].size;
+ }
+ priv->tx_wr.num_sge = nr_frags + 1;
+ priv->tx_wr.wr_id = wr_id;
+ priv->tx_wr.wr.ud.remote_qpn = qpn;
+ priv->tx_wr.wr.ud.ah = address;
return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}
@@ -362,7 +368,6 @@ void ipoib_send(struct net_device *dev,
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
- u64 addr;
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -385,20 +390,19 @@ void ipoib_send(struct net_device *dev,
*/
tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
- addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
- DMA_TO_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+ if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++priv->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
- tx_req->mapping = addr;
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, addr, skb->len))) {
+ address->ah, qpn,
+ tx_req->mapping, skb_headlen(skb),
+ skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
ipoib_warn(priv, "post_send failed\n");
++priv->stats.tx_errors;
- ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
@@ -604,10 +608,7 @@ int ipoib_ib_dev_stop(struct net_device
while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
tx_req = &priv->tx_ring[priv->tx_tail &
(ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca,
- tx_req->mapping,
- tx_req->skb->len,
- DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
}
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2007-09-24 12:24:02.000000000 +0200
@@ -149,14 +149,14 @@ int ipoib_transport_dev_init(struct net_
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
- .max_send_sge = 1,
+ .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
.max_recv_sge = 1
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UD
};
- int ret, size;
+ int i, ret, size;
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
@@ -197,11 +197,11 @@ int ipoib_transport_dev_init(struct net_
priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
- priv->tx_sge.lkey = priv->mr->lkey;
+ for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+ priv->tx_sge[i].lkey = priv->mr->lkey;
priv->tx_wr.opcode = IB_WR_SEND;
- priv->tx_wr.sg_list = &priv->tx_sge;
- priv->tx_wr.num_sge = 1;
+ priv->tx_wr.sg_list = priv->tx_sge;
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
return 0;