[ofa-general] [PATCH 2/16] ib/ipoib: Add s/g support for IPOIB

Shirley Ma xma at us.ibm.com
Thu Jan 24 12:06:30 PST 2008






Hello Eli,

      Can you make the IPoIB-CM rx S/G functions more generic so they can be
reused here for IPoIB UD tx S/G?

Thanks
Shirley




From:    Eli Cohen <eli at mellanox.co.il>
Sent by: general-bounces at lists.openfabrics.org
Date:    01/16/08 08:37 AM
To:      Roland Dreier <rdreier at cisco.com>
cc:      openfabrics <general at lists.openfabrics.org>
Subject: [ofa-general] [PATCH 2/16] ib/ipoib: Add s/g support for IPOIB




Add s/g support for IPOIB

This patch acts as a preparation for using checksum offload for
IB devices capable of inserting/verifying checksum in IP
packets. The patch does not actually turn on NETIF_F_SG
but rather defers the role to the patches adding checksum
offload capabilities. Support is added only for datagram mode
since Mellanox HW does not support checksum offload on connected QPs.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
 drivers/infiniband/ulp/ipoib/ipoib.h       |   56
+++++++++++++++++++++++++++-
 drivers/infiniband/ulp/ipoib/ipoib_cm.c    |   10 ++--
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    |   41 ++++++++++----------
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c |   10 ++--
 4 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h
b/drivers/infiniband/ulp/ipoib/ipoib.h
index eb7edab..6729c14 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -142,9 +142,61 @@ struct ipoib_rx_buf {

 struct ipoib_tx_buf {
             struct sk_buff *skb;
-            u64                     mapping;
+            u64                     mapping[MAX_SKB_FRAGS + 1];
 };

+static inline int ipoib_dma_map_tx(struct ib_device *ca,
+                                                   struct ipoib_tx_buf
*tx_req)
+{
+            struct sk_buff *skb = tx_req->skb;
+            u64 *mapping = tx_req->mapping;
+            int frags;
+            int i;
+
+            mapping[0] = ib_dma_map_single(ca, skb->data,
skb_headlen(skb),
+                                                       DMA_TO_DEVICE);
+            if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+                        return -EIO;
+
+            frags = skb_shinfo(skb)->nr_frags;
+            for (i = 0; i < frags; ++i) {
+                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                        mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+
frag->page_offset, frag->size,
+
DMA_TO_DEVICE);
+                        if (unlikely(ib_dma_mapping_error(ca, mapping[i +
1])))
+                                    goto partial_error;
+            }
+            return 0;
+
+partial_error:
+            ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
DMA_TO_DEVICE);
+
+            for (; i > 0; --i) {
+                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+                        ib_dma_unmap_page(ca, mapping[i], frag->size,
DMA_TO_DEVICE);
+            }
+            return -EIO;
+}
+
+static inline void ipoib_dma_unmap_tx(struct ib_device *ca,
+                                                      struct ipoib_tx_buf
*tx_req)
+{
+            struct sk_buff *skb = tx_req->skb;
+            u64 *mapping = tx_req->mapping;
+            int frags;
+            int i;
+
+            ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb),
DMA_TO_DEVICE);
+
+            frags = skb_shinfo(skb)->nr_frags;
+            for (i = 0; i < frags; ++i) {
+                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                        ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+                                                  DMA_TO_DEVICE);
+            }
+}
+
 struct ib_cm_id;

 struct ipoib_cm_data {
@@ -290,7 +342,7 @@ struct ipoib_dev_priv {
             struct ipoib_tx_buf *tx_ring;
             unsigned             tx_head;
             unsigned             tx_tail;
-            struct ib_sge        tx_sge;
+            struct ib_sge                tx_sge[MAX_SKB_FRAGS + 1];
             struct ib_send_wr    tx_wr;
             unsigned             tx_outstanding;

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 059cf92..8485fde 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -495,8 +495,8 @@ static inline int post_send(struct ipoib_dev_priv
*priv,
 {
             struct ib_send_wr *bad_wr;

-            priv->tx_sge.addr             = addr;
-            priv->tx_sge.length           = len;
+            priv->tx_sge[0].addr          = addr;
+            priv->tx_sge[0].length        = len;

             priv->tx_wr.wr_id                   = wr_id | IPOIB_OP_CM;

@@ -537,7 +537,7 @@ void ipoib_cm_send(struct net_device *dev, struct
sk_buff *skb, struct ipoib_cm_
                         return;
             }

-            tx_req->mapping = addr;
+            tx_req->mapping[0] = addr;

             if (unlikely(post_send(priv, tx, tx->tx_head &
(ipoib_sendq_size - 1),
                                             addr, skb->len))) {
@@ -576,7 +576,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev,
struct ib_wc *wc)

             tx_req = &tx->tx_ring[wr_id];

-            ib_dma_unmap_single(priv->ca, tx_req->mapping,
tx_req->skb->len, DMA_TO_DEVICE);
+            ib_dma_unmap_single(priv->ca, tx_req->mapping[0],
tx_req->skb->len, DMA_TO_DEVICE);

             /* FIXME: is this right? Shouldn't we only increment on
success? */
             ++dev->stats.tx_packets;
@@ -954,7 +954,7 @@ timeout:

             while ((int) p->tx_tail - (int) p->tx_head < 0) {
                         tx_req = &p->tx_ring[p->tx_tail &
(ipoib_sendq_size - 1)];
-                        ib_dma_unmap_single(priv->ca, tx_req->mapping,
tx_req->skb->len,
+                        ib_dma_unmap_single(priv->ca, tx_req->mapping[0],
tx_req->skb->len,
                                                     DMA_TO_DEVICE);
                         dev_kfree_skb_any(tx_req->skb);
                         ++p->tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 5063dd5..680c27f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -257,8 +257,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device
*dev, struct ib_wc *wc)

             tx_req = &priv->tx_ring[wr_id];

-            ib_dma_unmap_single(priv->ca, tx_req->mapping,
-                                        tx_req->skb->len, DMA_TO_DEVICE);
+            ipoib_dma_unmap_tx(priv->ca, tx_req);

             ++dev->stats.tx_packets;
             dev->stats.tx_bytes += tx_req->skb->len;
@@ -341,16 +340,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void
*dev_ptr)
 static inline int post_send(struct ipoib_dev_priv *priv,
                                         unsigned int wr_id,
                                         struct ib_ah *address, u32 qpn,
-                                        u64 addr, int len)
+                                        u64 *mapping, int headlen,
+                                        skb_frag_t *frags,
+                                        int nr_frags)
 {
             struct ib_send_wr *bad_wr;
+            int i;

-            priv->tx_sge.addr             = addr;
-            priv->tx_sge.length           = len;
-
-            priv->tx_wr.wr_id                   = wr_id;
-            priv->tx_wr.wr.ud.remote_qpn  = qpn;
-            priv->tx_wr.wr.ud.ah                = address;
+            priv->tx_sge[0].addr         = mapping[0];
+            priv->tx_sge[0].length       = headlen;
+            for (i = 0; i < nr_frags; ++i) {
+                        priv->tx_sge[i + 1].addr = mapping[i + 1];
+                        priv->tx_sge[i + 1].length = frags[i].size;
+            }
+            priv->tx_wr.num_sge          = nr_frags + 1;
+            priv->tx_wr.wr_id                  = wr_id;
+            priv->tx_wr.wr.ud.remote_qpn = qpn;
+            priv->tx_wr.wr.ud.ah               = address;

             return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
@@ -360,7 +366,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff
*skb,
 {
             struct ipoib_dev_priv *priv = netdev_priv(dev);
             struct ipoib_tx_buf *tx_req;
-            u64 addr;

             if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
                         ipoib_warn(priv, "packet len %d (> %d) too long to
send, dropping\n",
@@ -383,20 +388,19 @@ void ipoib_send(struct net_device *dev, struct
sk_buff *skb,
              */
             tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size -
1)];
             tx_req->skb = skb;
-            addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
-                                                 DMA_TO_DEVICE);
-            if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+            if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
                         ++dev->stats.tx_errors;
                         dev_kfree_skb_any(skb);
                         return;
             }
-            tx_req->mapping = addr;

             if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size
- 1),
-                                           address->ah, qpn, addr,
skb->len))) {
+                                           address->ah, qpn,
+                                           tx_req->mapping,
skb_headlen(skb),
+                                           skb_shinfo(skb)->frags,
skb_shinfo(skb)->nr_frags))) {
                         ipoib_warn(priv, "post_send failed\n");
                         ++dev->stats.tx_errors;
-                        ib_dma_unmap_single(priv->ca, addr, skb->len,
DMA_TO_DEVICE);
+                        ipoib_dma_unmap_tx(priv->ca, tx_req);
                         dev_kfree_skb_any(skb);
             } else {
                         dev->trans_start = jiffies;
@@ -615,10 +619,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int
flush)
                                     while ((int) priv->tx_tail - (int)
priv->tx_head < 0) {
                                                 tx_req =
&priv->tx_ring[priv->tx_tail &

       (ipoib_sendq_size - 1)];
-
ib_dma_unmap_single(priv->ca,
-
tx_req->mapping,
-
tx_req->skb->len,
-
DMA_TO_DEVICE);
+
ipoib_dma_unmap_tx(priv->ca, tx_req);

dev_kfree_skb_any(tx_req->skb);
                                                 ++priv->tx_tail;
                                                 --priv->tx_outstanding;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 3c6e45d..a6f5f65 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -149,14 +149,14 @@ int ipoib_transport_dev_init(struct net_device *dev,
struct ib_device *ca)
                         .cap = {
                                     .max_send_wr  = ipoib_sendq_size,
                                     .max_recv_wr  = ipoib_recvq_size,
-                                    .max_send_sge = 1,
+                                    .max_send_sge = dev->features &
NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
                                     .max_recv_sge = 1
                         },
                         .sq_sig_type = IB_SIGNAL_ALL_WR,
                         .qp_type     = IB_QPT_UD
             };

-            int ret, size;
+            int i, ret, size;

             priv->pd = ib_alloc_pd(priv->ca);
             if (IS_ERR(priv->pd)) {
@@ -197,11 +197,11 @@ int ipoib_transport_dev_init(struct net_device *dev,
struct ib_device *ca)
             priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
             priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;

-            priv->tx_sge.lkey             = priv->mr->lkey;
+            for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+                        priv->tx_sge[i].lkey    = priv->mr->lkey;

             priv->tx_wr.opcode            = IB_WR_SEND;
-            priv->tx_wr.sg_list           = &priv->tx_sge;
-            priv->tx_wr.num_sge           = 1;
+            priv->tx_wr.sg_list           = priv->tx_sge;
             priv->tx_wr.send_flags              = IB_SEND_SIGNALED;

             return 0;
--
1.5.3.8


_______________________________________________
general mailing list
general at lists.openfabrics.org
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit
http://openib.org/mailman/listinfo/openib-general
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080124/11e61f7d/attachment.html>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: graycol.gif
Type: image/gif
Size: 105 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080124/11e61f7d/attachment.gif>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: pic25807.gif
Type: image/gif
Size: 1255 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080124/11e61f7d/attachment-0001.gif>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: ecblank.gif
Type: image/gif
Size: 45 bytes
Desc: not available
URL: <http://lists.openfabrics.org/pipermail/general/attachments/20080124/11e61f7d/attachment-0002.gif>


More information about the general mailing list