<html><body>
<p>Hello Eli,<br>
<br>
        Can you make the IPoIB-CM rx S/G functions more generic so they can be reused here for IPoIB UD tx S/G?<br>
<br>
Thanks<br>
Shirley <br>
<br>
<br>
<img width="16" height="16" src="cid:1__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt="Inactive hide details for Eli Cohen &lt;eli@mellanox.co.il&gt;">Eli Cohen &lt;eli@mellanox.co.il&gt;<br>
<br>
<br>

<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr valign="top"><td style="background-image:url(cid:2__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com); background-repeat: no-repeat; " width="29%">
<ul>
<ul>
<ul>
<ul><b><font size="2">Eli Cohen &lt;eli@mellanox.co.il&gt;</font></b><font size="2"> </font><br>
<font size="2">Sent by: general-bounces@lists.openfabrics.org</font>
<p><font size="2">01/16/08 08:37 AM</font></ul>
</ul>
</ul>
</ul>
</td><td width="71%">
<table width="100%" border="0" cellspacing="0" cellpadding="0">
<tr valign="top"><td width="1%"><img width="58" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<div align="right"><font size="2">To</font></div></td><td width="100%"><img width="1" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<font size="2">Roland Dreier &lt;rdreier@cisco.com&gt;</font></td></tr>

<tr valign="top"><td width="1%"><img width="58" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<div align="right"><font size="2">cc</font></div></td><td width="100%"><img width="1" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<font size="2">openfabrics &lt;general@lists.openfabrics.org&gt;</font></td></tr>

<tr valign="top"><td width="1%"><img width="58" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<div align="right"><font size="2">Subject</font></div></td><td width="100%"><img width="1" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""><br>
<font size="2">[ofa-general] [PATCH 2/16] ib/ipoib: Add s/g support for IPOIB</font></td></tr>
</table>

<table border="0" cellspacing="0" cellpadding="0">
<tr valign="top"><td width="58"><img width="1" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""></td><td width="336"><img width="1" height="1" src="cid:3__=08BBF949DFFDD4188f9e8a93df938@us.ibm.com" border="0" alt=""></td></tr>
</table>
</td></tr>
</table>
<br>
<tt>Add s/g support for IPOIB<br>
<br>
This patch acts as a preparation for using checksum offload for<br>
IB devices capable of inserting/verifying checksum in IP<br>
packets. The patch does not actually turn on NETIF_F_SG<br>
but rather defers the role to the patches adding checksum<br>
offload capabilities. Support is added only for datagram mode<br>
since Mellanox HW does not support checksum offload on connected QPs.<br>
<br>
Signed-off-by: Michael S. Tsirkin &lt;mst@mellanox.co.il&gt;<br>
Signed-off-by: Eli Cohen &lt;eli@mellanox.co.il&gt;<br>
---<br>
 drivers/infiniband/ulp/ipoib/ipoib.h       |   56 +++++++++++++++++++++++++++-<br>
 drivers/infiniband/ulp/ipoib/ipoib_cm.c    |   10 ++--<br>
 drivers/infiniband/ulp/ipoib/ipoib_ib.c    |   41 ++++++++++----------<br>
 drivers/infiniband/ulp/ipoib/ipoib_verbs.c |   10 ++--<br>
 4 files changed, 85 insertions(+), 32 deletions(-)<br>
<br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h<br>
index eb7edab..6729c14 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib.h<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h<br>
@@ -142,9 +142,61 @@ struct ipoib_rx_buf {<br>
 <br>
 struct ipoib_tx_buf {<br>
                 struct sk_buff *skb;<br>
-                u64                             mapping;<br>
+                u64                             mapping[MAX_SKB_FRAGS + 1];<br>
 };<br>
 <br>
+static inline int ipoib_dma_map_tx(struct ib_device *ca,<br>
+                                                                   struct ipoib_tx_buf *tx_req)<br>
+{<br>
+                struct sk_buff *skb = tx_req->skb;<br>
+                u64 *mapping = tx_req->mapping;<br>
+                int frags;<br>
+                int i;<br>
+<br>
+                mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),<br>
+                                                                       DMA_TO_DEVICE);<br>
+                if (unlikely(ib_dma_mapping_error(ca, mapping[0])))<br>
+                                return -EIO;<br>
+<br>
+                frags = skb_shinfo(skb)->nr_frags;<br>
+                for (i = 0; i < frags; ++i) {<br>
+                                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];<br>
+                                mapping[i + 1] = ib_dma_map_page(ca, frag->page,<br>
+                                                                                                 frag->page_offset, frag->size,<br>
+                                                                                                 DMA_TO_DEVICE);<br>
+                                if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))<br>
+                                                goto partial_error;<br>
+                }<br>
+                return 0;<br>
+<br>
+partial_error:<br>
+                ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);<br>
+<br>
+                for (; i > 0; --i) {<br>
+                                skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];<br>
+                                ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);<br>
+                }<br>
+                return -EIO;<br>
+}<br>
+<br>
+static inline void ipoib_dma_unmap_tx(struct ib_device *ca,<br>
+                                                                      struct ipoib_tx_buf *tx_req)<br>
+{<br>
+                struct sk_buff *skb = tx_req->skb;<br>
+                u64 *mapping = tx_req->mapping;<br>
+                int frags;<br>
+                int i;<br>
+<br>
+                ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);<br>
+<br>
+                frags = skb_shinfo(skb)->nr_frags;<br>
+                for (i = 0; i < frags; ++i) {<br>
+                                skb_frag_t *frag = &skb_shinfo(skb)->frags[i];<br>
+                                ib_dma_unmap_page(ca, mapping[i + 1], frag->size,<br>
+                                                                  DMA_TO_DEVICE);<br>
+                }<br>
+}<br>
+<br>
 struct ib_cm_id;<br>
 <br>
 struct ipoib_cm_data {<br>
@@ -290,7 +342,7 @@ struct ipoib_dev_priv {<br>
                 struct ipoib_tx_buf *tx_ring;<br>
                 unsigned             tx_head;<br>
                 unsigned             tx_tail;<br>
-                struct ib_sge        tx_sge;<br>
+                struct ib_sge                tx_sge[MAX_SKB_FRAGS + 1];<br>
                 struct ib_send_wr    tx_wr;<br>
                 unsigned             tx_outstanding;<br>
 <br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
index 059cf92..8485fde 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c<br>
@@ -495,8 +495,8 @@ static inline int post_send(struct ipoib_dev_priv *priv,<br>
 {<br>
                 struct ib_send_wr *bad_wr;<br>
 <br>
-                priv->tx_sge.addr             = addr;<br>
-                priv->tx_sge.length           = len;<br>
+                priv->tx_sge[0].addr          = addr;<br>
+                priv->tx_sge[0].length        = len;<br>
 <br>
                 priv->tx_wr.wr_id                  = wr_id | IPOIB_OP_CM;<br>
 <br>
@@ -537,7 +537,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_<br>
                                 return;<br>
                 }<br>
 <br>
-                tx_req->mapping = addr;<br>
+                tx_req->mapping[0] = addr;<br>
 <br>
                 if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),<br>
                                                         addr, skb->len))) {<br>
@@ -576,7 +576,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)<br>
 <br>
                 tx_req = &tx->tx_ring[wr_id];<br>
 <br>
-                ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);<br>
+                ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);<br>
 <br>
                 /* FIXME: is this right? Shouldn't we only increment on success? */<br>
                 ++dev->stats.tx_packets;<br>
@@ -954,7 +954,7 @@ timeout:<br>
 <br>
                 while ((int) p->tx_tail - (int) p->tx_head < 0) {<br>
                                 tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];<br>
-                                ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,<br>
+                                ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,<br>
                                                                     DMA_TO_DEVICE);<br>
                                 dev_kfree_skb_any(tx_req->skb);<br>
                                 ++p->tx_tail;<br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c<br>
index 5063dd5..680c27f 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c<br>
@@ -257,8 +257,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)<br>
 <br>
                 tx_req = &priv->tx_ring[wr_id];<br>
 <br>
-                ib_dma_unmap_single(priv->ca, tx_req->mapping,<br>
-                                                    tx_req->skb->len, DMA_TO_DEVICE);<br>
+                ipoib_dma_unmap_tx(priv->ca, tx_req);<br>
 <br>
                 ++dev->stats.tx_packets;<br>
                 dev->stats.tx_bytes += tx_req->skb->len;<br>
@@ -341,16 +340,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)<br>
 static inline int post_send(struct ipoib_dev_priv *priv,<br>
                                                     unsigned int wr_id,<br>
                                                     struct ib_ah *address, u32 qpn,<br>
-                                                    u64 addr, int len)<br>
+                                                    u64 *mapping, int headlen,<br>
+                                                    skb_frag_t *frags,<br>
+                                                    int nr_frags)<br>
 {<br>
                 struct ib_send_wr *bad_wr;<br>
+                int i;<br>
 <br>
-                priv->tx_sge.addr             = addr;<br>
-                priv->tx_sge.length           = len;<br>
-<br>
-                priv->tx_wr.wr_id                  = wr_id;<br>
-                priv->tx_wr.wr.ud.remote_qpn  = qpn;<br>
-                priv->tx_wr.wr.ud.ah                       = address;<br>
+                priv->tx_sge[0].addr         = mapping[0];<br>
+                priv->tx_sge[0].length       = headlen;<br>
+                for (i = 0; i < nr_frags; ++i) {<br>
+                                priv->tx_sge[i + 1].addr = mapping[i + 1];<br>
+                                priv->tx_sge[i + 1].length = frags[i].size;<br>
+                }<br>
+                priv->tx_wr.num_sge          = nr_frags + 1;<br>
+                priv->tx_wr.wr_id                 = wr_id;<br>
+                priv->tx_wr.wr.ud.remote_qpn = qpn;<br>
+                priv->tx_wr.wr.ud.ah                      = address;<br>
 <br>
                 return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);<br>
 }<br>
@@ -360,7 +366,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,<br>
 {<br>
                 struct ipoib_dev_priv *priv = netdev_priv(dev);<br>
                 struct ipoib_tx_buf *tx_req;<br>
-                u64 addr;<br>
 <br>
                 if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {<br>
                                 ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",<br>
@@ -383,20 +388,19 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,<br>
                  */<br>
                 tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];<br>
                 tx_req->skb = skb;<br>
-                addr = ib_dma_map_single(priv->ca, skb->data, skb->len,<br>
-                                                                 DMA_TO_DEVICE);<br>
-                if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {<br>
+                if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {<br>
                                 ++dev->stats.tx_errors;<br>
                                 dev_kfree_skb_any(skb);<br>
                                 return;<br>
                 }<br>
-                tx_req->mapping = addr;<br>
 <br>
                 if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),<br>
-                                                       address->ah, qpn, addr, skb->len))) {<br>
+                                                       address->ah, qpn,<br>
+                                                       tx_req->mapping, skb_headlen(skb),<br>
+                                                       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {<br>
                                 ipoib_warn(priv, "post_send failed\n");<br>
                                 ++dev->stats.tx_errors;<br>
-                                ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);<br>
+                                ipoib_dma_unmap_tx(priv->ca, tx_req);<br>
                                 dev_kfree_skb_any(skb);<br>
                 } else {<br>
                                 dev->trans_start = jiffies;<br>
@@ -615,10 +619,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)<br>
                                                 while ((int) priv->tx_tail - (int) priv->tx_head < 0) {<br>
                                                                 tx_req = &priv->tx_ring[priv->tx_tail &<br>
                                                                                                                 (ipoib_sendq_size - 1)];<br>
-                                                                ib_dma_unmap_single(priv->ca,<br>
-                                                                                                    tx_req->mapping,<br>
-                                                                                                    tx_req->skb->len,<br>
-                                                                                                    DMA_TO_DEVICE);<br>
+                                                                ipoib_dma_unmap_tx(priv->ca, tx_req);<br>
                                                                 dev_kfree_skb_any(tx_req->skb);<br>
                                                                 ++priv->tx_tail;<br>
                                                                 --priv->tx_outstanding;<br>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c<br>
index 3c6e45d..a6f5f65 100644<br>
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c<br>
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c<br>
@@ -149,14 +149,14 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)<br>
                                 .cap = {<br>
                                                 .max_send_wr  = ipoib_sendq_size,<br>
                                                 .max_recv_wr  = ipoib_recvq_size,<br>
-                                                .max_send_sge = 1,<br>
+                                                .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,<br>
                                                 .max_recv_sge = 1<br>
                                 },<br>
                                 .sq_sig_type = IB_SIGNAL_ALL_WR,<br>
                                 .qp_type     = IB_QPT_UD<br>
                 };<br>
 <br>
-                int ret, size;<br>
+                int i, ret, size;<br>
 <br>
                 priv->pd = ib_alloc_pd(priv->ca);<br>
                 if (IS_ERR(priv->pd)) {<br>
@@ -197,11 +197,11 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)<br>
                 priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;<br>
                 priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;<br>
 <br>
-                priv->tx_sge.lkey            = priv->mr->lkey;<br>
+                for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)<br>
+                                priv->tx_sge[i].lkey    = priv->mr->lkey;<br>
 <br>
                 priv->tx_wr.opcode           = IB_WR_SEND;<br>
-                priv->tx_wr.sg_list                  = &priv->tx_sge;<br>
-                priv->tx_wr.num_sge                  = 1;<br>
+                priv->tx_wr.sg_list                  = priv->tx_sge;<br>
                 priv->tx_wr.send_flags               = IB_SEND_SIGNALED;<br>
 <br>
                 return 0;<br>
-- <br>
1.5.3.8<br>
<br>
<br>
_______________________________________________<br>
general mailing list<br>
general@lists.openfabrics.org<br>
</tt><tt><a href="http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general">http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general</a></tt><tt><br>
<br>
To unsubscribe, please visit </tt><tt><a href="http://openib.org/mailman/listinfo/openib-general">http://openib.org/mailman/listinfo/openib-general</a></tt><tt><br>
</tt><br>
</body></html>