[ofa-general] [PATCH 2/16 v4] IB/ipoib: Add s/g support
Eli Cohen
eli at mellanox.co.il
Wed Jan 30 08:30:53 PST 2008
IB/ipoib: Add s/g support
This patch acts as a preparation for using checksum offload for
IB devices capable of inserting/verifying checksum in IP
packets. The patch does not actually turn on NETIF_F_SG
but rather defers the role to the patches adding checksum
offload capabilities. Support is added only for datagram mode
since Mellanox HW does not support checksum offload on connected QPs.
Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>
Signed-off-by: Eli Cohen <eli at mellanox.co.il>
---
drivers/infiniband/ulp/ipoib/ipoib.h | 60 ++++++++++++++++++++++++++--
drivers/infiniband/ulp/ipoib/ipoib_cm.c | 10 ++--
drivers/infiniband/ulp/ipoib/ipoib_ib.c | 41 ++++++++++---------
drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 14 +++---
4 files changed, 89 insertions(+), 36 deletions(-)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index fe250c6..7c9edc6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -143,9 +143,61 @@ struct ipoib_rx_buf {
struct ipoib_tx_buf {
struct sk_buff *skb;
- u64 mapping;
+ u64 mapping[MAX_SKB_FRAGS + 1];
};
+static inline int ipoib_dma_map_tx(struct ib_device *ca,
+ struct ipoib_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int frags;
+ int i;
+
+ mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+ return -EIO;
+
+ frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+ frag->page_offset, frag->size,
+ DMA_TO_DEVICE);
+ if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+ goto partial_error;
+ }
+ return 0;
+
+partial_error:
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+ for (; i > 0; --i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+ ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+ }
+ return -EIO;
+}
+
+static inline void ipoib_dma_unmap_tx(struct ib_device *ca,
+ struct ipoib_tx_buf *tx_req)
+{
+ struct sk_buff *skb = tx_req->skb;
+ u64 *mapping = tx_req->mapping;
+ int frags;
+ int i;
+
+ ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+ frags = skb_shinfo(skb)->nr_frags;
+ for (i = 0; i < frags; ++i) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+ DMA_TO_DEVICE);
+ }
+}
+
struct ib_cm_id;
struct ipoib_cm_data {
@@ -294,9 +346,9 @@ struct ipoib_dev_priv {
spinlock_t tx_lock;
struct ipoib_tx_buf *tx_ring;
- unsigned tx_head;
- unsigned tx_tail;
- struct ib_sge tx_sge;
+ unsigned tx_head;
+ unsigned tx_tail;
+ struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_send_wr tx_wr;
unsigned tx_outstanding;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 1818f95..7dd2ec4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -634,8 +634,8 @@ static inline int post_send(struct ipoib_dev_priv *priv,
{
struct ib_send_wr *bad_wr;
- priv->tx_sge.addr = addr;
- priv->tx_sge.length = len;
+ priv->tx_sge[0].addr = addr;
+ priv->tx_sge[0].length = len;
priv->tx_wr.wr_id = wr_id | IPOIB_OP_CM;
@@ -676,7 +676,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return;
}
- tx_req->mapping = addr;
+ tx_req->mapping[0] = addr;
if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
addr, skb->len))) {
@@ -715,7 +715,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
tx_req = &tx->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+ ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len, DMA_TO_DEVICE);
/* FIXME: is this right? Shouldn't we only increment on success? */
++dev->stats.tx_packets;
@@ -1110,7 +1110,7 @@ timeout:
while ((int) p->tx_tail - (int) p->tx_head < 0) {
tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
+ ib_dma_unmap_single(priv->ca, tx_req->mapping[0], tx_req->skb->len,
DMA_TO_DEVICE);
dev_kfree_skb_any(tx_req->skb);
++p->tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 52bc2bd..680c27f 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -257,8 +257,7 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
tx_req = &priv->tx_ring[wr_id];
- ib_dma_unmap_single(priv->ca, tx_req->mapping,
- tx_req->skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
++dev->stats.tx_packets;
dev->stats.tx_bytes += tx_req->skb->len;
@@ -341,16 +340,23 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
static inline int post_send(struct ipoib_dev_priv *priv,
unsigned int wr_id,
struct ib_ah *address, u32 qpn,
- u64 addr, int len)
+ u64 *mapping, int headlen,
+ skb_frag_t *frags,
+ int nr_frags)
{
struct ib_send_wr *bad_wr;
+ int i;
- priv->tx_sge.addr = addr;
- priv->tx_sge.length = len;
-
- priv->tx_wr.wr_id = wr_id;
- priv->tx_wr.wr.ud.remote_qpn = qpn;
- priv->tx_wr.wr.ud.ah = address;
+ priv->tx_sge[0].addr = mapping[0];
+ priv->tx_sge[0].length = headlen;
+ for (i = 0; i < nr_frags; ++i) {
+ priv->tx_sge[i + 1].addr = mapping[i + 1];
+ priv->tx_sge[i + 1].length = frags[i].size;
+ }
+ priv->tx_wr.num_sge = nr_frags + 1;
+ priv->tx_wr.wr_id = wr_id;
+ priv->tx_wr.wr.ud.remote_qpn = qpn;
+ priv->tx_wr.wr.ud.ah = address;
return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
}
@@ -360,7 +366,6 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_tx_buf *tx_req;
- u64 addr;
if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -383,20 +388,19 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
*/
tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
- addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
- DMA_TO_DEVICE);
- if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+ if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
dev_kfree_skb_any(skb);
return;
}
- tx_req->mapping = addr;
if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
- address->ah, qpn, addr, skb->len))) {
+ address->ah, qpn,
+ tx_req->mapping, skb_headlen(skb),
+ skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
ipoib_warn(priv, "post_send failed\n");
++dev->stats.tx_errors;
- ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(skb);
} else {
dev->trans_start = jiffies;
@@ -615,10 +619,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
tx_req = &priv->tx_ring[priv->tx_tail &
(ipoib_sendq_size - 1)];
- ib_dma_unmap_single(priv->ca,
- tx_req->mapping,
- tx_req->skb->len,
- DMA_TO_DEVICE);
+ ipoib_dma_unmap_tx(priv->ca, tx_req);
dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail;
--priv->tx_outstanding;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index 433e99a..5e392e0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
@@ -149,14 +149,14 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
.cap = {
.max_send_wr = ipoib_sendq_size,
.max_recv_wr = ipoib_recvq_size,
- .max_send_sge = 1,
+ .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
.max_recv_sge = 1
},
.sq_sig_type = IB_SIGNAL_ALL_WR,
.qp_type = IB_QPT_UD
};
- int ret, size;
+ int i, ret, size;
priv->pd = ib_alloc_pd(priv->ca);
if (IS_ERR(priv->pd)) {
@@ -201,12 +201,12 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->dev->dev_addr[2] = (priv->qp->qp_num >> 8) & 0xff;
priv->dev->dev_addr[3] = (priv->qp->qp_num ) & 0xff;
- priv->tx_sge.lkey = priv->mr->lkey;
+ for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+ priv->tx_sge[i].lkey = priv->mr->lkey;
- priv->tx_wr.opcode = IB_WR_SEND;
- priv->tx_wr.sg_list = &priv->tx_sge;
- priv->tx_wr.num_sge = 1;
- priv->tx_wr.send_flags = IB_SEND_SIGNALED;
+ priv->tx_wr.opcode = IB_WR_SEND;
+ priv->tx_wr.sg_list = priv->tx_sge;
+ priv->tx_wr.send_flags = IB_SEND_SIGNALED;
return 0;
--
1.5.3.8
More information about the general
mailing list