[ofa-general] [PATCH 2/11] IB/ipoib: support for sending gather skbs

Eli Cohen eli at mellanox.co.il
Mon Sep 24 05:36:51 PDT 2007


From: Michael S. Tsirkin <mst at mellanox.co.il>
Subject: IB/ipoib: support for sending gather skbs

This patch converts the IPoIB TX path to map an skb's linear header
and each page fragment separately, posting one SGE per piece.  By
itself it changes no behaviour: the stack only hands the driver
fragmented skbs once NETIF_F_SG is set, and NETIF_F_SG can't actually
be set without enabling hardware checksum support, so the feature bit
is left to the follow-up patches.

Signed-off-by: Michael S. Tsirkin <mst at mellanox.co.il>

---
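Note for reviewers (not part of the changelog): once the checksum
offload patches later in this series are in, enabling gather sends
should come down to flipping the feature bits.  A hypothetical sketch
only; where exactly the bits get set is up to the follow-up patches:

	/* hypothetical follow-up, not in this patch: advertise
	 * scatter/gather only together with hardware checksumming,
	 * since the stack requires checksum offload for SG */
	dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;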

Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2007-09-24 12:09:21.000000000 +0200
@@ -122,9 +122,61 @@ struct ipoib_rx_buf {
 
 struct ipoib_tx_buf {
 	struct sk_buff *skb;
-	u64		mapping;
+	u64		mapping[MAX_SKB_FRAGS + 1];
 };
 
+static inline int ipoib_dma_map_tx(struct ib_device *ca,
+				   struct ipoib_tx_buf *tx_req)
+{
+	struct sk_buff *skb = tx_req->skb;
+	u64 *mapping = tx_req->mapping;
+	int frags;
+	int i;
+
+	mapping[0] = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
+				       DMA_TO_DEVICE);
+	if (unlikely(ib_dma_mapping_error(ca, mapping[0])))
+		return -EIO;
+
+	frags = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < frags; ++i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		mapping[i + 1] = ib_dma_map_page(ca, frag->page,
+						 frag->page_offset, frag->size,
+						 DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(ca, mapping[i + 1])))
+			goto partial_error;
+	}
+	return 0;
+
+partial_error:
+	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+	for (; i > 0; --i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1];
+		ib_dma_unmap_page(ca, mapping[i], frag->size, DMA_TO_DEVICE);
+	}
+	return -EIO;
+}
+
+static inline void ipoib_dma_unmap_tx(struct ib_device *ca,
+				      struct ipoib_tx_buf *tx_req)
+{
+	struct sk_buff *skb = tx_req->skb;
+	u64 *mapping = tx_req->mapping;
+	int frags;
+	int i;
+
+	ib_dma_unmap_single(ca, mapping[0], skb_headlen(skb), DMA_TO_DEVICE);
+
+	frags = skb_shinfo(skb)->nr_frags;
+	for (i = 0; i < frags; ++i) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		ib_dma_unmap_page(ca, mapping[i + 1], frag->size,
+				  DMA_TO_DEVICE);
+	}
+}
+
 struct ib_cm_id;
 
 struct ipoib_cm_data {
@@ -269,7 +321,7 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned             tx_head;
 	unsigned             tx_tail;
-	struct ib_sge        tx_sge;
+	struct ib_sge	     tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_send_wr    tx_wr;
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
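Reviewer note: the contract of the two helpers above, restated as a
simplified composite of the call sites changed below (ring and wr_id
bookkeeping omitted):

	tx_req->skb = skb;
	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
		/* -EIO: the helper unwinds its own partial mappings,
		 * so the caller only has to free the skb */
		dev_kfree_skb_any(skb);
		return;
	}
	/* ... post_send() posts mapping[0..nr_frags] as SGEs ... */
	/* on post failure, or later in the TX completion handler: */
	ipoib_dma_unmap_tx(priv->ca, tx_req);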
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-09-24 12:23:26.000000000 +0200
@@ -491,15 +491,21 @@ repost:
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    struct ipoib_cm_tx *tx,
 			    unsigned int wr_id,
-			    u64 addr, int len)
+			    u64 *mapping, int headlen,
+			    skb_frag_t *frags,
+			    int nr_frags)
 {
 	struct ib_send_wr *bad_wr;
+	int i;
 
-	priv->tx_sge.addr             = addr;
-	priv->tx_sge.length           = len;
-
-	priv->tx_wr.wr_id 	      = wr_id;
-
+	priv->tx_sge[0].addr   = mapping[0];
+	priv->tx_sge[0].length = headlen;
+	for (i = 0; i < nr_frags; ++i) {
+		priv->tx_sge[i + 1].addr = mapping[i + 1];
+		priv->tx_sge[i + 1].length = frags[i].size;
+	}
+	priv->tx_wr.num_sge    = nr_frags + 1;
+	priv->tx_wr.wr_id      = wr_id;
 	return ib_post_send(tx->qp, &priv->tx_wr, &bad_wr);
 }
 
@@ -507,7 +513,6 @@ void ipoib_cm_send(struct net_device *de
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-	u64 addr;
 
 	if (unlikely(skb->len > tx->mtu)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -530,20 +535,19 @@ void ipoib_cm_send(struct net_device *de
 	 */
 	tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = ib_dma_map_single(priv->ca, skb->data, skb->len, DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++priv->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 		return;
 	}
 
-	tx_req->mapping = addr;
-
 	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
-			        addr, skb->len))) {
+			        tx_req->mapping, skb_headlen(skb),
+			        skb_shinfo(skb)->frags,
+			        skb_shinfo(skb)->nr_frags))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
-		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
 	} else {
 		dev->trans_start = jiffies;
@@ -577,7 +581,7 @@ static void ipoib_cm_handle_tx_wc(struct
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+	ipoib_dma_unmap_tx(priv->ca, tx_req);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++priv->stats.tx_packets;
@@ -814,7 +818,7 @@ static struct ib_qp *ipoib_cm_create_tx_
 	attr.recv_cq = priv->cq;
 	attr.srq = priv->cm.srq;
 	attr.cap.max_send_wr = ipoib_sendq_size;
-	attr.cap.max_send_sge = 1;
+	attr.cap.max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1;
 	attr.sq_sig_type = IB_SIGNAL_ALL_WR;
 	attr.qp_type = IB_QPT_RC;
 	attr.send_cq = cq;
@@ -981,8 +985,7 @@ static void ipoib_cm_tx_destroy(struct i
 	if (p->tx_ring) {
 		while ((int) p->tx_tail - (int) p->tx_head < 0) {
 			tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-			ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
-					 DMA_TO_DEVICE);
+			ipoib_dma_unmap_tx(priv->ca, tx_req);
 			dev_kfree_skb_any(tx_req->skb);
 			++p->tx_tail;
 		}
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-09-24 11:57:02.000000000 +0200
@@ -257,8 +257,7 @@ static void ipoib_ib_handle_tx_wc(struct
 
 	tx_req = &priv->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping,
-			    tx_req->skb->len, DMA_TO_DEVICE);
+	ipoib_dma_unmap_tx(priv->ca, tx_req);
 
 	++priv->stats.tx_packets;
 	priv->stats.tx_bytes += tx_req->skb->len;
@@ -343,16 +342,23 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 addr, int len)
+			    u64 *mapping, int headlen,
+			    skb_frag_t *frags,
+			    int nr_frags)
 {
 	struct ib_send_wr *bad_wr;
+	int i;
 
-	priv->tx_sge.addr             = addr;
-	priv->tx_sge.length           = len;
-
-	priv->tx_wr.wr_id 	      = wr_id;
-	priv->tx_wr.wr.ud.remote_qpn  = qpn;
-	priv->tx_wr.wr.ud.ah 	      = address;
+	priv->tx_sge[0].addr         = mapping[0];
+	priv->tx_sge[0].length       = headlen;
+	for (i = 0; i < nr_frags; ++i) {
+		priv->tx_sge[i + 1].addr = mapping[i + 1];
+		priv->tx_sge[i + 1].length = frags[i].size;
+	}
+	priv->tx_wr.num_sge          = nr_frags + 1;
+	priv->tx_wr.wr_id 	     = wr_id;
+	priv->tx_wr.wr.ud.remote_qpn = qpn;
+	priv->tx_wr.wr.ud.ah 	     = address;
 
 	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
 }
@@ -362,7 +368,6 @@ void ipoib_send(struct net_device *dev, 
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
-	u64 addr;
 
 	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -385,20 +390,19 @@ void ipoib_send(struct net_device *dev, 
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
-				 DMA_TO_DEVICE);
-	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
+	if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
 		++priv->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 		return;
 	}
-	tx_req->mapping = addr;
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-			       address->ah, qpn, addr, skb->len))) {
+			       address->ah, qpn,
+			       tx_req->mapping, skb_headlen(skb),
+			       skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags))) {
 		ipoib_warn(priv, "post_send failed\n");
 		++priv->stats.tx_errors;
-		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
+		ipoib_dma_unmap_tx(priv->ca, tx_req);
 		dev_kfree_skb_any(skb);
 	} else {
 		dev->trans_start = jiffies;
@@ -604,10 +608,7 @@ int ipoib_ib_dev_stop(struct net_device 
 			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 				tx_req = &priv->tx_ring[priv->tx_tail &
 							(ipoib_sendq_size - 1)];
-				ib_dma_unmap_single(priv->ca,
-						    tx_req->mapping,
-						    tx_req->skb->len,
-						    DMA_TO_DEVICE);
+				ipoib_dma_unmap_tx(priv->ca, tx_req);
 				dev_kfree_skb_any(tx_req->skb);
 				++priv->tx_tail;
 			}
Index: ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- ofa_1_3_dev_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-09-24 11:20:24.000000000 +0200
+++ ofa_1_3_dev_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-09-24 12:24:02.000000000 +0200
@@ -149,14 +149,14 @@ int ipoib_transport_dev_init(struct net_
 		.cap = {
 			.max_send_wr  = ipoib_sendq_size,
 			.max_recv_wr  = ipoib_recvq_size,
-			.max_send_sge = 1,
+			.max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
 			.max_recv_sge = 1
 		},
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type     = IB_QPT_UD
 	};
 
-	int ret, size;
+	int i, ret, size;
 
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
@@ -197,11 +197,11 @@ int ipoib_transport_dev_init(struct net_
 	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
 	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
 
-	priv->tx_sge.lkey 	= priv->mr->lkey;
+	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+		priv->tx_sge[i].lkey    = priv->mr->lkey;
 
 	priv->tx_wr.opcode 	= IB_WR_SEND;
-	priv->tx_wr.sg_list 	= &priv->tx_sge;
-	priv->tx_wr.num_sge 	= 1;
+	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 
 	return 0;
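
Reviewer note: using MAX_SKB_FRAGS + 1 send SGEs assumes the HCA can
actually supply that many.  A follow-up could gate NETIF_F_SG on the
device capability; ib_query_device() and struct ib_device_attr are
existing verbs API, but the surrounding logic is only a sketch:

	struct ib_device_attr dev_attr;

	if (!ib_query_device(priv->ca, &dev_attr) &&
	    dev_attr.max_sge >= MAX_SKB_FRAGS + 1)
		dev->features |= NETIF_F_SG; /* with csum offload, see above */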