[ofa-general] [PATCH - 6] ipoib scatter/gather

Eli Cohen eli at mellanox.co.il
Wed Aug 15 11:16:14 PDT 2007


Add scatter/gather support to ipoib

Signed-off-by: Eli Cohen <eli at mellanox.co.il>

---

If the net device does not advertise scatter/gather (NETIF_F_SG), the QP
is created with a single send SGE; otherwise max_send_sge is sized to
MAX_SKB_FRAGS + 1 so that the skb linear head and each page fragment can
be posted as separate gather entries.
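
For reviewers who want the shape of the change without walking the whole
diff: ipoib_send() now maps the skb linear head with ib_dma_map_single()
and each page fragment with ib_dma_map_page(), records the results in
tx_req->mapping[], and post_send() copies that list into the tx_sge array
and sets num_sge accordingly. The snippet below is only an illustrative
sketch of that mapping step, using the ipoib_mapping_st type introduced
by this patch; the helper name build_gather_list() is mine and does not
appear in the patch, and the error handling mirrors what ipoib_send()
and dma_unmap_list() do:

static int build_gather_list(struct ib_device *ca, struct sk_buff *skb,
			     struct ipoib_mapping_st *map)
{
	struct skb_frag_struct *frag;
	int i;

	/* Entry 0 always covers the linear (header) part of the skb. */
	map[0].addr = ib_dma_map_single(ca, skb->data, skb_headlen(skb),
					DMA_TO_DEVICE);
	if (ib_dma_mapping_error(ca, map[0].addr))
		return -EIO;
	map[0].size = skb_headlen(skb);

	/* One extra gather entry per page fragment, up to MAX_SKB_FRAGS. */
	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
		frag = &skb_shinfo(skb)->frags[i];
		map[i + 1].addr = ib_dma_map_page(ca, frag->page,
						  frag->page_offset,
						  frag->size, DMA_TO_DEVICE);
		if (ib_dma_mapping_error(ca, map[i + 1].addr))
			goto unmap;
		map[i + 1].size = frag->size;
	}

	/* Number of gather entries the send WR will use (num_sge). */
	return skb_shinfo(skb)->nr_frags + 1;

unmap:
	ib_dma_unmap_single(ca, map[0].addr, map[0].size, DMA_TO_DEVICE);
	while (--i >= 0)
		ib_dma_unmap_page(ca, map[i + 1].addr, map[i + 1].size,
				  DMA_TO_DEVICE);
	return -EIO;
}

On the completion path the handler only needs the entry count
(nr_frags + 1) to unmap the whole list and account tx_bytes, which is
what dma_unmap_list() in the patch does.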

Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:31.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-15 20:50:33.000000000 +0300
@@ -121,9 +121,14 @@ struct ipoib_rx_buf {
 	u64		mapping;
 };
 
+struct ipoib_mapping_st {
+	u64	addr;
+	u16	size;
+};
+
 struct ipoib_tx_buf {
 	struct sk_buff *skb;
-	u64		mapping;
+	struct ipoib_mapping_st  mapping[MAX_SKB_FRAGS + 1];
 };
 
 struct ib_cm_id;
@@ -270,7 +275,7 @@ struct ipoib_dev_priv {
 	struct ipoib_tx_buf *tx_ring;
 	unsigned             tx_head;
 	unsigned             tx_tail;
-	struct ib_sge        tx_sge;
+	struct ib_sge        tx_sge[MAX_SKB_FRAGS + 1];
 	struct ib_send_wr    tx_wr;
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:31.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2007-08-15 20:50:33.000000000 +0300
@@ -248,6 +248,24 @@ repost:
 			   "for buf %d\n", wr_id);
 }
 
+static int dma_unmap_list(struct ib_device *ca, struct ipoib_mapping_st *map,
+			   u16 n)
+{
+	int i;
+	int len;
+
+	ib_dma_unmap_single(ca, map[0].addr, map[0].size, DMA_TO_DEVICE);
+	len = map[0].size;
+
+	for (i = 1; i < n; ++i) {
+		ib_dma_unmap_page(ca, map[i].addr, map[i].size,
+				  DMA_TO_DEVICE);
+		len += map[i].size;
+	}
+
+	return len;
+}
+
 static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -265,12 +283,9 @@ static void ipoib_ib_handle_tx_wc(struct
 	}
 
 	tx_req = &priv->tx_ring[wr_id];
-
-	ib_dma_unmap_single(priv->ca, tx_req->mapping,
-			    tx_req->skb->len, DMA_TO_DEVICE);
-
+	priv->stats.tx_bytes += dma_unmap_list(priv->ca, tx_req->mapping,
+					skb_shinfo(tx_req->skb)->nr_frags + 1);
 	++priv->stats.tx_packets;
-	priv->stats.tx_bytes += tx_req->skb->len;
 
 	dev_kfree_skb_any(tx_req->skb);
 
@@ -352,13 +367,17 @@ void ipoib_ib_completion(struct ib_cq *c
 static inline int post_send(struct ipoib_dev_priv *priv,
 			    unsigned int wr_id,
 			    struct ib_ah *address, u32 qpn,
-			    u64 addr, int len)
+			    struct ipoib_mapping_st *mapping, int ngather)
 {
 	struct ib_send_wr *bad_wr;
+	int i;
 
-	priv->tx_sge.addr             = addr;
-	priv->tx_sge.length           = len;
+	for (i = 0; i < ngather; ++i) {
+		priv->tx_sge[i].addr = mapping[i].addr;
+		priv->tx_sge[i].length = mapping[i].size;
+	}
 
+	priv->tx_wr.num_sge 	      = ngather;
 	priv->tx_wr.wr_id 	      = wr_id;
 	priv->tx_wr.wr.ud.remote_qpn  = qpn;
 	priv->tx_wr.wr.ud.ah 	      = address;
@@ -371,7 +390,9 @@ void ipoib_send(struct net_device *dev, 
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ipoib_tx_buf *tx_req;
+	struct skb_frag_struct *frag;
 	u64 addr;
+	unsigned short i;
 
 	if (unlikely(skb->len > priv->mcast_mtu + IPOIB_ENCAP_LEN)) {
 		ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
@@ -394,14 +415,27 @@ void ipoib_send(struct net_device *dev, 
 	 */
 	tx_req = &priv->tx_ring[priv->tx_head & (ipoib_sendq_size - 1)];
 	tx_req->skb = skb;
-	addr = ib_dma_map_single(priv->ca, skb->data, skb->len,
+	addr = ib_dma_map_single(priv->ca, skb->data, skb_headlen(skb),
 				 DMA_TO_DEVICE);
 	if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
 		++priv->stats.tx_errors;
 		dev_kfree_skb_any(skb);
 		return;
 	}
-	tx_req->mapping = addr;
+
+	tx_req->mapping[0].addr = addr;
+	tx_req->mapping[0].size = skb_headlen(skb);
+
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) {
+		frag = &skb_shinfo(skb)->frags[i];
+		addr = ib_dma_map_page(priv->ca, frag->page, frag->page_offset,
+				       frag->size, DMA_TO_DEVICE);
+		if (unlikely(ib_dma_mapping_error(priv->ca, addr)))
+			goto map_err;
+
+		tx_req->mapping[i + 1].addr = addr;
+		tx_req->mapping[i + 1].size = frag->size;
+	}
 
 	if (dev->features & NETIF_F_HW_CSUM) {
 		if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
@@ -414,11 +448,10 @@ void ipoib_send(struct net_device *dev, 
 
 
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
-			       address->ah, qpn, addr, skb->len))) {
+			       address->ah, qpn, tx_req->mapping,
+			       skb_shinfo(skb)->nr_frags + 1))) {
 		ipoib_warn(priv, "post_send failed\n");
-		++priv->stats.tx_errors;
-		ib_dma_unmap_single(priv->ca, addr, skb->len, DMA_TO_DEVICE);
-		dev_kfree_skb_any(skb);
+		goto map_err;
 	} else {
 		dev->trans_start = jiffies;
 
@@ -431,6 +464,11 @@ void ipoib_send(struct net_device *dev, 
 			set_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
 		}
 	}
+	return;
+
+map_err:
+	dma_unmap_list(priv->ca, tx_req->mapping, i + 1);
+	dev_kfree_skb_any(skb);
 }
 
 static void __ipoib_reap_ah(struct net_device *dev)
@@ -593,6 +631,7 @@ int ipoib_ib_dev_stop(struct net_device 
 	struct ib_qp_attr qp_attr;
 	unsigned long begin;
 	struct ipoib_tx_buf *tx_req;
+	struct sk_buff *skb;
 	int i;
 
 	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
@@ -623,11 +662,10 @@ int ipoib_ib_dev_stop(struct net_device 
 			while ((int) priv->tx_tail - (int) priv->tx_head < 0) {
 				tx_req = &priv->tx_ring[priv->tx_tail &
 							(ipoib_sendq_size - 1)];
-				ib_dma_unmap_single(priv->ca,
-						    tx_req->mapping,
-						    tx_req->skb->len,
-						    DMA_TO_DEVICE);
-				dev_kfree_skb_any(tx_req->skb);
+				skb = tx_req->skb;
+				dma_unmap_list(priv->ca, tx_req->mapping,
+					       skb_shinfo(skb)->nr_frags + 1);
+				dev_kfree_skb_any(skb);
 				++priv->tx_tail;
 			}
 
@@ -635,13 +673,14 @@ int ipoib_ib_dev_stop(struct net_device 
 				struct ipoib_rx_buf *rx_req;
 
 				rx_req = &priv->rx_ring[i];
-				if (!rx_req->skb)
+				skb = rx_req->skb;
+				if (!skb)
 					continue;
 				ib_dma_unmap_single(priv->ca,
 						    rx_req->mapping,
 						    IPOIB_BUF_SIZE,
 						    DMA_FROM_DEVICE);
-				dev_kfree_skb_any(rx_req->skb);
+				dev_kfree_skb_any(skb);
 				rx_req->skb = NULL;
 			}
 
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:32.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2007-08-15 20:50:33.000000000 +0300
@@ -149,14 +149,15 @@ int ipoib_transport_dev_init(struct net_
 		.cap = {
 			.max_send_wr  = ipoib_sendq_size,
 			.max_recv_wr  = ipoib_recvq_size,
-			.max_send_sge = 1,
 			.max_recv_sge = 1
 		},
 		.sq_sig_type = IB_SIGNAL_ALL_WR,
 		.qp_type     = IB_QPT_UD
 	};
 
-	int ret, size;
+	int ret;
+	int size;
+	int i;
 
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
@@ -187,6 +188,11 @@ int ipoib_transport_dev_init(struct net_
 	init_attr.send_cq = priv->cq;
 	init_attr.recv_cq = priv->cq;
 
+	if (priv->dev->features & NETIF_F_SG)
+		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
+	else
+		init_attr.cap.max_send_sge = 1;
+
 	priv->qp = ib_create_qp(priv->pd, &init_attr);
 	if (IS_ERR(priv->qp)) {
 		printk(KERN_WARNING "%s: failed to create QP\n", ca->name);
@@ -197,11 +203,11 @@ int ipoib_transport_dev_init(struct net_
 	priv->dev->dev_addr[2] = (priv->qp->qp_num >>  8) & 0xff;
 	priv->dev->dev_addr[3] = (priv->qp->qp_num      ) & 0xff;
 
-	priv->tx_sge.lkey 	= priv->mr->lkey;
+	for (i = 0; i < MAX_SKB_FRAGS + 1; ++i)
+		priv->tx_sge[i].lkey 	= priv->mr->lkey;
 
 	priv->tx_wr.opcode 	= IB_WR_SEND;
-	priv->tx_wr.sg_list 	= &priv->tx_sge;
-	priv->tx_wr.num_sge 	= 1;
+	priv->tx_wr.sg_list 	= priv->tx_sge;
 	priv->tx_wr.send_flags 	= IB_SEND_SIGNALED;
 
 	return 0;
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_cm.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-08-15 20:50:31.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_cm.c	2007-08-15 20:50:33.000000000 +0300
@@ -495,8 +495,8 @@ static inline int post_send(struct ipoib
 {
 	struct ib_send_wr *bad_wr;
 
-	priv->tx_sge.addr             = addr;
-	priv->tx_sge.length           = len;
+	priv->tx_sge[0].addr          = addr;
+	priv->tx_sge[0].length        = len;
 
 	priv->tx_wr.wr_id 	      = wr_id;
 
@@ -537,7 +537,7 @@ void ipoib_cm_send(struct net_device *de
 		return;
 	}
 
-	tx_req->mapping = addr;
+	tx_req->mapping[0].addr = addr;
 
 	if (unlikely(post_send(priv, tx, tx->tx_head & (ipoib_sendq_size - 1),
 			        addr, skb->len))) {
@@ -577,7 +577,8 @@ static void ipoib_cm_handle_tx_wc(struct
 
 	tx_req = &tx->tx_ring[wr_id];
 
-	ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len, DMA_TO_DEVICE);
+	ib_dma_unmap_single(priv->ca, tx_req->mapping[0].addr,
+			    tx_req->skb->len, DMA_TO_DEVICE);
 
 	/* FIXME: is this right? Shouldn't we only increment on success? */
 	++priv->stats.tx_packets;
@@ -981,8 +982,8 @@ static void ipoib_cm_tx_destroy(struct i
 	if (p->tx_ring) {
 		while ((int) p->tx_tail - (int) p->tx_head < 0) {
 			tx_req = &p->tx_ring[p->tx_tail & (ipoib_sendq_size - 1)];
-			ib_dma_unmap_single(priv->ca, tx_req->mapping, tx_req->skb->len,
-					 DMA_TO_DEVICE);
+			ib_dma_unmap_single(priv->ca, tx_req->mapping[0].addr,
+					    tx_req->skb->len, DMA_TO_DEVICE);
 			dev_kfree_skb_any(tx_req->skb);
 			++p->tx_tail;
 		}
Index: linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c
===================================================================
--- linux-2.6.23-rc1.orig/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:31.000000000 +0300
+++ linux-2.6.23-rc1/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-15 20:50:33.000000000 +0300
@@ -1067,17 +1067,18 @@ int ipoib_add_pkey_attr(struct net_devic
 	return device_create_file(&dev->dev, &dev_attr_pkey);
 }
 
-static void set_tx_csum(struct net_device *dev)
+static int set_tx_csum(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
 	if (test_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags))
-		return;
+		return -EINVAL;
 
 	if (!(priv->ca->flags & IB_DEVICE_IP_CSUM))
-		return;
+		return -EINVAL;
 
 	dev->features |= NETIF_F_HW_CSUM;
+	return 0;
 }
 
 static void set_rx_csum(struct net_device *dev)
@@ -1128,6 +1129,11 @@ static struct net_device *ipoib_add_port
 	} else
 		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
+	priv->ca = hca;
+	if (!set_tx_csum(priv->dev))
+		priv->dev->features |= NETIF_F_SG;
+
+	set_rx_csum(priv->dev);
 
 	result = ipoib_dev_init(priv->dev, hca, port);
 	if (result < 0) {
@@ -1146,9 +1152,6 @@ static struct net_device *ipoib_add_port
 		goto event_failed;
 	}
 
-	set_tx_csum(priv->dev);
-	set_rx_csum(priv->dev);
-
 	result = register_netdev(priv->dev);
 	if (result) {
 		printk(KERN_WARNING "%s: couldn't register ipoib port %d; error %d\n",



