[ofa-general] Re: [PATCH] IB/mlx4: Add checksum offload support

Roland Dreier rdreier at cisco.com
Sat Mar 15 19:10:55 PDT 2008


Thanks, I applied the IPoIB and mlx4 patches with some cleanups.

One thing I changed is that ipoib doesn't keep a cached copy of the
HCA capabilities flags -- I didn't see any reason why it was needed.

The patches I have in my tree are below:

>From b6fe014b2ade84f82c614ee68292fb85ce1fc573 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli at dev.mellanox.co.il>
Date: Wed, 27 Feb 2008 17:07:08 +0200
Subject: [PATCH] IPoIB: Use checksum offload support if available

For HCAs that support checksum offload (i.e., those that set
IB_DEVICE_UD_IP_CSUM in the device capabilities flags), have IPoIB set
NETIF_F_IP_CSUM and use the HCA to generate and verify IP checksums.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
 drivers/infiniband/ulp/ipoib/ipoib.h      |    1 +
 drivers/infiniband/ulp/ipoib/ipoib_cm.c   |    8 ++++++++
 drivers/infiniband/ulp/ipoib/ipoib_ib.c   |   11 +++++++++++
 drivers/infiniband/ulp/ipoib/ipoib_main.c |   22 +++++++++++++++++++++-
 4 files changed, 41 insertions(+), 1 deletions(-)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 054fab8..08930ca 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -87,6 +87,7 @@ enum {
 	IPOIB_MCAST_STARTED	  = 8,
 	IPOIB_FLAG_ADMIN_CM	  = 9,
 	IPOIB_FLAG_UMCAST	  = 10,
+	IPOIB_FLAG_CSUM		  = 11,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 2490b2d..edf63dc 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1383,6 +1383,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		set_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 		ipoib_warn(priv, "enabling connected mode "
 			   "will cause multicast packet drops\n");
+
+		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG);
+		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+
 		ipoib_flush_paths(dev);
 		return count;
 	}
@@ -1391,6 +1395,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 		clear_bit(IPOIB_FLAG_ADMIN_CM, &priv->flags);
 		dev->mtu = min(priv->mcast_mtu, dev->mtu);
 		ipoib_flush_paths(dev);
+
+		if (test_bit(IPOIB_FLAG_CSUM, &priv->flags))
+			dev->features |= NETIF_F_IP_CSUM | NETIF_F_SG;
+
 		return count;
 	}
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 08c4396..8ed09d1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -231,6 +231,12 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb->dev = dev;
 	/* XXX get correct PACKET_ type here */
 	skb->pkt_type = PACKET_HOST;
+
+	if (test_bit(IPOIB_FLAG_CSUM, &priv->flags) && likely(wc->csum_ok))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+
 	netif_receive_skb(skb);
 
 repost:
@@ -442,6 +448,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
 		return;
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		priv->tx_wr.send_flags |= IB_SEND_IP_CSUM;
+	else
+		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+
 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
 			       address->ah, qpn,
 			       tx_req->mapping, skb_headlen(skb),
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 5728204..d0fbb0e 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1105,6 +1105,7 @@ static struct net_device *ipoib_add_port(const char *format,
 					 struct ib_device *hca, u8 port)
 {
 	struct ipoib_dev_priv *priv;
+	struct ib_device_attr *device_attr = NULL;
 	int result = -ENOMEM;
 
 	priv = ipoib_intf_alloc(format);
@@ -1120,6 +1121,25 @@ static struct net_device *ipoib_add_port(const char *format,
 		goto device_init_failed;
 	}
 
+	device_attr = kmalloc(sizeof *device_attr, GFP_KERNEL);
+	if (!device_attr) {
+		printk(KERN_WARNING "%s: allocation of %zu bytes failed\n",
+		       hca->name, sizeof *device_attr);
+		goto device_init_failed;
+	}
+
+	result = ib_query_device(hca, device_attr);
+	if (result) {
+		printk(KERN_WARNING "%s: ib_query_device failed (ret = %d)\n",
+		       hca->name, result);
+		goto device_init_failed;
+	}
+
+	if (device_attr->device_cap_flags & IB_DEVICE_UD_IP_CSUM) {
+		set_bit(IPOIB_FLAG_CSUM, &priv->flags);
+		priv->dev->features |= NETIF_F_SG | NETIF_F_IP_CSUM;
+	}
+
 	/*
 	 * Set the full membership bit, so that we join the right
 	 * broadcast group, etc.
@@ -1137,7 +1157,6 @@ static struct net_device *ipoib_add_port(const char *format,
 	} else
 		memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
 
-
 	result = ipoib_dev_init(priv->dev, hca, port);
 	if (result < 0) {
 		printk(KERN_WARNING "%s: failed to initialize port %d (ret = %d)\n",
@@ -1192,6 +1211,7 @@ device_init_failed:
 	free_netdev(priv->dev);
 
 alloc_mem_failed:
+	kfree(device_attr);
 	return ERR_PTR(result);
 }
 
-- 
1.5.4.3



>From b3dfa9bed3b72555ee30ad200de24ee30ec55844 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli at dev.mellanox.co.il>
Date: Wed, 27 Feb 2008 17:07:11 +0200
Subject: [PATCH] IB/mlx4: Add IPoIB checksum offload support

ConnectX devices support checksum generation and verification of TCP
and UDP headers for UD IPoIB messages.  This patch checks if the HCA
supports this and sets the IB_DEVICE_UD_IP_CSUM capability flag if it
does.  It also implements support for handling the IB_SEND_IP_CSUM send
flag and setting the csum_ok field in receive work completions.

Signed-off-by: Eli Cohen <eli at mellanox.co.il>
Signed-off-by: Ali Ayub <ali at mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd at cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c   |   16 ++++++++++++++++
 drivers/infiniband/hw/mlx4/main.c |    2 ++
 drivers/infiniband/hw/mlx4/qp.c   |    3 +++
 drivers/net/mlx4/fw.c             |    4 ++++
 include/linux/mlx4/cq.h           |   14 ++++++++++++--
 include/linux/mlx4/qp.h           |   10 ++++++----
 6 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index 7360bba..d2e32b0 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -297,6 +297,20 @@ static void mlx4_ib_handle_error_cqe(struct mlx4_err_cqe *cqe,
 	wc->vendor_err = cqe->vendor_err_syndrome;
 }
 
+static int mlx4_ib_ipoib_csum_ok(__be32 status, __be16 checksum)
+{
+	return ((status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4F	|
+				      MLX4_CQE_IPOIB_STATUS_IPV4OPT	|
+				      MLX4_CQE_IPOIB_STATUS_IPV6	|
+				      MLX4_CQE_IPOIB_STATUS_IPOK)) ==
+		cpu_to_be32(MLX4_CQE_IPOIB_STATUS_IPV4	|
+			    MLX4_CQE_IPOIB_STATUS_IPOK))		&&
+		(status & cpu_to_be32(MLX4_CQE_IPOIB_STATUS_UDP	|
+				      MLX4_CQE_IPOIB_STATUS_TCP))	&&
+		checksum == cpu_to_be16(0xffff);
+}
+
 static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 			    struct mlx4_ib_qp **cur_qp,
 			    struct ib_wc *wc)
@@ -434,6 +448,8 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq,
 		wc->dlid_path_bits = (g_mlpath_rqpn >> 24) & 0x7f;
 		wc->wc_flags	  |= g_mlpath_rqpn & 0x80000000 ? IB_WC_GRH : 0;
 		wc->pkey_index     = be32_to_cpu(cqe->immed_rss_invalid) & 0x7f;
+		wc->csum_ok	   = mlx4_ib_ipoib_csum_ok(cqe->ipoib_status,
+							   cqe->checksum);
 	}
 
 	return 0;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 96a39b5..ef5e9db 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -99,6 +99,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
 		props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
+	if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
 
 	props->vendor_id	   = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
 		0xffffff;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index ac965ab..31b2b5b 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -1436,6 +1436,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			 cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) : 0) |
 			(wr->send_flags & IB_SEND_SOLICITED ?
 			 cpu_to_be32(MLX4_WQE_CTRL_SOLICITED) : 0) |
+			((wr->send_flags & IB_SEND_IP_CSUM) ?
+			 cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
+				     MLX4_WQE_CTRL_TCP_UDP_CSUM) : 0) |
 			qp->sq_signal_bits;
 
 		if (wr->opcode == IB_WR_SEND_WITH_IMM ||
diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c
index 61dc495..f494c3e 100644
--- a/drivers/net/mlx4/fw.c
+++ b/drivers/net/mlx4/fw.c
@@ -696,6 +696,10 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	/* Check port for UD address vector: */
 	*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1);
 
+	/* Enable IPoIB checksumming if we can: */
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
+		*(inbox + INIT_HCA_FLAGS_OFFSET / 4) |= cpu_to_be32(1 << 3);
+
 	/* QPC/EEC/CQC/EQC/RDMARC attributes */
 
 	MLX4_PUT(inbox, param->qpc_base,      INIT_HCA_QPC_BASE_OFFSET);
diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h
index 0181e0a..1243eba 100644
--- a/include/linux/mlx4/cq.h
+++ b/include/linux/mlx4/cq.h
@@ -45,11 +45,11 @@ struct mlx4_cqe {
 	u8			sl;
 	u8			reserved1;
 	__be16			rlid;
-	u32			reserved2;
+	__be32			ipoib_status;
 	__be32			byte_cnt;
 	__be16			wqe_index;
 	__be16			checksum;
-	u8			reserved3[3];
+	u8			reserved2[3];
 	u8			owner_sr_opcode;
 };
 
@@ -85,6 +85,16 @@ enum {
 	MLX4_CQE_SYNDROME_REMOTE_ABORTED_ERR		= 0x22,
 };
 
+enum {
+	MLX4_CQE_IPOIB_STATUS_IPV4			= 1 << 22,
+	MLX4_CQE_IPOIB_STATUS_IPV4F			= 1 << 23,
+	MLX4_CQE_IPOIB_STATUS_IPV6			= 1 << 24,
+	MLX4_CQE_IPOIB_STATUS_IPV4OPT			= 1 << 25,
+	MLX4_CQE_IPOIB_STATUS_TCP			= 1 << 26,
+	MLX4_CQE_IPOIB_STATUS_UDP			= 1 << 27,
+	MLX4_CQE_IPOIB_STATUS_IPOK			= 1 << 28,
+};
+
 static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd,
 			       void __iomem *uar_page,
 			       spinlock_t *doorbell_lock)
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 09a2230..31f9eb3 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -158,10 +158,12 @@ struct mlx4_qp_context {
 #define MLX4_FW_VER_WQE_CTRL_NEC mlx4_fw_ver(2, 2, 232)
 
 enum {
-	MLX4_WQE_CTRL_NEC	= 1 << 29,
-	MLX4_WQE_CTRL_FENCE	= 1 << 6,
-	MLX4_WQE_CTRL_CQ_UPDATE	= 3 << 2,
-	MLX4_WQE_CTRL_SOLICITED	= 1 << 1,
+	MLX4_WQE_CTRL_NEC		= 1 << 29,
+	MLX4_WQE_CTRL_FENCE		= 1 << 6,
+	MLX4_WQE_CTRL_CQ_UPDATE		= 3 << 2,
+	MLX4_WQE_CTRL_SOLICITED		= 1 << 1,
+	MLX4_WQE_CTRL_IP_CSUM		= 1 << 4,
+	MLX4_WQE_CTRL_TCP_UDP_CSUM	= 1 << 5,
 };
 
 struct mlx4_wqe_ctrl_seg {
-- 
1.5.4.3




More information about the general mailing list