[ofa-general] [RFC] IPoIB UD 4K MTU support
Shirley Ma
mashirle at us.ibm.com
Tue Jan 22 10:08:41 PST 2008
Hello Roland,
IPoIB UD currently supports an MTU of up to 2K. Below is a draft patch that
enables IPoIB UD 4K MTU support for any IB device that supports a 4K MTU,
such as the IBM eHCA. The patch keeps each packet within a single page by
setting the IPoIB UD MTU to 4K-48 (40 bytes GRH, 4 bytes IPoIB header, 4
bytes of padding to align the IP header), which avoids allocating two
contiguous pages when the kernel page size is 4K.
Enabling the IPoIB UD 4K MTU relies on the SM setting the default broadcast
group MTU to 4K and, of course, on the switches supporting a 4K MTU. When
the SM sets the default broadcast group MTU to 2K, the IPoIB UD MTU falls
back to 2K.
I have tested the 2K MTU case; the 4K MTU case is still under test. I am
sending this patch out for review before my testing is finished because I
want comments as early as possible, so that I can integrate them into the
patch and hopefully get it into OFED-1.3-rc3, which is due around Jan. 30.
Thanks
Shirley
diff -urpN
ipoib/ipoib.h /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib.h
--- ipoib/ipoib.h 2008-01-21 14:16:19.000000000 -0500
+++ /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib.h
2008-01-22 15:50:13.000000000 -0500
@@ -56,9 +56,6 @@
/* constants */
enum {
- IPOIB_PACKET_SIZE = 2048,
- IPOIB_BUF_SIZE = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
-
IPOIB_ENCAP_LEN = 4,
IPOIB_CM_MTU = 0x10000 - 0x10, /* padding to align header
to 16 */
@@ -320,6 +317,7 @@ struct ipoib_dev_priv {
struct dentry *mcg_dentry;
struct dentry *path_dentry;
#endif
+ unsigned int max_ib_mtu;
};
struct ipoib_ah {
@@ -698,4 +696,11 @@ extern int ipoib_debug_level;
#define IPOIB_QPN(ha) (be32_to_cpup((__be32 *) ha) & 0xffffff)
+/* Map an IB MTU to the IPoIB UD MTU: below 4096 only IPOIB_ENCAP_LEN is subtracted; otherwise IB_GRH_BYTES and 4 more bytes are subtracted too, so that a 4K-MTU packet fits in one page */
+static inline int ipoib_ud_mtu(unsigned int ib_mtu)
+{
+ return (ib_mtu < 4096) ? (ib_mtu - IPOIB_ENCAP_LEN) :
+ (ib_mtu - IB_GRH_BYTES - IPOIB_ENCAP_LEN - 4);
+}
+
#endif /* _IPOIB_H */
diff -urpN
ipoib/ipoib_ib.c /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_ib.c
--- ipoib/ipoib_ib.c 2008-01-10 13:13:12.000000000 -0500
+++ /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_ib.c
2008-01-22 15:58:16.000000000 -0500
@@ -87,6 +87,15 @@ void ipoib_free_ah(struct kref *kref)
spin_unlock_irqrestore(&priv->lock, flags);
}
+static int ipoib_ud_buf_size(unsigned int max_ib_mtu)
+{
+ if (max_ib_mtu < 4096)
+ return (max_ib_mtu + IB_GRH_BYTES);
+ else
+ /* 4K IB MTU: use max_ib_mtu - 4 so that, with the 4 extra bytes requested at skb allocation, a 4096-byte MTU asks for exactly 4096 bytes (intended to keep the packet within one page) */
+ return (max_ib_mtu - 4);
+}
+
static int ipoib_ib_post_receive(struct net_device *dev, int id)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -96,7 +105,7 @@ static int ipoib_ib_post_receive(struct
int ret;
list.addr = priv->rx_ring[id].mapping;
- list.length = IPOIB_BUF_SIZE;
+ list.length = ipoib_ud_buf_size(priv->max_ib_mtu);
list.lkey = priv->mr->lkey;
param.next = NULL;
@@ -108,7 +117,7 @@ static int ipoib_ib_post_receive(struct
if (unlikely(ret)) {
ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ipoib_ud_buf_size(priv->max_ib_mtu), DMA_FROM_DEVICE);
dev_kfree_skb_any(priv->rx_ring[id].skb);
priv->rx_ring[id].skb = NULL;
}
@@ -122,7 +131,7 @@ static int ipoib_alloc_rx_skb(struct net
struct sk_buff *skb;
u64 addr;
- skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
+ skb = dev_alloc_skb(ipoib_ud_buf_size(priv->max_ib_mtu) + 4);
if (!skb)
return -ENOMEM;
@@ -133,7 +142,7 @@ static int ipoib_alloc_rx_skb(struct net
*/
skb_reserve(skb, 4);
- addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
+ addr = ib_dma_map_single(priv->ca, skb->data,
ipoib_ud_buf_size(priv->max_ib_mtu),
DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
dev_kfree_skb_any(skb);
@@ -190,7 +199,7 @@ static void ipoib_ib_handle_rx_wc(struct
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
ib_dma_unmap_single(priv->ca, addr,
- IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ipoib_ud_buf_size(priv->max_ib_mtu), DMA_FROM_DEVICE);
dev_kfree_skb_any(skb);
priv->rx_ring[wr_id].skb = NULL;
return;
@@ -215,7 +224,7 @@ static void ipoib_ib_handle_rx_wc(struct
ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
wc->byte_len, wc->slid);
- ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+ ib_dma_unmap_single(priv->ca, addr,
ipoib_ud_buf_size(priv->max_ib_mtu), DMA_FROM_DEVICE);
skb_put(skb, wc->byte_len);
skb_pull(skb, IB_GRH_BYTES);
@@ -632,7 +641,7 @@ int ipoib_ib_dev_stop(struct net_device
continue;
ib_dma_unmap_single(priv->ca,
rx_req->mapping,
- IPOIB_BUF_SIZE,
+ ipoib_ud_buf_size(priv->max_ib_mtu),
DMA_FROM_DEVICE);
dev_kfree_skb_any(rx_req->skb);
rx_req->skb = NULL;
diff -urpN
ipoib/ipoib_main.c /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- ipoib/ipoib_main.c 2008-01-21 14:43:39.000000000 -0500
+++ /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_main.c
2008-01-22 15:39:44.000000000 -0500
@@ -193,7 +193,7 @@ static int ipoib_change_mtu(struct net_d
return 0;
}
- if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
+ if (new_mtu > ipoib_ud_mtu(priv->max_ib_mtu))
return -EINVAL;
priv->admin_mtu = new_mtu;
@@ -978,7 +978,7 @@ static void ipoib_setup(struct net_devic
dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
/* MTU will be reset when mcast join happens */
- dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
+ dev->mtu = ipoib_ud_mtu(priv->max_ib_mtu);
priv->mcast_mtu = priv->admin_mtu = dev->mtu;
memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
@@ -1112,6 +1112,7 @@ static struct net_device *ipoib_add_port
struct ib_device *hca, u8 port)
{
struct ipoib_dev_priv *priv;
+ struct ib_port_attr attr;
int result = -ENOMEM;
priv = ipoib_intf_alloc(format);
@@ -1120,6 +1121,13 @@ static struct net_device *ipoib_add_port
SET_NETDEV_DEV(priv->dev, hca->dma_device);
+ if (!ib_query_port(hca, port, &attr))
+ priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+ else {
+ printk(KERN_WARNING "%s: ib_query_port %d failed\n",
+ hca->name, port);
+ goto device_init_failed;
+ }
result = ib_query_pkey(hca, port, 0, &priv->pkey);
if (result) {
printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
diff -urpN
ipoib/ipoib_multicast.c /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
--- ipoib/ipoib_multicast.c 2008-01-10 13:13:12.000000000 -0500
+++ /home/shirley/ipoib-test/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-01-22 15:42:10.000000000 -0500
@@ -567,9 +567,7 @@ void ipoib_mcast_join_task(struct work_s
return;
}
- priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
- IPOIB_ENCAP_LEN;
-
+ priv->mcast_mtu =
ipoib_ud_mtu(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
if (!ipoib_cm_admin_enabled(dev))
dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
More information about the general
mailing list