Hello Vlad,

I just found that my Linux email wasn't going out, so I am resending the updated IPoIB 4K MTU patch for 2.6.24 against the ofed-1.3 git tree. The patch has been tested and validated; please check it in, and let me know if you run into any issues. This email client has problems with inline patches, so please use the attached file when applying.

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

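A quick worked example of the new receive-buffer sizing, assuming 4 KB pages
(the numbers follow from the macros added to ipoib.h below; this note sits
after the "---" so it will not end up in the applied patch):

	/*
	 * 4 KB IB MTU: IPOIB_UD_BUF_SIZE(4096) = 4096 + IB_GRH_BYTES (40)
	 * = 4136 > PAGE_SIZE, so ipoib_ud_need_sg() returns 1 and each
	 * receive WR is posted with IPOIB_UD_RX_SG = 2 scatter entries:
	 *   sge[0]: IPOIB_UD_HEAD_SIZE = IB_GRH_BYTES + IPOIB_ENCAP_LEN
	 *           = 44 bytes (the GRH plus the IPoIB encapsulation header)
	 *   sge[1]: one full page (PAGE_SIZE) for the rest of the payload
	 * 2 KB IB MTU: 2048 + 40 = 2088 <= PAGE_SIZE, so the existing
	 * single-SGE receive path is kept unchanged.
	 */
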
diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib.h ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib.h
--- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib.h  2008-02-04 20:09:18.000000000 -0800
+++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib.h  2008-02-04 20:11:26.000000000 -0800
@@ -56,11 +56,11 @@
 /* constants */
 
 enum {
-       IPOIB_PACKET_SIZE         = 2048,
-       IPOIB_BUF_SIZE            = IPOIB_PACKET_SIZE + IB_GRH_BYTES,
-
        IPOIB_ENCAP_LEN           = 4,
 
+       IPOIB_UD_HEAD_SIZE        = IB_GRH_BYTES + IPOIB_ENCAP_LEN,
+       IPOIB_UD_RX_SG            = 2, /* for 4K MTU */
+
        IPOIB_CM_MTU              = 0x10000 - 0x10, /* padding to align header to 16 */
        IPOIB_CM_BUF_SIZE         = IPOIB_CM_MTU  + IPOIB_ENCAP_LEN,
        IPOIB_CM_HEAD_SIZE        = IPOIB_CM_BUF_SIZE % PAGE_SIZE,
@@ -141,9 +141,9 @@ struct ipoib_mcast {
        struct net_device *dev;
 };
 
-struct ipoib_rx_buf {
+struct ipoib_sg_rx_buf {
        struct sk_buff *skb;
-       u64             mapping;
+       u64             mapping[IPOIB_UD_RX_SG];
 };
 
 struct ipoib_tx_buf {
@@ -337,7 +337,7 @@ struct ipoib_dev_priv {
 
        struct net_device      *dev;
        struct ib_recv_wr       rx_wr_draft[UD_POST_RCV_COUNT];
-       struct ib_sge           sglist_draft[UD_POST_RCV_COUNT];
+       struct ib_sge           sglist_draft[UD_POST_RCV_COUNT][IPOIB_UD_RX_SG];
        unsigned int            rx_outst;
 
        struct napi_struct napi;
@@ -378,7 +378,7 @@ struct ipoib_dev_priv {
        unsigned int admin_mtu;
        unsigned int mcast_mtu;
 
-       struct ipoib_rx_buf *rx_ring;
+       struct ipoib_sg_rx_buf *rx_ring;
 
        spinlock_t           tx_lock;
        struct ipoib_tx_buf *tx_ring;
@@ -412,6 +412,7 @@ struct ipoib_dev_priv {
        struct ipoib_ethtool_st etool;
        struct timer_list poll_timer;
        struct ib_ah *own_ah;
+       int max_ib_mtu;
 };
 
 struct ipoib_ah {
@@ -452,6 +453,19 @@ struct ipoib_neigh {
        struct list_head    list;
 };
 
+#define IPOIB_UD_MTU(ib_mtu)           ((ib_mtu) - IPOIB_ENCAP_LEN)
+#define IPOIB_UD_BUF_SIZE(ib_mtu)      ((ib_mtu) + IB_GRH_BYTES)
+static inline int ipoib_ud_need_sg(int ib_mtu)
+{
+       return (IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE) ? 1 : 0;
+}
+static inline void ipoib_sg_dma_unmap_rx(struct ipoib_dev_priv *priv,
+                                        u64 mapping[IPOIB_UD_RX_SG])
+{
+       ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE);
+       ib_dma_unmap_single(priv->ca, mapping[1], PAGE_SIZE, DMA_FROM_DEVICE);
+}
+
 /*
  * We stash a pointer to our private neighbour information after our
  * hardware address in neigh->ha.  The ALIGN() expression here makes
diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_ib.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
--- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_ib.c       2008-02-04 20:09:18.000000000 -0800
+++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_ib.c       2008-02-04 20:11:26.000000000 -0800
@@ -96,14 +96,82 @@ static void clean_pending_receives(struc
 
        for (i = 0; i < priv->rx_outst; ++i) {
                id = priv->rx_wr_draft[i].wr_id & ~IPOIB_OP_RECV;
-               ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
-                                            IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+               if (ipoib_ud_need_sg(priv->max_ib_mtu))
+                       ipoib_sg_dma_unmap_rx(priv,
+                                             priv->rx_ring[id].mapping);
+               else
+                       ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0],
+                                            IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE);
                dev_kfree_skb_any(priv->rx_ring[id].skb);
                priv->rx_ring[id].skb = NULL;
        }
        priv->rx_outst = 0;
 }
 
+static void ipoib_ud_skb_put_frags(struct sk_buff *skb, unsigned int length,
+                                  struct sk_buff *toskb)
+{
+       unsigned int size;
+       skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+       /* put header into skb */
+       size = min(length, (unsigned)IPOIB_UD_HEAD_SIZE);
+       skb->tail += size;
+       skb->len += size;
+       length -= size;
+
+       if (length == 0) {
+               /* don't need this page */
+               skb_fill_page_desc(toskb, 0, frag->page, 0, PAGE_SIZE);
+               --skb_shinfo(skb)->nr_frags;
+       } else {
+               size = min(length, (unsigned) PAGE_SIZE);
+               frag->size = size;
+               skb->data_len += size;
+               skb->truesize += size;
+               skb->len += size;
+               length -= size;
+       }
+}
+
+static struct sk_buff *ipoib_sg_alloc_rx_skb(struct net_device *dev,
+                                            int id, u64 mapping[IPOIB_UD_RX_SG])
+{
+       struct ipoib_dev_priv *priv = netdev_priv(dev);
+       struct page *page;
+       struct sk_buff *skb;
+
+       skb = dev_alloc_skb(IPOIB_UD_HEAD_SIZE);
+
+       if (unlikely(!skb))
+               return NULL;
+
+       mapping[0] = ib_dma_map_single(priv->ca, skb->data, IPOIB_UD_HEAD_SIZE,
+                                      DMA_FROM_DEVICE);
+       if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0]))) {
+               dev_kfree_skb_any(skb);
+               return NULL;
+       }
+
+       page = alloc_page(GFP_ATOMIC);
+       if (!page)
+               goto partial_error;
+
+       skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
+       mapping[1] = ib_dma_map_page(priv->ca, skb_shinfo(skb)->frags[0].page,
+                                    0, PAGE_SIZE, DMA_FROM_DEVICE);
+       if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
+               goto partial_error;
+
+       priv->rx_ring[id].skb = skb;
+       return skb;
+
+partial_error:
+       ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE, DMA_FROM_DEVICE);
+       dev_kfree_skb_any(skb);
+       return NULL;
+}
+
 static int ipoib_ib_post_receive(struct net_device *dev, int id)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -111,8 +179,11 @@ static int ipoib_ib_post_receive(struct
        int ret = 0;
        int i = priv->rx_outst;
 
-       priv->sglist_draft[i].addr = priv->rx_ring[id].mapping;
+       priv->sglist_draft[i][0].addr = priv->rx_ring[id].mapping[0];
+       priv->sglist_draft[i][1].addr = priv->rx_ring[id].mapping[1];
+
        priv->rx_wr_draft[i].wr_id = id | IPOIB_OP_RECV;
+
        if (++priv->rx_outst == UD_POST_RCV_COUNT) {
                ret = ib_post_recv(priv->qp, priv->rx_wr_draft, &bad_wr);
 
@@ -120,8 +191,13 @@ static int ipoib_ib_post_receive(struct
                        ipoib_warn(priv, "receive failed for buf %d (%d)\n", id, ret);
                        while (bad_wr) {
                                id = bad_wr->wr_id & ~IPOIB_OP_RECV;
-                               ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping,
-                                                   IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+                               if (ipoib_ud_need_sg(priv->max_ib_mtu))
+                                       ipoib_sg_dma_unmap_rx(priv,
+                                                     priv->rx_ring[id].mapping);
+                               else
+                                       ib_dma_unmap_single(priv->ca, priv->rx_ring[id].mapping[0],
+                                                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
+                                                           DMA_FROM_DEVICE);
                                dev_kfree_skb_any(priv->rx_ring[id].skb);
                                priv->rx_ring[id].skb = NULL;
                        }
@@ -138,7 +214,7 @@ static int ipoib_alloc_rx_skb(struct net
        struct sk_buff *skb;
        u64 addr;
 
-       skb = dev_alloc_skb(IPOIB_BUF_SIZE + 4);
+       skb = dev_alloc_skb(IPOIB_UD_BUF_SIZE(priv->max_ib_mtu) + 4);
        if (!skb)
                return -ENOMEM;
 
@@ -149,7 +225,8 @@ static int ipoib_alloc_rx_skb(struct net
         */
        skb_reserve(skb, 4);
 
-       addr = ib_dma_map_single(priv->ca, skb->data, IPOIB_BUF_SIZE,
+       addr = ib_dma_map_single(priv->ca, skb->data,
+                                IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
                                 DMA_FROM_DEVICE);
        if (unlikely(ib_dma_mapping_error(priv->ca, addr))) {
                dev_kfree_skb_any(skb);
@@ -157,7 +234,7 @@ static int ipoib_alloc_rx_skb(struct net
        }
 
        priv->rx_ring[id].skb     = skb;
-       priv->rx_ring[id].mapping = addr;
+       priv->rx_ring[id].mapping[0] = addr;
 
        return 0;
 }
@@ -165,10 +242,15 @@ static int ipoib_alloc_rx_skb(struct net
 static int ipoib_ib_post_receives(struct net_device *dev)
 {
        struct ipoib_dev_priv *priv = netdev_priv(dev);
-       int i;
+       int i, ret;
 
        for (i = 0; i < ipoib_recvq_size; ++i) {
-               if (ipoib_alloc_rx_skb(dev, i)) {
+               if (ipoib_ud_need_sg(priv->max_ib_mtu))
+                       ret = !(ipoib_sg_alloc_rx_skb(dev, i,
+                                                     priv->rx_ring[i].mapping));
+               else
+                       ret = ipoib_alloc_rx_skb(dev, i);
+               if (ret) {
                        ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
                        return -ENOMEM;
                }
@@ -186,7 +268,7 @@ static void ipoib_ib_handle_rx_wc(struct
        struct ipoib_dev_priv *priv = netdev_priv(dev);
        unsigned int wr_id = wc->wr_id & ~IPOIB_OP_RECV;
        struct sk_buff *skb;
-       u64 addr;
+       u64 mapping[IPOIB_UD_RX_SG];
 
        ipoib_dbg_data(priv, "recv completion: id %d, status: %d\n",
                       wr_id, wc->status);
@@ -198,42 +280,74 @@ static void ipoib_ib_handle_rx_wc(struct
        }
 
        skb  = priv->rx_ring[wr_id].skb;
-       addr = priv->rx_ring[wr_id].mapping;
 
-       if (unlikely(wc->status != IB_WC_SUCCESS)) {
-               if (wc->status != IB_WC_WR_FLUSH_ERR)
-                       ipoib_warn(priv, "failed recv event "
-                                  "(status=%d, wrid=%d vend_err %x)\n",
-                                  wc->status, wr_id, wc->vendor_err);
-               ib_dma_unmap_single(priv->ca, addr,
-                                   IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
-               dev_kfree_skb_any(skb);
-               priv->rx_ring[wr_id].skb = NULL;
-               return;
-       }
+       /* duplicate the code here to keep need-sg condition checks out of the fast path */
+       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+               struct sk_buff *newskb;
+               if (unlikely(wc->status != IB_WC_SUCCESS)) {
+                       if (wc->status != IB_WC_WR_FLUSH_ERR)
+                               ipoib_warn(priv, "failed recv event "
+                                          "(status=%d, wrid=%d vend_err %x)\n",
+                                          wc->status, wr_id, wc->vendor_err);
+                       ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
+                       dev_kfree_skb_any(skb);
+                       priv->rx_ring[wr_id].skb = NULL;
+                       return;
+               }
+               /*
+                * Drop packets that this interface sent, ie multicast packets
+                * that the HCA has replicated.
+                */
+               if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+                       goto repost;
+               newskb = ipoib_sg_alloc_rx_skb(dev, wr_id, mapping);
+               if (unlikely(!newskb)) {
+                       ++dev->stats.rx_dropped;
+                       goto repost;
+               }
+               ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+                              wc->byte_len, wc->slid);
+               ipoib_sg_dma_unmap_rx(priv, priv->rx_ring[wr_id].mapping);
+               memcpy(priv->rx_ring[wr_id].mapping, mapping,
+                      IPOIB_UD_RX_SG * sizeof *mapping);
+               ipoib_ud_skb_put_frags(skb, wc->byte_len, newskb);
+       } else {
+               u64 addr = priv->rx_ring[wr_id].mapping[0];
+               if (unlikely(wc->status != IB_WC_SUCCESS)) {
+                       if (wc->status != IB_WC_WR_FLUSH_ERR)
+                               ipoib_warn(priv, "failed recv event "
+                                          "(status=%d, wrid=%d vend_err %x)\n",
+                                          wc->status, wr_id, wc->vendor_err);
+                       ib_dma_unmap_single(priv->ca, addr,
+                                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE);
+                       dev_kfree_skb_any(skb);
+                       priv->rx_ring[wr_id].skb = NULL;
+                       return;
+               }
 
-       /*
-        * Drop packets that this interface sent, ie multicast packets
-        * that the HCA has replicated.
-        */
-       if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num))
-               goto repost;
+               /*
+                * Drop packets that this interface sent, ie multicast packets
+                * that the HCA has replicated.
+                */
+               if (unlikely(wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num))
+                       goto repost;
 
-       /*
-        * If we can't allocate a new RX buffer, dump
-        * this packet and reuse the old buffer.
-        */
-       if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
-               ++dev->stats.rx_dropped;
-               goto repost;
-       }
+               /*
+                * If we can't allocate a new RX buffer, dump
+                * this packet and reuse the old buffer.
+                */
+               if (unlikely(ipoib_alloc_rx_skb(dev, wr_id))) {
+                       ++dev->stats.rx_dropped;
+                       goto repost;
+               }
 
-       ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
-                      wc->byte_len, wc->slid);
+               ipoib_dbg_data(priv, "received %d bytes, SLID 0x%04x\n",
+                              wc->byte_len, wc->slid);
 
-       ib_dma_unmap_single(priv->ca, addr, IPOIB_BUF_SIZE, DMA_FROM_DEVICE);
+               ib_dma_unmap_single(priv->ca, addr, IPOIB_UD_BUF_SIZE(priv->max_ib_mtu), DMA_FROM_DEVICE);
 
-       skb_put(skb, wc->byte_len);
+               skb_put(skb, wc->byte_len);
+       }
        skb_pull(skb, IB_GRH_BYTES);
 
        skb->protocol = ((struct ipoib_header *) skb->data)->proto;
@@ -827,18 +941,21 @@ int ipoib_ib_dev_stop(struct net_device
                         * all our pending work requests.
                         */
                        for (i = 0; i < ipoib_recvq_size; ++i) {
-                               struct ipoib_rx_buf *rx_req;
+                               struct ipoib_sg_rx_buf *rx_req;
 
                                rx_req = &priv->rx_ring[i];
-
-                               if (rx_req->skb) {
+                               if (!rx_req->skb)
+                                       continue;
+                               if (ipoib_ud_need_sg(priv->max_ib_mtu))
+                                       ipoib_sg_dma_unmap_rx(priv,
+                                                             priv->rx_ring[i].mapping);
+                               else
                                        ib_dma_unmap_single(priv->ca,
-                                                           rx_req->mapping,
-                                                           IPOIB_BUF_SIZE,
+                                                           rx_req->mapping[0],
+                                                           IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
                                                            DMA_FROM_DEVICE);
-                                       dev_kfree_skb_any(rx_req->skb);
-                                       rx_req->skb = NULL;
-                               }
+                               dev_kfree_skb_any(rx_req->skb);
+                               rx_req->skb = NULL;
                        }
 
                        goto timeout;
diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_main.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_main.c     2008-02-04 20:09:18.000000000 -0800
+++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_main.c     2008-02-04 20:11:26.000000000 -0800
@@ -193,7 +193,7 @@ static int ipoib_change_mtu(struct net_d
                return 0;
        }
 
-       if (new_mtu > IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN)
+       if (new_mtu > IPOIB_UD_MTU(priv->max_ib_mtu))
                return -EINVAL;
 
        priv->admin_mtu = new_mtu;
@@ -1007,10 +1007,6 @@ static void ipoib_setup(struct net_devic
        dev->tx_queue_len     = ipoib_sendq_size * 2;
        dev->features            = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
 
-       /* MTU will be reset when mcast join happens */
-       dev->mtu              = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
-       priv->mcast_mtu       = priv->admin_mtu = dev->mtu;
-
        memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
 
        netif_carrier_off(dev);
@@ -1156,6 +1152,7 @@ static struct net_device *ipoib_add_port
                                         struct ib_device *hca, u8 port)
 {
        struct ipoib_dev_priv *priv;
+       struct ib_port_attr attr;
        int result = -ENOMEM;
 
        priv = ipoib_intf_alloc(format);
@@ -1166,6 +1163,18 @@ static struct net_device *ipoib_add_port
 
        priv->dev->features |= NETIF_F_HIGHDMA;
 
+       if (!ib_query_port(hca, port, &attr))
+               priv->max_ib_mtu = ib_mtu_enum_to_int(attr.max_mtu);
+       else {
+               printk(KERN_WARNING "%s: ib_query_port %d failed\n",
+                      hca->name, port);
+               goto device_init_failed;
+       }
+
+       /* MTU will be reset when mcast join happens */
+       priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
+       priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
+
        result = ib_query_pkey(hca, port, 0, &priv->pkey);
        if (result) {
                printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
--- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c        2008-02-04 15:31:14.000000000 -0800
+++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c        2008-02-04 20:11:26.000000000 -0800
@@ -567,8 +567,7 @@ void ipoib_mcast_join_task(struct work_s
                return;
        }
 
-       priv->mcast_mtu = ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu) -
-               IPOIB_ENCAP_LEN;
+       priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
 
        if (!ipoib_cm_admin_enabled(dev))
                dev->mtu = min(priv->mcast_mtu, priv->admin_mtu);
diff -urpN ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
--- ofed_kernel_a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c    2008-02-04 20:09:18.000000000 -0800
+++ ofed_kernel_b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c    2008-02-04 20:11:50.000000000 -0800
@@ -151,7 +151,7 @@ int ipoib_transport_dev_init(struct net_
                        .max_send_wr  = ipoib_sendq_size,
                        .max_recv_wr  = ipoib_recvq_size,
                        .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
-                       .max_recv_sge = 1
+                       .max_recv_sge = IPOIB_UD_RX_SG
                },
                .sq_sig_type = IB_SIGNAL_REQ_WR,
                .qp_type     = IB_QPT_UD,
@@ -225,18 +225,29 @@ int ipoib_transport_dev_init(struct net_
        priv->tx_wr.opcode   = IB_WR_SEND;
        priv->tx_wr.sg_list  = priv->tx_sge;
        priv->tx_wr.send_flags       = IB_SEND_SIGNALED;
 
        for (i = 0; i < UD_POST_RCV_COUNT; ++i) {
-               priv->sglist_draft[i].length = IPOIB_BUF_SIZE;
-               priv->sglist_draft[i].lkey = priv->mr->lkey;
-
-               priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i];
-               priv->rx_wr_draft[i].num_sge = 1;
+               priv->sglist_draft[i][0].lkey = priv->mr->lkey;
+               priv->sglist_draft[i][1].lkey = priv->mr->lkey;
+               priv->rx_wr_draft[i].sg_list = &priv->sglist_draft[i][0];
                if (i < UD_POST_RCV_COUNT - 1)
                        priv->rx_wr_draft[i].next = &priv->rx_wr_draft[i + 1];
        }
        priv->rx_wr_draft[i].next = NULL;
 
+       if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
+               for (i = 0; i < UD_POST_RCV_COUNT; ++i) {
+                       priv->sglist_draft[i][0].length = IPOIB_UD_HEAD_SIZE;
+                       priv->sglist_draft[i][1].length = PAGE_SIZE;
+                       priv->rx_wr_draft[i].num_sge = IPOIB_UD_RX_SG;
+               }
+       } else {
+               for (i = 0; i < UD_POST_RCV_COUNT; ++i) {
+                       priv->sglist_draft[i][0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
+                       priv->rx_wr_draft[i].num_sge = 1;
+               }
+       }
+
        return 0;
 
 out_free_scq:

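A usage note: once the broadcast group joins at a 4K IB MTU, the interface
MTU can be raised up to IPOIB_UD_MTU(4096) = 4092 bytes, for example with
"ip link set dev ib0 mtu 4092" (assuming the interface is named ib0). With
connected mode disabled, ipoib_mcast_join_task() then sets dev->mtu to
min(mcast_mtu, admin_mtu), as in the ipoib_multicast.c hunk above.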

(See attached file: ipoib-4kmtu-sg-2.6.24-rc3.patch)

Thanks
Shirley