[ewg] Re: [PATCH] IB/ipoib: bug fix creation of own_ah
Ralph Campbell
ralph.campbell at qlogic.com
Mon Feb 11 11:16:49 PST 2008
I tried this out on RHEL 5 (2.6.18-8.el5) and it works OK.
Acked-by: Ralph Campbell <ralph.campbell at qlogic.com>
On Sun, 2008-02-10 at 17:08 +0200, Eli Cohen wrote:
> [PATCH] IB/ipoib: bug fix creation of own_ah
>
> Create own ah after the port is queried. This also handles
> cases when a port change event occurs (which includes lid
> change event).
>
> Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> ---
>
> Ralph, Arlin,
>
> I think this patch should fix the problem you were seeing. Could you
> check if it does? I pushed this to ofed 1.3 tree.
>
>
> kernel_patches/fixes/ipoib_0190_unsig_udqp.patch | 169 +++++++++++++---------
> 1 files changed, 99 insertions(+), 70 deletions(-)
>
> diff --git a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> index 1ffb78a..2a1062d 100644
> --- a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> +++ b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> @@ -13,7 +13,7 @@ Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-10 10:04:06.097933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-10 10:08:46.941535000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h 2008-02-10 14:11:45.097178000 +0200
> @@ -373,6 +373,7 @@ struct ipoib_dev_priv {
>
> struct ib_wc ibwc[IPOIB_NUM_WC];
> @@ -39,10 +39,18 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
>
> struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
> struct ib_pd *pd, struct ib_ah_attr *attr);
> +@@ -534,6 +536,7 @@ int ipoib_pkey_dev_delay_open(struct net
> + void ipoib_drain_cq(struct net_device *dev);
> +
> + void ipoib_set_ethtool_ops(struct net_device *dev);
> ++void destroy_own_ah(struct ipoib_dev_priv *priv);
> +
> + #ifdef CONFIG_INFINIBAND_IPOIB_CM
> +
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-10 10:04:06.092935000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-10 10:09:59.703873000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c 2008-02-10 14:11:45.086179000 +0200
> @@ -254,12 +254,10 @@ repost:
> "for buf %d\n", wr_id);
> }
> @@ -83,7 +91,9 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
> netif_wake_queue(dev);
> +}
> -+
> +
> +- if (need_lock)
> +- spin_unlock_irqrestore(&priv->tx_lock, flags);
> +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
> +{
> + struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -96,9 +106,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> + _ipoib_ib_handle_tx_wc(dev, i);
> + } while (i++ != wr_id);
> + priv->tx_poll = i & (ipoib_sendq_size - 1);
> -
> -- if (need_lock)
> -- spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++
> + if (unlikely(wc->status != IB_WC_SUCCESS &&
> + wc->status != IB_WC_WR_FLUSH_ERR))
>
> @@ -128,7 +136,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> }
>
> int ipoib_poll(struct napi_struct *napi, int budget)
> -@@ -361,11 +372,63 @@ void ipoib_ib_rx_completion(struct ib_cq
> +@@ -361,11 +372,66 @@ void ipoib_ib_rx_completion(struct ib_cq
> netif_rx_schedule(dev, &priv->napi);
> }
>
> @@ -161,7 +169,8 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> + spin_lock_irqsave(&priv->tx_lock, flags);
> + if (((int)priv->tx_tail - (int)priv->tx_head < 0) &&
> + time_after(jiffies, dev->trans_start + 10) &&
> -+ priv->tx_outstanding < ipoib_sendq_size) {
> ++ priv->tx_outstanding < ipoib_sendq_size &&
> ++ priv->own_ah) {
> + wrid = priv->tx_head & (ipoib_sendq_size - 1);
> + priv->tx_ring[wrid].skb = NULL;
> + if (post_zlen_send_wr(priv, wrid))
> @@ -173,8 +182,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> + }
> + poll_tx(priv);
> + spin_unlock_irqrestore(&priv->tx_lock, flags);
> -
> -- poll_tx(priv, 1);
> ++
> + mod_timer(&priv->poll_timer, jiffies + HZ / 2);
> +}
> +
> @@ -189,13 +197,16 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> + if (!post_zlen_send_wr(priv, wrid)) {
> + ++priv->tx_head;
> + ++priv->tx_outstanding;
> -+ }
> ++ } else
> ++ ipoib_warn(priv, "post_zlen failed\n");
> +
> +- poll_tx(priv, 1);
> + poll_tx(priv);
> + spin_unlock_irqrestore(&priv->tx_lock, flags);
> }
>
> static inline int post_send(struct ipoib_dev_priv *priv,
> -@@ -405,6 +468,11 @@ static inline int post_send(struct ipoib
> +@@ -405,6 +471,11 @@ static inline int post_send(struct ipoib
> } else
> priv->tx_wr.opcode = IB_WR_SEND;
>
> @@ -207,7 +218,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
> }
>
> -@@ -481,15 +549,17 @@ void ipoib_send(struct net_device *dev,
> +@@ -481,15 +552,17 @@ void ipoib_send(struct net_device *dev,
>
> address->last_send = priv->tx_head;
> ++priv->tx_head;
> @@ -227,58 +238,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>
> return;
>
> -@@ -530,6 +600,32 @@ void ipoib_reap_ah(struct work_struct *w
> - round_jiffies_relative(HZ));
> - }
> -
> -+static int create_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+ struct ib_ah_attr attr = {
> -+ .dlid = priv->local_lid,
> -+ .port_num = priv->port,
> -+ };
> -+
> -+ if (priv->own_ah) {
> -+ ipoib_dbg(priv, "own ah already exists\n");
> -+ return -EINVAL;
> -+ }
> -+ priv->own_ah = ib_create_ah(priv->pd, &attr);
> -+ return IS_ERR(priv->own_ah);
> -+}
> -+
> -+static void destroy_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+ if (!priv->own_ah) {
> -+ ipoib_dbg(priv, "destroying an already destroyed own ah\n");
> -+ return;
> -+ }
> -+
> -+ ib_destroy_ah(priv->own_ah);
> -+ priv->own_ah = NULL;
> -+}
> -+
> - int ipoib_ib_dev_open(struct net_device *dev)
> - {
> - struct ipoib_dev_priv *priv = netdev_priv(dev);
> -@@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device
> - }
> - set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> -
> -+ ret = create_own_ah(priv);
> -+ if (ret) {
> -+ priv->own_ah = NULL;
> -+ ipoib_warn(priv, "failed to create own ah\n");
> -+ return -1;
> -+ }
> -+
> - ret = ipoib_init_qp(dev);
> - if (ret) {
> - ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
> -+ destroy_own_ah(priv);
> - return -1;
> - }
> -
> -@@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device
> +@@ -566,6 +639,11 @@ int ipoib_ib_dev_open(struct net_device
> queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
> round_jiffies_relative(HZ));
>
> @@ -290,7 +250,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
>
> return 0;
> -@@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d
> +@@ -662,7 +740,7 @@ void ipoib_drain_cq(struct net_device *d
> if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
> ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
> else
> @@ -299,7 +259,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> }
> }
> } while (n == IPOIB_NUM_WC);
> -@@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -673,12 +751,14 @@ int ipoib_ib_dev_stop(struct net_device
> struct ipoib_dev_priv *priv = netdev_priv(dev);
> struct ib_qp_attr qp_attr;
> unsigned long begin;
> @@ -315,7 +275,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>
> /*
> * Move our QP to the error state and then reinitialize in
> -@@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -700,32 +780,30 @@ int ipoib_ib_dev_stop(struct net_device
> * assume the HW is wedged and just free up
> * all our pending work requests.
> */
> @@ -363,7 +323,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> ipoib_drain_cq(dev);
>
> msleep(1);
> -@@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -734,6 +812,7 @@ int ipoib_ib_dev_stop(struct net_device
> ipoib_dbg(priv, "All sends and receives done.\n");
>
> timeout:
> @@ -371,10 +331,19 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> qp_attr.qp_state = IB_QPS_RESET;
> if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
> ipoib_warn(priv, "Failed to modify QP to RESET state\n");
> +@@ -838,6 +917,8 @@ static void __ipoib_ib_dev_flush(struct
> + ipoib_ib_dev_open(dev);
> + }
> +
> ++ destroy_own_ah(priv);
> ++
> + /*
> + * The device could have been brought down between the start and when
> + * we get here, don't bring it back up if it's not configured up
> Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> ===================================================================
> --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-10 10:04:06.101933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-10 10:08:46.948541000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c 2008-02-10 14:10:10.611696000 +0200
> @@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_
> .max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
> .max_recv_sge = 1
> @@ -393,3 +362,63 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
> if (IS_ERR(priv->scq)) {
> printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
> goto out_free_rcq;
> +Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +===================================================================
> +--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-10 14:10:09.000000000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c 2008-02-10 14:11:45.091181000 +0200
> +@@ -492,6 +492,42 @@ static void ipoib_mcast_join(struct net_
> + }
> + }
> +
> ++static int create_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++ struct ib_ah_attr attr = {
> ++ .dlid = priv->local_lid,
> ++ .port_num = priv->port,
> ++ };
> ++ struct ib_ah *ah;
> ++
> ++ if (priv->own_ah)
> ++ return 0;
> ++
> ++ ah = ib_create_ah(priv->pd, &attr);
> ++ if (!IS_ERR(ah)) {
> ++ ipoib_dbg(priv, "created own ah\n");
> ++ priv->own_ah = ah;
> ++ }
> ++
> ++ return IS_ERR(priv->own_ah);
> ++}
> ++
> ++void destroy_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++ unsigned long flags;
> ++
> ++ if (!priv->own_ah) {
> ++ ipoib_dbg(priv, "own ah already destroyed\n");
> ++ return;
> ++ } else
> ++ ipoib_dbg(priv, "destroying own ah\n");
> ++
> ++ spin_lock_irqsave(&priv->tx_lock, flags);
> ++ ib_destroy_ah(priv->own_ah);
> ++ priv->own_ah = NULL;
> ++ spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++}
> ++
> + void ipoib_mcast_join_task(struct work_struct *work)
> + {
> + struct ipoib_dev_priv *priv =
> +@@ -509,8 +545,11 @@ void ipoib_mcast_join_task(struct work_s
> + {
> + struct ib_port_attr attr;
> +
> +- if (!ib_query_port(priv->ca, priv->port, &attr))
> ++ if (!ib_query_port(priv->ca, priv->port, &attr)) {
> + priv->local_lid = attr.lid;
> ++ if (create_own_ah(priv))
> ++ ipoib_warn(priv, "create own_ah failed\n");
> ++ }
> + else
> + ipoib_warn(priv, "ib_query_port failed\n");
> + }
More information about the ewg
mailing list