[ewg] Re: [PATCH] IB/ipoib: bug fix creation of own_ah

Ralph Campbell ralph.campbell at qlogic.com
Mon Feb 11 11:16:49 PST 2008


I tried this out on RHEL 5 (2.6.18-8.el5) and it works OK.

Acked-by: Ralph Campbell <ralph.campbell at qlogic.com>


On Sun, 2008-02-10 at 17:08 +0200, Eli Cohen wrote:
> [PATCH] IB/ipoib: bug fix creation of own_ah
> 
> Create own ah after the port is queried. This also handles
> cases when a port change event occurs (which includes lid
> change event).
> 
> Signed-off-by: Eli Cohen <eli at mellanox.co.il>
> ---
> 
> Ralph, Arlin,
> 
> I think this patch should fix the problem you were seeing. Could you
> check if it does? I pushed this to ofed 1.3 tree.
> 
> 
>   kernel_patches/fixes/ipoib_0190_unsig_udqp.patch |  169 +++++++++++++---------
>   1 files changed, 99 insertions(+), 70 deletions(-)
> 
> diff --git a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> index 1ffb78a..2a1062d 100644
> --- a/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> +++ b/kernel_patches/fixes/ipoib_0190_unsig_udqp.patch
> @@ -13,7 +13,7 @@ Signed-off-by: Eli Cohen <eli at mellanox.co.il>
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-10 10:04:06.097933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-10 10:08:46.941535000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h	2008-02-10 14:11:45.097178000 +0200
>   @@ -373,6 +373,7 @@ struct ipoib_dev_priv {
> 
>    	struct ib_wc 	     ibwc[IPOIB_NUM_WC];
> @@ -39,10 +39,18 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib.h
> 
>    struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
>    				 struct ib_pd *pd, struct ib_ah_attr *attr);
> +@@ -534,6 +536,7 @@ int ipoib_pkey_dev_delay_open(struct net
> + void ipoib_drain_cq(struct net_device *dev);
> +
> + void ipoib_set_ethtool_ops(struct net_device *dev);
> ++void destroy_own_ah(struct ipoib_dev_priv *priv);
> +
> + #ifdef CONFIG_INFINIBAND_IPOIB_CM
> +
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-10 10:04:06.092935000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-10 10:09:59.703873000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c	2008-02-10 14:11:45.086179000 +0200
>   @@ -254,12 +254,10 @@ repost:
>    			   "for buf %d\n", wr_id);
>    }
> @@ -83,7 +91,9 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    	    test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
>    		netif_wake_queue(dev);
>   +}
> -+
> +
> +-	if (need_lock)
> +-		spin_unlock_irqrestore(&priv->tx_lock, flags);
>   +static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
>   +{
>   +	struct ipoib_dev_priv *priv = netdev_priv(dev);
> @@ -96,9 +106,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +		_ipoib_ib_handle_tx_wc(dev, i);
>   +	} while (i++ != wr_id);
>   +	priv->tx_poll = i & (ipoib_sendq_size - 1);
> -
> --	if (need_lock)
> --		spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++
>   +	if (unlikely(wc->status != IB_WC_SUCCESS &&
>   +		     wc->status != IB_WC_WR_FLUSH_ERR))
> 
> @@ -128,7 +136,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    }
> 
>    int ipoib_poll(struct napi_struct *napi, int budget)
> -@@ -361,11 +372,63 @@ void ipoib_ib_rx_completion(struct ib_cq
> +@@ -361,11 +372,66 @@ void ipoib_ib_rx_completion(struct ib_cq
>    	netif_rx_schedule(dev, &priv->napi);
>    }
> 
> @@ -161,7 +169,8 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +	spin_lock_irqsave(&priv->tx_lock, flags);
>   +	if (((int)priv->tx_tail - (int)priv->tx_head < 0) &&
>   +		time_after(jiffies, dev->trans_start + 10) &&
> -+		priv->tx_outstanding < ipoib_sendq_size) {
> ++		priv->tx_outstanding < ipoib_sendq_size &&
> ++		priv->own_ah) {
>   +		wrid = priv->tx_head & (ipoib_sendq_size - 1);
>   +		priv->tx_ring[wrid].skb = NULL;
>   +		if (post_zlen_send_wr(priv, wrid))
> @@ -173,8 +182,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +	}
>   +	poll_tx(priv);
>   +	spin_unlock_irqrestore(&priv->tx_lock, flags);
> -
> --	poll_tx(priv, 1);
> ++
>   +	mod_timer(&priv->poll_timer, jiffies + HZ / 2);
>   +}
>   +
> @@ -189,13 +197,16 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>   +	if (!post_zlen_send_wr(priv, wrid)) {
>   +		++priv->tx_head;
>   +		++priv->tx_outstanding;
> -+	}
> ++	} else
> ++		ipoib_warn(priv, "post_zlen failed\n");
> +
> +-	poll_tx(priv, 1);
>   +	poll_tx(priv);
>   +	spin_unlock_irqrestore(&priv->tx_lock, flags);
>    }
> 
>    static inline int post_send(struct ipoib_dev_priv *priv,
> -@@ -405,6 +468,11 @@ static inline int post_send(struct ipoib
> +@@ -405,6 +471,11 @@ static inline int post_send(struct ipoib
>    	} else
>    		priv->tx_wr.opcode      = IB_WR_SEND;
> 
> @@ -207,7 +218,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    	return ib_post_send(priv->qp, &priv->tx_wr, &bad_wr);
>    }
> 
> -@@ -481,15 +549,17 @@ void ipoib_send(struct net_device *dev,
> +@@ -481,15 +552,17 @@ void ipoib_send(struct net_device *dev,
> 
>    		address->last_send = priv->tx_head;
>    		++priv->tx_head;
> @@ -227,58 +238,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> 
>    	return;
> 
> -@@ -530,6 +600,32 @@ void ipoib_reap_ah(struct work_struct *w
> - 				   round_jiffies_relative(HZ));
> - }
> -
> -+static int create_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+	struct ib_ah_attr attr = {
> -+		.dlid = priv->local_lid,
> -+		.port_num = priv->port,
> -+	};
> -+
> -+	if (priv->own_ah) {
> -+		ipoib_dbg(priv, "own ah already exists\n");
> -+		return -EINVAL;
> -+	}
> -+	priv->own_ah = ib_create_ah(priv->pd, &attr);
> -+	return IS_ERR(priv->own_ah);
> -+}
> -+
> -+static void destroy_own_ah(struct ipoib_dev_priv *priv)
> -+{
> -+	if (!priv->own_ah) {
> -+		ipoib_dbg(priv, "destroying an already destroyed own ah\n");
> -+		return;
> -+	}
> -+
> -+	ib_destroy_ah(priv->own_ah);
> -+	priv->own_ah = NULL;
> -+}
> -+
> - int ipoib_ib_dev_open(struct net_device *dev)
> - {
> - 	struct ipoib_dev_priv *priv = netdev_priv(dev);
> -@@ -542,9 +638,17 @@ int ipoib_ib_dev_open(struct net_device
> - 	}
> - 	set_bit(IPOIB_PKEY_ASSIGNED, &priv->flags);
> -
> -+	ret = create_own_ah(priv);
> -+	if (ret) {
> -+		priv->own_ah = NULL;
> -+		ipoib_warn(priv, "failed to create own ah\n");
> -+		return -1;
> -+	}
> -+
> - 	ret = ipoib_init_qp(dev);
> - 	if (ret) {
> - 		ipoib_warn(priv, "ipoib_init_qp returned %d\n", ret);
> -+		destroy_own_ah(priv);
> - 		return -1;
> - 	}
> -
> -@@ -566,6 +670,11 @@ int ipoib_ib_dev_open(struct net_device
> +@@ -566,6 +639,11 @@ int ipoib_ib_dev_open(struct net_device
>    	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
>    			   round_jiffies_relative(HZ));
> 
> @@ -290,7 +250,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
> 
>    	return 0;
> -@@ -662,7 +771,7 @@ void ipoib_drain_cq(struct net_device *d
> +@@ -662,7 +740,7 @@ void ipoib_drain_cq(struct net_device *d
>    				if (priv->ibwc[i].wr_id & IPOIB_OP_CM)
>    					ipoib_cm_handle_tx_wc(dev, priv->ibwc + i);
>    				else
> @@ -299,7 +259,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    			}
>    		}
>    	} while (n == IPOIB_NUM_WC);
> -@@ -673,12 +782,14 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -673,12 +751,14 @@ int ipoib_ib_dev_stop(struct net_device
>    	struct ipoib_dev_priv *priv = netdev_priv(dev);
>    	struct ib_qp_attr qp_attr;
>    	unsigned long begin;
> @@ -315,7 +275,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
> 
>    	/*
>    	 * Move our QP to the error state and then reinitialize in
> -@@ -700,32 +811,30 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -700,32 +780,30 @@ int ipoib_ib_dev_stop(struct net_device
>    			 * assume the HW is wedged and just free up
>    			 * all our pending work requests.
>    			 */
> @@ -363,7 +323,7 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    		ipoib_drain_cq(dev);
> 
>    		msleep(1);
> -@@ -734,6 +843,7 @@ int ipoib_ib_dev_stop(struct net_device
> +@@ -734,6 +812,7 @@ int ipoib_ib_dev_stop(struct net_device
>    	ipoib_dbg(priv, "All sends and receives done.\n");
> 
>    timeout:
> @@ -371,10 +331,19 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_ib.c
>    	qp_attr.qp_state = IB_QPS_RESET;
>    	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
>    		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
> +@@ -838,6 +917,8 @@ static void __ipoib_ib_dev_flush(struct
> + 		ipoib_ib_dev_open(dev);
> + 	}
> +
> ++	destroy_own_ah(priv);
> ++
> + 	/*
> + 	 * The device could have been brought down between the start and when
> + 	 * we get here, don't bring it back up if it's not configured up
>   Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
>   ===================================================================
>   --- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-10 10:04:06.101933000 +0200
> -+++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-10 10:08:46.948541000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c	2008-02-10 14:10:10.611696000 +0200
>   @@ -153,7 +153,7 @@ int ipoib_transport_dev_init(struct net_
>    			.max_send_sge = dev->features & NETIF_F_SG ? MAX_SKB_FRAGS + 1 : 1,
>    			.max_recv_sge = 1
> @@ -393,3 +362,63 @@ Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
>    	if (IS_ERR(priv->scq)) {
>    		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
>    		goto out_free_rcq;
> +Index: ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
> +===================================================================
> +--- ofed_kernel.orig/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2008-02-10 14:10:09.000000000 +0200
> ++++ ofed_kernel/drivers/infiniband/ulp/ipoib/ipoib_multicast.c	2008-02-10 14:11:45.091181000 +0200
> +@@ -492,6 +492,42 @@ static void ipoib_mcast_join(struct net_
> + 	}
> + }
> +
> ++static int create_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++	struct ib_ah_attr attr = {
> ++		.dlid = priv->local_lid,
> ++		.port_num = priv->port,
> ++	};
> ++        struct ib_ah *ah;
> ++
> ++	if (priv->own_ah)
> ++		return 0;
> ++
> ++	ah = ib_create_ah(priv->pd, &attr);
> ++	if (!IS_ERR(ah)) {
> ++		ipoib_dbg(priv, "created own ah\n");
> ++		priv->own_ah = ah;
> ++	}
> ++
> ++	return IS_ERR(priv->own_ah);
> ++}
> ++
> ++void destroy_own_ah(struct ipoib_dev_priv *priv)
> ++{
> ++	unsigned long flags;
> ++
> ++	if (!priv->own_ah) {
> ++		ipoib_dbg(priv, "own ah already destroyed\n");
> ++		return;
> ++	} else
> ++		ipoib_dbg(priv, "destroying own ah\n");
> ++
> ++	spin_lock_irqsave(&priv->tx_lock, flags);
> ++	ib_destroy_ah(priv->own_ah);
> ++	priv->own_ah = NULL;
> ++	spin_unlock_irqrestore(&priv->tx_lock, flags);
> ++}
> ++
> + void ipoib_mcast_join_task(struct work_struct *work)
> + {
> + 	struct ipoib_dev_priv *priv =
> +@@ -509,8 +545,11 @@ void ipoib_mcast_join_task(struct work_s
> + 	{
> + 		struct ib_port_attr attr;
> +
> +-		if (!ib_query_port(priv->ca, priv->port, &attr))
> ++		if (!ib_query_port(priv->ca, priv->port, &attr)) {
> + 			priv->local_lid = attr.lid;
> ++			if (create_own_ah(priv))
> ++				ipoib_warn(priv, "create own_ah failed\n");
> ++		}
> + 		else
> + 			ipoib_warn(priv, "ib_query_port failed\n");
> + 	}




More information about the ewg mailing list