[ewg] [PATCH OFED-151] ehca fixes

Alexander Schmidt alexs at linux.vnet.ibm.com
Wed Feb 17 05:15:11 PST 2010


On Tue, 16 Feb 2010 15:29:11 +0200
Vladimir Sokolovsky <vlad at dev.mellanox.co.il> wrote:

> Alexander Schmidt wrote:
> > Hi Vlad,
> > 
> > please apply the following fixes for OFED-1.5.1, thank you!
> > 
> > Regards,
> > Alex
> > 
> 
> Hi Alex,
> These fixes require updates in backport patches, at least for SLES10 SP2 and SP3:
> 
>          /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
> /usr/bin/quilt --quiltrc /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/patches/quiltrc import 
> /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
> Importing patch /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch (stored as ehca-030-ibmebus_loc_code.patch)
> /usr/bin/quilt --quiltrc /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/patches/quiltrc push patches/ehca-030-ibmebus_loc_code.patch
> Applying patch ehca-030-ibmebus_loc_code.patch
> patching file drivers/infiniband/hw/ehca/ehca_classes.h
> patching file drivers/infiniband/hw/ehca/ehca_eq.c
> Hunk #3 FAILED at 170.
> 1 out of 3 hunks FAILED -- rejects in file drivers/infiniband/hw/ehca/ehca_eq.c
> patching file drivers/infiniband/hw/ehca/ehca_main.c
> Patch ehca-030-ibmebus_loc_code.patch does not apply (enforce with -f)
> 
> You can reproduce it by:
> # ./ofed_scripts/ofed_makedist.sh
> 
> Regards,
> Vladimir

Hi Vlad, thank you for checking this. I've included an update of the backports
in the patch below; ofed_makedist.sh runs fine now.

Regards,
Alex

diff --git a/kernel_patches/backport/2.6.16_sles10_sp2/ehca-030-ibmebus_loc_code.patch b/kernel_patches/backport/2.6.16_sles10_sp2/ehca-030-ibmebus_loc_code.patch
index 9d6830d..23a7f1e 100644
--- a/kernel_patches/backport/2.6.16_sles10_sp2/ehca-030-ibmebus_loc_code.patch
+++ b/kernel_patches/backport/2.6.16_sles10_sp2/ehca-030-ibmebus_loc_code.patch
@@ -39,15 +39,15 @@ Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_eq.c
  					  IRQF_DISABLED, "ehca_neq",
  					  (void *)shca);
  		if (ret < 0)
-@@ -170,7 +170,7 @@
- 	u64 h_ret;
- 
- 	spin_lock_irqsave(&eq->spinlock, flags);
--	ibmebus_free_irq(eq->ist, (void *)shca);
-+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
- 
- 	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
- 
+@@ -169,7 +169,7 @@ int ehca_destroy_eq(struct ehca_shca *sh
+        unsigned long flags;
+        u64 h_ret;
+
+-       ibmebus_free_irq(eq->ist, (void *)shca);
++       ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+        spin_lock_irqsave(&shca_list_lock, flags);
+        eq->is_initialized = 0;
 Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_main.c
 ===================================================================
 --- ofa_kernel-1.5.orig/drivers/infiniband/hw/ehca/ehca_main.c	2009-09-25 12:42:48.000000000 +0200
diff --git a/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch b/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
index 9d6830d..cf9b5bb 100644
--- a/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
+++ b/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
@@ -39,15 +39,15 @@ Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_eq.c
  					  IRQF_DISABLED, "ehca_neq",
  					  (void *)shca);
  		if (ret < 0)
-@@ -170,7 +170,7 @@
- 	u64 h_ret;
+@@ -169,7 +169,7 @@
+        unsigned long flags;
+        u64 h_ret;
  
- 	spin_lock_irqsave(&eq->spinlock, flags);
--	ibmebus_free_irq(eq->ist, (void *)shca);
-+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
- 
- 	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+-       ibmebus_free_irq(eq->ist, (void *)shca);
++       ibmebus_free_irq(NULL, eq->ist, (void *)shca);
  
+        spin_lock_irqsave(&shca_list_lock, flags);
+        eq->is_initialized = 0;
 Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_main.c
 ===================================================================
 --- ofa_kernel-1.5.orig/drivers/infiniband/hw/ehca/ehca_main.c	2009-09-25 12:42:48.000000000 +0200
diff --git a/kernel_patches/backport/2.6.18-EL5.3/ehca-030-ibmebus_loc_code.patch b/kernel_patches/backport/2.6.18-EL5.3/ehca-030-ibmebus_loc_code.patch
index 9d6830d..cfc1595 100644
--- a/kernel_patches/backport/2.6.18-EL5.3/ehca-030-ibmebus_loc_code.patch
+++ b/kernel_patches/backport/2.6.18-EL5.3/ehca-030-ibmebus_loc_code.patch
@@ -39,15 +39,15 @@ Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_eq.c
  					  IRQF_DISABLED, "ehca_neq",
  					  (void *)shca);
  		if (ret < 0)
-@@ -170,7 +170,7 @@
- 	u64 h_ret;
- 
- 	spin_lock_irqsave(&eq->spinlock, flags);
--	ibmebus_free_irq(eq->ist, (void *)shca);
-+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
- 
- 	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
- 
+@@ -169,7 +169,7 @@
+        unsigned long flags;
+        u64 h_ret;
+
+-       ibmebus_free_irq(eq->ist, (void *)shca);
++       ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+        spin_lock_irqsave(&shca_list_lock, flags);
+        eq->is_initialized = 0;
 Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_main.c
 ===================================================================
 --- ofa_kernel-1.5.orig/drivers/infiniband/hw/ehca/ehca_main.c	2009-09-25 12:42:48.000000000 +0200
diff --git a/kernel_patches/backport/2.6.18-EL5.4/ehca-030-ibmebus_loc_code.patch b/kernel_patches/backport/2.6.18-EL5.4/ehca-030-ibmebus_loc_code.patch
index 9d6830d..cfc1595 100644
--- a/kernel_patches/backport/2.6.18-EL5.4/ehca-030-ibmebus_loc_code.patch
+++ b/kernel_patches/backport/2.6.18-EL5.4/ehca-030-ibmebus_loc_code.patch
@@ -39,15 +39,15 @@ Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_eq.c
  					  IRQF_DISABLED, "ehca_neq",
  					  (void *)shca);
  		if (ret < 0)
-@@ -170,7 +170,7 @@
- 	u64 h_ret;
- 
- 	spin_lock_irqsave(&eq->spinlock, flags);
--	ibmebus_free_irq(eq->ist, (void *)shca);
-+	ibmebus_free_irq(NULL, eq->ist, (void *)shca);
- 
- 	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
- 
+@@ -169,7 +169,7 @@
+        unsigned long flags;
+        u64 h_ret;
+
+-       ibmebus_free_irq(eq->ist, (void *)shca);
++       ibmebus_free_irq(NULL, eq->ist, (void *)shca);
+
+        spin_lock_irqsave(&shca_list_lock, flags);
+        eq->is_initialized = 0;
 Index: ofa_kernel-1.5/drivers/infiniband/hw/ehca/ehca_main.c
 ===================================================================
 --- ofa_kernel-1.5.orig/drivers/infiniband/hw/ehca/ehca_main.c	2009-09-25 12:42:48.000000000 +0200
diff --git a/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch b/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch
new file mode 100644
index 0000000..0af4068
--- /dev/null
+++ b/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch
@@ -0,0 +1,63 @@
+commit 9420269428b3dc80c98e52beac60a3976fbef7d2
+Author: Alexander Schmidt <alexs at linux.vnet.ibm.com>
+Date:   Wed Dec 9 10:11:04 2009 -0800
+
+    IB/ehca: Rework destroy_eq()
+    
+    The ibmebus_free_irq() function, which might sleep, was called with
+    interrupts disabled.  To fix this, make sure that no interrupts are
+    running by killing the interrupt tasklet.  Also lock the
+    shca_list_lock to protect against the poll_eqs_timer running
+    concurrently.
+    
+    Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
+    Signed-off-by: Roland Dreier <rolandd at cisco.com>
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index c825142..0136abd 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
+ extern rwlock_t ehca_cq_idr_lock;
+ extern struct idr ehca_qp_idr;
+ extern struct idr ehca_cq_idr;
++extern spinlock_t shca_list_lock;
+ 
+ extern int ehca_static_rate;
+ extern int ehca_port_act_time;
+diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
+index 523e733..3b87589 100644
+--- a/drivers/infiniband/hw/ehca/ehca_eq.c
++++ b/drivers/infiniband/hw/ehca/ehca_eq.c
+@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
+ 	unsigned long flags;
+ 	u64 h_ret;
+ 
+-	spin_lock_irqsave(&eq->spinlock, flags);
+ 	ibmebus_free_irq(eq->ist, (void *)shca);
+ 
+-	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
++	spin_lock_irqsave(&shca_list_lock, flags);
++	eq->is_initialized = 0;
++	spin_unlock_irqrestore(&shca_list_lock, flags);
+ 
+-	spin_unlock_irqrestore(&eq->spinlock, flags);
++	tasklet_kill(&eq->interrupt_task);
++
++	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
+ 
+ 	if (h_ret != H_SUCCESS) {
+ 		ehca_err(&shca->ib_device, "Can't free EQ resources.");
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index fb2d83c..129a6be 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
+ DEFINE_IDR(ehca_cq_idr);
+ 
+ static LIST_HEAD(shca_list); /* list of all registered ehcas */
+-static DEFINE_SPINLOCK(shca_list_lock);
++DEFINE_SPINLOCK(shca_list_lock);
+ 
+ static struct timer_list poll_eqs_timer;
+ 
diff --git a/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch b/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch
new file mode 100644
index 0000000..2933738
--- /dev/null
+++ b/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch
@@ -0,0 +1,33 @@
+irq_spinlock is only taken in tasklet context, so it is safe not to
+disable hardware interrupts.
+
+Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
+---
+ drivers/infiniband/hw/ehca/ehca_irq.c |    5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_irq.c
++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -548,11 +548,10 @@ void ehca_process_eq(struct ehca_shca *s
+ 	struct ehca_eq *eq = &shca->eq;
+ 	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
+ 	u64 eqe_value, ret;
+-	unsigned long flags;
+ 	int eqe_cnt, i;
+ 	int eq_empty = 0;
+ 
+-	spin_lock_irqsave(&eq->irq_spinlock, flags);
++	spin_lock(&eq->irq_spinlock);
+ 	if (is_irq) {
+ 		const int max_query_cnt = 100;
+ 		int query_cnt = 0;
+@@ -643,7 +642,7 @@ void ehca_process_eq(struct ehca_shca *s
+ 	} while (1);
+ 
+ unlock_irq_spinlock:
+-	spin_unlock_irqrestore(&eq->irq_spinlock, flags);
++	spin_unlock(&eq->irq_spinlock);
+ }
+ 
+ void ehca_tasklet_eq(unsigned long data)
+
diff --git a/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch b/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch
new file mode 100644
index 0000000..9d84aad
--- /dev/null
+++ b/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch
@@ -0,0 +1,23 @@
+The max_dest_rd_atomic and max_qp_rd_atomic values are
+properly returned by query_qp(), so there should not be
+an error returned when they are queried.
+
+Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
+---
+ drivers/infiniband/hw/ehca/ehca_qp.c |    4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_qp.c
++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -55,9 +55,7 @@ static struct kmem_cache *qp_cache;
+ /*
+  * attributes not supported by query qp
+  */
+-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
+-				     IB_QP_MAX_QP_RD_ATOMIC   | \
+-				     IB_QP_ACCESS_FLAGS       | \
++#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
+ 				     IB_QP_EN_SQD_ASYNC_NOTIFY)
+ 
+ /*
+
diff --git a/kernel_patches/fixes/ehca-0130-process_mad.patch b/kernel_patches/fixes/ehca-0130-process_mad.patch
new file mode 100644
index 0000000..a97e3bb
--- /dev/null
+++ b/kernel_patches/fixes/ehca-0130-process_mad.patch
@@ -0,0 +1,20 @@
+If the caller does not pass a valid in_wc to process_mad(),
+return MAD failure as it is not possible to generate a valid
+MAD redirect response.
+
+Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
+---
+ drivers/infiniband/hw/ehca/ehca_sqp.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_sqp.c
++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_sqp.c
+@@ -222,7 +222,7 @@ int ehca_process_mad(struct ib_device *i
+ {
+ 	int ret;
+ 
+-	if (!port_num || port_num > ibdev->phys_port_cnt)
++	if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
+ 		return IB_MAD_RESULT_FAILURE;
+ 
+ 	/* accept only pma request */



More information about the ewg mailing list