[ewg] [PATCH OFED-151] ehca fixes

Vladimir Sokolovsky vlad at dev.mellanox.co.il
Tue Feb 16 05:29:11 PST 2010


Alexander Schmidt wrote:
> Hi Vlad,
> 
> please apply the following fixes for OFED-1.5.1, thank you!
> 
> Regards,
> Alex
> 

Hi Alex,
These fixes require updates to the backport patches, at least for SLES10 SP2 and SP3:

         /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
/usr/bin/quilt --quiltrc /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/patches/quiltrc import 
/tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch
Importing patch /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/kernel_patches/backport/2.6.16_sles10_sp3/ehca-030-ibmebus_loc_code.patch (stored as ehca-030-ibmebus_loc_code.patch)
/usr/bin/quilt --quiltrc /tmp/ofa_1_5_dev_kernel-20100216-1441_linux-2.6.16.60-0.54.5-smp_check/patches/quiltrc push patches/ehca-030-ibmebus_loc_code.patch
Applying patch ehca-030-ibmebus_loc_code.patch
patching file drivers/infiniband/hw/ehca/ehca_classes.h
patching file drivers/infiniband/hw/ehca/ehca_eq.c
Hunk #3 FAILED at 170.
1 out of 3 hunks FAILED -- rejects in file drivers/infiniband/hw/ehca/ehca_eq.c
patching file drivers/infiniband/hw/ehca/ehca_main.c
Patch ehca-030-ibmebus_loc_code.patch does not apply (enforce with -f)

You can reproduce it by running:
# ./ofed_scripts/ofed_makedist.sh

Regards,
Vladimir

> diff -Nurp ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch
> --- ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch	1970-01-01 01:00:00.000000000 +0100
> +++ ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0100-rework_destroy_eq.patch	2010-02-15 11:43:55.000000000 +0100
> @@ -0,0 +1,63 @@
> +commit 9420269428b3dc80c98e52beac60a3976fbef7d2
> +Author: Alexander Schmidt <alexs at linux.vnet.ibm.com>
> +Date:   Wed Dec 9 10:11:04 2009 -0800
> +
> +    IB/ehca: Rework destroy_eq()
> +    
> +    The ibmebus_free_irq() function, which might sleep, was called with
> +    interrupts disabled.  To fix this, make sure that no interrupts are
> +    running by killing the interrupt tasklet.  Also lock the
> +    shca_list_lock to protect against the poll_eqs_timer running
> +    concurrently.
> +    
> +    Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
> +    Signed-off-by: Roland Dreier <rolandd at cisco.com>
> +
> +diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
> +index c825142..0136abd 100644
> +--- a/drivers/infiniband/hw/ehca/ehca_classes.h
> ++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
> +@@ -375,6 +375,7 @@ extern rwlock_t ehca_qp_idr_lock;
> + extern rwlock_t ehca_cq_idr_lock;
> + extern struct idr ehca_qp_idr;
> + extern struct idr ehca_cq_idr;
> ++extern spinlock_t shca_list_lock;
> + 
> + extern int ehca_static_rate;
> + extern int ehca_port_act_time;
> +diff --git a/drivers/infiniband/hw/ehca/ehca_eq.c b/drivers/infiniband/hw/ehca/ehca_eq.c
> +index 523e733..3b87589 100644
> +--- a/drivers/infiniband/hw/ehca/ehca_eq.c
> ++++ b/drivers/infiniband/hw/ehca/ehca_eq.c
> +@@ -169,12 +169,15 @@ int ehca_destroy_eq(struct ehca_shca *shca, struct ehca_eq *eq)
> + 	unsigned long flags;
> + 	u64 h_ret;
> + 
> +-	spin_lock_irqsave(&eq->spinlock, flags);
> + 	ibmebus_free_irq(eq->ist, (void *)shca);
> + 
> +-	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
> ++	spin_lock_irqsave(&shca_list_lock, flags);
> ++	eq->is_initialized = 0;
> ++	spin_unlock_irqrestore(&shca_list_lock, flags);
> + 
> +-	spin_unlock_irqrestore(&eq->spinlock, flags);
> ++	tasklet_kill(&eq->interrupt_task);
> ++
> ++	h_ret = hipz_h_destroy_eq(shca->ipz_hca_handle, eq);
> + 
> + 	if (h_ret != H_SUCCESS) {
> + 		ehca_err(&shca->ib_device, "Can't free EQ resources.");
> +diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
> +index fb2d83c..129a6be 100644
> +--- a/drivers/infiniband/hw/ehca/ehca_main.c
> ++++ b/drivers/infiniband/hw/ehca/ehca_main.c
> +@@ -123,7 +123,7 @@ DEFINE_IDR(ehca_qp_idr);
> + DEFINE_IDR(ehca_cq_idr);
> + 
> + static LIST_HEAD(shca_list); /* list of all registered ehcas */
> +-static DEFINE_SPINLOCK(shca_list_lock);
> ++DEFINE_SPINLOCK(shca_list_lock);
> + 
> + static struct timer_list poll_eqs_timer;
> + 
> diff -Nurp ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch
> --- ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch	1970-01-01 01:00:00.000000000 +0100
> +++ ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0110-dont_turnoff_irq_in_tasklet.patch	2010-02-15 11:43:55.000000000 +0100
> @@ -0,0 +1,33 @@
> +irq_spinlock is only taken in tasklet context, so it is safe not to
> +disable hardware interrupts.
> +
> +Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
> +---
> + drivers/infiniband/hw/ehca/ehca_irq.c |    5 ++---
> + 1 file changed, 2 insertions(+), 3 deletions(-)
> +
> +--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_irq.c
> ++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_irq.c
> +@@ -548,11 +548,10 @@ void ehca_process_eq(struct ehca_shca *s
> + 	struct ehca_eq *eq = &shca->eq;
> + 	struct ehca_eqe_cache_entry *eqe_cache = eq->eqe_cache;
> + 	u64 eqe_value, ret;
> +-	unsigned long flags;
> + 	int eqe_cnt, i;
> + 	int eq_empty = 0;
> + 
> +-	spin_lock_irqsave(&eq->irq_spinlock, flags);
> ++	spin_lock(&eq->irq_spinlock);
> + 	if (is_irq) {
> + 		const int max_query_cnt = 100;
> + 		int query_cnt = 0;
> +@@ -643,7 +642,7 @@ void ehca_process_eq(struct ehca_shca *s
> + 	} while (1);
> + 
> + unlock_irq_spinlock:
> +-	spin_unlock_irqrestore(&eq->irq_spinlock, flags);
> ++	spin_unlock(&eq->irq_spinlock);
> + }
> + 
> + void ehca_tasklet_eq(unsigned long data)
> +
> diff -Nurp ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch
> --- ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch	1970-01-01 01:00:00.000000000 +0100
> +++ ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0120-allow_access_by_query_qp.patch	2010-02-15 11:43:55.000000000 +0100
> @@ -0,0 +1,23 @@
> +The max_dest_rd_atomic and max_qp_rd_atomic values are
> +properly returned by query_qp(), so there should not be
> +an error returned when they are queried.
> +
> +Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
> +---
> + drivers/infiniband/hw/ehca/ehca_qp.c |    4 +---
> + 1 file changed, 1 insertion(+), 3 deletions(-)
> +
> +--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_qp.c
> ++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_qp.c
> +@@ -55,9 +55,7 @@ static struct kmem_cache *qp_cache;
> + /*
> +  * attributes not supported by query qp
> +  */
> +-#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_MAX_DEST_RD_ATOMIC | \
> +-				     IB_QP_MAX_QP_RD_ATOMIC   | \
> +-				     IB_QP_ACCESS_FLAGS       | \
> ++#define QP_ATTR_QUERY_NOT_SUPPORTED (IB_QP_ACCESS_FLAGS       | \
> + 				     IB_QP_EN_SQD_ASYNC_NOTIFY)
> + 
> + /*
> +
> diff -Nurp ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0130-process_mad.patch ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0130-process_mad.patch
> --- ofa_kernel-1.5.1.old/kernel_patches/fixes/ehca-0130-process_mad.patch	1970-01-01 01:00:00.000000000 +0100
> +++ ofa_kernel-1.5.1/kernel_patches/fixes/ehca-0130-process_mad.patch	2010-02-15 11:43:55.000000000 +0100
> @@ -0,0 +1,20 @@
> +If the caller does not pass a valid in_wc to process_mad(),
> +return MAD failure as it is not possible to generate a valid
> +MAD redirect response.
> +
> +Signed-off-by: Alexander Schmidt <alexs at linux.vnet.ibm.com>
> +---
> + drivers/infiniband/hw/ehca/ehca_sqp.c |    2 +-
> + 1 file changed, 1 insertion(+), 1 deletion(-)
> +
> +--- linux-2.6.orig/drivers/infiniband/hw/ehca/ehca_sqp.c
> ++++ linux-2.6/drivers/infiniband/hw/ehca/ehca_sqp.c
> +@@ -222,7 +222,7 @@ int ehca_process_mad(struct ib_device *i
> + {
> + 	int ret;
> + 
> +-	if (!port_num || port_num > ibdev->phys_port_cnt)
> ++	if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
> + 		return IB_MAD_RESULT_FAILURE;
> + 
> + 	/* accept only pma request */
> 




More information about the ewg mailing list