[ewg] [PATCH 1/3] ofed-1.3-rc4 IB/ehca: upstream patches needed for ehca
Hoang-Nam Nguyen
hnguyen at linux.vnet.ibm.com
Sat Feb 2 08:47:33 PST 2008
IB/ehca: set of patches queued for 2.6.25 and needed for ofed-1.3 release
0001: Add missing spaces in the middle of format strings
0002: Forward event client-reregister-required to registered clients
0003: Use round_jiffies() for EQ polling timer
0004: Remove CQ-QP-link before destroying QP in error path of create_qp()
0005: Define array to store SMI/GSI QPs
0006: Add "port connection autodetect mode"
0007: Prevent RDMA-related connection failures on some eHCA2 hardware
0008: Prevent sending ud packets to qp0
0009: Update sma attr also in case of disruptive
0010: Add PMA support
Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
---
...dd_missing_spaces_in_the_middle_of_format.patch | 59 +++
..._Forward_event_client_reregister_required.patch | 55 +++
...03_Use_round_jiffies_for_EQ_polling_timer.patch | 31 ++
...04_Remove_CQ_QP_link_before_destroying_QP.patch | 35 ++
...ca_0005_Define_array_to_store_SMI_GSI_QPs.patch | 59 +++
..._0006_Add_port_connection_autodetect_mode.patch | 414 ++++++++++++++++++++
..._Prevent_RDMA_related_connection_failures.patch | 276 +++++++++++++
...ca_0008_Prevent_sending_ud_packets_to_qp0.patch | 18 +
...pdate_sma_attr_also_in_case_of_disruptive.patch | 26 ++
.../fixes/ehca_0010_Add_PMA_support.patch | 180 +++++++++
10 files changed, 1153 insertions(+), 0 deletions(-)
create mode 100644 kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
create mode 100644 kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
create mode 100644 kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
create mode 100644 kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
create mode 100644 kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
create mode 100644 kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
create mode 100644 kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch
create mode 100644 kernel_patches/fixes/ehca_0008_Prevent_sending_ud_packets_to_qp0.patch
create mode 100644 kernel_patches/fixes/ehca_0009_Update_sma_attr_also_in_case_of_disruptive.patch
create mode 100644 kernel_patches/fixes/ehca_0010_Add_PMA_support.patch
diff --git a/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch b/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
new file mode 100644
index 0000000..ca13dd6
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
@@ -0,0 +1,59 @@
+From 41c38ba27fb89140311cfa0b1258b1ccc88eea7b Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:15:17 +0100
+Subject: [PATCH] IB/ehca: Add missing spaces in the middle of format strings.
+
+Signed-off-by: Joe Perches <joe at perches.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_cq.c | 2 +-
+ drivers/infiniband/hw/ehca/ehca_qp.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
+index 79c25f5..0467c15 100644
+--- a/drivers/infiniband/hw/ehca/ehca_cq.c
++++ b/drivers/infiniband/hw/ehca/ehca_cq.c
+@@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
+ } else {
+ if (h_ret != H_PAGE_REGISTERED) {
+ ehca_err(device, "Registration of page failed "
+- "ehca_cq=%p cq_num=%x h_ret=%li"
++ "ehca_cq=%p cq_num=%x h_ret=%li "
+ "counter=%i act_pages=%i",
+ my_cq, my_cq->cq_number,
+ h_ret, counter, param.act_pages);
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index dd12668..04e711f 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -858,7 +858,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+- ehca_err(pd->device, "Could not modify SRQ to INIT"
++ ehca_err(pd->device, "Could not modify SRQ to INIT "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+@@ -872,7 +872,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+- ehca_err(pd->device, "Could not enable SRQ"
++ ehca_err(pd->device, "Could not enable SRQ "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+@@ -886,7 +886,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ update_mask,
+ mqpcb, my_qp->galpas.kernel);
+ if (hret != H_SUCCESS) {
+- ehca_err(pd->device, "Could not modify SRQ to RTR"
++ ehca_err(pd->device, "Could not modify SRQ to RTR "
+ "ehca_qp=%p qp_num=%x h_ret=%li",
+ my_qp, my_qp->real_qp_num, hret);
+ goto create_srq2;
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch b/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
new file mode 100644
index 0000000..04159e2
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
@@ -0,0 +1,55 @@
+From afe2f1d8e50933645608932bcbba7dd81144a96c Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:19:03 +0100
+Subject: [PATCH] IB/ehca: Forward event client-reregister-required to registered clients
+
+This patch allows ehca to forward event client-reregister-required to
+registered clients. One such event is generated by a switch, e.g. after
+its reboot.
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_irq.c | 12 ++++++++++++
+ 1 files changed, 12 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
+index 3f617b2..4c734ec 100644
+--- a/drivers/infiniband/hw/ehca/ehca_irq.c
++++ b/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -62,6 +62,7 @@
+ #define NEQE_PORT_NUMBER EHCA_BMASK_IBM( 8, 15)
+ #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+ #define NEQE_DISRUPTIVE EHCA_BMASK_IBM(16, 16)
++#define NEQE_SPECIFIC_EVENT EHCA_BMASK_IBM(16, 23)
+
+ #define ERROR_DATA_LENGTH EHCA_BMASK_IBM(52, 63)
+ #define ERROR_DATA_TYPE EHCA_BMASK_IBM( 0, 7)
+@@ -354,6 +355,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ {
+ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+ u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
++ u8 spec_event;
+
+ switch (ec) {
+ case 0x30: /* port availability change */
+@@ -394,6 +396,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ case 0x33: /* trace stopped */
+ ehca_err(&shca->ib_device, "Traced stopped.");
+ break;
++ case 0x34: /* util async event */
++ spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
++ if (spec_event == 0x80) /* client reregister required */
++ dispatch_port_event(shca, port,
++ IB_EVENT_CLIENT_REREGISTER,
++ "client reregister req.");
++ else
++ ehca_warn(&shca->ib_device, "Unknown util async "
++ "event %x on port %x", spec_event, port);
++ break;
+ default:
+ ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+ ec, shca->ib_device.name);
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch b/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
new file mode 100644
index 0000000..5fb3691
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
@@ -0,0 +1,31 @@
+From a8ed1e3c557c23e60a5bf4b2fe027f8453a255f4 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:20:37 +0100
+Subject: [PATCH] IB/ehca: Use round_jiffies() for EQ polling timer
+
+Use round_jiffies() to align ehca's 1-second timer with other timers
+and potentially save power by sleeping cores for longer.
+
+Signed-off-by: Anton Blanchard <anton at samba.org>
+Acked-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_main.c | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 90d4334..63d2de3 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -913,7 +913,7 @@ void ehca_poll_eqs(unsigned long data)
+ ehca_process_eq(shca, 0);
+ }
+ }
+- mod_timer(&poll_eqs_timer, jiffies + HZ);
++ mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
+ spin_unlock(&shca_list_lock);
+ }
+
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch b/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
new file mode 100644
index 0000000..31c9aa8
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
@@ -0,0 +1,35 @@
+From ebc0988b682e1fcc8d456b7e16ca94a02ced7e6a Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:21:24 +0100
+Subject: [PATCH] IB/ehca: Remove CQ-QP-link before destroying QP in error path of create_qp()
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_qp.c | 5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index 04e711f..db910bc 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -769,12 +769,15 @@ static struct ehca_qp *internal_create_qp(
+ if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ ehca_err(pd->device, "Copy to udata failed");
+ ret = -EINVAL;
+- goto create_qp_exit4;
++ goto create_qp_exit5;
+ }
+ }
+
+ return my_qp;
+
++create_qp_exit5:
++ ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
++
+ create_qp_exit4:
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch b/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
new file mode 100644
index 0000000..23777de
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
@@ -0,0 +1,59 @@
+From 52bdcb2961257e1b1d4564a33c28ed4876453fb3 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:25:59 +0100
+Subject: [PATCH] IB/ehca: Define array to store SMI/GSI QPs
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h | 2 +-
+ drivers/infiniband/hw/ehca/ehca_main.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 87f12d4..5c6d3fa 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -94,7 +94,7 @@ struct ehca_sma_attr {
+
+ struct ehca_sport {
+ struct ib_cq *ibcq_aqp1;
+- struct ib_qp *ibqp_aqp1;
++ struct ib_qp *ibqp_sqp[2];
+ enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
+ };
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 63d2de3..18122c1 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -498,7 +498,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+ }
+ sport->ibcq_aqp1 = ibcq;
+
+- if (sport->ibqp_aqp1) {
++ if (sport->ibqp_sqp[IB_QPT_GSI]) {
+ ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+ ret = -EPERM;
+ goto create_aqp1;
+@@ -524,7 +524,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+ ret = PTR_ERR(ibqp);
+ goto create_aqp1;
+ }
+- sport->ibqp_aqp1 = ibqp;
++ sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
+
+ return 0;
+
+@@ -537,7 +537,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
+ {
+ int ret;
+
+- ret = ib_destroy_qp(sport->ibqp_aqp1);
++ ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
+ if (ret) {
+ ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
+ return ret;
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch b/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
new file mode 100644
index 0000000..e61ef32
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
@@ -0,0 +1,414 @@
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_classes.h ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_classes.h
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_classes.h 2008-02-01 09:04:53.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_classes.h 2008-02-01 09:08:13.000000000 -0800
+@@ -95,6 +95,10 @@ struct ehca_sma_attr {
+ struct ehca_sport {
+ struct ib_cq *ibcq_aqp1;
+ struct ib_qp *ibqp_sqp[2];
++ /* lock to serialize modify_qp() calls for sqp in normal
++ * and irq path (when event PORT_ACTIVE is received first time)
++ */
++ spinlock_t mod_sqp_lock;
+ enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
+ };
+@@ -141,6 +145,14 @@ enum ehca_ext_qp_type {
+ EQPT_SRQ = 3,
+ };
+
++/* struct to cache modify_qp()'s parms for GSI/SMI qp */
++struct ehca_mod_qp_parm {
++ int mask;
++ struct ib_qp_attr attr;
++};
++
++#define EHCA_MOD_QP_PARM_MAX 4
++
+ struct ehca_qp {
+ union {
+ struct ib_qp ib_qp;
+@@ -164,6 +176,9 @@ struct ehca_qp {
+ struct ehca_cq *recv_cq;
+ unsigned int sqerr_purgeflag;
+ struct hlist_node list_entries;
++ /* array to cache modify_qp()'s parms for GSI/SMI qp */
++ struct ehca_mod_qp_parm *mod_qp_parm;
++ int mod_qp_parm_idx;
+ /* mmap counter for resources mapped into user space */
+ u32 mm_count_squeue;
+ u32 mm_count_rqueue;
+@@ -323,6 +338,7 @@ extern int ehca_port_act_time;
+ extern int ehca_use_hp_mr;
+ extern int ehca_scaling_code;
+ extern int ehca_lock_hcalls;
++extern int ehca_nr_ports;
+
+ struct ipzu_queue_resp {
+ u32 qe_size; /* queue entry size */
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_irq.c ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_irq.c
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_irq.c 2008-02-01 09:04:53.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_irq.c 2008-02-01 09:07:44.000000000 -0800
+@@ -356,17 +356,33 @@ static void parse_ec(struct ehca_shca *s
+ u8 ec = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+ u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+ u8 spec_event;
++ struct ehca_sport *sport = &shca->sport[port - 1];
++ unsigned long flags;
+
+ switch (ec) {
+ case 0x30: /* port availability change */
+ if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+- shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
++ int suppress_event;
++ /* replay modify_qp for sqps */
++ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++ suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
++ if (sport->ibqp_sqp[IB_QPT_SMI])
++ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
++ if (!suppress_event)
++ ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
++ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++
++ /* AQP1 was destroyed, ignore this event */
++ if (suppress_event)
++ break;
++
++ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ ehca_query_sma_attr(shca, port,
+- &shca->sport[port - 1].saved_attr);
++ &sport->saved_attr);
+ } else {
+- shca->sport[port - 1].port_state = IB_PORT_DOWN;
++ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+ }
+@@ -380,11 +396,11 @@ static void parse_ec(struct ehca_shca *s
+ ehca_warn(&shca->ib_device, "disruptive port "
+ "%d configuration change", port);
+
+- shca->sport[port - 1].port_state = IB_PORT_DOWN;
++ sport->port_state = IB_PORT_DOWN;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ "is inactive");
+
+- shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
++ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
+ } else
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_iverbs.h ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_iverbs.h
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_iverbs.h 2008-01-30 03:24:40.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_iverbs.h 2008-02-01 09:07:44.000000000 -0800
+@@ -200,4 +200,6 @@ void ehca_free_fw_ctrlblock(void *ptr);
+ #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
+ #endif
+
++void ehca_recover_sqp(struct ib_qp *sqp);
++
+ #endif
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_main.c ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_main.c
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_main.c 2008-02-01 09:04:53.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_main.c 2008-02-01 09:07:44.000000000 -0800
+@@ -90,7 +90,8 @@ MODULE_PARM_DESC(hw_level,
+ "hardware level"
+ " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+ MODULE_PARM_DESC(nr_ports,
+- "number of connected ports (default: 2)");
++ "number of connected ports (-1: autodetect, 1: port one only, "
++ "2: two ports (default)");
+ MODULE_PARM_DESC(use_hp_mr,
+ "high performance MRs (0: no (default), 1: yes)");
+ MODULE_PARM_DESC(port_act_time,
+@@ -688,7 +689,7 @@ static int __devinit ehca_probe(struct o
+ struct ehca_shca *shca;
+ const u64 *handle;
+ struct ib_pd *ibpd;
+- int ret;
++ int ret, i;
+
+ handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
+ if (!handle) {
+@@ -709,6 +710,8 @@ static int __devinit ehca_probe(struct o
+ return -ENOMEM;
+ }
+ mutex_init(&shca->modify_mutex);
++ for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
++ spin_lock_init(&shca->sport[i].mod_sqp_lock);
+
+ shca->ofdev = dev;
+ shca->ipz_hca_handle.handle = *handle;
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_qp.c ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_qp.c
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_qp.c 2008-02-01 09:04:53.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_qp.c 2008-02-01 09:07:44.000000000 -0800
+@@ -729,12 +729,31 @@ static struct ehca_qp *internal_create_q
+ init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+ my_qp->init_attr = *init_attr;
+
++ if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
++ shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
++ &my_qp->ib_qp;
++ if (ehca_nr_ports < 0) {
++ /* alloc array to cache subsequent modify qp parms
++ * for autodetect mode
++ */
++ my_qp->mod_qp_parm =
++ kzalloc(EHCA_MOD_QP_PARM_MAX *
++ sizeof(*my_qp->mod_qp_parm),
++ GFP_KERNEL);
++ if (!my_qp->mod_qp_parm) {
++ ehca_err(pd->device,
++ "Could not alloc mod_qp_parm");
++ goto create_qp_exit4;
++ }
++ }
++ }
++
+ /* NOTE: define_apq0() not supported yet */
+ if (qp_type == IB_QPT_GSI) {
+ h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+ if (h_ret != H_SUCCESS) {
+ ret = ehca2ib_return_code(h_ret);
+- goto create_qp_exit4;
++ goto create_qp_exit5;
+ }
+ }
+
+@@ -743,7 +762,7 @@ static struct ehca_qp *internal_create_q
+ if (ret) {
+ ehca_err(pd->device,
+ "Couldn't assign qp to send_cq ret=%i", ret);
+- goto create_qp_exit4;
++ goto create_qp_exit5;
+ }
+ }
+
+@@ -769,15 +788,18 @@ static struct ehca_qp *internal_create_q
+ if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ ehca_err(pd->device, "Copy to udata failed");
+ ret = -EINVAL;
+- goto create_qp_exit5;
++ goto create_qp_exit6;
+ }
+ }
+
+ return my_qp;
+
+-create_qp_exit5:
++create_qp_exit6:
+ ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+
++create_qp_exit5:
++ kfree(my_qp->mod_qp_parm);
++
+ create_qp_exit4:
+ if (HAS_RQ(my_qp))
+ ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+@@ -995,7 +1017,7 @@ static int internal_modify_qp(struct ib_
+ unsigned long flags = 0;
+
+ /* do query_qp to obtain current attr values */
+- mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
++ mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ if (!mqpcb) {
+ ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+ "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+@@ -1183,6 +1205,8 @@ static int internal_modify_qp(struct ib_
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+ }
+ if (attr_mask & IB_QP_PORT) {
++ struct ehca_sport *sport;
++ struct ehca_qp *aqp1;
+ if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+ ret = -EINVAL;
+ ehca_err(ibqp->device, "Invalid port=%x. "
+@@ -1191,6 +1215,29 @@ static int internal_modify_qp(struct ib_
+ shca->num_ports);
+ goto modify_qp_exit2;
+ }
++ sport = &shca->sport[attr->port_num - 1];
++ if (!sport->ibqp_sqp[IB_QPT_GSI]) {
++ /* should not occur */
++ ret = -EFAULT;
++ ehca_err(ibqp->device, "AQP1 was not created for "
++ "port=%x", attr->port_num);
++ goto modify_qp_exit2;
++ }
++ aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
++ struct ehca_qp, ib_qp);
++ if (ibqp->qp_type != IB_QPT_GSI &&
++ ibqp->qp_type != IB_QPT_SMI &&
++ aqp1->mod_qp_parm) {
++ /*
++ * firmware will reject this modify_qp() because
++ * port is not activated/initialized fully
++ */
++ ret = -EFAULT;
++ ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
++ "either port is being activated (try again) "
++ "or cabling issue", attr->port_num);
++ goto modify_qp_exit2;
++ }
+ mqpcb->prim_phys_port = attr->port_num;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+ }
+@@ -1470,6 +1517,8 @@ modify_qp_exit1:
+ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ struct ib_udata *udata)
+ {
++ struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
++ ib_device);
+ struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+ struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+ ib_pd);
+@@ -1482,9 +1531,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, s
+ return -EINVAL;
+ }
+
++ /* The if-block below caches qp_attr to be modified for GSI and SMI
++ * qps during the initialization by ib_mad. When the respective port
++ * is activated, ie we got an event PORT_ACTIVE, we'll replay the
++ * cached modify calls sequence, see ehca_recover_sqp() below.
++ * Why that is required:
++ * 1) If one port is connected, older code requires that port one
++ * to be connected and module option nr_ports=1 to be given by
++ * user, which is very inconvenient for end user.
++ * 2) Firmware accepts modify_qp() only if respective port has become
++ * active. Older code had a wait loop of 30sec create_qp()/
++ * define_aqp1(), which is not appropriate in practice. This
++ * code now removes that wait loop, see define_aqp1(), and always
++ * reports all ports to ib_mad resp. users. Only activated ports
++ * will then be usable for the users.
++ */
++ if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
++ int port = my_qp->init_attr.port_num;
++ struct ehca_sport *sport = &shca->sport[port - 1];
++ unsigned long flags;
++ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++ /* cache qp_attr only during init */
++ if (my_qp->mod_qp_parm) {
++ struct ehca_mod_qp_parm *p;
++ if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
++ ehca_err(&shca->ib_device,
++ "mod_qp_parm overflow state=%x port=%x"
++ " type=%x", attr->qp_state,
++ my_qp->init_attr.port_num,
++ ibqp->qp_type);
++ spin_unlock_irqrestore(&sport->mod_sqp_lock,
++ flags);
++ return -EINVAL;
++ }
++ p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
++ p->mask = attr_mask;
++ p->attr = *attr;
++ my_qp->mod_qp_parm_idx++;
++ ehca_dbg(&shca->ib_device,
++ "Saved qp_attr for state=%x port=%x type=%x",
++ attr->qp_state, my_qp->init_attr.port_num,
++ ibqp->qp_type);
++ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++ return 0;
++ }
++ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++ }
++
+ return internal_modify_qp(ibqp, attr, attr_mask, 0);
+ }
+
++void ehca_recover_sqp(struct ib_qp *sqp)
++{
++ struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
++ int port = my_sqp->init_attr.port_num;
++ struct ib_qp_attr attr;
++ struct ehca_mod_qp_parm *qp_parm;
++ int i, qp_parm_idx, ret;
++ unsigned long flags, wr_cnt;
++
++ if (!my_sqp->mod_qp_parm)
++ return;
++ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
++
++ qp_parm = my_sqp->mod_qp_parm;
++ qp_parm_idx = my_sqp->mod_qp_parm_idx;
++ for (i = 0; i < qp_parm_idx; i++) {
++ attr = qp_parm[i].attr;
++ ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
++ if (ret) {
++ ehca_err(sqp->device, "Could not modify SQP port=%x "
++ "qp_num=%x ret=%x", port, sqp->qp_num, ret);
++ goto free_qp_parm;
++ }
++ ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
++ port, sqp->qp_num, attr.qp_state);
++ }
++
++ /* re-trigger posted recv wrs */
++ wr_cnt = my_sqp->ipz_rqueue.current_q_offset /
++ my_sqp->ipz_rqueue.qe_size;
++ if (wr_cnt) {
++ spin_lock_irqsave(&my_sqp->spinlock_r, flags);
++ hipz_update_rqa(my_sqp, wr_cnt);
++ spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
++ ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
++ port, sqp->qp_num, wr_cnt);
++ }
++
++free_qp_parm:
++ kfree(qp_parm);
++ /* this prevents subsequent calls to modify_qp() to cache qp_attr */
++ my_sqp->mod_qp_parm = NULL;
++}
++
+ int ehca_query_qp(struct ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+@@ -1772,6 +1912,7 @@ static int internal_destroy_qp(struct ib
+ struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
+ struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+ ib_pd);
++ struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
+ u32 cur_pid = current->tgid;
+ u32 qp_num = my_qp->real_qp_num;
+ int ret;
+@@ -1818,6 +1959,14 @@ static int internal_destroy_qp(struct ib
+ port_num = my_qp->init_attr.port_num;
+ qp_type = my_qp->init_attr.qp_type;
+
++ if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
++ spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++ kfree(my_qp->mod_qp_parm);
++ my_qp->mod_qp_parm = NULL;
++ shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
++ spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++ }
++
+ /* no support for IB_QPT_SMI yet */
+ if (qp_type == IB_QPT_GSI) {
+ struct ib_event event;
+diff -Nurp ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_sqp.c ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_sqp.c
+--- ofa_kernel-1.3_a/drivers/infiniband/hw/ehca/ehca_sqp.c 2008-01-30 03:24:40.000000000 -0800
++++ ofa_kernel-1.3_b/drivers/infiniband/hw/ehca/ehca_sqp.c 2008-02-01 09:07:44.000000000 -0800
+@@ -40,11 +40,8 @@
+ */
+
+
+-#include <linux/module.h>
+-#include <linux/err.h>
+ #include "ehca_classes.h"
+ #include "ehca_tools.h"
+-#include "ehca_qes.h"
+ #include "ehca_iverbs.h"
+ #include "hcp_if.h"
+
+@@ -93,6 +90,9 @@ u64 ehca_define_sqp(struct ehca_shca *sh
+ return H_PARAMETER;
+ }
+
++ if (ehca_nr_ports < 0) /* autodetect mode */
++ return H_SUCCESS;
++
+ for (counter = 0;
+ shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+ counter < ehca_port_act_time;
diff --git a/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch b/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch
new file mode 100644
index 0000000..a7744d2
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch
@@ -0,0 +1,276 @@
+From a1f46cca6affc61b78050b85eec957b14fa6ea58 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:27:52 +0100
+Subject: [PATCH] IB/ehca: Prevent RDMA-related connection failures on some eHCA2 hardware
+
+Some HW revisions of eHCA2 may cause an RC connection to break if they
+received RDMA Reads over that connection before. This can be
+prevented by assuring that, after the first RDMA Read, the QP receives
+a new RDMA Read every few million link packets.
+
+Include code into the driver that inserts an empty (size 0) RDMA Read
+into the message stream every now and then if the consumer doesn't
+post them frequently enough.
+
+Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h | 5 ++
+ drivers/infiniband/hw/ehca/ehca_qp.c | 14 +++-
+ drivers/infiniband/hw/ehca/ehca_reqs.c | 112 ++++++++++++++++++++--------
+ 3 files changed, 95 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 997c3d1..8b76ac3 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -183,6 +183,11 @@ struct ehca_qp {
+ u32 mm_count_squeue;
+ u32 mm_count_rqueue;
+ u32 mm_count_galpa;
++ /* unsolicited ack circumvention */
++ int unsol_ack_circ;
++ int mtu_shift;
++ u32 message_count;
++ u32 packet_count;
+ };
+
+ #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index 53310f0..e32f964 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -592,10 +592,8 @@ static struct ehca_qp *internal_create_qp(
+ goto create_qp_exit1;
+ }
+
+- if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+- parms.sigtype = HCALL_SIGT_EVERY;
+- else
+- parms.sigtype = HCALL_SIGT_BY_WQE;
++ /* Always signal by WQE so we can hide circ. WQEs */
++ parms.sigtype = HCALL_SIGT_BY_WQE;
+
+ /* UD_AV CIRCUMVENTION */
+ max_send_sge = init_attr->cap.max_send_sge;
+@@ -618,6 +616,10 @@ static struct ehca_qp *internal_create_qp(
+ parms.squeue.max_sge = max_send_sge;
+ parms.rqueue.max_sge = max_recv_sge;
+
++ /* RC QPs need one more SWQE for unsolicited ack circumvention */
++ if (qp_type == IB_QPT_RC)
++ parms.squeue.max_wr++;
++
+ if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
+ if (HAS_SQ(my_qp))
+ ehca_determine_small_queue(
+@@ -650,6 +652,8 @@ static struct ehca_qp *internal_create_qp(
+ parms.squeue.act_nr_sges = 1;
+ parms.rqueue.act_nr_sges = 1;
+ }
++ /* hide the extra WQE */
++ parms.squeue.act_nr_wqes--;
+ break;
+ case IB_QPT_UD:
+ case IB_QPT_GSI:
+@@ -1294,6 +1298,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
+ }
+
+ if (attr_mask & IB_QP_PATH_MTU) {
++ /* store ld(MTU) */
++ my_qp->mtu_shift = attr->path_mtu + 7;
+ mqpcb->path_mtu = attr->path_mtu;
+ update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+ }
+diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
+index ea91360..3aacc8c 100644
+--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
++++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
+@@ -50,6 +50,9 @@
+ #include "hcp_if.h"
+ #include "hipz_fns.h"
+
++/* in RC traffic, insert an empty RDMA READ every this many packets */
++#define ACK_CIRC_THRESHOLD 2000000
++
+ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+ struct ehca_wqe *wqe_p,
+ struct ib_recv_wr *recv_wr)
+@@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+ if (ehca_debug_level) {
+ ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
+ ipz_rqueue);
+- ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
++ ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+ }
+
+ return 0;
+@@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+
+ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ struct ehca_wqe *wqe_p,
+- const struct ib_send_wr *send_wr)
++ const struct ib_send_wr *send_wr,
++ int hidden)
+ {
+ u32 idx;
+ u64 dma_length;
+@@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+
+ wqe_p->wr_flag = 0;
+
+- if (send_wr->send_flags & IB_SEND_SIGNALED)
++ if ((send_wr->send_flags & IB_SEND_SIGNALED ||
++ qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
++ && !hidden)
+ wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+
+ if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+@@ -199,7 +205,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+
+ wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+ wqe_p->local_ee_context_qkey = remote_qkey;
+- if (!send_wr->wr.ud.ah) {
++ if (unlikely(!send_wr->wr.ud.ah)) {
+ ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+ return -EINVAL;
+ }
+@@ -255,6 +261,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ } /* eof idx */
+ wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+
++ /* unsolicited ack circumvention */
++ if (send_wr->opcode == IB_WR_RDMA_READ) {
++ /* on RDMA read, switch on and reset counters */
++ qp->message_count = qp->packet_count = 0;
++ qp->unsol_ack_circ = 1;
++ } else
++ /* else estimate #packets */
++ qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
++
+ break;
+
+ default:
+@@ -355,13 +370,49 @@ static inline void map_ib_wc_status(u32 cqe_status,
+ *wc_status = IB_WC_SUCCESS;
+ }
+
++static inline int post_one_send(struct ehca_qp *my_qp,
++ struct ib_send_wr *cur_send_wr,
++ struct ib_send_wr **bad_send_wr,
++ int hidden) /* passed through to ehca_write_swqe() */
++{
++ struct ehca_wqe *wqe_p;
++ int ret;
++ u64 start_offset = my_qp->ipz_squeue.current_q_offset;
++
++ /* get pointer to the next free WQE */
++ wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
++ if (unlikely(!wqe_p)) {
++ /* too many posted work requests: queue overflow */
++ if (bad_send_wr) /* optional: callers may pass NULL */
++ *bad_send_wr = cur_send_wr;
++ ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
++ "qp_num=%x", my_qp->ib_qp.qp_num);
++ return -ENOMEM;
++ }
++ /* write a SEND WQE into the QUEUE */
++ ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
++ /*
++ * if something failed,
++ * reset the free entry pointer to the start value
++ */
++ if (unlikely(ret)) {
++ my_qp->ipz_squeue.current_q_offset = start_offset;
++ if (bad_send_wr)
++ *bad_send_wr = cur_send_wr;
++ ehca_err(my_qp->ib_qp.device, "Could not write WQE "
++ "qp_num=%x", my_qp->ib_qp.qp_num);
++ return -EINVAL; /* ret's own value is not propagated */
++ }
++
++ return 0;
++}
++
+ int ehca_post_send(struct ib_qp *qp,
+ struct ib_send_wr *send_wr,
+ struct ib_send_wr **bad_send_wr)
+ {
+ struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+ struct ib_send_wr *cur_send_wr;
+- struct ehca_wqe *wqe_p;
+ int wqe_cnt = 0;
+ int ret = 0;
+ unsigned long flags;
+@@ -369,37 +420,33 @@ int ehca_post_send(struct ib_qp *qp,
+ /* LOCK the QUEUE */
+ spin_lock_irqsave(&my_qp->spinlock_s, flags);
+
++ /* Send an empty extra RDMA read if:
++ * 1) there has been an RDMA read on this connection before
++ * 2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
++ * 3) we can be sure that any previous extra RDMA read has been
++ * processed so we don't overflow the SQ
++ */
++ if (unlikely(my_qp->unsol_ack_circ &&
++ my_qp->packet_count > ACK_CIRC_THRESHOLD &&
++ my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
++ /* insert an empty RDMA READ to fix up the remote QP state */
++ struct ib_send_wr circ_wr;
++ memset(&circ_wr, 0, sizeof(circ_wr));
++ circ_wr.opcode = IB_WR_RDMA_READ;
++ post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
++ wqe_cnt++;
++ ehca_dbg(qp->device, "posted circ wr qp_num=%x", qp->qp_num);
++ my_qp->message_count = my_qp->packet_count = 0;
++ }
++
+ /* loop processes list of send reqs */
+ for (cur_send_wr = send_wr; cur_send_wr != NULL;
+ cur_send_wr = cur_send_wr->next) {
+- u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+- /* get pointer next to free WQE */
+- wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+- if (unlikely(!wqe_p)) {
+- /* too many posted work requests: queue overflow */
+- if (bad_send_wr)
+- *bad_send_wr = cur_send_wr;
+- if (wqe_cnt == 0) {
+- ret = -ENOMEM;
+- ehca_err(qp->device, "Too many posted WQEs "
+- "qp_num=%x", qp->qp_num);
+- }
+- goto post_send_exit0;
+- }
+- /* write a SEND WQE into the QUEUE */
+- ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+- /*
+- * if something failed,
+- * reset the free entry pointer to the start value
+- */
++ ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ if (unlikely(ret)) {
+- my_qp->ipz_squeue.current_q_offset = start_offset;
+- *bad_send_wr = cur_send_wr;
+- if (wqe_cnt == 0) {
+- ret = -EINVAL;
+- ehca_err(qp->device, "Could not write WQE "
+- "qp_num=%x", qp->qp_num);
+- }
++ /* if one or more WQEs were successful, don't fail */
++ if (wqe_cnt)
++ ret = 0;
+ goto post_send_exit0;
+ }
+ wqe_cnt++;
+@@ -410,6 +457,7 @@ int ehca_post_send(struct ib_qp *qp,
+ post_send_exit0:
+ iosync(); /* serialize GAL register access */
+ hipz_update_sqa(my_qp, wqe_cnt);
++ my_qp->message_count += wqe_cnt;
+ spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+ return ret;
+ }
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0008_Prevent_sending_ud_packets_to_qp0.patch b/kernel_patches/fixes/ehca_0008_Prevent_sending_ud_packets_to_qp0.patch
new file mode 100644
index 0000000..e63556b
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0008_Prevent_sending_ud_packets_to_qp0.patch
@@ -0,0 +1,18 @@
+diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
+index 3aacc8c..2ce8cff 100644
+--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
++++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
+@@ -209,6 +209,10 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+ return -EINVAL;
+ }
++ if (unlikely(send_wr->wr.ud.remote_qpn == 0)) {
++ ehca_gen_err("dest QP# is 0. qp=%x", qp->real_qp_num);
++ return -EINVAL;
++ }
+ my_av = container_of(send_wr->wr.ud.ah, struct ehca_av, ib_ah);
+ wqe_p->u.ud_av.ud_av = my_av->av;
+
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0009_Update_sma_attr_also_in_case_of_disruptive.patch b/kernel_patches/fixes/ehca_0009_Update_sma_attr_also_in_case_of_disruptive.patch
new file mode 100644
index 0000000..eb7ae70
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0009_Update_sma_attr_also_in_case_of_disruptive.patch
@@ -0,0 +1,26 @@
+From 124dc3b2e2c856e670529cd9ce76824535b30984 Mon Sep 17 00:00:00 2001
+From: Joachim Fenkes <fenkes at de.ibm.com>
+Date: Fri, 25 Jan 2008 18:12:13 +0100
+Subject: [PATCH] IB/ehca: Update sma_attr also in case of disruptive config change
+
+Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
+---
+ drivers/infiniband/hw/ehca/ehca_irq.c | 2 ++
+ 1 files changed, 2 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
+index 863b34f..b5ca94c 100644
+--- a/drivers/infiniband/hw/ehca/ehca_irq.c
++++ b/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -403,6 +403,8 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ sport->port_state = IB_PORT_ACTIVE;
+ dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ "is active");
++ ehca_query_sma_attr(shca, port,
++ &sport->saved_attr);
+ } else
+ notify_port_conf_change(shca, port);
+ break;
+--
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0010_Add_PMA_support.patch b/kernel_patches/fixes/ehca_0010_Add_PMA_support.patch
new file mode 100644
index 0000000..e82e297
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0010_Add_PMA_support.patch
@@ -0,0 +1,180 @@
+From 1aa6a5771ee6ad88332d0265a802cc7ec2017839 Mon Sep 17 00:00:00 2001
+From: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Date: Fri, 25 Jan 2008 18:14:10 +0100
+Subject: [PATCH] IB/ehca: Add PMA support
+This patch enables ehca to redirect any PMA queries to the
+actual PMA QP.
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Reviewed-by: Joachim Fenkes <fenkes at de.ibm.com>
+Reviewed-by: Christoph Raisch <raisch at de.ibm.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h | 1 +
+ drivers/infiniband/hw/ehca/ehca_iverbs.h | 5 ++
+ drivers/infiniband/hw/ehca/ehca_main.c | 2 +-
+ drivers/infiniband/hw/ehca/ehca_sqp.c | 91 +++++++++++++++++++++++++++++
+ 4 files changed, 98 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index f281d16..92cce8a 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -101,6 +101,7 @@ struct ehca_sport {
+ spinlock_t mod_sqp_lock;
+ enum ib_port_state port_state;
+ struct ehca_sma_attr saved_attr;
++ u32 pma_qp_nr;
+ };
+
+ #define HCA_CAP_MR_PGSIZE_4K 0x80000000
+diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
+index c469bfd..a8a2ea5 100644
+--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
++++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
+@@ -187,6 +187,11 @@ int ehca_dealloc_ucontext(struct ib_ucontext *context);
+
+ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
++int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
++ struct ib_wc *in_wc, struct ib_grh *in_grh,
++ struct ib_mad *in_mad,
++ struct ib_mad *out_mad);
++
+ void ehca_poll_eqs(unsigned long data);
+
+ int ehca_calc_ipd(struct ehca_shca *shca, int port,
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 0fe0c84..33b5bac 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -472,7 +472,7 @@ int ehca_init_device(struct ehca_shca *shca)
+ shca->ib_device.dealloc_fmr = ehca_dealloc_fmr;
+ shca->ib_device.attach_mcast = ehca_attach_mcast;
+ shca->ib_device.detach_mcast = ehca_detach_mcast;
+- /* shca->ib_device.process_mad = ehca_process_mad; */
++ shca->ib_device.process_mad = ehca_process_mad;
+ shca->ib_device.mmap = ehca_mmap;
+
+ if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
+index 79e72b2..706d97a 100644
+--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
++++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
+@@ -39,12 +39,18 @@
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
++#include <rdma/ib_mad.h>
+
+ #include "ehca_classes.h"
+ #include "ehca_tools.h"
+ #include "ehca_iverbs.h"
+ #include "hcp_if.h"
+
++#define IB_MAD_STATUS_REDIRECT __constant_htons(0x0002)
++#define IB_MAD_STATUS_UNSUP_VERSION __constant_htons(0x0004)
++#define IB_MAD_STATUS_UNSUP_METHOD __constant_htons(0x0008)
++
++#define IB_PMA_CLASS_PORT_INFO __constant_htons(0x0001)
+
+ /**
+ * ehca_define_sqp - Defines special queue pair 1 (GSI QP). When special queue
+@@ -83,6 +89,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
+ port, ret);
+ return ret;
+ }
++ shca->sport[port - 1].pma_qp_nr = pma_qp_nr;
++ ehca_dbg(&shca->ib_device, "port=%x pma_qp_nr=%x",
++ port, pma_qp_nr);
+ break;
+ default:
+ ehca_err(&shca->ib_device, "invalid qp_type=%x",
+@@ -109,3 +118,85 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
+
+ return H_SUCCESS;
+ }
++
++struct ib_perf {
++ struct ib_mad_hdr mad_hdr; /* method, status, attr_id, class_version */
++ u8 reserved[40];
++ u8 data[192]; /* viewed as struct ib_class_port_info on redirect */
++} __attribute__ ((packed));
++
++
++static int ehca_process_perf(struct ib_device *ibdev, u8 port_num,
++ struct ib_mad *in_mad, struct ib_mad *out_mad)
++{
++ struct ib_perf *in_perf = (struct ib_perf *)in_mad;
++ struct ib_perf *out_perf = (struct ib_perf *)out_mad;
++ struct ib_class_port_info *poi =
++ (struct ib_class_port_info *)out_perf->data;
++ struct ehca_shca *shca =
++ container_of(ibdev, struct ehca_shca, ib_device);
++ struct ehca_sport *sport = &shca->sport[port_num - 1];
++
++ ehca_dbg(ibdev, "method=%x", in_perf->mad_hdr.method);
++
++ *out_mad = *in_mad; /* reply starts as a copy of the request */
++
++ if (in_perf->mad_hdr.class_version != 1) {
++ ehca_warn(ibdev, "Unsupported class_version=%x",
++ in_perf->mad_hdr.class_version);
++ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_VERSION;
++ goto perf_reply;
++ }
++
++ switch (in_perf->mad_hdr.method) {
++ case IB_MGMT_METHOD_GET:
++ case IB_MGMT_METHOD_SET:
++ /* reply with class port info redirecting to the PMA QP */
++ out_perf->mad_hdr.attr_id = IB_PMA_CLASS_PORT_INFO;
++ out_perf->mad_hdr.status = IB_MAD_STATUS_REDIRECT;
++ memset(poi, 0, sizeof(*poi));
++ poi->base_version = 1;
++ poi->class_version = 1;
++ poi->resp_time_value = 18;
++ poi->redirect_lid = sport->saved_attr.lid;
++ poi->redirect_qp = sport->pma_qp_nr;
++ poi->redirect_qkey = IB_QP1_QKEY;
++ poi->redirect_pkey = IB_DEFAULT_PKEY_FULL;
++
++ ehca_dbg(ibdev, "ehca_pma_lid=%x ehca_pma_qp=%x",
++ sport->saved_attr.lid, sport->pma_qp_nr);
++ break;
++
++ case IB_MGMT_METHOD_GET_RESP:
++ return IB_MAD_RESULT_FAILURE; /* no reply is built for this */
++
++ default:
++ out_perf->mad_hdr.status = IB_MAD_STATUS_UNSUP_METHOD;
++ break;
++ }
++
++perf_reply:
++ out_perf->mad_hdr.method = IB_MGMT_METHOD_GET_RESP; /* mark as reply */
++
++ return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
++}
++
++int ehca_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
++ struct ib_wc *in_wc, struct ib_grh *in_grh,
++ struct ib_mad *in_mad,
++ struct ib_mad *out_mad)
++{
++ int ret;
++
++ if (!port_num || port_num > ibdev->phys_port_cnt || !in_wc)
++ return IB_MAD_RESULT_FAILURE; /* in_wc is dereferenced below */
++
++ /* only perf mgmt class MADs are processed; others return SUCCESS */
++ if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
++ return IB_MAD_RESULT_SUCCESS;
++
++ ehca_dbg(ibdev, "port_num=%x src_qp=%x", port_num, in_wc->src_qp);
++ ret = ehca_process_perf(ibdev, port_num, in_mad, out_mad);
++
++ return ret;
++}
+--
+1.5.2
+
--
1.5.2
More information about the ewg
mailing list