[ofa-general] [PATCH 1/3] ofed-1.3-rc3 IB/ehca: upstream patches (2.6.25)

Hoang-Nam Nguyen hnguyen at linux.vnet.ibm.com
Wed Jan 23 09:46:35 PST 2008


IB/ehca: set of patches queued for 2.6.25 and needed for ofed-1.3 release

0001: Add missing spaces in the middle of format strings
0002: Forward event client-reregister-required to registered clients
0003: Use round_jiffies() for EQ polling timer
0004: Remove CQ-QP-link before destroying QP in error path of create_qp()
0005: Define array to store SMI/GSI QPs
0006: Add "port connection autodetect mode"
0007: Prevent RDMA-related connection failures on some eHCA2 hardware

Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
---
 ...dd_missing_spaces_in_the_middle_of_format.patch |   59 +++
 ..._Forward_event_client_reregister_required.patch |   55 +++
 ...03_Use_round_jiffies_for_EQ_polling_timer.patch |   31 ++
 ...04_Remove_CQ_QP_link_before_destroying_QP.patch |   35 ++
 ...ca_0005_Define_array_to_store_SMI_GSI_QPs.patch |   59 +++
 ..._0006_Add_port_connection_autodetect_mode.patch |  472 ++++++++++++++++++++
 ..._Prevent_RDMA_related_connection_failures.patch |  276 ++++++++++++
 7 files changed, 987 insertions(+), 0 deletions(-)
 create mode 100644 kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
 create mode 100644 kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
 create mode 100644 kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
 create mode 100644 kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
 create mode 100644 kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
 create mode 100644 kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
 create mode 100644 kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch

diff --git a/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch b/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
new file mode 100644
index 0000000..ca13dd6
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0001_Add_missing_spaces_in_the_middle_of_format.patch
@@ -0,0 +1,59 @@
+From 41c38ba27fb89140311cfa0b1258b1ccc88eea7b Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:15:17 +0100
+Subject: [PATCH] IB/ehca: Add missing spaces in the middle of format strings.
+
+Signed-off-by: Joe Perches <joe at perches.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_cq.c |    2 +-
+ drivers/infiniband/hw/ehca/ehca_qp.c |    6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_cq.c b/drivers/infiniband/hw/ehca/ehca_cq.c
+index 79c25f5..0467c15 100644
+--- a/drivers/infiniband/hw/ehca/ehca_cq.c
++++ b/drivers/infiniband/hw/ehca/ehca_cq.c
+@@ -246,7 +246,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
+ 		} else {
+ 			if (h_ret != H_PAGE_REGISTERED) {
+ 				ehca_err(device, "Registration of page failed "
+-					 "ehca_cq=%p cq_num=%x h_ret=%li"
++					 "ehca_cq=%p cq_num=%x h_ret=%li "
+ 					 "counter=%i act_pages=%i",
+ 					 my_cq, my_cq->cq_number,
+ 					 h_ret, counter, param.act_pages);
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index dd12668..04e711f 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -858,7 +858,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ 				update_mask,
+ 				mqpcb, my_qp->galpas.kernel);
+ 	if (hret != H_SUCCESS) {
+-		ehca_err(pd->device, "Could not modify SRQ to INIT"
++		ehca_err(pd->device, "Could not modify SRQ to INIT "
+ 			 "ehca_qp=%p qp_num=%x h_ret=%li",
+ 			 my_qp, my_qp->real_qp_num, hret);
+ 		goto create_srq2;
+@@ -872,7 +872,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ 				update_mask,
+ 				mqpcb, my_qp->galpas.kernel);
+ 	if (hret != H_SUCCESS) {
+-		ehca_err(pd->device, "Could not enable SRQ"
++		ehca_err(pd->device, "Could not enable SRQ "
+ 			 "ehca_qp=%p qp_num=%x h_ret=%li",
+ 			 my_qp, my_qp->real_qp_num, hret);
+ 		goto create_srq2;
+@@ -886,7 +886,7 @@ struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+ 				update_mask,
+ 				mqpcb, my_qp->galpas.kernel);
+ 	if (hret != H_SUCCESS) {
+-		ehca_err(pd->device, "Could not modify SRQ to RTR"
++		ehca_err(pd->device, "Could not modify SRQ to RTR "
+ 			 "ehca_qp=%p qp_num=%x h_ret=%li",
+ 			 my_qp, my_qp->real_qp_num, hret);
+ 		goto create_srq2;
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch b/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
new file mode 100644
index 0000000..04159e2
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0002_Forward_event_client_reregister_required.patch
@@ -0,0 +1,55 @@
+From afe2f1d8e50933645608932bcbba7dd81144a96c Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:19:03 +0100
+Subject: [PATCH] IB/ehca: Forward event client-reregister-required to registered clients
+
+This patch allows ehca to forward event client-reregister-required to
+registered clients.  One such event is generated by a switch e.g. after
+its reboot.
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_irq.c |   12 ++++++++++++
+ 1 files changed, 12 insertions(+), 0 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
+index 3f617b2..4c734ec 100644
+--- a/drivers/infiniband/hw/ehca/ehca_irq.c
++++ b/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -62,6 +62,7 @@
+ #define NEQE_PORT_NUMBER       EHCA_BMASK_IBM( 8, 15)
+ #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16, 16)
+ #define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16, 16)
++#define NEQE_SPECIFIC_EVENT    EHCA_BMASK_IBM(16, 23)
+ 
+ #define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52, 63)
+ #define ERROR_DATA_TYPE        EHCA_BMASK_IBM( 0,  7)
+@@ -354,6 +355,7 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ {
+ 	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+ 	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
++	u8 spec_event;
+ 
+ 	switch (ec) {
+ 	case 0x30: /* port availability change */
+@@ -394,6 +396,16 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ 	case 0x33:  /* trace stopped */
+ 		ehca_err(&shca->ib_device, "Traced stopped.");
+ 		break;
++	case 0x34: /* util async event */
++		spec_event = EHCA_BMASK_GET(NEQE_SPECIFIC_EVENT, eqe);
++		if (spec_event == 0x80) /* client reregister required */
++			dispatch_port_event(shca, port,
++					    IB_EVENT_CLIENT_REREGISTER,
++					    "client reregister req.");
++		else
++			ehca_warn(&shca->ib_device, "Unknown util async "
++				  "event %x on port %x", spec_event, port);
++		break;
+ 	default:
+ 		ehca_err(&shca->ib_device, "Unknown event code: %x on %s.",
+ 			 ec, shca->ib_device.name);
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch b/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
new file mode 100644
index 0000000..5fb3691
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0003_Use_round_jiffies_for_EQ_polling_timer.patch
@@ -0,0 +1,31 @@
+From a8ed1e3c557c23e60a5bf4b2fe027f8453a255f4 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:20:37 +0100
+Subject: [PATCH] IB/ehca: Use round_jiffies() for EQ polling timer
+
+Use round_jiffies() to align ehca's 1-second timer with other timers
+and potentially save power by sleeping cores for longer.
+
+Signed-off-by: Anton Blanchard <anton at samba.org>
+Acked-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_main.c |    2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 90d4334..63d2de3 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -913,7 +913,7 @@ void ehca_poll_eqs(unsigned long data)
+ 				ehca_process_eq(shca, 0);
+ 		}
+ 	}
+-	mod_timer(&poll_eqs_timer, jiffies + HZ);
++	mod_timer(&poll_eqs_timer, round_jiffies(jiffies + HZ));
+ 	spin_unlock(&shca_list_lock);
+ }
+ 
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch b/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
new file mode 100644
index 0000000..31c9aa8
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0004_Remove_CQ_QP_link_before_destroying_QP.patch
@@ -0,0 +1,35 @@
+From ebc0988b682e1fcc8d456b7e16ca94a02ced7e6a Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:21:24 +0100
+Subject: [PATCH] IB/ehca: Remove CQ-QP-link before destroying QP in error path of create_qp()
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_qp.c |    5 ++++-
+ 1 files changed, 4 insertions(+), 1 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index 04e711f..db910bc 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -769,12 +769,15 @@ static struct ehca_qp *internal_create_qp(
+ 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ 			ehca_err(pd->device, "Copy to udata failed");
+ 			ret = -EINVAL;
+-			goto create_qp_exit4;
++			goto create_qp_exit5;
+ 		}
+ 	}
+ 
+ 	return my_qp;
+ 
++create_qp_exit5:
++	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
++
+ create_qp_exit4:
+ 	if (HAS_RQ(my_qp))
+ 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch b/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
new file mode 100644
index 0000000..23777de
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0005_Define_array_to_store_SMI_GSI_QPs.patch
@@ -0,0 +1,59 @@
+From 52bdcb2961257e1b1d4564a33c28ed4876453fb3 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:25:59 +0100
+Subject: [PATCH] IB/ehca: Define array to store SMI/GSI QPs
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h |    2 +-
+ drivers/infiniband/hw/ehca/ehca_main.c    |    6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 87f12d4..5c6d3fa 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -94,7 +94,7 @@ struct ehca_sma_attr {
+ 
+ struct ehca_sport {
+ 	struct ib_cq *ibcq_aqp1;
+-	struct ib_qp *ibqp_aqp1;
++	struct ib_qp *ibqp_sqp[2];
+ 	enum ib_port_state port_state;
+ 	struct ehca_sma_attr saved_attr;
+ };
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 63d2de3..18122c1 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -498,7 +498,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+ 	}
+ 	sport->ibcq_aqp1 = ibcq;
+ 
+-	if (sport->ibqp_aqp1) {
++	if (sport->ibqp_sqp[IB_QPT_GSI]) {
+ 		ehca_err(&shca->ib_device, "AQP1 QP is already created.");
+ 		ret = -EPERM;
+ 		goto create_aqp1;
+@@ -524,7 +524,7 @@ static int ehca_create_aqp1(struct ehca_shca *shca, u32 port)
+ 		ret = PTR_ERR(ibqp);
+ 		goto create_aqp1;
+ 	}
+-	sport->ibqp_aqp1 = ibqp;
++	sport->ibqp_sqp[IB_QPT_GSI] = ibqp;
+ 
+ 	return 0;
+ 
+@@ -537,7 +537,7 @@ static int ehca_destroy_aqp1(struct ehca_sport *sport)
+ {
+ 	int ret;
+ 
+-	ret = ib_destroy_qp(sport->ibqp_aqp1);
++	ret = ib_destroy_qp(sport->ibqp_sqp[IB_QPT_GSI]);
+ 	if (ret) {
+ 		ehca_gen_err("Cannot destroy AQP1 QP. ret=%i", ret);
+ 		return ret;
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch b/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
new file mode 100644
index 0000000..69bf0ab
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0006_Add_port_connection_autodetect_mode.patch
@@ -0,0 +1,472 @@
+From f75184dd4ac5ac23e05e888bccafbf3d633455fa Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:27:10 +0100
+Subject: [PATCH] IB/ehca: Add "port connection autodetect mode"
+
+This patch enhances ehca with a capability to "autodetect" the ports
+being connected physically. In order to utilize that function the
+module option nr_ports must be set to -1 (default is 2 - two
+ports). This feature is experimental and will be made the default later.
+
+More detail:
+
+If the user connects only one port to the switch, current code requires
+  1) port one to be connected and
+  2) module option nr_ports=1 to be given.
+
+If autodetect is enabled, ehca will not wait at creation of the GSI QP
+for the respective port to become active. Since firmware does not
+accept modify_qp() while the port is down at initialization, we need
+to cache all calls to modify_qp() for the SMI/GSI QP and just return a
+good return code.
+
+When a port is activated and we get a PORT_ACTIVE event, we replay the
+cached modify-qp() parms and re-trigger any posted recv WRs. Only then
+do we forward the PORT_ACTIVE event to registered clients.
+
+The result of this autodetect patch is that all ports will be
+accessible by the users. Depending on their respective cabling only
+those ports that are connected properly will become operable. If a
+user tries to modify a regular QP of a non-connected port, modify_qp()
+will fail. Furthermore, ibv_devinfo should show the port state
+accordingly.
+
+Note that this patch primarily improves the loading behaviour of
+ehca. If the cable is removed while the driver is operating and
+plugged in again, firmware will handle that properly by sending an
+appropriate async event.
+
+Signed-off-by: Hoang-Nam Nguyen <hnguyen at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h |   16 +++
+ drivers/infiniband/hw/ehca/ehca_irq.c     |   26 ++++-
+ drivers/infiniband/hw/ehca/ehca_iverbs.h  |    2 +
+ drivers/infiniband/hw/ehca/ehca_main.c    |    7 +-
+ drivers/infiniband/hw/ehca/ehca_qp.c      |  159 ++++++++++++++++++++++++++++-
+ drivers/infiniband/hw/ehca/ehca_sqp.c     |    6 +-
+ 6 files changed, 201 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 5c6d3fa..997c3d1 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -95,6 +95,10 @@ struct ehca_sma_attr {
+ struct ehca_sport {
+ 	struct ib_cq *ibcq_aqp1;
+ 	struct ib_qp *ibqp_sqp[2];
++	/* lock to serialize modify_qp() calls for sqp in normal
++	 * and irq path (when event PORT_ACTIVE is received first time)
++	 */
++	spinlock_t mod_sqp_lock;
+ 	enum ib_port_state port_state;
+ 	struct ehca_sma_attr saved_attr;
+ };
+@@ -141,6 +145,14 @@ enum ehca_ext_qp_type {
+ 	EQPT_SRQ       = 3,
+ };
+ 
++/* struct to cache modify_qp()'s parms for GSI/SMI qp */
++struct ehca_mod_qp_parm {
++	int mask;
++	struct ib_qp_attr attr;
++};
++
++#define EHCA_MOD_QP_PARM_MAX 4
++
+ struct ehca_qp {
+ 	union {
+ 		struct ib_qp ib_qp;
+@@ -164,6 +176,9 @@ struct ehca_qp {
+ 	struct ehca_cq *recv_cq;
+ 	unsigned int sqerr_purgeflag;
+ 	struct hlist_node list_entries;
++	/* array to cache modify_qp()'s parms for GSI/SMI qp */
++	struct ehca_mod_qp_parm *mod_qp_parm;
++	int mod_qp_parm_idx;
+ 	/* mmap counter for resources mapped into user space */
+ 	u32 mm_count_squeue;
+ 	u32 mm_count_rqueue;
+@@ -322,6 +337,7 @@ extern int ehca_static_rate;
+ extern int ehca_port_act_time;
+ extern int ehca_use_hp_mr;
+ extern int ehca_scaling_code;
++extern int ehca_nr_ports;
+ 
+ struct ipzu_queue_resp {
+ 	u32 qe_size;      /* queue entry size */
+diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
+index 4c734ec..863b34f 100644
+--- a/drivers/infiniband/hw/ehca/ehca_irq.c
++++ b/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -356,17 +356,33 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ 	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
+ 	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
+ 	u8 spec_event;
++	struct ehca_sport *sport = &shca->sport[port - 1];
++	unsigned long flags;
+ 
+ 	switch (ec) {
+ 	case 0x30: /* port availability change */
+ 		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
+-			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
++			int suppress_event;
++			/* replay modify_qp for sqps */
++			spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++			suppress_event = !sport->ibqp_sqp[IB_QPT_GSI];
++			if (sport->ibqp_sqp[IB_QPT_SMI])
++				ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_SMI]);
++			if (!suppress_event)
++				ehca_recover_sqp(sport->ibqp_sqp[IB_QPT_GSI]);
++			spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++
++			/* AQP1 was destroyed, ignore this event */
++			if (suppress_event)
++				break;
++
++			sport->port_state = IB_PORT_ACTIVE;
+ 			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ 					    "is active");
+ 			ehca_query_sma_attr(shca, port,
+-					    &shca->sport[port - 1].saved_attr);
++					    &sport->saved_attr);
+ 		} else {
+-			shca->sport[port - 1].port_state = IB_PORT_DOWN;
++			sport->port_state = IB_PORT_DOWN;
+ 			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ 					    "is inactive");
+ 		}
+@@ -380,11 +396,11 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
+ 			ehca_warn(&shca->ib_device, "disruptive port "
+ 				  "%d configuration change", port);
+ 
+-			shca->sport[port - 1].port_state = IB_PORT_DOWN;
++			sport->port_state = IB_PORT_DOWN;
+ 			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+ 					    "is inactive");
+ 
+-			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
++			sport->port_state = IB_PORT_ACTIVE;
+ 			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+ 					    "is active");
+ 		} else
+diff --git a/drivers/infiniband/hw/ehca/ehca_iverbs.h b/drivers/infiniband/hw/ehca/ehca_iverbs.h
+index 5485799..c469bfd 100644
+--- a/drivers/infiniband/hw/ehca/ehca_iverbs.h
++++ b/drivers/infiniband/hw/ehca/ehca_iverbs.h
+@@ -200,4 +200,6 @@ void ehca_free_fw_ctrlblock(void *ptr);
+ #define ehca_free_fw_ctrlblock(ptr) free_page((unsigned long)(ptr))
+ #endif
+ 
++void ehca_recover_sqp(struct ib_qp *sqp);
++
+ #endif
+diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
+index 18122c1..6d247ad 100644
+--- a/drivers/infiniband/hw/ehca/ehca_main.c
++++ b/drivers/infiniband/hw/ehca/ehca_main.c
+@@ -87,7 +87,8 @@ MODULE_PARM_DESC(hw_level,
+ 		 "hardware level"
+ 		 " (0: autosensing (default), 1: v. 0.20, 2: v. 0.21)");
+ MODULE_PARM_DESC(nr_ports,
+-		 "number of connected ports (default: 2)");
++		 "number of connected ports (-1: autodetect, 1: port one only, "
++		 "2: two ports (default)");
+ MODULE_PARM_DESC(use_hp_mr,
+ 		 "high performance MRs (0: no (default), 1: yes)");
+ MODULE_PARM_DESC(port_act_time,
+@@ -675,7 +676,7 @@ static int __devinit ehca_probe(struct of_device *dev,
+ 	struct ehca_shca *shca;
+ 	const u64 *handle;
+ 	struct ib_pd *ibpd;
+-	int ret;
++	int ret, i;
+ 
+ 	handle = of_get_property(dev->node, "ibm,hca-handle", NULL);
+ 	if (!handle) {
+@@ -696,6 +697,8 @@ static int __devinit ehca_probe(struct of_device *dev,
+ 		return -ENOMEM;
+ 	}
+ 	mutex_init(&shca->modify_mutex);
++	for (i = 0; i < ARRAY_SIZE(shca->sport); i++)
++		spin_lock_init(&shca->sport[i].mod_sqp_lock);
+ 
+ 	shca->ofdev = dev;
+ 	shca->ipz_hca_handle.handle = *handle;
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index db910bc..53310f0 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -729,12 +729,31 @@ static struct ehca_qp *internal_create_qp(
+ 	init_attr->cap.max_send_wr = parms.squeue.act_nr_wqes;
+ 	my_qp->init_attr = *init_attr;
+ 
++	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
++		shca->sport[init_attr->port_num - 1].ibqp_sqp[qp_type] =
++			&my_qp->ib_qp;
++		if (ehca_nr_ports < 0) {
++			/* alloc array to cache subsequent modify qp parms
++			 * for autodetect mode
++			 */
++			my_qp->mod_qp_parm =
++				kzalloc(EHCA_MOD_QP_PARM_MAX *
++					sizeof(*my_qp->mod_qp_parm),
++					GFP_KERNEL);
++			if (!my_qp->mod_qp_parm) {
++				ehca_err(pd->device,
++					 "Could not alloc mod_qp_parm");
++				goto create_qp_exit4;
++			}
++		}
++	}
++
+ 	/* NOTE: define_apq0() not supported yet */
+ 	if (qp_type == IB_QPT_GSI) {
+ 		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
+ 		if (h_ret != H_SUCCESS) {
+ 			ret = ehca2ib_return_code(h_ret);
+-			goto create_qp_exit4;
++			goto create_qp_exit5;
+ 		}
+ 	}
+ 
+@@ -743,7 +762,7 @@ static struct ehca_qp *internal_create_qp(
+ 		if (ret) {
+ 			ehca_err(pd->device,
+ 				 "Couldn't assign qp to send_cq ret=%i", ret);
+-			goto create_qp_exit4;
++			goto create_qp_exit5;
+ 		}
+ 	}
+ 
+@@ -769,15 +788,18 @@ static struct ehca_qp *internal_create_qp(
+ 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
+ 			ehca_err(pd->device, "Copy to udata failed");
+ 			ret = -EINVAL;
+-			goto create_qp_exit5;
++			goto create_qp_exit6;
+ 		}
+ 	}
+ 
+ 	return my_qp;
+ 
+-create_qp_exit5:
++create_qp_exit6:
+ 	ehca_cq_unassign_qp(my_qp->send_cq, my_qp->real_qp_num);
+ 
++create_qp_exit5:
++	kfree(my_qp->mod_qp_parm);
++
+ create_qp_exit4:
+ 	if (HAS_RQ(my_qp))
+ 		ipz_queue_dtor(my_pd, &my_qp->ipz_rqueue);
+@@ -995,7 +1017,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
+ 	unsigned long flags = 0;
+ 
+ 	/* do query_qp to obtain current attr values */
+-	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
++	mqpcb = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+ 	if (!mqpcb) {
+ 		ehca_err(ibqp->device, "Could not get zeroed page for mqpcb "
+ 			 "ehca_qp=%p qp_num=%x ", my_qp, ibqp->qp_num);
+@@ -1183,6 +1205,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
+ 		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_P_KEY_IDX, 1);
+ 	}
+ 	if (attr_mask & IB_QP_PORT) {
++		struct ehca_sport *sport;
++		struct ehca_qp *aqp1;
+ 		if (attr->port_num < 1 || attr->port_num > shca->num_ports) {
+ 			ret = -EINVAL;
+ 			ehca_err(ibqp->device, "Invalid port=%x. "
+@@ -1191,6 +1215,29 @@ static int internal_modify_qp(struct ib_qp *ibqp,
+ 				 shca->num_ports);
+ 			goto modify_qp_exit2;
+ 		}
++		sport = &shca->sport[attr->port_num - 1];
++		if (!sport->ibqp_sqp[IB_QPT_GSI]) {
++			/* should not occur */
++			ret = -EFAULT;
++			ehca_err(ibqp->device, "AQP1 was not created for "
++				 "port=%x", attr->port_num);
++			goto modify_qp_exit2;
++		}
++		aqp1 = container_of(sport->ibqp_sqp[IB_QPT_GSI],
++				    struct ehca_qp, ib_qp);
++		if (ibqp->qp_type != IB_QPT_GSI &&
++		    ibqp->qp_type != IB_QPT_SMI &&
++		    aqp1->mod_qp_parm) {
++			/*
++			 * firmware will reject this modify_qp() because
++			 * port is not activated/initialized fully
++			 */
++			ret = -EFAULT;
++			ehca_warn(ibqp->device, "Couldn't modify qp port=%x: "
++				  "either port is being activated (try again) "
++				  "or cabling issue", attr->port_num);
++			goto modify_qp_exit2;
++		}
+ 		mqpcb->prim_phys_port = attr->port_num;
+ 		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PRIM_PHYS_PORT, 1);
+ 	}
+@@ -1470,6 +1517,8 @@ modify_qp_exit1:
+ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ 		   struct ib_udata *udata)
+ {
++	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
++					      ib_device);
+ 	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
+ 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+ 					     ib_pd);
+@@ -1482,9 +1531,100 @@ int ehca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
+ 		return -EINVAL;
+ 	}
+ 
++	/* The if-block below caches qp_attr to be modified for GSI and SMI
++	 * qps during the initialization by ib_mad. When the respective port
++	 * is activated, i.e. we got an event PORT_ACTIVE, we'll replay the
++	 * cached modify calls sequence, see ehca_recover_sqp() below.
++	 * Why that is required:
++	 * 1) If only one port is connected, older code requires that it be
++	 *    port one and that module option nr_ports=1 be given by the
++	 *    user, which is very inconvenient for the end user.
++	 * 2) Firmware accepts modify_qp() only if respective port has become
++	 *    active. Older code had a wait loop of 30sec create_qp()/
++	 *    define_aqp1(), which is not appropriate in practice. This
++	 *    code now removes that wait loop, see define_aqp1(), and always
++	 *    reports all ports to ib_mad resp. users. Only activated ports
++	 *    will then be usable for the users.
++	 */
++	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
++		int port = my_qp->init_attr.port_num;
++		struct ehca_sport *sport = &shca->sport[port - 1];
++		unsigned long flags;
++		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++		/* cache qp_attr only during init */
++		if (my_qp->mod_qp_parm) {
++			struct ehca_mod_qp_parm *p;
++			if (my_qp->mod_qp_parm_idx >= EHCA_MOD_QP_PARM_MAX) {
++				ehca_err(&shca->ib_device,
++					 "mod_qp_parm overflow state=%x port=%x"
++					 " type=%x", attr->qp_state,
++					 my_qp->init_attr.port_num,
++					 ibqp->qp_type);
++				spin_unlock_irqrestore(&sport->mod_sqp_lock,
++						       flags);
++				return -EINVAL;
++			}
++			p = &my_qp->mod_qp_parm[my_qp->mod_qp_parm_idx];
++			p->mask = attr_mask;
++			p->attr = *attr;
++			my_qp->mod_qp_parm_idx++;
++			ehca_dbg(&shca->ib_device,
++				 "Saved qp_attr for state=%x port=%x type=%x",
++				 attr->qp_state, my_qp->init_attr.port_num,
++				 ibqp->qp_type);
++			spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++			return 0;
++		}
++		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++	}
++
+ 	return internal_modify_qp(ibqp, attr, attr_mask, 0);
+ }
+ 
++void ehca_recover_sqp(struct ib_qp *sqp)
++{
++	struct ehca_qp *my_sqp = container_of(sqp, struct ehca_qp, ib_qp);
++	int port = my_sqp->init_attr.port_num;
++	struct ib_qp_attr attr;
++	struct ehca_mod_qp_parm *qp_parm;
++	int i, qp_parm_idx, ret;
++	unsigned long flags, wr_cnt;
++
++	if (!my_sqp->mod_qp_parm)
++		return;
++	ehca_dbg(sqp->device, "SQP port=%x qp_num=%x", port, sqp->qp_num);
++
++	qp_parm = my_sqp->mod_qp_parm;
++	qp_parm_idx = my_sqp->mod_qp_parm_idx;
++	for (i = 0; i < qp_parm_idx; i++) {
++		attr = qp_parm[i].attr;
++		ret = internal_modify_qp(sqp, &attr, qp_parm[i].mask, 0);
++		if (ret) {
++			ehca_err(sqp->device, "Could not modify SQP port=%x "
++				 "qp_num=%x ret=%x", port, sqp->qp_num, ret);
++			goto free_qp_parm;
++		}
++		ehca_dbg(sqp->device, "SQP port=%x qp_num=%x in state=%x",
++			 port, sqp->qp_num, attr.qp_state);
++	}
++
++	/* re-trigger posted recv wrs */
++	wr_cnt =  my_sqp->ipz_rqueue.current_q_offset /
++		my_sqp->ipz_rqueue.qe_size;
++	if (wr_cnt) {
++		spin_lock_irqsave(&my_sqp->spinlock_r, flags);
++		hipz_update_rqa(my_sqp, wr_cnt);
++		spin_unlock_irqrestore(&my_sqp->spinlock_r, flags);
++		ehca_dbg(sqp->device, "doorbell port=%x qp_num=%x wr_cnt=%lx",
++			 port, sqp->qp_num, wr_cnt);
++	}
++
++free_qp_parm:
++	kfree(qp_parm);
++	/* this prevents subsequent calls to modify_qp() to cache qp_attr */
++	my_sqp->mod_qp_parm = NULL;
++}
++
+ int ehca_query_qp(struct ib_qp *qp,
+ 		  struct ib_qp_attr *qp_attr,
+ 		  int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr)
+@@ -1772,6 +1912,7 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ 	struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
+ 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
+ 					     ib_pd);
++	struct ehca_sport *sport = &shca->sport[my_qp->init_attr.port_num - 1];
+ 	u32 cur_pid = current->tgid;
+ 	u32 qp_num = my_qp->real_qp_num;
+ 	int ret;
+@@ -1818,6 +1959,14 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ 	port_num = my_qp->init_attr.port_num;
+ 	qp_type  = my_qp->init_attr.qp_type;
+ 
++	if (qp_type == IB_QPT_SMI || qp_type == IB_QPT_GSI) {
++		spin_lock_irqsave(&sport->mod_sqp_lock, flags);
++		kfree(my_qp->mod_qp_parm);
++		my_qp->mod_qp_parm = NULL;
++		shca->sport[port_num - 1].ibqp_sqp[qp_type] = NULL;
++		spin_unlock_irqrestore(&sport->mod_sqp_lock, flags);
++	}
++
+ 	/* no support for IB_QPT_SMI yet */
+ 	if (qp_type == IB_QPT_GSI) {
+ 		struct ib_event event;
+diff --git a/drivers/infiniband/hw/ehca/ehca_sqp.c b/drivers/infiniband/hw/ehca/ehca_sqp.c
+index f0792e5..79e72b2 100644
+--- a/drivers/infiniband/hw/ehca/ehca_sqp.c
++++ b/drivers/infiniband/hw/ehca/ehca_sqp.c
+@@ -40,11 +40,8 @@
+  */
+ 
+ 
+-#include <linux/module.h>
+-#include <linux/err.h>
+ #include "ehca_classes.h"
+ #include "ehca_tools.h"
+-#include "ehca_qes.h"
+ #include "ehca_iverbs.h"
+ #include "hcp_if.h"
+ 
+@@ -93,6 +90,9 @@ u64 ehca_define_sqp(struct ehca_shca *shca,
+ 		return H_PARAMETER;
+ 	}
+ 
++	if (ehca_nr_ports < 0) /* autodetect mode */
++		return H_SUCCESS;
++
+ 	for (counter = 0;
+ 	     shca->sport[port - 1].port_state != IB_PORT_ACTIVE &&
+ 		     counter < ehca_port_act_time;
+-- 
+1.5.2
+
diff --git a/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch b/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch
new file mode 100644
index 0000000..a7744d2
--- /dev/null
+++ b/kernel_patches/fixes/ehca_0007_Prevent_RDMA_related_connection_failures.patch
@@ -0,0 +1,276 @@
+From a1f46cca6affc61b78050b85eec957b14fa6ea58 Mon Sep 17 00:00:00 2001
+From: root <root at dent.boeblingen.de.ibm.com>
+Date: Tue, 22 Jan 2008 16:27:52 +0100
+Subject: [PATCH] IB/ehca: Prevent RDMA-related connection failures on some eHCA2 hardware
+
+Some HW revisions of eHCA2 may cause an RC connection to break if they
+received RDMA Reads over that connection before.  This can be
+prevented by ensuring that, after the first RDMA Read, the QP receives
+a new RDMA Read every few million link packets.
+
+Add code to the driver that inserts an empty (size 0) RDMA Read
+into the message stream every now and then if the consumer doesn't
+post RDMA Reads frequently enough.
+
+Signed-off-by: Joachim Fenkes <fenkes at de.ibm.com>
+Signed-off-by: Roland Dreier <rolandd at cisco.com>
+---
+ drivers/infiniband/hw/ehca/ehca_classes.h |    5 ++
+ drivers/infiniband/hw/ehca/ehca_qp.c      |   14 +++-
+ drivers/infiniband/hw/ehca/ehca_reqs.c    |  112 ++++++++++++++++++++--------
+ 3 files changed, 95 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 997c3d1..8b76ac3 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -183,6 +183,11 @@ struct ehca_qp {
+ 	u32 mm_count_squeue;
+ 	u32 mm_count_rqueue;
+ 	u32 mm_count_galpa;
++	/* unsolicited ack circumvention */
++	int unsol_ack_circ;
++	int mtu_shift;
++	u32 message_count;
++	u32 packet_count;
+ };
+ 
+ #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index 53310f0..e32f964 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -592,10 +592,8 @@ static struct ehca_qp *internal_create_qp(
+ 		goto create_qp_exit1;
+ 	}
+ 
+-	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
+-		parms.sigtype = HCALL_SIGT_EVERY;
+-	else
+-		parms.sigtype = HCALL_SIGT_BY_WQE;
++	/* Always signal by WQE so we can hide circ. WQEs */
++	parms.sigtype = HCALL_SIGT_BY_WQE;
+ 
+ 	/* UD_AV CIRCUMVENTION */
+ 	max_send_sge = init_attr->cap.max_send_sge;
+@@ -618,6 +616,10 @@ static struct ehca_qp *internal_create_qp(
+ 	parms.squeue.max_sge = max_send_sge;
+ 	parms.rqueue.max_sge = max_recv_sge;
+ 
++	/* RC QPs need one more SWQE for unsolicited ack circumvention */
++	if (qp_type == IB_QPT_RC)
++		parms.squeue.max_wr++;
++
+ 	if (EHCA_BMASK_GET(HCA_CAP_MINI_QP, shca->hca_cap)) {
+ 		if (HAS_SQ(my_qp))
+ 			ehca_determine_small_queue(
+@@ -650,6 +652,8 @@ static struct ehca_qp *internal_create_qp(
+ 			parms.squeue.act_nr_sges = 1;
+ 			parms.rqueue.act_nr_sges = 1;
+ 		}
++		/* hide the extra WQE */
++		parms.squeue.act_nr_wqes--;
+ 		break;
+ 	case IB_QPT_UD:
+ 	case IB_QPT_GSI:
+@@ -1294,6 +1298,8 @@ static int internal_modify_qp(struct ib_qp *ibqp,
+ 	}
+ 
+ 	if (attr_mask & IB_QP_PATH_MTU) {
++		/* store ld(MTU) */
++		my_qp->mtu_shift = attr->path_mtu + 7;
+ 		mqpcb->path_mtu = attr->path_mtu;
+ 		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_PATH_MTU, 1);
+ 	}
+diff --git a/drivers/infiniband/hw/ehca/ehca_reqs.c b/drivers/infiniband/hw/ehca/ehca_reqs.c
+index ea91360..3aacc8c 100644
+--- a/drivers/infiniband/hw/ehca/ehca_reqs.c
++++ b/drivers/infiniband/hw/ehca/ehca_reqs.c
+@@ -50,6 +50,9 @@
+ #include "hcp_if.h"
+ #include "hipz_fns.h"
+ 
++/* in RC traffic, insert an empty RDMA READ every this many packets */
++#define ACK_CIRC_THRESHOLD 2000000
++
+ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+ 				  struct ehca_wqe *wqe_p,
+ 				  struct ib_recv_wr *recv_wr)
+@@ -81,7 +84,7 @@ static inline int ehca_write_rwqe(struct ipz_queue *ipz_rqueue,
+ 	if (ehca_debug_level) {
+ 		ehca_gen_dbg("RECEIVE WQE written into ipz_rqueue=%p",
+ 			     ipz_rqueue);
+-		ehca_dmp( wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
++		ehca_dmp(wqe_p, 16*(6 + wqe_p->nr_of_data_seg), "recv wqe");
+ 	}
+ 
+ 	return 0;
+@@ -135,7 +138,8 @@ static void trace_send_wr_ud(const struct ib_send_wr *send_wr)
+ 
+ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ 				  struct ehca_wqe *wqe_p,
+-				  const struct ib_send_wr *send_wr)
++				  const struct ib_send_wr *send_wr,
++				  int hidden)
+ {
+ 	u32 idx;
+ 	u64 dma_length;
+@@ -176,7 +180,9 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ 
+ 	wqe_p->wr_flag = 0;
+ 
+-	if (send_wr->send_flags & IB_SEND_SIGNALED)
++	if ((send_wr->send_flags & IB_SEND_SIGNALED ||
++	    qp->init_attr.sq_sig_type == IB_SIGNAL_ALL_WR)
++	    && !hidden)
+ 		wqe_p->wr_flag |= WQE_WRFLAG_REQ_SIGNAL_COM;
+ 
+ 	if (send_wr->opcode == IB_WR_SEND_WITH_IMM ||
+@@ -199,7 +205,7 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ 
+ 		wqe_p->destination_qp_number = send_wr->wr.ud.remote_qpn << 8;
+ 		wqe_p->local_ee_context_qkey = remote_qkey;
+-		if (!send_wr->wr.ud.ah) {
++		if (unlikely(!send_wr->wr.ud.ah)) {
+ 			ehca_gen_err("wr.ud.ah is NULL. qp=%p", qp);
+ 			return -EINVAL;
+ 		}
+@@ -255,6 +261,15 @@ static inline int ehca_write_swqe(struct ehca_qp *qp,
+ 		} /* eof idx */
+ 		wqe_p->u.nud.atomic_1st_op_dma_len = dma_length;
+ 
++		/* unsolicited ack circumvention */
++		if (send_wr->opcode == IB_WR_RDMA_READ) {
++			/* on RDMA read, switch on and reset counters */
++			qp->message_count = qp->packet_count = 0;
++			qp->unsol_ack_circ = 1;
++		} else
++			/* else estimate #packets */
++			qp->packet_count += (dma_length >> qp->mtu_shift) + 1;
++
+ 		break;
+ 
+ 	default:
+@@ -355,13 +370,49 @@ static inline void map_ib_wc_status(u32 cqe_status,
+ 		*wc_status = IB_WC_SUCCESS;
+ }
+ 
++static inline int post_one_send(struct ehca_qp *my_qp,
++			 struct ib_send_wr *cur_send_wr,
++			 struct ib_send_wr **bad_send_wr,
++			 int hidden)
++{
++	struct ehca_wqe *wqe_p;
++	int ret;
++	u64 start_offset = my_qp->ipz_squeue.current_q_offset;
++
++	/* get pointer next to free WQE */
++	wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
++	if (unlikely(!wqe_p)) {
++		/* too many posted work requests: queue overflow */
++		if (bad_send_wr)
++			*bad_send_wr = cur_send_wr;
++		ehca_err(my_qp->ib_qp.device, "Too many posted WQEs "
++			 "qp_num=%x", my_qp->ib_qp.qp_num);
++		return -ENOMEM;
++	}
++	/* write a SEND WQE into the QUEUE */
++	ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr, hidden);
++	/*
++	 * if something failed,
++	 * reset the free entry pointer to the start value
++	 */
++	if (unlikely(ret)) {
++		my_qp->ipz_squeue.current_q_offset = start_offset;
++		if (bad_send_wr)
++			*bad_send_wr = cur_send_wr;
++		ehca_err(my_qp->ib_qp.device, "Could not write WQE "
++			 "qp_num=%x", my_qp->ib_qp.qp_num);
++		return -EINVAL;
++	}
++
++	return 0;
++}
++
+ int ehca_post_send(struct ib_qp *qp,
+ 		   struct ib_send_wr *send_wr,
+ 		   struct ib_send_wr **bad_send_wr)
+ {
+ 	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
+ 	struct ib_send_wr *cur_send_wr;
+-	struct ehca_wqe *wqe_p;
+ 	int wqe_cnt = 0;
+ 	int ret = 0;
+ 	unsigned long flags;
+@@ -369,37 +420,33 @@ int ehca_post_send(struct ib_qp *qp,
+ 	/* LOCK the QUEUE */
+ 	spin_lock_irqsave(&my_qp->spinlock_s, flags);
+ 
++	/* Send an empty extra RDMA read if:
++	 *  1) there has been an RDMA read on this connection before
++	 *  2) no RDMA read occurred for ACK_CIRC_THRESHOLD link packets
++	 *  3) we can be sure that any previous extra RDMA read has been
++	 *     processed so we don't overflow the SQ
++	 */
++	if (unlikely(my_qp->unsol_ack_circ &&
++		     my_qp->packet_count > ACK_CIRC_THRESHOLD &&
++		     my_qp->message_count > my_qp->init_attr.cap.max_send_wr)) {
++		/* insert an empty RDMA READ to fix up the remote QP state */
++		struct ib_send_wr circ_wr;
++		memset(&circ_wr, 0, sizeof(circ_wr));
++		circ_wr.opcode = IB_WR_RDMA_READ;
++		post_one_send(my_qp, &circ_wr, NULL, 1); /* ignore retcode */
++		wqe_cnt++;
++		ehca_dbg(qp->device, "posted circ wr  qp_num=%x", qp->qp_num);
++		my_qp->message_count = my_qp->packet_count = 0;
++	}
++
+ 	/* loop processes list of send reqs */
+ 	for (cur_send_wr = send_wr; cur_send_wr != NULL;
+ 	     cur_send_wr = cur_send_wr->next) {
+-		u64 start_offset = my_qp->ipz_squeue.current_q_offset;
+-		/* get pointer next to free WQE */
+-		wqe_p = ipz_qeit_get_inc(&my_qp->ipz_squeue);
+-		if (unlikely(!wqe_p)) {
+-			/* too many posted work requests: queue overflow */
+-			if (bad_send_wr)
+-				*bad_send_wr = cur_send_wr;
+-			if (wqe_cnt == 0) {
+-				ret = -ENOMEM;
+-				ehca_err(qp->device, "Too many posted WQEs "
+-					 "qp_num=%x", qp->qp_num);
+-			}
+-			goto post_send_exit0;
+-		}
+-		/* write a SEND WQE into the QUEUE */
+-		ret = ehca_write_swqe(my_qp, wqe_p, cur_send_wr);
+-		/*
+-		 * if something failed,
+-		 * reset the free entry pointer to the start value
+-		 */
++		ret = post_one_send(my_qp, cur_send_wr, bad_send_wr, 0);
+ 		if (unlikely(ret)) {
+-			my_qp->ipz_squeue.current_q_offset = start_offset;
+-			*bad_send_wr = cur_send_wr;
+-			if (wqe_cnt == 0) {
+-				ret = -EINVAL;
+-				ehca_err(qp->device, "Could not write WQE "
+-					 "qp_num=%x", qp->qp_num);
+-			}
++			/* if one or more WQEs were successful, don't fail */
++			if (wqe_cnt)
++				ret = 0;
+ 			goto post_send_exit0;
+ 		}
+ 		wqe_cnt++;
+@@ -410,6 +457,7 @@ int ehca_post_send(struct ib_qp *qp,
+ post_send_exit0:
+ 	iosync(); /* serialize GAL register access */
+ 	hipz_update_sqa(my_qp, wqe_cnt);
++	my_qp->message_count += wqe_cnt;
+ 	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
+ 	return ret;
+ }
+-- 
+1.5.2
+
-- 
1.5.2






More information about the general mailing list