[ofa-general] [PATCH 06/10] IB/ipath: Head of Line blocking vs forward progress of user apps

Ralph Campbell ralph.campbell at qlogic.com
Thu Mar 20 17:18:06 PDT 2008


From: John Gregor <<john.gregor at qlogic.com>>

There's a conflict between our need to quiesce PSM-based applications
to avoid HoL blocking when the IB link goes down and the apps' desire
to remain running so that their quiescence timout mechanism can keep
running.

The compromise is to STOP the processes for a fixed period of time
and then alternate between CONT and STOP until the link is again active.

If there are poor interactions with subnet manager configuration
at a given site, the interval can be adjusted via a module paramter.

Signed-off-by: John Gregor <john.gregor at qlogic.com>
---

 drivers/infiniband/hw/ipath/ipath_debug.h     |    1 
 drivers/infiniband/hw/ipath/ipath_diag.c      |   19 +-
 drivers/infiniband/hw/ipath/ipath_driver.c    |  129 +++++++++++
 drivers/infiniband/hw/ipath/ipath_init_chip.c |    6 +
 drivers/infiniband/hw/ipath/ipath_intr.c      |  287 ++++++++++++-------------
 drivers/infiniband/hw/ipath/ipath_kernel.h    |   32 +++
 drivers/infiniband/hw/ipath/ipath_registers.h |   20 --
 7 files changed, 312 insertions(+), 182 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_debug.h b/drivers/infiniband/hw/ipath/ipath_debug.h
index d6f6953..7170bd2 100644
--- a/drivers/infiniband/hw/ipath/ipath_debug.h
+++ b/drivers/infiniband/hw/ipath/ipath_debug.h
@@ -66,6 +66,7 @@
 #define __IPATH_IPATHERR    0x40000	/* Ethernet (IPATH) errors */
 #define __IPATH_IPATHPD     0x80000	/* Ethernet (IPATH) packet dump */
 #define __IPATH_IPATHTABLE  0x100000	/* Ethernet (IPATH) table dump */
+#define __IPATH_LINKVERBDBG 0x200000	/* very verbose linkchange debug */
 
 #else				/* _IPATH_DEBUGGING */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_diag.c b/drivers/infiniband/hw/ipath/ipath_diag.c
index 4137c77..4ee9479 100644
--- a/drivers/infiniband/hw/ipath/ipath_diag.c
+++ b/drivers/infiniband/hw/ipath/ipath_diag.c
@@ -330,6 +330,7 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 	struct ipath_devdata *dd;
 	ssize_t ret = 0;
 	u64 val;
+	u32 l_state, lt_state; /* LinkState, LinkTrainingState */
 
 	if (count != sizeof(dp)) {
 		ret = -EINVAL;
@@ -396,10 +397,17 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 		ret = -ENODEV;
 		goto bail;
 	}
-	/* Check link state, but not if we have custom PBC */
-	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-	if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
-		val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
+	/*
+	 * Want to skip check for l_state if using custom PBC,
+	 * because we might be trying to force an SM packet out.
+	 * first-cut, skip _all_ state checking in that case.
+	 */
+	val = ipath_ib_state(dd, dd->ipath_lastibcstat);
+	lt_state = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
+	l_state = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+	if (!dp.pbc_wd && (lt_state != INFINIPATH_IBCS_LT_STATE_LINKUP ||
+	    (val != dd->ib_init && val != dd->ib_arm &&
+	    val != dd->ib_active))) {
 		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
 			   dd->ipath_unit, (unsigned long long) val);
 		ret = -EINVAL;
@@ -438,6 +446,9 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 		ret = -EBUSY;
 		goto bail;
 	}
+	/* disarm it just to be extra sure */
+	ipath_disarm_piobufs(dd, pbufn, 1);
+
 
 	plen >>= 2;		/* in dwords */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 7121fe8..5579195 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -77,6 +77,11 @@ unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
 module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
 MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
 
+static unsigned ipath_hol_timeout_ms = 13000;
+module_param_named(hol_timeout_ms, ipath_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+	"duration of user app suspension after link failure");
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("QLogic <support at pathscale.com>");
 MODULE_DESCRIPTION("QLogic InfiniPath driver");
@@ -1670,11 +1675,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 	ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
 		   "is %s\n", dd->ipath_unit,
 		   what[linkcmd],
-		   ipath_ibcstatus_str[
-			   (ipath_read_kreg64
-			    (dd, dd->ipath_kregs->kr_ibcstatus) >>
-			    INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
+		   ipath_ibcstatus_str[ipath_ib_linktrstate(dd,
+			ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus))]);
 	/* flush all queued sends when going to DOWN to be sure that
 	 * they don't block MAD packets */
 	if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
@@ -1925,9 +1927,8 @@ static void ipath_run_led_override(unsigned long opaque)
 	 */
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
 	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		  INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-		 INFINIPATH_IBCS_LINKSTATE_MASK;
+		  dd->ibcs_lts_mask;
+	lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;
 
 	dd->ipath_f_setextled(dd, lstate, ltstate);
 	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
@@ -1988,6 +1989,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 
 	ipath_dbg("Shutting down the device\n");
 
+	ipath_hol_up(dd); /* make sure user processes aren't suspended */
+
 	dd->ipath_flags |= IPATH_LINKUNK;
 	dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
 			     IPATH_LINKINIT | IPATH_LINKARMED |
@@ -2037,6 +2040,8 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	 */
 	dd->ipath_f_quiet_serdes(dd);
 
+	/* stop all the timers that might still be running */
+	del_timer_sync(&dd->ipath_hol_timer);
 	if (dd->ipath_stats_timer_active) {
 		del_timer_sync(&dd->ipath_stats_timer);
 		dd->ipath_stats_timer_active = 0;
@@ -2252,6 +2257,114 @@ bail:
 	return ret;
 }
 
+/*
+ * send a signal to all the processes that have the driver open
+ * through the normal interfaces (i.e., everything other than diags
+ * interface).  Returns number of signalled processes.
+ */
+int ipath_signal_procs(struct ipath_devdata *dd, int sig)
+{
+	int i, sub, any = 0;
+	pid_t pid;
+
+	if (!dd->ipath_pd)
+		return 0;
+	for (i = 1; i < dd->ipath_cfgports; i++) {
+		if (!dd->ipath_pd[i] || !dd->ipath_pd[i]->port_cnt ||
+		    !dd->ipath_pd[i]->port_pid)
+			continue;
+		pid = dd->ipath_pd[i]->port_pid;
+		dev_info(&dd->pcidev->dev, "context %d in use "
+			  "(PID %u), sending signal %d\n",
+			  i, pid, sig);
+		kill_proc(pid, sig, 1);
+		any++;
+		for (sub = 0; sub < INFINIPATH_MAX_SUBPORT; sub++) {
+			pid = dd->ipath_pd[i]->port_subpid[sub];
+			if (!pid)
+				continue;
+			dev_info(&dd->pcidev->dev, "sub-context "
+				"%d:%d in use (PID %u), sending "
+				"signal %d\n", i, sub, pid, sig);
+			kill_proc(pid, sig, 1);
+			any++;
+		}
+	}
+	return any;
+}
+
+static void ipath_hol_signal_down(struct ipath_devdata *dd)
+{
+	if (ipath_signal_procs(dd, SIGSTOP))
+		ipath_dbg("Stopped some processes\n");
+	ipath_cancel_sends(dd, 1);
+}
+
+
+static void ipath_hol_signal_up(struct ipath_devdata *dd)
+{
+	if (ipath_signal_procs(dd, SIGCONT))
+		ipath_dbg("Continued some processes\n");
+}
+
+/*
+ * link is down, stop any users processes, and flush pending sends
+ * to prevent HoL blocking, then start the HoL timer that
+ * periodically continues, then stop procs, so they can detect
+ * link down if they want, and do something about it.
+ * Timer may already be running, so use __mod_timer, not add_timer.
+ */
+void ipath_hol_down(struct ipath_devdata *dd)
+{
+	dd->ipath_hol_state = IPATH_HOL_DOWN;
+	ipath_hol_signal_down(dd);
+	dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+	dd->ipath_hol_timer.expires = jiffies +
+		msecs_to_jiffies(ipath_hol_timeout_ms);
+	__mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires);
+}
+
+/*
+ * link is up, continue any user processes, and ensure timer
+ * is a nop, if running.  Let timer keep running, if set; it
+ * will nop when it sees the link is up
+ */
+void ipath_hol_up(struct ipath_devdata *dd)
+{
+	ipath_hol_signal_up(dd);
+	dd->ipath_hol_state = IPATH_HOL_UP;
+}
+
+/*
+ * toggle the running/not running state of user proceses
+ * to prevent HoL blocking on chip resources, but still allow
+ * user processes to do link down special case handling.
+ * Should only be called via the timer
+ */
+void ipath_hol_event(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+
+	if (dd->ipath_hol_next == IPATH_HOL_DOWNSTOP
+		&& dd->ipath_hol_state != IPATH_HOL_UP) {
+		dd->ipath_hol_next = IPATH_HOL_DOWNCONT;
+		ipath_dbg("Stopping processes\n");
+		ipath_hol_signal_down(dd);
+	} else { /* may do "extra" if also in ipath_hol_up() */
+		dd->ipath_hol_next = IPATH_HOL_DOWNSTOP;
+		ipath_dbg("Continuing processes\n");
+		ipath_hol_signal_up(dd);
+	}
+	if (dd->ipath_hol_state == IPATH_HOL_UP)
+		ipath_dbg("link's up, don't resched timer\n");
+	else {
+		dd->ipath_hol_timer.expires = jiffies +
+			msecs_to_jiffies(ipath_hol_timeout_ms);
+		__mod_timer(&dd->ipath_hol_timer,
+			dd->ipath_hol_timer.expires);
+	}
+}
+
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
 {
 	u64 val;
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index f0d7848..bed0927 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -908,6 +908,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 		dd->ipath_stats_timer_active = 1;
 	}
 
+	/* Set up HoL state */
+	init_timer(&dd->ipath_hol_timer);
+	dd->ipath_hol_timer.function = ipath_hol_event;
+	dd->ipath_hol_timer.data = (unsigned long)dd;
+	dd->ipath_hol_state = IPATH_HOL_UP;
+
 done:
 	if (!ret) {
 		*dd->ipath_statusp |= IPATH_STATUS_CHIP_PRESENT;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index ed2a227..dde5dfc 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -32,6 +32,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
@@ -256,24 +257,20 @@ void ipath_format_hwerrors(u64 hwerrs,
 }
 
 /* return the strings for the most common link states */
-static char *ib_linkstate(u32 linkstate)
+static char *ib_linkstate(struct ipath_devdata *dd, u64 ibcs)
 {
 	char *ret;
+	u32 state;
 
-	switch (linkstate) {
-	case IPATH_IBSTATE_INIT:
+	state = ipath_ib_state(dd, ibcs);
+	if (state == dd->ib_init)
 		ret = "Init";
-		break;
-	case IPATH_IBSTATE_ARM:
+	else if (state == dd->ib_arm)
 		ret = "Arm";
-		break;
-	case IPATH_IBSTATE_ACTIVE:
+	else if (state == dd->ib_active)
 		ret = "Active";
-		break;
-	default:
+	else
 		ret = "Down";
-	}
-
 	return ret;
 }
 
@@ -288,103 +285,137 @@ void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev)
 }
 
 static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
-				     ipath_err_t errs, int noprint)
+				     ipath_err_t errs)
 {
-	u64 val;
-	u32 ltstate, lstate;
+	u32 ltstate, lstate, ibstate, lastlstate;
+	u32 init = dd->ib_init;
+	u32 arm = dd->ib_arm;
+	u32 active = dd->ib_active;
+	const u64 ibcs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+
+	lstate = ipath_ib_linkstate(dd, ibcs); /* linkstate */
+	ibstate = ipath_ib_state(dd, ibcs);
+	/* linkstate at last interrupt */
+	lastlstate = ipath_ib_linkstate(dd, dd->ipath_lastibcstat);
+	ltstate = ipath_ib_linktrstate(dd, ibcs); /* linktrainingtate */
 
 	/*
-	 * even if diags are enabled, we want to notice LINKINIT, etc.
-	 * We just don't want to change the LED state, or
-	 * dd->ipath_kregs->kr_ibcctrl
+	 * if linkstate transitions into INIT from any of the various down
+	 * states, or if it transitions from any of the up (INIT or better)
+	 * states into any of the down states (except link recovery), then
+	 * call the chip-specific code to take appropriate actions.
 	 */
-	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	lstate = val & IPATH_IBSTATE_MASK;
+	if (lstate >= INFINIPATH_IBCS_L_STATE_INIT &&
+		lastlstate == INFINIPATH_IBCS_L_STATE_DOWN) {
+		/* transitioned to UP */
+		if (dd->ipath_f_ib_updown(dd, 1, ibcs)) {
+			ipath_cdbg(LINKVERB, "LinkUp handled, skipped\n");
+			goto skip_ibchange; /* chip-code handled */
+		}
+	} else if ((lastlstate >= INFINIPATH_IBCS_L_STATE_INIT ||
+		(dd->ipath_flags & IPATH_IB_FORCE_NOTIFY)) &&
+		ltstate <= INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE &&
+		ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+		int handled;
+		handled = dd->ipath_f_ib_updown(dd, 0, ibcs);
+		dd->ipath_flags &= ~IPATH_IB_FORCE_NOTIFY;
+		if (handled) {
+			ipath_cdbg(LINKVERB, "LinkDown handled, skipped\n");
+			goto skip_ibchange; /* chip-code handled */
+		}
+	}
 
 	/*
-	 * this is confusing enough when it happens that I want to always put it
-	 * on the console and in the logs.  If it was a requested state change,
-	 * we'll have already cleared the flags, so we won't print this warning
+	 * Significant enough to always print and get into logs, if it was
+	 * unexpected.  If it was a requested state change, we'll have
+	 * already cleared the flags, so we won't print this warning
 	 */
-	if ((lstate != IPATH_IBSTATE_ARM && lstate != IPATH_IBSTATE_ACTIVE)
-		&& (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
-		dev_info(&dd->pcidev->dev, "Link state changed from %s to %s\n",
-				 (dd->ipath_flags & IPATH_LINKARMED) ? "ARM" : "ACTIVE",
-				 ib_linkstate(lstate));
-		/*
-		 * Flush all queued sends when link went to DOWN or INIT,
-		 * to be sure that they don't block SMA and other MAD packets
-		 */
-		ipath_cancel_sends(dd, 1);
-	}
-	else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
-	    lstate == IPATH_IBSTATE_ACTIVE) {
-		/*
-		 * only print at SMA if there is a change, debug if not
-		 * (sometimes we want to know that, usually not).
-		 */
-		if (lstate == ((unsigned) dd->ipath_lastibcstat
-			       & IPATH_IBSTATE_MASK)) {
-			ipath_dbg("Status change intr but no change (%s)\n",
-				  ib_linkstate(lstate));
-		}
-		else
-			ipath_cdbg(VERBOSE, "Unit %u link state %s, last "
-				   "was %s\n", dd->ipath_unit,
-				   ib_linkstate(lstate),
-				   ib_linkstate((unsigned)
-						dd->ipath_lastibcstat
-						& IPATH_IBSTATE_MASK));
-	}
-	else {
-		lstate = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-		if (lstate == IPATH_IBSTATE_INIT ||
-		    lstate == IPATH_IBSTATE_ARM ||
-		    lstate == IPATH_IBSTATE_ACTIVE)
-			ipath_cdbg(VERBOSE, "Unit %u link state down"
-				   " (state 0x%x), from %s\n",
-				   dd->ipath_unit,
-				   (u32)val & IPATH_IBSTATE_MASK,
-				   ib_linkstate(lstate));
-		else
-			ipath_cdbg(VERBOSE, "Unit %u link state changed "
-				   "to 0x%x from down (%x)\n",
-				   dd->ipath_unit, (u32) val, lstate);
+	if ((ibstate != arm && ibstate != active) &&
+	    (dd->ipath_flags & (IPATH_LINKARMED | IPATH_LINKACTIVE))) {
+		dev_info(&dd->pcidev->dev, "Link state changed from %s "
+			 "to %s\n", (dd->ipath_flags & IPATH_LINKARMED) ?
+			 "ARM" : "ACTIVE", ib_linkstate(dd, ibcs));
 	}
-	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-		INFINIPATH_IBCS_LINKSTATE_MASK;
 
 	if (ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
 	    ltstate == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-		u32 last_ltstate;
-
+		u32 lastlts;
+		lastlts = ipath_ib_linktrstate(dd, dd->ipath_lastibcstat);
 		/*
-		 * Ignore cycling back and forth from Polling.Active
-		 * to Polling.Quiet while waiting for the other end of
-		 * the link to come up. We will cycle back and forth
-		 * between them if no cable is plugged in,
-		 * the other device is powered off or disabled, etc.
+		 * Ignore cycling back and forth from Polling.Active to
+		 * Polling.Quiet while waiting for the other end of the link
+		 * to come up, except to try and decide if we are connected
+		 * to a live IB device or not.  We will cycle back and
+		 * forth between them if no cable is plugged in, the other
+		 * device is powered off or disabled, etc.
 		 */
-		last_ltstate = (dd->ipath_lastibcstat >>
-				INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT)
-			& INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
-		if (last_ltstate == INFINIPATH_IBCS_LT_STATE_POLLACTIVE
-		    || last_ltstate ==
-		    INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
-			if (dd->ipath_ibpollcnt > 40) {
+		if (lastlts == INFINIPATH_IBCS_LT_STATE_POLLACTIVE ||
+		    lastlts == INFINIPATH_IBCS_LT_STATE_POLLQUIET) {
+			if (++dd->ipath_ibpollcnt == 40) {
 				dd->ipath_flags |= IPATH_NOCABLE;
 				*dd->ipath_statusp |=
 					IPATH_STATUS_IB_NOCABLE;
-			} else
-				dd->ipath_ibpollcnt++;
+				ipath_cdbg(LINKVERB, "Set NOCABLE\n");
+			}
+			ipath_cdbg(LINKVERB, "POLL change to %s (%x)\n",
+				ipath_ibcstatus_str[ltstate], ibstate);
 			goto skip_ibchange;
 		}
 	}
-	dd->ipath_ibpollcnt = 0;	/* some state other than 2 or 3 */
+
+	dd->ipath_ibpollcnt = 0; /* not poll*, now */
 	ipath_stats.sps_iblink++;
-	if (ltstate != INFINIPATH_IBCS_LT_STATE_LINKUP) {
+
+	if (ibstate == init || ibstate == arm || ibstate == active) {
+		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
+		if (ibstate == init || ibstate == arm) {
+			*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
+			if (dd->ipath_flags & IPATH_LINKACTIVE)
+				signal_ib_event(dd, IB_EVENT_PORT_ERR);
+		}
+		if (ibstate == arm) {
+			dd->ipath_flags |= IPATH_LINKARMED;
+			dd->ipath_flags &= ~(IPATH_LINKUNK |
+				IPATH_LINKINIT | IPATH_LINKDOWN |
+				IPATH_LINKACTIVE | IPATH_NOCABLE);
+			ipath_hol_down(dd);
+		} else  if (ibstate == init) {
+			/*
+			 * set INIT and DOWN.  Down is checked by
+			 * most of the other code, but INIT is
+			 * useful to know in a few places.
+			 */
+			dd->ipath_flags |= IPATH_LINKINIT |
+				IPATH_LINKDOWN;
+			dd->ipath_flags &= ~(IPATH_LINKUNK |
+				IPATH_LINKARMED | IPATH_LINKACTIVE |
+				IPATH_NOCABLE);
+			ipath_hol_down(dd);
+		} else {  /* active */
+			*dd->ipath_statusp |=
+				IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
+			dd->ipath_flags |= IPATH_LINKACTIVE;
+			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
+				| IPATH_LINKDOWN | IPATH_LINKARMED |
+				IPATH_NOCABLE);
+			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
+			/* LED active not handled in chip _f_updown */
+			dd->ipath_f_setextled(dd, lstate, ltstate);
+			ipath_hol_up(dd);
+		}
+
+		/*
+		 * print after we've already done the work, so as not to
+		 * delay the state changes and notifications, for debugging
+		 */
+		if (lstate == lastlstate)
+			ipath_cdbg(LINKVERB, "Unchanged from last: %s "
+				"(%x)\n", ib_linkstate(dd, ibcs), ibstate);
+		else
+			ipath_cdbg(VERBOSE, "Unit %u: link up to %s %s (%x)\n",
+				  dd->ipath_unit, ib_linkstate(dd, ibcs),
+				  ipath_ibcstatus_str[ltstate],  ibstate);
+	} else { /* down */
 		if (dd->ipath_flags & IPATH_LINKACTIVE)
 			signal_ib_event(dd, IB_EVENT_PORT_ERR);
 		dd->ipath_flags |= IPATH_LINKDOWN;
@@ -393,65 +424,22 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 				     IPATH_LINKARMED);
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
 		dd->ipath_lli_counter = 0;
-		if (!noprint) {
-			if (((dd->ipath_lastibcstat >>
-			      INFINIPATH_IBCS_LINKSTATE_SHIFT) &
-			     INFINIPATH_IBCS_LINKSTATE_MASK)
-			    == INFINIPATH_IBCS_L_STATE_ACTIVE)
-				/* if from up to down be more vocal */
-				ipath_cdbg(VERBOSE,
-					   "Unit %u link now down (%s)\n",
-					   dd->ipath_unit,
-					   ipath_ibcstatus_str[ltstate]);
-			else
-				ipath_cdbg(VERBOSE, "Unit %u link is "
-					   "down (%s)\n", dd->ipath_unit,
-					   ipath_ibcstatus_str[ltstate]);
-		}
 
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ACTIVE) {
-		dd->ipath_flags |= IPATH_LINKACTIVE;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKINIT | IPATH_LINKDOWN |
-			  IPATH_LINKARMED | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~IPATH_STATUS_IB_NOCABLE;
-		*dd->ipath_statusp |=
-			IPATH_STATUS_IB_READY | IPATH_STATUS_IB_CONF;
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-		signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_INIT) {
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		/*
-		 * set INIT and DOWN.  Down is checked by most of the other
-		 * code, but INIT is useful to know in a few places.
-		 */
-		dd->ipath_flags |= IPATH_LINKINIT | IPATH_LINKDOWN;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKACTIVE | IPATH_LINKARMED
-			  | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
-					| IPATH_STATUS_IB_READY);
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else if ((val & IPATH_IBSTATE_MASK) == IPATH_IBSTATE_ARM) {
-		if (dd->ipath_flags & IPATH_LINKACTIVE)
-			signal_ib_event(dd, IB_EVENT_PORT_ERR);
-		dd->ipath_flags |= IPATH_LINKARMED;
-		dd->ipath_flags &=
-			~(IPATH_LINKUNK | IPATH_LINKDOWN | IPATH_LINKINIT |
-			  IPATH_LINKACTIVE | IPATH_NOCABLE);
-		*dd->ipath_statusp &= ~(IPATH_STATUS_IB_NOCABLE
-					| IPATH_STATUS_IB_READY);
-		dd->ipath_f_setextled(dd, lstate, ltstate);
-	} else {
-		if (!noprint)
-			ipath_dbg("IBstatuschange unit %u: %s (%x)\n",
-				  dd->ipath_unit,
-				  ipath_ibcstatus_str[ltstate], ltstate);
+		if (lastlstate != INFINIPATH_IBCS_L_STATE_DOWN)
+			ipath_cdbg(VERBOSE, "Unit %u link state down "
+				   "(state 0x%x), from %s\n",
+				   dd->ipath_unit, lstate,
+				   ib_linkstate(dd, dd->ipath_lastibcstat));
+		else
+			ipath_cdbg(LINKVERB, "Unit %u link state changed "
+				   "to %s (0x%x) from down (%x)\n",
+				   dd->ipath_unit,
+				   ipath_ibcstatus_str[ltstate],
+				   ibstate, lastlstate);
 	}
+
 skip_ibchange:
-	dd->ipath_lastibcstat = val;
+	dd->ipath_lastibcstat = ibcs;
 }
 
 static void handle_supp_msgs(struct ipath_devdata *dd,
@@ -743,16 +731,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
 				     | IPATH_LINKARMED | IPATH_LINKACTIVE);
 		*dd->ipath_statusp &= ~IPATH_STATUS_IB_READY;
-		if (!noprint) {
-			u64 st = ipath_read_kreg64(
-				dd, dd->ipath_kregs->kr_ibcstatus);
 
-			ipath_dbg("Lost link, link now down (%s)\n",
-				  ipath_ibcstatus_str[st & 0xf]);
-		}
+		ipath_dbg("Lost link, link now down (%s)\n",
+			ipath_ibcstatus_str[ipath_read_kreg64(dd,
+			dd->ipath_kregs->kr_ibcstatus) & 0xf]);
 	}
 	if (errs & INFINIPATH_E_IBSTATUSCHANGED)
-		handle_e_ibstatuschanged(dd, errs, noprint);
+		handle_e_ibstatuschanged(dd, errs);
 
 	if (errs & INFINIPATH_E_RESET) {
 		if (!noprint)
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 70c0a0d..0cedc3f 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -427,6 +427,11 @@ struct ipath_devdata {
 
 	unsigned long ipath_ureg_align; /* user register alignment */
 
+	/* HoL blocking / user app forward-progress state */
+	unsigned          ipath_hol_state;
+	unsigned          ipath_hol_next;
+	struct timer_list ipath_hol_timer;
+
 	/*
 	 * Shadow copies of registers; size indicates read access size.
 	 * Most of them are readonly, but some are write-only register,
@@ -706,6 +711,13 @@ struct ipath_devdata {
 	u16 ipath_jint_max_packets;	/* max packets across all ports */
 };
 
+/* ipath_hol_state values (stopping/starting user proc, send flushing) */
+#define IPATH_HOL_UP       0
+#define IPATH_HOL_DOWN     1
+/* ipath_hol_next toggle values, used when hol_state IPATH_HOL_DOWN */
+#define IPATH_HOL_DOWNSTOP 0
+#define IPATH_HOL_DOWNCONT 1
+
 /* Private data for file operations */
 struct ipath_filedata {
 	struct ipath_portdata *pd;
@@ -722,6 +734,7 @@ void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, int *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
 void ipath_clear_freeze(struct ipath_devdata *);
+int ipath_signal_procs(struct ipath_devdata *, int);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -775,6 +788,9 @@ int ipath_set_lid(struct ipath_devdata *, u32, u8);
 int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 void ipath_enable_armlaunch(struct ipath_devdata *);
 void ipath_disable_armlaunch(struct ipath_devdata *);
+void ipath_hol_down(struct ipath_devdata *);
+void ipath_hol_up(struct ipath_devdata *);
+void ipath_hol_event(unsigned long);
 
 /* for use in system calls, where we want to know device type, etc. */
 #define port_fp(fp) ((struct ipath_filedata *)(fp)->private_data)->pd
@@ -830,6 +846,7 @@ void ipath_disable_armlaunch(struct ipath_devdata *);
 		/* Suppress heartbeat, even if turning off loopback */
 #define IPATH_NO_HRTBT      0x1000000
 #define IPATH_HAS_MULT_IB_SPEED 0x8000000
+#define IPATH_IB_FORCE_NOTIFY 0x80000000 /* force notify on next ib change */
 
 /* Bits in GPIO for the added interrupts */
 #define IPATH_GPIO_PORT0_BIT 2
@@ -1030,6 +1047,21 @@ static inline u32 ipath_ib_linktrstate(struct ipath_devdata *dd, u64 ibcs)
 }
 
 /*
+ * from contents of IBCStatus (or a saved copy), return logical link state
+ * combination of link state and linktraining state (down, active, init,
+ * arm, etc.
+ */
+static inline u32 ipath_ib_state(struct ipath_devdata *dd, u64 ibcs)
+{
+	u32 ibs;
+	ibs = (u32)(ibcs >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+		dd->ibcs_lts_mask;
+	ibs |= (u32)(ibcs &
+		(INFINIPATH_IBCS_LINKSTATE_MASK << dd->ibcs_ls_shift));
+	return ibs;
+}
+
+/*
  * sysfs interface.
  */
 
diff --git a/drivers/infiniband/hw/ipath/ipath_registers.h b/drivers/infiniband/hw/ipath/ipath_registers.h
index cb19ea2..16d0d74 100644
--- a/drivers/infiniband/hw/ipath/ipath_registers.h
+++ b/drivers/infiniband/hw/ipath/ipath_registers.h
@@ -200,7 +200,6 @@
 #define INFINIPATH_IBCC_LINKDOWNDEFAULTSTATE 0x4000000000000000ULL
 
 /* kr_ibcstatus bits */
-#define INFINIPATH_IBCS_LINKTRAININGSTATE_MASK 0xF
 #define INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT 0
 #define INFINIPATH_IBCS_LINKSTATE_MASK 0x7
 #define INFINIPATH_IBCS_LINKSTATE_SHIFT 4
@@ -221,30 +220,13 @@
 #define INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN	0x0c
 #define INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT	0x0e
 #define INFINIPATH_IBCS_LT_STATE_RECOVERIDLE	0x0f
-/* link state machine states (shift by INFINIPATH_IBCS_LINKSTATE_SHIFT) */
+/* link state machine states (shift by ibcs_ls_shift) */
 #define INFINIPATH_IBCS_L_STATE_DOWN		0x0
 #define INFINIPATH_IBCS_L_STATE_INIT		0x1
 #define INFINIPATH_IBCS_L_STATE_ARM		0x2
 #define INFINIPATH_IBCS_L_STATE_ACTIVE		0x3
 #define INFINIPATH_IBCS_L_STATE_ACT_DEFER	0x4
 
-/* combination link status states that we use with some frequency */
-#define IPATH_IBSTATE_MASK ((INFINIPATH_IBCS_LINKTRAININGSTATE_MASK \
-		<< INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LINKSTATE_MASK \
-		<<INFINIPATH_IBCS_LINKSTATE_SHIFT))
-#define IPATH_IBSTATE_INIT ((INFINIPATH_IBCS_L_STATE_INIT \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ARM ((INFINIPATH_IBCS_L_STATE_ARM \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
-#define IPATH_IBSTATE_ACTIVE ((INFINIPATH_IBCS_L_STATE_ACTIVE \
-		<< INFINIPATH_IBCS_LINKSTATE_SHIFT) | \
-		(INFINIPATH_IBCS_LT_STATE_LINKUP \
-		<<INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT))
 
 /* kr_extstatus bits */
 #define INFINIPATH_EXTS_SERDESPLLLOCK 0x1




More information about the general mailing list