[ofa-general] [PATCH 6/7] IB/ipath - need to always request and handle PIO avail interrupts

Ralph Campbell ralph.campbell at qlogic.com
Tue May 6 11:36:47 PDT 2008


From: Dave Olson <dave.olson at qlogic.com>

Now that we always use PIO for vl15 on 7220, we could get stuck forever
if we happened to run out of PIO buffers from the verbs code, because
the setup code wouldn't run; the interrupt was also ignored if SDMA
was supported.  We also have to reduce the pio update threshold if we
have fewer kernel buffers than the existing threshold.

Cleans up the initialization a bit to get ordering safer and
more sensible, and to use the existing ipath_chg_kernavail call
to do init, rather than doing it separately.

Drops unnecessary clearing of pio buffer on pio parity error.
Drops incorrect updating of pioavailshadow when exitting freeze
mode (software state may not match chip state if buffer has been
allocated and not yet written).
If we couldn't get a kernel buffer for a while, make sure we are
in sync with hardware, mainly to handle the exitting freeze case.

Signed-off-by: Dave Olson <dave.olson at qlogic.com>
---

 drivers/infiniband/hw/ipath/ipath_driver.c    |  128 +++++++++++++++++++++++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c  |   72 ++++++--------
 drivers/infiniband/hw/ipath/ipath_iba7220.c   |   21 +---
 drivers/infiniband/hw/ipath/ipath_init_chip.c |   95 ++++++++-----------
 drivers/infiniband/hw/ipath/ipath_intr.c      |   82 ++--------------
 drivers/infiniband/hw/ipath/ipath_kernel.h    |    8 +-
 drivers/infiniband/hw/ipath/ipath_ruc.c       |    7 +
 drivers/infiniband/hw/ipath/ipath_sdma.c      |   13 ++-
 8 files changed, 224 insertions(+), 202 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index f81dd4a..2036d38 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
 }
 
+/*
+ * used to force update of pioavailshadow if we can't get a pio buffer.
+ * Needed primarily due to exitting freeze mode after recovering
+ * from errors.  Done lazily, because it's safer (known to not
+ * be writing pio buffers).
+ */
+static void ipath_reset_availshadow(struct ipath_devdata *dd)
+{
+	int i, im;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ipath_pioavail_lock, flags);
+	for (i = 0; i < dd->ipath_pioavregs; i++) {
+		u64 val, oldval;
+		/* deal with 6110 chip bug on high register #s */
+		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+			i ^ 1 : i;
+		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
+		/*
+		 * busy out the buffers not in the kernel avail list,
+		 * without changing the generation bits.
+		 */
+		oldval = dd->ipath_pioavailshadow[i];
+		dd->ipath_pioavailshadow[i] = val |
+			((~dd->ipath_pioavailkernel[i] <<
+			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
+			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
+		if (oldval != dd->ipath_pioavailshadow[i])
+			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
+				i, oldval, dd->ipath_pioavailshadow[i]);
+	}
+	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
 /**
  * ipath_setrcvhdrsize - set the receive header size
  * @dd: the infinipath device
@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
 	 */
 	ipath_stats.sps_nopiobufs++;
 	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
-		ipath_dbg("%u pio sends with no bufavail; dmacopy: "
-			"%llx %llx %llx %llx; shadow:  %lx %lx %lx %lx\n",
+		ipath_force_pio_avail_update(dd); /* at start */
+		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
+			"%llx %llx %llx %llx\n"
+			"ipath  shadow:  %lx %lx %lx %lx\n",
 			dd->ipath_consec_nopiobuf,
+			(unsigned long)get_cycles(),
 			(unsigned long long) le64_to_cpu(dma[0]),
 			(unsigned long long) le64_to_cpu(dma[1]),
 			(unsigned long long) le64_to_cpu(dma[2]),
@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
 		 */
 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
 		    (sizeof(shadow[0]) * 4 * 4))
-			ipath_dbg("2nd group: dmacopy: %llx %llx "
-				  "%llx %llx; shadow: %lx %lx %lx %lx\n",
+			ipath_dbg("2nd group: dmacopy: "
+				  "%llx %llx %llx %llx\n"
+				  "ipath  shadow:  %lx %lx %lx %lx\n",
 				  (unsigned long long)le64_to_cpu(dma[4]),
 				  (unsigned long long)le64_to_cpu(dma[5]),
 				  (unsigned long long)le64_to_cpu(dma[6]),
 				  (unsigned long long)le64_to_cpu(dma[7]),
-				  shadow[4], shadow[5], shadow[6],
-				  shadow[7]);
+				  shadow[4], shadow[5], shadow[6], shadow[7]);
+
+		/* at end, so update likely happened */
+		ipath_reset_availshadow(dd);
 	}
 }
 
@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
 			      unsigned len, int avail)
 {
 	unsigned long flags;
-	unsigned end;
+	unsigned end, cnt = 0, next;
 
 	/* There are two bits per send buffer (busy and generation) */
 	start *= 2;
-	len *= 2;
-	end = start + len;
+	end = start + len * 2;
 
-	/* Set or clear the generation bits. */
 	spin_lock_irqsave(&ipath_pioavail_lock, flags);
+	/* Set or clear the busy bit in the shadow. */
 	while (start < end) {
 		if (avail) {
-			__clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
-				dd->ipath_pioavailshadow);
+			unsigned long dma;
+			int i, im;
+			/*
+			 * the BUSY bit will never be set, because we disarm
+			 * the user buffers before we hand them back to the
+			 * kernel.  We do have to make sure the generation
+			 * bit is set correctly in shadow, since it could
+			 * have changed many times while allocated to user.
+			 * We can't use the bitmap functions on the full
+			 * dma array because it is always little-endian, so
+			 * we have to flip to host-order first.
+			 * BITS_PER_LONG is slightly wrong, since it's
+			 * always 64 bits per register in chip...
+			 * We only work on 64 bit kernels, so that's OK.
+			 */
+			/* deal with 6110 chip bug on high register #s */
+			i = start / BITS_PER_LONG;
+			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+				i ^ 1 : i;
+			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
+				+ start, dd->ipath_pioavailshadow);
+			dma = (unsigned long) le64_to_cpu(
+				dd->ipath_pioavailregs_dma[im]);
+			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+				+ start) % BITS_PER_LONG, &dma))
+				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+					+ start, dd->ipath_pioavailshadow);
+			else
+				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
+					+ start, dd->ipath_pioavailshadow);
 			__set_bit(start, dd->ipath_pioavailkernel);
 		} else {
 			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
 		}
 		start += 2;
 	}
+
+	if (dd->ipath_pioupd_thresh) {
+		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+		next = find_first_bit(dd->ipath_pioavailkernel, end);
+		while (next < end) {
+			cnt++;
+			next = find_next_bit(dd->ipath_pioavailkernel, end,
+					next + 1);
+		}
+	}
 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+	/*
+	 * When moving buffers from kernel to user, if number assigned to
+	 * the user is less than the pio update threshold, and threshold
+	 * is supported (cnt was computed > 0), drop the update threshold
+	 * so we update at least once per allocated number of buffers.
+	 * In any case, if the kernel buffers are less than the threshold,
+	 * drop the threshold.  We don't bother increasing it, having once
+	 * decreased it, since it would typically just cycle back and forth.
+	 * If we don't decrease below buffers in use, we can wait a long
+	 * time for an update, until some other context uses PIO buffers.
+	 */
+	if (!avail && len < cnt)
+		cnt = len;
+	if (cnt < dd->ipath_pioupd_thresh) {
+		dd->ipath_pioupd_thresh = cnt;
+		ipath_dbg("Decreased pio update threshold to %u\n",
+			dd->ipath_pioupd_thresh);
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+			<< INFINIPATH_S_UPDTHRESH_SHIFT);
+		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+			<< INFINIPATH_S_UPDTHRESH_SHIFT;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+			dd->ipath_sendctrl);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
 }
 
 /**
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 8b17522..3295177 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
 		(void *) dd->ipath_statusp -
 		(void *) dd->ipath_pioavailregs_dma;
 	if (!shared) {
-		kinfo->spi_piocnt = dd->ipath_pbufsport;
+		kinfo->spi_piocnt = pd->port_piocnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
 		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
 			dd->ipath_ureg_align * pd->port_port;
 	} else if (master) {
-		kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
-				    (dd->ipath_pbufsport % subport_cnt);
+		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
+				    (pd->port_piocnt % subport_cnt);
 		/* Master's PIO buffers are after all the slave's */
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign *
-			(dd->ipath_pbufsport - kinfo->spi_piocnt);
+			(pd->port_piocnt - kinfo->spi_piocnt);
 	} else {
 		unsigned slave = subport_fp(fp) - 1;
 
-		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
+		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
 			dd->ipath_palign * kinfo->spi_piocnt * slave;
 	}
 
-	/*
-	 * Set the PIO avail update threshold to no larger
-	 * than the number of buffers per process. Note that
-	 * we decrease it here, but won't ever increase it.
-	 */
-	if (dd->ipath_pioupd_thresh &&
-	    kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
-		unsigned long flags;
-
-		dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
-		ipath_dbg("Decreased pio update threshold to %u\n",
-			dd->ipath_pioupd_thresh);
-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
-			<< INFINIPATH_S_UPDTHRESH_SHIFT);
-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			dd->ipath_sendctrl);
-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-	}
-
 	if (shared) {
 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
 			dd->ipath_ureg_align * pd->port_port;
@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
 	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
 	if (!pd->port_subport_cnt) {
 		/* port is not shared */
-		piocnt = dd->ipath_pbufsport;
+		piocnt = pd->port_piocnt;
 		piobufs = pd->port_piobufs;
 	} else if (!subport_fp(fp)) {
 		/* caller is the master */
-		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
-			 (dd->ipath_pbufsport % pd->port_subport_cnt);
+		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
+			 (pd->port_piocnt % pd->port_subport_cnt);
 		piobufs = pd->port_piobufs +
-			dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
+			dd->ipath_palign * (pd->port_piocnt - piocnt);
 	} else {
 		unsigned slave = subport_fp(fp) - 1;
 
 		/* caller is a slave */
-		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
+		piocnt = pd->port_piocnt / pd->port_subport_cnt;
 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
 	}
 
@@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
 		port_fp(fp) = pd;
 		pd->port_pid = current->pid;
 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
-		ipath_chg_pioavailkernel(dd,
-			dd->ipath_pbufsport * (pd->port_port - 1),
-			dd->ipath_pbufsport, 0);
 		ipath_stats.sps_ports++;
 		ret = 0;
 	} else
@@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp,
 
 	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
 
+	/* some ports may get extra buffers, calculate that here */
+	if (pd->port_port <= dd->ipath_ports_extrabuf)
+		pd->port_piocnt = dd->ipath_pbufsport + 1;
+	else
+		pd->port_piocnt = dd->ipath_pbufsport;
+
 	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
+	if (pd->port_port <= dd->ipath_ports_extrabuf)
+		pd->port_pio_base = (dd->ipath_pbufsport + 1)
+			* (pd->port_port - 1);
+	else
+		pd->port_pio_base = dd->ipath_ports_extrabuf +
+			dd->ipath_pbufsport * (pd->port_port - 1);
 	pd->port_piobufs = dd->ipath_piobufbase +
-		dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
-	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
-		   pd->port_port, pd->port_piobufs);
+		pd->port_pio_base * dd->ipath_palign;
+	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
+		" first pio %u\n", pd->port_port, pd->port_piobufs,
+		pd->port_piocnt, pd->port_pio_base);
+	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
 
 	/*
 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
@@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp)
 	}
 
 	if (dd->ipath_kregbase) {
-		int i;
 		/* atomically clear receive enable port and intr avail. */
 		clear_bit(dd->ipath_r_portenable_shift + port,
 			  &dd->ipath_rcvctrl);
@@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp)
 		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
 			pd->port_port, dd->ipath_dummy_hdrq_phys);
 
-		i = dd->ipath_pbufsport * (port - 1);
-		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
-		ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
+		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
+		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
+			pd->port_piocnt, 1);
 
 		dd->ipath_f_clear_tids(dd, pd->port_port);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
index 5f693de..8eee783 100644
--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
+++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
 
 	dev_info(&dd->pcidev->dev,
 		"Recovering from TXE PIO parity error\n");
-	ipath_disarm_senderrbufs(dd, 1);
+	ipath_disarm_senderrbufs(dd);
 }
 
 
@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
 	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
 		/*
-		 * Parity errors in send memory are recoverable,
-		 * just cancel the send (if indicated in * sendbuffererror),
-		 * count the occurrence, unfreeze (if no other handled
-		 * hardware error bits are set), and continue.
+		 * Parity errors in send memory are recoverable by h/w
+		 * just do housekeeping, exit freeze mode and continue.
 		 */
 		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
 			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
 				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
 				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
-			if (!hwerrs) {
-				/* else leave in freeze mode */
-				ipath_write_kreg(dd,
-						 dd->ipath_kregs->kr_control,
-						 dd->ipath_control);
-				goto bail;
-			}
 		}
 		if (hwerrs) {
 			/*
@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
 			dd->ipath_flags &= ~IPATH_INITTED;
 		} else {
-			ipath_dbg("Clearing freezemode on ignored hardware "
-				  "error\n");
+			ipath_dbg("Clearing freezemode on ignored or "
+				"recovered hardware error\n");
 			ipath_clear_freeze(dd);
 		}
 	}
@@ -1967,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
 			 dd->ipath_rcvctrl);
 	dd->ipath_p0_rcvegrcnt = 2048; /* always */
 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
-		dd->ipath_pioreserved = 1; /* reserve a buffer */
+		dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
 }
 
 
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index 27dd894..3e5baa4 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -41,7 +41,7 @@
 /*
  * min buffers we want to have per port, after driver
  */
-#define IPATH_MIN_USER_PORT_BUFCNT 8
+#define IPATH_MIN_USER_PORT_BUFCNT 7
 
 /*
  * Number of ports we are configured to use (to allow for more pio
@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
 
 /*
  * Number of buffers reserved for driver (verbs and layered drivers.)
- * Reserved at end of buffer list.   Initialized based on
- * number of PIO buffers if not set via module interface.
+ * Initialized based on number of PIO buffers if not set via module interface.
  * The problem with this is that it's global, but we'll use different
- * numbers for different chip types.  So the default value is not
- * very useful.  I've redefined it for the 1.3 release so that it's
- * zero unless set by the user to something else, in which case we
- * try to respect it.
+ * numbers for different chip types.
  */
 static ushort ipath_kpiobufs;
 
@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
 			pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
 		else
 			pioavail = dd->ipath_pioavailregs_dma[i];
-		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
-			(~dd->ipath_pioavailkernel[i] <<
-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
+		/*
+		 * don't need to worry about ipath_pioavailkernel here
+		 * because we will call ipath_chg_pioavailkernel() later
+		 * in initialization, to busy out buffers as needed
+		 */
+		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
 	}
 	/* can get counters, stats, etc. */
 	dd->ipath_flags |= IPATH_PRESENT;
@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
 int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 {
 	int ret = 0;
-	u32 val32, kpiobufs;
+	u32 kpiobufs, defkbufs;
 	u32 piobufs, uports;
 	u64 val;
 	struct ipath_portdata *pd;
 	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
-	unsigned long flags;
 
 	ret = init_housekeeping(dd, reinit);
 	if (ret)
@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
 	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
-	if (ipath_kpiobufs == 0) {
-		/* not set by user (this is default) */
-		if (piobufs > 144)
-			kpiobufs = 32;
-		else
-			kpiobufs = 16;
-	}
+	if (piobufs > 144)
+		defkbufs = 32 + dd->ipath_pioreserved;
 	else
-		kpiobufs = ipath_kpiobufs;
+		defkbufs = 16 + dd->ipath_pioreserved;
 
-	if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
+	if (ipath_kpiobufs && (ipath_kpiobufs +
+		(uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
 		int i = (int) piobufs -
 			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
 		if (i < 1)
 			i = 1;
 		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
 			 "%d for kernel leaves too few for %d user ports "
-			 "(%d each); using %u\n", kpiobufs,
+			 "(%d each); using %u\n", ipath_kpiobufs,
 			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
 		/*
 		 * shouldn't change ipath_kpiobufs, because could be
 		 * different for different devices...
 		 */
 		kpiobufs = i;
-	}
+	} else if (ipath_kpiobufs)
+		kpiobufs = ipath_kpiobufs;
+	else
+		kpiobufs = defkbufs;
 	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
 	dd->ipath_pbufsport =
 		uports ? dd->ipath_lastport_piobuf / uports : 0;
-	val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
-	if (val32 > 0) {
-		ipath_dbg("allocating %u pbufs/port leaves %u unused, "
-			  "add to kernel\n", dd->ipath_pbufsport, val32);
-		dd->ipath_lastport_piobuf -= val32;
-		kpiobufs += val32;
-		ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
-			  dd->ipath_pbufsport, val32);
-	}
+	/* if not an even divisor, some user ports get extra buffers */
+	dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
+		(dd->ipath_pbufsport * uports);
+	if (dd->ipath_ports_extrabuf)
+		ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
+			"ports <= %u\n", dd->ipath_pbufsport,
+			dd->ipath_ports_extrabuf);
 	dd->ipath_lastpioindex = 0;
 	dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
-	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+	/* ipath_pioavailshadow initialized earlier */
 	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
 		   "each for %u user ports\n", kpiobufs,
 		   piobufs, dd->ipath_pbufsport, uports);
-	if (dd->ipath_pioupd_thresh) {
-		if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
-			dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
-		if (kpiobufs < dd->ipath_pioupd_thresh)
-			dd->ipath_pioupd_thresh = kpiobufs;
-	}
-
 	ret = dd->ipath_f_early_init(dd);
 	if (ret) {
 		ipath_dev_err(dd, "Early initialization failure\n");
@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	}
 
 	/*
-	 * Cancel any possible active sends from early driver load.
-	 * Follows early_init because some chips have to initialize
-	 * PIO buffers in early_init to avoid false parity errors.
-	 */
-	ipath_cancel_sends(dd, 0);
-
-	/*
 	 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
 	 * done after early_init.
 	 */
@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
 			 dd->ipath_pioavailregs_phys);
+
 	/*
 	 * this is to detect s/w errors, which the h/w works around by
 	 * ignoring the low 6 bits of address, if it wasn't aligned.
@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
 
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-
 	/*
 	 * before error clears, since we expect serdes pll errors during
 	 * this, the first time after reset
@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	else
 		enable_chip(dd, reinit);
 
+	/* after enable_chip, so pioavailshadow setup */
+	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
+
+	/*
+	 * Cancel any possible active sends from early driver load.
+	 * Follows early_init because some chips have to initialize
+	 * PIO buffers in early_init to avoid false parity errors.
+	 * After enable and ipath_chg_pioavailkernel so we can safely
+	 * enable pioavail updates and PIOENABLE; packets are now
+	 * ready to go out.
+	 */
+	ipath_cancel_sends(dd, 1);
+
 	if (!reinit) {
 		/*
 		 * Used when we close a port, for DMA already in flight
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 45c4c06..26900b3 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -38,42 +38,12 @@
 #include "ipath_verbs.h"
 #include "ipath_common.h"
 
-/*
- * clear (write) a pio buffer, to clear a parity error.   This routine
- * should only be called when in freeze mode, and the buffer should be
- * canceled afterwards.
- */
-static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
-{
-	u32 __iomem *pbuf;
-	u32 dwcnt; /* dword count to write */
-	if (pnum < dd->ipath_piobcnt2k) {
-		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
-			dd->ipath_palign);
-		dwcnt = dd->ipath_piosize2k >> 2;
-	}
-	else {
-		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
-			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
-		dwcnt = dd->ipath_piosize4k >> 2;
-	}
-	dev_info(&dd->pcidev->dev,
-		"Rewrite PIO buffer %u, to recover from parity error\n",
-		pnum);
-
-	/* no flush required, since already in freeze */
-	writel(dwcnt + 1, pbuf);
-	while (--dwcnt)
-		writel(0, pbuf++);
-}
 
 /*
  * Called when we might have an error that is specific to a particular
  * PIO buffer, and may need to cancel that buffer, so it can be re-used.
- * If rewrite is true, and bits are set in the sendbufferror registers,
- * we'll write to the buffer, for error recovery on parity errors.
  */
-void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
 {
 	u32 piobcnt;
 	unsigned long sbuf[4];
@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 		}
 
 		for (i = 0; i < piobcnt; i++)
-			if (test_bit(i, sbuf)) {
-				if (rewrite)
-					ipath_clrpiobuf(dd, i);
+			if (test_bit(i, sbuf))
 				ipath_disarm_piobufs(dd, i, 1);
-			}
 		/* ignore armlaunch errs for a bit */
 		dd->ipath_lastcancel = jiffies+3;
 	}
@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	u64 ignore_this_time = 0;
 
-	ipath_disarm_senderrbufs(dd, 0);
+	ipath_disarm_senderrbufs(dd);
 	if ((errs & E_SUM_LINK_PKTERRS) &&
 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		/*
@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
  * processes (causing armlaunch), send errors due to going into freeze mode,
  * etc., and try to avoid causing extra interrupts while doing so.
  * Forcibly update the in-memory pioavail register copies after cleanup
- * because the chip won't do it for anything changing while in freeze mode
- * (we don't want to wait for the next pio buffer state change).
+ * because the chip won't do it while in freeze mode (the register values
+ * themselves are kept correct).
  * Make sure that we don't lose any important interrupts by using the chip
  * feature that says that writing 0 to a bit in *clear that is set in
  * *status will cause an interrupt to be generated again (if allowed by
@@ -918,48 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
  */
 void ipath_clear_freeze(struct ipath_devdata *dd)
 {
-	int i, im;
-	u64 val;
-
 	/* disable error interrupts, to avoid confusion */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
 
 	/* also disable interrupts; errormask is sometimes overwriten */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
 
-	/*
-	 * clear all sends, because they have may been
-	 * completed by usercode while in freeze mode, and
-	 * therefore would not be sent, and eventually
-	 * might cause the process to run out of bufs
-	 */
 	ipath_cancel_sends(dd, 1);
+
+	/* clear the freeze, and be sure chip saw it */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
 			 dd->ipath_control);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 
-	/*
-	 * ensure pio avail updates continue (because the update
-	 * won't have happened from cancel_sends because we were
-	 * still in freeze
-	 */
+	/* force in-memory update now we are out of freeze */
 	ipath_force_pio_avail_update(dd);
 
 	/*
-	 * We just enabled pioavailupdate, so dma copy is almost certainly
-	 * not yet right, so read the registers directly.  Similar to init
-	 */
-	for (i = 0; i < dd->ipath_pioavregs; i++) {
-		/* deal with 6110 chip bug */
-		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
-			i ^ 1 : i;
-		val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
-		dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
-		dd->ipath_pioavailshadow[i] = val |
-			(~dd->ipath_pioavailkernel[i] <<
-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
-	}
-
-	/*
 	 * force new interrupt if any hwerr, error or interrupt bits are
 	 * still set, and clear "safe" send packet errors related to freeze
 	 * and cancelling sends.  Re-enable error interrupts before possible
@@ -1316,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
-		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
-			handle_layer_pioavail(dd);
-		else
-			ipath_dbg("unexpected BUFAVAIL intr\n");
+		/* always process; sdma verbs uses PIO for acks and VL15  */
+		handle_layer_pioavail(dd);
 	}
 
 	ret = IRQ_HANDLED;
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 202337a..02b24a3 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -117,6 +117,10 @@ struct ipath_portdata {
 	u16 port_subport_cnt;
 	/* non-zero if port is being shared. */
 	u16 port_subport_id;
+	/* number of pio bufs for this port (all procs, if shared) */
+	u32 port_piocnt;
+	/* first pio buffer for this port */
+	u32 port_pio_base;
 	/* chip offset of PIO buffers for this port */
 	u32 port_piobufs;
 	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
@@ -384,6 +388,8 @@ struct ipath_devdata {
 	u32 ipath_lastrpkts;
 	/* pio bufs allocated per port */
 	u32 ipath_pbufsport;
+	/* if remainder on bufs/port, ports < extrabuf get 1 extra */
+	u32 ipath_ports_extrabuf;
 	u32 ipath_pioupd_thresh; /* update threshold, some chips */
 	/*
 	 * number of ports configured as max; zero is set to number chip
@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
 int ipath_update_eeprom_log(struct ipath_devdata *dd);
 void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
-void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
+void ipath_disarm_senderrbufs(struct ipath_devdata *);
 void ipath_force_pio_avail_update(struct ipath_devdata *);
 void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
 
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index 8ac5c1d..9e3fe61 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -481,9 +481,10 @@ done:
 		wake_up(&qp->wait);
 }
 
-static void want_buffer(struct ipath_devdata *dd)
+static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
 {
-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
+	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
+		qp->ibqp.qp_type == IB_QPT_SMI) {
 		unsigned long flags;
 
 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
@@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp,
 	spin_lock_irqsave(&dev->pending_lock, flags);
 	list_add_tail(&qp->piowait, &dev->piowait);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
-	want_buffer(dev->dd);
+	want_buffer(dev->dd, qp);
 	dev->n_piowait++;
 }
 
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 1974df7..0d07682 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -449,16 +449,19 @@ int setup_sdma(struct ipath_devdata *dd)
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
 			 dd->ipath_sdma_head_phys);
 
-	/* Reserve all the former "kernel" piobufs */
-	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
-	for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
+	/*
+	 * Reserve all the former "kernel" piobufs, using high number range
+	 * so we get as many 4K buffers as possible
+	 */
+	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
+	i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
+	ipath_chg_pioavailkernel(dd, i, n - i , 0);
+	for (; i < n; ++i) {
 		unsigned word = i / 64;
 		unsigned bit = i & 63;
 		BUG_ON(word >= 3);
 		senddmabufmask[word] |= 1ULL << bit;
 	}
-	ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
-		n - dd->ipath_lastport_piobuf, 0);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
 			 senddmabufmask[0]);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,




More information about the general mailing list