[ofa-general] [PATCH 19/20] IB/ipath - add calls to new 7220 code and enable in build

Ralph Campbell ralph.campbell at qlogic.com
Wed Apr 2 15:50:38 PDT 2008


From: Dave Olson <dave.olson at qlogic.com>

This patch adds the initialization calls into the new 7220 HCA files,
changes the Makefile to compile and link the new files, and adds the
code to handle send DMA.

Signed-off-by: Dave Olson <dave.olson at qlogic.com>
---

 drivers/infiniband/hw/ipath/Makefile          |    3 
 drivers/infiniband/hw/ipath/ipath_common.h    |   16 +
 drivers/infiniband/hw/ipath/ipath_driver.c    |  150 ++++++++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c  |   97 ++++++
 drivers/infiniband/hw/ipath/ipath_init_chip.c |    4 
 drivers/infiniband/hw/ipath/ipath_intr.c      |  239 +++++++++++----
 drivers/infiniband/hw/ipath/ipath_kernel.h    |    4 
 drivers/infiniband/hw/ipath/ipath_qp.c        |   14 +
 drivers/infiniband/hw/ipath/ipath_ruc.c       |   18 +
 drivers/infiniband/hw/ipath/ipath_sdma.c      |   91 ++++--
 drivers/infiniband/hw/ipath/ipath_stats.c     |    4 
 drivers/infiniband/hw/ipath/ipath_ud.c        |    1 
 drivers/infiniband/hw/ipath/ipath_verbs.c     |  391 ++++++++++++++++++++++++-
 13 files changed, 896 insertions(+), 136 deletions(-)

diff --git a/drivers/infiniband/hw/ipath/Makefile b/drivers/infiniband/hw/ipath/Makefile
index fe67388..75a6c91 100644
--- a/drivers/infiniband/hw/ipath/Makefile
+++ b/drivers/infiniband/hw/ipath/Makefile
@@ -20,17 +20,20 @@ ib_ipath-y := \
 	ipath_qp.o \
 	ipath_rc.o \
 	ipath_ruc.o \
+	ipath_sdma.o \
 	ipath_srq.o \
 	ipath_stats.o \
 	ipath_sysfs.o \
 	ipath_uc.o \
 	ipath_ud.o \
 	ipath_user_pages.o \
+	ipath_user_sdma.o \
 	ipath_verbs_mcast.o \
 	ipath_verbs.o
 
 ib_ipath-$(CONFIG_HT_IRQ) += ipath_iba6110.o
 ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba6120.o
+ib_ipath-$(CONFIG_PCI_MSI) += ipath_iba7220.o ipath_sd7220.o ipath_sd7220_img.o
 
 ib_ipath-$(CONFIG_X86_64) += ipath_wc_x86_64.o
 ib_ipath-$(CONFIG_PPC64) += ipath_wc_ppc64.o
diff --git a/drivers/infiniband/hw/ipath/ipath_common.h b/drivers/infiniband/hw/ipath/ipath_common.h
index 02fd310..2cf7cd2 100644
--- a/drivers/infiniband/hw/ipath/ipath_common.h
+++ b/drivers/infiniband/hw/ipath/ipath_common.h
@@ -447,8 +447,9 @@ struct ipath_user_info {
 #define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
 #define IPATH_CMD_POLL_TYPE	28	/* set the kind of polling we want */
 #define IPATH_CMD_ARMLAUNCH_CTRL	29 /* armlaunch detection control */
-
-#define IPATH_CMD_MAX		29
+/* 30 is unused */
+#define IPATH_CMD_SDMA_INFLIGHT 31	/* sdma inflight counter request */
+#define IPATH_CMD_SDMA_COMPLETE 32	/* sdma completion counter request */
 
 /*
  * Poll types
@@ -486,6 +487,17 @@ struct ipath_cmd {
 	union {
 		struct ipath_tid_info tid_info;
 		struct ipath_user_info user_info;
+
+		/*
+		 * address in userspace where we should put the sdma
+		 * inflight counter
+		 */
+		__u64 sdma_inflight;
+		/*
+		 * address in userspace where we should put the sdma
+		 * completion counter
+		 */
+		__u64 sdma_complete;
 		/* address in userspace of struct ipath_port_info to
 		   write result to */
 		__u64 port_info;
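
The two new commands follow the driver's existing write()-based command
ABI: userspace stores the address of a counter in the union above, and
the driver put_user()s the current value to it.  A minimal userspace
sketch, assuming an already-opened /dev/ipath file descriptor (the
helper name and error handling here are illustrative, not part of the
patch):

	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>

	/* read the sdma completion counter via IPATH_CMD_SDMA_COMPLETE */
	static int query_sdma_complete(int fd, uint32_t *counter)
	{
		struct ipath_cmd cmd;

		memset(&cmd, 0, sizeof(cmd));
		cmd.type = IPATH_CMD_SDMA_COMPLETE;
		/* driver writes the counter to this userspace address */
		cmd.cmd.sdma_complete = (uint64_t)(unsigned long)counter;
		return write(fd, &cmd, sizeof(cmd)) < 0 ? -1 : 0;
	}
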
diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
index 66982a9..8ccc915 100644
--- a/drivers/infiniband/hw/ipath/ipath_driver.c
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c
@@ -129,8 +129,10 @@ static int __devinit ipath_init_one(struct pci_dev *,
 
 /* Only needed for registration, nothing else needs this info */
 #define PCI_VENDOR_ID_PATHSCALE 0x1fc1
+#define PCI_VENDOR_ID_QLOGIC 0x1077
 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+#define PCI_DEVICE_ID_INFINIPATH_7220 0x7220
 
 /* Number of seconds before our card status check...  */
 #define STATUS_TIMEOUT 60
@@ -138,6 +140,7 @@ static int __devinit ipath_init_one(struct pci_dev *,
 static const struct pci_device_id ipath_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, PCI_DEVICE_ID_INFINIPATH_7220) },
 	{ 0, }
 };
 
@@ -532,6 +535,13 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 			      "CONFIG_PCI_MSI is not enabled\n", ent->device);
 		return -ENODEV;
 #endif
+	case PCI_DEVICE_ID_INFINIPATH_7220:
+#ifndef CONFIG_PCI_MSI
+		ipath_dbg("CONFIG_PCI_MSI is not enabled, "
+			  "using INTx for unit %u\n", dd->ipath_unit);
+#endif
+		ipath_init_iba7220_funcs(dd);
+		break;
 	default:
 		ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
 			      "failing\n", ent->device);
@@ -887,13 +897,47 @@ int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
 	return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
 }
 
+static void decode_sdma_errs(struct ipath_devdata *dd, ipath_err_t err,
+	char *buf, size_t blen)
+{
+	static const struct {
+		ipath_err_t err;
+		const char *msg;
+	} errs[] = {
+		{ INFINIPATH_E_SDMAGENMISMATCH, "SDmaGenMismatch" },
+		{ INFINIPATH_E_SDMAOUTOFBOUND, "SDmaOutOfBound" },
+		{ INFINIPATH_E_SDMATAILOUTOFBOUND, "SDmaTailOutOfBound" },
+		{ INFINIPATH_E_SDMABASE, "SDmaBase" },
+		{ INFINIPATH_E_SDMA1STDESC, "SDma1stDesc" },
+		{ INFINIPATH_E_SDMARPYTAG, "SDmaRpyTag" },
+		{ INFINIPATH_E_SDMADWEN, "SDmaDwEn" },
+		{ INFINIPATH_E_SDMAMISSINGDW, "SDmaMissingDw" },
+		{ INFINIPATH_E_SDMAUNEXPDATA, "SDmaUnexpData" },
+		{ INFINIPATH_E_SDMADESCADDRMISALIGN, "SDmaDescAddrMisalign" },
+		{ INFINIPATH_E_SENDBUFMISUSE, "SendBufMisuse" },
+		{ INFINIPATH_E_SDMADISABLED, "SDmaDisabled" },
+	};
+	int i;
+	int expected;
+	size_t bidx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(errs); i++) {
+		expected = (errs[i].err != INFINIPATH_E_SDMADISABLED) ? 0 :
+			test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+		if ((err & errs[i].err) && !expected)
+			bidx += snprintf(buf + bidx, blen - bidx,
+					 "%s ", errs[i].msg);
+	}
+}
+
 /*
  * Decode the error status into strings, deciding whether to always
  * print it or not depending on "normal packet errors" vs everything
  * else.  Return 1 if "real" errors, otherwise 0 if only packet
  * errors, so caller can decide what to print with the string.
  */
-int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+	ipath_err_t err)
 {
 	int iserr = 1;
 	*buf = '\0';
@@ -975,6 +1019,8 @@ int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
 		strlcat(buf, "hardware ", blen);
 	if (err & INFINIPATH_E_RESET)
 		strlcat(buf, "reset ", blen);
+	if (err & INFINIPATH_E_SDMAERRS)
+		decode_sdma_errs(dd, err, buf, blen);
 	if (err & INFINIPATH_E_INVALIDEEPCMD)
 		strlcat(buf, "invalideepromcmd ", blen);
 done:
@@ -1730,30 +1776,80 @@ bail:
  */
 void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
 {
+	unsigned long flags;
+
 	if (dd->ipath_flags & IPATH_IB_AUTONEG_INPROG) {
 		ipath_cdbg(VERBOSE, "Ignore while in autonegotiation\n");
 		goto bail;
 	}
+	/*
+	 * If we have SDMA, and it's not disabled, we have to kick off the
+	 * abort state machine, provided we aren't already aborting.
+	 * If we are in the process of aborting SDMA (!DISABLED, but ABORTING),
+	 * we skip the rest of this routine; it is already "in progress".
+	 */
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA) {
+		int skip_cancel;
+		u64 *statp = &dd->ipath_sdma_status;
+
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		skip_cancel =
+			!test_bit(IPATH_SDMA_DISABLED, statp) &&
+			test_and_set_bit(IPATH_SDMA_ABORTING, statp);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+		if (skip_cancel)
+			goto bail;
+	}
+
 	ipath_dbg("Cancelling all in-progress send buffers\n");
 
 	/* skip armlaunch errs for a while */
 	dd->ipath_lastcancel = jiffies + HZ / 2;
 
 	/*
-	 * the abort bit is auto-clearing.  We read scratch to be sure
-	 * that cancels and the abort have taken effect in the chip.
+	 * The abort bit is auto-clearing.  We also don't want pioavail
+	 * update happening during this, and we don't want any other
+	 * sends going out, so turn those off for the duration.  We read
+	 * the scratch register to be sure that cancels and the abort
+	 * have taken effect in the chip.  Otherwise the two parts are
+	 * the same as in ipath_force_pio_avail_update().
 	 */
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	dd->ipath_sendctrl &= ~(INFINIPATH_S_PIOBUFAVAILUPD
+		| INFINIPATH_S_PIOENABLE);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-		INFINIPATH_S_ABORT);
+		dd->ipath_sendctrl | INFINIPATH_S_ABORT);
 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+	/* disarm all send buffers */
 	ipath_disarm_piobufs(dd, 0,
-		(unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
-	if (restore_sendctrl) /* else done by caller later */
+		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+
+	if (restore_sendctrl) {
+		/* else done by caller later if needed */
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl |= INFINIPATH_S_PIOBUFAVAILUPD |
+			INFINIPATH_S_PIOENABLE;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
+			dd->ipath_sendctrl);
+		/* and again, be sure all have hit the chip */
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
 
-	/* and again, be sure all have hit the chip */
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	if ((dd->ipath_flags & IPATH_HAS_SEND_DMA) &&
+	    !test_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status) &&
+	    test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)) {
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		/* only wait so long for intr */
+		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
+		dd->ipath_sdma_reset_wait = 200;
+		__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	}
 bail:;
 }
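
For reference, the two flavors of the reworked cancel as used elsewhere
in this patch (a sketch of the call sites, not new code): the SDMA
error and interrupt handlers restore send state immediately, while
device shutdown leaves sends disabled for its own sequencing.

	ipath_cancel_sends(dd, 1);	/* handle_sdma_errors()/intr: re-enable
					 * PIOBUFAVAILUPD and PIOENABLE */
	ipath_cancel_sends(dd, 0);	/* ipath_shutdown_device(): caller
					 * restores sendctrl later, if at all */
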
 
@@ -1952,7 +2048,7 @@ bail:
  * sanity checking on this, and we don't deal with what happens to
  * programs that are already running when the size changes.
  * NOTE: changing the MTU will usually cause the IBC to go back to
- * link initialize (IPATH_IBSTATE_INIT) state...
+ * link INIT state...
  */
 int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
 {
@@ -2092,9 +2188,8 @@ static void ipath_run_led_override(unsigned long opaque)
 	 * but leave that to per-chip functions.
 	 */
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
-	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
-		  dd->ibcs_lts_mask;
-	lstate = (val >> dd->ibcs_ls_shift) & INFINIPATH_IBCS_LINKSTATE_MASK;
+	ltstate = ipath_ib_linktrstate(dd, val);
+	lstate = ipath_ib_linkstate(dd, val);
 
 	dd->ipath_f_setextled(dd, lstate, ltstate);
 	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
@@ -2170,6 +2265,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
 			 dd->ipath_rcvctrl);
 
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		teardown_sdma(dd);
+
 	/*
 	 * gracefully stop all sends allowing any in progress to trickle out
 	 * first.
@@ -2187,9 +2285,16 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	 */
 	udelay(5);
 
+	dd->ipath_f_setextled(dd, 0, 0); /* make sure LEDs are off */
+
 	ipath_set_ib_lstate(dd, 0, INFINIPATH_IBCC_LINKINITCMD_DISABLE);
 	ipath_cancel_sends(dd, 0);
 
+	/*
+	 * We are shutting down, so tell the components that care.  We
+	 * don't do this on just a link state change; as with ethernet, a
+	 * cable unplug or the like doesn't change driver state.
+	 */
 	signal_ib_event(dd, IB_EVENT_PORT_ERR);
 
 	/* disable IBC */
@@ -2214,6 +2319,10 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 		del_timer_sync(&dd->ipath_intrchk_timer);
 		dd->ipath_intrchk_timer.data = 0;
 	}
+	if (atomic_read(&dd->ipath_led_override_timer_active)) {
+		del_timer_sync(&dd->ipath_led_override_timer);
+		atomic_set(&dd->ipath_led_override_timer_active, 0);
+	}
 
 	/*
 	 * clear all interrupts and errors, so that the next time the driver
@@ -2408,13 +2517,18 @@ int ipath_reset_device(int unit)
 			}
 		}
 
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		teardown_sdma(dd);
+
 	dd->ipath_flags &= ~IPATH_INITTED;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
 	ret = dd->ipath_f_reset(dd);
-	if (ret != 1)
-		ipath_dbg("reset was not successful\n");
-	ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
-		  unit);
-	ret = ipath_init_chip(dd, 1);
+	if (ret == 1) {
+		ipath_dbg("Reinitializing unit %u after reset attempt\n",
+			  unit);
+		ret = ipath_init_chip(dd, 1);
+	} else
+		ret = -EAGAIN;
 	if (ret)
 		ipath_dev_err(dd, "Reinitialize unit %u after "
 			      "reset failed with %d\n", unit, ret);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index b87d312..d38ba29 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -36,21 +36,28 @@
 #include <linux/cdev.h>
 #include <linux/swap.h>
 #include <linux/vmalloc.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/jiffies.h>
 #include <asm/pgtable.h>
 
 #include "ipath_kernel.h"
 #include "ipath_common.h"
+#include "ipath_user_sdma.h"
 
 static int ipath_open(struct inode *, struct file *);
 static int ipath_close(struct inode *, struct file *);
 static ssize_t ipath_write(struct file *, const char __user *, size_t,
 			   loff_t *);
+static ssize_t ipath_writev(struct kiocb *, const struct iovec *,
+			    unsigned long, loff_t);
 static unsigned int ipath_poll(struct file *, struct poll_table_struct *);
 static int ipath_mmap(struct file *, struct vm_area_struct *);
 
 static const struct file_operations ipath_file_ops = {
 	.owner = THIS_MODULE,
 	.write = ipath_write,
+	.aio_write = ipath_writev,
 	.open = ipath_open,
 	.release = ipath_close,
 	.poll = ipath_poll,
@@ -1870,10 +1877,9 @@ static int ipath_assign_port(struct file *fp,
 	if (ipath_compatible_subports(swmajor, swminor) &&
 	    uinfo->spu_subport_cnt &&
 	    (ret = find_shared_port(fp, uinfo))) {
-		mutex_unlock(&ipath_mutex);
 		if (ret > 0)
 			ret = 0;
-		goto done;
+		goto done_chk_sdma;
 	}
 
 	i_minor = iminor(fp->f_path.dentry->d_inode) - IPATH_USER_MINOR_BASE;
@@ -1885,6 +1891,21 @@ static int ipath_assign_port(struct file *fp,
 	else
 		ret = find_best_unit(fp, uinfo);
 
+done_chk_sdma:
+	if (!ret) {
+		struct ipath_filedata *fd = fp->private_data;
+		const struct ipath_portdata *pd = fd->pd;
+		const struct ipath_devdata *dd = pd->port_dd;
+
+		fd->pq = ipath_user_sdma_queue_create(&dd->pcidev->dev,
+						      dd->ipath_unit,
+						      pd->port_port,
+						      fd->subport);
+
+		if (!fd->pq)
+			ret = -ENOMEM;
+	}
+
 	mutex_unlock(&ipath_mutex);
 
 done:
@@ -2042,6 +2063,13 @@ static int ipath_close(struct inode *in, struct file *fp)
 		mutex_unlock(&ipath_mutex);
 		goto bail;
 	}
+
+	dd = pd->port_dd;
+
+	/* drain user sdma queue */
+	ipath_user_sdma_queue_drain(dd, fd->pq);
+	ipath_user_sdma_queue_destroy(fd->pq);
+
 	if (--pd->port_cnt) {
 		/*
 		 * XXX If the master closes the port before the slave(s),
@@ -2054,7 +2082,6 @@ static int ipath_close(struct inode *in, struct file *fp)
 		goto bail;
 	}
 	port = pd->port_port;
-	dd = pd->port_dd;
 
 	if (pd->port_hdrqfull) {
 		ipath_cdbg(PROC, "%s[%u] had %u rcvhdrqfull errors "
@@ -2176,6 +2203,35 @@ static int ipath_get_slave_info(struct ipath_portdata *pd,
 	return ret;
 }
 
+static int ipath_sdma_get_inflight(struct ipath_user_sdma_queue *pq,
+				   u32 __user *inflightp)
+{
+	const u32 val = ipath_user_sdma_inflight_counter(pq);
+
+	if (put_user(val, inflightp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int ipath_sdma_get_complete(struct ipath_devdata *dd,
+				   struct ipath_user_sdma_queue *pq,
+				   u32 __user *completep)
+{
+	u32 val;
+	int err;
+
+	err = ipath_user_sdma_make_progress(dd, pq);
+	if (err < 0)
+		return err;
+
+	val = ipath_user_sdma_complete_counter(pq);
+	if (put_user(val, completep))
+		return -EFAULT;
+
+	return 0;
+}
+
 static ssize_t ipath_write(struct file *fp, const char __user *data,
 			   size_t count, loff_t *off)
 {
@@ -2250,6 +2306,16 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 		dest = &cmd.cmd.armlaunch_ctrl;
 		src = &ucmd->cmd.armlaunch_ctrl;
 		break;
+	case IPATH_CMD_SDMA_INFLIGHT:
+		copy = sizeof(cmd.cmd.sdma_inflight);
+		dest = &cmd.cmd.sdma_inflight;
+		src = &ucmd->cmd.sdma_inflight;
+		break;
+	case IPATH_CMD_SDMA_COMPLETE:
+		copy = sizeof(cmd.cmd.sdma_complete);
+		dest = &cmd.cmd.sdma_complete;
+		src = &ucmd->cmd.sdma_complete;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
@@ -2331,6 +2397,17 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 		else
 			ipath_disable_armlaunch(pd->port_dd);
 		break;
+	case IPATH_CMD_SDMA_INFLIGHT:
+		ret = ipath_sdma_get_inflight(user_sdma_queue_fp(fp),
+					      (u32 __user *) (unsigned long)
+					      cmd.cmd.sdma_inflight);
+		break;
+	case IPATH_CMD_SDMA_COMPLETE:
+		ret = ipath_sdma_get_complete(pd->port_dd,
+					      user_sdma_queue_fp(fp),
+					      (u32 __user *) (unsigned long)
+					      cmd.cmd.sdma_complete);
+		break;
 	}
 
 	if (ret >= 0)
@@ -2340,6 +2417,20 @@ bail:
 	return ret;
 }
 
+static ssize_t ipath_writev(struct kiocb *iocb, const struct iovec *iov,
+			    unsigned long dim, loff_t off)
+{
+	struct file *filp = iocb->ki_filp;
+	struct ipath_filedata *fp = filp->private_data;
+	struct ipath_portdata *pd = port_fp(filp);
+	struct ipath_user_sdma_queue *pq = fp->pq;
+
+	if (!dim)
+		return -EINVAL;
+
+	return ipath_user_sdma_writev(pd->port_dd, pq, iov, dim);
+}
+
 static struct class *ipath_class;
 
 static int init_cdev(int minor, char *name, const struct file_operations *fops,
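
With .aio_write wired up, user SDMA submissions arrive via writev(2) on
the port file descriptor and are passed through to
ipath_user_sdma_writev().  A hypothetical userspace sketch; the
two-iovec header/payload layout is an assumption for illustration (the
real packet format is defined by the user-sdma code, not this patch):

	#include <sys/uio.h>

	/* hand one packet (header, then payload) to the SDMA engine */
	static ssize_t submit_sdma_pkt(int fd, void *hdr, size_t hlen,
				       void *data, size_t dlen)
	{
		struct iovec iov[2] = {
			{ .iov_base = hdr,  .iov_len = hlen },
			{ .iov_base = data, .iov_len = dlen },
		};
		return writev(fd, iov, 2);	/* lands in ipath_writev() */
	}
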
diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
index c012e05..b43c2a1 100644
--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
+++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
@@ -980,6 +980,10 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 		dd->ipath_stats_timer_active = 1;
 	}
 
+	/* Set up SendDMA if chip supports it */
+	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		ret = setup_sdma(dd);
+
 	/* Set up HoL state */
 	init_timer(&dd->ipath_hol_timer);
 	dd->ipath_hol_timer.function = ipath_hol_event;
diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
index 90b972f..d0088d5 100644
--- a/drivers/infiniband/hw/ipath/ipath_intr.c
+++ b/drivers/infiniband/hw/ipath/ipath_intr.c
@@ -433,6 +433,8 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 			dd->ipath_flags &= ~(IPATH_LINKUNK | IPATH_LINKINIT
 				| IPATH_LINKDOWN | IPATH_LINKARMED |
 				IPATH_NOCABLE);
+			if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+				ipath_restart_sdma(dd);
 			signal_ib_event(dd, IB_EVENT_PORT_ACTIVE);
 			/* LED active not handled in chip _f_updown */
 			dd->ipath_f_setextled(dd, lstate, ltstate);
@@ -480,7 +482,7 @@ done:
 }
 
 static void handle_supp_msgs(struct ipath_devdata *dd,
-			     unsigned supp_msgs, char *msg, int msgsz)
+			     unsigned supp_msgs, char *msg, u32 msgsz)
 {
 	/*
 	 * Print the message unless it's ibc status change only, which
@@ -488,12 +490,19 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 	 */
 	if (dd->ipath_lasterror & ~INFINIPATH_E_IBSTATUSCHANGED) {
 		int iserr;
-		iserr = ipath_decode_err(msg, msgsz,
+		ipath_err_t mask;
+		iserr = ipath_decode_err(dd, msg, msgsz,
 					 dd->ipath_lasterror &
 					 ~INFINIPATH_E_IBSTATUSCHANGED);
-		if (dd->ipath_lasterror &
-			~(INFINIPATH_E_RRCVEGRFULL |
-			INFINIPATH_E_RRCVHDRFULL | INFINIPATH_E_PKTERRS))
+
+		mask = INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+			INFINIPATH_E_PKTERRS | INFINIPATH_E_SDMADISABLED;
+
+		/* if we're in debug, then don't mask SDMADISABLED msgs */
+		if (ipath_debug & __IPATH_DBG)
+			mask &= ~INFINIPATH_E_SDMADISABLED;
+
+		if (dd->ipath_lasterror & ~mask)
 			ipath_dev_err(dd, "Suppressed %u messages for "
 				      "fast-repeating errors (%s) (%llx)\n",
 				      supp_msgs, msg,
@@ -520,7 +529,7 @@ static void handle_supp_msgs(struct ipath_devdata *dd,
 
 static unsigned handle_frequent_errors(struct ipath_devdata *dd,
 				       ipath_err_t errs, char *msg,
-				       int msgsz, int *noprint)
+				       u32 msgsz, int *noprint)
 {
 	unsigned long nc;
 	static unsigned long nextmsg_time;
@@ -550,19 +559,125 @@ static unsigned handle_frequent_errors(struct ipath_devdata *dd,
 	return supp_msgs;
 }
 
+static void handle_sdma_errors(struct ipath_devdata *dd, ipath_err_t errs)
+{
+	unsigned long flags;
+	int expected;
+
+	if (ipath_debug & __IPATH_DBG) {
+		char msg[128];
+		ipath_decode_err(dd, msg, sizeof msg, errs &
+			INFINIPATH_E_SDMAERRS);
+		ipath_dbg("errors %lx (%s)\n", (unsigned long)errs, msg);
+	}
+	if (ipath_debug & __IPATH_VERBDBG) {
+		unsigned long tl, hd, status, lengen;
+		tl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmatail);
+		hd = ipath_read_kreg64(dd, dd->ipath_kregs->kr_senddmahead);
+		status = ipath_read_kreg64(dd,
+			dd->ipath_kregs->kr_senddmastatus);
+		lengen = ipath_read_kreg64(dd,
+			dd->ipath_kregs->kr_senddmalengen);
+		ipath_cdbg(VERBOSE, "sdma tl 0x%lx hd 0x%lx status 0x%lx "
+			"lengen 0x%lx\n", tl, hd, status, lengen);
+	}
+
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+	expected = test_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	if (!expected)
+		ipath_cancel_sends(dd, 1);
+}
+
+static void handle_sdma_intr(struct ipath_devdata *dd, u64 istat)
+{
+	unsigned long flags;
+	int expected;
+
+	if ((istat & INFINIPATH_I_SDMAINT) &&
+	    !test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+		ipath_sdma_intr(dd);
+
+	if (istat & INFINIPATH_I_SDMADISABLED) {
+		expected = test_bit(IPATH_SDMA_ABORTING,
+			&dd->ipath_sdma_status);
+		ipath_dbg("%s SDmaDisabled intr\n",
+			expected ? "expected" : "unexpected");
+		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+		__set_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+		if (!expected)
+			ipath_cancel_sends(dd, 1);
+		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+	}
+}
+
+static int handle_hdrq_full(struct ipath_devdata *dd)
+{
+	int chkerrpkts = 0;
+	u32 hd, tl;
+	u32 i;
+
+	ipath_stats.sps_hdrqfull++;
+	for (i = 0; i < dd->ipath_cfgports; i++) {
+		struct ipath_portdata *pd = dd->ipath_pd[i];
+
+		if (i == 0) {
+			/*
+			 * For kernel receive queues, we just want to know
+			 * if there are packets in the queue that we can
+			 * process.
+			 */
+			if (pd->port_head != ipath_get_hdrqtail(pd))
+				chkerrpkts |= 1 << i;
+			continue;
+		}
+
+		/* Skip if user context is not open */
+		if (!pd || !pd->port_cnt)
+			continue;
+
+		/* Don't report the same point multiple times. */
+		if (dd->ipath_flags & IPATH_NODMA_RTAIL)
+			tl = ipath_read_ureg32(dd, ur_rcvhdrtail, i);
+		else
+			tl = ipath_get_rcvhdrtail(pd);
+		if (tl == pd->port_lastrcvhdrqtail)
+			continue;
+
+		hd = ipath_read_ureg32(dd, ur_rcvhdrhead, i);
+		if (hd == (tl + 1) || (!hd && tl == dd->ipath_hdrqlast)) {
+			pd->port_lastrcvhdrqtail = tl;
+			pd->port_hdrqfull++;
+			/* flush hdrqfull so that poll() sees it */
+			wmb();
+			wake_up_interruptible(&pd->port_wait);
+		}
+	}
+
+	return chkerrpkts;
+}
+
 static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 {
 	char msg[128];
 	u64 ignore_this_time = 0;
-	int i, iserr = 0;
+	u64 iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
 	int log_idx;
 
-	supp_msgs = handle_frequent_errors(dd, errs, msg, sizeof msg, &noprint);
+	/*
+	 * don't report errors that are masked, either at init
+	 * (not set in ipath_errormask), or temporarily (set in
+	 * ipath_maskederrs)
+	 */
+	errs &= dd->ipath_errormask & ~dd->ipath_maskederrs;
 
-	/* don't report errors that are masked */
-	errs &= ~dd->ipath_maskederrs;
+	supp_msgs = handle_frequent_errors(dd, errs, msg, (u32)sizeof msg,
+		&noprint);
 
 	/* do these first, they are most important */
 	if (errs & INFINIPATH_E_HARDWARE) {
@@ -577,6 +692,9 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		}
 	}
 
+	if (errs & INFINIPATH_E_SDMAERRS)
+		handle_sdma_errors(dd, errs);
+
 	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
 		ipath_dev_err(dd, "error interrupt with unknown errors "
 			      "%llx set\n", (unsigned long long)
@@ -611,7 +729,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		dd->ipath_errormask &= ~dd->ipath_maskederrs;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
 				 dd->ipath_errormask);
-		s_iserr = ipath_decode_err(msg, sizeof msg,
+		s_iserr = ipath_decode_err(dd, msg, sizeof msg,
 					   dd->ipath_maskederrs);
 
 		if (dd->ipath_maskederrs &
@@ -661,26 +779,43 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 			  INFINIPATH_E_IBSTATUSCHANGED);
 	}
 
-	/* likely due to cancel, so suppress */
+	if (errs & INFINIPATH_E_SENDSPECIALTRIGGER) {
+		dd->ipath_spectriggerhit++;
+		ipath_dbg("%lu special trigger hits\n",
+			dd->ipath_spectriggerhit);
+	}
+
+	/* likely due to cancel; so suppress message unless verbose */
 	if ((errs & (INFINIPATH_E_SPKTLEN | INFINIPATH_E_SPIOARMLAUNCH)) &&
 		dd->ipath_lastcancel > jiffies) {
-		ipath_dbg("Suppressed armlaunch/spktlen after error send cancel\n");
+		/* armlaunch takes precedence; it often causes both. */
+		ipath_cdbg(VERBOSE,
+			"Suppressed %s error (%llx) after sendbuf cancel\n",
+			(errs &  INFINIPATH_E_SPIOARMLAUNCH) ?
+			"armlaunch" : "sendpktlen", (unsigned long long)errs);
 		errs &= ~(INFINIPATH_E_SPIOARMLAUNCH | INFINIPATH_E_SPKTLEN);
 	}
 
 	if (!errs)
 		return 0;
 
-	if (!noprint)
+	if (!noprint) {
+		ipath_err_t mask;
 		/*
-		 * the ones we mask off are handled specially below or above
+		 * The ones we mask off are handled specially below
+		 * or above.  Also mask SDMADISABLED by default as it
+		 * is too chatty.
 		 */
-		ipath_decode_err(msg, sizeof msg,
-				 errs & ~(INFINIPATH_E_IBSTATUSCHANGED |
-					  INFINIPATH_E_RRCVEGRFULL |
-					  INFINIPATH_E_RRCVHDRFULL |
-					  INFINIPATH_E_HARDWARE));
-	else
+		mask = INFINIPATH_E_IBSTATUSCHANGED |
+			INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
+			INFINIPATH_E_HARDWARE | INFINIPATH_E_SDMADISABLED;
+
+		/* if we're in debug, then don't mask SDMADISABLED msgs */
+		if (ipath_debug & __IPATH_DBG)
+			mask &= ~INFINIPATH_E_SDMADISABLED;
+
+		ipath_decode_err(dd, msg, sizeof msg, errs & ~mask);
+	} else
 		/* so we don't need if (!noprint) at strlcat's below */
 		*msg = 0;
 
@@ -705,39 +840,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	 * fast_stats, no more than every 5 seconds, user ports get printed
 	 * on close
 	 */
-	if (errs & INFINIPATH_E_RRCVHDRFULL) {
-		u32 hd, tl;
-		ipath_stats.sps_hdrqfull++;
-		for (i = 0; i < dd->ipath_cfgports; i++) {
-			struct ipath_portdata *pd = dd->ipath_pd[i];
-			if (i == 0) {
-				hd = pd->port_head;
-				tl = ipath_get_hdrqtail(pd);
-			} else if (pd && pd->port_cnt &&
-				   pd->port_rcvhdrtail_kvaddr) {
-				/*
-				 * don't report same point multiple times,
-				 * except kernel
-				 */
-				tl = *(u64 *) pd->port_rcvhdrtail_kvaddr;
-				if (tl == pd->port_lastrcvhdrqtail)
-					continue;
-				hd = ipath_read_ureg32(dd, ur_rcvhdrhead,
-						       i);
-			} else
-				continue;
-			if (hd == (tl + 1) ||
-			    (!hd && tl == dd->ipath_hdrqlast)) {
-				if (i == 0)
-					chkerrpkts = 1;
-				pd->port_lastrcvhdrqtail = tl;
-				pd->port_hdrqfull++;
-				/* flush hdrqfull so that poll() sees it */
-				wmb();
-				wake_up_interruptible(&pd->port_wait);
-			}
-		}
-	}
+	if (errs & INFINIPATH_E_RRCVHDRFULL)
+		chkerrpkts |= handle_hdrq_full(dd);
 	if (errs & INFINIPATH_E_RRCVEGRFULL) {
 		struct ipath_portdata *pd = dd->ipath_pd[0];
 
@@ -749,7 +853,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 		 */
 		ipath_stats.sps_etidfull++;
 		if (pd->port_head != ipath_get_hdrqtail(pd))
-			chkerrpkts = 1;
+			chkerrpkts |= 1;
 	}
 
 	/*
@@ -788,9 +892,6 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	if (!noprint && *msg) {
 		if (iserr)
 			ipath_dev_err(dd, "%s error\n", msg);
-		else
-			dev_info(&dd->pcidev->dev, "%s packet problems\n",
-				msg);
 	}
 	if (dd->ipath_state_wanted & dd->ipath_flags) {
 		ipath_cdbg(VERBOSE, "driver wanted state %x, iflags now %x, "
@@ -1017,7 +1118,7 @@ static void handle_urcv(struct ipath_devdata *dd, u64 istat)
 irqreturn_t ipath_intr(int irq, void *data)
 {
 	struct ipath_devdata *dd = data;
-	u32 istat, chk0rcv = 0;
+	u64 istat, chk0rcv = 0;
 	ipath_err_t estat = 0;
 	irqreturn_t ret;
 	static unsigned unexpected = 0;
@@ -1070,17 +1171,17 @@ irqreturn_t ipath_intr(int irq, void *data)
 
 	if (unlikely(istat & ~dd->ipath_i_bitsextant))
 		ipath_dev_err(dd,
-			      "interrupt with unknown interrupts %x set\n",
-			      istat & (u32) ~ dd->ipath_i_bitsextant);
-	else
-		ipath_cdbg(VERBOSE, "intr stat=0x%x\n", istat);
+			      "interrupt with unknown interrupts %Lx set\n",
+			      istat & ~dd->ipath_i_bitsextant);
+	else if (istat & ~INFINIPATH_I_ERROR) /* errors do own printing */
+		ipath_cdbg(VERBOSE, "intr stat=0x%Lx\n", istat);
 
-	if (unlikely(istat & INFINIPATH_I_ERROR)) {
+	if (istat & INFINIPATH_I_ERROR) {
 		ipath_stats.sps_errints++;
 		estat = ipath_read_kreg64(dd,
 					  dd->ipath_kregs->kr_errorstatus);
 		if (!estat)
-			dev_info(&dd->pcidev->dev, "error interrupt (%x), "
+			dev_info(&dd->pcidev->dev, "error interrupt (%Lx), "
 				 "but no error bits set!\n", istat);
 		else if (estat == -1LL)
 			/*
@@ -1198,6 +1299,9 @@ irqreturn_t ipath_intr(int irq, void *data)
 		     (dd->ipath_i_rcvurg_mask << dd->ipath_i_rcvurg_shift)))
 		handle_urcv(dd, istat);
 
+	if (istat & (INFINIPATH_I_SDMAINT | INFINIPATH_I_SDMADISABLED))
+		handle_sdma_intr(dd, istat);
+
 	if (istat & INFINIPATH_I_SPIOBUFAVAIL) {
 		unsigned long flags;
 
@@ -1208,7 +1312,10 @@ irqreturn_t ipath_intr(int irq, void *data)
 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
 
-		handle_layer_pioavail(dd);
+		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+			handle_layer_pioavail(dd);
+		else
+			ipath_dbg("unexpected BUFAVAIL intr\n");
 	}
 
 	ret = IRQ_HANDLED;
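
Note that handle_hdrq_full() returns a bitmask rather than the old
boolean: bit i set means receive header queue i has packets worth
processing (with this patch only the kernel port, bit 0, is ever set;
user ports are just woken).  A sketch of how a caller would consume it
(illustrative only, not the actual dispatch code):

	int chkerrpkts = handle_hdrq_full(dd);
	u32 i;

	for (i = 0; i < dd->ipath_cfgports; i++)
		if (chkerrpkts & (1 << i))
			ipath_kreceive(dd->ipath_pd[i]);	/* drain queue i */
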
diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
index 1d5adf6..a4857b9 100644
--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
+++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
@@ -872,7 +872,8 @@ struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd, gfp_t);
 extern int ipath_diag_inuse;
 
 irqreturn_t ipath_intr(int irq, void *devid);
-int ipath_decode_err(char *buf, size_t blen, ipath_err_t err);
+int ipath_decode_err(struct ipath_devdata *dd, char *buf, size_t blen,
+		     ipath_err_t err);
 #if __IPATH_INFO || __IPATH_DBG
 extern const char *ipath_ibcstatus_str[];
 #endif
@@ -1027,6 +1028,7 @@ void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
 /* send dma routines */
 int setup_sdma(struct ipath_devdata *);
 void teardown_sdma(struct ipath_devdata *);
+void ipath_restart_sdma(struct ipath_devdata *);
 void ipath_sdma_intr(struct ipath_devdata *);
 int ipath_sdma_verbs_send(struct ipath_devdata *, struct ipath_sge_state *,
 			  u32, struct ipath_verbs_txreq *);
diff --git a/drivers/infiniband/hw/ipath/ipath_qp.c b/drivers/infiniband/hw/ipath/ipath_qp.c
index 812b42c..ded970b 100644
--- a/drivers/infiniband/hw/ipath/ipath_qp.c
+++ b/drivers/infiniband/hw/ipath/ipath_qp.c
@@ -340,6 +340,7 @@ static void ipath_reset_qp(struct ipath_qp *qp, enum ib_qp_type type)
 	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_wqe = NULL;
+	qp->s_pkt_delay = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
 	qp->r_msn = 0;
@@ -563,8 +564,10 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	if (attr_mask & IB_QP_ACCESS_FLAGS)
 		qp->qp_access_flags = attr->qp_access_flags;
 
-	if (attr_mask & IB_QP_AV)
+	if (attr_mask & IB_QP_AV) {
 		qp->remote_ah_attr = attr->ah_attr;
+		qp->s_dmult = ipath_ib_rate_to_mult(attr->ah_attr.static_rate);
+	}
 
 	if (attr_mask & IB_QP_PATH_MTU)
 		qp->path_mtu = attr->path_mtu;
@@ -850,6 +853,7 @@ struct ib_qp *ipath_create_qp(struct ib_pd *ibpd,
 			goto bail_qp;
 		}
 		qp->ip = NULL;
+		qp->s_tx = NULL;
 		ipath_reset_qp(qp, init_attr->qp_type);
 		break;
 
@@ -955,12 +959,20 @@ int ipath_destroy_qp(struct ib_qp *ibqp)
 	/* Stop the sending tasklet. */
 	tasklet_kill(&qp->s_task);
 
+	if (qp->s_tx) {
+		atomic_dec(&qp->refcount);
+		if (qp->s_tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+			kfree(qp->s_tx->txreq.map_addr);
+	}
+
 	/* Make sure the QP isn't on the timeout list. */
 	spin_lock_irqsave(&dev->pending_lock, flags);
 	if (!list_empty(&qp->timerwait))
 		list_del_init(&qp->timerwait);
 	if (!list_empty(&qp->piowait))
 		list_del_init(&qp->piowait);
+	if (qp->s_tx)
+		list_add(&qp->s_tx->txreq.list, &dev->txreq_free);
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
 
 	/*
diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
index a59bdbd..bcaa291 100644
--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
+++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
@@ -483,14 +483,16 @@ done:
 
 static void want_buffer(struct ipath_devdata *dd)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 dd->ipath_sendctrl);
-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+		dd->ipath_sendctrl |= INFINIPATH_S_PIOINTBUFAVAIL;
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+				 dd->ipath_sendctrl);
+		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+	}
 }
 
 /**
diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
index 5918caf..1974df7 100644
--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
+++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
@@ -230,7 +230,6 @@ static void dump_sdma_state(struct ipath_devdata *dd)
 static void sdma_abort_task(unsigned long opaque)
 {
 	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
-	int kick = 0;
 	u64 status;
 	unsigned long flags;
 
@@ -308,30 +307,26 @@ static void sdma_abort_task(unsigned long opaque)
 		/* done with sdma state for a bit */
 		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
 
-		/* restart sdma engine */
+		/*
+		 * Don't restart sdma here. Wait until link is up to ACTIVE.
+		 * VL15 MADs used to bring the link up use PIO, and multiple
+		 * link transitions otherwise cause the sdma engine to be
+		 * stopped and started multiple times.
+		 * The disable is done here, including the shadow, so the
+		 * state is kept consistent.
+		 * See ipath_restart_sdma() for the actual starting of sdma.
+		 */
 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
 		dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 dd->ipath_sendctrl);
 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
-		dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 dd->ipath_sendctrl);
-		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
-		kick = 1;
-		ipath_dbg("sdma restarted from abort\n");
-
-		/* now clear status bits */
-		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
-		__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
-		__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
 
 		/* make sure I see next message */
 		dd->ipath_sdma_abort_jiffies = 0;
 
-		goto unlock;
+		goto done;
 	}
 
 resched:
@@ -353,10 +348,8 @@ resched_noprint:
 
 unlock:
 	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
-
-	/* kick upper layers */
-	if (kick)
-		ipath_ib_piobufavail(dd->verbs_dev);
+done:
+	return;
 }
 
 /*
@@ -481,10 +474,14 @@ int setup_sdma(struct ipath_devdata *dd)
 	tasklet_init(&dd->ipath_sdma_abort_task, sdma_abort_task,
 		     (unsigned long) dd);
 
-	/* Turn on SDMA */
+	/*
+	 * No use to turn on SDMA here, as the link is probably not ACTIVE.
+	 * Just mark it RUNNING and enable the interrupt, and let the
+	 * ipath_restart_sdma() on link transition to ACTIVE actually
+	 * enable it.
+	 */
 	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
-	dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE |
-		INFINIPATH_S_SDMAINTENABLE;
+	dd->ipath_sendctrl |= INFINIPATH_S_SDMAINTENABLE;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	__set_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status);
@@ -572,6 +569,56 @@ void teardown_sdma(struct ipath_devdata *dd)
 				  sdma_descq, sdma_descq_phys);
 }
 
+/*
+ * [Re]start SDMA, if we use it, and it's not already OK.
+ * This is called on transition to link ACTIVE, either the first or
+ * subsequent times.
+ */
+void ipath_restart_sdma(struct ipath_devdata *dd)
+{
+	unsigned long flags;
+	int needed = 1;
+
+	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+		goto bail;
+
+	/*
+	 * First, make sure we should, which is to say,
+	 * check that we are "RUNNING" (not in teardown)
+	 * and not "SHUTDOWN"
+	 */
+	spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+	if (!test_bit(IPATH_SDMA_RUNNING, &dd->ipath_sdma_status)
+		|| test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+			needed = 0;
+	else {
+		__clear_bit(IPATH_SDMA_DISABLED, &dd->ipath_sdma_status);
+		__clear_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+		__clear_bit(IPATH_SDMA_ABORTING, &dd->ipath_sdma_status);
+	}
+	spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+	if (!needed) {
+		ipath_dbg("invalid attempt to restart SDMA, status 0x%016llx\n",
+			dd->ipath_sdma_status);
+		goto bail;
+	}
+	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+	/*
+	 * First clear, just to be safe. Enable is only done
+	 * in chip on 0->1 transition
+	 */
+	dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	dd->ipath_sendctrl |= INFINIPATH_S_SDMAENABLE;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+
+bail:
+	return;
+}
+
 static inline void make_sdma_desc(struct ipath_devdata *dd,
 	u64 *sdmadesc, u64 addr, u64 dwlen, u64 dwoffset)
 {
diff --git a/drivers/infiniband/hw/ipath/ipath_stats.c b/drivers/infiniband/hw/ipath/ipath_stats.c
index adff2f1..1e36bac 100644
--- a/drivers/infiniband/hw/ipath/ipath_stats.c
+++ b/drivers/infiniband/hw/ipath/ipath_stats.c
@@ -292,8 +292,8 @@ void ipath_get_faststats(unsigned long opaque)
 	    && time_after(jiffies, dd->ipath_unmasktime)) {
 		char ebuf[256];
 		int iserr;
-		iserr = ipath_decode_err(ebuf, sizeof ebuf,
-			dd->ipath_maskederrs);
+		iserr = ipath_decode_err(dd, ebuf, sizeof ebuf,
+					 dd->ipath_maskederrs);
 		if (dd->ipath_maskederrs &
 		    ~(INFINIPATH_E_RRCVEGRFULL | INFINIPATH_E_RRCVHDRFULL |
 		      INFINIPATH_E_PKTERRS))
diff --git a/drivers/infiniband/hw/ipath/ipath_ud.c b/drivers/infiniband/hw/ipath/ipath_ud.c
index de67eed..4d4d58d 100644
--- a/drivers/infiniband/hw/ipath/ipath_ud.c
+++ b/drivers/infiniband/hw/ipath/ipath_ud.c
@@ -303,6 +303,7 @@ int ipath_make_ud_req(struct ipath_qp *qp)
 	qp->s_hdrwords = 7;
 	qp->s_cur_size = wqe->length;
 	qp->s_cur_sge = &qp->s_sge;
+	qp->s_dmult = ah_attr->static_rate;
 	qp->s_wqe = wqe;
 	qp->s_sge.sge = wqe->sg_list[0];
 	qp->s_sge.sg_list = wqe->sg_list + 1;
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 2e6b6f6..434a0d8 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -242,6 +242,93 @@ static void ipath_flush_wqe(struct ipath_qp *qp, struct ib_send_wr *wr)
 	ipath_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 1);
 }
 
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the ipath_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
+{
+	struct ipath_sge *sg_list = ss->sg_list;
+	struct ipath_sge sge = ss->sge;
+	u8 num_sge = ss->num_sge;
+	u32 ndesc = 1;	/* count the header */
+
+	while (length) {
+		u32 len = sge.length;
+
+		if (len > length)
+			len = length;
+		if (len > sge.sge_length)
+			len = sge.sge_length;
+		BUG_ON(len == 0);
+		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+		    (len != length && (len & (sizeof(u32) - 1)))) {
+			ndesc = 0;
+			break;
+		}
+		ndesc++;
+		sge.vaddr += len;
+		sge.length -= len;
+		sge.sge_length -= len;
+		if (sge.sge_length == 0) {
+			if (--num_sge)
+				sge = *sg_list++;
+		} else if (sge.length == 0 && sge.mr != NULL) {
+			if (++sge.n >= IPATH_SEGSZ) {
+				if (++sge.m >= sge.mr->mapsz)
+					break;
+				sge.n = 0;
+			}
+			sge.vaddr =
+				sge.mr->map[sge.m]->segs[sge.n].vaddr;
+			sge.length =
+				sge.mr->map[sge.m]->segs[sge.n].length;
+		}
+		length -= len;
+	}
+	return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
+				u32 length)
+{
+	struct ipath_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		memcpy(data, sge->vaddr, len);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr != NULL) {
+			if (++sge->n >= IPATH_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		data += len;
+		length -= len;
+	}
+}
+
 /**
  * ipath_post_one_send - post one RC, UC, or UD send work request
  * @qp: the QP to post on
@@ -866,13 +953,231 @@ static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
 		__raw_writel(last, piobuf);
 }
 
-static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
+/*
+ * Convert IB rate to delay multiplier.
+ */
+unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
+{
+	switch (rate) {
+	case IB_RATE_2_5_GBPS: return 8;
+	case IB_RATE_5_GBPS:   return 4;
+	case IB_RATE_10_GBPS:  return 2;
+	case IB_RATE_20_GBPS:  return 1;
+	default:	       return 0;
+	}
+}
+
+/*
+ * Convert delay multiplier to IB rate
+ */
+enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
+{
+	switch (mult) {
+	case 8:  return IB_RATE_2_5_GBPS;
+	case 4:  return IB_RATE_5_GBPS;
+	case 2:  return IB_RATE_10_GBPS;
+	case 1:  return IB_RATE_20_GBPS;
+	default: return IB_RATE_PORT_CURRENT;
+	}
+}
+
+static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
+{
+	struct ipath_verbs_txreq *tx = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	if (!list_empty(&dev->txreq_free)) {
+		struct list_head *l = dev->txreq_free.next;
+
+		list_del(l);
+		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
+	}
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	return tx;
+}
+
+static inline void put_txreq(struct ipath_ibdev *dev,
+			     struct ipath_verbs_txreq *tx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	list_add(&tx->txreq.list, &dev->txreq_free);
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+static void sdma_complete(void *cookie, int status)
+{
+	struct ipath_verbs_txreq *tx = cookie;
+	struct ipath_qp *qp = tx->qp;
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+
+	/* Generate a completion queue entry if needed */
+	if (qp->ibqp.qp_type != IB_QPT_RC && tx->wqe) {
+		enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
+			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
+
+		ipath_send_complete(qp, tx->wqe, ibs);
+	}
+
+	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
+		kfree(tx->txreq.map_addr);
+	put_txreq(dev, tx);
+
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/*
+ * Compute the number of clock cycles of delay before sending the next packet.
+ * The multipliers reflect the number of clocks for the fastest rate so
+ * one tick at 4xDDR is 8 ticks at 1xSDR.
+ * If the destination port will take longer to receive a packet than
+ * the outgoing link can send it, we need to delay sending the next packet
+ * by the difference in time it takes the receiver to receive and the sender
+ * to send this packet.
+ * Note that this delay is always correct for UC and RC but not always
+ * optimal for UD. For UD, the destination HCA can be different for each
+ * packet, in which case, we could send packets to a different destination
+ * while "waiting" for the delay. The overhead for doing this without
+ * HW support is more than just paying the cost of delaying some packets
+ * unnecessarily.
+ */
+static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
+{
+	return (rcv_mult > snd_mult) ?
+		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
+}
+
+static int ipath_verbs_send_dma(struct ipath_qp *qp,
+				struct ipath_ib_header *hdr, u32 hdrwords,
+				struct ipath_sge_state *ss, u32 len,
+				u32 plen, u32 dwords)
+{
+	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
+	struct ipath_devdata *dd = dev->dd;
+	struct ipath_verbs_txreq *tx;
+	u32 *piobuf;
+	u32 control;
+	u32 ndesc;
+	int ret;
+
+	tx = qp->s_tx;
+	if (tx) {
+		qp->s_tx = NULL;
+		/* resend previously constructed packet */
+		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
+		if (ret)
+			qp->s_tx = tx;
+		goto bail;
+	}
+
+	tx = get_txreq(dev);
+	if (!tx) {
+		ret = -EBUSY;
+		goto bail;
+	}
+
+	/*
+	 * Get the saved delay count we computed for the previous packet
+	 * and save the delay count for this packet to be used next time
+	 * we get here.
+	 */
+	control = qp->s_pkt_delay;
+	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+	tx->qp = qp;
+	atomic_inc(&qp->refcount);
+	tx->wqe = qp->s_wqe;
+	tx->txreq.callback = sdma_complete;
+	tx->txreq.callback_cookie = tx;
+	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
+		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
+	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
+		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
+
+	/* VL15 packets bypass credit check */
+	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
+		control |= 1ULL << 31;
+		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
+	}
+
+	if (len) {
+		/*
+		 * Don't try to DMA if it takes more descriptors than
+		 * the queue holds.
+		 */
+		ndesc = ipath_count_sge(ss, len);
+		if (ndesc >= dd->ipath_sdma_descq_cnt)
+			ndesc = 0;
+	} else
+		ndesc = 1;
+	if (ndesc) {
+		tx->hdr.pbc[0] = cpu_to_le32(plen);
+		tx->hdr.pbc[1] = cpu_to_le32(control);
+		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
+		tx->txreq.sg_count = ndesc;
+		tx->map_len = (hdrwords + 2) << 2;
+		tx->txreq.map_addr = &tx->hdr;
+		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
+		if (ret) {
+			/* save ss and length in dwords */
+			tx->ss = ss;
+			tx->len = dwords;
+			qp->s_tx = tx;
+		}
+		goto bail;
+	}
+
+	/* Allocate a buffer and copy the header and payload to it. */
+	tx->map_len = (plen + 1) << 2;
+	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
+	if (unlikely(piobuf == NULL)) {
+		ret = -EBUSY;
+		goto err_tx;
+	}
+	tx->txreq.map_addr = piobuf;
+	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
+	tx->txreq.sg_count = 1;
+
+	*piobuf++ = (__force u32) cpu_to_le32(plen);
+	*piobuf++ = (__force u32) cpu_to_le32(control);
+	memcpy(piobuf, hdr, hdrwords << 2);
+	ipath_copy_from_sge(piobuf + hdrwords, ss, len);
+
+	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
+	/*
+	 * If we couldn't queue the DMA request, save the info
+	 * and try again later rather than destroying the
+	 * buffer and undoing the side effects of the copy.
+	 */
+	if (ret) {
+		tx->ss = NULL;
+		tx->len = 0;
+		qp->s_tx = tx;
+	}
+	dev->n_unaligned++;
+	goto bail;
+
+err_tx:
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+	put_txreq(dev, tx);
+bail:
+	return ret;
+}
+
+static int ipath_verbs_send_pio(struct ipath_qp *qp,
+				struct ipath_ib_header *ibhdr, u32 hdrwords,
 				struct ipath_sge_state *ss, u32 len,
 				u32 plen, u32 dwords)
 {
 	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
+	u32 *hdr = (u32 *) ibhdr;
 	u32 __iomem *piobuf;
 	unsigned flush_wc;
+	u32 control;
 	int ret;
 
 	piobuf = ipath_getpiobuf(dd, plen, NULL);
@@ -882,11 +1187,23 @@ static int ipath_verbs_send_pio(struct ipath_qp *qp, u32 *hdr, u32 hdrwords,
 	}
 
 	/*
-	 * Write len to control qword, no flags.
+	 * Get the saved delay count we computed for the previous packet
+	 * and save the delay count for this packet to be used next time
+	 * we get here.
+	 */
+	control = qp->s_pkt_delay;
+	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
+
+	/* VL15 packets bypass credit check */
+	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
+		control |= 1ULL << 31;
+
+	/*
+	 * Write the length to the control qword plus any needed flags.
 	 * We have to flush after the PBC for correctness on some cpus
 	 * or WC buffer can be written out of order.
 	 */
-	writeq(plen, piobuf);
+	writeq(((u64) control << 32) | plen, piobuf);
 	piobuf += 2;
 
 	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
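
The static-rate machinery is easiest to see with numbers.  The
multipliers are clock ratios relative to the fastest link (4X DDR = 1,
1X SDR = 8), and the delay ends up in the control half of the PBC
qword written above.  A worked example (values illustrative):

	/* 4X DDR sender (snd_mult 1) to a 1X SDR receiver (rcv_mult 8) */
	unsigned delay = ipath_pkt_delay(522, 1, 8);
	/* (522 * (8 - 1) + 1) >> 1 = 1827 clocks before the next packet */

	/* receiver at least as fast as sender: no delay needed */
	unsigned none = ipath_pkt_delay(522, 2, 1);	/* 0 */
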
@@ -961,15 +1278,25 @@ int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
 	 */
 	plen = hdrwords + dwords + 1;
 
-	/* Drop non-VL15 packets if we are not in the active state */
-	if (!(dd->ipath_flags & IPATH_LINKACTIVE) &&
-	    qp->ibqp.qp_type != IB_QPT_SMI) {
+	/*
+	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+	 * can defer SDMA restart until link goes ACTIVE without
+	 * worrying about just how we got there.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_SMI)
+		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
+	/* All non-VL15 packets are dropped if link is not ACTIVE */
+	else if (!(dd->ipath_flags & IPATH_LINKACTIVE)) {
 		if (qp->s_wqe)
 			ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
 		ret = 0;
-	} else
-		ret = ipath_verbs_send_pio(qp, (u32 *) hdr, hdrwords,
-					   ss, len, plen, dwords);
+	} else if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
+	else
+		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+					   plen, dwords);
 
 	return ret;
 }
@@ -1038,6 +1365,12 @@ int ipath_get_counters(struct ipath_devdata *dd,
 		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
 		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
 		dd->ipath_rxfc_unsupvl_errs;
+	if (crp->cr_rxotherlocalphyerrcnt)
+		cntrs->port_rcv_errors +=
+			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
+	if (crp->cr_rxvlerrcnt)
+		cntrs->port_rcv_errors +=
+			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
 	cntrs->port_rcv_remphys_errors =
 		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
 	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
@@ -1046,9 +1379,16 @@ int ipath_get_counters(struct ipath_devdata *dd,
 	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
 	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
 	cntrs->local_link_integrity_errors =
-		(dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
-		dd->ipath_lli_errs : dd->ipath_lli_errors;
-	cntrs->excessive_buffer_overrun_errors = dd->ipath_overrun_thresh_errs;
+		crp->cr_locallinkintegrityerrcnt ?
+		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
+		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
+		 dd->ipath_lli_errs : dd->ipath_lli_errors);
+	cntrs->excessive_buffer_overrun_errors =
+		crp->cr_excessbufferovflcnt ?
+		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
+		dd->ipath_overrun_thresh_errs;
+	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
+		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
 
 	ret = 0;
 
@@ -1396,6 +1736,7 @@ static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
 
 	/* ib_create_ah() will initialize ah->ibah. */
 	ah->attr = *ah_attr;
+	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
 
 	ret = &ah->ibah;
 
@@ -1429,6 +1770,7 @@ static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
 	struct ipath_ah *ah = to_iah(ibah);
 
 	*ah_attr = ah->attr;
+	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
 
 	return 0;
 }
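
These two hunks set up a deliberate asymmetry: internally,
ah->attr.static_rate holds the delay multiplier, so in-kernel
consumers (ipath_make_ud_req() copies it straight into qp->s_dmult)
never convert, while ib_query_ah() callers see a proper enum ib_rate
again.  The round-trip, for concreteness (illustrative):

	ah->attr.static_rate = ipath_ib_rate_to_mult(IB_RATE_10_GBPS); /* 2 */
	rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
	/* back to IB_RATE_10_GBPS */
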
@@ -1578,6 +1920,8 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	struct ipath_verbs_counters cntrs;
 	struct ipath_ibdev *idev;
 	struct ib_device *dev;
+	struct ipath_verbs_txreq *tx;
+	unsigned i;
 	int ret;
 
 	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
@@ -1588,6 +1932,17 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 
 	dev = &idev->ibdev;
 
+	if (dd->ipath_sdma_descq_cnt) {
+		tx = kmalloc(dd->ipath_sdma_descq_cnt * sizeof *tx,
+			     GFP_KERNEL);
+		if (tx == NULL) {
+			ret = -ENOMEM;
+			goto err_tx;
+		}
+	} else
+		tx = NULL;
+	idev->txreq_bufs = tx;
+
 	/* Only need to initialize non-zero fields. */
 	spin_lock_init(&idev->n_pds_lock);
 	spin_lock_init(&idev->n_ahs_lock);
@@ -1628,6 +1983,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	INIT_LIST_HEAD(&idev->pending[2]);
 	INIT_LIST_HEAD(&idev->piowait);
 	INIT_LIST_HEAD(&idev->rnrwait);
+	INIT_LIST_HEAD(&idev->txreq_free);
 	idev->pending_index = 0;
 	idev->port_cap_flags =
 		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
@@ -1659,6 +2015,9 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 		cntrs.excessive_buffer_overrun_errors;
 	idev->z_vl15_dropped = cntrs.vl15_dropped;
 
+	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
+		list_add(&tx->txreq.list, &idev->txreq_free);
+
 	/*
 	 * The system image GUID is supposed to be the same for all
 	 * IB HCAs in a single system but since there can be other
@@ -1708,6 +2067,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	dev->phys_port_cnt = 1;
 	dev->num_comp_vectors = 1;
 	dev->dma_device = &dd->pcidev->dev;
+	dev->class_dev.dev = dev->dma_device;
 	dev->query_device = ipath_query_device;
 	dev->modify_device = ipath_modify_device;
 	dev->query_port = ipath_query_port;
@@ -1772,6 +2132,8 @@ err_reg:
 err_lk:
 	kfree(idev->qp_table.table);
 err_qp:
+	kfree(idev->txreq_bufs);
+err_tx:
 	ib_dealloc_device(dev);
 	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
 	idev = NULL;
@@ -1806,6 +2168,7 @@ void ipath_unregister_ib_device(struct ipath_ibdev *dev)
 	ipath_free_all_qps(&dev->qp_table);
 	kfree(dev->qp_table.table);
 	kfree(dev->lk_table.table);
+	kfree(dev->txreq_bufs);
 	ib_dealloc_device(ibdev);
 }
 
@@ -1853,13 +2216,15 @@ static ssize_t show_stats(struct class_device *cdev, char *buf)
 		      "RC stalls   %d\n"
 		      "piobuf wait %d\n"
 		      "no piobuf   %d\n"
+		      "unaligned   %d\n"
 		      "PKT drops   %d\n"
 		      "WQE errs    %d\n",
 		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
 		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
 		      dev->n_other_naks, dev->n_timeouts,
 		      dev->n_rdma_dup_busy, dev->n_rc_stalls, dev->n_piowait,
-		      dev->n_no_piobuf, dev->n_pkt_drops, dev->n_wqe_errs);
+		      dev->n_no_piobuf, dev->n_unaligned,
+		      dev->n_pkt_drops, dev->n_wqe_errs);
 	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
 		const struct ipath_opcode_stats *si = &dev->opstats[i];
 