[openib-general] [PATCH 2 of 20] ipath - core device driver
Bryan O'Sullivan
bos at pathscale.com
Thu Mar 9 16:35:32 PST 2006
The driver requires kernel PCI_MSI support in order to enable
interrupts. Since the kernel's MSI infrastructure is currently highly
x86-specific, we have a temporary timer-based hack in place for
non-MSI kernels until other arches start supporting MSI.
Signed-off-by: Bryan O'Sullivan <bos at pathscale.com>
diff -r 2a9e52d59741 -r 19bdf20bc544 drivers/infiniband/hw/ipath/ipath_driver.c
--- /dev/null Thu Jan 1 00:00:00 1970 +0000
+++ b/drivers/infiniband/hw/ipath/ipath_driver.c Thu Mar 9 16:15:16 2006 -0800
@@ -0,0 +1,2194 @@
+/*
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/swap.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+/*
+ * InfiniPath only supports message-based interrupts. It has no interrupt
+ * pins. Therefore we require CONFIG_PCI_MSI be enabled. We want a
+ * very visible failure if the support isn't present.
+ */
+#ifndef CONFIG_PCI_MSI
+#error "Without CONFIG_PCI_MSI, interrupts will not work"
+#endif
+
+#include "ipath_kernel.h"
+#include "ips_common.h"
+#include "ipath_layer.h"
+
+static void ipath_update_pio_bufs(struct ipath_devdata *);
+static int ipath_shutdown_link(struct ipath_devdata *);
+struct page *ipath_nopage(struct vm_area_struct *, unsigned long, int *);
+
+const char *ipath_get_unit_name(int unit)
+{
+ static char iname[16];
+ snprintf(iname, sizeof iname, "infinipath%u", unit);
+ return iname;
+}
+
+#define DRIVER_LOAD_MSG "PathScale " IPATH_DRV_NAME " loaded: "
+#define PFX IPATH_DRV_NAME ": "
+
+/*
+ * The size has to be longer than this string, so we can append
+ * board/chip information to it in the init code.
+ */
+const char ipath_core_version[] = IPATH_IDSTR "\n";
+
+static struct idr unit_table;
+static DEFINE_SPINLOCK(unit_table_lock);
+atomic_t ipath_max;
+
+unsigned ipath_debug = __IPATH_INFO;
+
+module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(debug, "mask for debug prints");
+EXPORT_SYMBOL_GPL(ipath_debug);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("PathScale <support at pathscale.com>");
+MODULE_DESCRIPTION("Pathscale InfiniPath driver");
+
+const char *ipath_ibcstatus_str[] = {
+ "Disabled",
+ "LinkUp",
+ "PollActive",
+ "PollQuiet",
+ "SleepDelay",
+ "SleepQuiet",
+ "LState6", /* unused */
+ "LState7", /* unused */
+ "CfgDebounce",
+ "CfgRcvfCfg",
+ "CfgWaitRmt",
+ "CfgIdle",
+ "RecovRetrain",
+ "LState0xD", /* unused */
+ "RecovWaitRmt",
+ "RecovIdle",
+};
+
+/*
+ * These variables are initialized in the chip-specific files
+ * but are defined here.
+ */
+u16 ipath_gpio_sda_num, ipath_gpio_scl_num;
+u64 ipath_gpio_sda, ipath_gpio_scl;
+u64 infinipath_i_bitsextant, infinipath_e_bitsextant,
+ infinipath_hwe_bitsextant;
+u32 infinipath_i_rcvavail_mask, infinipath_i_rcvurg_mask;
+
+static void __devexit ipath_remove_one(struct pci_dev *);
+static int ipath_init_one(struct pci_dev *, const struct pci_device_id *);
+
+/* Only needed for registration, nothing else needs this info */
+#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
+#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
+#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
+
+static const struct pci_device_id ipath_pci_tbl[] = {
+ {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE,
+ PCI_DEVICE_ID_INFINIPATH_HT)},
+ {PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE,
+ PCI_DEVICE_ID_INFINIPATH_PE800)},
+};
+
+MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
+
+static struct pci_driver ipath_driver = {
+ .name = IPATH_DRV_NAME,
+ .probe = ipath_init_one,
+ .remove = __devexit_p(ipath_remove_one),
+ .id_table = ipath_pci_tbl,
+};
+
+/*
+ * This is where port 0's rcvhdrtail register is written back; we also
+ * want nothing else sharing the cache line, so make it a cache line
+ * in size. Used for all units.
+ */
+u64 *ipath_port0_rcvhdrtail;
+dma_addr_t ipath_port0_rcvhdrtail_dma;
+static int port0_rcvhdrtail_refs;
+
+#if defined (pgprot_writecombine) && defined(_PAGE_MA_WC)
+int remap_area_pages(unsigned long address, unsigned long phys_addr,
+ unsigned long size, unsigned long flags);
+#endif
+
+static inline void read_bars(struct pci_dev *dev, u32 *bar0, u32 *bar1)
+{
+ int ret;
+
+ ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
+ if (ret)
+ dev_err(&dev->dev, "failed to read bar0 before enable: "
+ "error %d\n", -ret);
+
+ ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
+ if (ret)
+ dev_err(&dev->dev, "failed to read bar1 before enable: "
+ "error %d\n", -ret);
+
+ ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
+}
+
+static void ipath_free_devdata(struct pci_dev *pdev,
+ struct ipath_devdata *dd)
+{
+ unsigned long flags;
+
+ if (dd->ipath_unit != -1) {
+ spin_lock_irqsave(&unit_table_lock, flags);
+ idr_remove(&unit_table, dd->ipath_unit);
+ spin_unlock_irqrestore(&unit_table_lock, flags);
+ }
+ pci_free_consistent(pdev, sizeof(*dd), dd, dd->ipath_dma_addr);
+}
+
+static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
+{
+ unsigned long flags;
+ struct ipath_devdata *dd;
+ dma_addr_t dma_addr;
+ int ret;
+
+ if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
+ dd = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dd = pci_alloc_consistent(pdev, sizeof(*dd), &dma_addr);
+
+ if (!dd) {
+ dd = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dd->ipath_dma_addr = dma_addr;
+ dd->ipath_unit = -1;
+
+ spin_lock_irqsave(&unit_table_lock, flags);
+
+ ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate unit ID: error %d\n", -ret);
+ ipath_free_devdata(pdev, dd);
+ dd = ERR_PTR(ret);
+ goto bail_unlock;
+ }
+
+ if (dd->ipath_unit >= atomic_read(&ipath_max))
+ atomic_set(&ipath_max, dd->ipath_unit + 1);
+
+bail_unlock:
+ spin_unlock_irqrestore(&unit_table_lock, flags);
+
+bail:
+ return dd;
+}
+
+static inline struct ipath_devdata *__ipath_lookup(int unit)
+{
+ return idr_find(&unit_table, unit);
+}
+
+struct ipath_devdata *ipath_lookup(int unit)
+{
+ struct ipath_devdata *dd;
+ unsigned long flags;
+
+ spin_lock_irqsave(&unit_table_lock, flags);
+ dd = __ipath_lookup(unit);
+ spin_unlock_irqrestore(&unit_table_lock, flags);
+
+ return dd;
+}
+
+int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp)
+{
+ int nunits, npresent, nup;
+ unsigned long flags;
+ u32 maxports;
+ int i, max;
+
+ nunits = npresent = nup = maxports = 0;
+
+ spin_lock_irqsave(&unit_table_lock, flags);
+
+ max = atomic_read(&ipath_max);
+
+ for (i = 0; i < max; i++) {
+ struct ipath_devdata *dd = __ipath_lookup(i);
+ if (!dd)
+ continue;
+
+ if (dd->ipath_flags & IPATH_INITTED)
+ nunits++;
+ if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
+ npresent++;
+ if (dd->ipath_lid &&
+ !(dd->ipath_flags & (IPATH_LINKDOWN | IPATH_LINKUNK)))
+ nup++;
+ if (dd->ipath_cfgports > maxports)
+ maxports = dd->ipath_cfgports;
+ }
+
+ spin_unlock_irqrestore(&unit_table_lock, flags);
+
+ if (npresentp)
+ *npresentp = npresent;
+ if (nupp)
+ *nupp = nup;
+ if (maxportsp)
+ *maxportsp = maxports;
+
+ return nunits;
+}
+
+static int init_port0_rcvhdrtail(struct pci_dev *pdev)
+{
+ int ret;
+
+ mutex_lock(&ipath_mutex);
+
+ if (!ipath_port0_rcvhdrtail) {
+ ipath_port0_rcvhdrtail =
+ pci_alloc_consistent(pdev,
+ IPATH_PORT0_RCVHDRTAIL_SIZE,
+ &ipath_port0_rcvhdrtail_dma);
+
+ if (!ipath_port0_rcvhdrtail) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ }
+ port0_rcvhdrtail_refs++;
+ ret = 0;
+
+bail:
+ mutex_unlock(&ipath_mutex);
+
+ return ret;
+}
+
+static void cleanup_port0_rcvhdrtail(struct pci_dev *pdev)
+{
+ mutex_lock(&ipath_mutex);
+
+ if (!--port0_rcvhdrtail_refs) {
+ pci_free_consistent(pdev, IPATH_PORT0_RCVHDRTAIL_SIZE,
+ ipath_port0_rcvhdrtail,
+ ipath_port0_rcvhdrtail_dma);
+ ipath_port0_rcvhdrtail = NULL;
+ }
+
+ mutex_unlock(&ipath_mutex);
+}
+
+/*
+ * These next two routines are placeholders in case we don't have per-arch
+ * code for controlling write combining. If explicit control of write
+ * combining is not available, performance will probably be awful.
+ */
+
+int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
+{
+ return -EOPNOTSUPP;
+}
+
+void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
+{
+}
+
+static int ipath_init_one(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ int ret, len, j;
+ struct ipath_devdata *dd;
+ unsigned long long addr;
+ u32 bar0 = 0, bar1 = 0;
+ u8 rev;
+
+ ret = init_port0_rcvhdrtail(pdev);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate port0_rcvhdrtail: error %d\n",
+ -ret);
+ goto fail;
+ }
+
+ dd = ipath_alloc_devdata(pdev);
+ if (IS_ERR(dd)) {
+ ret = PTR_ERR(dd);
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Could not allocate devdata: error %d\n", -ret);
+ goto fail;
+ }
+
+ ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
+
+ read_bars(pdev, &bar0, &bar1);
+
+ ret = pci_enable_device(pdev);
+ if (ret) {
+ /* This can happen if we did a chip reset, and then failed
+ * to reprogram the BAR, or the chip reset due to an
+ * internal error. Both of these cases cause the BAR to be
+ * reset back to initial state. For the latter case, the AER
+ * sticky error bit at offset 0x718 should be set, but the
+ * Linux kernel doesn't yet know about that, it appears. If
+ * the original BAR was retained in the kernel data
+ * structures, this may be OK
+ */
+ ipath_dbg("pci_enable unit %u failed: error %d\n",
+ dd->ipath_unit, -ret);
+ }
+ addr = pci_resource_start(pdev, 0);
+ len = pci_resource_len(pdev, 0);
+ ipath_cdbg(VERBOSE, "regbase (0) %llx len %d irq %x, vend %x/%x "
+ "driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
+ ent->device, ent->driver_data);
+
+ read_bars(pdev, &bar0, &bar1);
+
+ if (!bar1 && !(bar0 & ~0xf)) {
+ if (addr) {
+ dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
+ "rewriting as %llx\n", addr);
+ ret = pci_write_config_dword(
+ pdev, PCI_BASE_ADDRESS_0, addr);
+ if (ret)
+ dev_err(&pdev->dev, "rewrite of BAR0 "
+ "failed: err %d\n", -ret);
+ ret = pci_write_config_dword(
+ pdev, PCI_BASE_ADDRESS_1, addr >> 32);
+ if (ret)
+ dev_err(&pdev->dev, "rewrite of BAR1 "
+ "failed: err %d\n", -ret);
+ }
+ else
+ dev_err(&pdev->dev, "BAR is 0 (probable RESET), "
+ "not usable until reboot\n");
+ }
+
+ ret = pci_request_regions(pdev, IPATH_DRV_NAME);
+ if (ret)
+ dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
+ "err %d\n", dd->ipath_unit, -ret);
+
+ ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (ret)
+ dev_info(&pdev->dev, "pci_set_dma_mask unit %u "
+ "fails: %d\n", dd->ipath_unit, ret);
+
+ pci_set_master(pdev); /* probably not be needed for HT */
+
+ /*
+ * Save BARs to rewrite after device reset. Save all 64 bits of
+ * BAR, just in case.
+ */
+ dd->ipath_pcibar0 = addr;
+ dd->ipath_pcibar1 = addr >> 32;
+ dd->ipath_deviceid = ent->device; /* save for later use */
+ dd->ipath_vendorid = ent->vendor;
+
+ /* setup the chip-specific functions, as early as possible. */
+ switch (ent->device) {
+ case PCI_DEVICE_ID_INFINIPATH_HT:
+ ipath_init_ht400_funcs(dd);
+ break;
+ case PCI_DEVICE_ID_INFINIPATH_PE800:
+ ipath_init_pe800_funcs(dd);
+ break;
+ default:
+ ipath_dev_err(dd, "Found unknown PathScale deviceid 0x%x, "
+ "failing\n", ent->device);
+ return -ENODEV;
+ }
+
+ for (j = 0; j < 6; j++) {
+ if (!pdev->resource[j].start)
+ continue;
+ ipath_cdbg(VERBOSE, "BAR %d start %lx, end %lx, len %lx\n",
+ j, pdev->resource[j].start,
+ pdev->resource[j].end,
+ pci_resource_len(pdev, j));
+ }
+
+ pci_set_master(pdev);
+
+ if (!addr) {
+ ipath_dev_err(dd, "No valid address in BAR 0!\n");
+ return -ENODEV;
+ }
+
+ dd->ipath_deviceid = ent->device; /* save for later use */
+ dd->ipath_vendorid = ent->vendor;
+
+ ret = pci_read_config_byte(pdev, PCI_REVISION_ID, &rev);
+ if (ret) {
+ ipath_dev_err(dd, "Failed to read PCI revision ID unit "
+ "%u: err %d\n", dd->ipath_unit, -ret);
+ return ret; /* shouldn't ever happen */
+ } else
+ dd->ipath_pcirev = rev;
+
+ dd->ipath_kregbase = ioremap_nocache(addr, len);
+#if defined (pgprot_writecombine) && defined(_PAGE_MA_WC)
+ ipath_cdbg(VERBOSE, "Remapping pages WC\n");
+ remap_area_pages((unsigned long)dd->ipath_kregbase +
+ 1024 * 1024, addr + 1024 * 1024, 1024 * 1024,
+ _PAGE_MA_WC);
+ /* dd->ipath_kregbase = __ioremap(addr, len, _PAGE_MA_WC); */
+#endif
+
+ if (!dd->ipath_kregbase) {
+ ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
+ addr);
+ ret = -ENOMEM;
+ goto fail;
+ }
+ dd->ipath_kregend = (u64 __iomem *)
+ ((void __iomem *)dd->ipath_kregbase + len);
+ dd->ipath_physaddr = addr; /* used for io_remap, etc. */
+ /* for user mmap */
+ dd->ipath_kregvirt = (u64 __iomem *) phys_to_virt(addr);
+ ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p "
+ "kregvirt %p\n", addr, dd->ipath_kregbase,
+ dd->ipath_kregvirt);
+
+ /* set these up before registering the interrupt handler */
+ dd->pcidev = pdev;
+ pci_set_drvdata(pdev, dd);
+
+ /*
+ * clear ipath_flags here instead of in ipath_init_chip as it is set
+ * by ipath_setup_htconfig.
+ */
+ dd->ipath_flags = 0;
+
+ if (dd->ipath_f_bus(dd, pdev))
+ ipath_dev_err(dd, "Failed to setup config space; "
+ "continuing anyway\n");
+
+ /*
+ * set up our interrupt handler; SA_SHIRQ probably not needed,
+ * but won't hurt for now.
+ * check 0 irq after we return from chip-specific bus setup, since
+ * that can affect this due to setup
+ */
+ if (!pdev->irq)
+ ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
+ "work\n");
+ else {
+ ret = request_irq(pdev->irq, ipath_intr, SA_SHIRQ,
+ IPATH_DRV_NAME, dd);
+ if (ret)
+ ipath_dev_err(dd, "Couldn't setup irq handler, "
+ "irq=%u: %d\n", pdev->irq, ret);
+ }
+
+ ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
+
+ if (!ret) {
+ ret = ipath_enable_wc(dd);
+
+ if (ret) {
+ ipath_dev_err(dd, "Write combining not enabled "
+ "(err %d): performance may be poor\n",
+ -ret);
+ ret = 0;
+ }
+ }
+
+ if (dd->ipath_kregbase && (dd->ipath_flags & IPATH_PRESENT)) {
+ if (!dd->ipath_f_intrsetup(dd)) {
+ /* now we can enable interrupts from the chip */
+ /* enable all interrupts */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask,
+ -1LL);
+ /* force re-interrupt of any pending interrupts. */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear,
+ 0ULL);
+ /* chip is usable; mark it as initialized */
+ *dd->ipath_statusp |= IPATH_STATUS_INITTED;
+ } else
+ ipath_dev_err(dd, "No interrupts enabled, couldn't "
+ "setup interrupt address\n");
+ }
+ else if (ret != -EPERM)
+ dev_info(&pdev->dev, "Not configuring unit %u "
+ "interrupts, init failed\n", dd->ipath_unit);
+
+ ipath_device_create_group(&pdev->dev, dd);
+ ipath_user_add(dd);
+
+ /*
+ * We used to cleanup here, with pci_release_regions, etc. but that
+ * can cause other problems if we want to run diags, etc., so
+ * instead defer that until driver unload. As long as the driver is
+ * unloaded, no memory leaks result.
+ */
+
+fail: /* after we've done at least some of the pci setup */
+ if (ret == -EPERM)
+ /*
+ * disabled device, don't want module load error; just want
+ * to carry status through to this point
+ */
+ ret = 0;
+
+ return ret;
+}
+
+static void __devexit ipath_remove_one(struct pci_dev *pdev)
+{
+ struct ipath_devdata *dd;
+
+ ipath_cdbg(VERBOSE, "removing, pdev=%p\n", pdev);
+ if (!pdev)
+ return;
+
+ dd = pci_get_drvdata(pdev);
+ ipath_user_del(dd);
+ ipath_device_remove_group(&pdev->dev, dd);
+ pci_set_drvdata(pdev, NULL);
+ ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
+ "unit %u\n", dd, (u32) dd->ipath_unit);
+ if (dd->ipath_kregbase) {
+ ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n",
+ dd->ipath_kregbase);
+ iounmap((volatile void __iomem *)dd->ipath_kregbase);
+ dd->ipath_kregbase = NULL;
+ }
+ pci_release_regions(pdev);
+ ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
+ pci_disable_device(pdev);
+
+ ipath_free_devdata(pdev, dd);
+ cleanup_port0_rcvhdrtail(pdev);
+}
+
+/* general driver use */
+DEFINE_MUTEX(ipath_mutex);
+static DEFINE_SPINLOCK(ipath_pioavail_lock);
+
+/**
+ * ipath_disarm_piobufs - cancel a range of PIO buffers
+ * @dd: the infinipath device
+ * @first: the first PIO buffer to cancel
+ * @cnt: the number of PIO buffers to cancel
+ *
+ * cancel a range of PIO buffers, used when they might be armed, but
+ * not triggered. Used at init to ensure buffer state, and also user
+ * process close, in case it died while writing to a PIO buffer
+ * Also after errors.
+ */
+void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
+ unsigned cnt)
+{
+ unsigned i, last = first + cnt;
+ u64 sendctrl, sendorig;
+
+ ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
+ sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
+ for (i = first; i < last; i++) {
+ sendctrl = sendorig |
+ (i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ sendctrl);
+ }
+ // write it again with current value, in case ipath_sendctrl changed
+ // while we were looping; no critical bits that would require
+ // locking
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ dd->ipath_sendctrl);
+}
+
+/**
+ * ipath_wait_linkstate - wait for an IB link state change to occur
+ * @dd: the infinipath device
+ * @state: the state to wait for
+ * @msecs: the number of milliseconds to wait
+ *
+ * wait up to msecs milliseconds for IB link state change to occur for
+ * now, take the easy polling route. Currently used only by
+ * ipath_layer_set_linkstate. Returns 0 if state reached, otherwise
+ * -ETIMEDOUT state can have multiple states set, for any of several
+ * transitions.
+ */
+int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
+{
+ dd->ipath_sma_state_wanted = state;
+ wait_event_interruptible_timeout(ipath_sma_state_wait,
+ (dd->ipath_flags & state),
+ msecs_to_jiffies(msecs));
+ dd->ipath_sma_state_wanted = 0;
+
+ if (!(dd->ipath_flags & state)) {
+ u64 val;
+ ipath_cdbg(SMA, "Didn't reach linkstate %s within %u ms\n",
+ /* test INIT ahead of DOWN, both can be set */
+ (state & IPATH_LINKINIT) ? "INIT" :
+ ((state & IPATH_LINKDOWN) ? "DOWN" :
+ ((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
+ msecs);
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+ ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
+ (unsigned long long) ipath_read_kreg64(
+ dd, dd->ipath_kregs->kr_ibcctrl),
+ (unsigned long long) val,
+ ipath_ibcstatus_str[val & 0xf]);
+ }
+ return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
+}
+
+/**
+ * ipath_nopage - page fault handler
+ * @vma: the VM area
+ * @addr: the address that caused the fault
+ * @type: the output fault type
+ *
+ * For each page that is first faulted in from the
+ * mmap'ed shared address buffer, this routine is called.
+ * It's always for a single page.
+ * We use the low bits of the private_data field to tell us which case
+ * we are dealing with.
+ */
+
+struct page *ipath_nopage(struct vm_area_struct *vma, unsigned long addr,
+ int *type)
+{
+ /* original [kv]malloc virtual address */
+ unsigned long avirt;
+ /* physical address */
+ unsigned long paddr;
+ /* calculated page offset */
+ unsigned long off;
+ u32 which, chunk;
+ void *vaddr = NULL;
+ struct ipath_portdata *pd;
+ struct page *vpage = NOPAGE_SIGBUS;
+
+ avirt = (unsigned long) vma->vm_private_data;
+
+ if (!avirt) {
+ ipath_dbg("NULL private_data, vm_pgoff %lx\n",
+ vma->vm_pgoff);
+ which = 0; /* quiet incorrect gcc warning */
+ goto done;
+ }
+ which = avirt & 3;
+ avirt &= ~3ULL;
+
+ if (addr > vma->vm_end) {
+ ipath_dbg("trying to fault in addr %lx past end\n", addr);
+ goto done;
+ }
+
+ /*
+ * rcvhdr Q is physically contiguous.
+ * pgoff is virtual.
+ */
+ switch (which) {
+ case 1: /* rcvhdrq_phys */
+ /* should always be 0 */
+ off = vma->vm_pgoff - (avirt >> PAGE_SHIFT);
+ paddr = addr - vma->vm_start + (off << PAGE_SHIFT) + avirt;
+ ipath_cdbg(MM, "hdrq %lx (u=%lx)\n", paddr, addr);
+ vpage = pfn_to_page(paddr >> PAGE_SHIFT);
+ break;
+ case 2: /* PIO buffer avail regs */
+ /* should always be 0 */
+ off = vma->vm_pgoff - (avirt >> PAGE_SHIFT);
+ paddr = (addr - vma->vm_start + (off << PAGE_SHIFT) +
+ avirt);
+ ipath_cdbg(MM, "pioav %lx\n", paddr);
+ vpage = pfn_to_page(paddr >> PAGE_SHIFT);
+ break;
+ case 3:
+ /*
+ * rcvegrbufs; page_alloc()'ed like rcvhdrq, but we
+ * have to pick out which page_alloc()'ed chunk it is.
+ */
+ pd = (struct ipath_portdata *)avirt;
+ /* this should always be 0 */
+ off =
+ vma->vm_pgoff -
+ ((unsigned long)pd->port_rcvegr_phys >> PAGE_SHIFT);
+ off = (addr - vma->vm_start + (off << PAGE_SHIFT));
+
+ chunk = off / pd->port_rcvegrbuf_size;
+ if (chunk > pd->port_rcvegrbuf_chunks)
+ ipath_dbg("Bad egrbuf chunk %u (max %u); "
+ "off = %lx\n", chunk,
+ pd->port_rcvegrbuf_chunks, off);
+ vaddr = pd->port_rcvegrbuf[chunk] +
+ off % pd->port_rcvegrbuf_size;
+ paddr = pd->port_rcvegrbuf_phys[chunk] +
+ off % pd->port_rcvegrbuf_size;
+ vpage = pfn_to_page(paddr >> PAGE_SHIFT);
+ ipath_cdbg(MM, "egrb %p,%lx\n", vaddr, paddr);
+ break;
+ default:
+ ipath_dbg("trying to fault in mmap addr %lx (avirt %lx) "
+ "that isn't known (case %u)\n", addr, avirt,
+ which);
+ }
+
+done:
+ if (vpage != NOPAGE_SIGBUS && vpage != NOPAGE_OOM) {
+ if (which == 2)
+ /*
+ * media/video/video-buf.c doesn't do get_page()
+ * for buffer from alloc_page(). Hmmm.
+ *
+ * keep it from being swapped, complaints if process
+ * exits before we [kv]free it, etc, and keep shared
+ * page counts correct, etc.
+ */
+ get_page(vpage);
+ mark_page_accessed(vpage);
+ if (type)
+ *type = VM_FAULT_MINOR;
+ } else
+ ipath_dbg("faultin of addr %lx vaddr %p avirt %lx failed\n",
+ addr, vaddr, avirt);
+
+ return vpage;
+}
+
+void ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
+{
+ *buf = '\0';
+ if (err & INFINIPATH_E_RHDRLEN)
+ strlcat(buf, "rhdrlen ", blen);
+ if (err & INFINIPATH_E_RBADTID)
+ strlcat(buf, "rbadtid ", blen);
+ if (err & INFINIPATH_E_RBADVERSION)
+ strlcat(buf, "rbadversion ", blen);
+ if (err & INFINIPATH_E_RHDR)
+ strlcat(buf, "rhdr ", blen);
+ if (err & INFINIPATH_E_RLONGPKTLEN)
+ strlcat(buf, "rlongpktlen ", blen);
+ if (err & INFINIPATH_E_RSHORTPKTLEN)
+ strlcat(buf, "rshortpktlen ", blen);
+ if (err & INFINIPATH_E_RMAXPKTLEN)
+ strlcat(buf, "rmaxpktlen ", blen);
+ if (err & INFINIPATH_E_RMINPKTLEN)
+ strlcat(buf, "rminpktlen ", blen);
+ if (err & INFINIPATH_E_RFORMATERR)
+ strlcat(buf, "rformaterr ", blen);
+ if (err & INFINIPATH_E_RUNSUPVL)
+ strlcat(buf, "runsupvl ", blen);
+ if (err & INFINIPATH_E_RUNEXPCHAR)
+ strlcat(buf, "runexpchar ", blen);
+ if (err & INFINIPATH_E_RIBFLOW)
+ strlcat(buf, "ribflow ", blen);
+ if (err & INFINIPATH_E_REBP)
+ strlcat(buf, "EBP ", blen);
+ if (err & INFINIPATH_E_SUNDERRUN)
+ strlcat(buf, "sunderrun ", blen);
+ if (err & INFINIPATH_E_SPIOARMLAUNCH)
+ strlcat(buf, "spioarmlaunch ", blen);
+ if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
+ strlcat(buf, "sunexperrpktnum ", blen);
+ if (err & INFINIPATH_E_SDROPPEDDATAPKT)
+ strlcat(buf, "sdroppeddatapkt ", blen);
+ if (err & INFINIPATH_E_SDROPPEDSMPPKT)
+ strlcat(buf, "sdroppedsmppkt ", blen);
+ if (err & INFINIPATH_E_SMAXPKTLEN)
+ strlcat(buf, "smaxpktlen ", blen);
+ if (err & INFINIPATH_E_SMINPKTLEN)
+ strlcat(buf, "sminpktlen ", blen);
+ if (err & INFINIPATH_E_SUNSUPVL)
+ strlcat(buf, "sunsupVL ", blen);
+ if (err & INFINIPATH_E_SPKTLEN)
+ strlcat(buf, "spktlen ", blen);
+ if (err & INFINIPATH_E_INVALIDADDR)
+ strlcat(buf, "invalidaddr ", blen);
+ if (err & INFINIPATH_E_RICRC)
+ strlcat(buf, "CRC ", blen);
+ if (err & INFINIPATH_E_RVCRC)
+ strlcat(buf, "VCRC ", blen);
+ if (err & INFINIPATH_E_RRCVEGRFULL)
+ strlcat(buf, "rcvegrfull ", blen);
+ if (err & INFINIPATH_E_RRCVHDRFULL)
+ strlcat(buf, "rcvhdrfull ", blen);
+ if (err & INFINIPATH_E_IBSTATUSCHANGED)
+ strlcat(buf, "ibcstatuschg ", blen);
+ if (err & INFINIPATH_E_RIBLOSTLINK)
+ strlcat(buf, "riblostlink ", blen);
+ if (err & INFINIPATH_E_HARDWARE)
+ strlcat(buf, "hardware ", blen);
+ if (err & INFINIPATH_E_RESET)
+ strlcat(buf, "reset ", blen);
+}
+
+/**
+ * get_rhf_errstring - decode RHF errors
+ * @err: the err number
+ * @msg: the output buffer
+ * @len: the length of the output buffer
+ *
+ * only used one place now, may want more later
+ */
+static void get_rhf_errstring(u32 err, char *msg, size_t len)
+{
+ /* if no errors, and so don't need to check what's first */
+ *msg = '\0';
+
+ if (err & INFINIPATH_RHF_H_ICRCERR)
+ strlcat(msg, "icrcerr ", len);
+ if (err & INFINIPATH_RHF_H_VCRCERR)
+ strlcat(msg, "vcrcerr ", len);
+ if (err & INFINIPATH_RHF_H_PARITYERR)
+ strlcat(msg, "parityerr ", len);
+ if (err & INFINIPATH_RHF_H_LENERR)
+ strlcat(msg, "lenerr ", len);
+ if (err & INFINIPATH_RHF_H_MTUERR)
+ strlcat(msg, "mtuerr ", len);
+ if (err & INFINIPATH_RHF_H_IHDRERR)
+ /* infinipath hdr checksum error */
+ strlcat(msg, "ipathhdrerr ", len);
+ if (err & INFINIPATH_RHF_H_TIDERR)
+ strlcat(msg, "tiderr ", len);
+ if (err & INFINIPATH_RHF_H_MKERR)
+ /* bad port, offset, etc. */
+ strlcat(msg, "invalid ipathhdr ", len);
+ if (err & INFINIPATH_RHF_H_IBERR)
+ strlcat(msg, "iberr ", len);
+ if (err & INFINIPATH_RHF_L_SWA)
+ strlcat(msg, "swA ", len);
+ if (err & INFINIPATH_RHF_L_SWB)
+ strlcat(msg, "swB ", len);
+}
+
+/**
+ * ipath_get_egrbuf - get an eager buffer
+ * @dd: the infinipath device
+ * @bufnum: the eager buffer to get
+ * @err: unused
+ *
+ * must only be called if ipath_pd[port] is known to be allocated
+ */
+static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum,
+ int err)
+{
+ return dd->ipath_port0_skbs ?
+ (void *)dd->ipath_port0_skbs[bufnum]->data : NULL;
+
+#ifdef _USE_FOR_DEBUGGING_ONLY
+ /*
+ * want routine to be inlined and fast this is here so if we do
+ * ports other than 0, I don't have to rewrite the code, since it's
+ * slightly complicated
+ */
+ if (port != 1) {
+ void *chunkbase;
+ /*
+ * This calculation takes about 50 cycles. Could do what I
+ * did for protocol code, and have an array of addresses,
+ * getting it down to just a few cycles per lookup, at the
+ * cost of 16KB of memory.
+ */
+ if (!dd->ipath_pd[port]->port_rcvegrbuf_virt)
+ return NULL;
+ chunkbase = dd->ipath_pd[port]->port_rcvegrbuf_virt
+ [bufnum /
+ dd->ipath_pd[port]->port_rcvegrbufs_perchunk];
+ return (void *)(chunkbase +
+ (bufnum %
+ dd->ipath_pd[port]->
+ port_rcvegrbufs_perchunk)
+ * dd->ipath_rcvegrbufsize);
+ }
+#endif
+}
+
+/*
+ * ipath_rcv_sma - receive an sma packet
+ * @dd: the infinipath device
+ * @tlen: the total packet len
+ * @rc: the packet header
+ * @ebuf: the packet data
+ *
+ * Separate for better overall optimization
+ */
+static void ipath_rcv_sma(struct ipath_devdata *dd, u32 tlen, u64 * rc,
+ void *ebuf)
+{
+ int sindex, slen, elen;
+ void *smbuf;
+ u8 pad, *bthbytes;
+
+ /* another SMA packet received */
+ ipath_stats.sps_sma_rpkts++;
+
+ bthbytes = (u8 *) ((struct ips_message_header *)&rc[1])->bth;
+
+ pad = (bthbytes[1] >> 4) & 3;
+ elen = tlen - (IPATH_SMA_HDRSZ + pad + (u32) sizeof(u32));
+ if (elen > (IPATH_SMA_MAX_PKTSZ - IPATH_SMA_HDRSZ))
+ elen = IPATH_SMA_MAX_PKTSZ - IPATH_SMA_HDRSZ;
+
+ spin_lock_irq(&ipath_sma_lock);
+ sindex = ipath_sma_next;
+ smbuf = ipath_sma_data[sindex].buf;
+ ipath_sma_data[sindex].unit = dd->ipath_unit;
+ slen = ipath_sma_data[ipath_sma_next].len;
+ memcpy(smbuf, &rc[1], IPATH_SMA_HDRSZ);
+ memcpy(smbuf + IPATH_SMA_HDRSZ, ebuf, elen);
+ if (slen) {
+ /*
+ * overwriting a yet unread old one (buffer wrap),
+ * have to advance ipath_sma_first to next oldest
+ */
+
+ /* count OK packets that we drop */
+ ipath_stats.sps_krdrops++;
+ if (++ipath_sma_first >= IPATH_NUM_SMA_PKTS)
+ ipath_sma_first = 0;
+ }
+ slen = ipath_sma_data[sindex].len = elen + IPATH_SMA_HDRSZ;
+ if (++ipath_sma_next >= IPATH_NUM_SMA_PKTS)
+ ipath_sma_next = 0;
+ spin_unlock_irq(&ipath_sma_lock);
+}
+
+/**
+ * ipath_alloc_skb - allocate an skb and buffer with possible constraints
+ * @dd: the infinipath device
+ * @gfp_mask: the sk_buff SFP mask
+ */
+struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
+ gfp_t gfp_mask)
+{
+ struct sk_buff *skb;
+ u32 len;
+
+ /*
+ * Only fully supported way to handle this is to allocate lots
+ * extra, align as needed, and then do skb_reserve(). That wastes
+ * a lot of memory... I'll have to hack this into infinipath_copy
+ * also.
+ */
+
+ /*
+ * We need 4 extra bytes for unaligned transfer copying
+ */
+ if (dd->ipath_flags & IPATH_4BYTE_TID) {
+ /* we need a 4KB multiple alignment, and there is no way
+ * to do it except to allocate extra and then skb_reserve
+ * enough to bring it up to the right alignment.
+ */
+ len = dd->ipath_ibmaxlen + 4 + (1 << 11) - 1;
+ }
+ else
+ len = dd->ipath_ibmaxlen + 4;
+ skb = __dev_alloc_skb(len, gfp_mask);
+ if (!skb) {
+ dev_err(&dd->pcidev->dev, "Failed to allocate skbuff, "
+ "length %u\n", len);
+ return NULL;
+ }
+ if (dd->ipath_flags & IPATH_4BYTE_TID) {
+ u32 una = ((1 << 11) - 1) & (unsigned long)(skb->data + 4);
+ if (una)
+ skb_reserve(skb, 4 + (1 << 11) - una);
+ else
+ skb_reserve(skb, 4);
+ } else
+ skb_reserve(skb, 4);
+ return skb;
+}
+
+/**
+ * ipath_rcv_layer - receive a packet for the layered (ethernet) driver
+ * @dd: the infinipath device
+ * @etail: the sk_buff number
+ * @tlen: the total packet length
+ * @hdr: the ethernet header
+ *
+ * Separate routine for better overall optimization
+ */
+static void ipath_rcv_layer(struct ipath_devdata *dd, u32 etail,
+ u32 tlen, struct ether_header *hdr)
+{
+ u32 elen;
+ u8 pad, *bthbytes;
+ struct sk_buff *skb, *nskb;
+
+ if (dd->ipath_port0_skbs && hdr->sub_opcode == OPCODE_ENCAP) {
+ /*
+ * Allocate a new sk_buff to replace the one we give
+ * to the network stack.
+ */
+ nskb = ipath_alloc_skb(dd, GFP_ATOMIC);
+ if (!nskb) {
+ /* count OK packets that we drop */
+ ipath_stats.sps_krdrops++;
+ return;
+ }
+
+ bthbytes = (u8 *) hdr->bth;
+ pad = (bthbytes[1] >> 4) & 3;
+ /* +CRC32 */
+ elen = tlen - (sizeof(*hdr) + pad + sizeof(u32));
+
+ skb = dd->ipath_port0_skbs[etail];
+ dd->ipath_port0_skbs[etail] = nskb;
+ skb_put(skb, elen);
+
+ dd->ipath_f_put_tid(dd, etail + (u64 __iomem *)
+ ((char __iomem *) dd->ipath_kregbase
+ + dd->ipath_rcvegrbase), 0,
+ virt_to_phys(nskb->data));
+
+ dd->ipath_layer.l_rcv(dd->ipath_unit, hdr, skb);
+
+ /* another ether packet received */
+ ipath_stats.sps_ether_rpkts++;
+ } else if (hdr->sub_opcode == OPCODE_LID_ARP) {
+ if (dd->ipath_layer.l_rcv_lid)
+ dd->ipath_layer.l_rcv_lid(dd->ipath_unit, hdr);
+ }
+}
+
+/*
+ * ipath_kreceive - receive a packet
+ * @dd: the infinipath device
+ *
+ * called from interrupt handler for errors or receive interrupt
+ */
+void ipath_kreceive(struct ipath_devdata *dd)
+{
+ u64 *rc;
+ void *ebuf;
+ const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
+ const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
+ u32 etail = -1, l, hdrqtail, sma_this_time = 0;
+ struct ips_message_header *hdr;
+ u32 eflags, i, etype, tlen, pkttot = 0;
+ static u64 totcalls; /* stats, may eventually remove */
+ char emsg[128];
+
+ if (!dd->ipath_hdrqtailptr) {
+ ipath_dev_err(dd,
+ "hdrqtailptr not set, can't do receives\n");
+ return;
+ }
+
+ if (test_and_set_bit(0, &dd->ipath_rcv_pending)) {
+ /* There is already a thread processing this queue. */
+ return;
+ }
+
+ if (dd->ipath_port0head ==
+ (uint32_t)__le64_to_cpu(*dd->ipath_hdrqtailptr))
+ goto done;
+
+gotmore:
+ /*
+ * read only once at start. If in flood situation, this helps
+ * performance slightly. If more arrive while we are processing,
+ * we'll come back here and do them
+ */
+ hdrqtail = (uint32_t)__le64_to_cpu(*dd->ipath_hdrqtailptr);
+
+ for (i = 0, l = dd->ipath_port0head; l != hdrqtail; i++) {
+ u32 qp;
+ u8 *bthbytes;
+
+ rc = (u64 *) (dd->ipath_pd[0]->port_rcvhdrq + (l << 2));
+ hdr = (struct ips_message_header *)&rc[1];
+ /*
+ * could make a network order version of IPATH_KD_QP, and
+ * do the obvious shift before masking to speed this up.
+ */
+ qp = ntohl(hdr->bth[1]) & 0xffffff;
+ bthbytes = (u8 *) hdr->bth;
+
+ eflags = ips_get_hdr_err_flags((u32 *) rc);
+ etype = ips_get_rcv_type((u32 *) rc);
+ /* total length */
+ tlen = ips_get_length_in_bytes((u32 *) rc);
+ ebuf = NULL;
+ if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
+ /*
+ * it turns out that the chips uses an eager buffer
+ * for all non-expected packets, whether it "needs"
+ * one or not. So always get the index, but don't
+ * set ebuf (so we try to copy data) unless the
+ * length requires it.
+ */
+ etail = ips_get_index((u32 *) rc);
+ if (tlen > sizeof(*hdr) ||
+ etype == RCVHQ_RCV_TYPE_NON_KD)
+ ebuf = ipath_get_egrbuf(dd, etail, 0);
+ }
+
+ /*
+ * both tiderr and ipathhdrerr are set for all plain IB
+ * packets; only ipathhdrerr should be set.
+ */
+
+ if (etype != RCVHQ_RCV_TYPE_NON_KD &&
+ etype != RCVHQ_RCV_TYPE_ERROR &&
+ ips_get_ipath_ver(hdr->iph.ver_port_tid_offset) !=
+ IPS_PROTO_VERSION) {
+ ipath_cdbg(PKT, "Bad InfiniPath protocol version "
+ "%x\n", etype);
+ }
+
+ if (eflags & ~(INFINIPATH_RHF_H_TIDERR |
+ INFINIPATH_RHF_H_IHDRERR)) {
+ get_rhf_errstring(eflags, emsg, sizeof emsg);
+ ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
+ "tlen=%x opcode=%x egridx=%x: %s\n",
+ eflags, l, etype, tlen, bthbytes[0],
+ ips_get_index((u32 *) rc), emsg);
+ } else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
+ /*
+ * If there is a userland SMA and this is a MAD
+ * packet, then pass it to the userland SMA.
+ */
+ if (atomic_read(&ipath_sma_alive) && qp <= 1) {
+ /*
+ * count OK packets that we drop because
+ * SMA isn't yet running, or because we
+ * are in an sma flood (no point in
+ * constantly acquiring the spin lock, and
+ * overwriting previous packets).
+ * Eventually things will recover.
+ * Similarly if the sma consumer is
+ * so far behind that we would overwrite
+ * (yes, it's outside the lock)
+ */
+ if (!ipath_sma_data_spare ||
+ ipath_sma_data[ipath_sma_next].len ||
+ ++sma_this_time > IPATH_NUM_SMA_PKTS)
+ ipath_stats.sps_krdrops++;
+ else if (ebuf)
+ ipath_rcv_sma(dd, tlen, rc, ebuf);
+ }
+ else if (dd->verbs_layer.l_rcv)
+ dd->verbs_layer.l_rcv(dd->ipath_unit,
+ rc + 1,
+ ebuf, tlen);
+ else
+ ipath_cdbg(VERBOSE, "received IB packet, "
+ "not SMA (QP=%x)\n", qp);
+ } else if (etype == RCVHQ_RCV_TYPE_EAGER) {
+ if (qp == IPATH_KD_QP && bthbytes[0] ==
+ dd->ipath_layer.l_rcv_opcode && ebuf)
+ ipath_rcv_layer(dd, etail, tlen,
+ (struct ether_header *)hdr);
+ else
+ ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
+ "qp=%x), len %x; ignored\n",
+ etype, bthbytes[0], qp, tlen);
+ }
+ else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
+ ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
+ hdr->bth[0] & 0xff);
+ else if (eflags & (INFINIPATH_RHF_H_TIDERR |
+ INFINIPATH_RHF_H_IHDRERR)) {
+ /*
+ * This is a type 3 packet, only the LRH is in the
+ * rcvhdrq, the rest of the header is in the eager
+ * buffer.
+ */
+ u8 opcode;
+ if (ebuf) {
+ bthbytes = (u8 *) ebuf;
+ opcode = *bthbytes;
+ }
+ else
+ opcode = 0;
+ get_rhf_errstring(eflags, emsg, sizeof emsg);
+ ipath_dbg("Err %x (%s), opcode %x, egrbuf %x, "
+ "len %x\n", eflags, emsg, opcode, etail,
+ tlen);
+ } else {
+ /*
+ * error packet, type of error unknown.
+ * Probably type 3, but we don't know, so don't
+ * even try to print the opcode, etc.
+ */
+ ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
+ "len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
+ "hdr %llx %llx %llx %llx %llx\n",
+ etail, tlen, (unsigned long) rc, l,
+ (unsigned long long) rc[0],
+ (unsigned long long) rc[1],
+ (unsigned long long) rc[2],
+ (unsigned long long) rc[3],
+ (unsigned long long) rc[4],
+ (unsigned long long) rc[5]);
+ }
+ l += rsize;
+ if (l >= maxcnt)
+ l = 0;
+ /*
+ * update for each packet, to help prevent overflows if we
+ * have lots of packets.
+ */
+ (void)ipath_write_ureg(dd, ur_rcvhdrhead,
+ dd->ipath_rhdrhead_intr_off | l, 0);
+ if (etype != RCVHQ_RCV_TYPE_EXPECTED)
+ (void)ipath_write_ureg(dd, ur_rcvegrindexhead,
+ etail, 0);
+ }
+
+ pkttot += i;
+
+ dd->ipath_port0head = l;
+
+ if (hdrqtail != (uint32_t)__le64_to_cpu(*dd->ipath_hdrqtailptr))
+ /* more arrived while we handled first batch */
+ goto gotmore;
+
+ if (pkttot > ipath_stats.sps_maxpkts_call)
+ ipath_stats.sps_maxpkts_call = pkttot;
+ ipath_stats.sps_port0pkts += pkttot;
+ ipath_stats.sps_avgpkts_call =
+ ipath_stats.sps_port0pkts / ++totcalls;
+
+ if (sma_this_time) /* only once at end, not each time */
+ wake_up_interruptible(&ipath_sma_wait);
+
+done:
+ clear_bit(0, &dd->ipath_rcv_pending);
+ smp_mb__after_clear_bit();
+}
+
+/**
+ * ipath_update_pio_bufs - update shadow copy of the PIO availability map
+ * @dd: the infinipath device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ * NOTE: This can be called from interrupt context by some code
+ * and from non-interrupt context by ipath_getpiobuf().
+ */
+
+static void ipath_update_pio_bufs(struct ipath_devdata *dd)
+{
+ unsigned long flags;
+ int i;
+ const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
+
+ /* If the generation (check) bits have changed, then we update the
+ * busy bit for the corresponding PIO buffer. This algorithm will
+ * modify positions to the value they already have in some cases
+ * (i.e., no change), but it's faster than changing only the bits
+ * that have changed.
+ *
+ * We would like to do this atomicly, to avoid spinlocks in the
+ * critical send path, but that's not really possible, given the
+ * type of changes, and that this routine could be called on
+ * multiple cpu's simultaneously, so we lock in this routine only,
+ * to avoid conflicting updates; all we change is the shadow, and
+ * it's a single 64 bit memory location, so by definition the update
+ * is atomic in terms of what other cpu's can see in testing the
+ * bits. The spin_lock overhead isn't too bad, since it only
+ * happens when all buffers are in use, so only cpu overhead, not
+ * latency or bandwidth is affected.
+ */
+#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
+ if (!dd->ipath_pioavailregs_dma) {
+ ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
+ return;
+ }
+ if (ipath_debug & __IPATH_VERBDBG) {
+ /* only if packet debug and verbose */
+ volatile unsigned long long *dma =
+ (volatile unsigned long long *)
+ dd->ipath_pioavailregs_dma;
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+
+ ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
+ "d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
+ "s3=%lx\n",
+ (unsigned long long) __le64_to_cpu(dma[0]),
+ shadow[0],
+ (unsigned long long) __le64_to_cpu(dma[1]),
+ shadow[1],
+ (unsigned long long) __le64_to_cpu(dma[2]),
+ shadow[2],
+ (unsigned long long) __le64_to_cpu(dma[3]),
+ shadow[3]);
+ if (piobregs > 4)
+ ipath_cdbg(PKT, "2nd group, dma4=%llx shad4=%lx, "
+ "d5=%llx s5=%lx, d6=%llx s6=%lx, "
+ "d7=%llx s7=%lx\n",
+ (unsigned long long) __le64_to_cpu(dma[4]),
+ shadow[4],
+ (unsigned long long) __le64_to_cpu(dma[5]),
+ shadow[5],
+ (unsigned long long) __le64_to_cpu(dma[6]),
+ shadow[6],
+ (unsigned long long) __le64_to_cpu(dma[7]),
+ shadow[7]);
+ }
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (i = 0; i < piobregs; i++) {
+ u64 pchbusy, pchg, piov, pnew;
+ /*
+ * Chip Errata: bug 6641; even and odd qwords>3 are swapped
+ */
+ if (i > 3) {
+ if (i & 1)
+ piov = __le64_to_cpu(dd->ipath_pioavailregs_dma[i - 1]);
+ else
+ piov = __le64_to_cpu(dd->ipath_pioavailregs_dma[i + 1]);
+ } else
+ piov = __le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
+ pchg = _IPATH_ALL_CHECKBITS &
+ ~(dd->ipath_pioavailshadow[i] ^ piov);
+ pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
+ if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
+ pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
+ pnew |= piov & pchbusy;
+ dd->ipath_pioavailshadow[i] = pnew;
+ }
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+}
+
+/**
+ * ipath_setrcvhdrsize - set the receive header size
+ * @dd: the infinipath device
+ * @rhdrsize: the receive header size
+ *
+ * called from user init code, and also layered driver init
+ */
+int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
+{
+ int ret = 0;
+
+ if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
+ if (dd->ipath_rcvhdrsize != rhdrsize) {
+ dev_info(&dd->pcidev->dev,
+ "Error: can't set protocol header "
+ "size %u, already %u\n",
+ rhdrsize, dd->ipath_rcvhdrsize);
+ ret = -EAGAIN;
+ } else
+ ipath_cdbg(VERBOSE, "Reuse same protocol header "
+ "size %u\n", dd->ipath_rcvhdrsize);
+ } else if (rhdrsize > (dd->ipath_rcvhdrentsize -
+ (sizeof(u64) / sizeof(u32)))) {
+ ipath_dbg("Error: can't set protocol header size %u "
+ "(> max %u)\n", rhdrsize,
+ dd->ipath_rcvhdrentsize -
+ (u32) (sizeof(u64) / sizeof(u32)));
+ ret = -EOVERFLOW;
+ } else {
+ dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
+ dd->ipath_rcvhdrsize = rhdrsize;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
+ dd->ipath_rcvhdrsize);
+ ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
+ dd->ipath_rcvhdrsize);
+ }
+ return ret;
+}
+
+/**
+ * ipath_getpiobuf - find an available pio buffer
+ * @dd: the infinipath device
+ * @pbufnum: the buffer number is placed here
+ *
+ * do appropriate marking as busy, etc.
+ * returns buffer number if one found (>=0), negative number is error.
+ * Used by ipath_sma_send_pkt and ipath_layer_send
+ */
+u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
+{
+ int i, j, starti, updated = 0;
+ unsigned piobcnt, iter;
+ unsigned long flags;
+ unsigned long *shadow = dd->ipath_pioavailshadow;
+ u32 __iomem *buf;
+
+ piobcnt = (unsigned)(dd->ipath_piobcnt2k
+ + dd->ipath_piobcnt4k);
+ starti = dd->ipath_lastport_piobuf;
+ iter = piobcnt - starti;
+ if (dd->ipath_upd_pio_shadow) {
+ /*
+ * Minor optimization. If we had no buffers on last call,
+ * start out by doing the update; continue and do scan even
+ * if no buffers were updated, to be paranoid
+ */
+ ipath_update_pio_bufs(dd);
+ /* we scanned here, don't do it at end of scan */
+ updated = 1;
+ i = starti;
+ } else
+ i = dd->ipath_lastpioindex;
+
+rescan:
+ /*
+ * while test_and_set_bit() is atomic, we do that and then the
+ * change_bit(), and the pair is not. See if this is the cause
+ * of the remaining armlaunch errors.
+ */
+ spin_lock_irqsave(&ipath_pioavail_lock, flags);
+ for (j = 0; j < iter; j++, i++) {
+ if (i >= piobcnt)
+ i = starti;
+ /*
+ * To avoid bus lock overhead, we first find a candidate
+ * buffer, then do the test and set, and continue if that
+ * fails.
+ */
+ if (test_bit((2 * i) + 1, shadow) ||
+ test_and_set_bit((2 * i) + 1, shadow))
+ continue;
+ /* flip generation bit */
+ change_bit(2 * i, shadow);
+ break;
+ }
+ spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+
+ if (j == iter) {
+ volatile unsigned long long *dma =
+ (volatile unsigned long long *)
+ dd->ipath_pioavailregs_dma;
+
+ /*
+ * first time through; shadow exhausted, but may be real
+ * buffers available, so go see; if any updated, rescan
+ * (once)
+ */
+ if (!updated) {
+ ipath_update_pio_bufs(dd);
+ updated = 1;
+ i = starti;
+ goto rescan;
+ }
+ dd->ipath_upd_pio_shadow = 1;
+ /*
+ * not atomic, but if we lose one once in a while, that's OK
+ */
+ ipath_stats.sps_nopiobufs++;
+ if (!(++dd->ipath_consec_nopiobuf % 100000)) {
+ ipath_dbg("%u pio sends with no bufavail; dmacopy: "
+ "%llx %llx %llx %llx; shadow: "
+ "%lx %lx %lx %lx\n",
+ dd->ipath_consec_nopiobuf,
+ (unsigned long long) __le64_to_cpu(dma[0]),
+ (unsigned long long) __le64_to_cpu(dma[1]),
+ (unsigned long long) __le64_to_cpu(dma[2]),
+ (unsigned long long) __le64_to_cpu(dma[3]),
+ shadow[0], shadow[1], shadow[2],
+ shadow[3]);
+ /*
+ * 4 buffers per byte, 4 registers above, cover rest
+ * below
+ */
+ if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
+ (sizeof(shadow[0]) * 4 * 4))
+ ipath_dbg("2nd group: dmacopy: %llx %llx "
+ "%llx %llx; shadow: %lx %lx "
+ "%lx %lx\n",
+ (unsigned long long) __le64_to_cpu(dma[4]),
+ (unsigned long long) __le64_to_cpu(dma[5]),
+ (unsigned long long) __le64_to_cpu(dma[6]),
+ (unsigned long long) __le64_to_cpu(dma[7]),
+ shadow[4], shadow[5],
+ shadow[6], shadow[7]);
+ }
+ return NULL;
+ }
+
+ if (updated && dd->ipath_layer.l_intr) {
+ /*
+ * ran out of bufs, now some (at least this one we just
+ * got) are now available, so tell the layered driver.
+ */
+ dd->ipath_layer.l_intr(dd->ipath_unit,
+ IPATH_LAYER_INT_SEND_CONTINUE);
+ }
+
+ /*
+ * set next starting place. Since it's just an optimization,
+ * it doesn't matter who wins on this, so no locking
+ */
+ dd->ipath_lastpioindex = i + 1;
+ if (dd->ipath_upd_pio_shadow)
+ dd->ipath_upd_pio_shadow = 0;
+ if (dd->ipath_consec_nopiobuf)
+ dd->ipath_consec_nopiobuf = 0;
+ if (i < dd->ipath_piobcnt2k)
+ buf = (u32 __iomem *)
+ (dd->ipath_pio2kbase + i * dd->ipath_palign);
+ else
+ buf = (u32 __iomem *)
+ (dd->ipath_pio4kbase + (i - dd->ipath_piobcnt2k) *
+ dd->ipath_4kalign);
+ ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
+ i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
+ if (pbufnum)
+ *pbufnum = i;
+ return buf;
+}
+
+/**
+ * ipath_create_rcvhdrq - create a receive header queue
+ * @dd: the infinipath device
+ * @pd: the port data
+ *
+ * this *must* be physically contiguous memory, and for now,
+ * that limits it to what kmalloc can do.
+ */
+int ipath_create_rcvhdrq(struct ipath_devdata *dd,
+ struct ipath_portdata *pd)
+{
+ int ret = 0, amt;
+
+ amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
+ sizeof(u32), PAGE_SIZE);
+ if (!pd->port_rcvhdrq) {
+ size_t i;
+
+ /*
+ * not using REPEAT isn't viable; at 128KB, we can
+ * easily fail this. The problem with REPEAT is we
+ * can block here "forever". There isn't an
+ * inbetween, unfortunately. We could reduce the risk
+ * by never freeing the rcvhdrq except at unload, but
+ * even then, the first time a port is used, we could
+ * delay for some time...
+ */
+ pd->port_rcvhdrq =
+ dma_alloc_coherent(&dd->pcidev->dev, amt,
+ &pd->port_rcvhdrq_phys,
+ GFP_USER);
+ if (!pd->port_rcvhdrq) {
+ ipath_dev_err(dd,
+ "attempt to allocate %d bytes for "
+ "port %u rcvhdrq failed\n",
+ amt, pd->port_port);
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ pd->port_rcvhdrq_size = amt;
+
+ ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
+ "for port %u rcvhdr Q\n",
+ amt >> PAGE_SHIFT, pd->port_rcvhdrq,
+ (unsigned long) pd->port_rcvhdrq_phys,
+ (unsigned long) pd->port_rcvhdrq_size,
+ pd->port_port);
+
+ /*
+ * mark pages as reserved, to avoid problems when user
+ * process with them mapped exits.
+ */
+ for (i = 0; i < amt; i += PAGE_SIZE)
+ SetPageReserved(virt_to_page(pd->port_rcvhdrq + i));
+ } else {
+ /*
+ * clear for security, sanity, and/or debugging, each
+ * time we reuse
+ */
+ memset(pd->port_rcvhdrq, 0, amt);
+ }
+
+ /*
+ * tell chip each time we init it, even if we are re-using previous
+ * memory (we zero it at process close)
+ */
+ ipath_cdbg(VERBOSE, "writing port %d rcvhdraddr as %lx\n",
+ pd->port_port, (unsigned long) pd->port_rcvhdrq_phys);
+ ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
+ pd->port_port, pd->port_rcvhdrq_phys);
+
+ ret = 0;
+bail:
+ return ret;
+}
+
+int ipath_waitfor_complete(struct ipath_devdata *dd, ipath_kreg reg_id,
+ u64 bits_to_wait_for, u64 * valp)
+{
+ u64 timeout, lastval, val;
+
+ lastval = ipath_read_kreg64(dd, reg_id);
+ /* wait a ridiculously long time */
+ timeout = get_cycles() + 0x10000000ULL;
+ do {
+ val = ipath_read_kreg64(dd, reg_id);
+ /* set so they have something, even on failures. */
+ *valp = val;
+ if ((val & bits_to_wait_for) == bits_to_wait_for)
+ return 0;
+ if (val != lastval)
+ ipath_cdbg(VERBOSE, "Changed from %llx to %llx, "
+ "waiting for %llx bits\n",
+ (unsigned long long) lastval,
+ (unsigned long long) val,
+ (unsigned long long) bits_to_wait_for);
+ yield();
+ if (get_cycles() > timeout) {
+ ipath_dbg("Didn't get bits %llx in register 0x%x, "
+ "got %llx\n",
+ (unsigned long long) bits_to_wait_for,
+ reg_id, (unsigned long long) *valp);
+ return ENODEV;
+ }
+ } while (1);
+}
+
+/**
+ * ipath_waitfor_mdio_cmdready - wait for last command to complete
+ * @dd: the infinipath device
+ *
+ * Like ipath_waitfor_complete(), but we wait for the CMDVALID bit to go
+ * away indicating the last command has completed. It doesn't return data
+ */
+int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
+{
+ u64 timeout;
+ u64 val;
+
+ /* wait a ridiculously long time */
+ timeout = get_cycles() + 0x10000000ULL;
+ do {
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_mdio);
+ if (!(val & IPATH_MDIO_CMDVALID))
+ return 0;
+ yield();
+ if (get_cycles() > timeout) {
+ ipath_dbg("CMDVALID stuck in mdio reg? (%llx)\n",
+ (unsigned long long) val);
+ return ENODEV;
+ }
+ } while (1);
+}
+
+void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
+{
+#if _IPATH_DEBUGGING
+ static char *what[4] = {
+ [0] = "DOWN",
+ [INFINIPATH_IBCC_LINKCMD_INIT] = "INIT",
+ [INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
+ [INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
+ };
+ ipath_cdbg(SMA, "Trying to move unit %u to %s, current ltstate "
+ "is %s\n", dd->ipath_unit,
+ what[(which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
+ INFINIPATH_IBCC_LINKCMD_MASK],
+ ipath_ibcstatus_str[
+ (ipath_read_kreg64
+ (dd, dd->ipath_kregs->kr_ibcstatus) >>
+ INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+ INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
+#endif
+
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
+ dd->ipath_ibcctrl | which);
+}
+
+/**
+ * ipath_read_kreg64_port - read a device's per-port 64-bit kernel register
+ * @dd: the infinipath device
+ * @regno: the register number to read
+ * @port: the port containing the register
+ *
+ * Registers that vary with the chip implementation constants (port)
+ * use this routine.
+ */
+u64 ipath_read_kreg64_port(const struct ipath_devdata *dd, ipath_kreg regno,
+ unsigned port)
+{
+ u16 where;
+
+ if (port < dd->ipath_portcnt &&
+ (regno == dd->ipath_kregs->kr_rcvhdraddr ||
+ regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
+ where = regno + port;
+ else
+ where = -1;
+
+ return ipath_read_kreg64(dd, where);
+}
+
+/**
+ * ipath_write_kreg_port - write a device's per-port 64-bit kernel register
+ * @dd: the infinipath device
+ * @regno: the register number to write
+ * @port: the port containing the register
+ * @value: the value to write
+ *
+ * Registers that vary with the chip implementation constants (port)
+ * use this routine.
+ */
+void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
+ unsigned port, u64 value)
+{
+ u16 where;
+
+ if (port < dd->ipath_portcnt &&
+ (regno == dd->ipath_kregs->kr_rcvhdraddr ||
+ regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
+ where = regno + port;
+ else
+ where = -1;
+
+ ipath_write_kreg(dd, where, value);
+}
+
+/**
+ * ipath_shutdown_link - shut down a link
+ * @dd: the infinipath device
+ *
+ * do this when driver is being unloaded, or perhaps for diags, and
+ * maybe when we get an interrupt of a fatal link error that requires
+ * bringing the linkd down and back up
+ */
+static int ipath_shutdown_link(struct ipath_devdata *dd)
+{
+ int ret = 0;
+
+ ipath_dbg("Shutting down the link\n");
+ ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
+ INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+
+ /*
+ * we are shutting down, so tell the layered driver. We don't do
+ * this on just a link state change, much like ethernet, a cable
+ * unplug, etc. doesn't change driver state
+ */
+ if (dd->ipath_layer.l_intr)
+ dd->ipath_layer.l_intr(dd->ipath_unit,
+ IPATH_LAYER_INT_IF_DOWN);
+
+ /* disable IBC */
+ dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ dd->ipath_control);
+
+ dd->ipath_flags |= IPATH_LINKUNK;
+ dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
+ IPATH_LINKINIT | IPATH_LINKARMED |
+ IPATH_LINKACTIVE);
+ *dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
+ IPATH_STATUS_IB_READY);
+
+ /*
+ * clear SerdesEnable and turn the leds off; do this here because
+ * we are unloading, so don't count on interrupts to move along
+ * Turn the LEDs off explictly for the same reason.
+ */
+ dd->ipath_f_quiet_serdes(dd);
+ dd->ipath_f_setextled(dd, 0, 0);
+
+ if (dd->ipath_stats_timer_active) {
+ del_timer_sync(&dd->ipath_stats_timer);
+ dd->ipath_stats_timer_active = 0;
+ }
+ if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
+ /* can't do anything more with chip; needs re-init */
+ *dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
+ if (dd->ipath_kregbase) {
+ /*
+ * if we haven't already cleaned up before these are
+ * to ensure any register reads/writes "fail" until
+ * re-init
+ */
+ dd->ipath_kregbase = NULL;
+ dd->ipath_kregvirt = NULL;
+ dd->ipath_uregbase = 0;
+ dd->ipath_sregbase = 0;
+ dd->ipath_cregbase = 0;
+ dd->ipath_kregsize = 0;
+ }
+ ipath_disable_wc(dd);
+ }
+
+ return ret;
+}
+
+/**
+ * ipath_free_pddata - free a port's allocated data
+ * @dd: the infinipath device
+ * @port: the port
+ * @freehdrq: free the port data structure if true
+ *
+ * when closing, free up any allocated data for a port, if the
+ * reference count goes to zero
+ * Note: this also optionally frees the portdata itself!
+ * Any changes here have to be matched up with the reinit case
+ * of ipath_init_chip(), which calls this routine on reinit after reset.
+ */
+void ipath_free_pddata(struct ipath_devdata *dd, u32 port, int freehdrq)
+{
+ struct ipath_portdata *pd = dd->ipath_pd[port];
+
+ if (!pd)
+ return;
+ if (freehdrq)
+ /*
+ * only clear and free portdata if we are going to also
+ * release the hdrq, otherwise we leak the hdrq on each
+ * open/close cycle
+ */
+ dd->ipath_pd[port] = NULL;
+ if (freehdrq && pd->port_rcvhdrq) {
+ size_t i;
+ ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
+ "(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
+ (unsigned long) pd->port_rcvhdrq_size);
+ for (i = 0; i < pd->port_rcvhdrq_size; i += PAGE_SIZE)
+ ClearPageReserved(
+ virt_to_page(pd->port_rcvhdrq + i));
+ dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
+ pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
+ pd->port_rcvhdrq = NULL;
+ }
+ if (port && pd->port_rcvegrbuf) {
+ /* always free this */
+ if (pd->port_rcvegrbuf) {
+ unsigned e;
+
+ for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
+ void *buf = pd->port_rcvegrbuf[e];
+ size_t i;
+
+ for (i = 0; i < pd->port_rcvegrbuf_size;
+ i += PAGE_SIZE)
+ ClearPageReserved(
+ virt_to_page(buf + i));
+
+ ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
+ "chunk %u/%u\n", buf,
+ (unsigned long)
+ pd->port_rcvegrbuf_size,
+ e, pd->port_rcvegrbuf_chunks);
+ dma_free_coherent(
+ &dd->pcidev->dev,
+ pd->port_rcvegrbuf_size, buf,
+ pd->port_rcvegrbuf_phys[e]);
+ }
+ vfree(pd->port_rcvegrbuf);
+ pd->port_rcvegrbuf = NULL;
+ vfree(pd->port_rcvegrbuf_phys);
+ pd->port_rcvegrbuf_phys = NULL;
+ }
+ pd->port_rcvegrbuf_chunks = 0;
+ } else if (port == 0 && dd->ipath_port0_skbs) {
+ unsigned e;
+ struct sk_buff **skbs = dd->ipath_port0_skbs;
+
+ dd->ipath_port0_skbs = NULL;
+ ipath_cdbg(VERBOSE, "free closed port %d ipath_port0_skbs "
+ "@ %p\n", pd->port_port, skbs);
+ for (e = 0; e < dd->ipath_rcvegrcnt; e++)
+ if (skbs[e])
+ dev_kfree_skb(skbs[e]);
+ vfree(skbs);
+ }
+ if (freehdrq) {
+ kfree(pd->port_tid_pg_list);
+ kfree(pd);
+ }
+}
+
+int __init infinipath_init(void)
+{
+ int ret;
+
+ ipath_dbg(KERN_INFO DRIVER_LOAD_MSG "%s", ipath_core_version);
+
+ /*
+ * These must all be called before the driver is registered
+ * with the PCI subsystem.
+ */
+ spin_lock_init(&ipath_pioavail_lock);
+ spin_lock_init(&ipath_sma_lock);
+
+ spin_lock_init(&unit_table_lock);
+ idr_init(&unit_table);
+ if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ ret = pci_register_driver(&ipath_driver);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME
+ ": Unable to register driver: error %d\n", -ret);
+ goto bail_unit;
+ }
+
+ ret = ipath_driver_create_group(&ipath_driver.driver);
+ if (ret < 0) {
+ printk(KERN_ERR IPATH_DRV_NAME ": Unable to create driver "
+ "sysfs entries: error %d\n", -ret);
+ goto bail_pci;
+ }
+
+ goto bail;
+
+bail_pci:
+ pci_unregister_driver(&ipath_driver);
+
+bail_unit:
+ idr_destroy(&unit_table);
+
+bail:
+ return ret;
+}
+
+static void cleanup_device(struct ipath_devdata *dd)
+{
+ int port;
+ u64 val;
+
+ /* in case unload fails, be consistent */
+ dd->ipath_rcvctrl = 0;
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+ dd->ipath_rcvctrl);
+
+ /*
+ * gracefully stop all sends allowing any in progress to
+ * trickle out first.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, 0ULL);
+ /* flush it */
+ val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ /*
+ * enough for anything that's going to trickle out to have
+ * actually done so.
+ */
+ udelay(5);
+
+ /*
+ * abort any armed or launched PIO buffers that didn't go. (self
+ * clearing). Will cause any packet currently being transmitted to
+ * go out with an EBP, and may also cause a short packet error on
+ * the receiver.
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+ INFINIPATH_S_ABORT);
+
+ /* mask interrupts, but not errors */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+ ipath_shutdown_link(dd);
+
+ /*
+ * clear all interrupts and errors. Next time driver is loaded,
+ * we know that whatever is set happened while we were unloaded
+ */
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
+ ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+ if (dd->__ipath_pioavailregs_base) {
+ pci_free_consistent(dd->pcidev,
+ dd->ipath_pioavailregs_size,
+ (void *)dd->__ipath_pioavailregs_base,
+ dd->ipath_pioavailregs_phys);
+ dd->__ipath_pioavailregs_base = NULL;
+ dd->ipath_pioavailregs_dma = NULL;
+ }
+
+ if (dd->ipath_pageshadow) {
+ struct page **tmpp = dd->ipath_pageshadow;
+ int i, cnt = 0;
+
+ ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
+ "locked\n");
+ for (port = 0; port < dd->ipath_cfgports; port++) {
+ int port_tidbase = port * dd->ipath_rcvtidcnt;
+ int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
+ for (i = port_tidbase; i < maxtid; i++) {
+ if (tmpp[i]) {
+ ipath_release_user_pages(&tmpp[i], 1);
+ tmpp[i] = NULL;
+ cnt++;
+ }
+ }
+ }
+ if (cnt) {
+ ipath_stats.sps_pageunlocks += cnt;
+ ipath_cdbg(VERBOSE, "There were still %u expTID "
+ "entries locked\n", cnt);
+ }
+ if (ipath_stats.sps_pagelocks ||
+ ipath_stats.sps_pageunlocks)
+ ipath_cdbg(
+ VERBOSE, "%llu pages locked, %llu "
+ "unlocked via ipath_m{un}lock\n",
+ (unsigned long long)
+ ipath_stats.sps_pagelocks,
+ (unsigned long long)
+ ipath_stats.sps_pageunlocks);
+
+ ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
+ dd->ipath_pageshadow);
+ vfree(dd->ipath_pageshadow);
+ dd->ipath_pageshadow = NULL;
+ }
+
+ /*
+ * free any resources still in use (usually just kernel ports)
+ * at unload
+ */
+ for (port = 0; port < dd->ipath_cfgports; port++)
+ ipath_free_pddata(dd, port, 1);
+ kfree(dd->ipath_pd);
+ /*
+ * debuggability, in case some cleanup path tries to use it
+ * after this
+ */
+ dd->ipath_pd = NULL;
+}
+
+static void __exit infinipath_cleanup(void)
+{
+ int m;
+
+ ipath_driver_remove_group(&ipath_driver.driver);
+
+ /*
+ * turn off rcv, send, and interrupts for all ports, all drivers
+ * should also hard reset the chip here?
+ * free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
+ * for all versions of the driver, if they were allocated
+ */
+ for (m = 0; m < atomic_read(&ipath_max); m++) {
+ struct ipath_devdata *dd = ipath_lookup(m);
+ if (!dd)
+ continue;
+
+ if (dd->ipath_kregbase)
+ cleanup_device(dd);
+
+ if (dd->pcidev) {
+ if (dd->pcidev->irq) {
+ ipath_cdbg(VERBOSE,
+ "unit %u free_irq of irq %x\n",
+ m, dd->pcidev->irq);
+ free_irq(dd->pcidev->irq, dd);
+ } else
+ ipath_dbg("irq is 0, not doing free_irq "
+ "for unit %u\n", m);
+ dd->pcidev = NULL;
+ }
+
+ /*
+ * we check for NULL here, because it's outside the kregbase
+ * check, and we need to call it after the free_irq. Thus
+ * it's possible that the function pointers were never
+ * initialized.
+ */
+ if (dd->ipath_f_cleanup)
+ /* clean up chip-specific stuff */
+ dd->ipath_f_cleanup(dd);
+ }
+
+ ipath_cdbg(VERBOSE, "Unregistering pci driver unit %u\n", m);
+ pci_unregister_driver(&ipath_driver);
+
+ idr_destroy(&unit_table);
+}
+
+/**
+ * ipath_reset_device - reset the chip if possible
+ * @unit: the device to reset
+ *
+ * Whether or not reset is successful, we attempt to re-initialize the chip
+ * (that is, much like a driver unload/reload). We clear the INITTED flag
+ * so that the various entry points will fail until we reinitialize. For
+ * now, we only allow this if no user ports are open that use chip resources
+ */
+int ipath_reset_device(int unit)
+{
+ int ret, i;
+ struct ipath_devdata *dd = ipath_lookup(unit);
+
+ if (!dd)
+ return -ENODEV;
+
+ dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
+
+ if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
+ dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
+ "not initialized or not present\n", unit);
+ return -ENXIO;
+ }
+
+ if (dd->ipath_pd)
+ for (i = 1; i < dd->ipath_portcnt; i++) {
+ if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
+ ipath_dbg("unit %u port %d is in use "
+ "(PID %u cmd %s), can't reset\n",
+ unit, i,
+ dd->ipath_pd[i]->port_pid,
+ dd->ipath_pd[i]->port_comm);
+ return -EBUSY;
+ }
+ }
+
+ dd->ipath_flags &= ~IPATH_INITTED;
+ ret = dd->ipath_f_reset(dd);
+ if (ret != 1)
+ ipath_dbg("reset was not successful\n");
+ ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
+ unit);
+ ret = ipath_init_chip(dd, 1);
+ if (ret)
+ dev_err(&dd->pcidev->dev, "Reinitialize unit %u after "
+ "reset failed with %d\n", unit, ret);
+ else
+ dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
+ "resetting\n", unit);
+ return ret;
+}
+
+module_init(infinipath_init);
+module_exit(infinipath_cleanup);
More information about the general
mailing list