android_kernel_xiaomi_sm8350/drivers/infiniband/hw/ipath/ipath_driver.c
Dave Olson 826d801009 IB/ipath: Enable 4KB MTU
Enable use of 4KB MTU.  Since the driver uses more pinned memory for
receive buffers when the 4KB MTU is enabled, whether or not the fabric
supports that MTU, add a "mtu4096" module parameter that can be used to
limit the MTU to 2KB when it is known that 4KB MTUs can't be used
anyway.

Signed-off-by: Dave Olson <dave.olson@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
2008-04-16 21:01:12 -07:00

2303 lines
65 KiB
C

/*
* Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
* Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/vmalloc.h>
#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
static void ipath_update_pio_bufs(struct ipath_devdata *);
const char *ipath_get_unit_name(int unit)
{
static char iname[16];
snprintf(iname, sizeof iname, "infinipath%u", unit);
return iname;
}
#define DRIVER_LOAD_MSG "QLogic " IPATH_DRV_NAME " loaded: "
#define PFX IPATH_DRV_NAME ": "
/*
* The size has to be longer than this string, so we can append
* board/chip information to it in the init code.
*/
const char ib_ipath_version[] = IPATH_IDSTR "\n";
static struct idr unit_table;
DEFINE_SPINLOCK(ipath_devs_lock);
LIST_HEAD(ipath_dev_list);
wait_queue_head_t ipath_state_wait;
unsigned ipath_debug = __IPATH_INFO;
module_param_named(debug, ipath_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(debug, "mask for debug prints");
EXPORT_SYMBOL_GPL(ipath_debug);
unsigned ipath_mtu4096 = 1; /* max 4KB IB mtu by default, if supported */
module_param_named(mtu4096, ipath_mtu4096, uint, S_IRUGO);
MODULE_PARM_DESC(mtu4096, "enable MTU of 4096 bytes, if supported");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("QLogic <support@pathscale.com>");
MODULE_DESCRIPTION("QLogic InfiniPath driver");
const char *ipath_ibcstatus_str[] = {
"Disabled",
"LinkUp",
"PollActive",
"PollQuiet",
"SleepDelay",
"SleepQuiet",
"LState6", /* unused */
"LState7", /* unused */
"CfgDebounce",
"CfgRcvfCfg",
"CfgWaitRmt",
"CfgIdle",
"RecovRetrain",
"LState0xD", /* unused */
"RecovWaitRmt",
"RecovIdle",
};
static void __devexit ipath_remove_one(struct pci_dev *);
static int __devinit ipath_init_one(struct pci_dev *,
const struct pci_device_id *);
/* Only needed for registration, nothing else needs this info */
#define PCI_VENDOR_ID_PATHSCALE 0x1fc1
#define PCI_DEVICE_ID_INFINIPATH_HT 0xd
#define PCI_DEVICE_ID_INFINIPATH_PE800 0x10
/* Number of seconds before our card status check... */
#define STATUS_TIMEOUT 60
static const struct pci_device_id ipath_pci_tbl[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ipath_pci_tbl);
static struct pci_driver ipath_driver = {
.name = IPATH_DRV_NAME,
.probe = ipath_init_one,
.remove = __devexit_p(ipath_remove_one),
.id_table = ipath_pci_tbl,
.driver = {
.groups = ipath_driver_attr_groups,
},
};
static void ipath_check_status(struct work_struct *work)
{
struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
status_work.work);
/*
* If we don't have any interrupts, let the user know and
* don't bother checking again.
*/
if (dd->ipath_int_counter == 0)
dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
}
static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
u32 *bar0, u32 *bar1)
{
int ret;
ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_0, bar0);
if (ret)
ipath_dev_err(dd, "failed to read bar0 before enable: "
"error %d\n", -ret);
ret = pci_read_config_dword(dev, PCI_BASE_ADDRESS_1, bar1);
if (ret)
ipath_dev_err(dd, "failed to read bar1 before enable: "
"error %d\n", -ret);
ipath_dbg("Read bar0 %x bar1 %x\n", *bar0, *bar1);
}
static void ipath_free_devdata(struct pci_dev *pdev,
struct ipath_devdata *dd)
{
unsigned long flags;
pci_set_drvdata(pdev, NULL);
if (dd->ipath_unit != -1) {
spin_lock_irqsave(&ipath_devs_lock, flags);
idr_remove(&unit_table, dd->ipath_unit);
list_del(&dd->ipath_list);
spin_unlock_irqrestore(&ipath_devs_lock, flags);
}
vfree(dd);
}
static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
{
unsigned long flags;
struct ipath_devdata *dd;
int ret;
if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
dd = ERR_PTR(-ENOMEM);
goto bail;
}
dd = vmalloc(sizeof(*dd));
if (!dd) {
dd = ERR_PTR(-ENOMEM);
goto bail;
}
memset(dd, 0, sizeof(*dd));
dd->ipath_unit = -1;
spin_lock_irqsave(&ipath_devs_lock, flags);
ret = idr_get_new(&unit_table, dd, &dd->ipath_unit);
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate unit ID: error %d\n", -ret);
ipath_free_devdata(pdev, dd);
dd = ERR_PTR(ret);
goto bail_unlock;
}
dd->pcidev = pdev;
pci_set_drvdata(pdev, dd);
INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
list_add(&dd->ipath_list, &ipath_dev_list);
bail_unlock:
spin_unlock_irqrestore(&ipath_devs_lock, flags);
bail:
return dd;
}
static inline struct ipath_devdata *__ipath_lookup(int unit)
{
return idr_find(&unit_table, unit);
}
struct ipath_devdata *ipath_lookup(int unit)
{
struct ipath_devdata *dd;
unsigned long flags;
spin_lock_irqsave(&ipath_devs_lock, flags);
dd = __ipath_lookup(unit);
spin_unlock_irqrestore(&ipath_devs_lock, flags);
return dd;
}
int ipath_count_units(int *npresentp, int *nupp, int *maxportsp)
{
int nunits, npresent, nup;
struct ipath_devdata *dd;
unsigned long flags;
int maxports;
nunits = npresent = nup = maxports = 0;
spin_lock_irqsave(&ipath_devs_lock, flags);
list_for_each_entry(dd, &ipath_dev_list, ipath_list) {
nunits++;
if ((dd->ipath_flags & IPATH_PRESENT) && dd->ipath_kregbase)
npresent++;
if (dd->ipath_lid &&
!(dd->ipath_flags & (IPATH_DISABLED | IPATH_LINKDOWN
| IPATH_LINKUNK)))
nup++;
if (dd->ipath_cfgports > maxports)
maxports = dd->ipath_cfgports;
}
spin_unlock_irqrestore(&ipath_devs_lock, flags);
if (npresentp)
*npresentp = npresent;
if (nupp)
*nupp = nup;
if (maxportsp)
*maxportsp = maxports;
return nunits;
}
/*
* These next two routines are placeholders in case we don't have per-arch
* code for controlling write combining. If explicit control of write
* combining is not available, performance will probably be awful.
*/
int __attribute__((weak)) ipath_enable_wc(struct ipath_devdata *dd)
{
return -EOPNOTSUPP;
}
void __attribute__((weak)) ipath_disable_wc(struct ipath_devdata *dd)
{
}
/*
* Perform a PIO buffer bandwidth write test, to verify proper system
* configuration. Even when all the setup calls work, occasionally
* BIOS or other issues can prevent write combining from working, or
* can cause other bandwidth problems to the chip.
*
* This test simply writes the same buffer over and over again, and
* measures close to the peak bandwidth to the chip (not testing
* data bandwidth to the wire). On chips that use an address-based
* trigger to send packets to the wire, this is easy. On chips that
* use a count to trigger, we want to make sure that the packet doesn't
* go out on the wire, or trigger flow control checks.
*/
static void ipath_verify_pioperf(struct ipath_devdata *dd)
{
u32 pbnum, cnt, lcnt;
u32 __iomem *piobuf;
u32 *addr;
u64 msecs, emsecs;
piobuf = ipath_getpiobuf(dd, &pbnum);
if (!piobuf) {
dev_info(&dd->pcidev->dev,
"No PIObufs for checking perf, skipping\n");
return;
}
/*
* Enough to give us a reasonable test, less than piobuf size, and
* likely multiple of store buffer length.
*/
cnt = 1024;
addr = vmalloc(cnt);
if (!addr) {
dev_info(&dd->pcidev->dev,
"Couldn't get memory for checking PIO perf,"
" skipping\n");
goto done;
}
preempt_disable(); /* we want reasonably accurate elapsed time */
msecs = 1 + jiffies_to_msecs(jiffies);
for (lcnt = 0; lcnt < 10000U; lcnt++) {
/* wait until we cross msec boundary */
if (jiffies_to_msecs(jiffies) >= msecs)
break;
udelay(1);
}
ipath_disable_armlaunch(dd);
writeq(0, piobuf); /* length 0, no dwords actually sent */
ipath_flush_wc();
/*
* this is only roughly accurate, since even with preempt we
* still take interrupts that could take a while. Running for
* >= 5 msec seems to get us "close enough" to accurate values
*/
msecs = jiffies_to_msecs(jiffies);
for (emsecs = lcnt = 0; emsecs <= 5UL; lcnt++) {
__iowrite32_copy(piobuf + 64, addr, cnt >> 2);
emsecs = jiffies_to_msecs(jiffies) - msecs;
}
/* 1 GiB/sec, slightly over IB SDR line rate */
if (lcnt < (emsecs * 1024U))
ipath_dev_err(dd,
"Performance problem: bandwidth to PIO buffers is "
"only %u MiB/sec\n",
lcnt / (u32) emsecs);
else
ipath_dbg("PIO buffer bandwidth %u MiB/sec is OK\n",
lcnt / (u32) emsecs);
preempt_enable();
vfree(addr);
done:
/* disarm piobuf, so it's available again */
ipath_disarm_piobufs(dd, pbnum, 1);
ipath_enable_armlaunch(dd);
}
static int __devinit ipath_init_one(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
int ret, len, j;
struct ipath_devdata *dd;
unsigned long long addr;
u32 bar0 = 0, bar1 = 0;
dd = ipath_alloc_devdata(pdev);
if (IS_ERR(dd)) {
ret = PTR_ERR(dd);
printk(KERN_ERR IPATH_DRV_NAME
": Could not allocate devdata: error %d\n", -ret);
goto bail;
}
ipath_cdbg(VERBOSE, "initializing unit #%u\n", dd->ipath_unit);
ret = pci_enable_device(pdev);
if (ret) {
/* This can happen iff:
*
* We did a chip reset, and then failed to reprogram the
* BAR, or the chip reset due to an internal error. We then
* unloaded the driver and reloaded it.
*
* Both reset cases set the BAR back to initial state. For
* the latter case, the AER sticky error bit at offset 0x718
* should be set, but the Linux kernel doesn't yet know
* about that, it appears. If the original BAR was retained
* in the kernel data structures, this may be OK.
*/
ipath_dev_err(dd, "enable unit %d failed: error %d\n",
dd->ipath_unit, -ret);
goto bail_devdata;
}
addr = pci_resource_start(pdev, 0);
len = pci_resource_len(pdev, 0);
ipath_cdbg(VERBOSE, "regbase (0) %llx len %d pdev->irq %d, vend %x/%x "
"driver_data %lx\n", addr, len, pdev->irq, ent->vendor,
ent->device, ent->driver_data);
read_bars(dd, pdev, &bar0, &bar1);
if (!bar1 && !(bar0 & ~0xf)) {
if (addr) {
dev_info(&pdev->dev, "BAR is 0 (probable RESET), "
"rewriting as %llx\n", addr);
ret = pci_write_config_dword(
pdev, PCI_BASE_ADDRESS_0, addr);
if (ret) {
ipath_dev_err(dd, "rewrite of BAR0 "
"failed: err %d\n", -ret);
goto bail_disable;
}
ret = pci_write_config_dword(
pdev, PCI_BASE_ADDRESS_1, addr >> 32);
if (ret) {
ipath_dev_err(dd, "rewrite of BAR1 "
"failed: err %d\n", -ret);
goto bail_disable;
}
} else {
ipath_dev_err(dd, "BAR is 0 (probable RESET), "
"not usable until reboot\n");
ret = -ENODEV;
goto bail_disable;
}
}
ret = pci_request_regions(pdev, IPATH_DRV_NAME);
if (ret) {
dev_info(&pdev->dev, "pci_request_regions unit %u fails: "
"err %d\n", dd->ipath_unit, -ret);
goto bail_disable;
}
ret = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
if (ret) {
/*
* if the 64 bit setup fails, try 32 bit. Some systems
* do not setup 64 bit maps on systems with 2GB or less
* memory installed.
*/
ret = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (ret) {
dev_info(&pdev->dev,
"Unable to set DMA mask for unit %u: %d\n",
dd->ipath_unit, ret);
goto bail_regions;
}
else {
ipath_dbg("No 64bit DMA mask, used 32 bit mask\n");
ret = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
"for unit %u: %d\n",
dd->ipath_unit, ret);
}
}
else {
ret = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
if (ret)
dev_info(&pdev->dev,
"Unable to set DMA consistent mask "
"for unit %u: %d\n",
dd->ipath_unit, ret);
}
pci_set_master(pdev);
/*
* Save BARs to rewrite after device reset. Save all 64 bits of
* BAR, just in case.
*/
dd->ipath_pcibar0 = addr;
dd->ipath_pcibar1 = addr >> 32;
dd->ipath_deviceid = ent->device; /* save for later use */
dd->ipath_vendorid = ent->vendor;
/* setup the chip-specific functions, as early as possible. */
switch (ent->device) {
case PCI_DEVICE_ID_INFINIPATH_HT:
#ifdef CONFIG_HT_IRQ
ipath_init_iba6110_funcs(dd);
break;
#else
ipath_dev_err(dd, "QLogic HT device 0x%x cannot work if "
"CONFIG_HT_IRQ is not enabled\n", ent->device);
return -ENODEV;
#endif
case PCI_DEVICE_ID_INFINIPATH_PE800:
#ifdef CONFIG_PCI_MSI
ipath_init_iba6120_funcs(dd);
break;
#else
ipath_dev_err(dd, "QLogic PCIE device 0x%x cannot work if "
"CONFIG_PCI_MSI is not enabled\n", ent->device);
return -ENODEV;
#endif
default:
ipath_dev_err(dd, "Found unknown QLogic deviceid 0x%x, "
"failing\n", ent->device);
return -ENODEV;
}
for (j = 0; j < 6; j++) {
if (!pdev->resource[j].start)
continue;
ipath_cdbg(VERBOSE, "BAR %d start %llx, end %llx, len %llx\n",
j, (unsigned long long)pdev->resource[j].start,
(unsigned long long)pdev->resource[j].end,
(unsigned long long)pci_resource_len(pdev, j));
}
if (!addr) {
ipath_dev_err(dd, "No valid address in BAR 0!\n");
ret = -ENODEV;
goto bail_regions;
}
dd->ipath_pcirev = pdev->revision;
#if defined(__powerpc__)
/* There isn't a generic way to specify writethrough mappings */
dd->ipath_kregbase = __ioremap(addr, len,
(_PAGE_NO_CACHE|_PAGE_WRITETHRU));
#else
dd->ipath_kregbase = ioremap_nocache(addr, len);
#endif
if (!dd->ipath_kregbase) {
ipath_dbg("Unable to map io addr %llx to kvirt, failing\n",
addr);
ret = -ENOMEM;
goto bail_iounmap;
}
dd->ipath_kregend = (u64 __iomem *)
((void __iomem *)dd->ipath_kregbase + len);
dd->ipath_physaddr = addr; /* used for io_remap, etc. */
/* for user mmap */
ipath_cdbg(VERBOSE, "mapped io addr %llx to kregbase %p\n",
addr, dd->ipath_kregbase);
/*
* clear ipath_flags here instead of in ipath_init_chip as it is set
* by ipath_setup_htconfig.
*/
dd->ipath_flags = 0;
dd->ipath_lli_counter = 0;
dd->ipath_lli_errors = 0;
if (dd->ipath_f_bus(dd, pdev))
ipath_dev_err(dd, "Failed to setup config space; "
"continuing anyway\n");
/*
* set up our interrupt handler; IRQF_SHARED probably not needed,
* since MSI interrupts shouldn't be shared but won't hurt for now.
* check 0 irq after we return from chip-specific bus setup, since
* that can affect this due to setup
*/
if (!dd->ipath_irq)
ipath_dev_err(dd, "irq is 0, BIOS error? Interrupts won't "
"work\n");
else {
ret = request_irq(dd->ipath_irq, ipath_intr, IRQF_SHARED,
IPATH_DRV_NAME, dd);
if (ret) {
ipath_dev_err(dd, "Couldn't setup irq handler, "
"irq=%d: %d\n", dd->ipath_irq, ret);
goto bail_iounmap;
}
}
ret = ipath_init_chip(dd, 0); /* do the chip-specific init */
if (ret)
goto bail_irqsetup;
ret = ipath_enable_wc(dd);
if (ret) {
ipath_dev_err(dd, "Write combining not enabled "
"(err %d): performance may be poor\n",
-ret);
ret = 0;
}
ipath_verify_pioperf(dd);
ipath_device_create_group(&pdev->dev, dd);
ipathfs_add_device(dd);
ipath_user_add(dd);
ipath_diag_add(dd);
ipath_register_ib_device(dd);
/* Check that card status in STATUS_TIMEOUT seconds. */
schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
goto bail;
bail_irqsetup:
if (pdev->irq) free_irq(pdev->irq, dd);
bail_iounmap:
iounmap((volatile void __iomem *) dd->ipath_kregbase);
bail_regions:
pci_release_regions(pdev);
bail_disable:
pci_disable_device(pdev);
bail_devdata:
ipath_free_devdata(pdev, dd);
bail:
return ret;
}
static void __devexit cleanup_device(struct ipath_devdata *dd)
{
int port;
if (*dd->ipath_statusp & IPATH_STATUS_CHIP_PRESENT) {
/* can't do anything more with chip; needs re-init */
*dd->ipath_statusp &= ~IPATH_STATUS_CHIP_PRESENT;
if (dd->ipath_kregbase) {
/*
* if we haven't already cleaned up before these are
* to ensure any register reads/writes "fail" until
* re-init
*/
dd->ipath_kregbase = NULL;
dd->ipath_uregbase = 0;
dd->ipath_sregbase = 0;
dd->ipath_cregbase = 0;
dd->ipath_kregsize = 0;
}
ipath_disable_wc(dd);
}
if (dd->ipath_pioavailregs_dma) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
(void *) dd->ipath_pioavailregs_dma,
dd->ipath_pioavailregs_phys);
dd->ipath_pioavailregs_dma = NULL;
}
if (dd->ipath_dummy_hdrq) {
dma_free_coherent(&dd->pcidev->dev,
dd->ipath_pd[0]->port_rcvhdrq_size,
dd->ipath_dummy_hdrq, dd->ipath_dummy_hdrq_phys);
dd->ipath_dummy_hdrq = NULL;
}
if (dd->ipath_pageshadow) {
struct page **tmpp = dd->ipath_pageshadow;
dma_addr_t *tmpd = dd->ipath_physshadow;
int i, cnt = 0;
ipath_cdbg(VERBOSE, "Unlocking any expTID pages still "
"locked\n");
for (port = 0; port < dd->ipath_cfgports; port++) {
int port_tidbase = port * dd->ipath_rcvtidcnt;
int maxtid = port_tidbase + dd->ipath_rcvtidcnt;
for (i = port_tidbase; i < maxtid; i++) {
if (!tmpp[i])
continue;
pci_unmap_page(dd->pcidev, tmpd[i],
PAGE_SIZE, PCI_DMA_FROMDEVICE);
ipath_release_user_pages(&tmpp[i], 1);
tmpp[i] = NULL;
cnt++;
}
}
if (cnt) {
ipath_stats.sps_pageunlocks += cnt;
ipath_cdbg(VERBOSE, "There were still %u expTID "
"entries locked\n", cnt);
}
if (ipath_stats.sps_pagelocks ||
ipath_stats.sps_pageunlocks)
ipath_cdbg(VERBOSE, "%llu pages locked, %llu "
"unlocked via ipath_m{un}lock\n",
(unsigned long long)
ipath_stats.sps_pagelocks,
(unsigned long long)
ipath_stats.sps_pageunlocks);
ipath_cdbg(VERBOSE, "Free shadow page tid array at %p\n",
dd->ipath_pageshadow);
tmpp = dd->ipath_pageshadow;
dd->ipath_pageshadow = NULL;
vfree(tmpp);
}
/*
* free any resources still in use (usually just kernel ports)
* at unload; we do for portcnt, not cfgports, because cfgports
* could have changed while we were loaded.
*/
for (port = 0; port < dd->ipath_portcnt; port++) {
struct ipath_portdata *pd = dd->ipath_pd[port];
dd->ipath_pd[port] = NULL;
ipath_free_pddata(dd, pd);
}
kfree(dd->ipath_pd);
/*
* debuggability, in case some cleanup path tries to use it
* after this
*/
dd->ipath_pd = NULL;
}
static void __devexit ipath_remove_one(struct pci_dev *pdev)
{
struct ipath_devdata *dd = pci_get_drvdata(pdev);
ipath_cdbg(VERBOSE, "removing, pdev=%p, dd=%p\n", pdev, dd);
/*
* disable the IB link early, to be sure no new packets arrive, which
* complicates the shutdown process
*/
ipath_shutdown_device(dd);
cancel_delayed_work(&dd->status_work);
flush_scheduled_work();
if (dd->verbs_dev)
ipath_unregister_ib_device(dd->verbs_dev);
ipath_diag_remove(dd);
ipath_user_remove(dd);
ipathfs_remove_device(dd);
ipath_device_remove_group(&pdev->dev, dd);
ipath_cdbg(VERBOSE, "Releasing pci memory regions, dd %p, "
"unit %u\n", dd, (u32) dd->ipath_unit);
cleanup_device(dd);
/*
* turn off rcv, send, and interrupts for all ports, all drivers
* should also hard reset the chip here?
* free up port 0 (kernel) rcvhdr, egr bufs, and eventually tid bufs
* for all versions of the driver, if they were allocated
*/
if (dd->ipath_irq) {
ipath_cdbg(VERBOSE, "unit %u free irq %d\n",
dd->ipath_unit, dd->ipath_irq);
dd->ipath_f_free_irq(dd);
} else
ipath_dbg("irq is 0, not doing free_irq "
"for unit %u\n", dd->ipath_unit);
/*
* we check for NULL here, because it's outside
* the kregbase check, and we need to call it
* after the free_irq. Thus it's possible that
* the function pointers were never initialized.
*/
if (dd->ipath_f_cleanup)
/* clean up chip-specific stuff */
dd->ipath_f_cleanup(dd);
ipath_cdbg(VERBOSE, "Unmapping kregbase %p\n", dd->ipath_kregbase);
iounmap((volatile void __iomem *) dd->ipath_kregbase);
pci_release_regions(pdev);
ipath_cdbg(VERBOSE, "calling pci_disable_device\n");
pci_disable_device(pdev);
ipath_free_devdata(pdev, dd);
}
/* general driver use */
DEFINE_MUTEX(ipath_mutex);
static DEFINE_SPINLOCK(ipath_pioavail_lock);
/**
* ipath_disarm_piobufs - cancel a range of PIO buffers
* @dd: the infinipath device
* @first: the first PIO buffer to cancel
* @cnt: the number of PIO buffers to cancel
*
* cancel a range of PIO buffers, used when they might be armed, but
* not triggered. Used at init to ensure buffer state, and also user
* process close, in case it died while writing to a PIO buffer
* Also after errors.
*/
void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
unsigned cnt)
{
unsigned i, last = first + cnt;
unsigned long flags;
ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
for (i = first; i < last; i++) {
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
/*
* The disarm-related bits are write-only, so it
* is ok to OR them in with our copy of sendctrl
* while we hold the lock.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl | INFINIPATH_S_DISARM |
(i << INFINIPATH_S_DISARMPIOBUF_SHIFT));
/* can't disarm bufs back-to-back per iba7220 spec */
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
/*
* Disable PIOAVAILUPD, then re-enable, reading scratch in
* between. This seems to avoid a chip timing race that causes
* pioavail updates to memory to stop. We xor as we don't
* know the state of the bit when we're called.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl ^ INFINIPATH_S_PIOBUFAVAILUPD);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
}
/**
* ipath_wait_linkstate - wait for an IB link state change to occur
* @dd: the infinipath device
* @state: the state to wait for
* @msecs: the number of milliseconds to wait
*
* wait up to msecs milliseconds for IB link state change to occur for
* now, take the easy polling route. Currently used only by
* ipath_set_linkstate. Returns 0 if state reached, otherwise
* -ETIMEDOUT state can have multiple states set, for any of several
* transitions.
*/
int ipath_wait_linkstate(struct ipath_devdata *dd, u32 state, int msecs)
{
dd->ipath_state_wanted = state;
wait_event_interruptible_timeout(ipath_state_wait,
(dd->ipath_flags & state),
msecs_to_jiffies(msecs));
dd->ipath_state_wanted = 0;
if (!(dd->ipath_flags & state)) {
u64 val;
ipath_cdbg(VERBOSE, "Didn't reach linkstate %s within %u"
" ms\n",
/* test INIT ahead of DOWN, both can be set */
(state & IPATH_LINKINIT) ? "INIT" :
((state & IPATH_LINKDOWN) ? "DOWN" :
((state & IPATH_LINKARMED) ? "ARM" : "ACTIVE")),
msecs);
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
ipath_cdbg(VERBOSE, "ibcc=%llx ibcstatus=%llx (%s)\n",
(unsigned long long) ipath_read_kreg64(
dd, dd->ipath_kregs->kr_ibcctrl),
(unsigned long long) val,
ipath_ibcstatus_str[val & 0xf]);
}
return (dd->ipath_flags & state) ? 0 : -ETIMEDOUT;
}
/*
* Decode the error status into strings, deciding whether to always
* print * it or not depending on "normal packet errors" vs everything
* else. Return 1 if "real" errors, otherwise 0 if only packet
* errors, so caller can decide what to print with the string.
*/
int ipath_decode_err(char *buf, size_t blen, ipath_err_t err)
{
int iserr = 1;
*buf = '\0';
if (err & INFINIPATH_E_PKTERRS) {
if (!(err & ~INFINIPATH_E_PKTERRS))
iserr = 0; // if only packet errors.
if (ipath_debug & __IPATH_ERRPKTDBG) {
if (err & INFINIPATH_E_REBP)
strlcat(buf, "EBP ", blen);
if (err & INFINIPATH_E_RVCRC)
strlcat(buf, "VCRC ", blen);
if (err & INFINIPATH_E_RICRC) {
strlcat(buf, "CRC ", blen);
// clear for check below, so only once
err &= INFINIPATH_E_RICRC;
}
if (err & INFINIPATH_E_RSHORTPKTLEN)
strlcat(buf, "rshortpktlen ", blen);
if (err & INFINIPATH_E_SDROPPEDDATAPKT)
strlcat(buf, "sdroppeddatapkt ", blen);
if (err & INFINIPATH_E_SPKTLEN)
strlcat(buf, "spktlen ", blen);
}
if ((err & INFINIPATH_E_RICRC) &&
!(err&(INFINIPATH_E_RVCRC|INFINIPATH_E_REBP)))
strlcat(buf, "CRC ", blen);
if (!iserr)
goto done;
}
if (err & INFINIPATH_E_RHDRLEN)
strlcat(buf, "rhdrlen ", blen);
if (err & INFINIPATH_E_RBADTID)
strlcat(buf, "rbadtid ", blen);
if (err & INFINIPATH_E_RBADVERSION)
strlcat(buf, "rbadversion ", blen);
if (err & INFINIPATH_E_RHDR)
strlcat(buf, "rhdr ", blen);
if (err & INFINIPATH_E_RLONGPKTLEN)
strlcat(buf, "rlongpktlen ", blen);
if (err & INFINIPATH_E_RMAXPKTLEN)
strlcat(buf, "rmaxpktlen ", blen);
if (err & INFINIPATH_E_RMINPKTLEN)
strlcat(buf, "rminpktlen ", blen);
if (err & INFINIPATH_E_SMINPKTLEN)
strlcat(buf, "sminpktlen ", blen);
if (err & INFINIPATH_E_RFORMATERR)
strlcat(buf, "rformaterr ", blen);
if (err & INFINIPATH_E_RUNSUPVL)
strlcat(buf, "runsupvl ", blen);
if (err & INFINIPATH_E_RUNEXPCHAR)
strlcat(buf, "runexpchar ", blen);
if (err & INFINIPATH_E_RIBFLOW)
strlcat(buf, "ribflow ", blen);
if (err & INFINIPATH_E_SUNDERRUN)
strlcat(buf, "sunderrun ", blen);
if (err & INFINIPATH_E_SPIOARMLAUNCH)
strlcat(buf, "spioarmlaunch ", blen);
if (err & INFINIPATH_E_SUNEXPERRPKTNUM)
strlcat(buf, "sunexperrpktnum ", blen);
if (err & INFINIPATH_E_SDROPPEDSMPPKT)
strlcat(buf, "sdroppedsmppkt ", blen);
if (err & INFINIPATH_E_SMAXPKTLEN)
strlcat(buf, "smaxpktlen ", blen);
if (err & INFINIPATH_E_SUNSUPVL)
strlcat(buf, "sunsupVL ", blen);
if (err & INFINIPATH_E_INVALIDADDR)
strlcat(buf, "invalidaddr ", blen);
if (err & INFINIPATH_E_RRCVEGRFULL)
strlcat(buf, "rcvegrfull ", blen);
if (err & INFINIPATH_E_RRCVHDRFULL)
strlcat(buf, "rcvhdrfull ", blen);
if (err & INFINIPATH_E_IBSTATUSCHANGED)
strlcat(buf, "ibcstatuschg ", blen);
if (err & INFINIPATH_E_RIBLOSTLINK)
strlcat(buf, "riblostlink ", blen);
if (err & INFINIPATH_E_HARDWARE)
strlcat(buf, "hardware ", blen);
if (err & INFINIPATH_E_RESET)
strlcat(buf, "reset ", blen);
done:
return iserr;
}
/**
* get_rhf_errstring - decode RHF errors
* @err: the err number
* @msg: the output buffer
* @len: the length of the output buffer
*
* only used one place now, may want more later
*/
static void get_rhf_errstring(u32 err, char *msg, size_t len)
{
/* if no errors, and so don't need to check what's first */
*msg = '\0';
if (err & INFINIPATH_RHF_H_ICRCERR)
strlcat(msg, "icrcerr ", len);
if (err & INFINIPATH_RHF_H_VCRCERR)
strlcat(msg, "vcrcerr ", len);
if (err & INFINIPATH_RHF_H_PARITYERR)
strlcat(msg, "parityerr ", len);
if (err & INFINIPATH_RHF_H_LENERR)
strlcat(msg, "lenerr ", len);
if (err & INFINIPATH_RHF_H_MTUERR)
strlcat(msg, "mtuerr ", len);
if (err & INFINIPATH_RHF_H_IHDRERR)
/* infinipath hdr checksum error */
strlcat(msg, "ipathhdrerr ", len);
if (err & INFINIPATH_RHF_H_TIDERR)
strlcat(msg, "tiderr ", len);
if (err & INFINIPATH_RHF_H_MKERR)
/* bad port, offset, etc. */
strlcat(msg, "invalid ipathhdr ", len);
if (err & INFINIPATH_RHF_H_IBERR)
strlcat(msg, "iberr ", len);
if (err & INFINIPATH_RHF_L_SWA)
strlcat(msg, "swA ", len);
if (err & INFINIPATH_RHF_L_SWB)
strlcat(msg, "swB ", len);
}
/**
* ipath_get_egrbuf - get an eager buffer
* @dd: the infinipath device
* @bufnum: the eager buffer to get
*
* must only be called if ipath_pd[port] is known to be allocated
*/
static inline void *ipath_get_egrbuf(struct ipath_devdata *dd, u32 bufnum)
{
return dd->ipath_port0_skbinfo ?
(void *) dd->ipath_port0_skbinfo[bufnum].skb->data : NULL;
}
/**
* ipath_alloc_skb - allocate an skb and buffer with possible constraints
* @dd: the infinipath device
* @gfp_mask: the sk_buff SFP mask
*/
struct sk_buff *ipath_alloc_skb(struct ipath_devdata *dd,
gfp_t gfp_mask)
{
struct sk_buff *skb;
u32 len;
/*
* Only fully supported way to handle this is to allocate lots
* extra, align as needed, and then do skb_reserve(). That wastes
* a lot of memory... I'll have to hack this into infinipath_copy
* also.
*/
/*
* We need 2 extra bytes for ipath_ether data sent in the
* key header. In order to keep everything dword aligned,
* we'll reserve 4 bytes.
*/
len = dd->ipath_ibmaxlen + 4;
if (dd->ipath_flags & IPATH_4BYTE_TID) {
/* We need a 2KB multiple alignment, and there is no way
* to do it except to allocate extra and then skb_reserve
* enough to bring it up to the right alignment.
*/
len += 2047;
}
skb = __dev_alloc_skb(len, gfp_mask);
if (!skb) {
ipath_dev_err(dd, "Failed to allocate skbuff, length %u\n",
len);
goto bail;
}
skb_reserve(skb, 4);
if (dd->ipath_flags & IPATH_4BYTE_TID) {
u32 una = (unsigned long)skb->data & 2047;
if (una)
skb_reserve(skb, 2048 - una);
}
bail:
return skb;
}
static void ipath_rcv_hdrerr(struct ipath_devdata *dd,
u32 eflags,
u32 l,
u32 etail,
u64 *rc)
{
char emsg[128];
struct ipath_message_header *hdr;
get_rhf_errstring(eflags, emsg, sizeof emsg);
hdr = (struct ipath_message_header *)&rc[1];
ipath_cdbg(PKT, "RHFerrs %x hdrqtail=%x typ=%u "
"tlen=%x opcode=%x egridx=%x: %s\n",
eflags, l,
ipath_hdrget_rcv_type((__le32 *) rc),
ipath_hdrget_length_in_bytes((__le32 *) rc),
be32_to_cpu(hdr->bth[0]) >> 24,
etail, emsg);
/* Count local link integrity errors. */
if (eflags & (INFINIPATH_RHF_H_ICRCERR | INFINIPATH_RHF_H_VCRCERR)) {
u8 n = (dd->ipath_ibcctrl >>
INFINIPATH_IBCC_PHYERRTHRESHOLD_SHIFT) &
INFINIPATH_IBCC_PHYERRTHRESHOLD_MASK;
if (++dd->ipath_lli_counter > n) {
dd->ipath_lli_counter = 0;
dd->ipath_lli_errors++;
}
}
}
/*
* ipath_kreceive - receive a packet
* @pd: the infinipath port
*
* called from interrupt handler for errors or receive interrupt
*/
void ipath_kreceive(struct ipath_portdata *pd)
{
u64 *rc;
struct ipath_devdata *dd = pd->port_dd;
void *ebuf;
const u32 rsize = dd->ipath_rcvhdrentsize; /* words */
const u32 maxcnt = dd->ipath_rcvhdrcnt * rsize; /* words */
u32 etail = -1, l, hdrqtail;
struct ipath_message_header *hdr;
u32 eflags, i, etype, tlen, pkttot = 0, updegr=0, reloop=0;
static u64 totcalls; /* stats, may eventually remove */
if (!dd->ipath_hdrqtailptr) {
ipath_dev_err(dd,
"hdrqtailptr not set, can't do receives\n");
goto bail;
}
l = pd->port_head;
hdrqtail = ipath_get_rcvhdrtail(pd);
if (l == hdrqtail)
goto bail;
reloop:
for (i = 0; l != hdrqtail; i++) {
u32 qp;
u8 *bthbytes;
rc = (u64 *) (pd->port_rcvhdrq + (l << 2));
hdr = (struct ipath_message_header *)&rc[1];
/*
* could make a network order version of IPATH_KD_QP, and
* do the obvious shift before masking to speed this up.
*/
qp = ntohl(hdr->bth[1]) & 0xffffff;
bthbytes = (u8 *) hdr->bth;
eflags = ipath_hdrget_err_flags((__le32 *) rc);
etype = ipath_hdrget_rcv_type((__le32 *) rc);
/* total length */
tlen = ipath_hdrget_length_in_bytes((__le32 *) rc);
ebuf = NULL;
if (etype != RCVHQ_RCV_TYPE_EXPECTED) {
/*
* it turns out that the chips uses an eager buffer
* for all non-expected packets, whether it "needs"
* one or not. So always get the index, but don't
* set ebuf (so we try to copy data) unless the
* length requires it.
*/
etail = ipath_hdrget_index((__le32 *) rc);
if (tlen > sizeof(*hdr) ||
etype == RCVHQ_RCV_TYPE_NON_KD)
ebuf = ipath_get_egrbuf(dd, etail);
}
/*
* both tiderr and ipathhdrerr are set for all plain IB
* packets; only ipathhdrerr should be set.
*/
if (etype != RCVHQ_RCV_TYPE_NON_KD && etype !=
RCVHQ_RCV_TYPE_ERROR && ipath_hdrget_ipath_ver(
hdr->iph.ver_port_tid_offset) !=
IPS_PROTO_VERSION) {
ipath_cdbg(PKT, "Bad InfiniPath protocol version "
"%x\n", etype);
}
if (unlikely(eflags))
ipath_rcv_hdrerr(dd, eflags, l, etail, rc);
else if (etype == RCVHQ_RCV_TYPE_NON_KD) {
ipath_ib_rcv(dd->verbs_dev, rc + 1, ebuf, tlen);
if (dd->ipath_lli_counter)
dd->ipath_lli_counter--;
ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
"qp=%x), len %x; ignored\n",
etype, bthbytes[0], qp, tlen);
}
else if (etype == RCVHQ_RCV_TYPE_EAGER)
ipath_cdbg(PKT, "typ %x, opcode %x (eager, "
"qp=%x), len %x; ignored\n",
etype, bthbytes[0], qp, tlen);
else if (etype == RCVHQ_RCV_TYPE_EXPECTED)
ipath_dbg("Bug: Expected TID, opcode %x; ignored\n",
be32_to_cpu(hdr->bth[0]) & 0xff);
else {
/*
* error packet, type of error unknown.
* Probably type 3, but we don't know, so don't
* even try to print the opcode, etc.
*/
ipath_dbg("Error Pkt, but no eflags! egrbuf %x, "
"len %x\nhdrq@%lx;hdrq+%x rhf: %llx; "
"hdr %llx %llx %llx %llx %llx\n",
etail, tlen, (unsigned long) rc, l,
(unsigned long long) rc[0],
(unsigned long long) rc[1],
(unsigned long long) rc[2],
(unsigned long long) rc[3],
(unsigned long long) rc[4],
(unsigned long long) rc[5]);
}
l += rsize;
if (l >= maxcnt)
l = 0;
if (etype != RCVHQ_RCV_TYPE_EXPECTED)
updegr = 1;
/*
* update head regs on last packet, and every 16 packets.
* Reduce bus traffic, while still trying to prevent
* rcvhdrq overflows, for when the queue is nearly full
*/
if (l == hdrqtail || (i && !(i&0xf))) {
u64 lval;
if (l == hdrqtail)
/* request IBA6120 interrupt only on last */
lval = dd->ipath_rhdrhead_intr_off | l;
else
lval = l;
(void)ipath_write_ureg(dd, ur_rcvhdrhead, lval, 0);
if (updegr) {
(void)ipath_write_ureg(dd, ur_rcvegrindexhead,
etail, 0);
updegr = 0;
}
}
}
if (!dd->ipath_rhdrhead_intr_off && !reloop) {
/* IBA6110 workaround; we can have a race clearing chip
* interrupt with another interrupt about to be delivered,
* and can clear it before it is delivered on the GPIO
* workaround. By doing the extra check here for the
* in-memory tail register updating while we were doing
* earlier packets, we "almost" guarantee we have covered
* that case.
*/
u32 hqtail = ipath_get_rcvhdrtail(pd);
if (hqtail != hdrqtail) {
hdrqtail = hqtail;
reloop = 1; /* loop 1 extra time at most */
goto reloop;
}
}
pkttot += i;
pd->port_head = l;
if (pkttot > ipath_stats.sps_maxpkts_call)
ipath_stats.sps_maxpkts_call = pkttot;
ipath_stats.sps_port0pkts += pkttot;
ipath_stats.sps_avgpkts_call =
ipath_stats.sps_port0pkts / ++totcalls;
bail:;
}
/**
* ipath_update_pio_bufs - update shadow copy of the PIO availability map
* @dd: the infinipath device
*
* called whenever our local copy indicates we have run out of send buffers
* NOTE: This can be called from interrupt context by some code
* and from non-interrupt context by ipath_getpiobuf().
*/
static void ipath_update_pio_bufs(struct ipath_devdata *dd)
{
unsigned long flags;
int i;
const unsigned piobregs = (unsigned)dd->ipath_pioavregs;
/* If the generation (check) bits have changed, then we update the
* busy bit for the corresponding PIO buffer. This algorithm will
* modify positions to the value they already have in some cases
* (i.e., no change), but it's faster than changing only the bits
* that have changed.
*
* We would like to do this atomicly, to avoid spinlocks in the
* critical send path, but that's not really possible, given the
* type of changes, and that this routine could be called on
* multiple cpu's simultaneously, so we lock in this routine only,
* to avoid conflicting updates; all we change is the shadow, and
* it's a single 64 bit memory location, so by definition the update
* is atomic in terms of what other cpu's can see in testing the
* bits. The spin_lock overhead isn't too bad, since it only
* happens when all buffers are in use, so only cpu overhead, not
* latency or bandwidth is affected.
*/
#define _IPATH_ALL_CHECKBITS 0x5555555555555555ULL
if (!dd->ipath_pioavailregs_dma) {
ipath_dbg("Update shadow pioavail, but regs_dma NULL!\n");
return;
}
if (ipath_debug & __IPATH_VERBDBG) {
/* only if packet debug and verbose */
volatile __le64 *dma = dd->ipath_pioavailregs_dma;
unsigned long *shadow = dd->ipath_pioavailshadow;
ipath_cdbg(PKT, "Refill avail, dma0=%llx shad0=%lx, "
"d1=%llx s1=%lx, d2=%llx s2=%lx, d3=%llx "
"s3=%lx\n",
(unsigned long long) le64_to_cpu(dma[0]),
shadow[0],
(unsigned long long) le64_to_cpu(dma[1]),
shadow[1],
(unsigned long long) le64_to_cpu(dma[2]),
shadow[2],
(unsigned long long) le64_to_cpu(dma[3]),
shadow[3]);
if (piobregs > 4)
ipath_cdbg(
PKT, "2nd group, dma4=%llx shad4=%lx, "
"d5=%llx s5=%lx, d6=%llx s6=%lx, "
"d7=%llx s7=%lx\n",
(unsigned long long) le64_to_cpu(dma[4]),
shadow[4],
(unsigned long long) le64_to_cpu(dma[5]),
shadow[5],
(unsigned long long) le64_to_cpu(dma[6]),
shadow[6],
(unsigned long long) le64_to_cpu(dma[7]),
shadow[7]);
}
spin_lock_irqsave(&ipath_pioavail_lock, flags);
for (i = 0; i < piobregs; i++) {
u64 pchbusy, pchg, piov, pnew;
/*
* Chip Errata: bug 6641; even and odd qwords>3 are swapped
*/
if (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS))
piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i ^ 1]);
else
piov = le64_to_cpu(dd->ipath_pioavailregs_dma[i]);
pchg = _IPATH_ALL_CHECKBITS &
~(dd->ipath_pioavailshadow[i] ^ piov);
pchbusy = pchg << INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT;
if (pchg && (pchbusy & dd->ipath_pioavailshadow[i])) {
pnew = dd->ipath_pioavailshadow[i] & ~pchbusy;
pnew |= piov & pchbusy;
dd->ipath_pioavailshadow[i] = pnew;
}
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
}
/**
* ipath_setrcvhdrsize - set the receive header size
* @dd: the infinipath device
* @rhdrsize: the receive header size
*
* called from user init code, and also layered driver init
*/
int ipath_setrcvhdrsize(struct ipath_devdata *dd, unsigned rhdrsize)
{
int ret = 0;
if (dd->ipath_flags & IPATH_RCVHDRSZ_SET) {
if (dd->ipath_rcvhdrsize != rhdrsize) {
dev_info(&dd->pcidev->dev,
"Error: can't set protocol header "
"size %u, already %u\n",
rhdrsize, dd->ipath_rcvhdrsize);
ret = -EAGAIN;
} else
ipath_cdbg(VERBOSE, "Reuse same protocol header "
"size %u\n", dd->ipath_rcvhdrsize);
} else if (rhdrsize > (dd->ipath_rcvhdrentsize -
(sizeof(u64) / sizeof(u32)))) {
ipath_dbg("Error: can't set protocol header size %u "
"(> max %u)\n", rhdrsize,
dd->ipath_rcvhdrentsize -
(u32) (sizeof(u64) / sizeof(u32)));
ret = -EOVERFLOW;
} else {
dd->ipath_flags |= IPATH_RCVHDRSZ_SET;
dd->ipath_rcvhdrsize = rhdrsize;
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvhdrsize,
dd->ipath_rcvhdrsize);
ipath_cdbg(VERBOSE, "Set protocol header size to %u\n",
dd->ipath_rcvhdrsize);
}
return ret;
}
/**
* ipath_getpiobuf - find an available pio buffer
* @dd: the infinipath device
* @pbufnum: the buffer number is placed here
*
* do appropriate marking as busy, etc.
* returns buffer number if one found (>=0), negative number is error.
* Used by ipath_layer_send
*/
u32 __iomem *ipath_getpiobuf(struct ipath_devdata *dd, u32 * pbufnum)
{
int i, j, starti, updated = 0;
unsigned piobcnt, iter;
unsigned long flags;
unsigned long *shadow = dd->ipath_pioavailshadow;
u32 __iomem *buf;
piobcnt = (unsigned)(dd->ipath_piobcnt2k
+ dd->ipath_piobcnt4k);
starti = dd->ipath_lastport_piobuf;
iter = piobcnt - starti;
if (dd->ipath_upd_pio_shadow) {
/*
* Minor optimization. If we had no buffers on last call,
* start out by doing the update; continue and do scan even
* if no buffers were updated, to be paranoid
*/
ipath_update_pio_bufs(dd);
/* we scanned here, don't do it at end of scan */
updated = 1;
i = starti;
} else
i = dd->ipath_lastpioindex;
rescan:
/*
* while test_and_set_bit() is atomic, we do that and then the
* change_bit(), and the pair is not. See if this is the cause
* of the remaining armlaunch errors.
*/
spin_lock_irqsave(&ipath_pioavail_lock, flags);
for (j = 0; j < iter; j++, i++) {
if (i >= piobcnt)
i = starti;
/*
* To avoid bus lock overhead, we first find a candidate
* buffer, then do the test and set, and continue if that
* fails.
*/
if (test_bit((2 * i) + 1, shadow) ||
test_and_set_bit((2 * i) + 1, shadow))
continue;
/* flip generation bit */
change_bit(2 * i, shadow);
break;
}
spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
if (j == iter) {
volatile __le64 *dma = dd->ipath_pioavailregs_dma;
/*
* first time through; shadow exhausted, but may be real
* buffers available, so go see; if any updated, rescan
* (once)
*/
if (!updated) {
ipath_update_pio_bufs(dd);
updated = 1;
i = starti;
goto rescan;
}
dd->ipath_upd_pio_shadow = 1;
/*
* not atomic, but if we lose one once in a while, that's OK
*/
ipath_stats.sps_nopiobufs++;
if (!(++dd->ipath_consec_nopiobuf % 100000)) {
ipath_dbg(
"%u pio sends with no bufavail; dmacopy: "
"%llx %llx %llx %llx; shadow: "
"%lx %lx %lx %lx\n",
dd->ipath_consec_nopiobuf,
(unsigned long long) le64_to_cpu(dma[0]),
(unsigned long long) le64_to_cpu(dma[1]),
(unsigned long long) le64_to_cpu(dma[2]),
(unsigned long long) le64_to_cpu(dma[3]),
shadow[0], shadow[1], shadow[2],
shadow[3]);
/*
* 4 buffers per byte, 4 registers above, cover rest
* below
*/
if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
(sizeof(shadow[0]) * 4 * 4))
ipath_dbg("2nd group: dmacopy: %llx %llx "
"%llx %llx; shadow: %lx %lx "
"%lx %lx\n",
(unsigned long long)
le64_to_cpu(dma[4]),
(unsigned long long)
le64_to_cpu(dma[5]),
(unsigned long long)
le64_to_cpu(dma[6]),
(unsigned long long)
le64_to_cpu(dma[7]),
shadow[4], shadow[5],
shadow[6], shadow[7]);
}
buf = NULL;
goto bail;
}
/*
* set next starting place. Since it's just an optimization,
* it doesn't matter who wins on this, so no locking
*/
dd->ipath_lastpioindex = i + 1;
if (dd->ipath_upd_pio_shadow)
dd->ipath_upd_pio_shadow = 0;
if (dd->ipath_consec_nopiobuf)
dd->ipath_consec_nopiobuf = 0;
if (i < dd->ipath_piobcnt2k)
buf = (u32 __iomem *) (dd->ipath_pio2kbase +
i * dd->ipath_palign);
else
buf = (u32 __iomem *)
(dd->ipath_pio4kbase +
(i - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
ipath_cdbg(VERBOSE, "Return piobuf%u %uk @ %p\n",
i, (i < dd->ipath_piobcnt2k) ? 2 : 4, buf);
if (pbufnum)
*pbufnum = i;
bail:
return buf;
}
/**
* ipath_create_rcvhdrq - create a receive header queue
* @dd: the infinipath device
* @pd: the port data
*
* this must be contiguous memory (from an i/o perspective), and must be
* DMA'able (which means for some systems, it will go through an IOMMU,
* or be forced into a low address range).
*/
int ipath_create_rcvhdrq(struct ipath_devdata *dd,
struct ipath_portdata *pd)
{
int ret = 0;
if (!pd->port_rcvhdrq) {
dma_addr_t phys_hdrqtail;
gfp_t gfp_flags = GFP_USER | __GFP_COMP;
int amt = ALIGN(dd->ipath_rcvhdrcnt * dd->ipath_rcvhdrentsize *
sizeof(u32), PAGE_SIZE);
pd->port_rcvhdrq = dma_alloc_coherent(
&dd->pcidev->dev, amt, &pd->port_rcvhdrq_phys,
gfp_flags);
if (!pd->port_rcvhdrq) {
ipath_dev_err(dd, "attempt to allocate %d bytes "
"for port %u rcvhdrq failed\n",
amt, pd->port_port);
ret = -ENOMEM;
goto bail;
}
pd->port_rcvhdrtail_kvaddr = dma_alloc_coherent(
&dd->pcidev->dev, PAGE_SIZE, &phys_hdrqtail, GFP_KERNEL);
if (!pd->port_rcvhdrtail_kvaddr) {
ipath_dev_err(dd, "attempt to allocate 1 page "
"for port %u rcvhdrqtailaddr failed\n",
pd->port_port);
ret = -ENOMEM;
dma_free_coherent(&dd->pcidev->dev, amt,
pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
pd->port_rcvhdrq = NULL;
goto bail;
}
pd->port_rcvhdrqtailaddr_phys = phys_hdrqtail;
pd->port_rcvhdrq_size = amt;
ipath_cdbg(VERBOSE, "%d pages at %p (phys %lx) size=%lu "
"for port %u rcvhdr Q\n",
amt >> PAGE_SHIFT, pd->port_rcvhdrq,
(unsigned long) pd->port_rcvhdrq_phys,
(unsigned long) pd->port_rcvhdrq_size,
pd->port_port);
ipath_cdbg(VERBOSE, "port %d hdrtailaddr, %llx physical\n",
pd->port_port,
(unsigned long long) phys_hdrqtail);
}
else
ipath_cdbg(VERBOSE, "reuse port %d rcvhdrq @%p %llx phys; "
"hdrtailaddr@%p %llx physical\n",
pd->port_port, pd->port_rcvhdrq,
(unsigned long long) pd->port_rcvhdrq_phys,
pd->port_rcvhdrtail_kvaddr, (unsigned long long)
pd->port_rcvhdrqtailaddr_phys);
/* clear for security and sanity on each use */
memset(pd->port_rcvhdrq, 0, pd->port_rcvhdrq_size);
if (pd->port_rcvhdrtail_kvaddr)
memset(pd->port_rcvhdrtail_kvaddr, 0, PAGE_SIZE);
/*
* tell chip each time we init it, even if we are re-using previous
* memory (we zero the register at process close)
*/
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdrtailaddr,
pd->port_port, pd->port_rcvhdrqtailaddr_phys);
ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
pd->port_port, pd->port_rcvhdrq_phys);
ret = 0;
bail:
return ret;
}
/*
* Flush all sends that might be in the ready to send state, as well as any
* that are in the process of being sent. Used whenever we need to be
* sure the send side is idle. Cleans up all buffer state by canceling
* all pio buffers, and issuing an abort, which cleans up anything in the
* launch fifo. The cancel is superfluous on some chip versions, but
* it's safer to always do it.
* PIOAvail bits are updated by the chip as if normal send had happened.
*/
void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
{
ipath_dbg("Cancelling all in-progress send buffers\n");
dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
/*
* the abort bit is auto-clearing. We read scratch to be sure
* that cancels and the abort have taken effect in the chip.
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
INFINIPATH_S_ABORT);
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
ipath_disarm_piobufs(dd, 0,
(unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
if (restore_sendctrl) /* else done by caller later */
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
dd->ipath_sendctrl);
/* and again, be sure all have hit the chip */
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
}
static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
{
static const char *what[4] = {
[0] = "NOP",
[INFINIPATH_IBCC_LINKCMD_DOWN] = "DOWN",
[INFINIPATH_IBCC_LINKCMD_ARMED] = "ARMED",
[INFINIPATH_IBCC_LINKCMD_ACTIVE] = "ACTIVE"
};
int linkcmd = (which >> INFINIPATH_IBCC_LINKCMD_SHIFT) &
INFINIPATH_IBCC_LINKCMD_MASK;
ipath_cdbg(VERBOSE, "Trying to move unit %u to %s, current ltstate "
"is %s\n", dd->ipath_unit,
what[linkcmd],
ipath_ibcstatus_str[
(ipath_read_kreg64
(dd, dd->ipath_kregs->kr_ibcstatus) >>
INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
/* flush all queued sends when going to DOWN to be sure that
* they don't block MAD packets */
if (linkcmd == INFINIPATH_IBCC_LINKCMD_DOWN)
ipath_cancel_sends(dd, 1);
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl | which);
}
int ipath_set_linkstate(struct ipath_devdata *dd, u8 newstate)
{
u32 lstate;
int ret;
switch (newstate) {
case IPATH_IB_LINKDOWN_ONLY:
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_DOWN <<
INFINIPATH_IBCC_LINKCMD_SHIFT);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKDOWN:
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_POLL <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKDOWN_SLEEP:
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_SLEEP <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKDOWN_DISABLE:
ipath_set_ib_lstate(dd,
INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
/* don't wait */
ret = 0;
goto bail;
case IPATH_IB_LINKARM:
if (dd->ipath_flags & IPATH_LINKARMED) {
ret = 0;
goto bail;
}
if (!(dd->ipath_flags &
(IPATH_LINKINIT | IPATH_LINKACTIVE))) {
ret = -EINVAL;
goto bail;
}
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ARMED <<
INFINIPATH_IBCC_LINKCMD_SHIFT);
/*
* Since the port can transition to ACTIVE by receiving
* a non VL 15 packet, wait for either state.
*/
lstate = IPATH_LINKARMED | IPATH_LINKACTIVE;
break;
case IPATH_IB_LINKACTIVE:
if (dd->ipath_flags & IPATH_LINKACTIVE) {
ret = 0;
goto bail;
}
if (!(dd->ipath_flags & IPATH_LINKARMED)) {
ret = -EINVAL;
goto bail;
}
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKCMD_ACTIVE <<
INFINIPATH_IBCC_LINKCMD_SHIFT);
lstate = IPATH_LINKACTIVE;
break;
case IPATH_IB_LINK_LOOPBACK:
dev_info(&dd->pcidev->dev, "Enabling IB local loopback\n");
dd->ipath_ibcctrl |= INFINIPATH_IBCC_LOOPBACK;
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl);
ret = 0;
goto bail; // no state change to wait for
case IPATH_IB_LINK_EXTERNAL:
dev_info(&dd->pcidev->dev, "Disabling IB local loopback (normal)\n");
dd->ipath_ibcctrl &= ~INFINIPATH_IBCC_LOOPBACK;
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl);
ret = 0;
goto bail; // no state change to wait for
default:
ipath_dbg("Invalid linkstate 0x%x requested\n", newstate);
ret = -EINVAL;
goto bail;
}
ret = ipath_wait_linkstate(dd, lstate, 2000);
bail:
return ret;
}
/**
* ipath_set_mtu - set the MTU
* @dd: the infinipath device
* @arg: the new MTU
*
* we can handle "any" incoming size, the issue here is whether we
* need to restrict our outgoing size. For now, we don't do any
* sanity checking on this, and we don't deal with what happens to
* programs that are already running when the size changes.
* NOTE: changing the MTU will usually cause the IBC to go back to
* link initialize (IPATH_IBSTATE_INIT) state...
*/
int ipath_set_mtu(struct ipath_devdata *dd, u16 arg)
{
u32 piosize;
int changed = 0;
int ret;
/*
* mtu is IB data payload max. It's the largest power of 2 less
* than piosize (or even larger, since it only really controls the
* largest we can receive; we can send the max of the mtu and
* piosize). We check that it's one of the valid IB sizes.
*/
if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 &&
(arg != 4096 || !ipath_mtu4096)) {
ipath_dbg("Trying to set invalid mtu %u, failing\n", arg);
ret = -EINVAL;
goto bail;
}
if (dd->ipath_ibmtu == arg) {
ret = 0; /* same as current */
goto bail;
}
piosize = dd->ipath_ibmaxlen;
dd->ipath_ibmtu = arg;
if (arg >= (piosize - IPATH_PIO_MAXIBHDR)) {
/* Only if it's not the initial value (or reset to it) */
if (piosize != dd->ipath_init_ibmaxlen) {
if (arg > piosize && arg <= dd->ipath_init_ibmaxlen)
piosize = dd->ipath_init_ibmaxlen;
dd->ipath_ibmaxlen = piosize;
changed = 1;
}
} else if ((arg + IPATH_PIO_MAXIBHDR) != dd->ipath_ibmaxlen) {
piosize = arg + IPATH_PIO_MAXIBHDR;
ipath_cdbg(VERBOSE, "ibmaxlen was 0x%x, setting to 0x%x "
"(mtu 0x%x)\n", dd->ipath_ibmaxlen, piosize,
arg);
dd->ipath_ibmaxlen = piosize;
changed = 1;
}
if (changed) {
u64 ibc = dd->ipath_ibcctrl, ibdw;
/*
* update our housekeeping variables, and set IBC max
* size, same as init code; max IBC is max we allow in
* buffer, less the qword pbc, plus 1 for ICRC, in dwords
*/
dd->ipath_ibmaxlen = piosize - 2 * sizeof(u32);
ibdw = (dd->ipath_ibmaxlen >> 2) + 1;
ibc &= ~(INFINIPATH_IBCC_MAXPKTLEN_MASK <<
dd->ibcc_mpl_shift);
ibc |= ibdw << dd->ibcc_mpl_shift;
dd->ipath_ibcctrl = ibc;
ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
dd->ipath_ibcctrl);
dd->ipath_f_tidtemplate(dd);
}
ret = 0;
bail:
return ret;
}
int ipath_set_lid(struct ipath_devdata *dd, u32 arg, u8 lmc)
{
dd->ipath_lid = arg;
dd->ipath_lmc = lmc;
return 0;
}
/**
* ipath_write_kreg_port - write a device's per-port 64-bit kernel register
* @dd: the infinipath device
* @regno: the register number to write
* @port: the port containing the register
* @value: the value to write
*
* Registers that vary with the chip implementation constants (port)
* use this routine.
*/
void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
unsigned port, u64 value)
{
u16 where;
if (port < dd->ipath_portcnt &&
(regno == dd->ipath_kregs->kr_rcvhdraddr ||
regno == dd->ipath_kregs->kr_rcvhdrtailaddr))
where = regno + port;
else
where = -1;
ipath_write_kreg(dd, where, value);
}
/*
* Following deal with the "obviously simple" task of overriding the state
* of the LEDS, which normally indicate link physical and logical status.
* The complications arise in dealing with different hardware mappings
* and the board-dependent routine being called from interrupts.
* and then there's the requirement to _flash_ them.
*/
#define LED_OVER_FREQ_SHIFT 8
#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
/* Below is "non-zero" to force override, but both actual LEDs are off */
#define LED_OVER_BOTH_OFF (8)
static void ipath_run_led_override(unsigned long opaque)
{
struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
int timeoff;
int pidx;
u64 lstate, ltstate, val;
if (!(dd->ipath_flags & IPATH_INITTED))
return;
pidx = dd->ipath_led_override_phase++ & 1;
dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
timeoff = dd->ipath_led_override_timeoff;
/*
* below potentially restores the LED values per current status,
* should also possibly setup the traffic-blink register,
* but leave that to per-chip functions.
*/
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
INFINIPATH_IBCS_LINKSTATE_MASK;
dd->ipath_f_setextled(dd, lstate, ltstate);
mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
}
void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
{
int timeoff, freq;
if (!(dd->ipath_flags & IPATH_INITTED))
return;
/* First check if we are blinking. If not, use 1HZ polling */
timeoff = HZ;
freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
if (freq) {
/* For blink, set each phase from one nybble of val */
dd->ipath_led_override_vals[0] = val & 0xF;
dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
timeoff = (HZ << 4)/freq;
} else {
/* Non-blink set both phases the same. */
dd->ipath_led_override_vals[0] = val & 0xF;
dd->ipath_led_override_vals[1] = val & 0xF;
}
dd->ipath_led_override_timeoff = timeoff;
/*
* If the timer has not already been started, do so. Use a "quick"
* timeout so the function will be called soon, to look at our request.
*/
if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
/* Need to start timer */
init_timer(&dd->ipath_led_override_timer);
dd->ipath_led_override_timer.function =
ipath_run_led_override;
dd->ipath_led_override_timer.data = (unsigned long) dd;
dd->ipath_led_override_timer.expires = jiffies + 1;
add_timer(&dd->ipath_led_override_timer);
} else {
atomic_dec(&dd->ipath_led_override_timer_active);
}
}
/**
* ipath_shutdown_device - shut down a device
* @dd: the infinipath device
*
* This is called to make the device quiet when we are about to
* unload the driver, and also when the device is administratively
* disabled. It does not free any data structures.
* Everything it does has to be setup again by ipath_init_chip(dd,1)
*/
void ipath_shutdown_device(struct ipath_devdata *dd)
{
unsigned long flags;
ipath_dbg("Shutting down the device\n");
dd->ipath_flags |= IPATH_LINKUNK;
dd->ipath_flags &= ~(IPATH_INITTED | IPATH_LINKDOWN |
IPATH_LINKINIT | IPATH_LINKARMED |
IPATH_LINKACTIVE);
*dd->ipath_statusp &= ~(IPATH_STATUS_IB_CONF |
IPATH_STATUS_IB_READY);
/* mask interrupts, but not errors */
ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
dd->ipath_rcvctrl = 0;
ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
dd->ipath_rcvctrl);
/*
* gracefully stop all sends allowing any in progress to trickle out
* first.
*/
spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
dd->ipath_sendctrl = 0;
ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
/* flush it */
ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
/*
* enough for anything that's going to trickle out to have actually
* done so.
*/
udelay(5);
ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
INFINIPATH_IBCC_LINKINITCMD_SHIFT);
ipath_cancel_sends(dd, 0);
signal_ib_event(dd, IB_EVENT_PORT_ERR);
/* disable IBC */
dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
dd->ipath_control | INFINIPATH_C_FREEZEMODE);
/*
* clear SerdesEnable and turn the leds off; do this here because
* we are unloading, so don't count on interrupts to move along
* Turn the LEDs off explictly for the same reason.
*/
dd->ipath_f_quiet_serdes(dd);
if (dd->ipath_stats_timer_active) {
del_timer_sync(&dd->ipath_stats_timer);
dd->ipath_stats_timer_active = 0;
}
/*
* clear all interrupts and errors, so that the next time the driver
* is loaded or device is enabled, we know that whatever is set
* happened while we were unloaded
*/
ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear,
~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
ipath_update_eeprom_log(dd);
}
/**
* ipath_free_pddata - free a port's allocated data
* @dd: the infinipath device
* @pd: the portdata structure
*
* free up any allocated data for a port
* This should not touch anything that would affect a simultaneous
* re-allocation of port data, because it is called after ipath_mutex
* is released (and can be called from reinit as well).
* It should never change any chip state, or global driver state.
* (The only exception to global state is freeing the port0 port0_skbs.)
*/
void ipath_free_pddata(struct ipath_devdata *dd, struct ipath_portdata *pd)
{
if (!pd)
return;
if (pd->port_rcvhdrq) {
ipath_cdbg(VERBOSE, "free closed port %d rcvhdrq @ %p "
"(size=%lu)\n", pd->port_port, pd->port_rcvhdrq,
(unsigned long) pd->port_rcvhdrq_size);
dma_free_coherent(&dd->pcidev->dev, pd->port_rcvhdrq_size,
pd->port_rcvhdrq, pd->port_rcvhdrq_phys);
pd->port_rcvhdrq = NULL;
if (pd->port_rcvhdrtail_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
pd->port_rcvhdrtail_kvaddr,
pd->port_rcvhdrqtailaddr_phys);
pd->port_rcvhdrtail_kvaddr = NULL;
}
}
if (pd->port_port && pd->port_rcvegrbuf) {
unsigned e;
for (e = 0; e < pd->port_rcvegrbuf_chunks; e++) {
void *base = pd->port_rcvegrbuf[e];
size_t size = pd->port_rcvegrbuf_size;
ipath_cdbg(VERBOSE, "egrbuf free(%p, %lu), "
"chunk %u/%u\n", base,
(unsigned long) size,
e, pd->port_rcvegrbuf_chunks);
dma_free_coherent(&dd->pcidev->dev, size,
base, pd->port_rcvegrbuf_phys[e]);
}
kfree(pd->port_rcvegrbuf);
pd->port_rcvegrbuf = NULL;
kfree(pd->port_rcvegrbuf_phys);
pd->port_rcvegrbuf_phys = NULL;
pd->port_rcvegrbuf_chunks = 0;
} else if (pd->port_port == 0 && dd->ipath_port0_skbinfo) {
unsigned e;
struct ipath_skbinfo *skbinfo = dd->ipath_port0_skbinfo;
dd->ipath_port0_skbinfo = NULL;
ipath_cdbg(VERBOSE, "free closed port %d "
"ipath_port0_skbinfo @ %p\n", pd->port_port,
skbinfo);
for (e = 0; e < dd->ipath_rcvegrcnt; e++)
if (skbinfo[e].skb) {
pci_unmap_single(dd->pcidev, skbinfo[e].phys,
dd->ipath_ibmaxlen,
PCI_DMA_FROMDEVICE);
dev_kfree_skb(skbinfo[e].skb);
}
vfree(skbinfo);
}
kfree(pd->port_tid_pg_list);
vfree(pd->subport_uregbase);
vfree(pd->subport_rcvegrbuf);
vfree(pd->subport_rcvhdr_base);
kfree(pd);
}
static int __init infinipath_init(void)
{
int ret;
if (ipath_debug & __IPATH_DBG)
printk(KERN_INFO DRIVER_LOAD_MSG "%s", ib_ipath_version);
/*
* These must be called before the driver is registered with
* the PCI subsystem.
*/
idr_init(&unit_table);
if (!idr_pre_get(&unit_table, GFP_KERNEL)) {
ret = -ENOMEM;
goto bail;
}
ret = pci_register_driver(&ipath_driver);
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME
": Unable to register driver: error %d\n", -ret);
goto bail_unit;
}
ret = ipath_init_ipathfs();
if (ret < 0) {
printk(KERN_ERR IPATH_DRV_NAME ": Unable to create "
"ipathfs: error %d\n", -ret);
goto bail_pci;
}
goto bail;
bail_pci:
pci_unregister_driver(&ipath_driver);
bail_unit:
idr_destroy(&unit_table);
bail:
return ret;
}
static void __exit infinipath_cleanup(void)
{
ipath_exit_ipathfs();
ipath_cdbg(VERBOSE, "Unregistering pci driver\n");
pci_unregister_driver(&ipath_driver);
idr_destroy(&unit_table);
}
/**
* ipath_reset_device - reset the chip if possible
* @unit: the device to reset
*
* Whether or not reset is successful, we attempt to re-initialize the chip
* (that is, much like a driver unload/reload). We clear the INITTED flag
* so that the various entry points will fail until we reinitialize. For
* now, we only allow this if no user ports are open that use chip resources
*/
int ipath_reset_device(int unit)
{
int ret, i;
struct ipath_devdata *dd = ipath_lookup(unit);
if (!dd) {
ret = -ENODEV;
goto bail;
}
if (atomic_read(&dd->ipath_led_override_timer_active)) {
/* Need to stop LED timer, _then_ shut off LEDs */
del_timer_sync(&dd->ipath_led_override_timer);
atomic_set(&dd->ipath_led_override_timer_active, 0);
}
/* Shut off LEDs after we are sure timer is not running */
dd->ipath_led_override = LED_OVER_BOTH_OFF;
dd->ipath_f_setextled(dd, 0, 0);
dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);
if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {
dev_info(&dd->pcidev->dev, "Invalid unit number %u or "
"not initialized or not present\n", unit);
ret = -ENXIO;
goto bail;
}
if (dd->ipath_pd)
for (i = 1; i < dd->ipath_cfgports; i++) {
if (dd->ipath_pd[i] && dd->ipath_pd[i]->port_cnt) {
ipath_dbg("unit %u port %d is in use "
"(PID %u cmd %s), can't reset\n",
unit, i,
dd->ipath_pd[i]->port_pid,
dd->ipath_pd[i]->port_comm);
ret = -EBUSY;
goto bail;
}
}
dd->ipath_flags &= ~IPATH_INITTED;
ret = dd->ipath_f_reset(dd);
if (ret != 1)
ipath_dbg("reset was not successful\n");
ipath_dbg("Trying to reinitialize unit %u after reset attempt\n",
unit);
ret = ipath_init_chip(dd, 1);
if (ret)
ipath_dev_err(dd, "Reinitialize unit %u after "
"reset failed with %d\n", unit, ret);
else
dev_info(&dd->pcidev->dev, "Reinitialized unit %u after "
"resetting\n", unit);
bail:
return ret;
}
int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv)
{
u64 val;
if ( new_pol_inv > INFINIPATH_XGXS_RX_POL_MASK ) {
return -1;
}
if ( dd->ipath_rx_pol_inv != new_pol_inv ) {
dd->ipath_rx_pol_inv = new_pol_inv;
val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_xgxsconfig);
val &= ~(INFINIPATH_XGXS_RX_POL_MASK <<
INFINIPATH_XGXS_RX_POL_SHIFT);
val |= ((u64)dd->ipath_rx_pol_inv) <<
INFINIPATH_XGXS_RX_POL_SHIFT;
ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
}
return 0;
}
/*
* Disable and enable the armlaunch error. Used for PIO bandwidth testing on
* the 7220, which is count-based, rather than trigger-based. Safe for the
* driver check, since it's at init. Not completely safe when used for
* user-mode checking, since some error checking can be lost, but not
* particularly risky, and only has problematic side-effects in the face of
* very buggy user code. There is no reference counting, but that's also
* fine, given the intended use.
*/
void ipath_enable_armlaunch(struct ipath_devdata *dd)
{
dd->ipath_lasterror &= ~INFINIPATH_E_SPIOARMLAUNCH;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
INFINIPATH_E_SPIOARMLAUNCH);
dd->ipath_errormask |= INFINIPATH_E_SPIOARMLAUNCH;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
dd->ipath_errormask);
}
void ipath_disable_armlaunch(struct ipath_devdata *dd)
{
/* so don't re-enable if already set */
dd->ipath_maskederrs &= ~INFINIPATH_E_SPIOARMLAUNCH;
dd->ipath_errormask &= ~INFINIPATH_E_SPIOARMLAUNCH;
ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
dd->ipath_errormask);
}
module_init(infinipath_init);
module_exit(infinipath_cleanup);