diff options
author | Ralph Campbell <ralph.campbell@qlogic.com> | 2010-05-23 21:44:54 -0700 |
---|---|---|
committer | Roland Dreier <rolandd@cisco.com> | 2010-05-23 21:44:54 -0700 |
commit | f931551bafe1f10ded7f5282e2aa162c267a2e5d (patch) | |
tree | e81b4656a8116abf5fd0bc0bbc46560aff536159 /drivers/infiniband/hw/qib/qib_driver.c | |
parent | 9a6edb60ec10d86b1025a0cdad68fd89f1ddaf02 (diff) | |
download | op-kernel-dev-f931551bafe1f10ded7f5282e2aa162c267a2e5d.zip op-kernel-dev-f931551bafe1f10ded7f5282e2aa162c267a2e5d.tar.gz |
IB/qib: Add new qib driver for QLogic PCIe InfiniBand adapters
Add a low-level IB driver for QLogic PCIe adapters.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
Diffstat (limited to 'drivers/infiniband/hw/qib/qib_driver.c')
-rw-r--r-- | drivers/infiniband/hw/qib/qib_driver.c | 665 |
1 files changed, 665 insertions, 0 deletions
diff --git a/drivers/infiniband/hw/qib/qib_driver.c b/drivers/infiniband/hw/qib/qib_driver.c new file mode 100644 index 0000000..f15ce07 --- /dev/null +++ b/drivers/infiniband/hw/qib/qib_driver.c @@ -0,0 +1,665 @@ +/* + * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved. + * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include <linux/spinlock.h> +#include <linux/pci.h> +#include <linux/io.h> +#include <linux/delay.h> +#include <linux/netdevice.h> +#include <linux/vmalloc.h> + +#include "qib.h" + +/* + * The size has to be longer than this string, so we can append + * board/chip information to it in the init code. + */ +const char ib_qib_version[] = QIB_IDSTR "\n"; + +DEFINE_SPINLOCK(qib_devs_lock); +LIST_HEAD(qib_dev_list); +DEFINE_MUTEX(qib_mutex); /* general driver use */ + +unsigned qib_ibmtu; +module_param_named(ibmtu, qib_ibmtu, uint, S_IRUGO); +MODULE_PARM_DESC(ibmtu, "Set max IB MTU (0=2KB, 1=256, 2=512, ... 5=4096"); + +unsigned qib_compat_ddr_negotiate = 1; +module_param_named(compat_ddr_negotiate, qib_compat_ddr_negotiate, uint, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(compat_ddr_negotiate, + "Attempt pre-IBTA 1.2 DDR speed negotiation"); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("QLogic <support@qlogic.com>"); +MODULE_DESCRIPTION("QLogic IB driver"); + +/* + * QIB_PIO_MAXIBHDR is the max IB header size allowed for in our + * PIO send buffers. This is well beyond anything currently + * defined in the InfiniBand spec. + */ +#define QIB_PIO_MAXIBHDR 128 + +struct qlogic_ib_stats qib_stats; + +const char *qib_get_unit_name(int unit) +{ + static char iname[16]; + + snprintf(iname, sizeof iname, "infinipath%u", unit); + return iname; +} + +/* + * Return count of units with at least one port ACTIVE. + */ +int qib_count_active_units(void) +{ + struct qib_devdata *dd; + struct qib_pportdata *ppd; + unsigned long flags; + int pidx, nunits_active = 0; + + spin_lock_irqsave(&qib_devs_lock, flags); + list_for_each_entry(dd, &qib_dev_list, list) { + if (!(dd->flags & QIB_PRESENT) || !dd->kregbase) + continue; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE))) { + nunits_active++; + break; + } + } + } + spin_unlock_irqrestore(&qib_devs_lock, flags); + return nunits_active; +} + +/* + * Return count of all units, optionally return in arguments + * the number of usable (present) units, and the number of + * ports that are up. + */ +int qib_count_units(int *npresentp, int *nupp) +{ + int nunits = 0, npresent = 0, nup = 0; + struct qib_devdata *dd; + unsigned long flags; + int pidx; + struct qib_pportdata *ppd; + + spin_lock_irqsave(&qib_devs_lock, flags); + + list_for_each_entry(dd, &qib_dev_list, list) { + nunits++; + if ((dd->flags & QIB_PRESENT) && dd->kregbase) + npresent++; + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (ppd->lid && (ppd->lflags & (QIBL_LINKINIT | + QIBL_LINKARMED | QIBL_LINKACTIVE))) + nup++; + } + } + + spin_unlock_irqrestore(&qib_devs_lock, flags); + + if (npresentp) + *npresentp = npresent; + if (nupp) + *nupp = nup; + + return nunits; +} + +/** + * qib_wait_linkstate - wait for an IB link state change to occur + * @dd: the qlogic_ib device + * @state: the state to wait for + * @msecs: the number of milliseconds to wait + * + * wait up to msecs milliseconds for IB link state change to occur for + * now, take the easy polling route. Currently used only by + * qib_set_linkstate. Returns 0 if state reached, otherwise + * -ETIMEDOUT state can have multiple states set, for any of several + * transitions. + */ +int qib_wait_linkstate(struct qib_pportdata *ppd, u32 state, int msecs) +{ + int ret; + unsigned long flags; + + spin_lock_irqsave(&ppd->lflags_lock, flags); + if (ppd->state_wanted) { + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + ret = -EBUSY; + goto bail; + } + ppd->state_wanted = state; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + wait_event_interruptible_timeout(ppd->state_wait, + (ppd->lflags & state), + msecs_to_jiffies(msecs)); + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->state_wanted = 0; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + + if (!(ppd->lflags & state)) + ret = -ETIMEDOUT; + else + ret = 0; +bail: + return ret; +} + +int qib_set_linkstate(struct qib_pportdata *ppd, u8 newstate) +{ + u32 lstate; + int ret; + struct qib_devdata *dd = ppd->dd; + unsigned long flags; + + switch (newstate) { + case QIB_IB_LINKDOWN_ONLY: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_NOP); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_POLL); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN_SLEEP: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_SLEEP); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKDOWN_DISABLE: + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_DOWN | IB_LINKINITCMD_DISABLE); + /* don't wait */ + ret = 0; + goto bail; + + case QIB_IB_LINKARM: + if (ppd->lflags & QIBL_LINKARMED) { + ret = 0; + goto bail; + } + if (!(ppd->lflags & (QIBL_LINKINIT | QIBL_LINKACTIVE))) { + ret = -EINVAL; + goto bail; + } + /* + * Since the port can be ACTIVE when we ask for ARMED, + * clear QIBL_LINKV so we can wait for a transition. + * If the link isn't ARMED, then something else happened + * and there is no point waiting for ARMED. + */ + spin_lock_irqsave(&ppd->lflags_lock, flags); + ppd->lflags &= ~QIBL_LINKV; + spin_unlock_irqrestore(&ppd->lflags_lock, flags); + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_ARMED | IB_LINKINITCMD_NOP); + lstate = QIBL_LINKV; + break; + + case QIB_IB_LINKACTIVE: + if (ppd->lflags & QIBL_LINKACTIVE) { + ret = 0; + goto bail; + } + if (!(ppd->lflags & QIBL_LINKARMED)) { + ret = -EINVAL; + goto bail; + } + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LSTATE, + IB_LINKCMD_ACTIVE | IB_LINKINITCMD_NOP); + lstate = QIBL_LINKACTIVE; + break; + + default: + ret = -EINVAL; + goto bail; + } + ret = qib_wait_linkstate(ppd, lstate, 10); + +bail: + return ret; +} + +/* + * Get address of eager buffer from it's index (allocated in chunks, not + * contiguous). + */ +static inline void *qib_get_egrbuf(const struct qib_ctxtdata *rcd, u32 etail) +{ + const u32 chunk = etail / rcd->rcvegrbufs_perchunk; + const u32 idx = etail % rcd->rcvegrbufs_perchunk; + + return rcd->rcvegrbuf[chunk] + idx * rcd->dd->rcvegrbufsize; +} + +/* + * Returns 1 if error was a CRC, else 0. + * Needed for some chip's synthesized error counters. + */ +static u32 qib_rcv_hdrerr(struct qib_pportdata *ppd, u32 ctxt, + u32 eflags, u32 l, u32 etail, __le32 *rhf_addr, + struct qib_message_header *hdr) +{ + u32 ret = 0; + + if (eflags & (QLOGIC_IB_RHF_H_ICRCERR | QLOGIC_IB_RHF_H_VCRCERR)) + ret = 1; + return ret; +} + +/* + * qib_kreceive - receive a packet + * @rcd: the qlogic_ib context + * @llic: gets count of good packets needed to clear lli, + * (used with chips that need need to track crcs for lli) + * + * called from interrupt handler for errors or receive interrupt + * Returns number of CRC error packets, needed by some chips for + * local link integrity tracking. crcs are adjusted down by following + * good packets, if any, and count of good packets is also tracked. + */ +u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts) +{ + struct qib_devdata *dd = rcd->dd; + struct qib_pportdata *ppd = rcd->ppd; + __le32 *rhf_addr; + void *ebuf; + const u32 rsize = dd->rcvhdrentsize; /* words */ + const u32 maxcnt = dd->rcvhdrcnt * rsize; /* words */ + u32 etail = -1, l, hdrqtail; + struct qib_message_header *hdr; + u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0; + int last; + u64 lval; + struct qib_qp *qp, *nqp; + + l = rcd->head; + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + if (seq != rcd->seq_cnt) + goto bail; + hdrqtail = 0; + } else { + hdrqtail = qib_get_rcvhdrtail(rcd); + if (l == hdrqtail) + goto bail; + smp_rmb(); /* prevent speculative reads of dma'ed hdrq */ + } + + for (last = 0, i = 1; !last; i += !last) { + hdr = dd->f_get_msgheader(dd, rhf_addr); + eflags = qib_hdrget_err_flags(rhf_addr); + etype = qib_hdrget_rcv_type(rhf_addr); + /* total length */ + tlen = qib_hdrget_length_in_bytes(rhf_addr); + ebuf = NULL; + if ((dd->flags & QIB_NODMA_RTAIL) ? + qib_hdrget_use_egr_buf(rhf_addr) : + (etype != RCVHQ_RCV_TYPE_EXPECTED)) { + etail = qib_hdrget_index(rhf_addr); + updegr = 1; + if (tlen > sizeof(*hdr) || + etype >= RCVHQ_RCV_TYPE_NON_KD) + ebuf = qib_get_egrbuf(rcd, etail); + } + if (!eflags) { + u16 lrh_len = be16_to_cpu(hdr->lrh[2]) << 2; + + if (lrh_len != tlen) { + qib_stats.sps_lenerrs++; + goto move_along; + } + } + if (etype == RCVHQ_RCV_TYPE_NON_KD && !eflags && + ebuf == NULL && + tlen > (dd->rcvhdrentsize - 2 + 1 - + qib_hdrget_offset(rhf_addr)) << 2) { + goto move_along; + } + + /* + * Both tiderr and qibhdrerr are set for all plain IB + * packets; only qibhdrerr should be set. + */ + if (unlikely(eflags)) + crcs += qib_rcv_hdrerr(ppd, rcd->ctxt, eflags, l, + etail, rhf_addr, hdr); + else if (etype == RCVHQ_RCV_TYPE_NON_KD) { + qib_ib_rcv(rcd, hdr, ebuf, tlen); + if (crcs) + crcs--; + else if (llic && *llic) + --*llic; + } +move_along: + l += rsize; + if (l >= maxcnt) + l = 0; + rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset; + if (dd->flags & QIB_NODMA_RTAIL) { + u32 seq = qib_hdrget_seq(rhf_addr); + + if (++rcd->seq_cnt > 13) + rcd->seq_cnt = 1; + if (seq != rcd->seq_cnt) + last = 1; + } else if (l == hdrqtail) + last = 1; + /* + * Update head regs etc., every 16 packets, if not last pkt, + * to help prevent rcvhdrq overflows, when many packets + * are processed and queue is nearly full. + * Don't request an interrupt for intermediate updates. + */ + lval = l; + if (!last && !(i & 0xf)) { + dd->f_update_usrhead(rcd, lval, updegr, etail); + updegr = 0; + } + } + + rcd->head = l; + rcd->pkt_count += i; + + /* + * Iterate over all QPs waiting to respond. + * The list won't change since the IRQ is only run on one CPU. + */ + list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) { + list_del_init(&qp->rspwait); + if (qp->r_flags & QIB_R_RSP_NAK) { + qp->r_flags &= ~QIB_R_RSP_NAK; + qib_send_rc_ack(qp); + } + if (qp->r_flags & QIB_R_RSP_SEND) { + unsigned long flags; + + qp->r_flags &= ~QIB_R_RSP_SEND; + spin_lock_irqsave(&qp->s_lock, flags); + if (ib_qib_state_ops[qp->state] & + QIB_PROCESS_OR_FLUSH_SEND) + qib_schedule_send(qp); + spin_unlock_irqrestore(&qp->s_lock, flags); + } + if (atomic_dec_and_test(&qp->refcount)) + wake_up(&qp->wait); + } + +bail: + /* Report number of packets consumed */ + if (npkts) + *npkts = i; + + /* + * Always write head at end, and setup rcv interrupt, even + * if no packets were processed. + */ + lval = (u64)rcd->head | dd->rhdrhead_intr_off; + dd->f_update_usrhead(rcd, lval, updegr, etail); + return crcs; +} + +/** + * qib_set_mtu - set the MTU + * @ppd: the perport data + * @arg: the new MTU + * + * We can handle "any" incoming size, the issue here is whether we + * need to restrict our outgoing size. For now, we don't do any + * sanity checking on this, and we don't deal with what happens to + * programs that are already running when the size changes. + * NOTE: changing the MTU will usually cause the IBC to go back to + * link INIT state... + */ +int qib_set_mtu(struct qib_pportdata *ppd, u16 arg) +{ + u32 piosize; + int ret, chk; + + if (arg != 256 && arg != 512 && arg != 1024 && arg != 2048 && + arg != 4096) { + ret = -EINVAL; + goto bail; + } + chk = ib_mtu_enum_to_int(qib_ibmtu); + if (chk > 0 && arg > chk) { + ret = -EINVAL; + goto bail; + } + + piosize = ppd->ibmaxlen; + ppd->ibmtu = arg; + + if (arg >= (piosize - QIB_PIO_MAXIBHDR)) { + /* Only if it's not the initial value (or reset to it) */ + if (piosize != ppd->init_ibmaxlen) { + if (arg > piosize && arg <= ppd->init_ibmaxlen) + piosize = ppd->init_ibmaxlen - 2 * sizeof(u32); + ppd->ibmaxlen = piosize; + } + } else if ((arg + QIB_PIO_MAXIBHDR) != ppd->ibmaxlen) { + piosize = arg + QIB_PIO_MAXIBHDR - 2 * sizeof(u32); + ppd->ibmaxlen = piosize; + } + + ppd->dd->f_set_ib_cfg(ppd, QIB_IB_CFG_MTU, 0); + + ret = 0; + +bail: + return ret; +} + +int qib_set_lid(struct qib_pportdata *ppd, u32 lid, u8 lmc) +{ + struct qib_devdata *dd = ppd->dd; + ppd->lid = lid; + ppd->lmc = lmc; + + dd->f_set_ib_cfg(ppd, QIB_IB_CFG_LIDLMC, + lid | (~((1U << lmc) - 1)) << 16); + + qib_devinfo(dd->pcidev, "IB%u:%u got a lid: 0x%x\n", + dd->unit, ppd->port, lid); + + return 0; +} + +/* + * Following deal with the "obviously simple" task of overriding the state + * of the LEDS, which normally indicate link physical and logical status. + * The complications arise in dealing with different hardware mappings + * and the board-dependent routine being called from interrupts. + * and then there's the requirement to _flash_ them. + */ +#define LED_OVER_FREQ_SHIFT 8 +#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT) +/* Below is "non-zero" to force override, but both actual LEDs are off */ +#define LED_OVER_BOTH_OFF (8) + +static void qib_run_led_override(unsigned long opaque) +{ + struct qib_pportdata *ppd = (struct qib_pportdata *)opaque; + struct qib_devdata *dd = ppd->dd; + int timeoff; + int ph_idx; + + if (!(dd->flags & QIB_INITTED)) + return; + + ph_idx = ppd->led_override_phase++ & 1; + ppd->led_override = ppd->led_override_vals[ph_idx]; + timeoff = ppd->led_override_timeoff; + + dd->f_setextled(ppd, 1); + /* + * don't re-fire the timer if user asked for it to be off; we let + * it fire one more time after they turn it off to simplify + */ + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + timeoff); +} + +void qib_set_led_override(struct qib_pportdata *ppd, unsigned int val) +{ + struct qib_devdata *dd = ppd->dd; + int timeoff, freq; + + if (!(dd->flags & QIB_INITTED)) + return; + + /* First check if we are blinking. If not, use 1HZ polling */ + timeoff = HZ; + freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT; + + if (freq) { + /* For blink, set each phase from one nybble of val */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = (val >> 4) & 0xF; + timeoff = (HZ << 4)/freq; + } else { + /* Non-blink set both phases the same. */ + ppd->led_override_vals[0] = val & 0xF; + ppd->led_override_vals[1] = val & 0xF; + } + ppd->led_override_timeoff = timeoff; + + /* + * If the timer has not already been started, do so. Use a "quick" + * timeout so the function will be called soon, to look at our request. + */ + if (atomic_inc_return(&ppd->led_override_timer_active) == 1) { + /* Need to start timer */ + init_timer(&ppd->led_override_timer); + ppd->led_override_timer.function = qib_run_led_override; + ppd->led_override_timer.data = (unsigned long) ppd; + ppd->led_override_timer.expires = jiffies + 1; + add_timer(&ppd->led_override_timer); + } else { + if (ppd->led_override_vals[0] || ppd->led_override_vals[1]) + mod_timer(&ppd->led_override_timer, jiffies + 1); + atomic_dec(&ppd->led_override_timer_active); + } +} + +/** + * qib_reset_device - reset the chip if possible + * @unit: the device to reset + * + * Whether or not reset is successful, we attempt to re-initialize the chip + * (that is, much like a driver unload/reload). We clear the INITTED flag + * so that the various entry points will fail until we reinitialize. For + * now, we only allow this if no user contexts are open that use chip resources + */ +int qib_reset_device(int unit) +{ + int ret, i; + struct qib_devdata *dd = qib_lookup(unit); + struct qib_pportdata *ppd; + unsigned long flags; + int pidx; + + if (!dd) { + ret = -ENODEV; + goto bail; + } + + qib_devinfo(dd->pcidev, "Reset on unit %u requested\n", unit); + + if (!dd->kregbase || !(dd->flags & QIB_PRESENT)) { + qib_devinfo(dd->pcidev, "Invalid unit number %u or " + "not initialized or not present\n", unit); + ret = -ENXIO; + goto bail; + } + + spin_lock_irqsave(&dd->uctxt_lock, flags); + if (dd->rcd) + for (i = dd->first_user_ctxt; i < dd->cfgctxts; i++) { + if (!dd->rcd[i] || !dd->rcd[i]->cnt) + continue; + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + ret = -EBUSY; + goto bail; + } + spin_unlock_irqrestore(&dd->uctxt_lock, flags); + + for (pidx = 0; pidx < dd->num_pports; ++pidx) { + ppd = dd->pport + pidx; + if (atomic_read(&ppd->led_override_timer_active)) { + /* Need to stop LED timer, _then_ shut off LEDs */ + del_timer_sync(&ppd->led_override_timer); + atomic_set(&ppd->led_override_timer_active, 0); + } + + /* Shut off LEDs after we are sure timer is not running */ + ppd->led_override = LED_OVER_BOTH_OFF; + dd->f_setextled(ppd, 0); + if (dd->flags & QIB_HAS_SEND_DMA) + qib_teardown_sdma(ppd); + } + + ret = dd->f_reset(dd); + if (ret == 1) + ret = qib_init(dd, 1); + else + ret = -EAGAIN; + if (ret) + qib_dev_err(dd, "Reinitialize unit %u after " + "reset failed with %d\n", unit, ret); + else + qib_devinfo(dd->pcidev, "Reinitialized unit %u after " + "resetting\n", unit); + +bail: + return ret; +} |